2023-02-11 15:29:51 +08:00
|
|
|
#include "VTEncoding.hpp"
|
|
|
|
|
2023-02-26 21:48:03 +08:00
|
|
|
namespace LibCmo::EncodingHelper {
|
2023-02-11 15:29:51 +08:00
|
|
|
|
|
|
|
#pragma region assist functions
|
|
|
|
|
|
|
|
#if defined(LIBCMO_OS_WIN32)
|
|
|
|
|
|
|
|
// Evaluates to true when the two NUL-terminated strings compare equal with strcmp.
// Both arguments are reinterpreted as `const char*`, so callers may pass literals
// whose character type is not plain `char` (this file passes u8"" literals).
// The whole expansion is parenthesized so that the macro behaves as a single
// boolean operand inside any surrounding expression.
#define LIBCMO_STR_EQUAL(a, b) (strcmp(reinterpret_cast<const char*>(a), reinterpret_cast<const char*>(b)) == 0)
|
2023-02-26 21:48:03 +08:00
|
|
|
bool GetWindowsCodePage(const char* u8_encoding_spec, UINT* result) {
|
|
|
|
if (LIBCMO_STR_EQUAL(u8_encoding_spec, u8"CP_ACP")) *result = CP_ACP;
|
|
|
|
else if (LIBCMO_STR_EQUAL(u8_encoding_spec, u8"CP_MACCP")) *result = CP_MACCP;
|
|
|
|
else if (LIBCMO_STR_EQUAL(u8_encoding_spec, u8"CP_OEMCP")) *result = CP_OEMCP;
|
|
|
|
else if (LIBCMO_STR_EQUAL(u8_encoding_spec, u8"CP_THREAD_ACPP")) *result = CP_THREAD_ACP;
|
|
|
|
else if (LIBCMO_STR_EQUAL(u8_encoding_spec, u8"CP_UTF8")) *result = CP_UTF8;
|
|
|
|
else {
|
|
|
|
char* pend = nullptr;
|
|
|
|
errno = 0;
|
|
|
|
uint64_t v = std::strtoull(u8_encoding_spec, &pend, 10);
|
|
|
|
|
|
|
|
if (pend == u8_encoding_spec || errno == ERANGE) return false;
|
|
|
|
*result = static_cast<UINT>(v);
|
2023-02-11 15:29:51 +08:00
|
|
|
}
|
2023-02-26 21:48:03 +08:00
|
|
|
return true;
|
|
|
|
}
|
2023-02-11 15:29:51 +08:00
|
|
|
#undef LIBCMO_STR_EQUAL
|
|
|
|
|
2023-03-04 11:11:36 +08:00
|
|
|
bool WcharToChar(const wchar_t* src, std::string& dest, const UINT codepage) {
|
2023-02-26 21:48:03 +08:00
|
|
|
int count, write_result;
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-02-26 21:48:03 +08:00
|
|
|
//converter to CHAR
|
2023-03-03 11:06:26 +08:00
|
|
|
count = WideCharToMultiByte(codepage, 0, src, -1, NULL, 0, NULL, NULL);
|
2023-02-26 21:48:03 +08:00
|
|
|
if (count <= 0) return false;
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-08-28 22:20:46 +08:00
|
|
|
dest.resize(count - 1);
|
2023-03-03 11:06:26 +08:00
|
|
|
write_result = WideCharToMultiByte(codepage, 0, src, -1, dest.data(), count, NULL, NULL);
|
2023-02-26 21:48:03 +08:00
|
|
|
if (write_result <= 0) return false;
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-02-26 21:48:03 +08:00
|
|
|
return true;
|
|
|
|
}
|
2023-03-04 11:11:36 +08:00
|
|
|
/**
 * @brief std::wstring convenience overload of WcharToChar.
 * @details Forwards src.c_str() to the pointer overload, so conversion stops at
 * the first NUL character contained in src.
 * @param[in] src Wide string to convert.
 * @param[out] dest Receives the converted bytes.
 * @param[in] codepage Code page of the output.
 * @return The result of the pointer overload.
 */
bool WcharToChar(const std::wstring& src, std::string& dest, const UINT codepage) {
    const wchar_t* const raw = src.c_str();
    return WcharToChar(raw, dest, codepage);
}
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-03-04 11:11:36 +08:00
|
|
|
bool CharToWchar(const char* src, std::wstring& dest, const UINT codepage) {
|
2023-02-26 21:48:03 +08:00
|
|
|
int wcount, write_result;
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-02-26 21:48:03 +08:00
|
|
|
// convert to WCHAR
|
|
|
|
wcount = MultiByteToWideChar(codepage, 0, src, -1, NULL, 0);
|
|
|
|
if (wcount <= 0) return false;
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-08-28 22:20:46 +08:00
|
|
|
dest.resize(wcount - 1);
|
2023-03-03 11:06:26 +08:00
|
|
|
write_result = MultiByteToWideChar(codepage, 0, src, -1, dest.data(), wcount);
|
2023-02-26 21:48:03 +08:00
|
|
|
if (write_result <= 0) return false;
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-02-26 21:48:03 +08:00
|
|
|
return true;
|
|
|
|
}
|
2023-03-04 11:11:36 +08:00
|
|
|
/**
 * @brief std::string convenience overload of CharToWchar.
 * @details Forwards src.c_str() to the pointer overload, so conversion stops at
 * the first NUL character contained in src.
 * @param[in] src Multi-byte string to convert.
 * @param[out] dest Receives the converted wide characters.
 * @param[in] codepage Code page describing src.
 * @return The result of the pointer overload.
 */
bool CharToWchar(const std::string& src, std::wstring& dest, const UINT codepage) {
    const char* const raw = src.c_str();
    return CharToWchar(raw, dest, codepage);
}
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-03-04 11:11:36 +08:00
|
|
|
bool CharToChar(const char* src, std::string& dest, const UINT src_codepage, const UINT dest_codepage) {
|
2023-02-26 21:48:03 +08:00
|
|
|
std::wstring intermediary;
|
|
|
|
if (!CharToWchar(src, intermediary, src_codepage)) return false;
|
|
|
|
if (!WcharToChar(intermediary, dest, dest_codepage)) return false;
|
|
|
|
return true;
|
|
|
|
}
|
2023-03-04 11:11:36 +08:00
|
|
|
/**
 * @brief std::string convenience overload of CharToChar.
 * @details Forwards src.c_str() to the pointer overload, so conversion stops at
 * the first NUL character contained in src.
 * @param[in] src String encoded in src_codepage.
 * @param[out] dest Receives the string encoded in dest_codepage.
 * @param[in] src_codepage Code page of src.
 * @param[in] dest_codepage Code page wanted for dest.
 * @return The result of the pointer overload.
 */
bool CharToChar(const std::string& src, std::string& dest, const UINT src_codepage, const UINT dest_codepage) {
    const char* const raw = src.c_str();
    return CharToChar(raw, dest, src_codepage, dest_codepage);
}
|
2023-02-25 22:58:28 +08:00
|
|
|
|
2023-02-11 15:29:51 +08:00
|
|
|
#else
|
|
|
|
|
2023-03-03 11:06:26 +08:00
|
|
|
// Number of bytes DoIconv adds to its output buffer: once as initial headroom on top of
// the input size, and again every time iconv reports that the buffer is too small.
static constexpr const size_t IconvInc = 16;
|
2023-03-04 14:08:16 +08:00
|
|
|
// Sentinel meaning "no open descriptor": the value -1 cast to iconv_t.
// CreateIconvDescriptor compares the result of iconv_open against it to detect failure.
static const iconv_t InvalidIconvDescriptor = reinterpret_cast<iconv_t>(-1);
|
2023-03-03 11:06:26 +08:00
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
bool CreateIconvDescriptor(const char* enc_from, const char* enc_to, iconv_t& val) {
|
|
|
|
val = iconv_open(enc_to, enc_from);
|
|
|
|
return val != InvalidIconvDescriptor;
|
|
|
|
}
|
|
|
|
/**
 * @brief Close an iconv descriptor and mark it as invalid.
 * @details Does nothing if val already equals InvalidIconvDescriptor, so calling
 * it again on the same variable does not close the descriptor twice.
 * @param[in,out] val Descriptor to close. Set to InvalidIconvDescriptor afterwards.
 */
void DestroyIconvDescriptor(iconv_t& val) {
    if (val != InvalidIconvDescriptor) {
        iconv_close(val);
        val = InvalidIconvDescriptor;
    }
}
|
|
|
|
|
|
|
|
// Reference: https://stackoverflow.com/questions/13297458/simple-utf8-utf16-string-conversion-with-iconv
|
|
|
|
bool DoIconv(iconv_t& cd, const std::string& str_from, std::string& str_to) {
|
2023-03-01 15:51:56 +08:00
|
|
|
char *inbuf = nullptr, *outbuf = nullptr;
|
|
|
|
size_t inbytesleft, outbytesleft, nchars, result_len;
|
|
|
|
|
|
|
|
// check empty
|
|
|
|
if (str_from.empty()) {
|
|
|
|
str_to.clear();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
// check iconv descriptor
|
|
|
|
if (cd == InvalidIconvDescriptor) {
|
|
|
|
// invalid iconv descriptor
|
2023-03-01 15:51:56 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// pre-resize
|
|
|
|
str_to.resize(str_from.size() + IconvInc);
|
|
|
|
// setup some variables
|
|
|
|
inbytesleft = str_from.size();
|
2023-03-04 14:08:16 +08:00
|
|
|
inbuf = const_cast<char*>(str_from.c_str());
|
2023-03-01 15:51:56 +08:00
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
outbytesleft = str_to.size();
|
2023-03-01 15:51:56 +08:00
|
|
|
outbuf = str_to.data();
|
|
|
|
|
|
|
|
result_len = str_to.size();
|
|
|
|
|
|
|
|
// conv core
|
|
|
|
nchars = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
|
|
|
while (nchars == (size_t)-1 && errno == E2BIG) {
|
|
|
|
// record the length has been converted
|
|
|
|
size_t len = outbuf - str_to.data();
|
|
|
|
|
|
|
|
// resize for variables
|
|
|
|
result_len += IconvInc;
|
|
|
|
outbytesleft += IconvInc;
|
|
|
|
|
|
|
|
// resize for container
|
|
|
|
str_to.resize(result_len);
|
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
// assign new outbuf from failed position
|
2023-03-01 15:51:56 +08:00
|
|
|
outbuf = str_to.data() + len;
|
|
|
|
nchars = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
|
|
|
}
|
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
// restore descriptor initial state
|
|
|
|
iconv(cd, nullptr, nullptr, nullptr, nullptr);
|
2023-03-01 15:51:56 +08:00
|
|
|
|
|
|
|
// check error
|
|
|
|
if (nchars == (size_t)-1) {
|
|
|
|
// failed
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
// success
|
|
|
|
// resize result to get correct data
|
|
|
|
str_to.resize(result_len - outbytesleft);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
2023-02-11 15:29:51 +08:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#pragma endregion
|
|
|
|
|
|
|
|
#pragma region core functions
|
|
|
|
|
|
|
|
#if defined(LIBCMO_OS_WIN32)
|
|
|
|
|
2023-03-04 11:11:36 +08:00
|
|
|
/**
 * @brief Create an encoding token from an encoding specification string.
 * @details The token is a heap-allocated UINT holding a code page. If
 * GetWindowsCodePage rejects token_string, the token is set to CP_ACP instead.
 * @param[in] token_string Encoding specification understood by GetWindowsCodePage.
 * @return The newly allocated token. Release it with DestroyEncodingToken.
 */
ENCODING_TOKEN CreateEncodingToken(const std::string& token_string) {
    ENCODING_TOKEN token = new UINT();
    const bool recognized = GetWindowsCodePage(token_string.c_str(), token);
    if (!recognized) {
        // Unknown specification: fall back to CP_ACP.
        *token = CP_ACP;
    }
    return token;
}
|
2023-03-04 11:11:36 +08:00
|
|
|
void DestroyEncodingToken(const ENCODING_TOKEN& token) {
|
2023-02-26 21:48:03 +08:00
|
|
|
if (token != ENCODING_TOKEN_DEFAULT) {
|
|
|
|
delete token;
|
2023-02-25 22:58:28 +08:00
|
|
|
}
|
2023-02-26 21:48:03 +08:00
|
|
|
}
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-03-04 11:11:36 +08:00
|
|
|
bool GetUtf8VirtoolsName(const std::string& native_name, std::string& u8_name, const ENCODING_TOKEN& token) {
|
|
|
|
if (token == ENCODING_TOKEN_DEFAULT) return false;
|
|
|
|
return CharToChar(native_name, u8_name, *token, CP_UTF8);
|
2023-02-26 21:48:03 +08:00
|
|
|
}
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-03-04 11:11:36 +08:00
|
|
|
bool GetNativeVirtoolsName(const std::string& u8_name, std::string& native_name, const ENCODING_TOKEN& token) {
|
|
|
|
if (token == ENCODING_TOKEN_DEFAULT) return false;
|
|
|
|
return CharToChar(u8_name, native_name, CP_UTF8, *token);
|
2023-02-26 21:48:03 +08:00
|
|
|
}
|
2023-02-11 15:29:51 +08:00
|
|
|
|
2023-08-30 10:03:02 +08:00
|
|
|
void U8PathToStdPath(std::filesystem::path& stdpath, const char* u8_path) {
|
2023-02-26 21:48:03 +08:00
|
|
|
std::wstring intermediary;
|
|
|
|
if (CharToWchar(u8_path, intermediary, CP_UTF8)) {
|
|
|
|
stdpath = intermediary.c_str();
|
|
|
|
} else {
|
|
|
|
// fallback
|
|
|
|
stdpath = u8_path;
|
2023-02-12 18:08:29 +08:00
|
|
|
}
|
2023-02-26 21:48:03 +08:00
|
|
|
}
|
2023-08-30 10:03:02 +08:00
|
|
|
|
|
|
|
void StdPathToU8Path(std::string& u8path, std::filesystem::path& stdpath) {
|
|
|
|
if (!WcharToChar(stdpath.wstring(), u8path, CP_UTF8)) {
|
|
|
|
// fallback
|
|
|
|
u8path = stdpath.string();
|
|
|
|
}
|
|
|
|
}
|
2023-08-28 17:04:28 +08:00
|
|
|
|
2023-08-30 10:03:02 +08:00
|
|
|
FILE* U8FOpen(const char* u8_filepath, const char* u8_mode) {
|
|
|
|
std::wstring wmode, wpath;
|
|
|
|
bool suc = CharToWchar(u8_mode, wmode, CP_UTF8);
|
|
|
|
suc = suc && CharToWchar(u8_filepath, wpath, CP_UTF8);
|
|
|
|
|
|
|
|
if (suc) {
|
|
|
|
return _wfopen(wpath.c_str(), wmode.c_str());
|
2023-08-28 17:04:28 +08:00
|
|
|
} else {
|
|
|
|
// fallback
|
2023-08-30 10:03:02 +08:00
|
|
|
return std::fopen(u8_filepath, u8_mode);
|
2023-08-28 17:04:28 +08:00
|
|
|
}
|
2023-02-26 21:48:03 +08:00
|
|
|
}
|
2023-02-12 18:08:29 +08:00
|
|
|
|
2023-02-11 15:29:51 +08:00
|
|
|
#else
|
2023-02-25 22:58:28 +08:00
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
/// Construct a pair whose two descriptors both start out as InvalidIconvDescriptor.
IconvPair::IconvPair()
    : FromUtf8(InvalidIconvDescriptor)
    , ToUtf8(InvalidIconvDescriptor)
{}
|
|
|
|
|
|
|
|
/// Close both descriptors. DestroyIconvDescriptor skips any that were never opened.
IconvPair::~IconvPair() {
    DestroyIconvDescriptor(FromUtf8);
    DestroyIconvDescriptor(ToUtf8);
}
|
|
|
|
|
|
|
|
|
2023-03-03 11:06:26 +08:00
|
|
|
// Encoding name handed to iconv_open for the UTF-8 side of every conversion in this file.
static constexpr const char UTF8_SYMBOL[] = "UTF-8";
|
2023-03-01 15:51:56 +08:00
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
/**
 * @brief Create an encoding token from an iconv encoding name.
 * @details The token owns two descriptors: one converting from UTF-8 to the
 * named encoding and one converting back. If either cannot be opened, the
 * token is deleted again.
 * @param[in] token_string Encoding name passed to iconv_open.
 * @return The newly allocated token, or ENCODING_TOKEN_DEFAULT if a descriptor
 * could not be opened. Release it with DestroyEncodingToken.
 */
ENCODING_TOKEN CreateEncodingToken(const std::string& token_string) {
    ENCODING_TOKEN token = new IconvPair();
    const char* const native = token_string.c_str();

    // The second descriptor is only attempted when the first one opened.
    const bool ready = CreateIconvDescriptor(UTF8_SYMBOL, native, token->FromUtf8)
        && CreateIconvDescriptor(native, UTF8_SYMBOL, token->ToUtf8);
    if (ready) return token;

    // Deleting the pair closes whichever descriptor did open.
    delete token;
    return ENCODING_TOKEN_DEFAULT;
}
|
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
void DestroyEncodingToken(const ENCODING_TOKEN& token) {
|
2023-03-01 15:51:56 +08:00
|
|
|
if (token != ENCODING_TOKEN_DEFAULT) {
|
2023-03-04 14:08:16 +08:00
|
|
|
delete token;
|
2023-03-01 15:51:56 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
bool GetUtf8VirtoolsName(const std::string& native_name, std::string& u8_name, const ENCODING_TOKEN& token) {
|
|
|
|
if (token == ENCODING_TOKEN_DEFAULT) return false;
|
|
|
|
return DoIconv(token->ToUtf8, native_name, u8_name);
|
2023-03-01 15:51:56 +08:00
|
|
|
}
|
|
|
|
|
2023-03-04 14:08:16 +08:00
|
|
|
bool GetNativeVirtoolsName(const std::string& u8_name, std::string& native_name, const ENCODING_TOKEN& token) {
|
|
|
|
if (token == ENCODING_TOKEN_DEFAULT) return false;
|
|
|
|
return DoIconv(token->FromUtf8, u8_name, native_name);
|
2023-03-01 15:51:56 +08:00
|
|
|
}
|
|
|
|
|
2023-08-30 10:03:02 +08:00
|
|
|
/**
 * @brief Assign a UTF-8 path string to a std::filesystem::path.
 * @details On this platform the string is handed to the path without any conversion.
 * @param[out] stdpath Path object to assign.
 * @param[in] u8_path NUL-terminated path string.
 */
void U8PathToStdPath(std::filesystem::path& stdpath, const char* u8_path) {
    stdpath.assign(u8_path);
}
|
|
|
|
|
2023-08-30 10:03:02 +08:00
|
|
|
/**
 * @brief Produce the string form of a std::filesystem::path.
 * @details On this platform the result is stdpath.string() without any conversion.
 * @param[out] u8path Receives the path string.
 * @param[in] stdpath Path to convert.
 */
void StdPathToU8Path(std::string& u8path, std::filesystem::path& stdpath) {
    std::string native = stdpath.string();
    u8path = std::move(native);
}
|
|
|
|
|
|
|
|
/**
 * @brief Open a file whose path and mode are given as UTF-8 strings.
 * @details On this platform both strings are passed straight to std::fopen.
 * @param[in] u8_filepath NUL-terminated file path.
 * @param[in] u8_mode NUL-terminated open mode.
 * @return The FILE pointer returned by std::fopen.
 */
FILE* U8FOpen(const char* u8_filepath, const char* u8_mode) {
    FILE* const handle = std::fopen(u8_filepath, u8_mode);
    return handle;
}
|
2023-02-25 22:58:28 +08:00
|
|
|
|
2023-02-11 15:29:51 +08:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#pragma endregion
|
|
|
|
|
|
|
|
}
|
|
|
|
|