refactor: bring char8_t to this library.
- Add yycc_char8_t and yycc_u8string to explicitly indicate the UTF-8 char type and string type. A polyfill is provided if the compiler and standard library do not support a UTF-8 char type.
- Refactor the whole encoding helper to allow converting strings with embedded NUL characters (not yet tested).
This commit is contained in:
parent
bb17bb6a1f
commit
c15b57d055
@ -4,89 +4,189 @@
|
|||||||
|
|
||||||
namespace YYCC::EncodingHelper {
|
namespace YYCC::EncodingHelper {
|
||||||
|
|
||||||
|
/* Define some assistant macros for easy writing. */
|
||||||
|
|
||||||
|
#define CONVFCT_TYPE2(fct_name, src_char_type, dst_char_type, ...) if (src == nullptr) return false; \
|
||||||
|
std::basic_string<src_char_type> cache(src); \
|
||||||
|
return fct_name(cache, dst, ##__VA_ARGS__);
|
||||||
|
|
||||||
|
#define CONVFCT_TYPE3(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
|
||||||
|
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
#define CONVFCT_TYPE4(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
|
||||||
|
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
|
||||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||||
|
|
||||||
bool WcharToChar(const wchar_t* src, std::string& dest, UINT codepage) {
|
#pragma region WcharToChar
|
||||||
int count, write_result;
|
|
||||||
|
|
||||||
//converter to CHAR
|
bool WcharToChar(const std::wstring& src, std::string& dst, UINT code_page) {
|
||||||
count = WideCharToMultiByte(codepage, 0, reinterpret_cast<LPCWCH>(src), -1, NULL, 0, NULL, NULL);
|
// if src is empty, direct output
|
||||||
if (count <= 0) return false;
|
if (src.empty()) {
|
||||||
|
dst.clear();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
dest.resize(count - 1);
|
// init the variables used by WideCharToMultiByte
|
||||||
write_result = WideCharToMultiByte(codepage, 0, reinterpret_cast<LPCWCH>(src), -1, reinterpret_cast<LPSTR>(dest.data()), count, NULL, NULL);
|
// setup src pointer
|
||||||
|
LPCWCH lpWideCharStr = reinterpret_cast<LPCWCH>(src.c_str());
|
||||||
|
// check whether source string is too large.
|
||||||
|
size_t cSrcSize = src.size();
|
||||||
|
if (cSrcSize > std::numeric_limits<int>::max()) return false;
|
||||||
|
int cchWideChar = static_cast<int>(src.size());
|
||||||
|
|
||||||
|
// do conversion
|
||||||
|
// do a dry-run first to fetch desired size.
|
||||||
|
int desired_size = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, NULL, 0, NULL, NULL);
|
||||||
|
if (desired_size <= 0) return false;
|
||||||
|
// resize dest for receiving result
|
||||||
|
dst.resize(static_cast<size_t>(desired_size));
|
||||||
|
// do the real conversion
|
||||||
|
int write_result = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, reinterpret_cast<LPSTR>(dst.data()), desired_size, NULL, NULL);
|
||||||
if (write_result <= 0) return false;
|
if (write_result <= 0) return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
bool WcharToUTF8(const wchar_t* src, std::string& dest) {
|
bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page) {
|
||||||
return WcharToChar(src, dest, CP_UTF8);
|
CONVFCT_TYPE2(WcharToChar, wchar_t, char, code_page);
|
||||||
}
|
}
|
||||||
std::string WcharToChar(const wchar_t* src, UINT codepage) {
|
std::string WcharToChar(const std::wstring& src, UINT code_page) {
|
||||||
std::string ret;
|
CONVFCT_TYPE3(WcharToChar, wchar_t, char, code_page);
|
||||||
if (!WcharToChar(src, ret, codepage)) ret.clear();
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
std::string WcharToUTF8(const wchar_t* src) {
|
std::string WcharToChar(const wchar_t* src, UINT code_page) {
|
||||||
return WcharToChar(src, CP_UTF8);
|
CONVFCT_TYPE4(WcharToChar, wchar_t, char, code_page);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CharToWchar(const char* src, std::wstring& dest, UINT codepage) {
|
#pragma endregion
|
||||||
int wcount, write_result;
|
|
||||||
|
|
||||||
// convert to WCHAR
|
#pragma region CharToWchar
|
||||||
wcount = MultiByteToWideChar(codepage, 0, reinterpret_cast<LPCCH>(src), -1, NULL, 0);
|
|
||||||
if (wcount <= 0) return false;
|
|
||||||
|
|
||||||
dest.resize(wcount - 1);
|
bool CharToWchar(const std::string& src, std::wstring& dst, UINT code_page) {
|
||||||
write_result = MultiByteToWideChar(codepage, 0, reinterpret_cast<LPCCH>(src), -1, reinterpret_cast<LPWSTR>(dest.data()), wcount);
|
// if src is empty, direct output
|
||||||
|
if (src.empty()) {
|
||||||
|
dst.clear();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// init the variables used by MultiByteToWideChar
|
||||||
|
// setup src pointer
|
||||||
|
LPCCH lpMultiByteStr = reinterpret_cast<LPCCH>(src.c_str());
|
||||||
|
// check whether source string is too large.
|
||||||
|
size_t cSrcSize = src.size();
|
||||||
|
if (cSrcSize > std::numeric_limits<int>::max()) return false;
|
||||||
|
int cbMultiByte = static_cast<int>(src.size());
|
||||||
|
|
||||||
|
// do conversion
|
||||||
|
// do a dry-run first to fetch desired size.
|
||||||
|
int desired_size = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, NULL, 0);
|
||||||
|
if (desired_size <= 0) return false;
|
||||||
|
// resize dest for receiving result
|
||||||
|
dst.resize(static_cast<size_t>(desired_size));
|
||||||
|
// do the real conversion
|
||||||
|
int write_result = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, reinterpret_cast<LPWSTR>(dst.data()), desired_size);
|
||||||
if (write_result <= 0) return false;
|
if (write_result <= 0) return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
bool UTF8ToWchar(const char* src, std::wstring& dest) {
|
bool CharToWchar(const char* src, std::wstring& dst, UINT code_page) {
|
||||||
return CharToWchar(src, dest, CP_UTF8);
|
CONVFCT_TYPE2(CharToWchar, char, wchar_t, code_page);
|
||||||
}
|
}
|
||||||
std::wstring CharToWchar(const char* src, UINT codepage) {
|
std::wstring CharToWchar(const std::string& src, UINT code_page) {
|
||||||
std::wstring ret;
|
CONVFCT_TYPE3(CharToWchar, char, wchar_t, code_page);
|
||||||
if (!CharToWchar(src, ret, codepage)) ret.clear();
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
std::wstring UTF8ToWchar(const char* src) {
|
std::wstring CharToWchar(const char* src, UINT code_page) {
|
||||||
return CharToWchar(src, CP_UTF8);
|
CONVFCT_TYPE4(CharToWchar, char, wchar_t, code_page);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CharToChar(const char* src, std::string& dest, UINT src_codepage, UINT dest_codepage) {
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region CharToChar
|
||||||
|
|
||||||
|
bool CharToChar(const std::string& src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
|
||||||
std::wstring intermediary;
|
std::wstring intermediary;
|
||||||
if (!CharToWchar(src, intermediary, src_codepage)) return false;
|
if (!CharToWchar(src, intermediary, src_code_page)) return false;
|
||||||
if (!WcharToChar(intermediary.c_str(), dest, dest_codepage)) return false;
|
if (!WcharToChar(intermediary.c_str(), dst, dst_code_page)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
std::string CharToChar(const char* src, UINT src_codepage, UINT dest_codepage) {
|
bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
|
||||||
std::string ret;
|
CONVFCT_TYPE2(CharToChar, char, char, src_code_page, dst_code_page);
|
||||||
if (!CharToChar(src, ret, src_codepage, dest_codepage)) ret.clear();
|
}
|
||||||
|
std::string CharToChar(const std::string& src, UINT src_code_page, UINT dst_code_page) {
|
||||||
|
CONVFCT_TYPE3(CharToChar, char, char, src_code_page, dst_code_page);
|
||||||
|
}
|
||||||
|
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page) {
|
||||||
|
CONVFCT_TYPE4(CharToChar, char, char, src_code_page, dst_code_page);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region WcharToUTF8
|
||||||
|
|
||||||
|
bool WcharToUTF8(const std::wstring& src, yycc_u8string& dst) {
|
||||||
|
std::string adapted_dst;
|
||||||
|
bool ret = WcharToChar(src, adapted_dst, CP_UTF8);
|
||||||
|
if (ret) dst.assign(reinterpret_cast<const yycc_char8_t*>(adapted_dst.c_str()), adapted_dst.size());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst) {
|
||||||
|
CONVFCT_TYPE2(WcharToUTF8, wchar_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
yycc_u8string WcharToUTF8(const std::wstring& src) {
|
||||||
|
CONVFCT_TYPE3(WcharToUTF8, wchar_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
yycc_u8string WcharToUTF8(const wchar_t* src) {
|
||||||
|
CONVFCT_TYPE4(WcharToUTF8, wchar_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region UTF8ToWchar
|
||||||
|
|
||||||
|
bool UTF8ToWchar(const yycc_u8string& src, std::wstring& dst) {
|
||||||
|
std::string adapted_src(reinterpret_cast<const char*>(src.c_str()), src.size());
|
||||||
|
return CharToWchar(adapted_src, dst, CP_UTF8);
|
||||||
|
}
|
||||||
|
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst) {
|
||||||
|
CONVFCT_TYPE2(UTF8ToWchar, yycc_char8_t, wchar_t);
|
||||||
|
}
|
||||||
|
std::wstring UTF8ToWchar(const yycc_u8string& src) {
|
||||||
|
CONVFCT_TYPE3(UTF8ToWchar, yycc_char8_t, wchar_t);
|
||||||
|
}
|
||||||
|
std::wstring UTF8ToWchar(const yycc_char8_t* src) {
|
||||||
|
CONVFCT_TYPE4(UTF8ToWchar, yycc_char8_t, wchar_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#pragma region UTF8 UTF16 UTF32 Help Funcs
|
||||||
|
|
||||||
|
/*
|
||||||
|
According to the CppReference documentation:
|
||||||
|
The standard library is guaranteed to provide several specific specializations of \c std::codecvt.
|
||||||
|
The UTF8 char type used by the UTF8-related specializations of \c std::codecvt differs depending on whether the compiler supports \c char8_t.
|
||||||
|
It is also independent of the \c yycc_char8_t type we defined.
|
||||||
|
So it is essential to define a type here which correctly selects the specific specializations of \c std::codecvt.
|
||||||
|
*/
|
||||||
#if defined(__cpp_char8_t)
|
#if defined(__cpp_char8_t)
|
||||||
using CodecvtUTF8Char_t = char8_t;
|
using CodecvtUTF8Char_t = char8_t;
|
||||||
#else
|
#else
|
||||||
using CodecvtUTF8Char_t = char;
|
using CodecvtUTF8Char_t = char;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
||||||
using CodecvtFacet_t = std::codecvt<_TChar, CodecvtUTF8Char_t, std::mbstate_t>;
|
using CodecvtFacet_t = std::codecvt<_TChar, CodecvtUTF8Char_t, std::mbstate_t>;
|
||||||
|
|
||||||
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
||||||
static bool UTF8ToUTFOther(const char* _src, std::basic_string<_TChar>& dest) {
|
static bool UTF8ToUTFOther(const yycc_u8string& src, std::basic_string<_TChar>& dst) {
|
||||||
// Reference:
|
// Reference:
|
||||||
// https://zh.cppreference.com/w/cpp/locale/codecvt/in
|
// https://zh.cppreference.com/w/cpp/locale/codecvt/in
|
||||||
|
|
||||||
// init src string
|
|
||||||
if (_src == nullptr) return false;
|
|
||||||
std::string src(_src);
|
|
||||||
|
|
||||||
// init locale and get codecvt facet
|
// init locale and get codecvt facet
|
||||||
// same reason in UTFOtherToUTF8 to keeping reference to locale
|
// same reason in UTFOtherToUTF8 to keeping reference to locale
|
||||||
const auto& this_locale = std::locale::classic();
|
const auto& this_locale = std::locale::classic();
|
||||||
@ -94,12 +194,12 @@ namespace YYCC::EncodingHelper {
|
|||||||
|
|
||||||
// convertion preparation
|
// convertion preparation
|
||||||
std::mbstate_t mb{};
|
std::mbstate_t mb{};
|
||||||
dest.resize(src.size());
|
dst.resize(src.size());
|
||||||
const CodecvtUTF8Char_t* intern_from = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str()),
|
const CodecvtUTF8Char_t* intern_from = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str()),
|
||||||
*intern_from_end = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str() + src.size()),
|
*intern_from_end = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str() + src.size()),
|
||||||
*intern_from_next = nullptr;
|
*intern_from_next = nullptr;
|
||||||
_TChar* extern_to = dest.data(),
|
_TChar* extern_to = dst.data(),
|
||||||
*extern_to_end = dest.data() + dest.size(),
|
*extern_to_end = dst.data() + dst.size(),
|
||||||
*extern_to_next = nullptr;
|
*extern_to_next = nullptr;
|
||||||
// do convertion
|
// do convertion
|
||||||
auto result = this_codecvt.in(
|
auto result = this_codecvt.in(
|
||||||
@ -112,36 +212,15 @@ namespace YYCC::EncodingHelper {
|
|||||||
if (result != CodecvtFacet_t<_TChar>::ok)
|
if (result != CodecvtFacet_t<_TChar>::ok)
|
||||||
return false;
|
return false;
|
||||||
// resize result and return
|
// resize result and return
|
||||||
dest.resize(extern_to_next - dest.data());
|
dst.resize(extern_to_next - dst.data());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UTF8ToUTF16(const char* src, std::u16string& dest) {
|
|
||||||
return UTF8ToUTFOther<char16_t>(src, dest);
|
|
||||||
}
|
|
||||||
std::u16string UTF8ToUTF16(const char* src) {
|
|
||||||
std::u16string ret;
|
|
||||||
if (!UTF8ToUTF16(src, ret)) ret.clear();
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
bool UTF8ToUTF32(const char* src, std::u32string& dest) {
|
|
||||||
return UTF8ToUTFOther<char32_t>(src, dest);
|
|
||||||
}
|
|
||||||
std::u32string UTF8ToUTF32(const char* src) {
|
|
||||||
std::u32string ret;
|
|
||||||
if (!UTF8ToUTF32(src, ret)) ret.clear();
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
||||||
static bool UTFOtherToUTF8(const _TChar* _src, std::string& dest) {
|
static bool UTFOtherToUTF8(const std::basic_string<_TChar>& src, yycc_u8string& dst) {
|
||||||
// Reference:
|
// Reference:
|
||||||
// https://zh.cppreference.com/w/cpp/locale/codecvt/out
|
// https://zh.cppreference.com/w/cpp/locale/codecvt/out
|
||||||
|
|
||||||
// initialize src string
|
|
||||||
if (_src == nullptr) return false;
|
|
||||||
std::basic_string<_TChar> src(_src);
|
|
||||||
|
|
||||||
// init locale and get codecvt facet
|
// init locale and get codecvt facet
|
||||||
// the reference to locale must be preserved until convertion done.
|
// the reference to locale must be preserved until convertion done.
|
||||||
// because the life time of codecvt facet is equal to the reference to locale.
|
// because the life time of codecvt facet is equal to the reference to locale.
|
||||||
@ -150,12 +229,12 @@ namespace YYCC::EncodingHelper {
|
|||||||
|
|
||||||
// do convertion preparation
|
// do convertion preparation
|
||||||
std::mbstate_t mb{};
|
std::mbstate_t mb{};
|
||||||
dest.resize(src.size() * this_codecvt.max_length());
|
dst.resize(src.size() * this_codecvt.max_length());
|
||||||
const _TChar* intern_from = src.c_str(),
|
const _TChar* intern_from = src.c_str(),
|
||||||
*intern_from_end = src.c_str() + src.size(),
|
*intern_from_end = src.c_str() + src.size(),
|
||||||
*intern_from_next = nullptr;
|
*intern_from_next = nullptr;
|
||||||
CodecvtUTF8Char_t* extern_to = reinterpret_cast<CodecvtUTF8Char_t*>(dest.data()),
|
CodecvtUTF8Char_t* extern_to = reinterpret_cast<CodecvtUTF8Char_t*>(dst.data()),
|
||||||
*extern_to_end = reinterpret_cast<CodecvtUTF8Char_t*>(dest.data() + dest.size()),
|
*extern_to_end = reinterpret_cast<CodecvtUTF8Char_t*>(dst.data() + dst.size()),
|
||||||
*extern_to_next = nullptr;
|
*extern_to_next = nullptr;
|
||||||
// do convertion
|
// do convertion
|
||||||
auto result = this_codecvt.out(
|
auto result = this_codecvt.out(
|
||||||
@ -168,26 +247,83 @@ namespace YYCC::EncodingHelper {
|
|||||||
if (result != CodecvtFacet_t<_TChar>::ok)
|
if (result != CodecvtFacet_t<_TChar>::ok)
|
||||||
return false;
|
return false;
|
||||||
// resize result and retuen
|
// resize result and retuen
|
||||||
dest.resize(extern_to_next - reinterpret_cast<CodecvtUTF8Char_t*>(dest.data()));
|
dst.resize(extern_to_next - reinterpret_cast<CodecvtUTF8Char_t*>(dst.data()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UTF16ToUTF8(const char16_t* src, std::string& dest) {
|
#pragma endregion
|
||||||
return UTFOtherToUTF8<char16_t>(src, dest);
|
|
||||||
|
#pragma region UTF8ToUTF16
|
||||||
|
|
||||||
|
bool UTF8ToUTF16(const yycc_u8string& src, std::u16string& dst) {
|
||||||
|
return UTF8ToUTFOther<char16_t>(src, dst);
|
||||||
}
|
}
|
||||||
std::string UTF16ToUTF8(const char16_t* src) {
|
bool UTF8ToUTF16(const yycc_char8_t* src, std::u16string& dst) {
|
||||||
std::string ret;
|
CONVFCT_TYPE2(UTF8ToUTF16, yycc_char8_t, char16_t);
|
||||||
if (!UTF16ToUTF8(src, ret)) ret.clear();
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
bool UTF32ToUTF8(const char32_t* src, std::string& dest) {
|
std::u16string UTF8ToUTF16(const yycc_u8string& src) {
|
||||||
return UTFOtherToUTF8<char32_t>(src, dest);
|
CONVFCT_TYPE3(UTF8ToUTF16, yycc_char8_t, char16_t);
|
||||||
}
|
}
|
||||||
std::string UTF32ToUTF8(const char32_t* src) {
|
std::u16string UTF8ToUTF16(const yycc_char8_t* src) {
|
||||||
std::string ret;
|
CONVFCT_TYPE4(UTF8ToUTF16, yycc_char8_t, char16_t);
|
||||||
if (!UTF32ToUTF8(src, ret)) ret.clear();
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region UTF16ToUTF8
|
||||||
|
|
||||||
|
bool UTF16ToUTF8(const std::u16string& src, yycc_u8string& dst) {
|
||||||
|
return UTFOtherToUTF8<char16_t>(src, dst);
|
||||||
|
}
|
||||||
|
bool UTF16ToUTF8(const char16_t* src, yycc_u8string& dst) {
|
||||||
|
CONVFCT_TYPE2(UTF16ToUTF8, char16_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
yycc_u8string UTF16ToUTF8(const std::u16string& src) {
|
||||||
|
CONVFCT_TYPE3(UTF16ToUTF8, char16_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
yycc_u8string UTF16ToUTF8(const char16_t* src) {
|
||||||
|
CONVFCT_TYPE4(UTF16ToUTF8, char16_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region UTF8ToUTF32
|
||||||
|
|
||||||
|
bool UTF8ToUTF32(const yycc_u8string& src, std::u32string& dst) {
|
||||||
|
return UTF8ToUTFOther<char32_t>(src, dst);
|
||||||
|
}
|
||||||
|
bool UTF8ToUTF32(const yycc_char8_t* src, std::u32string& dst) {
|
||||||
|
CONVFCT_TYPE2(UTF8ToUTF32, yycc_char8_t, char32_t);
|
||||||
|
}
|
||||||
|
std::u32string UTF8ToUTF32(const yycc_u8string& src) {
|
||||||
|
CONVFCT_TYPE3(UTF8ToUTF32, yycc_char8_t, char32_t);
|
||||||
|
}
|
||||||
|
std::u32string UTF8ToUTF32(const yycc_char8_t* src) {
|
||||||
|
CONVFCT_TYPE4(UTF8ToUTF32, yycc_char8_t, char32_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region UTF32ToUTF8
|
||||||
|
|
||||||
|
bool UTF32ToUTF8(const std::u32string& src, yycc_u8string& dst) {
|
||||||
|
return UTFOtherToUTF8<char32_t>(src, dst);
|
||||||
|
}
|
||||||
|
bool UTF32ToUTF8(const char32_t* src, yycc_u8string& dst) {
|
||||||
|
CONVFCT_TYPE2(UTF32ToUTF8, char32_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
yycc_u8string UTF32ToUTF8(const std::u32string& src) {
|
||||||
|
CONVFCT_TYPE3(UTF32ToUTF8, char32_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
yycc_u8string UTF32ToUTF8(const char32_t* src) {
|
||||||
|
CONVFCT_TYPE4(UTF32ToUTF8, char32_t, yycc_char8_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#undef CONVFCT_TYPE2
|
||||||
|
#undef CONVFCT_TYPE3
|
||||||
|
#undef CONVFCT_TYPE4
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,29 +52,53 @@ namespace YYCC::EncodingHelper {
|
|||||||
|
|
||||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||||
|
|
||||||
bool WcharToChar(const wchar_t* src, std::string& dest, UINT codepage);
|
bool WcharToChar(const std::wstring& src, std::string& dst, UINT code_page);
|
||||||
bool WcharToUTF8(const wchar_t* src, std::string& dest);
|
bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page);
|
||||||
std::string WcharToChar(const wchar_t* src, UINT codepage);
|
std::string WcharToChar(const std::wstring& src, UINT code_page);
|
||||||
std::string WcharToUTF8(const wchar_t* src);
|
std::string WcharToChar(const wchar_t* src, UINT code_page);
|
||||||
|
|
||||||
bool CharToWchar(const char* src, std::wstring& dest, UINT codepage);
|
bool CharToWchar(const std::string& src, std::wstring& dst, UINT code_page);
|
||||||
bool UTF8ToWchar(const char* src, std::wstring& dest);
|
bool CharToWchar(const char* src, std::wstring& dst, UINT code_page);
|
||||||
std::wstring CharToWchar(const char* src, UINT codepage);
|
std::wstring CharToWchar(const std::string& src, UINT code_page);
|
||||||
std::wstring UTF8ToWchar(const char* src);
|
std::wstring CharToWchar(const char* src, UINT code_page);
|
||||||
|
|
||||||
bool CharToChar(const char* src, std::string& dest, UINT src_codepage, UINT dest_codepage);
|
bool CharToChar(const std::string& src, std::string& dst, UINT src_code_page, UINT dst_code_page);
|
||||||
std::string CharToChar(const char* src, UINT src_codepage, UINT dest_codepage);
|
bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page);
|
||||||
|
std::string CharToChar(const std::string& src, UINT src_code_page, UINT dst_code_page);
|
||||||
|
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page);
|
||||||
|
|
||||||
|
|
||||||
|
bool WcharToUTF8(const std::wstring& src, yycc_u8string& dst);
|
||||||
|
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst);
|
||||||
|
yycc_u8string WcharToUTF8(const std::wstring& src);
|
||||||
|
yycc_u8string WcharToUTF8(const wchar_t* src);
|
||||||
|
|
||||||
|
bool UTF8ToWchar(const yycc_u8string& src, std::wstring& dst);
|
||||||
|
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst);
|
||||||
|
std::wstring UTF8ToWchar(const yycc_u8string& src);
|
||||||
|
std::wstring UTF8ToWchar(const yycc_char8_t* src);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool UTF8ToUTF16(const char* src, std::u16string& dest);
|
bool UTF8ToUTF16(const yycc_u8string& src, std::u16string& dst);
|
||||||
std::u16string UTF8ToUTF16(const char* src);
|
bool UTF8ToUTF16(const yycc_char8_t* src, std::u16string& dst);
|
||||||
bool UTF8ToUTF32(const char* src, std::u32string& dest);
|
std::u16string UTF8ToUTF16(const yycc_u8string& src);
|
||||||
std::u32string UTF8ToUTF32(const char* src);
|
std::u16string UTF8ToUTF16(const yycc_char8_t* src);
|
||||||
|
|
||||||
bool UTF16ToUTF8(const char16_t* src, std::string& dest);
|
bool UTF16ToUTF8(const std::u16string& src, yycc_u8string& dst);
|
||||||
std::string UTF16ToUTF8(const char16_t* src);
|
bool UTF16ToUTF8(const char16_t* src, yycc_u8string& dst);
|
||||||
bool UTF32ToUTF8(const char32_t* src, std::string& dest);
|
yycc_u8string UTF16ToUTF8(const std::u16string& src);
|
||||||
std::string UTF32ToUTF8(const char32_t* src);
|
yycc_u8string UTF16ToUTF8(const char16_t* src);
|
||||||
|
|
||||||
|
|
||||||
|
bool UTF8ToUTF32(const yycc_u8string& src, std::u32string& dst);
|
||||||
|
bool UTF8ToUTF32(const yycc_char8_t* src, std::u32string& dst);
|
||||||
|
std::u32string UTF8ToUTF32(const yycc_u8string& src);
|
||||||
|
std::u32string UTF8ToUTF32(const yycc_char8_t* src);
|
||||||
|
|
||||||
|
bool UTF32ToUTF8(const std::u32string& src, yycc_u8string& dst);
|
||||||
|
bool UTF32ToUTF8(const char32_t* src, yycc_u8string& dst);
|
||||||
|
yycc_u8string UTF32ToUTF8(const std::u32string& src);
|
||||||
|
yycc_u8string UTF32ToUTF8(const char32_t* src);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -24,14 +24,23 @@
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//// Decide the char type we used
|
// Define the UTF8 char type used by this library.
|
||||||
//#include <string>
|
// Also define a universal macro to create UTF8 string literals.
|
||||||
//namespace YYCC {
|
// And provide a polyfill if there is no built-in char8_t type.
|
||||||
//#if defined(__cpp_char8_t)
|
#include <string>
|
||||||
// using u8char = char8_t;
|
namespace YYCC {
|
||||||
// using u8string = std::std::string
|
#if defined(__cpp_char8_t)
|
||||||
//#else
|
using yycc_char8_t = char8_t;
|
||||||
// using u8char = char;
|
using yycc_u8string = std::u8string;
|
||||||
// using u8string = std::string;
|
|
||||||
//#endif
|
#define _YYCC_U8(strl) u8 ## strl
|
||||||
//}
|
#define YYCC_U8(strl) (_YYCC_U8(strl))
|
||||||
|
#else
|
||||||
|
using yycc_char8_t = unsigned char;
|
||||||
|
using yycc_u8string = std::basic_string<yycc_char8_t>;
|
||||||
|
|
||||||
|
#define _YYCC_U8(strl) u8 ## strl
|
||||||
|
#define YYCC_U8(strl) (reinterpret_cast<const yycc_char8_t*>(_YYCC_U8(strl)))
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user