fix: use a new method for conversion among UTF8, UTF16 and UTF32.
- use std::codecvt as the conversion method among UTF8, UTF16 and UTF32. - fix the issue that the COM Guard was accidentally dropped by the compiler because nothing referenced it.
This commit is contained in:
parent
1fd132f0c9
commit
3fa05b43d9
|
@ -21,6 +21,10 @@ namespace YYCC::COMHelper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsInitialized() const {
|
||||||
|
return m_HasInit;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool m_HasInit;
|
bool m_HasInit;
|
||||||
};
|
};
|
||||||
|
@ -33,7 +37,11 @@ namespace YYCC::COMHelper {
|
||||||
* So we use a static instance in here.
|
* So we use a static instance in here.
|
||||||
* And make it be const so no one can change it.
|
* And make it be const so no one can change it.
|
||||||
*/
|
*/
|
||||||
static const ComGuard c_ComGuard;
|
static const ComGuard c_ComGuard {};
|
||||||
|
|
||||||
|
bool IsInitialized() {
|
||||||
|
return c_ComGuard.IsInitialized();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -64,6 +64,17 @@ namespace YYCC::COMHelper {
|
||||||
|
|
||||||
using SmartLPWSTR = std::unique_ptr<std::remove_pointer_t<LPWSTR>, CoTaskMemDeleter>;
|
using SmartLPWSTR = std::unique_ptr<std::remove_pointer_t<LPWSTR>, CoTaskMemDeleter>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Check whether COM environment has been initialized.
|
||||||
|
* @return True if it is, otherwise false.
|
||||||
|
* @remarks
|
||||||
|
* This function will call corresponding function of COM Guard.
|
||||||
|
* Do not remove this function and you must preserve at least one reference to this function in final program.
|
||||||
|
* Some compiler will try to drop COM Guard in final program if no reference to it and it will cause the initialization of COM environment failed.
|
||||||
|
* This is the reason why I order you do the things said above.
|
||||||
|
*/
|
||||||
|
bool IsInitialized();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -245,12 +245,12 @@ namespace YYCC::ConsoleHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Write(const char* u8_strl) {
|
void Write(const char* u8_strl) {
|
||||||
va_list empty;
|
va_list empty{};
|
||||||
RawWrite<false, false, false>(u8_strl, empty);
|
RawWrite<false, false, false>(u8_strl, empty);
|
||||||
}
|
}
|
||||||
|
|
||||||
void WriteLine(const char* u8_strl) {
|
void WriteLine(const char* u8_strl) {
|
||||||
va_list empty;
|
va_list empty{};
|
||||||
RawWrite<false, false, true>(u8_strl, empty);
|
RawWrite<false, false, true>(u8_strl, empty);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -269,12 +269,12 @@ namespace YYCC::ConsoleHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ErrWrite(const char* u8_strl) {
|
void ErrWrite(const char* u8_strl) {
|
||||||
va_list empty;
|
va_list empty{};
|
||||||
RawWrite<false, true, false>(u8_strl, empty);
|
RawWrite<false, true, false>(u8_strl, empty);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ErrWriteLine(const char* u8_strl) {
|
void ErrWriteLine(const char* u8_strl) {
|
||||||
va_list empty;
|
va_list empty{};
|
||||||
RawWrite<false, true, true>(u8_strl, empty);
|
RawWrite<false, true, true>(u8_strl, empty);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -173,6 +173,9 @@ namespace YYCC::DialogHelper {
|
||||||
// prepare result variable
|
// prepare result variable
|
||||||
HRESULT hr;
|
HRESULT hr;
|
||||||
|
|
||||||
|
// check whether COM environment has been initialized
|
||||||
|
if (!COMHelper::IsInitialized()) return false;
|
||||||
|
|
||||||
// create file dialog instance
|
// create file dialog instance
|
||||||
// fetch dialog CLSID first
|
// fetch dialog CLSID first
|
||||||
CLSID dialog_clsid;
|
CLSID dialog_clsid;
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#include "EncodingHelper.hpp"
|
#include "EncodingHelper.hpp"
|
||||||
|
|
||||||
#include <cuchar>
|
#include <locale>
|
||||||
#include <climits>
|
|
||||||
|
|
||||||
namespace YYCC::EncodingHelper {
|
namespace YYCC::EncodingHelper {
|
||||||
|
|
||||||
|
@ -71,61 +70,49 @@ namespace YYCC::EncodingHelper {
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__cpp_char8_t)
|
||||||
|
using CodecvtUTF8Char_t = char8_t;
|
||||||
|
#else
|
||||||
|
using CodecvtUTF8Char_t = char;
|
||||||
|
#endif
|
||||||
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
||||||
static bool UTF8ToUTFOther(const char* src, std::basic_string<_TChar>& dest) {
|
using CodecvtFacet_t = std::codecvt<_TChar, CodecvtUTF8Char_t, std::mbstate_t>;
|
||||||
// Reference:
|
|
||||||
// https://zh.cppreference.com/w/cpp/string/multibyte/mbrtoc32
|
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
||||||
// https://zh.cppreference.com/w/cpp/string/multibyte/mbrtoc16
|
static bool UTF8ToUTFOther(const char* _src, std::basic_string<_TChar>& dest) {
|
||||||
// https://learn.microsoft.com/zh-cn/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170
|
// Reference:
|
||||||
//
|
// https://zh.cppreference.com/w/cpp/locale/codecvt/in
|
||||||
// Due to the same reason introduced in UTFOtherToUTF8,
|
|
||||||
// we use these function as convertion function.
|
|
||||||
|
|
||||||
// init src string
|
// init src string
|
||||||
if (src == nullptr) return false;
|
if (_src == nullptr) return false;
|
||||||
std::string src_string(src);
|
std::string src(_src);
|
||||||
// init result string
|
|
||||||
dest.clear();
|
|
||||||
|
|
||||||
// init essential cvt variables
|
// init locale and get codecvt facet
|
||||||
std::mbstate_t state {};
|
// same reason in UTFOtherToUTF8 to keeping reference to locale
|
||||||
_TChar c1632;
|
const auto& this_locale = std::locale::classic();
|
||||||
const char* ptr = src_string.c_str();
|
const auto& this_codecvt = std::use_facet<CodecvtFacet_t<_TChar>>(this_locale);
|
||||||
const char* end = src_string.c_str() + src_string.size() + 1;
|
|
||||||
|
// convertion preparation
|
||||||
// start convertion
|
std::mbstate_t mb{};
|
||||||
while (true) {
|
dest.resize(src.size());
|
||||||
// do convertion
|
const CodecvtUTF8Char_t* intern_from = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str()),
|
||||||
size_t rc;
|
*intern_from_end = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str() + src.size()),
|
||||||
if constexpr (std::is_same_v<_TChar, char16_t>) {
|
*intern_from_next = nullptr;
|
||||||
rc = std::mbrtoc16(&c1632, ptr, end - ptr, &state);
|
_TChar* extern_to = dest.data(),
|
||||||
} else {
|
*extern_to_end = dest.data() + dest.size(),
|
||||||
rc = std::mbrtoc32(&c1632, ptr, end - ptr, &state);
|
*extern_to_next = nullptr;
|
||||||
}
|
// do convertion
|
||||||
if (!rc) break;
|
auto result = this_codecvt.in(
|
||||||
|
mb,
|
||||||
// check result
|
intern_from, intern_from_end, intern_from_next,
|
||||||
if (rc == static_cast<size_t>(-1)) {
|
extern_to, extern_to_end, extern_to_next
|
||||||
// encoding error, return false
|
);
|
||||||
return false;
|
|
||||||
} else if (rc == static_cast<size_t>(-2)) {
|
|
||||||
// insufficient sequence, return false
|
|
||||||
return false;
|
|
||||||
} else if (rc == static_cast<size_t>(-3)) {
|
|
||||||
// UTF16 pair case (usually is emoji, one emoji is represented by 2 UTF16)
|
|
||||||
//
|
|
||||||
// only push result char but do not increase pointer
|
|
||||||
// because this char is output from state.
|
|
||||||
dest.push_back(c1632);
|
|
||||||
} else {
|
|
||||||
// normal case
|
|
||||||
// append to result
|
|
||||||
dest.push_back(c1632);
|
|
||||||
// inc ptr
|
|
||||||
ptr += rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// check result
|
||||||
|
if (result != CodecvtFacet_t<_TChar>::ok)
|
||||||
|
return false;
|
||||||
|
// resize result and return
|
||||||
|
dest.resize(extern_to_next - dest.data());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,40 +134,41 @@ namespace YYCC::EncodingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
|
||||||
static bool UTFOtherToUTF8(const _TChar* src, std::string& dest) {
|
static bool UTFOtherToUTF8(const _TChar* _src, std::string& dest) {
|
||||||
// Reference:
|
// Reference:
|
||||||
// https://zh.cppreference.com/w/cpp/string/multibyte/c32rtomb
|
// https://zh.cppreference.com/w/cpp/locale/codecvt/out
|
||||||
// https://zh.cppreference.com/w/cpp/string/multibyte/c16rtomb
|
|
||||||
// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170
|
|
||||||
//
|
|
||||||
// Due to Microsoft implementation, c16rtomb and c32rtomb
|
|
||||||
// always convert UTF32 and UTF16 string into UTF8 string no matter current c locale.
|
|
||||||
// At the same time, most Linux use UTF8 as their locale.
|
|
||||||
// So using c16rtomb and c32rtomb do the convertion from UTF32 or UTF16 to UTF8 is reasonable.
|
|
||||||
|
|
||||||
// initialize src string
|
// initialize src string
|
||||||
if (src == nullptr) return false;
|
if (_src == nullptr) return false;
|
||||||
std::basic_string<_TChar> src_string(src);
|
std::basic_string<_TChar> src(_src);
|
||||||
// init result string
|
|
||||||
dest.clear();
|
|
||||||
|
|
||||||
// init essential cvt variables
|
// init locale and get codecvt facet
|
||||||
std::mbstate_t state {};
|
// the reference to locale must be preserved until convertion done.
|
||||||
char out[MB_LEN_MAX] {};
|
// because the life time of codecvt facet is equal to the reference to locale.
|
||||||
for (_TChar c : src_string) {
|
const auto& this_locale = std::locale::classic();
|
||||||
// do convertion
|
const auto& this_codecvt = std::use_facet<CodecvtFacet_t<_TChar>>(this_locale);
|
||||||
std::size_t rc;
|
|
||||||
if constexpr (std::is_same_v<_TChar, char16_t>) {
|
|
||||||
rc = std::c16rtomb(out, c, &state);
|
|
||||||
} else {
|
|
||||||
rc = std::c32rtomb(out, c, &state);
|
|
||||||
}
|
|
||||||
// convertion failed
|
|
||||||
if (rc == static_cast<size_t>(-1)) return false;
|
|
||||||
// otherwise append result
|
|
||||||
dest.append(out, rc);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// do convertion preparation
|
||||||
|
std::mbstate_t mb{};
|
||||||
|
dest.resize(src.size() * this_codecvt.max_length());
|
||||||
|
const _TChar* intern_from = src.c_str(),
|
||||||
|
*intern_from_end = src.c_str() + src.size(),
|
||||||
|
*intern_from_next = nullptr;
|
||||||
|
CodecvtUTF8Char_t* extern_to = reinterpret_cast<CodecvtUTF8Char_t*>(dest.data()),
|
||||||
|
*extern_to_end = reinterpret_cast<CodecvtUTF8Char_t*>(dest.data() + dest.size()),
|
||||||
|
*extern_to_next = nullptr;
|
||||||
|
// do convertion
|
||||||
|
auto result = this_codecvt.out(
|
||||||
|
mb,
|
||||||
|
intern_from, intern_from_end, intern_from_next,
|
||||||
|
extern_to, extern_to_end, extern_to_next
|
||||||
|
);
|
||||||
|
|
||||||
|
// check result
|
||||||
|
if (result != CodecvtFacet_t<_TChar>::ok)
|
||||||
|
return false;
|
||||||
|
// resize result and retuen
|
||||||
|
dest.resize(extern_to_next - reinterpret_cast<CodecvtUTF8Char_t*>(dest.data()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user