From 3fa05b43d928c8e39ec0133cfc02224d28145332 Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Thu, 20 Jun 2024 10:16:13 +0800 Subject: [PATCH] fix: use new method to do the conversion among UTF8, UTF16 and UTF32. - use std::codecvt as the conversion method among UTF8, UTF16 and UTF32. - fix the issue that COM Guard was accidentally dropped by the compiler because there was no reference to it. --- src/COMHelper.cpp | 10 ++- src/COMHelper.hpp | 11 +++ src/ConsoleHelper.cpp | 8 +-- src/DialogHelper.cpp | 3 + src/EncodingHelper.cpp | 152 +++++++++++++++++++---------------------- 5 files changed, 97 insertions(+), 87 deletions(-) diff --git a/src/COMHelper.cpp b/src/COMHelper.cpp index 2453a7c..b628a8f 100644 --- a/src/COMHelper.cpp +++ b/src/COMHelper.cpp @@ -21,6 +21,10 @@ namespace YYCC::COMHelper { } } + bool IsInitialized() const { + return m_HasInit; + } + protected: bool m_HasInit; }; @@ -33,7 +37,11 @@ namespace YYCC::COMHelper { * So we use a static instance in here. * And make it be const so no one can change it. */ - static const ComGuard c_ComGuard; + static const ComGuard c_ComGuard {}; + + bool IsInitialized() { + return c_ComGuard.IsInitialized(); + } } diff --git a/src/COMHelper.hpp b/src/COMHelper.hpp index 22c1ce3..338109c 100644 --- a/src/COMHelper.hpp +++ b/src/COMHelper.hpp @@ -64,6 +64,17 @@ namespace YYCC::COMHelper { using SmartLPWSTR = std::unique_ptr, CoTaskMemDeleter>; + /** + * @brief Check whether COM environment has been initialized. + * @return True if it is, otherwise false. + * @remarks + * This function will call corresponding function of COM Guard. + * Do not remove this function and you must preserve at least one reference to this function in final program. + * Some compiler will try to drop COM Guard in final program if no reference to it and it will cause the initialization of COM environment failed. + * This is the reason why I order you do the things said above. 
+ */ + bool IsInitialized(); + } #endif diff --git a/src/ConsoleHelper.cpp b/src/ConsoleHelper.cpp index 5b998e5..4bd25dc 100644 --- a/src/ConsoleHelper.cpp +++ b/src/ConsoleHelper.cpp @@ -245,12 +245,12 @@ namespace YYCC::ConsoleHelper { } void Write(const char* u8_strl) { - va_list empty; + va_list empty{}; RawWrite(u8_strl, empty); } void WriteLine(const char* u8_strl) { - va_list empty; + va_list empty{}; RawWrite(u8_strl, empty); } @@ -269,12 +269,12 @@ namespace YYCC::ConsoleHelper { } void ErrWrite(const char* u8_strl) { - va_list empty; + va_list empty{}; RawWrite(u8_strl, empty); } void ErrWriteLine(const char* u8_strl) { - va_list empty; + va_list empty{}; RawWrite(u8_strl, empty); } diff --git a/src/DialogHelper.cpp b/src/DialogHelper.cpp index d9f41e2..8c807e9 100644 --- a/src/DialogHelper.cpp +++ b/src/DialogHelper.cpp @@ -173,6 +173,9 @@ namespace YYCC::DialogHelper { // prepare result variable HRESULT hr; + // check whether COM environment has been initialized + if (!COMHelper::IsInitialized()) return false; + // create file dialog instance // fetch dialog CLSID first CLSID dialog_clsid; diff --git a/src/EncodingHelper.cpp b/src/EncodingHelper.cpp index 47733df..3915cef 100644 --- a/src/EncodingHelper.cpp +++ b/src/EncodingHelper.cpp @@ -1,7 +1,6 @@ #include "EncodingHelper.hpp" -#include -#include +#include namespace YYCC::EncodingHelper { @@ -71,61 +70,49 @@ namespace YYCC::EncodingHelper { #endif +#if defined(__cpp_char8_t) + using CodecvtUTF8Char_t = char8_t; +#else + using CodecvtUTF8Char_t = char; +#endif template || std::is_same_v<_TChar, char32_t>, int> = 0> - static bool UTF8ToUTFOther(const char* src, std::basic_string<_TChar>& dest) { - // Reference: - // https://zh.cppreference.com/w/cpp/string/multibyte/mbrtoc32 - // https://zh.cppreference.com/w/cpp/string/multibyte/mbrtoc16 - // https://learn.microsoft.com/zh-cn/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170 - // - // Due to the same reason introduced in 
UTFOtherToUTF8, - // we use these function as convertion function. + using CodecvtFacet_t = std::codecvt<_TChar, CodecvtUTF8Char_t, std::mbstate_t>; + + template || std::is_same_v<_TChar, char32_t>, int> = 0> + static bool UTF8ToUTFOther(const char* _src, std::basic_string<_TChar>& dest) { + // Reference: + // https://zh.cppreference.com/w/cpp/locale/codecvt/in // init src string - if (src == nullptr) return false; - std::string src_string(src); - // init result string - dest.clear(); + if (_src == nullptr) return false; + std::string src(_src); - // init essential cvt variables - std::mbstate_t state {}; - _TChar c1632; - const char* ptr = src_string.c_str(); - const char* end = src_string.c_str() + src_string.size() + 1; - - // start convertion - while (true) { - // do convertion - size_t rc; - if constexpr (std::is_same_v<_TChar, char16_t>) { - rc = std::mbrtoc16(&c1632, ptr, end - ptr, &state); - } else { - rc = std::mbrtoc32(&c1632, ptr, end - ptr, &state); - } - if (!rc) break; - - // check result - if (rc == static_cast(-1)) { - // encoding error, return false - return false; - } else if (rc == static_cast(-2)) { - // insufficient sequence, return false - return false; - } else if (rc == static_cast(-3)) { - // UTF16 pair case (usually is emoji, one emoji is represented by 2 UTF16) - // - // only push result char but do not increase pointer - // because this char is output from state. 
- dest.push_back(c1632); - } else { - // normal case - // append to result - dest.push_back(c1632); - // inc ptr - ptr += rc; - } - } + // init locale and get codecvt facet + // same reason in UTFOtherToUTF8 to keeping reference to locale + const auto& this_locale = std::locale::classic(); + const auto& this_codecvt = std::use_facet>(this_locale); + + // convertion preparation + std::mbstate_t mb{}; + dest.resize(src.size()); + const CodecvtUTF8Char_t* intern_from = reinterpret_cast(src.c_str()), + *intern_from_end = reinterpret_cast(src.c_str() + src.size()), + *intern_from_next = nullptr; + _TChar* extern_to = dest.data(), + *extern_to_end = dest.data() + dest.size(), + *extern_to_next = nullptr; + // do convertion + auto result = this_codecvt.in( + mb, + intern_from, intern_from_end, intern_from_next, + extern_to, extern_to_end, extern_to_next + ); + // check result + if (result != CodecvtFacet_t<_TChar>::ok) + return false; + // resize result and return + dest.resize(extern_to_next - dest.data()); return true; } @@ -147,40 +134,41 @@ namespace YYCC::EncodingHelper { } template || std::is_same_v<_TChar, char32_t>, int> = 0> - static bool UTFOtherToUTF8(const _TChar* src, std::string& dest) { + static bool UTFOtherToUTF8(const _TChar* _src, std::string& dest) { // Reference: - // https://zh.cppreference.com/w/cpp/string/multibyte/c32rtomb - // https://zh.cppreference.com/w/cpp/string/multibyte/c16rtomb - // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170 - // - // Due to Microsoft implementation, c16rtomb and c32rtomb - // always convert UTF32 and UTF16 string into UTF8 string no matter current c locale. - // At the same time, most Linux use UTF8 as their locale. - // So using c16rtomb and c32rtomb do the convertion from UTF32 or UTF16 to UTF8 is reasonable. 
- + // https://zh.cppreference.com/w/cpp/locale/codecvt/out + // initialize src string - if (src == nullptr) return false; - std::basic_string<_TChar> src_string(src); - // init result string - dest.clear(); + if (_src == nullptr) return false; + std::basic_string<_TChar> src(_src); - // init essential cvt variables - std::mbstate_t state {}; - char out[MB_LEN_MAX] {}; - for (_TChar c : src_string) { - // do convertion - std::size_t rc; - if constexpr (std::is_same_v<_TChar, char16_t>) { - rc = std::c16rtomb(out, c, &state); - } else { - rc = std::c32rtomb(out, c, &state); - } - // convertion failed - if (rc == static_cast(-1)) return false; - // otherwise append result - dest.append(out, rc); - } + // init locale and get codecvt facet + // the reference to locale must be preserved until convertion done. + // because the life time of codecvt facet is equal to the reference to locale. + const auto& this_locale = std::locale::classic(); + const auto& this_codecvt = std::use_facet>(this_locale); + // do convertion preparation + std::mbstate_t mb{}; + dest.resize(src.size() * this_codecvt.max_length()); + const _TChar* intern_from = src.c_str(), + *intern_from_end = src.c_str() + src.size(), + *intern_from_next = nullptr; + CodecvtUTF8Char_t* extern_to = reinterpret_cast(dest.data()), + *extern_to_end = reinterpret_cast(dest.data() + dest.size()), + *extern_to_next = nullptr; + // do convertion + auto result = this_codecvt.out( + mb, + intern_from, intern_from_end, intern_from_next, + extern_to, extern_to_end, extern_to_next + ); + + // check result + if (result != CodecvtFacet_t<_TChar>::ok) + return false; + // resize result and retuen + dest.resize(extern_to_next - reinterpret_cast(dest.data())); return true; }