From f153f9bc222977bbed3cadd13094838e0498945f Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Tue, 18 Jun 2024 16:03:41 +0800 Subject: [PATCH] doc: update comments of code. - add lost testbench for wchar encoding convertion. - update code documentation. --- src/COMHelper.hpp | 6 ++--- src/EncodingHelper.cpp | 4 ++-- src/EncodingHelper.hpp | 39 ++++++++++++++++++++++++++++++++ src/ExceptionHelper.hpp | 12 ++++++++-- src/WinFctHelper.hpp | 3 +-- testbench/main.cpp | 50 ++++++++++++++++++++++++++++++----------- 6 files changed, 92 insertions(+), 22 deletions(-) diff --git a/src/COMHelper.hpp b/src/COMHelper.hpp index c847829..22c1ce3 100644 --- a/src/COMHelper.hpp +++ b/src/COMHelper.hpp @@ -12,16 +12,16 @@ /** * @brief COM fucntions related namespace. * @details - * This namespace is Windows specific and it will disappear on other platforms. + * This namespace is Windows specific and is unavailable on other platforms. * * This namespace contain a COM Guard which make sure COM was initialized in current module when loading current module. * It is essential because all calling to COM functions should be under the premise that COM has been initialized. * This guard also will uninitialize COM when unloading this module. * * This namespace also provided various memory-safe types for interacting with COM functions. - * Although Microsoft also has similar smart pointer called CComPtr. + * Although Microsoft also has similar smart pointer called \c CComPtr. * But this library is eager to hide all Microsoft-related functions calling. - * Using CComPtr is not corresponding with the philosophy of this library. + * Using \c CComPtr is not corresponding with the philosophy of this library. * So these std-based smart pointer type were created. * * This namespace is used by internal functions as intended. 
diff --git a/src/EncodingHelper.cpp b/src/EncodingHelper.cpp index 76f6cef..a0126d1 100644 --- a/src/EncodingHelper.cpp +++ b/src/EncodingHelper.cpp @@ -71,7 +71,7 @@ namespace YYCC::EncodingHelper { #endif template || std::is_same_v<_TChar, char32_t>, int> = 0> - bool UTF8ToUTFOther(const char* src, std::basic_string<_TChar>& dest) { + static bool UTF8ToUTFOther(const char* src, std::basic_string<_TChar>& dest) { // Reference: // https://zh.cppreference.com/w/cpp/string/multibyte/mbrtoc32 // https://zh.cppreference.com/w/cpp/string/multibyte/mbrtoc16 @@ -146,7 +146,7 @@ namespace YYCC::EncodingHelper { } template || std::is_same_v<_TChar, char32_t>, int> = 0> - bool UTFOtherToUTF8(const _TChar* src, std::string& dest) { + static bool UTFOtherToUTF8(const _TChar* src, std::string& dest) { // Reference: // https://zh.cppreference.com/w/cpp/string/multibyte/c32rtomb // https://zh.cppreference.com/w/cpp/string/multibyte/c16rtomb diff --git a/src/EncodingHelper.hpp b/src/EncodingHelper.hpp index 6fa7a59..fb3ffc1 100644 --- a/src/EncodingHelper.hpp +++ b/src/EncodingHelper.hpp @@ -9,6 +9,45 @@ #include "WinImportSuffix.hpp" #endif +/** + * @brief The namespace handling encoding issues. + * @details + * \par Windows Encoding Conversion + * This namespace provides the conversion between wchar_t, UTF8 and code-page-based strings. + * The function names have the following format: \c AAAToBBB. + * AAA is the source string and BBB is the target string. + * AAA and BBB have the following possible values: + * \li \c Char: Code-page-based string. Such functions usually take an additional code page parameter specifying the code page of this string. For code pages, please see the Microsoft documentation. + * \li \c UTF8: UTF8 string. + * \li \c Wchar: wchar_t string. + * \par + * For example: \c WcharToUTF8 will perform the conversion from wchar_t to UTF8, + * and \c CharToChar will perform the conversion between 2 code-page-based strings, where the caller can specify an individual code page for each of these 2 strings. 
+ * \par + * These functions are Windows specific and are unavailable on other platforms. + * Because Windows uses wchar_t strings as its function arguments for globalization, and this library uses UTF8 everywhere, + * there must be a bidirectional way to convert between wchar_t strings and UTF8 strings. + * + * \par UTF32, UTF16 and UTF8 Conversion + * This namespace also provides the conversion among UTF32, UTF16 and UTF8. + * These conversion functions are suitable for all platforms, not Windows specific. + * \par + * Due to the implementation, this library assumes that all non-Windows systems use UTF8 as their C locale. + * Otherwise these functions will produce wrong results. + * + * \par Function Parameters + * We provide these encoding conversion functions in the following 2 forms: + * \li The function returns \c bool, and its parameters are, in order, the source string pointer and a corresponding \c std::basic_string container for receiving the result. + * \li The function returns the corresponding \c std::basic_string result, and its only parameter is the source string pointer. + * \par + * Neither of these 2 declarations throws any exception, and neither accepts nullptr as the source string. + * The only difference is the way they indicate a conversion error. + * \par + * The first declaration returns false to indicate that an error occurred during conversion. Please note that the content of the string container passed in may still be changed! + * The last declaration returns an empty string to indicate an error. Please note that if you pass an empty string in, it will still output an empty string, but that does not mean an error. + * So the last declaration is used in scenarios where we don't care whether the conversion succeeded, for example, when outputting something to the console. 
+ * +*/ namespace YYCC::EncodingHelper { #if YYCC_OS == YYCC_OS_WINDOWS diff --git a/src/ExceptionHelper.hpp b/src/ExceptionHelper.hpp index abe9e40..f07f6c0 100644 --- a/src/ExceptionHelper.hpp +++ b/src/ExceptionHelper.hpp @@ -2,6 +2,16 @@ #include "YYCCInternal.hpp" #if YYCC_OS == YYCC_OS_WINDOWS +/** + * @brief Windows specific unhandled exception processor. + * @details + * This namespace is Windows specific. On other platforms, the whole namespace is unavailable. + * + * This namespace allows the user to register an unhandled exception handler on Windows + * to output an error log to \c stderr and a log file, and to generate a core dump if possible. + * This is useful for bug tracing on Windows, especially because most Windows users are naive and don't know how to report a bug. + * +*/ namespace YYCC::ExceptionHelper { /** @@ -15,7 +25,6 @@ namespace YYCC::ExceptionHelper { * in temp folder (for convenient debugging of developer when reporting bugs) if it can. * * This function usually is called at the start of program. - * @remarks This function is Windows only. */ void Register(); /** @@ -27,7 +36,6 @@ namespace YYCC::ExceptionHelper { * You must call this function if you have called Register() before. * * This function usually is called at the end of program. - * @remarks This function is Windows only. */ void Unregister(); diff --git a/src/WinFctHelper.hpp b/src/WinFctHelper.hpp index 21a6fe6..f4c7651 100644 --- a/src/WinFctHelper.hpp +++ b/src/WinFctHelper.hpp @@ -12,8 +12,7 @@ * @brief The helper providing assistance to Win32 functions. * @details * This helper is Windows specific. - * If current environment is not Windows, - * the whole namespace will disappear. + * If the current environment is not Windows, the whole namespace will be unavailable. 
*/ namespace YYCC::WinFctHelper { diff --git a/testbench/main.cpp b/testbench/main.cpp index fc00331..ac7cbc8 100644 --- a/testbench/main.cpp +++ b/testbench/main.cpp @@ -4,7 +4,8 @@ namespace Console = YYCC::ConsoleHelper; namespace YYCCTestbench { -#pragma region UNICODE Test Data + +#pragma region Unicode Test Data // UNICODE Test Strings // Ref: https://stackoverflow.com/questions/478201/how-to-test-an-application-for-correct-encoding-e-g-utf-8 @@ -25,6 +26,7 @@ namespace YYCCTestbench { #define CPP_U8_LITERAL(strl) strl #define CPP_U16_LITERAL(strl) CONCAT(u, strl) #define CPP_U32_LITERAL(strl) CONCAT(U, strl) +#define CPP_WSTR_LITERAL(strl) CONCAT(L, strl) static std::vector c_UTF8TestStrTable { CPP_U8_LITERAL(TEST_UNICODE_STR_JAPAN), @@ -40,6 +42,20 @@ namespace YYCCTestbench { CPP_U8_LITERAL(TEST_UNICODE_STR_MATHMATICS), CPP_U8_LITERAL(TEST_UNICODE_STR_EMOJI), }; + static std::vector c_WStrTestStrTable { + CPP_WSTR_LITERAL(TEST_UNICODE_STR_JAPAN), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_CHINA), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_KOREA), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_ISRAEL), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_EGYPT), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_GREECE), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_RUSSIA), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_THAILAND), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_FRANCE), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_SPAIN), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_MATHMATICS), + CPP_WSTR_LITERAL(TEST_UNICODE_STR_EMOJI), + }; static std::vector c_UTF16TestStrTable { CPP_U16_LITERAL(TEST_UNICODE_STR_JAPAN), CPP_U16_LITERAL(TEST_UNICODE_STR_CHINA), @@ -69,6 +85,7 @@ namespace YYCCTestbench { CPP_U32_LITERAL(TEST_UNICODE_STR_EMOJI), }; +#undef CPP_WSTR_LITERAL #undef CPP_U32_LITERAL #undef CPP_U16_LITERAL #undef CPP_U8_LITERAL @@ -76,9 +93,6 @@ namespace YYCCTestbench { #pragma endregion - - - static void Assert(bool condition, const char* description) { if (condition) { Console::FormatLine(YYCC_COLOR_LIGHT_GREEN("OK: %s"), description); @@ -126,9 
+140,10 @@ namespace YYCCTestbench { } static void EncodingTestbench() { + // get test tuple size + size_t count = c_UTF8TestStrTable.size(); // check the convertion between given string - size_t count = c_UTF8TestStrTable.size(); for (size_t i = 0u; i < count; ++i) { // get item const auto& u8str = c_UTF8TestStrTable[i]; @@ -141,17 +156,26 @@ namespace YYCCTestbench { std::u32string u32cache; // do convertion check - Assert(YYCC::EncodingHelper::UTF8ToUTF16(u8str.c_str(), u16cache), "YYCC::EncodingHelper::UTF8ToUTF16"); - Assert(u16cache == u16str, "YYCC::EncodingHelper::UTF8ToUTF16"); + Assert(YYCC::EncodingHelper::UTF8ToUTF16(u8str.c_str(), u16cache) && u16cache == u16str, "YYCC::EncodingHelper::UTF8ToUTF16"); + Assert(YYCC::EncodingHelper::UTF8ToUTF32(u8str.c_str(), u32cache) && u32cache == u32str, "YYCC::EncodingHelper::UTF8ToUTF32"); - Assert(YYCC::EncodingHelper::UTF8ToUTF32(u8str.c_str(), u32cache), "YYCC::EncodingHelper::UTF8ToUTF32"); - Assert(u32cache == u32str, "YYCC::EncodingHelper::UTF8ToUTF32"); + Assert(YYCC::EncodingHelper::UTF16ToUTF8(u16str.c_str(), u8cache) && u8cache == u8str, "YYCC::EncodingHelper::UTF16ToUTF8"); + Assert(YYCC::EncodingHelper::UTF32ToUTF8(u32str.c_str(), u8cache) && u8cache == u8str, "YYCC::EncodingHelper::UTF32ToUTF8"); + } - Assert(YYCC::EncodingHelper::UTF16ToUTF8(u16str.c_str(), u8cache), "YYCC::EncodingHelper::UTF16ToUTF8"); - Assert(u8cache == u8str, "YYCC::EncodingHelper::UTF16ToUTF8"); + // check wstring convertion on windows + for (size_t i = 0u; i < count; ++i) { + // get item + const auto& u8str = c_UTF8TestStrTable[i]; + const auto& wstr = c_WStrTestStrTable[i]; - Assert(YYCC::EncodingHelper::UTF32ToUTF8(u32str.c_str(), u8cache), "YYCC::EncodingHelper::UTF32ToUTF8"); - Assert(u8cache == u8str, "YYCC::EncodingHelper::UTF32ToUTF8"); + // create cache variables + std::string u8cache; + std::wstring wcache; + + // do convertion check + Assert(YYCC::EncodingHelper::UTF8ToWchar(u8str.c_str(), wcache) && wcache == wstr, 
"YYCC::EncodingHelper::UTF8ToWchar"); + Assert(YYCC::EncodingHelper::WcharToUTF8(wstr.c_str(), u8cache) && u8cache == u8str, "YYCC::EncodingHelper::WcharToUTF8"); } }