diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d5e4e04..a82e814 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ PRIVATE ${CMAKE_CURRENT_LIST_DIR}/DialogHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/EncodingHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/ExceptionHelper.hpp + ${CMAKE_CURRENT_LIST_DIR}/FsPathPatch.hpp ${CMAKE_CURRENT_LIST_DIR}/IOHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/ParserHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/StringHelper.hpp @@ -25,6 +26,7 @@ PRIVATE ${CMAKE_CURRENT_LIST_DIR}/DialogHelper.cpp ${CMAKE_CURRENT_LIST_DIR}/EncodingHelper.cpp ${CMAKE_CURRENT_LIST_DIR}/ExceptionHelper.cpp + ${CMAKE_CURRENT_LIST_DIR}/FsPathPatch.cpp ${CMAKE_CURRENT_LIST_DIR}/IOHelper.cpp ${CMAKE_CURRENT_LIST_DIR}/ParserHelper.cpp ${CMAKE_CURRENT_LIST_DIR}/StringHelper.cpp @@ -72,9 +74,11 @@ FILES ${CMAKE_CURRENT_LIST_DIR}/DialogHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/EncodingHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/ExceptionHelper.hpp + ${CMAKE_CURRENT_LIST_DIR}/FsPathPatch.hpp ${CMAKE_CURRENT_LIST_DIR}/IOHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/ParserHelper.hpp ${CMAKE_CURRENT_LIST_DIR}/StringHelper.hpp + ${CMAKE_CURRENT_LIST_DIR}/WinFctHelper.hpp # Windows including guard pair ${CMAKE_CURRENT_LIST_DIR}/WinImportPrefix.hpp ${CMAKE_CURRENT_LIST_DIR}/WinImportSuffix.hpp diff --git a/src/FsPathPatch.cpp b/src/FsPathPatch.cpp new file mode 100644 index 0000000..37f33f6 --- /dev/null +++ b/src/FsPathPatch.cpp @@ -0,0 +1,41 @@ +#include "FsPathPatch.hpp" + +#include "EncodingHelper.hpp" +#include +#include + +namespace YYCC::FsPathPatch { + + std::filesystem::path FromUTF8Path(const char* u8_path) { +#if YYCC_OS == YYCC_OS_WINDOWS + + // convert path to wchar + std::wstring wpath; + if (!YYCC::EncodingHelper::UTF8ToWchar(u8_path, wpath)) + throw std::invalid_argument("Fail to convert given UTF8 string."); + + // build the path from the converted wide string. 
+ return std::filesystem::path(wpath); + +#else + return std::filesystem::path(u8_path); +#endif + } + + std::string ToUTF8Path(const std::filesystem::path& path) { +#if YYCC_OS == YYCC_OS_WINDOWS + + // get and convert to utf8 + std::string u8_path; + if (!YYCC::EncodingHelper::WcharToUTF8(path.c_str(), u8_path)) + throw std::invalid_argument("Fail to convert to UTF8 string."); + + // return utf8 path + return u8_path; + +#else + return path.string(); +#endif + } + +} diff --git a/src/FsPathPatch.hpp b/src/FsPathPatch.hpp new file mode 100644 index 0000000..799d585 --- /dev/null +++ b/src/FsPathPatch.hpp @@ -0,0 +1,41 @@ +#pragma once +#include "YYCCInternal.hpp" + +#include + +/** + * @brief The patch namespace resolving \c std::filesystem::path encoding issue. + * @details + * This patch is Windows oriented. + * If you are on Windows, this patch will perform extra operations to achieve its goals, + * and on other platforms, the functions just redirect requests to the corresponding vanilla C++ functions. + * + * As you know, the underlying char type of \c std::filesystem::path is \c wchar_t on Windows, + * and on other platforms, it is simply \c char. + * Due to this, if you pass a UTF8 char sequence to \c std::filesystem::path on Windows, + * the library implementation will assume your input is based on the current Windows code page, not UTF8. + * And the final path stored in \c std::filesystem::path is not what you expected. + * + * This patch namespace always uses UTF8 for its arguments, so there is no ambiguity. + * You should use the functions provided by this namespace on every platform + * instead of vanilla \c std::filesystem::path functions. +*/ +namespace YYCC::FsPathPatch { + + /** + * @brief Constructs the path from a UTF8 character sequence + * @param[in] u8_path UTF8 path string for building this std::filesystem::path. + * @return std::filesystem::path instance. + * @exception std::invalid_argument Fail to parse given UTF8 string (maybe invalid?). 
+ */ + std::filesystem::path FromUTF8Path(const char* u8_path); + + /** + * @brief Returns the UTF8 representation of the pathname + * @param[in] path The path to be converted. + * @return UTF8 encoded string representing given path. + * @exception std::invalid_argument Fail to convert to UTF8 string. + */ + std::string ToUTF8Path(const std::filesystem::path& path); + +} diff --git a/src/IOHelper.cpp b/src/IOHelper.cpp index 23541ea..e2b0f52 100644 --- a/src/IOHelper.cpp +++ b/src/IOHelper.cpp @@ -32,22 +32,5 @@ namespace YYCC::IOHelper { #endif } - std::filesystem::path UTF8Path(const char* u8_path) { -#if YYCC_OS == YYCC_OS_WINDOWS - - // convert path to wchar - std::wstring wpath; - if (!YYCC::EncodingHelper::UTF8ToWchar(u8_path, wpath)) - throw std::invalid_argument("Fail to convert given UTF8 string."); - - // call microsoft specified fopen which support wchar as argument. - return std::filesystem::path(wpath); - -#else - return std::filesystem::path(u8_path); -#endif - } - - } diff --git a/src/IOHelper.hpp b/src/IOHelper.hpp index 0e5f641..ab77694 100644 --- a/src/IOHelper.hpp +++ b/src/IOHelper.hpp @@ -34,15 +34,4 @@ namespace YYCC::IOHelper { */ FILE* UTF8FOpen(const char* u8_filepath, const char* u8_mode); - /** - * @brief Build std::filesystem::path from UTF8 string. - * @param[in] u8_path UTF8 path string for building this std::filesystem::path. - * @return std::filesystem::path instance. - * @exception std::invalid_argument Fail to parse given UTF8 string (maybe invalid?). - * @remarks - * This function is suit for Windows. - * On other platforms, it will simply call the constructor of std::filesystem::path. 
- */ - std::filesystem::path UTF8Path(const char* u8_path); - } diff --git a/src/YYCCommonplace.hpp b/src/YYCCommonplace.hpp index f66fcf7..ae819ff 100644 --- a/src/YYCCommonplace.hpp +++ b/src/YYCCommonplace.hpp @@ -10,3 +10,4 @@ #include "ExceptionHelper.hpp" #include "IOHelper.hpp" #include "WinFctHelper.hpp" +#include "FsPathPatch.hpp" diff --git a/testbench/main.cpp b/testbench/main.cpp index 8b80947..4a4446d 100644 --- a/testbench/main.cpp +++ b/testbench/main.cpp @@ -5,6 +5,23 @@ namespace Console = YYCC::ConsoleHelper; namespace YYCCTestbench { + // UTF8 Test String Table + // Ref: https://stackoverflow.com/questions/478201/how-to-test-an-application-for-correct-encoding-e-g-utf-8 + static std::vector c_UTF8TestStrTable { + "\u30E6\u30FC\u30B6\u30FC\u5225\u30B5\u30A4\u30C8", // JAPAN + "\u7B80\u4F53\u4E2D\u6587", // CHINA + "\uD06C\uB85C\uC2A4 \uD50C\uB7AB\uD3FC\uC73C\uB85C", // KOREA + "\u05DE\u05D3\u05D5\u05E8\u05D9\u05DD \u05DE\u05D1\u05D5\u05E7\u05E9\u05D9\u05DD", // ISRAEL + "\u0623\u0641\u0636\u0644 \u0627\u0644\u0628\u062D\u0648\u062B", // EGYPT + "\u03A3\u1F72 \u03B3\u03BD\u03C9\u03C1\u03AF\u03B6\u03C9 \u1F00\u03C0\u1F78", // GREECE + "\u0414\u0435\u0441\u044F\u0442\u0443\u044E \u041C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0443\u044E", // RUSSIA + "\u0E41\u0E1C\u0E48\u0E19\u0E14\u0E34\u0E19\u0E2E\u0E31\u0E48\u0E19\u0E40\u0E2A\u0E37\u0E48\u0E2D\u0E21\u0E42\u0E17\u0E23\u0E21\u0E41\u0E2A\u0E19\u0E2A\u0E31\u0E07\u0E40\u0E27\u0E0A", // THAILAND + "fran\u00E7ais langue \u00E9trang\u00E8re", // FRANCE + "ma\u00F1ana ol\u00E9", // SPAIN + "\u222E E\u22C5da = Q, n \u2192 \u221E, \u2211 f(i) = \u220F g(i)", // MATHEMATICS + "\xF0\x9F\x8D\xA3 \xE2\x9C\x96 \xF0\x9F\x8D\xBA", // EMOJI + }; + static void Assert(bool condition, const char* description) { if (condition) { Console::FormatLine(YYCC_COLOR_LIGHT_GREEN("OK: %s"), description); @@ -34,34 +51,19 @@ namespace YYCCTestbench { #undef TEST_MACRO // UTF8 Output Test - // Ref: 
https://stackoverflow.com/questions/478201/how-to-test-an-application-for-correct-encoding-e-g-utf-8 Console::WriteLine("UTF8 Output Test:"); - static std::vector c_TestStrings { - "\u30E6\u30FC\u30B6\u30FC\u5225\u30B5\u30A4\u30C8", // JAPAN - "\u7B80\u4F53\u4E2D\u6587", // CHINA - "\uD06C\uB85C\uC2A4 \uD50C\uB7AB\uD3FC\uC73C\uB85C", // KOREA - "\u05DE\u05D3\u05D5\u05E8\u05D9\u05DD \u05DE\u05D1\u05D5\u05E7\u05E9\u05D9\u05DD", // ISRAEL - "\u0623\u0641\u0636\u0644 \u0627\u0644\u0628\u062D\u0648\u062B", // EGYPT - "\u03A3\u1F72 \u03B3\u03BD\u03C9\u03C1\u03AF\u03B6\u03C9 \u1F00\u03C0\u1F78", // GREECE - "\u0414\u0435\u0441\u044F\u0442\u0443\u044E \u041C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u0443\u044E", // RUSSIA - "\u0E41\u0E1C\u0E48\u0E19\u0E14\u0E34\u0E19\u0E2E\u0E31\u0E48\u0E19\u0E40\u0E2A\u0E37\u0E48\u0E2D\u0E21\u0E42\u0E17\u0E23\u0E21\u0E41\u0E2A\u0E19\u0E2A\u0E31\u0E07\u0E40\u0E27\u0E0A", // THAILAND - "fran\u00E7ais langue \u00E9trang\u00E8re", // FRANCE - "ma\u00F1ana ol\u00E9", // SPAIN - "\u222E E\u22C5da = Q, n \u2192 \u221E, \u2211 f(i) = \u220F g(i)", // MATHMATICS - "\xF0\x9F\x8D\xA3 \xE2\x9C\x96 \xF0\x9F\x8D\xBA", // EMOJI - }; - for (const auto* ptr : c_TestStrings) { - Console::FormatLine("\t%s", ptr); + for (const auto& strl : c_UTF8TestStrTable) { + Console::FormatLine("\t%s", strl.c_str()); } // UTF8 Input Test Console::WriteLine("UTF8 Input Test:"); - for (const auto* ptr : c_TestStrings) { - Console::FormatLine("\tPlease type: %s", ptr); + for (const auto& strl : c_UTF8TestStrTable) { + Console::FormatLine("\tPlease type: %s", strl.c_str()); Console::Write("\t> "); std::string gotten(Console::ReadLine()); - Assert(gotten == ptr, YYCC::StringHelper::Printf("Got: %s", gotten.c_str()).c_str()); + Assert(gotten == strl, YYCC::StringHelper::Printf("Got: %s", gotten.c_str()).c_str()); } } @@ -70,7 +72,7 @@ namespace YYCCTestbench { // Test Printf auto test_printf = YYCC::StringHelper::Printf("%s == %s", "Hello World", "Hello, 
world"); Assert(test_printf == "Hello World == Hello, world", "YYCC::StringHelper::Printf"); - + // Test Replace auto test_replace = YYCC::StringHelper::Replace("aabbcc", "bb", "dd"); // normal case Assert(test_replace == "aaddcc", "YYCC::StringHelper::Replace"); @@ -128,7 +130,7 @@ namespace YYCCTestbench { type_t cache; \ Assert(YYCC::ParserHelper::TryParse(cache_string, cache) && cache == value, "YYCC::StringHelper::TryParse<" #type_t ">"); \ } - + TEST_MACRO(int8_t, INT8_C(-61), "-61"); TEST_MACRO(uint8_t, UINT8_C(200), "200"); TEST_MACRO(int16_t, INT16_C(6161), "6161"); @@ -138,7 +140,7 @@ namespace YYCCTestbench { TEST_MACRO(int64_t, INT64_C(616161616161), "616161616161"); TEST_MACRO(uint64_t, UINT64_C(9223372036854775807), "9223372036854775807"); TEST_MACRO(bool, true, "true"); - + #undef TEST_MACRO // Test failed TryParse @@ -147,7 +149,7 @@ namespace YYCCTestbench { type_t cache; \ Assert(!YYCC::ParserHelper::TryParse(cache_string, cache), "YYCC::StringHelper::TryParse<" #type_t ">"); \ } - + TEST_MACRO(int8_t, INT8_C(-61), "6161"); TEST_MACRO(uint8_t, UINT8_C(200), "32800"); TEST_MACRO(int16_t, INT16_C(6161), "61616161"); @@ -157,7 +159,7 @@ namespace YYCCTestbench { TEST_MACRO(int64_t, INT64_C(616161616161), "616161616161616161616161"); TEST_MACRO(uint64_t, UINT64_C(9223372036854775807), "92233720368547758079223372036854775807"); TEST_MACRO(bool, true, "hello, world!"); - + #undef TEST_MACRO // Test ToString @@ -191,11 +193,11 @@ namespace YYCCTestbench { YYCC::DialogHelper::FileDialog params; auto& filters = params.ConfigreFileTypes(); - filters.Add("Microsoft Word (*.docx; *.doc)", {"*.docx", "*.doc"}); - filters.Add("Microsoft Excel (*.xlsx; *.xls)", {"*.xlsx", "*.xls"}); - filters.Add("Microsoft PowerPoint (*.pptx; *.ppt)", {"*.pptx", "*.ppt"}); - filters.Add("Text File (*.txt)", {"*.txt"}); - filters.Add("All Files (*.*)", {"*.*"}); + filters.Add("Microsoft Word (*.docx; *.doc)", { "*.docx", "*.doc" }); + filters.Add("Microsoft Excel (*.xlsx; 
*.xls)", { "*.xlsx", "*.xls" }); + filters.Add("Microsoft PowerPoint (*.pptx; *.ppt)", { "*.pptx", "*.ppt" }); + filters.Add("Text File (*.txt)", { "*.txt" }); + filters.Add("All Files (*.*)", { "*.*" }); params.SetDefaultFileTypeIndex(0u); if (YYCC::DialogHelper::OpenFileDialog(params, ret)) { Console::FormatLine("Open File: %s", ret.c_str()); @@ -223,8 +225,28 @@ namespace YYCCTestbench { Console::FormatLine("Temp Directory: %s", test_temp.c_str()); std::string test_module_name; - Assert(YYCC::WinFctHelper::GetModuleName(YYCC::WinFctHelper::GetCurrentModule(), test_module_name), "YYCC::WinFctHelper::GetModuleName"); - Console::FormatLine("Current Module Name: %s", test_module_name.c_str()); + Assert(YYCC::WinFctHelper::GetModuleFileName(YYCC::WinFctHelper::GetCurrentModule(), test_module_name), "YYCC::WinFctHelper::GetModuleFileName"); + Console::FormatLine("Current Module File Name: %s", test_module_name.c_str()); + } + + static void FsPathPatch() { + + std::filesystem::path test_path; + for (const auto& strl : c_UTF8TestStrTable) { + test_path /= YYCC::FsPathPatch::FromUTF8Path(strl.c_str()); + } + std::string test_slashed_path(YYCC::FsPathPatch::ToUTF8Path(test_path)); + +#if YYCC_OS == YYCC_OS_WINDOWS + std::wstring wdecilmer(1u, std::filesystem::path::preferred_separator); + std::string decilmer(YYCC::EncodingHelper::WcharToUTF8(wdecilmer.c_str())); +#else + std::string decilmer(1u, std::filesystem::path::preferred_separator); +#endif + std::string test_joined_path(YYCC::StringHelper::Join(c_UTF8TestStrTable, decilmer.c_str())); + + Assert(test_slashed_path == test_joined_path, "YYCC::FsPathPatch"); + } } @@ -234,5 +256,6 @@ int main(int argc, char** args) { //YYCCTestbench::StringTestbench(); //YYCCTestbench::ParserTestbench(); //YYCCTestbench::DialogTestbench(); - YYCCTestbench::WinFctTestbench(); + //YYCCTestbench::WinFctTestbench(); + YYCCTestbench::FsPathPatch(); }