From 65b81f5cfa6d86119b42bbc8a2bf35445fb7739f Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Fri, 5 Jul 2024 10:36:24 +0800 Subject: [PATCH] refactor: rename Native String to Ordinary String. - rename Native to Ordinary in code and documentation. - fulfill some documentations. --- doc/src/encoding_helper.dox | 32 ++++++++++++++++----------- doc/src/library_encoding.dox | 43 +++++++++++++++++++++++++----------- src/ConsoleHelper.cpp | 2 +- src/EncodingHelper.cpp | 12 +++++----- src/EncodingHelper.hpp | 8 +++---- src/ExceptionHelper.cpp | 4 ++-- src/FsPathPatch.cpp | 2 +- src/ParserHelper.hpp | 16 +++++++------- src/StringHelper.cpp | 6 ++--- 9 files changed, 74 insertions(+), 51 deletions(-) diff --git a/doc/src/encoding_helper.dox b/doc/src/encoding_helper.dox index 95aa2b0..2e72fbe 100644 --- a/doc/src/encoding_helper.dox +++ b/doc/src/encoding_helper.dox @@ -4,16 +4,22 @@ YYCC::EncodingHelper namespace include all encoding related functions: -\li The convertion between native string and UTF8 string which has been introduced in chapter \ref library_encoding. +\li The convertion between ordinary string and UTF8 string which has been introduced in chapter \ref library_encoding. \li Windows specific convertion between \c WCHAR, UTF8 string and string encoded by other encoding. \li The convertion among UTF8, UTF16 and UTF32. - -\section encoding_helper__native_utf8_conv Native & UTF8 Convertion +\section encoding_helper__ordinary_utf8_conv Ordinary & UTF8 Convertion These convertion functions have been introduced in previous page. See \ref library_encoding for more infomation. +YYCC supports following convertions: + +\li YYCC::EncodingHelper::ToUTF8: Convert ordinary string to UTF8 string. +\li YYCC::EncodingHelper::ToUTF8View: Same as ToUTF8, but return string view instead. +\li YYCC::EncodingHelper::ToOrdinary: Convert UTF8 string to ordinary string. +\li YYCC::EncodingHelper::ToOrdinaryView: Same as ToOrdinary, but return string view instead. 
+ \section encoding_helper__win_conv Windows Specific Convertion During Windows programming, the convertion between Microsoft specified \c wchar_t and \c char is an essential operation. @@ -26,11 +32,11 @@ Please use them carefully (make sure that you are using them only in Windows env YYCC supports following convertions: -\li \c WcharToChar: Convert \c wchar_t string to code page specified string. -\li \c CharToWchar: The reversed convertion of WcharToChar. -\li \c CharToChar: Convert string between 2 different code pages. It's a shortcut of calling CharToWchar and WcharToChar successively. -\li \c WcharToUTF8: Convert \c wchar_t string to UTF8 string. -\li \c UTF8ToWchar: The reversed convertion of WcharToUTF8. +\li YYCC::EncodingHelper::WcharToChar: Convert \c wchar_t string to code page specified string. +\li YYCC::EncodingHelper::CharToWchar: The reversed convertion of WcharToChar. +\li YYCC::EncodingHelper::CharToChar: Convert string between 2 different code pages. It's a shortcut of calling CharToWchar and WcharToChar successively. +\li YYCC::EncodingHelper::WcharToUTF8: Convert \c wchar_t string to UTF8 string. +\li YYCC::EncodingHelper::UTF8ToWchar: The reversed convertion of WcharToUTF8. Code Page is a Windows concept. If you don't understand it, please view corresponding Microsoft documentation. @@ -47,14 +53,14 @@ They can be used in any platform, not confined in Windows platforms. YYCC supports following convertions: -\li \c UTF8ToUTF16: Convert UTF8 string to UTF16 string. -\li \c UTF16ToUTF8: The reversed convertion of UTF8ToUTF16. -\li \c UTF8ToUTF32: Convert UTF8 string to UTF32 string. -\li \c UTF32ToUTF8: The reversed convertion of UTF8ToUTF32. +\li YYCC::EncodingHelper::UTF8ToUTF16: Convert UTF8 string to UTF16 string. +\li YYCC::EncodingHelper::UTF16ToUTF8: The reversed convertion of UTF8ToUTF16. +\li YYCC::EncodingHelper::UTF8ToUTF32: Convert UTF8 string to UTF32 string. 
+\li YYCC::EncodingHelper::UTF32ToUTF8: The reversed convertion of UTF8ToUTF32. \section encoding_helper__overloads Function Overloads -Every encoding convertion functions (except the convertion between UTF8 and native string) have 4 different overloads for different scenarios. +Every encoding convertion functions (except the convertion between UTF8 and ordinary string) have 4 different overloads for different scenarios. Take YYCC::EncodingHelper::WcharToChar for example. There are following 4 overloads: diff --git a/doc/src/library_encoding.dox b/doc/src/library_encoding.dox index c04b40e..960b231 100644 --- a/doc/src/library_encoding.dox +++ b/doc/src/library_encoding.dox @@ -59,6 +59,23 @@ I notice standard library change UTF8 related functions frequently and its API a For example, standard library brings \c std::codecvt_utf8 in C++ 11, deprecate it in C++ 17 and even remove it in C++ 26. That's unacceptable! So I create my own UTF8 type to avoid the scenario that standard library remove \c char8_t in future. +\section library_encoding__concept Concepts + +In following content, you may be faced with 2 words: ordinary string and UTF8 string. + +UTF8 string, as its name, is the string encoded with UTF8. +The char type of it must be \c yycc_char8_t. +(equivalent to \c char8_t after C++ 20.) + +Ordinary string means the plain, native string. +The result of C++ string literal without any prefix \c "foo bar" is an ordinary string. +The char type of it is \c char. +Its encoding depends on compiler and environment. +(UTF8 in Linux, or system code page in Windows if UTF8 switch was not enabled in MSVC.) + +For more information, please browse CppReference: +https://en.cppreference.com/w/cpp/language/string_literal + \section library_encoding__utf8_literal UTF8 Literal String literal is a C++ concept. @@ -123,35 +140,35 @@ char* mutable_utf8 = const_cast(absolutely_is_utf8); // This is not safe. 
yycc_char8_t* mutable_converted = YYCC::EncodingHelper::ToUTF8(mutable_utf8); \endcode -YYCC::EncodingHelper::ToUTF8 has 2 overloads which can handle const and mutable stirng pointer convertion respectively. +YYCC::EncodingHelper::ToUTF8 has 2 overloads which can handle constant and mutable string pointer convertion respectively. -YYCC also has ability that convert YYCC UTF8 char type to native char type by YYCC::EncodingHelper::ToNative. +YYCC also has the ability to convert YYCC UTF8 char type to ordinary char type by YYCC::EncodingHelper::ToOrdinary. Here is an exmaple: \code const yycc_char8_t* yycc_utf8 = YYCC_U8("I am UTF8 string."); -const char* converted = YYCC::EncodingHelper::ToNative(yycc_utf8); +const char* converted = YYCC::EncodingHelper::ToOrdinary(yycc_utf8); yycc_char8_t* mutable_yycc_utf8 = const_cast(yycc_utf8); // Not safe. Also just for example. -char* mutable_converted = YYCC::EncodingHelper::ToNative(mutable_yycc_utf8); +char* mutable_converted = YYCC::EncodingHelper::ToOrdinary(mutable_yycc_utf8); \endcode -Same as YYCC::EncodingHelper::ToUTF8, YYCC::EncodingHelper::ToNative also has 2 overloads to handle const and mutable string pointer. +Same as YYCC::EncodingHelper::ToUTF8, YYCC::EncodingHelper::ToOrdinary also has 2 overloads to handle constant and mutable string pointer. \section library_encoding__utf8_container UTF8 String Container String container usually means the standard library string container, such as \c std::string, \c std::wstring, \c std::u32string and etc. In many personal project, programmer may use \c std::string everywhere because \c std::u8string may not be presented when writing peoject. -How to do convertion between native string container and YYCC UTF8 string container? +How to do convertion between ordinary string container and YYCC UTF8 string container? It is definitely illegal that directly do force convertion. Because they may have different class layout. Calm down and I will tell you how to do correct convertion. 
-YYCC provides YYCC::EncodingHelper::ToUTF8 to convert native string container to YYCC UTF8 string container. +YYCC provides YYCC::EncodingHelper::ToUTF8 to convert ordinary string container to YYCC UTF8 string container. There is an exmaple: \code -std::string native_string("I am UTF8"); -yycc_u8string yycc_string = YYCC::EncodingHelper::ToUTF8(native_string); +std::string ordinary_string("I am UTF8"); +yycc_u8string yycc_string = YYCC::EncodingHelper::ToUTF8(ordinary_string); auto result = YYCC::EncodingHelper::UTF8ToUTF32(yycc_string); \endcode @@ -160,19 +177,19 @@ However, there is a implicit convertion from \c std::string to \c std::string_vi so you can directly pass a \c std::string instance to it. String view will reduce unnecessary memory copy. -If you just want to pass native string container to function, and this function accepts \c yycc_u8string_view as its argument, +If you just want to pass ordinary string container to function, and this function accepts \c yycc_u8string_view as its argument, you can use alternative YYCC::EncodingHelper::ToUTF8View. \code -std::string native_string("I am UTF8"); -yycc_u8string_view yycc_string = YYCC::EncodingHelper::ToUTF8View(native_string); +std::string ordinary_string("I am UTF8"); +yycc_u8string_view yycc_string = YYCC::EncodingHelper::ToUTF8View(ordinary_string); auto result = YYCC::EncodingHelper::UTF8ToUTF32(yycc_string); \endcode Comparing with previous one, this example use less memory. The reduced memory is the content of \c yycc_string because string view is a view, not the copy of original string. -Same as UTF8 string pointer, we also have YYCC::EncodingHelper::ToNative and YYCC::EncodingHelper::ToNativeView do correspondant reverse convertion. +Same as UTF8 string pointer, we also have YYCC::EncodingHelper::ToOrdinary and YYCC::EncodingHelper::ToOrdinaryView do correspondant reverse convertion. Try to do your own research and figure out how to use them. It's pretty easy. 
diff --git a/src/ConsoleHelper.cpp b/src/ConsoleHelper.cpp index 00b84de..f74e194 100644 --- a/src/ConsoleHelper.cpp +++ b/src/ConsoleHelper.cpp @@ -226,7 +226,7 @@ namespace YYCC::ConsoleHelper { WinConsoleWrite(strl, bIsErr); #else // in linux, directly use C function to write. - std::fputs(EncodingHelper::ToNative(strl.c_str()), bIsErr ? stderr : stdout); + std::fputs(EncodingHelper::ToOrdinary(strl.c_str()), bIsErr ? stderr : stdout); #endif } diff --git a/src/EncodingHelper.cpp b/src/EncodingHelper.cpp index c7750a6..3a043db 100644 --- a/src/EncodingHelper.cpp +++ b/src/EncodingHelper.cpp @@ -4,7 +4,7 @@ namespace YYCC::EncodingHelper { -#pragma region UTF8 Native Convertion +#pragma region UTF8 Ordinary Convertion const yycc_char8_t* ToUTF8(const char* src) { return reinterpret_cast(src); @@ -19,16 +19,16 @@ namespace YYCC::EncodingHelper { return yycc_u8string_view(reinterpret_cast(src.data()), src.size()); } - const char* ToNative(const yycc_char8_t* src) { + const char* ToOrdinary(const yycc_char8_t* src) { return reinterpret_cast(src); } - char* ToNative(yycc_char8_t* src) { + char* ToOrdinary(yycc_char8_t* src) { return reinterpret_cast(src); } - std::string ToNative(const yycc_u8string_view& src) { + std::string ToOrdinary(const yycc_u8string_view& src) { return std::string(reinterpret_cast(src.data()), src.size()); } - std::string_view ToNativeView(const yycc_u8string_view& src) { + std::string_view ToOrdinaryView(const yycc_u8string_view& src) { return std::string_view(reinterpret_cast(src.data()), src.size()); } @@ -176,7 +176,7 @@ return ret; #pragma region UTF8ToWchar bool UTF8ToWchar(const yycc_u8string_view& src, std::wstring& dst) { - std::string_view adapted_src(ToNativeView(src)); + std::string_view adapted_src(ToOrdinaryView(src)); return CharToWchar(adapted_src, dst, CP_UTF8); } bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst) { diff --git a/src/EncodingHelper.hpp b/src/EncodingHelper.hpp index ccfbdab..8f28169 100644 --- 
a/src/EncodingHelper.hpp +++ b/src/EncodingHelper.hpp @@ -58,10 +58,10 @@ namespace YYCC::EncodingHelper { yycc_u8string ToUTF8(const std::string_view& src); yycc_u8string_view ToUTF8View(const std::string_view& src); - const char* ToNative(const yycc_char8_t* src); - char* ToNative(yycc_char8_t* src); - std::string ToNative(const yycc_u8string_view& src); - std::string_view ToNativeView(const yycc_u8string_view& src); + const char* ToOrdinary(const yycc_char8_t* src); + char* ToOrdinary(yycc_char8_t* src); + std::string ToOrdinary(const yycc_u8string_view& src); + std::string_view ToOrdinaryView(const yycc_u8string_view& src); #if YYCC_OS == YYCC_OS_WINDOWS diff --git a/src/ExceptionHelper.cpp b/src/ExceptionHelper.cpp index f02a6d9..23955d0 100644 --- a/src/ExceptionHelper.cpp +++ b/src/ExceptionHelper.cpp @@ -122,7 +122,7 @@ namespace YYCC::ExceptionHelper { if (fs != nullptr) { va_list arg1; va_start(arg1, fmt); - std::vfprintf(fs, EncodingHelper::ToNative(fmt), arg1); + std::vfprintf(fs, EncodingHelper::ToOrdinary(fmt), arg1); std::fputs("\n", fs); va_end(arg1); } @@ -145,7 +145,7 @@ namespace YYCC::ExceptionHelper { static void UExceptionErrLogWriteLine(std::FILE* fs, const yycc_char8_t* strl) { // write to file if (fs != nullptr) { - std::fputs(EncodingHelper::ToNative(strl), fs); + std::fputs(EncodingHelper::ToOrdinary(strl), fs); std::fputs("\n", fs); } // write to stderr diff --git a/src/FsPathPatch.cpp b/src/FsPathPatch.cpp index 5977db9..1251919 100644 --- a/src/FsPathPatch.cpp +++ b/src/FsPathPatch.cpp @@ -18,7 +18,7 @@ namespace YYCC::FsPathPatch { return std::filesystem::path(wpath); #else - return std::filesystem::path(EncodingHelper::ToNative(u8_path)); + return std::filesystem::path(EncodingHelper::ToOrdinary(u8_path)); #endif } diff --git a/src/ParserHelper.hpp b/src/ParserHelper.hpp index ebd4f26..98588a5 100644 --- a/src/ParserHelper.hpp +++ b/src/ParserHelper.hpp @@ -16,13 +16,13 @@ namespace YYCC::ParserHelper { template, int> = 0> bool 
TryParse(const yycc_u8string_view& strl, _Ty& num) { auto [ptr, ec] = std::from_chars( - EncodingHelper::ToNative(strl.data()), - EncodingHelper::ToNative(strl.data() + strl.size()), + EncodingHelper::ToOrdinary(strl.data()), + EncodingHelper::ToOrdinary(strl.data() + strl.size()), num, std::chars_format::general ); if (ec == std::errc()) { // check whether the full string is matched - return ptr == EncodingHelper::ToNative(strl.data() + strl.size()); + return ptr == EncodingHelper::ToOrdinary(strl.data() + strl.size()); } else if (ec == std::errc::invalid_argument) { // given string is invalid return false; @@ -37,13 +37,13 @@ namespace YYCC::ParserHelper { template && !std::is_same_v<_Ty, bool>, int> = 0> bool TryParse(const yycc_u8string_view& strl, _Ty& num, int base = 10) { auto [ptr, ec] = std::from_chars( - EncodingHelper::ToNative(strl.data()), - EncodingHelper::ToNative(strl.data() + strl.size()), + EncodingHelper::ToOrdinary(strl.data()), + EncodingHelper::ToOrdinary(strl.data() + strl.size()), num, base ); if (ec == std::errc()) { // check whether the full string is matched - return ptr == EncodingHelper::ToNative(strl.data() + strl.size()); + return ptr == EncodingHelper::ToOrdinary(strl.data() + strl.size()); } else if (ec == std::errc::invalid_argument) { // given string is invalid return false; @@ -76,8 +76,8 @@ namespace YYCC::ParserHelper { yycc_u8string ToString(_Ty num) { std::array buffer; auto [ptr, ec] = std::to_chars( - EncodingHelper::ToNative(buffer.data()), - EncodingHelper::ToNative(buffer.data() + buffer.size()), + EncodingHelper::ToOrdinary(buffer.data()), + EncodingHelper::ToOrdinary(buffer.data() + buffer.size()), num ); if (ec == std::errc()) { diff --git a/src/StringHelper.cpp b/src/StringHelper.cpp index b53c084..6a8fe88 100644 --- a/src/StringHelper.cpp +++ b/src/StringHelper.cpp @@ -25,7 +25,7 @@ namespace YYCC::StringHelper { int count = std::vsnprintf( nullptr, 0, - EncodingHelper::ToNative(format), + 
EncodingHelper::ToOrdinary(format), args1 ); if (count < 0) { @@ -40,9 +40,9 @@ namespace YYCC::StringHelper { // however std::vsnprintf already have a trailing NULL, so we plus 1 for it. strl.resize(count); int write_result = std::vsnprintf( - EncodingHelper::ToNative(strl.data()), + EncodingHelper::ToOrdinary(strl.data()), strl.size() + 1, - EncodingHelper::ToNative(format), + EncodingHelper::ToOrdinary(format), args2 ); va_end(args2);