Compare commits

...

3 Commits

Author SHA1 Message Date
61ad1ff3ce refactor: refactor encoding helper again.
- add the conversion between yycc_char8_t and the system char type because we decided to use our char8_t throughout the whole library.
- make a clear boundary between the yycc char8_t declarations and the related assist functions. the declarations live in the internal header and the assist functions are written in the encoding helper.
- use std::basic_string_view instead of std::basic_string to give the encoding conversion functions more flexibility and to reduce redundant memory usage at the same time.
2024-06-27 20:49:02 +08:00
c15b57d055 refactor: bring char8_t to this library.
- add yycc_char8_t and yycc_u8string in code to indicate explicit utf8 char type and string. it also has a polyfill if compiler and library do not support utf8 char type.
- refactor the whole encoding helper. allow converting string with embedded NUL. but not tested.
2024-06-26 21:04:56 +08:00
bb17bb6a1f chore: update build system
- use configuration-arch-based path in MSVC to make sure generated package can be used by native MSVC project.
- add github action and corresponding build script. but not tested.
- fix some testbench code.
2024-06-20 15:51:40 +08:00
10 changed files with 432 additions and 131 deletions

3
.gitattributes vendored
View File

@ -1 +1,2 @@
Doxyfile.in eol=lf Doxyfile.in eol=lf
*.bat eol=crlf

35
.github/workflows/nightly.yml.disabled vendored Normal file
View File

@ -0,0 +1,35 @@
name: YYCC Nightly Build
on:
workflow_dispatch:
push:
branches:
- master
jobs:
msvc-build:
strategy:
matrix:
vs: ['2019']
msvc_arch: ['x86']
runs-on: windows-2019
steps:
- name: Fetching Repository
uses: actions/checkout@v3
- name: Building YYCC
shell: cmd
run: |
set VS=${{ matrix.vs }}
set VCVARS="C:\Program Files (x86)\Microsoft Visual Studio\%VS%\Enterprise\VC\Auxiliary\Build\vcvarsall.bat"
if not exist %VCVARS% set VCVARS="C:\Program Files\Microsoft Visual Studio\%VS%\Enterprise\VC\Auxiliary\Build\vcvarsall.bat"
call %VCVARS% ${{ matrix.msvc_arch }}
.\script\build.bat
- name: Uploading Nightly Build
uses: actions/upload-artifact@v3
with:
name: YYCC-windows-nightly
path: bin/install/*
retention-days: 30

View File

@ -8,6 +8,18 @@ project(YYCC
option(YYCC_BUILD_TESTBENCH "Build testbench of YYCCommonplace." OFF) option(YYCC_BUILD_TESTBENCH "Build testbench of YYCCommonplace." OFF)
option(YYCC_BUILD_DOC "Build document of YYCCommonplace." OFF) option(YYCC_BUILD_DOC "Build document of YYCCommonplace." OFF)
# Decide where libraries and executables are installed.
# The plain "lib" and "bin" layout is the default.
set(YYCC_INSTALL_PATH_LIB lib)
set(YYCC_INSTALL_PATH_BIN bin)
# When generating for the Visual Studio IDE, override the defaults so that
# the platform name (and, for libraries, the build configuration) becomes
# part of the install path.
if (CMAKE_GENERATOR MATCHES "Visual Studio")
    set(YYCC_INSTALL_PATH_LIB lib/${CMAKE_VS_PLATFORM_NAME}/$<CONFIG>)
    set(YYCC_INSTALL_PATH_BIN bin/${CMAKE_VS_PLATFORM_NAME})
endif()
# Import 2 build targets # Import 2 build targets
add_subdirectory(src) add_subdirectory(src)
if (YYCC_BUILD_TESTBENCH) if (YYCC_BUILD_TESTBENCH)

35
script/build.bat Normal file
View File

@ -0,0 +1,35 @@
@ECHO OFF
:: Build and install YYCC with the "Visual Studio 16 2019" generator for both
:: Win32 and x64, in Debug and Release configuration.
:: Everything is installed into bin\install.
:: The script exits with a non-zero code as soon as any step fails, so that
:: callers (e.g. a CI job) can detect the failure.

:: Make sure we are started from the project root by probing for README.md.
:: The path is quoted so that a working directory containing spaces is handled.
SET "README_PATH=%CD%\README.md"
IF NOT EXIST "%README_PATH%" (
    ECHO Error: You must run this script at the root folder of this project!
    EXIT /b 1
)

:: Create essential folder (skip the ones already present from a previous run)
IF NOT EXIST bin MKDIR bin
CD bin
IF NOT EXIST Win32 MKDIR Win32
IF NOT EXIST x64 MKDIR x64
IF NOT EXIST install MKDIR install

:: Build for Win32
CD Win32
cmake -G "Visual Studio 16 2019" -A Win32 -DYYCC_BUILD_TESTBENCH=ON ../.. || EXIT /b 1
cmake --build . --config Debug || EXIT /b 1
cmake --install . --prefix=../install --config Debug || EXIT /b 1
cmake --build . --config Release || EXIT /b 1
cmake --install . --prefix=../install --config Release || EXIT /b 1
CD ..

:: Build for x64
CD x64
cmake -G "Visual Studio 16 2019" -A x64 -DYYCC_BUILD_TESTBENCH=ON ../.. || EXIT /b 1
cmake --build . --config Debug || EXIT /b 1
cmake --install . --prefix=../install --config Debug || EXIT /b 1
cmake --build . --config Release || EXIT /b 1
cmake --install . --prefix=../install --config Release || EXIT /b 1
CD ..

ECHO DONE
EXIT /b 0

View File

@ -67,13 +67,11 @@ PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/utf-8> $<$<CXX_COMPILER_ID:MSVC>:/utf-8>
) )
# Install project # Install binary and headers
# Install binary
install(TARGETS YYCCommonplace install(TARGETS YYCCommonplace
EXPORT YYCCommonplaceTargets EXPORT YYCCommonplaceTargets
LIBRARY DESTINATION lib LIBRARY DESTINATION ${YYCC_INSTALL_PATH_LIB}
ARCHIVE DESTINATION lib ARCHIVE DESTINATION ${YYCC_INSTALL_PATH_LIB}
RUNTIME DESTINATION bin
INCLUDES DESTINATION include INCLUDES DESTINATION include
FILE_SET HEADERS DESTINATION include FILE_SET HEADERS DESTINATION include
) )

View File

@ -4,88 +4,224 @@
namespace YYCC::EncodingHelper { namespace YYCC::EncodingHelper {
#pragma region UTF8 Native Convertion
/**
 * \brief Reinterpret a read-only native C string as a UTF8 C string.
 * \param src Pointer to the native characters.
 * \return The very same address, typed as \c yycc_char8_t. Nothing is copied or converted.
 */
const yycc_char8_t* ToUTF8(const char* src) {
    using Utf8ConstPtr = const yycc_char8_t*;
    return reinterpret_cast<Utf8ConstPtr>(src);
}
/**
 * \brief Reinterpret a writable native C string as a UTF8 C string.
 * \param src Pointer to the native characters.
 * \return The very same address, typed as \c yycc_char8_t. Nothing is copied or converted.
 */
yycc_char8_t* ToUTF8(char* src) {
    using Utf8Ptr = yycc_char8_t*;
    return reinterpret_cast<Utf8Ptr>(src);
}
/**
 * \brief Create a UTF8 string holding the same bytes as the given native string view.
 * \param src The native characters to copy. The full \c size() is used.
 * \return A new \c yycc_u8string built from \c src.size() bytes starting at \c src.data().
 */
yycc_u8string ToUTF8(const std::string_view& src) {
    const yycc_char8_t* first = reinterpret_cast<const yycc_char8_t*>(src.data());
    const auto count = src.size();
    return yycc_u8string(first, count);
}
/**
 * \brief Create a UTF8 string view over the same memory as the given native string view.
 * \param src The native characters to view.
 * \return A \c yycc_u8string_view referring to \c src.data() with length \c src.size(). No bytes are copied.
 */
yycc_u8string_view ToUTF8View(const std::string_view& src) {
    const yycc_char8_t* first = reinterpret_cast<const yycc_char8_t*>(src.data());
    const auto count = src.size();
    return yycc_u8string_view(first, count);
}
/**
 * \brief Reinterpret a read-only UTF8 C string as a native C string.
 * \param src Pointer to the UTF8 characters.
 * \return The very same address, typed as \c char. Nothing is copied or converted.
 */
const char* ToNative(const yycc_char8_t* src) {
    using NativeConstPtr = const char*;
    return reinterpret_cast<NativeConstPtr>(src);
}
/**
 * \brief Reinterpret a writable UTF8 C string as a native C string.
 * \param src Pointer to the UTF8 characters.
 * \return The very same address, typed as \c char. Nothing is copied or converted.
 */
char* ToNative(yycc_char8_t* src) {
    using NativePtr = char*;
    return reinterpret_cast<NativePtr>(src);
}
/**
 * \brief Create a native string holding the same bytes as the given UTF8 string view.
 * \param src The UTF8 characters to copy. The full \c size() is used.
 * \return A new \c std::string built from \c src.size() bytes starting at \c src.data().
 */
std::string ToNative(const yycc_u8string_view& src) {
    const char* first = reinterpret_cast<const char*>(src.data());
    const auto count = src.size();
    return std::string(first, count);
}
/**
 * \brief Create a native string view over the same memory as the given UTF8 string view.
 * \param src The UTF8 characters to view.
 * \return A \c std::string_view referring to \c src.data() with length \c src.size(). No bytes are copied.
 */
std::string_view ToNativeView(const yycc_u8string_view& src) {
    const char* first = reinterpret_cast<const char*>(src.data());
    const auto count = src.size();
    return std::string_view(first, count);
}
#pragma endregion
/*
Assistant macros which generate the bodies of the convenience overloads of each convertion function.
Each macro is meant to be the whole body of a function which has a parameter named "src"
(and, for CONVFCT_TYPE2, an output parameter named "dst").
"fct_name" is the convertion function to forward to, and the variadic arguments are
passed to it after the source and destination (for example code pages).
NOTE(review): "##__VA_ARGS__" relies on the compiler removing the preceding comma
when no extra argument is given. This is a compiler extension - confirm every target compiler accepts it.
*/
/*
CONVFCT_TYPE2: body of the "bool fct(const src_char_type* src, std::basic_string<...>& dst, ...)" overload.
Returns false if src is nullptr. Otherwise wraps src in a std::basic_string_view<src_char_type>
and returns the result of the string view based overload.
*/
#define CONVFCT_TYPE2(fct_name, src_char_type, dst_char_type, ...) if (src == nullptr) return false; \
std::basic_string_view<src_char_type> cache(src); \
return fct_name(cache, dst, ##__VA_ARGS__);
/*
CONVFCT_TYPE3: body of the overload which returns the result string by value.
Calls "fct_name(src, ret, ...)" and returns ret. ret is cleared if the call returns false,
so an empty string is returned on failure.
*/
#define CONVFCT_TYPE3(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
return ret;
/*
CONVFCT_TYPE4: same expansion as CONVFCT_TYPE3.
In this file it is used for the overloads which take a raw pointer as src
and return the result string by value.
*/
#define CONVFCT_TYPE4(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
return ret;
#if YYCC_OS == YYCC_OS_WINDOWS #if YYCC_OS == YYCC_OS_WINDOWS
bool WcharToChar(const wchar_t* src, std::string& dest, UINT codepage) { #pragma region WcharToChar
int count, write_result;
bool WcharToChar(const std::wstring_view& src, std::string& dst, UINT code_page) {
// if src is empty, direct output
if (src.empty()) {
dst.clear();
return true;
}
//converter to CHAR // init WideCharToMultiByte used variables
count = WideCharToMultiByte(codepage, 0, reinterpret_cast<LPCWCH>(src), -1, NULL, 0, NULL, NULL); // setup src pointer
if (count <= 0) return false; LPCWCH lpWideCharStr = reinterpret_cast<LPCWCH>(src.data());
// check whether source string is too large.
size_t cSrcSize = src.size();
if (cSrcSize > std::numeric_limits<int>::max()) return false;
int cchWideChar = static_cast<int>(src.size());
dest.resize(count - 1); // do convertion
write_result = WideCharToMultiByte(codepage, 0, reinterpret_cast<LPCWCH>(src), -1, reinterpret_cast<LPSTR>(dest.data()), count, NULL, NULL); // do a dry-run first to fetch desired size.
int desired_size = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, NULL, 0, NULL, NULL);
if (desired_size <= 0) return false;
// resize dest for receiving result
dst.resize(static_cast<size_t>(desired_size));
// do real convertion
int write_result = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, reinterpret_cast<LPSTR>(dst.data()), desired_size, NULL, NULL);
if (write_result <= 0) return false; if (write_result <= 0) return false;
return true; return true;
} }
bool WcharToUTF8(const wchar_t* src, std::string& dest) { bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page) {
return WcharToChar(src, dest, CP_UTF8); CONVFCT_TYPE2(WcharToChar, wchar_t, char, code_page);
} }
std::string WcharToChar(const wchar_t* src, UINT codepage) { std::string WcharToChar(const std::wstring_view& src, UINT code_page) {
std::string ret; CONVFCT_TYPE3(WcharToChar, wchar_t, char, code_page);
if (!WcharToChar(src, ret, codepage)) ret.clear();
return ret;
} }
std::string WcharToUTF8(const wchar_t* src) { std::string WcharToChar(const wchar_t* src, UINT code_page) {
return WcharToChar(src, CP_UTF8); CONVFCT_TYPE4(WcharToChar, wchar_t, char, code_page);
} }
bool CharToWchar(const char* src, std::wstring& dest, UINT codepage) { #pragma endregion
int wcount, write_result;
#pragma region CharToWchar
// convert to WCHAR bool CharToWchar(const std::string_view& src, std::wstring& dst, UINT code_page) {
wcount = MultiByteToWideChar(codepage, 0, reinterpret_cast<LPCCH>(src), -1, NULL, 0); // if src is empty, direct output
if (wcount <= 0) return false; if (src.empty()) {
dst.clear();
return true;
}
dest.resize(wcount - 1); // init WideCharToMultiByte used variables
write_result = MultiByteToWideChar(codepage, 0, reinterpret_cast<LPCCH>(src), -1, reinterpret_cast<LPWSTR>(dest.data()), wcount); // setup src pointer
LPCCH lpMultiByteStr = reinterpret_cast<LPCCH>(src.data());
// check whether source string is too large.
size_t cSrcSize = src.size();
if (cSrcSize > std::numeric_limits<int>::max()) return false;
int cbMultiByte = static_cast<int>(src.size());
// do convertion
// do a dry-run first to fetch desired size.
int desired_size = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, NULL, 0);
if (desired_size <= 0) return false;
// resize dest for receiving result
dst.resize(static_cast<size_t>(desired_size));
// do real convertion
int write_result = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, reinterpret_cast<LPWSTR>(dst.data()), desired_size);
if (write_result <= 0) return false; if (write_result <= 0) return false;
return true; return true;
} }
bool UTF8ToWchar(const char* src, std::wstring& dest) { bool CharToWchar(const char* src, std::wstring& dst, UINT code_page) {
return CharToWchar(src, dest, CP_UTF8); CONVFCT_TYPE2(CharToWchar, char, wchar_t, code_page);
} }
std::wstring CharToWchar(const char* src, UINT codepage) { std::wstring CharToWchar(const std::string_view& src, UINT code_page) {
std::wstring ret; CONVFCT_TYPE3(CharToWchar, char, wchar_t, code_page);
if (!CharToWchar(src, ret, codepage)) ret.clear();
return ret;
} }
std::wstring UTF8ToWchar(const char* src) { std::wstring CharToWchar(const char* src, UINT code_page) {
return CharToWchar(src, CP_UTF8); CONVFCT_TYPE4(CharToWchar, char, wchar_t, code_page);
} }
bool CharToChar(const char* src, std::string& dest, UINT src_codepage, UINT dest_codepage) { #pragma endregion
#pragma region CharToChar
bool CharToChar(const std::string_view& src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
std::wstring intermediary; std::wstring intermediary;
if (!CharToWchar(src, intermediary, src_codepage)) return false; if (!CharToWchar(src, intermediary, src_code_page)) return false;
if (!WcharToChar(intermediary.c_str(), dest, dest_codepage)) return false; if (!WcharToChar(intermediary, dst, dst_code_page)) return false;
return true; return true;
} }
std::string CharToChar(const char* src, UINT src_codepage, UINT dest_codepage) { bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
std::string ret; CONVFCT_TYPE2(CharToChar, char, char, src_code_page, dst_code_page);
if (!CharToChar(src, ret, src_codepage, dest_codepage)) ret.clear(); }
return ret; std::string CharToChar(const std::string_view& src, UINT src_code_page, UINT dst_code_page) {
CONVFCT_TYPE3(CharToChar, char, char, src_code_page, dst_code_page);
}
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page) {
CONVFCT_TYPE4(CharToChar, char, char, src_code_page, dst_code_page);
} }
#pragma endregion
#pragma region WcharToUTF8
/**
 * \brief Convert a wchar_t string to a UTF8 string.
 * \param src The wchar_t string to convert.
 * \param dst Receives the UTF8 result. It is only assigned when the convertion succeeds.
 * \return True if the underlying \c WcharToChar call (with \c CP_UTF8) succeeded, otherwise false.
 */
bool WcharToUTF8(const std::wstring_view& src, yycc_u8string& dst) {
// WcharToChar outputs std::string, so convert into a temporary first
std::string adapted_dst;
bool ret = WcharToChar(src, adapted_dst, CP_UTF8);
// copy the bytes into the UTF8 typed string only on success
if (ret) dst = ToUTF8(adapted_dst);
return ret;
}
/**
 * \brief Pointer flavour of WcharToUTF8.
 * \param src The wchar_t C string to convert.
 * \param dst Receives the UTF8 result.
 * \return False if \c src is nullptr, otherwise the result of the string view overload.
 */
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst) {
CONVFCT_TYPE2(WcharToUTF8, wchar_t, yycc_char8_t);
}
/**
 * \brief WcharToUTF8 returning the result by value.
 * \param src The wchar_t string to convert.
 * \return The UTF8 result, or an empty string if the convertion failed.
 */
yycc_u8string WcharToUTF8(const std::wstring_view& src) {
CONVFCT_TYPE3(WcharToUTF8, wchar_t, yycc_char8_t);
}
/**
 * \brief Pointer flavour of WcharToUTF8 returning the result by value.
 * \param src The wchar_t C string to convert.
 * \return The UTF8 result, or an empty string if the convertion failed.
 */
yycc_u8string WcharToUTF8(const wchar_t* src) {
CONVFCT_TYPE4(WcharToUTF8, wchar_t, yycc_char8_t);
}
#pragma endregion
#pragma region UTF8ToWchar
/**
 * \brief Convert a UTF8 string to a wchar_t string.
 * \param src The UTF8 string to convert.
 * \param dst Receives the wchar_t result.
 * \return The result of the underlying \c CharToWchar call with \c CP_UTF8.
 */
bool UTF8ToWchar(const yycc_u8string_view& src, std::wstring& dst) {
// CharToWchar accepts std::string_view, so view the UTF8 data as native chars
std::string_view adapted_src(ToNativeView(src));
return CharToWchar(adapted_src, dst, CP_UTF8);
}
/**
 * \brief Pointer flavour of UTF8ToWchar.
 * \param src The UTF8 C string to convert.
 * \param dst Receives the wchar_t result.
 * \return False if \c src is nullptr, otherwise the result of the string view overload.
 */
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst) {
CONVFCT_TYPE2(UTF8ToWchar, yycc_char8_t, wchar_t);
}
/**
 * \brief UTF8ToWchar returning the result by value.
 * \param src The UTF8 string to convert.
 * \return The wchar_t result, or an empty string if the convertion failed.
 */
std::wstring UTF8ToWchar(const yycc_u8string_view& src) {
CONVFCT_TYPE3(UTF8ToWchar, yycc_char8_t, wchar_t);
}
/**
 * \brief Pointer flavour of UTF8ToWchar returning the result by value.
 * \param src The UTF8 C string to convert.
 * \return The wchar_t result, or an empty string if the convertion failed.
 */
std::wstring UTF8ToWchar(const yycc_char8_t* src) {
CONVFCT_TYPE4(UTF8ToWchar, yycc_char8_t, wchar_t);
}
#pragma endregion
#endif #endif
#pragma region UTF8 UTF16 UTF32 Help Funcs
/*
According to the documentation on CppReference,
the standard library is guaranteed to provide several specific specializations of \c std::codecvt.
The UTF8 char type used by the UTF8 related specializations of \c std::codecvt is not always the same
(\c char8_t when the compiler supports it, \c char otherwise).
It is also independent of the \c yycc_char8_t we defined.
So it is essential to define a type here which correctly selects the matching specialization of \c std::codecvt.
*/
#if defined(__cpp_char8_t) #if defined(__cpp_char8_t)
using CodecvtUTF8Char_t = char8_t; using CodecvtUTF8Char_t = char8_t;
#else #else
using CodecvtUTF8Char_t = char; using CodecvtUTF8Char_t = char;
#endif #endif
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0> template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
using CodecvtFacet_t = std::codecvt<_TChar, CodecvtUTF8Char_t, std::mbstate_t>; using CodecvtFacet_t = std::codecvt<_TChar, CodecvtUTF8Char_t, std::mbstate_t>;
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0> template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
static bool UTF8ToUTFOther(const char* _src, std::basic_string<_TChar>& dest) { static bool UTF8ToUTFOther(const yycc_u8string_view& src, std::basic_string<_TChar>& dst) {
// Reference: // Reference:
// https://zh.cppreference.com/w/cpp/locale/codecvt/in // https://zh.cppreference.com/w/cpp/locale/codecvt/in
// init src string // if src is empty, return directly
if (_src == nullptr) return false; if (src.empty()) {
std::string src(_src); dst.clear();
return true;
}
// init locale and get codecvt facet // init locale and get codecvt facet
// same reason in UTFOtherToUTF8 to keeping reference to locale // same reason in UTFOtherToUTF8 to keeping reference to locale
@ -94,12 +230,12 @@ namespace YYCC::EncodingHelper {
// convertion preparation // convertion preparation
std::mbstate_t mb{}; std::mbstate_t mb{};
dest.resize(src.size()); dst.resize(src.size());
const CodecvtUTF8Char_t* intern_from = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str()), const CodecvtUTF8Char_t* intern_from = reinterpret_cast<const CodecvtUTF8Char_t*>(src.data()),
*intern_from_end = reinterpret_cast<const CodecvtUTF8Char_t*>(src.c_str() + src.size()), *intern_from_end = reinterpret_cast<const CodecvtUTF8Char_t*>(src.data() + src.size()),
*intern_from_next = nullptr; *intern_from_next = nullptr;
_TChar* extern_to = dest.data(), _TChar* extern_to = dst.data(),
*extern_to_end = dest.data() + dest.size(), *extern_to_end = dst.data() + dst.size(),
*extern_to_next = nullptr; *extern_to_next = nullptr;
// do convertion // do convertion
auto result = this_codecvt.in( auto result = this_codecvt.in(
@ -112,35 +248,20 @@ namespace YYCC::EncodingHelper {
if (result != CodecvtFacet_t<_TChar>::ok) if (result != CodecvtFacet_t<_TChar>::ok)
return false; return false;
// resize result and return // resize result and return
dest.resize(extern_to_next - dest.data()); dst.resize(extern_to_next - dst.data());
return true; return true;
} }
bool UTF8ToUTF16(const char* src, std::u16string& dest) {
return UTF8ToUTFOther<char16_t>(src, dest);
}
std::u16string UTF8ToUTF16(const char* src) {
std::u16string ret;
if (!UTF8ToUTF16(src, ret)) ret.clear();
return ret;
}
bool UTF8ToUTF32(const char* src, std::u32string& dest) {
return UTF8ToUTFOther<char32_t>(src, dest);
}
std::u32string UTF8ToUTF32(const char* src) {
std::u32string ret;
if (!UTF8ToUTF32(src, ret)) ret.clear();
return ret;
}
template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0> template<typename _TChar, std::enable_if_t<std::is_same_v<_TChar, char16_t> || std::is_same_v<_TChar, char32_t>, int> = 0>
static bool UTFOtherToUTF8(const _TChar* _src, std::string& dest) { static bool UTFOtherToUTF8(const std::basic_string_view<_TChar>& src, yycc_u8string& dst) {
// Reference: // Reference:
// https://zh.cppreference.com/w/cpp/locale/codecvt/out // https://zh.cppreference.com/w/cpp/locale/codecvt/out
// initialize src string // if src is empty, return directly
if (_src == nullptr) return false; if (src.empty()) {
std::basic_string<_TChar> src(_src); dst.clear();
return true;
}
// init locale and get codecvt facet // init locale and get codecvt facet
// the reference to locale must be preserved until convertion done. // the reference to locale must be preserved until convertion done.
@ -150,12 +271,12 @@ namespace YYCC::EncodingHelper {
// do convertion preparation // do convertion preparation
std::mbstate_t mb{}; std::mbstate_t mb{};
dest.resize(src.size() * this_codecvt.max_length()); dst.resize(src.size() * this_codecvt.max_length());
const _TChar* intern_from = src.c_str(), const _TChar* intern_from = src.data(),
*intern_from_end = src.c_str() + src.size(), *intern_from_end = src.data() + src.size(),
*intern_from_next = nullptr; *intern_from_next = nullptr;
CodecvtUTF8Char_t* extern_to = reinterpret_cast<CodecvtUTF8Char_t*>(dest.data()), CodecvtUTF8Char_t* extern_to = reinterpret_cast<CodecvtUTF8Char_t*>(dst.data()),
*extern_to_end = reinterpret_cast<CodecvtUTF8Char_t*>(dest.data() + dest.size()), *extern_to_end = reinterpret_cast<CodecvtUTF8Char_t*>(dst.data() + dst.size()),
*extern_to_next = nullptr; *extern_to_next = nullptr;
// do convertion // do convertion
auto result = this_codecvt.out( auto result = this_codecvt.out(
@ -168,26 +289,83 @@ namespace YYCC::EncodingHelper {
if (result != CodecvtFacet_t<_TChar>::ok) if (result != CodecvtFacet_t<_TChar>::ok)
return false; return false;
// resize result and retuen // resize result and retuen
dest.resize(extern_to_next - reinterpret_cast<CodecvtUTF8Char_t*>(dest.data())); dst.resize(extern_to_next - reinterpret_cast<CodecvtUTF8Char_t*>(dst.data()));
return true; return true;
} }
bool UTF16ToUTF8(const char16_t* src, std::string& dest) { #pragma endregion
return UTFOtherToUTF8<char16_t>(src, dest);
#pragma region UTF8ToUTF16
bool UTF8ToUTF16(const yycc_u8string_view& src, std::u16string& dst) {
return UTF8ToUTFOther<char16_t>(src, dst);
} }
std::string UTF16ToUTF8(const char16_t* src) { bool UTF8ToUTF16(const yycc_char8_t* src, std::u16string& dst) {
std::string ret; CONVFCT_TYPE2(UTF8ToUTF16, yycc_char8_t, char16_t);
if (!UTF16ToUTF8(src, ret)) ret.clear();
return ret;
} }
bool UTF32ToUTF8(const char32_t* src, std::string& dest) { std::u16string UTF8ToUTF16(const yycc_u8string_view& src) {
return UTFOtherToUTF8<char32_t>(src, dest); CONVFCT_TYPE3(UTF8ToUTF16, yycc_char8_t, char16_t);
} }
std::string UTF32ToUTF8(const char32_t* src) { std::u16string UTF8ToUTF16(const yycc_char8_t* src) {
std::string ret; CONVFCT_TYPE4(UTF8ToUTF16, yycc_char8_t, char16_t);
if (!UTF32ToUTF8(src, ret)) ret.clear();
return ret;
} }
#pragma endregion
#pragma region UTF16ToUTF8
/**
 * \brief Convert a UTF16 string to a UTF8 string.
 * \param src The UTF16 string to convert.
 * \param dst Receives the UTF8 result.
 * \return The result of \c UTFOtherToUTF8 instantiated for \c char16_t.
 */
bool UTF16ToUTF8(const std::u16string_view& src, yycc_u8string& dst) {
return UTFOtherToUTF8<char16_t>(src, dst);
}
/**
 * \brief Pointer flavour of UTF16ToUTF8.
 * \param src The UTF16 C string to convert.
 * \param dst Receives the UTF8 result.
 * \return False if \c src is nullptr, otherwise the result of the string view overload.
 */
bool UTF16ToUTF8(const char16_t* src, yycc_u8string& dst) {
CONVFCT_TYPE2(UTF16ToUTF8, char16_t, yycc_char8_t);
}
/**
 * \brief UTF16ToUTF8 returning the result by value.
 * \param src The UTF16 string to convert.
 * \return The UTF8 result, or an empty string if the convertion failed.
 */
yycc_u8string UTF16ToUTF8(const std::u16string_view& src) {
CONVFCT_TYPE3(UTF16ToUTF8, char16_t, yycc_char8_t);
}
/**
 * \brief Pointer flavour of UTF16ToUTF8 returning the result by value.
 * \param src The UTF16 C string to convert.
 * \return The UTF8 result, or an empty string if the convertion failed.
 */
yycc_u8string UTF16ToUTF8(const char16_t* src) {
CONVFCT_TYPE4(UTF16ToUTF8, char16_t, yycc_char8_t);
}
#pragma endregion
#pragma region UTF8ToUTF32
/**
 * \brief Convert a UTF8 string to a UTF32 string.
 * \param src The UTF8 string to convert.
 * \param dst Receives the UTF32 result.
 * \return The result of \c UTF8ToUTFOther instantiated for \c char32_t.
 */
bool UTF8ToUTF32(const yycc_u8string_view& src, std::u32string& dst) {
return UTF8ToUTFOther<char32_t>(src, dst);
}
/**
 * \brief Pointer flavour of UTF8ToUTF32.
 * \param src The UTF8 C string to convert.
 * \param dst Receives the UTF32 result.
 * \return False if \c src is nullptr, otherwise the result of the string view overload.
 */
bool UTF8ToUTF32(const yycc_char8_t* src, std::u32string& dst) {
CONVFCT_TYPE2(UTF8ToUTF32, yycc_char8_t, char32_t);
}
/**
 * \brief UTF8ToUTF32 returning the result by value.
 * \param src The UTF8 string to convert.
 * \return The UTF32 result, or an empty string if the convertion failed.
 */
std::u32string UTF8ToUTF32(const yycc_u8string_view& src) {
CONVFCT_TYPE3(UTF8ToUTF32, yycc_char8_t, char32_t);
}
/**
 * \brief Pointer flavour of UTF8ToUTF32 returning the result by value.
 * \param src The UTF8 C string to convert.
 * \return The UTF32 result, or an empty string if the convertion failed.
 */
std::u32string UTF8ToUTF32(const yycc_char8_t* src) {
CONVFCT_TYPE4(UTF8ToUTF32, yycc_char8_t, char32_t);
}
#pragma endregion
#pragma region UTF32ToUTF8
/**
 * \brief Convert a UTF32 string to a UTF8 string.
 * \param src The UTF32 string to convert.
 * \param dst Receives the UTF8 result.
 * \return The result of \c UTFOtherToUTF8 instantiated for \c char32_t.
 */
bool UTF32ToUTF8(const std::u32string_view& src, yycc_u8string& dst) {
return UTFOtherToUTF8<char32_t>(src, dst);
}
/**
 * \brief Pointer flavour of UTF32ToUTF8.
 * \param src The UTF32 C string to convert.
 * \param dst Receives the UTF8 result.
 * \return False if \c src is nullptr, otherwise the result of the string view overload.
 */
bool UTF32ToUTF8(const char32_t* src, yycc_u8string& dst) {
CONVFCT_TYPE2(UTF32ToUTF8, char32_t, yycc_char8_t);
}
/**
 * \brief UTF32ToUTF8 returning the result by value.
 * \param src The UTF32 string to convert.
 * \return The UTF8 result, or an empty string if the convertion failed.
 */
yycc_u8string UTF32ToUTF8(const std::u32string_view& src) {
CONVFCT_TYPE3(UTF32ToUTF8, char32_t, yycc_char8_t);
}
/**
 * \brief Pointer flavour of UTF32ToUTF8 returning the result by value.
 * \param src The UTF32 C string to convert.
 * \return The UTF8 result, or an empty string if the convertion failed.
 */
yycc_u8string UTF32ToUTF8(const char32_t* src) {
CONVFCT_TYPE4(UTF32ToUTF8, char32_t, yycc_char8_t);
}
#pragma endregion
#undef CONVFCT_TYPE2
#undef CONVFCT_TYPE3
#undef CONVFCT_TYPE4
} }

View File

@ -21,20 +21,20 @@
* \li \c UTF8: UTF8 string. * \li \c UTF8: UTF8 string.
* \li \c Wchar: wchar_t string. * \li \c Wchar: wchar_t string.
* \par * \par
* For example: \c WcharToUTF8 will perform the convertion from wchar_t to UTF8, * For example: \c WcharToUTF8 will perform the convertion from wchar_t to UTF8,
* and \c CharToChar will perform the convertion between 2 code-page-based string and caller can specify individual code page for these 2 string. * and \c CharToChar will perform the convertion between 2 code-page-based string and caller can specify individual code page for these 2 string.
* \par * \par
* These functions are Windows specific and are unavailable on other platforms. * These functions are Windows specific and are unavailable on other platforms.
* Becasue Windows use wchar_t string as its function arguments for globalization, and this library use UTF8 everywhere. * Becasue Windows use wchar_t string as its function arguments for globalization, and this library use UTF8 everywhere.
* So it should have a bidirectional way to do convertion between wchar_t string and UTF8 string. * So it should have a bidirectional way to do convertion between wchar_t string and UTF8 string.
* *
* \par UTF32, UTF16 and UTF8 Convertion * \par UTF32, UTF16 and UTF8 Convertion
* This namespace also provide the convertion among UTF32, UTF16 and UTF8. * This namespace also provide the convertion among UTF32, UTF16 and UTF8.
* These convertion functions are suit for all platforms, not Windows oriented. * These convertion functions are suit for all platforms, not Windows oriented.
* \par * \par
* Due to implementation, this library assume all non-Windows system use UTF8 as their C locale. * Due to implementation, this library assume all non-Windows system use UTF8 as their C locale.
* Otherwise these functions will produce wrong result. * Otherwise these functions will produce wrong result.
* *
* \par Function Parameters * \par Function Parameters
* We provide these encoding convertion functions with following 2 types: * We provide these encoding convertion functions with following 2 types:
* \li Function returns \c bool and its parameter order source string pointer and a corresponding \c std::basic_string container for receiving result. * \li Function returns \c bool and its parameter order source string pointer and a corresponding \c std::basic_string container for receiving result.
@ -46,35 +46,71 @@
* First declaration will return false to indicate there is an error when doing convertion. Please note that the content of string container passing in may still be changed! * First declaration will return false to indicate there is an error when doing convertion. Please note that the content of string container passing in may still be changed!
* Last declaration will return empty string to indicate error. Please note if you pass empty string in, they still will output empty string but it doesn't mean an error. * Last declaration will return empty string to indicate error. Please note if you pass empty string in, they still will output empty string but it doesn't mean an error.
* So last declaration is used in the scenario that we don't care whether the convertion success did. For example, output something to console. * So last declaration is used in the scenario that we don't care whether the convertion success did. For example, output something to console.
* *
*/ */
namespace YYCC::EncodingHelper { namespace YYCC::EncodingHelper {
#define YYCC_U8(strl) (reinterpret_cast<const yycc_char8_t*>(u8 ## strl))
const yycc_char8_t* ToUTF8(const char* src);
yycc_char8_t* ToUTF8(char* src);
yycc_u8string ToUTF8(const std::string_view& src);
yycc_u8string_view ToUTF8View(const std::string_view& src);
const char* ToNative(const yycc_char8_t* src);
char* ToNative(yycc_char8_t* src);
std::string ToNative(const yycc_u8string_view& src);
std::string_view ToNativeView(const yycc_u8string_view& src);
#if YYCC_OS == YYCC_OS_WINDOWS #if YYCC_OS == YYCC_OS_WINDOWS
bool WcharToChar(const wchar_t* src, std::string& dest, UINT codepage); bool WcharToChar(const std::wstring_view& src, std::string& dst, UINT code_page);
bool WcharToUTF8(const wchar_t* src, std::string& dest); bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page);
std::string WcharToChar(const wchar_t* src, UINT codepage); std::string WcharToChar(const std::wstring_view& src, UINT code_page);
std::string WcharToUTF8(const wchar_t* src); std::string WcharToChar(const wchar_t* src, UINT code_page);
bool CharToWchar(const char* src, std::wstring& dest, UINT codepage); bool CharToWchar(const std::string_view& src, std::wstring& dst, UINT code_page);
bool UTF8ToWchar(const char* src, std::wstring& dest); bool CharToWchar(const char* src, std::wstring& dst, UINT code_page);
std::wstring CharToWchar(const char* src, UINT codepage); std::wstring CharToWchar(const std::string_view& src, UINT code_page);
std::wstring UTF8ToWchar(const char* src); std::wstring CharToWchar(const char* src, UINT code_page);
bool CharToChar(const char* src, std::string& dest, UINT src_codepage, UINT dest_codepage); bool CharToChar(const std::string_view& src, std::string& dst, UINT src_code_page, UINT dst_code_page);
std::string CharToChar(const char* src, UINT src_codepage, UINT dest_codepage); bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page);
std::string CharToChar(const std::string_view& src, UINT src_code_page, UINT dst_code_page);
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page);
bool WcharToUTF8(const std::wstring_view& src, yycc_u8string& dst);
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst);
yycc_u8string WcharToUTF8(const std::wstring_view& src);
yycc_u8string WcharToUTF8(const wchar_t* src);
bool UTF8ToWchar(const yycc_u8string_view& src, std::wstring& dst);
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst);
std::wstring UTF8ToWchar(const yycc_u8string_view& src);
std::wstring UTF8ToWchar(const yycc_char8_t* src);
#endif #endif
bool UTF8ToUTF16(const char* src, std::u16string& dest); bool UTF8ToUTF16(const yycc_u8string_view& src, std::u16string& dst);
std::u16string UTF8ToUTF16(const char* src); bool UTF8ToUTF16(const yycc_char8_t* src, std::u16string& dst);
bool UTF8ToUTF32(const char* src, std::u32string& dest); std::u16string UTF8ToUTF16(const yycc_u8string_view& src);
std::u32string UTF8ToUTF32(const char* src); std::u16string UTF8ToUTF16(const yycc_char8_t* src);
bool UTF16ToUTF8(const char16_t* src, std::string& dest); bool UTF16ToUTF8(const std::u16string_view& src, yycc_u8string& dst);
std::string UTF16ToUTF8(const char16_t* src); bool UTF16ToUTF8(const char16_t* src, yycc_u8string& dst);
bool UTF32ToUTF8(const char32_t* src, std::string& dest); yycc_u8string UTF16ToUTF8(const std::u16string_view& src);
std::string UTF32ToUTF8(const char32_t* src); yycc_u8string UTF16ToUTF8(const char16_t* src);
bool UTF8ToUTF32(const yycc_u8string_view& src, std::u32string& dst);
bool UTF8ToUTF32(const yycc_char8_t* src, std::u32string& dst);
std::u32string UTF8ToUTF32(const yycc_u8string_view& src);
std::u32string UTF8ToUTF32(const yycc_char8_t* src);
bool UTF32ToUTF8(const std::u32string_view& src, yycc_u8string& dst);
bool UTF32ToUTF8(const char32_t* src, yycc_u8string& dst);
yycc_u8string UTF32ToUTF8(const std::u32string_view& src);
yycc_u8string UTF32ToUTF8(const char32_t* src);
} }

View File

@ -24,14 +24,20 @@
#endif #endif
//// Decide the char type we used // Define the UTF8 char type we used.
//#include <string> // And do a polyfill if no embedded char8_t type.
//namespace YYCC { #include <string>
//#if defined(__cpp_char8_t) #include <string_view>
// using u8char = char8_t; namespace YYCC {
// using u8string = std::std::string #if defined(__cpp_char8_t)
//#else using yycc_char8_t = char8_t;
// using u8char = char; using yycc_u8string = std::u8string;
// using u8string = std::string; using yycc_u8string_view = std::u8string_view;
//#endif #else
//} using yycc_char8_t = unsigned char;
using yycc_u8string = std::basic_string<yycc_char8_t>;
using yycc_u8string_view = std::basic_string_view<yycc_char8_t>;
#endif
}

View File

@ -33,11 +33,9 @@ PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/utf-8> $<$<CXX_COMPILER_ID:MSVC>:/utf-8>
) )
# Install binary # Install testbench only on Release mode
install(TARGETS YYCCTestbench install(TARGETS YYCCTestbench
EXPORT YYCCTestbenchTargets EXPORT YYCCTestbenchTargets
LIBRARY DESTINATION lib CONFIGURATIONS Release
ARCHIVE DESTINATION lib RUNTIME DESTINATION ${YYCC_INSTALL_PATH_BIN}
RUNTIME DESTINATION bin
INCLUDES DESTINATION include
) )

View File

@ -352,7 +352,9 @@ namespace YYCCTestbench {
static void WinFctTestbench() { static void WinFctTestbench() {
#if YYCC_OS == YYCC_OS_WINDOWS #if YYCC_OS == YYCC_OS_WINDOWS
Console::FormatLine("Current Module HANDLE: 0x%" PRI_XPTR_LEFT_PADDING PRIXPTR, YYCC::WinFctHelper::GetCurrentModule()); HMODULE test_current_module;
Assert((test_current_module = YYCC::WinFctHelper::GetCurrentModule()) != nullptr, "YYCC::WinFctHelper::GetCurrentModule");
Console::FormatLine("Current Module HANDLE: 0x%" PRI_XPTR_LEFT_PADDING PRIXPTR, test_current_module);
std::string test_temp; std::string test_temp;
Assert(YYCC::WinFctHelper::GetTempDirectory(test_temp), "YYCC::WinFctHelper::GetTempDirectory"); Assert(YYCC::WinFctHelper::GetTempDirectory(test_temp), "YYCC::WinFctHelper::GetTempDirectory");