refactor: write iconv.
- write iconv encoding (not finished). - rename united_codec to pycodec.
This commit is contained in:
@ -117,7 +117,7 @@ BreakFunctionDefinitionParameters: false
|
|||||||
BreakInheritanceList: BeforeColon
|
BreakInheritanceList: BeforeColon
|
||||||
BreakStringLiterals: false
|
BreakStringLiterals: false
|
||||||
BreakTemplateDeclarations: Yes
|
BreakTemplateDeclarations: Yes
|
||||||
ColumnLimit: 100
|
ColumnLimit: 140
|
||||||
CommentPragmas: '^ IWYU pragma:'
|
CommentPragmas: '^ IWYU pragma:'
|
||||||
CompactNamespaces: false
|
CompactNamespaces: false
|
||||||
ConstructorInitializerIndentWidth: 4
|
ConstructorInitializerIndentWidth: 4
|
||||||
@ -206,7 +206,7 @@ RequiresExpressionIndentation: OuterScope
|
|||||||
SeparateDefinitionBlocks: Leave
|
SeparateDefinitionBlocks: Leave
|
||||||
ShortNamespaceLines: 1
|
ShortNamespaceLines: 1
|
||||||
SkipMacroDefinitionBody: false
|
SkipMacroDefinitionBody: false
|
||||||
SortIncludes: CaseSensitive
|
SortIncludes: Never
|
||||||
SortJavaStaticImport: Before
|
SortJavaStaticImport: Before
|
||||||
SortUsingDeclarations: Lexicographic
|
SortUsingDeclarations: Lexicographic
|
||||||
SpaceAfterCStyleCast: true
|
SpaceAfterCStyleCast: true
|
||||||
|
@ -7,6 +7,7 @@ project(YYCC
|
|||||||
# Provide options
|
# Provide options
|
||||||
option(YYCC_BUILD_TESTBENCH "Build testbench of YYCCommonplace." OFF)
|
option(YYCC_BUILD_TESTBENCH "Build testbench of YYCCommonplace." OFF)
|
||||||
option(YYCC_BUILD_DOC "Build document of YYCCommonplace." OFF)
|
option(YYCC_BUILD_DOC "Build document of YYCCommonplace." OFF)
|
||||||
|
option(YYCC_ENFORCE_ICONV "Enforce iconv support for this library (e.g. in MSYS2 environment)." OFF)
|
||||||
option(YYCC_DEBUG_UE_FILTER "YYCC developer used switch for testing Windows unhandled exception filter. Should not set to ON!!!" OFF)
|
option(YYCC_DEBUG_UE_FILTER "YYCC developer used switch for testing Windows unhandled exception filter. Should not set to ON!!!" OFF)
|
||||||
|
|
||||||
# Setup install path from CMake provided install path for convenient use.
|
# Setup install path from CMake provided install path for convenient use.
|
||||||
@ -27,6 +28,10 @@ if (YYCC_BUILD_TESTBENCH)
|
|||||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||||
find_package(GTest REQUIRED)
|
find_package(GTest REQUIRED)
|
||||||
endif ()
|
endif ()
|
||||||
|
# Iconv is required when we are not on Windows, or when the user explicitly requests it
|
||||||
|
if (YYCC_ENFORCE_ICONV OR (NOT WIN32))
|
||||||
|
find_package(Iconv REQUIRED)
|
||||||
|
endif ()
|
||||||
|
|
||||||
# Import 3 build targets
|
# Import 3 build targets
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
@ -18,7 +18,7 @@ PRIVATE
|
|||||||
yycc/encoding/utf.cpp
|
yycc/encoding/utf.cpp
|
||||||
yycc/encoding/windows.cpp
|
yycc/encoding/windows.cpp
|
||||||
yycc/encoding/iconv.cpp
|
yycc/encoding/iconv.cpp
|
||||||
yycc/encoding/united_codec.cpp
|
yycc/encoding/pycodec.cpp
|
||||||
# YYCC/COMHelper.cpp
|
# YYCC/COMHelper.cpp
|
||||||
# YYCC/ArgParser.cpp
|
# YYCC/ArgParser.cpp
|
||||||
# YYCC/ConfigManager.cpp
|
# YYCC/ConfigManager.cpp
|
||||||
@ -69,7 +69,7 @@ FILES
|
|||||||
yycc/encoding/utf.hpp
|
yycc/encoding/utf.hpp
|
||||||
yycc/encoding/windows.hpp
|
yycc/encoding/windows.hpp
|
||||||
yycc/encoding/iconv.hpp
|
yycc/encoding/iconv.hpp
|
||||||
yycc/encoding/united_codec.hpp
|
yycc/encoding/pycodec.hpp
|
||||||
|
|
||||||
# # Headers
|
# # Headers
|
||||||
# # Common headers
|
# # Common headers
|
||||||
@ -102,6 +102,13 @@ PUBLIC
|
|||||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>"
|
"$<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>"
|
||||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||||
)
|
)
|
||||||
|
# Link Iconv if we have imported it
|
||||||
|
if (Iconv_FOUND)
|
||||||
|
target_link_libraries(YYCCommonplace
|
||||||
|
PRIVATE
|
||||||
|
Iconv::Iconv
|
||||||
|
)
|
||||||
|
endif ()
|
||||||
# Link with DbgHelp.lib on Windows
|
# Link with DbgHelp.lib on Windows
|
||||||
target_link_libraries(YYCCommonplace
|
target_link_libraries(YYCCommonplace
|
||||||
PRIVATE
|
PRIVATE
|
||||||
@ -112,11 +119,13 @@ target_compile_features(YYCCommonplace PUBLIC cxx_std_17)
|
|||||||
set_target_properties(YYCCommonplace PROPERTIES CXX_EXTENSION OFF)
|
# Disable compiler-specific language extensions. The target property is spelled CXX_EXTENSIONS (plural).
set_target_properties(YYCCommonplace PROPERTIES CXX_EXTENSIONS OFF)
|
||||||
# Setup macros
|
# Setup macros
|
||||||
target_compile_definitions(YYCCommonplace
|
target_compile_definitions(YYCCommonplace
|
||||||
# Debug macro should populate to child projects
|
|
||||||
PUBLIC
|
PUBLIC
|
||||||
|
# Debug macro. And it should populate to child projects
|
||||||
$<$<BOOL:${YYCC_DEBUG_UE_FILTER}>:YYCC_DEBUG_UE_FILTER>
|
$<$<BOOL:${YYCC_DEBUG_UE_FILTER}>:YYCC_DEBUG_UE_FILTER>
|
||||||
# Unicode charset for private using
|
# Iconv environment macro
|
||||||
|
$<$<BOOL:${YYCC_ENFORCE_ICONV}>:YYCC_FEAT_ICONV>
|
||||||
PRIVATE
|
PRIVATE
|
||||||
|
# Unicode charset for private using
|
||||||
$<$<CXX_COMPILER_ID:MSVC>:UNICODE>
|
$<$<CXX_COMPILER_ID:MSVC>:UNICODE>
|
||||||
$<$<CXX_COMPILER_ID:MSVC>:_UNICODE>
|
$<$<CXX_COMPILER_ID:MSVC>:_UNICODE>
|
||||||
)
|
)
|
||||||
|
@ -1,209 +0,0 @@
|
|||||||
#include "EncodingHelper.hpp"
|
|
||||||
|
|
||||||
#include <locale>
|
|
||||||
|
|
||||||
namespace YYCC::EncodingHelper {
|
|
||||||
|
|
||||||
/* Define some assistant macros for easy writing. */
|
|
||||||
|
|
||||||
#define CONVFCT_TYPE2(fct_name, src_char_type, dst_char_type, ...) if (src == nullptr) return false; \
|
|
||||||
std::basic_string_view<src_char_type> cache(src); \
|
|
||||||
return fct_name(cache, dst, ##__VA_ARGS__);
|
|
||||||
|
|
||||||
#define CONVFCT_TYPE3(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
|
|
||||||
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
#define CONVFCT_TYPE4(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
|
|
||||||
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
|
|
||||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
|
||||||
|
|
||||||
#pragma region WcharToChar
|
|
||||||
|
|
||||||
bool WcharToChar(const std::wstring_view& src, std::string& dst, UINT code_page) {
|
|
||||||
// if src is empty, direct output
|
|
||||||
if (src.empty()) {
|
|
||||||
dst.clear();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// init WideCharToMultiByte used variables
|
|
||||||
// setup src pointer
|
|
||||||
LPCWCH lpWideCharStr = reinterpret_cast<LPCWCH>(src.data());
|
|
||||||
// check whether source string is too large.
|
|
||||||
size_t cSrcSize = src.size();
|
|
||||||
if (cSrcSize > std::numeric_limits<int>::max()) return false;
|
|
||||||
int cchWideChar = static_cast<int>(src.size());
|
|
||||||
|
|
||||||
// do convertion
|
|
||||||
// do a dry-run first to fetch desired size.
|
|
||||||
int desired_size = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, NULL, 0, NULL, NULL);
|
|
||||||
if (desired_size <= 0) return false;
|
|
||||||
// resize dest for receiving result
|
|
||||||
dst.resize(static_cast<size_t>(desired_size));
|
|
||||||
// do real convertion
|
|
||||||
int write_result = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, reinterpret_cast<LPSTR>(dst.data()), desired_size, NULL, NULL);
|
|
||||||
if (write_result <= 0) return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page) {
|
|
||||||
CONVFCT_TYPE2(WcharToChar, wchar_t, char, code_page);
|
|
||||||
}
|
|
||||||
std::string WcharToChar(const std::wstring_view& src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE3(WcharToChar, wchar_t, char, code_page);
|
|
||||||
}
|
|
||||||
std::string WcharToChar(const wchar_t* src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE4(WcharToChar, wchar_t, char, code_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma endregion
|
|
||||||
|
|
||||||
#pragma region CharToWchar
|
|
||||||
|
|
||||||
bool CharToWchar(const std::string_view& src, std::wstring& dst, UINT code_page) {
|
|
||||||
// if src is empty, direct output
|
|
||||||
if (src.empty()) {
|
|
||||||
dst.clear();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// init WideCharToMultiByte used variables
|
|
||||||
// setup src pointer
|
|
||||||
LPCCH lpMultiByteStr = reinterpret_cast<LPCCH>(src.data());
|
|
||||||
// check whether source string is too large.
|
|
||||||
size_t cSrcSize = src.size();
|
|
||||||
if (cSrcSize > std::numeric_limits<int>::max()) return false;
|
|
||||||
int cbMultiByte = static_cast<int>(src.size());
|
|
||||||
|
|
||||||
// do convertion
|
|
||||||
// do a dry-run first to fetch desired size.
|
|
||||||
int desired_size = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, NULL, 0);
|
|
||||||
if (desired_size <= 0) return false;
|
|
||||||
// resize dest for receiving result
|
|
||||||
dst.resize(static_cast<size_t>(desired_size));
|
|
||||||
// do real convertion
|
|
||||||
int write_result = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, reinterpret_cast<LPWSTR>(dst.data()), desired_size);
|
|
||||||
if (write_result <= 0) return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool CharToWchar(const char* src, std::wstring& dst, UINT code_page) {
|
|
||||||
CONVFCT_TYPE2(CharToWchar, char, wchar_t, code_page);
|
|
||||||
}
|
|
||||||
std::wstring CharToWchar(const std::string_view& src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE3(CharToWchar, char, wchar_t, code_page);
|
|
||||||
}
|
|
||||||
std::wstring CharToWchar(const char* src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE4(CharToWchar, char, wchar_t, code_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma endregion
|
|
||||||
|
|
||||||
#pragma region CharToChar
|
|
||||||
|
|
||||||
bool CharToChar(const std::string_view& src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
|
|
||||||
std::wstring intermediary;
|
|
||||||
if (!CharToWchar(src, intermediary, src_code_page)) return false;
|
|
||||||
if (!WcharToChar(intermediary, dst, dst_code_page)) return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
|
|
||||||
CONVFCT_TYPE2(CharToChar, char, char, src_code_page, dst_code_page);
|
|
||||||
}
|
|
||||||
std::string CharToChar(const std::string_view& src, UINT src_code_page, UINT dst_code_page) {
|
|
||||||
CONVFCT_TYPE3(CharToChar, char, char, src_code_page, dst_code_page);
|
|
||||||
}
|
|
||||||
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page) {
|
|
||||||
CONVFCT_TYPE4(CharToChar, char, char, src_code_page, dst_code_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma endregion
|
|
||||||
|
|
||||||
#pragma region WcharToUTF8
|
|
||||||
|
|
||||||
bool WcharToUTF8(const std::wstring_view& src, yycc_u8string& dst) {
|
|
||||||
std::string adapted_dst;
|
|
||||||
bool ret = WcharToChar(src, adapted_dst, CP_UTF8);
|
|
||||||
if (ret) dst = ToUTF8(adapted_dst);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst) {
|
|
||||||
CONVFCT_TYPE2(WcharToUTF8, wchar_t, yycc_char8_t);
|
|
||||||
}
|
|
||||||
yycc_u8string WcharToUTF8(const std::wstring_view& src) {
|
|
||||||
CONVFCT_TYPE3(WcharToUTF8, wchar_t, yycc_char8_t);
|
|
||||||
}
|
|
||||||
yycc_u8string WcharToUTF8(const wchar_t* src) {
|
|
||||||
CONVFCT_TYPE4(WcharToUTF8, wchar_t, yycc_char8_t);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma endregion
|
|
||||||
|
|
||||||
#pragma region UTF8ToWchar
|
|
||||||
|
|
||||||
bool UTF8ToWchar(const yycc_u8string_view& src, std::wstring& dst) {
|
|
||||||
std::string_view adapted_src(ToOrdinaryView(src));
|
|
||||||
return CharToWchar(adapted_src, dst, CP_UTF8);
|
|
||||||
}
|
|
||||||
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst) {
|
|
||||||
CONVFCT_TYPE2(UTF8ToWchar, yycc_char8_t, wchar_t);
|
|
||||||
}
|
|
||||||
std::wstring UTF8ToWchar(const yycc_u8string_view& src) {
|
|
||||||
CONVFCT_TYPE3(UTF8ToWchar, yycc_char8_t, wchar_t);
|
|
||||||
}
|
|
||||||
std::wstring UTF8ToWchar(const yycc_char8_t* src) {
|
|
||||||
CONVFCT_TYPE4(UTF8ToWchar, yycc_char8_t, wchar_t);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma endregion
|
|
||||||
|
|
||||||
#pragma region CharToUTF8
|
|
||||||
|
|
||||||
bool CharToUTF8(const std::string_view& src, yycc_u8string& dst, UINT code_page) {
|
|
||||||
std::string adapted_dst;
|
|
||||||
bool ret = CharToChar(src, adapted_dst, code_page, CP_UTF8);
|
|
||||||
if (ret) dst = ToUTF8(adapted_dst);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
bool CharToUTF8(const char* src, yycc_u8string& dst, UINT code_page) {
|
|
||||||
CONVFCT_TYPE2(CharToUTF8, char, yycc_char8_t, code_page);
|
|
||||||
}
|
|
||||||
yycc_u8string CharToUTF8(const std::string_view& src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE3(CharToUTF8, char, yycc_char8_t, code_page);
|
|
||||||
}
|
|
||||||
yycc_u8string CharToUTF8(const char* src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE4(CharToUTF8, char, yycc_char8_t, code_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma endregion
|
|
||||||
|
|
||||||
#pragma region UTF8ToChar
|
|
||||||
|
|
||||||
bool UTF8ToChar(const yycc_u8string_view& src, std::string& dst, UINT code_page) {
|
|
||||||
std::string_view adapted_src(ToOrdinaryView(src));
|
|
||||||
return CharToChar(adapted_src, dst, CP_UTF8, code_page);
|
|
||||||
}
|
|
||||||
bool UTF8ToChar(const yycc_char8_t* src, std::string& dst, UINT code_page) {
|
|
||||||
CONVFCT_TYPE2(UTF8ToChar, yycc_char8_t, char, code_page);
|
|
||||||
}
|
|
||||||
std::string UTF8ToChar(const yycc_u8string_view& src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE3(UTF8ToChar, yycc_char8_t, char, code_page);
|
|
||||||
}
|
|
||||||
std::string UTF8ToChar(const yycc_char8_t* src, UINT code_page) {
|
|
||||||
CONVFCT_TYPE4(UTF8ToChar, yycc_char8_t, char, code_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma endregion
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef CONVFCT_TYPE2
|
|
||||||
#undef CONVFCT_TYPE3
|
|
||||||
#undef CONVFCT_TYPE4
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
#include "YYCCInternal.hpp"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
|
||||||
#include "WinImportPrefix.hpp"
|
|
||||||
#include <Windows.h>
|
|
||||||
#include "WinImportSuffix.hpp"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief The helper for all encoding stuff.
|
|
||||||
* @details
|
|
||||||
* For more infomations about how to use the functions provided by this namespace,
|
|
||||||
* please see \ref library_encoding and \ref encoding_helper.
|
|
||||||
*/
|
|
||||||
namespace YYCC::EncodingHelper {
|
|
||||||
|
|
||||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
|
||||||
|
|
||||||
bool WcharToChar(const std::wstring_view& src, std::string& dst, UINT code_page);
|
|
||||||
bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page);
|
|
||||||
std::string WcharToChar(const std::wstring_view& src, UINT code_page);
|
|
||||||
std::string WcharToChar(const wchar_t* src, UINT code_page);
|
|
||||||
|
|
||||||
bool CharToWchar(const std::string_view& src, std::wstring& dst, UINT code_page);
|
|
||||||
bool CharToWchar(const char* src, std::wstring& dst, UINT code_page);
|
|
||||||
std::wstring CharToWchar(const std::string_view& src, UINT code_page);
|
|
||||||
std::wstring CharToWchar(const char* src, UINT code_page);
|
|
||||||
|
|
||||||
bool CharToChar(const std::string_view& src, std::string& dst, UINT src_code_page, UINT dst_code_page);
|
|
||||||
bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page);
|
|
||||||
std::string CharToChar(const std::string_view& src, UINT src_code_page, UINT dst_code_page);
|
|
||||||
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page);
|
|
||||||
|
|
||||||
|
|
||||||
bool WcharToUTF8(const std::wstring_view& src, yycc_u8string& dst);
|
|
||||||
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst);
|
|
||||||
yycc_u8string WcharToUTF8(const std::wstring_view& src);
|
|
||||||
yycc_u8string WcharToUTF8(const wchar_t* src);
|
|
||||||
|
|
||||||
bool UTF8ToWchar(const yycc_u8string_view& src, std::wstring& dst);
|
|
||||||
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst);
|
|
||||||
std::wstring UTF8ToWchar(const yycc_u8string_view& src);
|
|
||||||
std::wstring UTF8ToWchar(const yycc_char8_t* src);
|
|
||||||
|
|
||||||
bool CharToUTF8(const std::string_view& src, yycc_u8string& dst, UINT code_page);
|
|
||||||
bool CharToUTF8(const char* src, yycc_u8string& dst, UINT code_page);
|
|
||||||
yycc_u8string CharToUTF8(const std::string_view& src, UINT code_page);
|
|
||||||
yycc_u8string CharToUTF8(const char* src, UINT code_page);
|
|
||||||
|
|
||||||
bool UTF8ToChar(const yycc_u8string_view& src, std::string& dst, UINT code_page);
|
|
||||||
bool UTF8ToChar(const yycc_char8_t* src, std::string& dst, UINT code_page);
|
|
||||||
std::string UTF8ToChar(const yycc_u8string_view& src, UINT code_page);
|
|
||||||
std::string UTF8ToChar(const yycc_char8_t* src, UINT code_page);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
|
@ -0,0 +1,194 @@
|
|||||||
|
#include "iconv.hpp"
|
||||||
|
|
||||||
|
#if YYCC_FEAT_ICONV || (YYCC_OS != YYCC_OS_WINDOWS)
|
||||||
|
|
||||||
|
#include "../string/reinterpret.hpp"
|
||||||
|
#include <cerrno>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <iconv.h>
|
||||||
|
|
||||||
|
#define NS_YYCC_STRING ::yycc::string
|
||||||
|
#define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
|
||||||
|
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
|
||||||
|
|
||||||
|
#pragma region Iconv Shit Fix
|
||||||
|
|
||||||
|
// YYC MARK:
|
||||||
|
// The header "iconv.h" may define some of its names (notably "iconv") as preprocessor macros; this depends on the platform and the Iconv implementation.
|
||||||
|
// A macro called "iconv" is especially troublesome: it clashes with the name of our namespace, yet it can not simply be disabled because we rely on it to reach the essential functions.
|
||||||
|
// We can not simply redefine it either, because we can not be sure that "iconv" is defined in the same way on all platforms.
|
||||||
|
// So we write a type alias and some thin wrapper functions here, capturing the functions and types we need before the namespace is declared,
|
||||||
|
// and then remove those macro definitions. Hopefully, the compiler will optimize these wrapper functions away.
|
||||||
|
|
||||||
|
typedef iconv_t that_iconv_t;
|
||||||
|
/// Thin wrapper that forwards both encoding names unchanged to iconv_open() and returns its result.
/// It exists so that code after the "#undef iconv_open" further down can still reach the function under a macro-free name.
static iconv_t that_iconv_open(const char* tocode, const char* fromcode) {
|
||||||
|
// Must stay a direct call: "iconv_open" may be a macro at this point.
return iconv_open(tocode, fromcode);
|
||||||
|
}
|
||||||
|
/// Thin wrapper that forwards the descriptor to iconv_close() and returns its result.
/// It exists so that code after the "#undef iconv_close" further down can still reach the function under a macro-free name.
static int that_iconv_close(iconv_t cd) {
|
||||||
|
// Must stay a direct call: "iconv_close" may be a macro at this point.
return iconv_close(cd);
|
||||||
|
}
|
||||||
|
/// Thin wrapper that forwards all arguments to iconv() and returns its result.
/// Unlike the call it forwards to, it accepts the input buffer as "const char**".
static size_t that_iconv(iconv_t cd, const char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
|
||||||
|
// YYC MARK:
|
||||||
|
// The iconv() declaration seen by this file takes a non-const "char**" input buffer, which differs from what the author found in the GNU documentation.
|
||||||
|
// So a "const" cast is required here.
// NOTE(review): this presumably would not compile on a platform whose iconv() declares "const char**" -- confirm if such a platform must be supported.
|
||||||
|
return iconv(cd, const_cast<char**>(inbuf), inbytesleft, outbuf, outbytesleft);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef iconv_t
|
||||||
|
#undef iconv_open
|
||||||
|
#undef iconv_close
|
||||||
|
|
||||||
|
#undef iconv
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
namespace yycc::encoding::iconv {
|
||||||
|
|
||||||
|
static const that_iconv_t INVALID_ICONV_TOKEN = reinterpret_cast<that_iconv_t>(-1);
|
||||||
|
|
||||||
|
#pragma region PrivToken
|
||||||
|
|
||||||
|
class PrivToken {
|
||||||
|
public:
|
||||||
|
PrivToken(const CodeName& from_code, const CodeName& to_code) : inner(INVALID_ICONV_TOKEN) {
|
||||||
|
// We must cast them into string container, not string view,
|
||||||
|
// because they may not have NULL terminator.
|
||||||
|
std::string iconv_from_code = NS_YYCC_STRING_REINTERPRET::as_ordinary(from_code),
|
||||||
|
iconv_to_code = NS_YYCC_STRING_REINTERPRET::as_ordinary(to_code);
|
||||||
|
// Call iconv_t creator
|
||||||
|
that_iconv_t descriptor = that_iconv_open(iconv_to_code.c_str(), iconv_from_code.c_str());
|
||||||
|
if (descriptor == INVALID_ICONV_TOKEN) {
|
||||||
|
if (errno == EINVAL) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error("impossible errno when calling iconv_open()");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Setup value
|
||||||
|
this->inner = descriptor;
|
||||||
|
}
|
||||||
|
~PrivToken() {
|
||||||
|
if (this->inner != INVALID_ICONV_TOKEN) {
|
||||||
|
that_iconv_close(this->inner);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
YYCC_DELETE_COPY(PrivToken)
|
||||||
|
YYCC_DEFAULT_MOVE(PrivToken)
|
||||||
|
|
||||||
|
bool is_valid() const { return this->inner != INVALID_ICONV_TOKEN; }
|
||||||
|
that_iconv_t get_inner() const { return this->inner; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
that_iconv_t inner;
|
||||||
|
};
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region Token
|
||||||
|
|
||||||
|
/// Create the hidden PrivToken for the requested conversion and take ownership of it.
Token::Token(const CodeName& from_code, const CodeName& to_code)
    : inner(std::make_unique<PrivToken>(from_code, to_code)) {
}
|
||||||
|
|
||||||
|
/// Defined in this translation unit, where PrivToken is a complete type, so that the owning unique_ptr can delete it.
Token::~Token() = default;
|
||||||
|
|
||||||
|
bool Token::is_valid() const {
|
||||||
|
return this->inner->is_valid();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expose the hidden implementation object without giving up ownership.
PrivToken* Token::get_inner() const {
    PrivToken* const borrowed = this->inner.get();
    return borrowed;
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region Kernel
|
||||||
|
|
||||||
|
// YYC MARK:
|
||||||
|
// Use std::monostate to simulate std::expected<void>.
|
||||||
|
|
||||||
|
constexpr const size_t ICONV_INC_LEN = 16u;
|
||||||
|
constexpr size_t ICONV_ERR_RV = static_cast<size_t>(-1);
|
||||||
|
|
||||||
|
// Reference: https://stackoverflow.com/questions/13297458/simple-utf8-utf16-string-conversion-with-iconv
|
||||||
|
|
||||||
|
/**
 * @brief Convert a byte buffer with the iconv descriptor held by the given token.
 * @details
 * The output buffer is allocated with std::malloc() and grown with std::realloc()
 * whenever iconv() reports E2BIG. On success its ownership is handed to the caller,
 * who must release it with std::free(). For empty input, the output is (nullptr, 0).
 * After the conversion, the descriptor is reset to its initial state.
 * @param[in] token The token providing the iconv descriptor.
 * @param[in] arg_inbuf Pointer to the input bytes. May be nullptr only if \c arg_inbytes is zero.
 * @param[in] arg_inbytes The count of input bytes.
 * @param[out] arg_outbuf Receives the pointer to the converted bytes on success. Must not be nullptr.
 * @param[out] arg_outbytes Receives the count of converted bytes on success. Must not be nullptr.
 * @return Empty success value, or the ConvError describing the failure.
 * On failure the output variables are left untouched.
 * @throws std::bad_alloc if the output buffer can not be allocated or grown.
 * @throws std::runtime_error if iconv() fails with an unexpected errno.
 */
static ConvResult<std::monostate> iconv_kernel(
    const Token& token, const uint8_t* arg_inbuf, size_t arg_inbytes, uint8_t** arg_outbuf, size_t* arg_outbytes) {
    // ===== Check Requirements =====
    // Unwrap and check iconv_t.
    // A token without inner object (e.g. moved-from) is treated as an invalid descriptor.
    const PrivToken* priv_token = token.get_inner();
    if (priv_token == nullptr) return ConvError::InvalidCd;
    that_iconv_t cd = priv_token->get_inner();
    if (cd == INVALID_ICONV_TOKEN) return ConvError::InvalidCd;

    // Check nullptr output variables
    if (arg_outbuf == nullptr || arg_outbytes == nullptr) return ConvError::NullPointer;
    // Check empty input
    if (arg_inbytes == 0u) {
        *arg_outbuf = nullptr;
        *arg_outbytes = 0u;
        return {};
    }
    // Check nullptr input variables
    if (arg_inbuf == nullptr) return ConvError::NullPointer;

    // ===== Do Iconv =====
    // setup input variables
    size_t inbytesleft = arg_inbytes;
    const char* inbuf = reinterpret_cast<const char*>(arg_inbuf);
    // pre-allocation output variables
    size_t outbytesall = arg_inbytes + ICONV_INC_LEN;
    char* outbufbase = static_cast<char*>(std::malloc(outbytesall));
    if (outbufbase == nullptr) throw std::bad_alloc();
    size_t outbytesleft = outbytesall;
    char* outbuf = outbufbase;

    // conv core
    size_t nchars = that_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
    while (nchars == ICONV_ERR_RV && errno == E2BIG) {
        // record the length which has been converted,
        // because the buffer may be relocated when growing
        const size_t written = static_cast<size_t>(outbuf - outbufbase);

        // grow the buffer
        const size_t grown_size = outbytesall + ICONV_INC_LEN;
        char* grown = static_cast<char*>(std::realloc(outbufbase, grown_size));
        if (grown == nullptr) {
            // std::realloc() keeps the old block alive on failure, so release it ourselves
            // and leave the descriptor in its initial state before reporting.
            that_iconv(cd, nullptr, nullptr, nullptr, nullptr);
            std::free(outbufbase);
            throw std::bad_alloc();
        }
        outbufbase = grown;
        outbytesall = grown_size;
        outbytesleft += ICONV_INC_LEN;

        // continue from the position where the previous call stopped
        outbuf = outbufbase + written;
        nchars = that_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
    }

    // Save errno of the conversion before any further call can overwrite it.
    const int conv_errno = errno;
    // restore descriptor initial state
    that_iconv(cd, nullptr, nullptr, nullptr, nullptr);

    // check error
    if (nchars == ICONV_ERR_RV) {
        // Free allocated buffer
        std::free(outbufbase);

        if (conv_errno == EILSEQ) {
            return ConvError::InvalidMbSeq;
        } else if (conv_errno == EINVAL) {
            return ConvError::IncompleteMbSeq;
        } else {
            throw std::runtime_error("impossible errno when calling iconv()");
        }
    }

    // success: hand the buffer and the count of actually written bytes to the caller
    *arg_outbuf = reinterpret_cast<uint8_t*>(outbufbase);
    *arg_outbytes = outbytesall - outbytesleft;
    return {};
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
} // namespace yycc::encoding::iconv
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -0,0 +1,58 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "../macro/os_detector.hpp"
|
||||||
|
|
||||||
|
#if YYCC_FEAT_ICONV || (YYCC_OS != YYCC_OS_WINDOWS)
|
||||||
|
|
||||||
|
#include "../macro/class_copy_move.hpp"
|
||||||
|
#include "../patch/expected.hpp"
|
||||||
|
#include "../string.hpp"
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#define NS_YYCC_STRING ::yycc::string
|
||||||
|
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
|
||||||
|
|
||||||
|
namespace yycc::encoding::iconv {
|
||||||
|
|
||||||
|
// YYC MARK:
|
||||||
|
// I don't want to include "iconv.h" in there.
|
||||||
|
// One of reasons is that I want to hide all implementation of Iconv.
|
||||||
|
// Another reason is that "iconv.h" defines some annoying macros which intervene some names in this files.
|
||||||
|
// So I introduce PIMPL design mode. Use a pointer to hide all details in class PrivToken.
|
||||||
|
|
||||||
|
using CodeName = NS_YYCC_STRING::u8string_view;
|
||||||
|
|
||||||
|
/// @private
|
||||||
|
class PrivToken;
|
||||||
|
|
||||||
|
/// Public, move-only handle for one conversion direction (from one encoding name to another).
/// All implementation details are hidden behind the forward-declared PrivToken (PIMPL).
class Token {
|
||||||
|
public:
|
||||||
|
/// Create a token converting from the encoding named \c from_code to the encoding named \c to_code.
Token(const CodeName& from_code, const CodeName& to_code);
|
||||||
|
/// Declared here and defined out of line, because PrivToken is an incomplete type in this header.
~Token();
|
||||||
|
YYCC_DELETE_COPY(Token)
|
||||||
|
YYCC_DEFAULT_MOVE(Token)
|
||||||
|
|
||||||
|
/// Check whether this token can be used for conversion.
bool is_valid() const;
|
||||||
|
/// Get the pointer to the hidden implementation object. The token keeps ownership of it.
PrivToken* get_inner() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// The owned implementation object.
std::unique_ptr<PrivToken> inner;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// @private
|
||||||
|
/// The possible error kinds of an Iconv-based conversion.
enum class ConvError {
|
||||||
|
InvalidCd, ///< Given token is invalid.
|
||||||
|
NullPointer, ///< One of the essential pointer arguments is nullptr.
|
||||||
|
InvalidMbSeq, ///< An invalid multibyte sequence has been encountered in the input.
|
||||||
|
IncompleteMbSeq, ///< An incomplete multibyte sequence has been encountered in the input.
|
||||||
|
};
|
||||||
|
|
||||||
|
/// @private
|
||||||
|
template<typename T>
|
||||||
|
using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected<T, ConvError>;
|
||||||
|
|
||||||
|
} // namespace yycc::encoding::iconv
|
||||||
|
|
||||||
|
#undef NS_YYCC_PATCH_EXPECTED
|
||||||
|
#undef NS_YYCC_STRING
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include "../windows/import_guard_head.hpp"
|
#include "../windows/import_guard_head.hpp"
|
||||||
#include "../windows/import_guard_tail.hpp"
|
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
|
#include "../windows/import_guard_tail.hpp"
|
||||||
|
|
||||||
#define NS_YYCC_STRING ::yycc::string
|
#define NS_YYCC_STRING ::yycc::string
|
||||||
#define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
|
#define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
|
||||||
@ -134,9 +134,7 @@ namespace yycc::encoding::windows {
|
|||||||
|
|
||||||
#pragma region Char -> Char
|
#pragma region Char -> Char
|
||||||
|
|
||||||
ConvResult<std::string> priv_to_char(const std::string_view& src,
|
ConvResult<std::string> priv_to_char(const std::string_view& src, CodePage src_code_page, CodePage dst_code_page) {
|
||||||
CodePage src_code_page,
|
|
||||||
CodePage dst_code_page) {
|
|
||||||
namespace expected = NS_YYCC_PATCH_EXPECTED;
|
namespace expected = NS_YYCC_PATCH_EXPECTED;
|
||||||
|
|
||||||
// Perform first convertion
|
// Perform first convertion
|
||||||
@ -205,8 +203,7 @@ namespace yycc::encoding::windows {
|
|||||||
|
|
||||||
#pragma region Char -> UTF8
|
#pragma region Char -> UTF8
|
||||||
|
|
||||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src,
|
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src, CodePage code_page) {
|
||||||
CodePage code_page) {
|
|
||||||
namespace expected = NS_YYCC_PATCH_EXPECTED;
|
namespace expected = NS_YYCC_PATCH_EXPECTED;
|
||||||
|
|
||||||
auto rv = priv_to_char(src, code_page, CP_UTF8);
|
auto rv = priv_to_char(src, code_page, CP_UTF8);
|
||||||
@ -228,6 +225,7 @@ namespace yycc::encoding::windows {
|
|||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
||||||
#pragma region UTF8 -> Char
|
#pragma region UTF8 -> Char
|
||||||
|
|
||||||
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src,
|
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src,
|
||||||
CodePage code_page) {
|
CodePage code_page) {
|
||||||
return priv_to_char(NS_YYCC_STRING_REINTERPRET::as_ordinary_view(src), CP_UTF8, code_page);
|
return priv_to_char(NS_YYCC_STRING_REINTERPRET::as_ordinary_view(src), CP_UTF8, code_page);
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include "../macro/os_detector.hpp"
|
#include "../macro/os_detector.hpp"
|
||||||
#include "../string.hpp"
|
|
||||||
|
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||||
|
|
||||||
#include "../patch/expected.hpp"
|
#include "../patch/expected.hpp"
|
||||||
|
#include "../string.hpp"
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
#define NS_YYCC_STRING ::yycc::string
|
#define NS_YYCC_STRING ::yycc::string
|
||||||
@ -9,14 +12,12 @@
|
|||||||
|
|
||||||
namespace yycc::encoding::windows {
|
namespace yycc::encoding::windows {
|
||||||
|
|
||||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
|
||||||
|
|
||||||
using CodePage = uint32_t;
|
using CodePage = uint32_t;
|
||||||
|
|
||||||
/// @private
|
/// @private
|
||||||
enum class ConvError {
|
enum class ConvError {
|
||||||
TooLargeLength, ///< The length of given string is too large exceeding the maximum capacity of Win32 function.
|
TooLargeLength, ///< The length of given string is too large exceeding the maximum capacity of Win32 function.
|
||||||
NoDesiredSize, ///< Can not compute the desired size of result string.
|
NoDesiredSize, ///< Can not compute the desired size of result string.
|
||||||
BadWrittenSize, ///< The size of written data is not matched with expected size.
|
BadWrittenSize, ///< The size of written data is not matched with expected size.
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -34,47 +35,39 @@ namespace yycc::encoding::windows {
|
|||||||
bool to_wchar(const std::string_view& src, std::wstring& dst, CodePage code_page);
|
bool to_wchar(const std::string_view& src, std::wstring& dst, CodePage code_page);
|
||||||
std::wstring to_wchar(const std::string_view& src, CodePage code_page);
|
std::wstring to_wchar(const std::string_view& src, CodePage code_page);
|
||||||
|
|
||||||
// YYC MARK:
|
|
||||||
// Following functions are basically the alias of above functions.
|
|
||||||
|
|
||||||
// Char -> Char
|
// Char -> Char
|
||||||
ConvResult<std::string> priv_to_char(const std::string_view& src,
|
// This is the combination of "WChar -> Char" and "Char -> WChar"
|
||||||
CodePage src_code_page,
|
ConvResult<std::string> priv_to_char(const std::string_view& src, CodePage src_code_page, CodePage dst_code_page);
|
||||||
CodePage dst_code_page);
|
bool to_char(const std::string_view& src, std::string& dst, CodePage src_code_page, CodePage dst_code_page);
|
||||||
bool to_char(const std::string_view& src,
|
|
||||||
std::string& dst,
|
|
||||||
CodePage src_code_page,
|
|
||||||
CodePage dst_code_page);
|
|
||||||
std::string to_char(const std::string_view& src, CodePage src_code_page, CodePage dst_code_page);
|
std::string to_char(const std::string_view& src, CodePage src_code_page, CodePage dst_code_page);
|
||||||
|
|
||||||
// YYC MARK:
|
|
||||||
// Following functions are basically the specialized UTF8 functions.
|
|
||||||
|
|
||||||
// WChar -> UTF8
|
// WChar -> UTF8
|
||||||
|
// This is the specialization of "WChar -> Char"
|
||||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::wstring_view& src);
|
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::wstring_view& src);
|
||||||
bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst);
|
bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst);
|
||||||
NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src);
|
NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src);
|
||||||
|
|
||||||
// UTF8 -> WChar
|
// UTF8 -> WChar
|
||||||
|
// This is the specialization of "Char -> WChar"
|
||||||
ConvResult<std::wstring> priv_to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
ConvResult<std::wstring> priv_to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||||
bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst);
|
bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst);
|
||||||
std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||||
|
|
||||||
// Char -> UTF8
|
// Char -> UTF8
|
||||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src,
|
// This is the specialization of "Char -> Char"
|
||||||
CodePage code_page);
|
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src, CodePage code_page);
|
||||||
bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst, CodePage code_page);
|
bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst, CodePage code_page);
|
||||||
NS_YYCC_STRING::u8string to_utf8(const std::string_view& src, CodePage code_page);
|
NS_YYCC_STRING::u8string to_utf8(const std::string_view& src, CodePage code_page);
|
||||||
|
|
||||||
// UTF8 -> Char
|
// UTF8 -> Char
|
||||||
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src,
|
// This is the specialization of "Char -> Char"
|
||||||
CodePage code_page);
|
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src, CodePage code_page);
|
||||||
bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst, CodePage code_page);
|
bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst, CodePage code_page);
|
||||||
std::string to_char(const NS_YYCC_STRING::u8string_view& src, CodePage code_page);
|
std::string to_char(const NS_YYCC_STRING::u8string_view& src, CodePage code_page);
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace yycc::encoding::windows
|
} // namespace yycc::encoding::windows
|
||||||
|
|
||||||
#undef NS_YYCC_PATCH_EXPECTED
|
#undef NS_YYCC_PATCH_EXPECTED
|
||||||
#undef NS_YYCC_STRING
|
#undef NS_YYCC_STRING
|
||||||
|
|
||||||
|
#endif
|
||||||
|
Reference in New Issue
Block a user