refactor: continue refactoring so that the project can be built
This commit is contained in:
@ -17,7 +17,7 @@ PRIVATE
|
||||
yycc/encoding/stlcvt.cpp
|
||||
yycc/encoding/windows.cpp
|
||||
yycc/encoding/iconv.cpp
|
||||
yycc/encoding/pycodec.cpp
|
||||
#yycc/encoding/pycodec.cpp
|
||||
)
|
||||
target_sources(YYCCommonplace
|
||||
PUBLIC
|
||||
@ -28,6 +28,7 @@ FILES
|
||||
yycc/version.hpp
|
||||
yycc/macro/version_cmp.hpp
|
||||
yycc/macro/os_detector.hpp
|
||||
yycc/macro/stl_detector.hpp
|
||||
yycc/macro/endian_detector.hpp
|
||||
yycc/macro/compiler_detector.hpp
|
||||
yycc/macro/class_copy_move.hpp
|
||||
@ -73,8 +74,9 @@ PUBLIC
|
||||
# Iconv environment macro
|
||||
$<$<BOOL:${YYCC_ENFORCE_ICONV}>:YYCC_FEAT_ICONV>
|
||||
# OS macro
|
||||
$<$<BOOL:${WIN32}>:YYCC_OS_WINDOWS>
|
||||
$<$<PLATFORM_ID:Windows>:YYCC_OS_WINDOWS>
|
||||
$<$<PLATFORM_ID:Linux>:YYCC_OS_LINUX>
|
||||
$<$<PLATFORM_ID:Darwin>:YYCC_OS_MACOS>
|
||||
# Compiler macro
|
||||
$<$<CXX_COMPILER_ID:GNU>:YYCC_CC_GCC>
|
||||
$<$<CXX_COMPILER_ID:Clang>:YYCC_CC_CLANG>
|
||||
@ -100,7 +102,14 @@ PUBLIC
|
||||
$<$<CXX_COMPILER_ID:MSVC>:/utf-8>
|
||||
)
|
||||
|
||||
# TODO: Fix GCC stacktrace link issue
|
||||
# Fix GCC std::stacktrace link error
|
||||
# Only GCC needs an extra link library here; other compilers are left untouched.
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
# The library that has to be linked depends on the GCC major version.
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14)
|
||||
# NOTE(review): GCC 14+ is expected to ship std::stacktrace support in libstdc++exp - confirm against the libstdc++ manual.
target_link_libraries(YYCCommonplace PRIVATE stdc++exp)
|
||||
else ()
|
||||
# NOTE(review): older GCC releases are expected to ship it in libstdc++_libbacktrace - confirm against the libstdc++ manual.
target_link_libraries(YYCCommonplace PRIVATE stdc++_libbacktrace)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Install binary and headers
|
||||
install(TARGETS YYCCommonplace
|
||||
|
@ -229,7 +229,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
CharToUtf8::~CharToUtf8() {}
|
||||
|
||||
ConvResult<std::u8string> CharToUtf8::priv_to_utf8(const std::string_view& src) {
|
||||
ConvResult<std::u8string> CharToUtf8::to_utf8(const std::string_view& src) {
|
||||
USER_CONVFN(char, char8_t);
|
||||
}
|
||||
|
||||
@ -241,7 +241,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
Utf8ToChar::~Utf8ToChar() {}
|
||||
|
||||
ConvResult<std::string> Utf8ToChar::priv_to_char(const std::u8string_view& src) {
|
||||
ConvResult<std::string> Utf8ToChar::to_char(const std::u8string_view& src) {
|
||||
USER_CONVFN(char8_t, char);
|
||||
}
|
||||
|
||||
@ -253,7 +253,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
WcharToUtf8::~WcharToUtf8() {}
|
||||
|
||||
ConvResult<std::u8string> WcharToUtf8::priv_to_utf8(const std::wstring_view& src) {
|
||||
ConvResult<std::u8string> WcharToUtf8::to_utf8(const std::wstring_view& src) {
|
||||
USER_CONVFN(wchar_t, char8_t);
|
||||
}
|
||||
|
||||
@ -265,7 +265,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
Utf8ToWchar::~Utf8ToWchar() {}
|
||||
|
||||
ConvResult<std::wstring> Utf8ToWchar::priv_to_wchar(const std::u8string_view& src) {
|
||||
ConvResult<std::wstring> Utf8ToWchar::to_wchar(const std::u8string_view& src) {
|
||||
USER_CONVFN(char8_t, wchar_t);
|
||||
}
|
||||
|
||||
@ -277,7 +277,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
Utf8ToUtf16::~Utf8ToUtf16() {}
|
||||
|
||||
ConvResult<std::u16string> Utf8ToUtf16::priv_to_utf16(const std::u8string_view& src) {
|
||||
ConvResult<std::u16string> Utf8ToUtf16::to_utf16(const std::u8string_view& src) {
|
||||
USER_CONVFN(char8_t, char16_t);
|
||||
}
|
||||
|
||||
@ -289,7 +289,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
Utf16ToUtf8::~Utf16ToUtf8() {}
|
||||
|
||||
ConvResult<std::u8string> Utf16ToUtf8::priv_to_utf8(const std::u16string_view& src) {
|
||||
ConvResult<std::u8string> Utf16ToUtf8::to_utf8(const std::u16string_view& src) {
|
||||
USER_CONVFN(char16_t, char8_t);
|
||||
}
|
||||
|
||||
@ -301,7 +301,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
Utf8ToUtf32::~Utf8ToUtf32() {}
|
||||
|
||||
ConvResult<std::u32string> Utf8ToUtf32::priv_to_utf32(const std::u8string_view& src) {
|
||||
ConvResult<std::u32string> Utf8ToUtf32::to_utf32(const std::u8string_view& src) {
|
||||
USER_CONVFN(char8_t, char32_t);
|
||||
}
|
||||
|
||||
@ -313,7 +313,7 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
Utf32ToUtf8::~Utf32ToUtf8() {}
|
||||
|
||||
ConvResult<std::u8string> Utf32ToUtf8::priv_to_utf8(const std::u32string_view& src) {
|
||||
ConvResult<std::u8string> Utf32ToUtf8::to_utf8(const std::u32string_view& src) {
|
||||
USER_CONVFN(char32_t, char8_t);
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,5 @@
|
||||
#pragma once
|
||||
#include "../macro/os_detector.hpp"
|
||||
|
||||
#if YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS)
|
||||
|
||||
#include "../macro/class_copy_move.hpp"
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
@ -51,6 +48,8 @@ namespace yycc::encoding::iconv {
|
||||
template<typename T>
|
||||
using ConvResult = std::expected<T, ConvError>;
|
||||
|
||||
#if YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS)
|
||||
|
||||
/// @brief Char -> UTF8
|
||||
class CharToUtf8 {
|
||||
public:
|
||||
@ -60,7 +59,7 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(CharToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<std::u8string> priv_to_utf8(const std::string_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
@ -75,7 +74,7 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToChar)
|
||||
|
||||
public:
|
||||
ConvResult<std::string> priv_to_char(const std::u8string_view& src);
|
||||
ConvResult<std::string> to_char(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
@ -90,7 +89,7 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(WcharToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<std::u8string> priv_to_utf8(const std::wstring_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::wstring_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
@ -105,7 +104,7 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToWchar)
|
||||
|
||||
public:
|
||||
ConvResult<std::wstring> priv_to_wchar(const std::u8string_view& src);
|
||||
ConvResult<std::wstring> to_wchar(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
@ -120,7 +119,7 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToUtf16)
|
||||
|
||||
public:
|
||||
ConvResult<std::u16string> priv_to_utf16(const std::u8string_view& src);
|
||||
ConvResult<std::u16string> to_utf16(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
@ -135,7 +134,7 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(Utf16ToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<std::u8string> priv_to_utf8(const std::u16string_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::u16string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
@ -150,7 +149,7 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToUtf32)
|
||||
|
||||
public:
|
||||
ConvResult<std::u32string> priv_to_utf32(const std::u8string_view& src);
|
||||
ConvResult<std::u32string> to_utf32(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
@ -165,12 +164,12 @@ namespace yycc::encoding::iconv {
|
||||
YYCC_DEFAULT_MOVE(Utf32ToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<std::u8string> priv_to_utf8(const std::u32string_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::u32string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
};
|
||||
|
||||
} // namespace yycc::encoding::iconv
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace yycc::encoding::iconv
|
||||
|
@ -1,359 +1,370 @@
|
||||
#include "pycodec.hpp"
|
||||
|
||||
#include "../string/op.hpp"
|
||||
#include <map>
|
||||
|
||||
#define NS_YYCC_STRING ::yycc::string
|
||||
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
|
||||
using namespace std::literals::string_view_literals;
|
||||
namespace op = ::yycc::string::op;
|
||||
|
||||
namespace yycc::encoding::pycodec {
|
||||
|
||||
#pragma region Encoding Name
|
||||
|
||||
static const std::map<NS_YYCC_STRING::u8string, NS_YYCC_STRING::u8string> ALISA_MAP {
|
||||
{ YYCC_U8("646"), YYCC_U8("ascii") },
|
||||
{ YYCC_U8("us-ascii"), YYCC_U8("ascii") },
|
||||
{ YYCC_U8("big5-tw"), YYCC_U8("big5") },
|
||||
{ YYCC_U8("csbig5"), YYCC_U8("big5") },
|
||||
{ YYCC_U8("big5-hkscs"), YYCC_U8("big5hkscs") },
|
||||
{ YYCC_U8("hkscs"), YYCC_U8("big5hkscs") },
|
||||
{ YYCC_U8("ibm037"), YYCC_U8("cp037") },
|
||||
{ YYCC_U8("ibm039"), YYCC_U8("cp037") },
|
||||
{ YYCC_U8("273"), YYCC_U8("cp273") },
|
||||
{ YYCC_U8("ibm273"), YYCC_U8("cp273") },
|
||||
{ YYCC_U8("csibm273"), YYCC_U8("cp273") },
|
||||
{ YYCC_U8("ebcdic-cp-he"), YYCC_U8("cp424") },
|
||||
{ YYCC_U8("ibm424"), YYCC_U8("cp424") },
|
||||
{ YYCC_U8("437"), YYCC_U8("cp437") },
|
||||
{ YYCC_U8("ibm437"), YYCC_U8("cp437") },
|
||||
{ YYCC_U8("ebcdic-cp-be"), YYCC_U8("cp500") },
|
||||
{ YYCC_U8("ebcdic-cp-ch"), YYCC_U8("cp500") },
|
||||
{ YYCC_U8("ibm500"), YYCC_U8("cp500") },
|
||||
{ YYCC_U8("ibm775"), YYCC_U8("cp775") },
|
||||
{ YYCC_U8("850"), YYCC_U8("cp850") },
|
||||
{ YYCC_U8("ibm850"), YYCC_U8("cp850") },
|
||||
{ YYCC_U8("852"), YYCC_U8("cp852") },
|
||||
{ YYCC_U8("ibm852"), YYCC_U8("cp852") },
|
||||
{ YYCC_U8("855"), YYCC_U8("cp855") },
|
||||
{ YYCC_U8("ibm855"), YYCC_U8("cp855") },
|
||||
{ YYCC_U8("857"), YYCC_U8("cp857") },
|
||||
{ YYCC_U8("ibm857"), YYCC_U8("cp857") },
|
||||
{ YYCC_U8("858"), YYCC_U8("cp858") },
|
||||
{ YYCC_U8("ibm858"), YYCC_U8("cp858") },
|
||||
{ YYCC_U8("860"), YYCC_U8("cp860") },
|
||||
{ YYCC_U8("ibm860"), YYCC_U8("cp860") },
|
||||
{ YYCC_U8("861"), YYCC_U8("cp861") },
|
||||
{ YYCC_U8("cp-is"), YYCC_U8("cp861") },
|
||||
{ YYCC_U8("ibm861"), YYCC_U8("cp861") },
|
||||
{ YYCC_U8("862"), YYCC_U8("cp862") },
|
||||
{ YYCC_U8("ibm862"), YYCC_U8("cp862") },
|
||||
{ YYCC_U8("863"), YYCC_U8("cp863") },
|
||||
{ YYCC_U8("ibm863"), YYCC_U8("cp863") },
|
||||
{ YYCC_U8("ibm864"), YYCC_U8("cp864") },
|
||||
{ YYCC_U8("865"), YYCC_U8("cp865") },
|
||||
{ YYCC_U8("ibm865"), YYCC_U8("cp865") },
|
||||
{ YYCC_U8("866"), YYCC_U8("cp866") },
|
||||
{ YYCC_U8("ibm866"), YYCC_U8("cp866") },
|
||||
{ YYCC_U8("869"), YYCC_U8("cp869") },
|
||||
{ YYCC_U8("cp-gr"), YYCC_U8("cp869") },
|
||||
{ YYCC_U8("ibm869"), YYCC_U8("cp869") },
|
||||
{ YYCC_U8("932"), YYCC_U8("cp932") },
|
||||
{ YYCC_U8("ms932"), YYCC_U8("cp932") },
|
||||
{ YYCC_U8("mskanji"), YYCC_U8("cp932") },
|
||||
{ YYCC_U8("ms-kanji"), YYCC_U8("cp932") },
|
||||
{ YYCC_U8("windows-31j"), YYCC_U8("cp932") },
|
||||
{ YYCC_U8("949"), YYCC_U8("cp949") },
|
||||
{ YYCC_U8("ms949"), YYCC_U8("cp949") },
|
||||
{ YYCC_U8("uhc"), YYCC_U8("cp949") },
|
||||
{ YYCC_U8("950"), YYCC_U8("cp950") },
|
||||
{ YYCC_U8("ms950"), YYCC_U8("cp950") },
|
||||
{ YYCC_U8("ibm1026"), YYCC_U8("cp1026") },
|
||||
{ YYCC_U8("1125"), YYCC_U8("cp1125") },
|
||||
{ YYCC_U8("ibm1125"), YYCC_U8("cp1125") },
|
||||
{ YYCC_U8("cp866u"), YYCC_U8("cp1125") },
|
||||
{ YYCC_U8("ruscii"), YYCC_U8("cp1125") },
|
||||
{ YYCC_U8("ibm1140"), YYCC_U8("cp1140") },
|
||||
{ YYCC_U8("windows-1250"), YYCC_U8("cp1250") },
|
||||
{ YYCC_U8("windows-1251"), YYCC_U8("cp1251") },
|
||||
{ YYCC_U8("windows-1252"), YYCC_U8("cp1252") },
|
||||
{ YYCC_U8("windows-1253"), YYCC_U8("cp1253") },
|
||||
{ YYCC_U8("windows-1254"), YYCC_U8("cp1254") },
|
||||
{ YYCC_U8("windows-1255"), YYCC_U8("cp1255") },
|
||||
{ YYCC_U8("windows-1256"), YYCC_U8("cp1256") },
|
||||
{ YYCC_U8("windows-1257"), YYCC_U8("cp1257") },
|
||||
{ YYCC_U8("windows-1258"), YYCC_U8("cp1258") },
|
||||
{ YYCC_U8("eucjp"), YYCC_U8("euc_jp") },
|
||||
{ YYCC_U8("ujis"), YYCC_U8("euc_jp") },
|
||||
{ YYCC_U8("u-jis"), YYCC_U8("euc_jp") },
|
||||
{ YYCC_U8("jisx0213"), YYCC_U8("euc_jis_2004") },
|
||||
{ YYCC_U8("eucjis2004"), YYCC_U8("euc_jis_2004") },
|
||||
{ YYCC_U8("eucjisx0213"), YYCC_U8("euc_jisx0213") },
|
||||
{ YYCC_U8("euckr"), YYCC_U8("euc_kr") },
|
||||
{ YYCC_U8("korean"), YYCC_U8("euc_kr") },
|
||||
{ YYCC_U8("ksc5601"), YYCC_U8("euc_kr") },
|
||||
{ YYCC_U8("ks_c-5601"), YYCC_U8("euc_kr") },
|
||||
{ YYCC_U8("ks_c-5601-1987"), YYCC_U8("euc_kr") },
|
||||
{ YYCC_U8("ksx1001"), YYCC_U8("euc_kr") },
|
||||
{ YYCC_U8("ks_x-1001"), YYCC_U8("euc_kr") },
|
||||
{ YYCC_U8("chinese"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("csiso58gb231280"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("euc-cn"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("euccn"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("eucgb2312-cn"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("gb2312-1980"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("gb2312-80"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("iso-ir-58"), YYCC_U8("gb2312") },
|
||||
{ YYCC_U8("936"), YYCC_U8("gbk") },
|
||||
{ YYCC_U8("cp936"), YYCC_U8("gbk") },
|
||||
{ YYCC_U8("ms936"), YYCC_U8("gbk") },
|
||||
{ YYCC_U8("gb18030-2000"), YYCC_U8("gb18030") },
|
||||
{ YYCC_U8("hzgb"), YYCC_U8("hz") },
|
||||
{ YYCC_U8("hz-gb"), YYCC_U8("hz") },
|
||||
{ YYCC_U8("hz-gb-2312"), YYCC_U8("hz") },
|
||||
{ YYCC_U8("csiso2022jp"), YYCC_U8("iso2022_jp") },
|
||||
{ YYCC_U8("iso2022jp"), YYCC_U8("iso2022_jp") },
|
||||
{ YYCC_U8("iso-2022-jp"), YYCC_U8("iso2022_jp") },
|
||||
{ YYCC_U8("iso2022jp-1"), YYCC_U8("iso2022_jp_1") },
|
||||
{ YYCC_U8("iso-2022-jp-1"), YYCC_U8("iso2022_jp_1") },
|
||||
{ YYCC_U8("iso2022jp-2"), YYCC_U8("iso2022_jp_2") },
|
||||
{ YYCC_U8("iso-2022-jp-2"), YYCC_U8("iso2022_jp_2") },
|
||||
{ YYCC_U8("iso2022jp-2004"), YYCC_U8("iso2022_jp_2004") },
|
||||
{ YYCC_U8("iso-2022-jp-2004"), YYCC_U8("iso2022_jp_2004") },
|
||||
{ YYCC_U8("iso2022jp-3"), YYCC_U8("iso2022_jp_3") },
|
||||
{ YYCC_U8("iso-2022-jp-3"), YYCC_U8("iso2022_jp_3") },
|
||||
{ YYCC_U8("iso2022jp-ext"), YYCC_U8("iso2022_jp_ext") },
|
||||
{ YYCC_U8("iso-2022-jp-ext"), YYCC_U8("iso2022_jp_ext") },
|
||||
{ YYCC_U8("csiso2022kr"), YYCC_U8("iso2022_kr") },
|
||||
{ YYCC_U8("iso2022kr"), YYCC_U8("iso2022_kr") },
|
||||
{ YYCC_U8("iso-2022-kr"), YYCC_U8("iso2022_kr") },
|
||||
{ YYCC_U8("iso-8859-1"), YYCC_U8("latin_1") },
|
||||
{ YYCC_U8("iso8859-1"), YYCC_U8("latin_1") },
|
||||
{ YYCC_U8("8859"), YYCC_U8("latin_1") },
|
||||
{ YYCC_U8("cp819"), YYCC_U8("latin_1") },
|
||||
{ YYCC_U8("latin"), YYCC_U8("latin_1") },
|
||||
{ YYCC_U8("latin1"), YYCC_U8("latin_1") },
|
||||
{ YYCC_U8("l1"), YYCC_U8("latin_1") },
|
||||
{ YYCC_U8("iso-8859-2"), YYCC_U8("iso8859_2") },
|
||||
{ YYCC_U8("latin2"), YYCC_U8("iso8859_2") },
|
||||
{ YYCC_U8("l2"), YYCC_U8("iso8859_2") },
|
||||
{ YYCC_U8("iso-8859-3"), YYCC_U8("iso8859_3") },
|
||||
{ YYCC_U8("latin3"), YYCC_U8("iso8859_3") },
|
||||
{ YYCC_U8("l3"), YYCC_U8("iso8859_3") },
|
||||
{ YYCC_U8("iso-8859-4"), YYCC_U8("iso8859_4") },
|
||||
{ YYCC_U8("latin4"), YYCC_U8("iso8859_4") },
|
||||
{ YYCC_U8("l4"), YYCC_U8("iso8859_4") },
|
||||
{ YYCC_U8("iso-8859-5"), YYCC_U8("iso8859_5") },
|
||||
{ YYCC_U8("cyrillic"), YYCC_U8("iso8859_5") },
|
||||
{ YYCC_U8("iso-8859-6"), YYCC_U8("iso8859_6") },
|
||||
{ YYCC_U8("arabic"), YYCC_U8("iso8859_6") },
|
||||
{ YYCC_U8("iso-8859-7"), YYCC_U8("iso8859_7") },
|
||||
{ YYCC_U8("greek"), YYCC_U8("iso8859_7") },
|
||||
{ YYCC_U8("greek8"), YYCC_U8("iso8859_7") },
|
||||
{ YYCC_U8("iso-8859-8"), YYCC_U8("iso8859_8") },
|
||||
{ YYCC_U8("hebrew"), YYCC_U8("iso8859_8") },
|
||||
{ YYCC_U8("iso-8859-9"), YYCC_U8("iso8859_9") },
|
||||
{ YYCC_U8("latin5"), YYCC_U8("iso8859_9") },
|
||||
{ YYCC_U8("l5"), YYCC_U8("iso8859_9") },
|
||||
{ YYCC_U8("iso-8859-10"), YYCC_U8("iso8859_10") },
|
||||
{ YYCC_U8("latin6"), YYCC_U8("iso8859_10") },
|
||||
{ YYCC_U8("l6"), YYCC_U8("iso8859_10") },
|
||||
{ YYCC_U8("iso-8859-11"), YYCC_U8("iso8859_11") },
|
||||
{ YYCC_U8("thai"), YYCC_U8("iso8859_11") },
|
||||
{ YYCC_U8("iso-8859-13"), YYCC_U8("iso8859_13") },
|
||||
{ YYCC_U8("latin7"), YYCC_U8("iso8859_13") },
|
||||
{ YYCC_U8("l7"), YYCC_U8("iso8859_13") },
|
||||
{ YYCC_U8("iso-8859-14"), YYCC_U8("iso8859_14") },
|
||||
{ YYCC_U8("latin8"), YYCC_U8("iso8859_14") },
|
||||
{ YYCC_U8("l8"), YYCC_U8("iso8859_14") },
|
||||
{ YYCC_U8("iso-8859-15"), YYCC_U8("iso8859_15") },
|
||||
{ YYCC_U8("latin9"), YYCC_U8("iso8859_15") },
|
||||
{ YYCC_U8("l9"), YYCC_U8("iso8859_15") },
|
||||
{ YYCC_U8("iso-8859-16"), YYCC_U8("iso8859_16") },
|
||||
{ YYCC_U8("latin10"), YYCC_U8("iso8859_16") },
|
||||
{ YYCC_U8("l10"), YYCC_U8("iso8859_16") },
|
||||
{ YYCC_U8("cp1361"), YYCC_U8("johab") },
|
||||
{ YYCC_U8("ms1361"), YYCC_U8("johab") },
|
||||
{ YYCC_U8("kz_1048"), YYCC_U8("kz1048") },
|
||||
{ YYCC_U8("strk1048_2002"), YYCC_U8("kz1048") },
|
||||
{ YYCC_U8("rk1048"), YYCC_U8("kz1048") },
|
||||
{ YYCC_U8("maccyrillic"), YYCC_U8("mac_cyrillic") },
|
||||
{ YYCC_U8("macgreek"), YYCC_U8("mac_greek") },
|
||||
{ YYCC_U8("maciceland"), YYCC_U8("mac_iceland") },
|
||||
{ YYCC_U8("maclatin2"), YYCC_U8("mac_latin2") },
|
||||
{ YYCC_U8("maccentraleurope"), YYCC_U8("mac_latin2") },
|
||||
{ YYCC_U8("mac_centeuro"), YYCC_U8("mac_latin2") },
|
||||
{ YYCC_U8("macroman"), YYCC_U8("mac_roman") },
|
||||
{ YYCC_U8("macintosh"), YYCC_U8("mac_roman") },
|
||||
{ YYCC_U8("macturkish"), YYCC_U8("mac_turkish") },
|
||||
{ YYCC_U8("csptcp154"), YYCC_U8("ptcp154") },
|
||||
{ YYCC_U8("pt154"), YYCC_U8("ptcp154") },
|
||||
{ YYCC_U8("cp154"), YYCC_U8("ptcp154") },
|
||||
{ YYCC_U8("cyrillic-asian"), YYCC_U8("ptcp154") },
|
||||
{ YYCC_U8("csshiftjis"), YYCC_U8("shift_jis") },
|
||||
{ YYCC_U8("shiftjis"), YYCC_U8("shift_jis") },
|
||||
{ YYCC_U8("sjis"), YYCC_U8("shift_jis") },
|
||||
{ YYCC_U8("s_jis"), YYCC_U8("shift_jis") },
|
||||
{ YYCC_U8("shiftjis2004"), YYCC_U8("shift_jis_2004") },
|
||||
{ YYCC_U8("sjis_2004"), YYCC_U8("shift_jis_2004") },
|
||||
{ YYCC_U8("sjis2004"), YYCC_U8("shift_jis_2004") },
|
||||
{ YYCC_U8("shiftjisx0213"), YYCC_U8("shift_jisx0213") },
|
||||
{ YYCC_U8("sjisx0213"), YYCC_U8("shift_jisx0213") },
|
||||
{ YYCC_U8("s_jisx0213"), YYCC_U8("shift_jisx0213") },
|
||||
{ YYCC_U8("u32"), YYCC_U8("utf_32") },
|
||||
{ YYCC_U8("utf32"), YYCC_U8("utf_32") },
|
||||
{ YYCC_U8("utf-32be"), YYCC_U8("utf_32_be") },
|
||||
{ YYCC_U8("utf-32le"), YYCC_U8("utf_32_le") },
|
||||
{ YYCC_U8("u16"), YYCC_U8("utf_16") },
|
||||
{ YYCC_U8("utf16"), YYCC_U8("utf_16") },
|
||||
{ YYCC_U8("utf-16be"), YYCC_U8("utf_16_be") },
|
||||
{ YYCC_U8("utf-16le"), YYCC_U8("utf_16_le") },
|
||||
{ YYCC_U8("u7"), YYCC_U8("utf_7") },
|
||||
{ YYCC_U8("unicode-1-1-utf-7"), YYCC_U8("utf_7") },
|
||||
{ YYCC_U8("u8"), YYCC_U8("utf_8") },
|
||||
{ YYCC_U8("utf"), YYCC_U8("utf_8") },
|
||||
{ YYCC_U8("utf8"), YYCC_U8("utf_8") },
|
||||
{ YYCC_U8("utf-8"), YYCC_U8("utf_8") },
|
||||
{ YYCC_U8("cp65001"), YYCC_U8("utf_8") },
|
||||
static const std::map<std::u8string_view, std::u8string_view> ALIAS_MAP{
|
||||
{u8"646"sv, u8"ascii"sv},
|
||||
{u8"us-ascii"sv, u8"ascii"sv},
|
||||
{u8"big5-tw"sv, u8"big5"sv},
|
||||
{u8"csbig5"sv, u8"big5"sv},
|
||||
{u8"big5-hkscs"sv, u8"big5hkscs"sv},
|
||||
{u8"hkscs"sv, u8"big5hkscs"sv},
|
||||
{u8"ibm037"sv, u8"cp037"sv},
|
||||
{u8"ibm039"sv, u8"cp037"sv},
|
||||
{u8"273"sv, u8"cp273"sv},
|
||||
{u8"ibm273"sv, u8"cp273"sv},
|
||||
{u8"csibm273"sv, u8"cp273"sv},
|
||||
{u8"ebcdic-cp-he"sv, u8"cp424"sv},
|
||||
{u8"ibm424"sv, u8"cp424"sv},
|
||||
{u8"437"sv, u8"cp437"sv},
|
||||
{u8"ibm437"sv, u8"cp437"sv},
|
||||
{u8"ebcdic-cp-be"sv, u8"cp500"sv},
|
||||
{u8"ebcdic-cp-ch"sv, u8"cp500"sv},
|
||||
{u8"ibm500"sv, u8"cp500"sv},
|
||||
{u8"ibm775"sv, u8"cp775"sv},
|
||||
{u8"850"sv, u8"cp850"sv},
|
||||
{u8"ibm850"sv, u8"cp850"sv},
|
||||
{u8"852"sv, u8"cp852"sv},
|
||||
{u8"ibm852"sv, u8"cp852"sv},
|
||||
{u8"855"sv, u8"cp855"sv},
|
||||
{u8"ibm855"sv, u8"cp855"sv},
|
||||
{u8"857"sv, u8"cp857"sv},
|
||||
{u8"ibm857"sv, u8"cp857"sv},
|
||||
{u8"858"sv, u8"cp858"sv},
|
||||
{u8"ibm858"sv, u8"cp858"sv},
|
||||
{u8"860"sv, u8"cp860"sv},
|
||||
{u8"ibm860"sv, u8"cp860"sv},
|
||||
{u8"861"sv, u8"cp861"sv},
|
||||
{u8"cp-is"sv, u8"cp861"sv},
|
||||
{u8"ibm861"sv, u8"cp861"sv},
|
||||
{u8"862"sv, u8"cp862"sv},
|
||||
{u8"ibm862"sv, u8"cp862"sv},
|
||||
{u8"863"sv, u8"cp863"sv},
|
||||
{u8"ibm863"sv, u8"cp863"sv},
|
||||
{u8"ibm864"sv, u8"cp864"sv},
|
||||
{u8"865"sv, u8"cp865"sv},
|
||||
{u8"ibm865"sv, u8"cp865"sv},
|
||||
{u8"866"sv, u8"cp866"sv},
|
||||
{u8"ibm866"sv, u8"cp866"sv},
|
||||
{u8"869"sv, u8"cp869"sv},
|
||||
{u8"cp-gr"sv, u8"cp869"sv},
|
||||
{u8"ibm869"sv, u8"cp869"sv},
|
||||
{u8"932"sv, u8"cp932"sv},
|
||||
{u8"ms932"sv, u8"cp932"sv},
|
||||
{u8"mskanji"sv, u8"cp932"sv},
|
||||
{u8"ms-kanji"sv, u8"cp932"sv},
|
||||
{u8"windows-31j"sv, u8"cp932"sv},
|
||||
{u8"949"sv, u8"cp949"sv},
|
||||
{u8"ms949"sv, u8"cp949"sv},
|
||||
{u8"uhc"sv, u8"cp949"sv},
|
||||
{u8"950"sv, u8"cp950"sv},
|
||||
{u8"ms950"sv, u8"cp950"sv},
|
||||
{u8"ibm1026"sv, u8"cp1026"sv},
|
||||
{u8"1125"sv, u8"cp1125"sv},
|
||||
{u8"ibm1125"sv, u8"cp1125"sv},
|
||||
{u8"cp866u"sv, u8"cp1125"sv},
|
||||
{u8"ruscii"sv, u8"cp1125"sv},
|
||||
{u8"ibm1140"sv, u8"cp1140"sv},
|
||||
{u8"windows-1250"sv, u8"cp1250"sv},
|
||||
{u8"windows-1251"sv, u8"cp1251"sv},
|
||||
{u8"windows-1252"sv, u8"cp1252"sv},
|
||||
{u8"windows-1253"sv, u8"cp1253"sv},
|
||||
{u8"windows-1254"sv, u8"cp1254"sv},
|
||||
{u8"windows-1255"sv, u8"cp1255"sv},
|
||||
{u8"windows-1256"sv, u8"cp1256"sv},
|
||||
{u8"windows-1257"sv, u8"cp1257"sv},
|
||||
{u8"windows-1258"sv, u8"cp1258"sv},
|
||||
{u8"eucjp"sv, u8"euc_jp"sv},
|
||||
{u8"ujis"sv, u8"euc_jp"sv},
|
||||
{u8"u-jis"sv, u8"euc_jp"sv},
|
||||
{u8"jisx0213"sv, u8"euc_jis_2004"sv},
|
||||
{u8"eucjis2004"sv, u8"euc_jis_2004"sv},
|
||||
{u8"eucjisx0213"sv, u8"euc_jisx0213"sv},
|
||||
{u8"euckr"sv, u8"euc_kr"sv},
|
||||
{u8"korean"sv, u8"euc_kr"sv},
|
||||
{u8"ksc5601"sv, u8"euc_kr"sv},
|
||||
{u8"ks_c-5601"sv, u8"euc_kr"sv},
|
||||
{u8"ks_c-5601-1987"sv, u8"euc_kr"sv},
|
||||
{u8"ksx1001"sv, u8"euc_kr"sv},
|
||||
{u8"ks_x-1001"sv, u8"euc_kr"sv},
|
||||
{u8"chinese"sv, u8"gb2312"sv},
|
||||
{u8"csiso58gb231280"sv, u8"gb2312"sv},
|
||||
{u8"euc-cn"sv, u8"gb2312"sv},
|
||||
{u8"euccn"sv, u8"gb2312"sv},
|
||||
{u8"eucgb2312-cn"sv, u8"gb2312"sv},
|
||||
{u8"gb2312-1980"sv, u8"gb2312"sv},
|
||||
{u8"gb2312-80"sv, u8"gb2312"sv},
|
||||
{u8"iso-ir-58"sv, u8"gb2312"sv},
|
||||
{u8"936"sv, u8"gbk"sv},
|
||||
{u8"cp936"sv, u8"gbk"sv},
|
||||
{u8"ms936"sv, u8"gbk"sv},
|
||||
{u8"gb18030-2000"sv, u8"gb18030"sv},
|
||||
{u8"hzgb"sv, u8"hz"sv},
|
||||
{u8"hz-gb"sv, u8"hz"sv},
|
||||
{u8"hz-gb-2312"sv, u8"hz"sv},
|
||||
{u8"csiso2022jp"sv, u8"iso2022_jp"sv},
|
||||
{u8"iso2022jp"sv, u8"iso2022_jp"sv},
|
||||
{u8"iso-2022-jp"sv, u8"iso2022_jp"sv},
|
||||
{u8"iso2022jp-1"sv, u8"iso2022_jp_1"sv},
|
||||
{u8"iso-2022-jp-1"sv, u8"iso2022_jp_1"sv},
|
||||
{u8"iso2022jp-2"sv, u8"iso2022_jp_2"sv},
|
||||
{u8"iso-2022-jp-2"sv, u8"iso2022_jp_2"sv},
|
||||
{u8"iso2022jp-2004"sv, u8"iso2022_jp_2004"sv},
|
||||
{u8"iso-2022-jp-2004"sv, u8"iso2022_jp_2004"sv},
|
||||
{u8"iso2022jp-3"sv, u8"iso2022_jp_3"sv},
|
||||
{u8"iso-2022-jp-3"sv, u8"iso2022_jp_3"sv},
|
||||
{u8"iso2022jp-ext"sv, u8"iso2022_jp_ext"sv},
|
||||
{u8"iso-2022-jp-ext"sv, u8"iso2022_jp_ext"sv},
|
||||
{u8"csiso2022kr"sv, u8"iso2022_kr"sv},
|
||||
{u8"iso2022kr"sv, u8"iso2022_kr"sv},
|
||||
{u8"iso-2022-kr"sv, u8"iso2022_kr"sv},
|
||||
{u8"iso-8859-1"sv, u8"latin_1"sv},
|
||||
{u8"iso8859-1"sv, u8"latin_1"sv},
|
||||
{u8"8859"sv, u8"latin_1"sv},
|
||||
{u8"cp819"sv, u8"latin_1"sv},
|
||||
{u8"latin"sv, u8"latin_1"sv},
|
||||
{u8"latin1"sv, u8"latin_1"sv},
|
||||
{u8"l1"sv, u8"latin_1"sv},
|
||||
{u8"iso-8859-2"sv, u8"iso8859_2"sv},
|
||||
{u8"latin2"sv, u8"iso8859_2"sv},
|
||||
{u8"l2"sv, u8"iso8859_2"sv},
|
||||
{u8"iso-8859-3"sv, u8"iso8859_3"sv},
|
||||
{u8"latin3"sv, u8"iso8859_3"sv},
|
||||
{u8"l3"sv, u8"iso8859_3"sv},
|
||||
{u8"iso-8859-4"sv, u8"iso8859_4"sv},
|
||||
{u8"latin4"sv, u8"iso8859_4"sv},
|
||||
{u8"l4"sv, u8"iso8859_4"sv},
|
||||
{u8"iso-8859-5"sv, u8"iso8859_5"sv},
|
||||
{u8"cyrillic"sv, u8"iso8859_5"sv},
|
||||
{u8"iso-8859-6"sv, u8"iso8859_6"sv},
|
||||
{u8"arabic"sv, u8"iso8859_6"sv},
|
||||
{u8"iso-8859-7"sv, u8"iso8859_7"sv},
|
||||
{u8"greek"sv, u8"iso8859_7"sv},
|
||||
{u8"greek8"sv, u8"iso8859_7"sv},
|
||||
{u8"iso-8859-8"sv, u8"iso8859_8"sv},
|
||||
{u8"hebrew"sv, u8"iso8859_8"sv},
|
||||
{u8"iso-8859-9"sv, u8"iso8859_9"sv},
|
||||
{u8"latin5"sv, u8"iso8859_9"sv},
|
||||
{u8"l5"sv, u8"iso8859_9"sv},
|
||||
{u8"iso-8859-10"sv, u8"iso8859_10"sv},
|
||||
{u8"latin6"sv, u8"iso8859_10"sv},
|
||||
{u8"l6"sv, u8"iso8859_10"sv},
|
||||
{u8"iso-8859-11"sv, u8"iso8859_11"sv},
|
||||
{u8"thai"sv, u8"iso8859_11"sv},
|
||||
{u8"iso-8859-13"sv, u8"iso8859_13"sv},
|
||||
{u8"latin7"sv, u8"iso8859_13"sv},
|
||||
{u8"l7"sv, u8"iso8859_13"sv},
|
||||
{u8"iso-8859-14"sv, u8"iso8859_14"sv},
|
||||
{u8"latin8"sv, u8"iso8859_14"sv},
|
||||
{u8"l8"sv, u8"iso8859_14"sv},
|
||||
{u8"iso-8859-15"sv, u8"iso8859_15"sv},
|
||||
{u8"latin9"sv, u8"iso8859_15"sv},
|
||||
{u8"l9"sv, u8"iso8859_15"sv},
|
||||
{u8"iso-8859-16"sv, u8"iso8859_16"sv},
|
||||
{u8"latin10"sv, u8"iso8859_16"sv},
|
||||
{u8"l10"sv, u8"iso8859_16"sv},
|
||||
{u8"cp1361"sv, u8"johab"sv},
|
||||
{u8"ms1361"sv, u8"johab"sv},
|
||||
{u8"kz_1048"sv, u8"kz1048"sv},
|
||||
{u8"strk1048_2002"sv, u8"kz1048"sv},
|
||||
{u8"rk1048"sv, u8"kz1048"sv},
|
||||
{u8"maccyrillic"sv, u8"mac_cyrillic"sv},
|
||||
{u8"macgreek"sv, u8"mac_greek"sv},
|
||||
{u8"maciceland"sv, u8"mac_iceland"sv},
|
||||
{u8"maclatin2"sv, u8"mac_latin2"sv},
|
||||
{u8"maccentraleurope"sv, u8"mac_latin2"sv},
|
||||
{u8"mac_centeuro"sv, u8"mac_latin2"sv},
|
||||
{u8"macroman"sv, u8"mac_roman"sv},
|
||||
{u8"macintosh"sv, u8"mac_roman"sv},
|
||||
{u8"macturkish"sv, u8"mac_turkish"sv},
|
||||
{u8"csptcp154"sv, u8"ptcp154"sv},
|
||||
{u8"pt154"sv, u8"ptcp154"sv},
|
||||
{u8"cp154"sv, u8"ptcp154"sv},
|
||||
{u8"cyrillic-asian"sv, u8"ptcp154"sv},
|
||||
{u8"csshiftjis"sv, u8"shift_jis"sv},
|
||||
{u8"shiftjis"sv, u8"shift_jis"sv},
|
||||
{u8"sjis"sv, u8"shift_jis"sv},
|
||||
{u8"s_jis"sv, u8"shift_jis"sv},
|
||||
{u8"shiftjis2004"sv, u8"shift_jis_2004"sv},
|
||||
{u8"sjis_2004"sv, u8"shift_jis_2004"sv},
|
||||
{u8"sjis2004"sv, u8"shift_jis_2004"sv},
|
||||
{u8"shiftjisx0213"sv, u8"shift_jisx0213"sv},
|
||||
{u8"sjisx0213"sv, u8"shift_jisx0213"sv},
|
||||
{u8"s_jisx0213"sv, u8"shift_jisx0213"sv},
|
||||
{u8"u32"sv, u8"utf_32"sv},
|
||||
{u8"utf32"sv, u8"utf_32"sv},
|
||||
{u8"utf-32be"sv, u8"utf_32_be"sv},
|
||||
{u8"utf-32le"sv, u8"utf_32_le"sv},
|
||||
{u8"u16"sv, u8"utf_16"sv},
|
||||
{u8"utf16"sv, u8"utf_16"sv},
|
||||
{u8"utf-16be"sv, u8"utf_16_be"sv},
|
||||
{u8"utf-16le"sv, u8"utf_16_le"sv},
|
||||
{u8"u7"sv, u8"utf_7"sv},
|
||||
{u8"unicode-1-1-utf-7"sv, u8"utf_7"sv},
|
||||
{u8"u8"sv, u8"utf_8"sv},
|
||||
{u8"utf"sv, u8"utf_8"sv},
|
||||
{u8"utf8"sv, u8"utf_8"sv},
|
||||
{u8"utf-8"sv, u8"utf_8"sv},
|
||||
{u8"cp65001"sv, u8"utf_8"sv},
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Resolve encoding name alias and fetch real encoding name.
|
||||
* @param[in] enc_name The encoding name to resolve.
|
||||
* @return
|
||||
* The given encoding name, unchanged, if it is not present in the alias map.
|
||||
* Otherwise, the encoding name that the given alias maps to.
|
||||
*/
|
||||
static std::u8string resolve_encoding_alias(const std::u8string_view& enc_name) {
|
||||
// Pass the name through op::to_lower before the lookup; the ALIAS_MAP keys visible in this file are all lower-case.
auto name = op::to_lower(enc_name);
|
||||
// Search the alias table with the lowered name.
auto finder = ALIAS_MAP.find(name);
|
||||
if (finder == ALIAS_MAP.end()) {
|
||||
// not found, use original encoding name.
// Note: this returns the caller's original spelling (enc_name), not the lowered copy (name).
return std::u8string(enc_name);
|
||||
} else {
|
||||
// found, use found encoding name.
// The mapped value is a string view, so it is copied into an owning std::u8string for the caller.
return std::u8string(finder->second);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
|
||||
|
||||
using CodePage = NS_YYCC_ENCODING_BACKEND::CodePage;
|
||||
using CodePage = YYCC_PYCODEC_BACKEND_NS::CodePage;
|
||||
|
||||
static const std::map<NS_YYCC_STRING::u8string, CodePage> WINCP_MAP {
|
||||
{ YYCC_U8("ascii"), static_cast<CodePage>(437u) },
|
||||
{ YYCC_U8("big5"), static_cast<CodePage>(950u) },
|
||||
{ YYCC_U8("cp037"), static_cast<CodePage>(037u) },
|
||||
{ YYCC_U8("cp437"), static_cast<CodePage>(437u) },
|
||||
{ YYCC_U8("cp500"), static_cast<CodePage>(500u) },
|
||||
{ YYCC_U8("cp720"), static_cast<CodePage>(720u) },
|
||||
{ YYCC_U8("cp737"), static_cast<CodePage>(737u) },
|
||||
{ YYCC_U8("cp775"), static_cast<CodePage>(775u) },
|
||||
{ YYCC_U8("cp850"), static_cast<CodePage>(850u) },
|
||||
{ YYCC_U8("cp852"), static_cast<CodePage>(852u) },
|
||||
{ YYCC_U8("cp855"), static_cast<CodePage>(855u) },
|
||||
{ YYCC_U8("cp857"), static_cast<CodePage>(857u) },
|
||||
{ YYCC_U8("cp858"), static_cast<CodePage>(858u) },
|
||||
{ YYCC_U8("cp860"), static_cast<CodePage>(860u) },
|
||||
{ YYCC_U8("cp861"), static_cast<CodePage>(861u) },
|
||||
{ YYCC_U8("cp862"), static_cast<CodePage>(862u) },
|
||||
{ YYCC_U8("cp863"), static_cast<CodePage>(863u) },
|
||||
{ YYCC_U8("cp864"), static_cast<CodePage>(864u) },
|
||||
{ YYCC_U8("cp865"), static_cast<CodePage>(865u) },
|
||||
{ YYCC_U8("cp866"), static_cast<CodePage>(866u) },
|
||||
{ YYCC_U8("cp869"), static_cast<CodePage>(869u) },
|
||||
{ YYCC_U8("cp874"), static_cast<CodePage>(874u) },
|
||||
{ YYCC_U8("cp875"), static_cast<CodePage>(875u) },
|
||||
{ YYCC_U8("cp932"), static_cast<CodePage>(932u) },
|
||||
{ YYCC_U8("cp949"), static_cast<CodePage>(949u) },
|
||||
{ YYCC_U8("cp950"), static_cast<CodePage>(950u) },
|
||||
{ YYCC_U8("cp1026"), static_cast<CodePage>(1026u) },
|
||||
{ YYCC_U8("cp1140"), static_cast<CodePage>(1140u) },
|
||||
{ YYCC_U8("cp1250"), static_cast<CodePage>(1250u) },
|
||||
{ YYCC_U8("cp1251"), static_cast<CodePage>(1251u) },
|
||||
{ YYCC_U8("cp1252"), static_cast<CodePage>(1252u) },
|
||||
{ YYCC_U8("cp1253"), static_cast<CodePage>(1253u) },
|
||||
{ YYCC_U8("cp1254"), static_cast<CodePage>(1254u) },
|
||||
{ YYCC_U8("cp1255"), static_cast<CodePage>(1255u) },
|
||||
{ YYCC_U8("cp1256"), static_cast<CodePage>(1256u) },
|
||||
{ YYCC_U8("cp1257"), static_cast<CodePage>(1257u) },
|
||||
{ YYCC_U8("cp1258"), static_cast<CodePage>(1258u) },
|
||||
{ YYCC_U8("euc_jp"), static_cast<CodePage>(20932u) },
|
||||
{ YYCC_U8("euc_kr"), static_cast<CodePage>(51949u) },
|
||||
{ YYCC_U8("gb2312"), static_cast<CodePage>(936u) },
|
||||
{ YYCC_U8("gbk"), static_cast<CodePage>(936u) },
|
||||
{ YYCC_U8("gb18030"), static_cast<CodePage>(54936u) },
|
||||
{ YYCC_U8("hz"), static_cast<CodePage>(52936u) },
|
||||
{ YYCC_U8("iso2022_jp"), static_cast<CodePage>(50220u) },
|
||||
{ YYCC_U8("iso2022_kr"), static_cast<CodePage>(50225u) },
|
||||
{ YYCC_U8("latin_1"), static_cast<CodePage>(28591u) },
|
||||
{ YYCC_U8("iso8859_2"), static_cast<CodePage>(28592u) },
|
||||
{ YYCC_U8("iso8859_3"), static_cast<CodePage>(28593u) },
|
||||
{ YYCC_U8("iso8859_4"), static_cast<CodePage>(28594u) },
|
||||
{ YYCC_U8("iso8859_5"), static_cast<CodePage>(28595u) },
|
||||
{ YYCC_U8("iso8859_6"), static_cast<CodePage>(28596u) },
|
||||
{ YYCC_U8("iso8859_7"), static_cast<CodePage>(28597u) },
|
||||
{ YYCC_U8("iso8859_8"), static_cast<CodePage>(28598u) },
|
||||
{ YYCC_U8("iso8859_9"), static_cast<CodePage>(28599u) },
|
||||
{ YYCC_U8("iso8859_13"), static_cast<CodePage>(28603u) },
|
||||
{ YYCC_U8("iso8859_15"), static_cast<CodePage>(28605u) },
|
||||
{ YYCC_U8("johab"), static_cast<CodePage>(1361u) },
|
||||
{ YYCC_U8("mac_cyrillic"), static_cast<CodePage>(10007u) },
|
||||
{ YYCC_U8("mac_greek"), static_cast<CodePage>(10006u) },
|
||||
{ YYCC_U8("mac_iceland"), static_cast<CodePage>(10079u) },
|
||||
{ YYCC_U8("mac_turkish"), static_cast<CodePage>(10081u) },
|
||||
{ YYCC_U8("shift_jis"), static_cast<CodePage>(932u) },
|
||||
{ YYCC_U8("utf_7"), static_cast<CodePage>(65000u) },
|
||||
{ YYCC_U8("utf_8"), static_cast<CodePage>(65001u) },
|
||||
static const std::map<std::u8string_view, CodePage> WINCP_MAP{
|
||||
{u8"ascii"sv, static_cast<CodePage>(437u)}, {u8"big5"sv, static_cast<CodePage>(950u)},
|
||||
{u8"cp037"sv, static_cast<CodePage>(037u)}, {u8"cp437"sv, static_cast<CodePage>(437u)},
|
||||
{u8"cp500"sv, static_cast<CodePage>(500u)}, {u8"cp720"sv, static_cast<CodePage>(720u)},
|
||||
{u8"cp737"sv, static_cast<CodePage>(737u)}, {u8"cp775"sv, static_cast<CodePage>(775u)},
|
||||
{u8"cp850"sv, static_cast<CodePage>(850u)}, {u8"cp852"sv, static_cast<CodePage>(852u)},
|
||||
{u8"cp855"sv, static_cast<CodePage>(855u)}, {u8"cp857"sv, static_cast<CodePage>(857u)},
|
||||
{u8"cp858"sv, static_cast<CodePage>(858u)}, {u8"cp860"sv, static_cast<CodePage>(860u)},
|
||||
{u8"cp861"sv, static_cast<CodePage>(861u)}, {u8"cp862"sv, static_cast<CodePage>(862u)},
|
||||
{u8"cp863"sv, static_cast<CodePage>(863u)}, {u8"cp864"sv, static_cast<CodePage>(864u)},
|
||||
{u8"cp865"sv, static_cast<CodePage>(865u)}, {u8"cp866"sv, static_cast<CodePage>(866u)},
|
||||
{u8"cp869"sv, static_cast<CodePage>(869u)}, {u8"cp874"sv, static_cast<CodePage>(874u)},
|
||||
{u8"cp875"sv, static_cast<CodePage>(875u)}, {u8"cp932"sv, static_cast<CodePage>(932u)},
|
||||
{u8"cp949"sv, static_cast<CodePage>(949u)}, {u8"cp950"sv, static_cast<CodePage>(950u)},
|
||||
{u8"cp1026"sv, static_cast<CodePage>(1026u)}, {u8"cp1140"sv, static_cast<CodePage>(1140u)},
|
||||
{u8"cp1250"sv, static_cast<CodePage>(1250u)}, {u8"cp1251"sv, static_cast<CodePage>(1251u)},
|
||||
{u8"cp1252"sv, static_cast<CodePage>(1252u)}, {u8"cp1253"sv, static_cast<CodePage>(1253u)},
|
||||
{u8"cp1254"sv, static_cast<CodePage>(1254u)}, {u8"cp1255"sv, static_cast<CodePage>(1255u)},
|
||||
{u8"cp1256"sv, static_cast<CodePage>(1256u)}, {u8"cp1257"sv, static_cast<CodePage>(1257u)},
|
||||
{u8"cp1258"sv, static_cast<CodePage>(1258u)}, {u8"euc_jp"sv, static_cast<CodePage>(20932u)},
|
||||
{u8"euc_kr"sv, static_cast<CodePage>(51949u)}, {u8"gb2312"sv, static_cast<CodePage>(936u)},
|
||||
{u8"gbk"sv, static_cast<CodePage>(936u)}, {u8"gb18030"sv, static_cast<CodePage>(54936u)},
|
||||
{u8"hz"sv, static_cast<CodePage>(52936u)}, {u8"iso2022_jp"sv, static_cast<CodePage>(50220u)},
|
||||
{u8"iso2022_kr"sv, static_cast<CodePage>(50225u)}, {u8"latin_1"sv, static_cast<CodePage>(28591u)},
|
||||
{u8"iso8859_2"sv, static_cast<CodePage>(28592u)}, {u8"iso8859_3"sv, static_cast<CodePage>(28593u)},
|
||||
{u8"iso8859_4"sv, static_cast<CodePage>(28594u)}, {u8"iso8859_5"sv, static_cast<CodePage>(28595u)},
|
||||
{u8"iso8859_6"sv, static_cast<CodePage>(28596u)}, {u8"iso8859_7"sv, static_cast<CodePage>(28597u)},
|
||||
{u8"iso8859_8"sv, static_cast<CodePage>(28598u)}, {u8"iso8859_9"sv, static_cast<CodePage>(28599u)},
|
||||
{u8"iso8859_13"sv, static_cast<CodePage>(28603u)}, {u8"iso8859_15"sv, static_cast<CodePage>(28605u)},
|
||||
{u8"johab"sv, static_cast<CodePage>(1361u)}, {u8"mac_cyrillic"sv, static_cast<CodePage>(10007u)},
|
||||
{u8"mac_greek"sv, static_cast<CodePage>(10006u)}, {u8"mac_iceland"sv, static_cast<CodePage>(10079u)},
|
||||
{u8"mac_turkish"sv, static_cast<CodePage>(10081u)}, {u8"shift_jis"sv, static_cast<CodePage>(932u)},
|
||||
{u8"utf_7"sv, static_cast<CodePage>(65000u)}, {u8"utf_8"sv, static_cast<CodePage>(65001u)},
|
||||
};
|
||||
|
||||
static bool fetch_code_page(const std::u8string_view& enc_name, CodePage& out_cp) {
|
||||
// resolve alias
|
||||
std::u8string resolved_name = resolve_encoding_alias(enc_name);
|
||||
// find code page
|
||||
op::lower(resolved_name);
|
||||
auto finder = WINCP_MAP.find(resolved_name);
|
||||
if (finder == WINCP_MAP.end()) return false;
|
||||
// okey, we found it.
|
||||
out_cp = finder->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static const std::map<NS_YYCC_STRING::u8string, std::string> ICONV_MAP {
|
||||
{ YYCC_U8("ascii"), "ASCII" },
|
||||
{ YYCC_U8("big5"), "BIG5" },
|
||||
{ YYCC_U8("big5hkscs"), "BIG5-HKSCS" },
|
||||
{ YYCC_U8("cp850"), "CP850" },
|
||||
{ YYCC_U8("cp862"), "CP862" },
|
||||
{ YYCC_U8("cp866"), "CP866" },
|
||||
{ YYCC_U8("cp874"), "CP874" },
|
||||
{ YYCC_U8("cp932"), "CP932" },
|
||||
{ YYCC_U8("cp949"), "CP949" },
|
||||
{ YYCC_U8("cp950"), "CP950" },
|
||||
{ YYCC_U8("cp1250"), "CP1250" },
|
||||
{ YYCC_U8("cp1251"), "CP1251" },
|
||||
{ YYCC_U8("cp1252"), "CP1252" },
|
||||
{ YYCC_U8("cp1253"), "CP1253" },
|
||||
{ YYCC_U8("cp1254"), "CP1254" },
|
||||
{ YYCC_U8("cp1255"), "CP1255" },
|
||||
{ YYCC_U8("cp1256"), "CP1256" },
|
||||
{ YYCC_U8("cp1257"), "CP1257" },
|
||||
{ YYCC_U8("cp1258"), "CP1258" },
|
||||
{ YYCC_U8("euc_jp"), "EUC-JP" },
|
||||
{ YYCC_U8("euc_kr"), "EUC-KR" },
|
||||
{ YYCC_U8("gb2312"), "CP936" },
|
||||
{ YYCC_U8("gbk"), "GBK" },
|
||||
{ YYCC_U8("gb18030"), "GB18030" },
|
||||
{ YYCC_U8("hz"), "HZ" },
|
||||
{ YYCC_U8("iso2022_jp"), "ISO-2022-JP" },
|
||||
{ YYCC_U8("iso2022_jp_1"), "ISO-2022-JP-1" },
|
||||
{ YYCC_U8("iso2022_jp_2"), "ISO-2022-JP-2" },
|
||||
{ YYCC_U8("iso2022_kr"), "ISO-2022-KR" },
|
||||
{ YYCC_U8("latin_1"), "ISO-8859-1" },
|
||||
{ YYCC_U8("iso8859_2"), "ISO-8859-2" },
|
||||
{ YYCC_U8("iso8859_3"), "ISO-8859-3" },
|
||||
{ YYCC_U8("iso8859_4"), "ISO-8859-4" },
|
||||
{ YYCC_U8("iso8859_5"), "ISO-8859-5" },
|
||||
{ YYCC_U8("iso8859_6"), "ISO-8859-6" },
|
||||
{ YYCC_U8("iso8859_7"), "ISO-8859-7" },
|
||||
{ YYCC_U8("iso8859_8"), "ISO-8859-8" },
|
||||
{ YYCC_U8("iso8859_9"), "ISO-8859-9" },
|
||||
{ YYCC_U8("iso8859_10"), "ISO-8859-10" },
|
||||
{ YYCC_U8("iso8859_11"), "ISO-8859-11" },
|
||||
{ YYCC_U8("iso8859_13"), "ISO-8859-13" },
|
||||
{ YYCC_U8("iso8859_14"), "ISO-8859-14" },
|
||||
{ YYCC_U8("iso8859_15"), "ISO-8859-15" },
|
||||
{ YYCC_U8("iso8859_16"), "ISO-8859-16" },
|
||||
{ YYCC_U8("johab"), "JOHAB" },
|
||||
{ YYCC_U8("koi8_t"), "KOI8-T" },
|
||||
{ YYCC_U8("mac_cyrillic"), "MacCyrillic" },
|
||||
{ YYCC_U8("mac_greek"), "MacGreek" },
|
||||
{ YYCC_U8("mac_iceland"), "MacIceland" },
|
||||
{ YYCC_U8("mac_roman"), "MacRoman" },
|
||||
{ YYCC_U8("mac_turkish"), "MacTurkish" },
|
||||
{ YYCC_U8("ptcp154"), "PT154" },
|
||||
{ YYCC_U8("shift_jis"), "SHIFT_JIS" },
|
||||
{ YYCC_U8("utf_32"), "UTF-32" },
|
||||
{ YYCC_U8("utf_32_be"), "UTF-32BE" },
|
||||
{ YYCC_U8("utf_32_le"), "UTF-32LE" },
|
||||
{ YYCC_U8("utf_16"), "UTF16" },
|
||||
{ YYCC_U8("utf_16_be"), "UTF-16BE" },
|
||||
{ YYCC_U8("utf_16_le"), "UTF-16LE" },
|
||||
{ YYCC_U8("utf_7"), "UTF-7" },
|
||||
{ YYCC_U8("utf_8"), "UTF-8" },
|
||||
static const std::map<std::u8string_view, std::string_view> ICONV_MAP{
|
||||
{u8"ascii"sv, "ASCII"sv},
|
||||
{u8"big5"sv, "BIG5"sv},
|
||||
{u8"big5hkscs"sv, "BIG5-HKSCS"sv},
|
||||
{u8"cp850"sv, "CP850"sv},
|
||||
{u8"cp862"sv, "CP862"sv},
|
||||
{u8"cp866"sv, "CP866"sv},
|
||||
{u8"cp874"sv, "CP874"sv},
|
||||
{u8"cp932"sv, "CP932"sv},
|
||||
{u8"cp949"sv, "CP949"sv},
|
||||
{u8"cp950"sv, "CP950"sv},
|
||||
{u8"cp1250"sv, "CP1250"sv},
|
||||
{u8"cp1251"sv, "CP1251"sv},
|
||||
{u8"cp1252"sv, "CP1252"sv},
|
||||
{u8"cp1253"sv, "CP1253"sv},
|
||||
{u8"cp1254"sv, "CP1254"sv},
|
||||
{u8"cp1255"sv, "CP1255"sv},
|
||||
{u8"cp1256"sv, "CP1256"sv},
|
||||
{u8"cp1257"sv, "CP1257"sv},
|
||||
{u8"cp1258"sv, "CP1258"sv},
|
||||
{u8"euc_jp"sv, "EUC-JP"sv},
|
||||
{u8"euc_kr"sv, "EUC-KR"sv},
|
||||
{u8"gb2312"sv, "CP936"sv},
|
||||
{u8"gbk"sv, "GBK"sv},
|
||||
{u8"gb18030"sv, "GB18030"sv},
|
||||
{u8"hz"sv, "HZ"sv},
|
||||
{u8"iso2022_jp"sv, "ISO-2022-JP"sv},
|
||||
{u8"iso2022_jp_1"sv, "ISO-2022-JP-1"sv},
|
||||
{u8"iso2022_jp_2"sv, "ISO-2022-JP-2"sv},
|
||||
{u8"iso2022_kr"sv, "ISO-2022-KR"sv},
|
||||
{u8"latin_1"sv, "ISO-8859-1"sv},
|
||||
{u8"iso8859_2"sv, "ISO-8859-2"sv},
|
||||
{u8"iso8859_3"sv, "ISO-8859-3"sv},
|
||||
{u8"iso8859_4"sv, "ISO-8859-4"sv},
|
||||
{u8"iso8859_5"sv, "ISO-8859-5"sv},
|
||||
{u8"iso8859_6"sv, "ISO-8859-6"sv},
|
||||
{u8"iso8859_7"sv, "ISO-8859-7"sv},
|
||||
{u8"iso8859_8"sv, "ISO-8859-8"sv},
|
||||
{u8"iso8859_9"sv, "ISO-8859-9"sv},
|
||||
{u8"iso8859_10"sv, "ISO-8859-10"sv},
|
||||
{u8"iso8859_11"sv, "ISO-8859-11"sv},
|
||||
{u8"iso8859_13"sv, "ISO-8859-13"sv},
|
||||
{u8"iso8859_14"sv, "ISO-8859-14"sv},
|
||||
{u8"iso8859_15"sv, "ISO-8859-15"sv},
|
||||
{u8"iso8859_16"sv, "ISO-8859-16"sv},
|
||||
{u8"johab"sv, "JOHAB"sv},
|
||||
{u8"koi8_t"sv, "KOI8-T"sv},
|
||||
{u8"mac_cyrillic"sv, "MacCyrillic"sv},
|
||||
{u8"mac_greek"sv, "MacGreek"sv},
|
||||
{u8"mac_iceland"sv, "MacIceland"sv},
|
||||
{u8"mac_roman"sv, "MacRoman"sv},
|
||||
{u8"mac_turkish"sv, "MacTurkish"sv},
|
||||
{u8"ptcp154"sv, "PT154"sv},
|
||||
{u8"shift_jis"sv, "SHIFT_JIS"sv},
|
||||
{u8"utf_32"sv, "UTF-32"sv},
|
||||
{u8"utf_32_be"sv, "UTF-32BE"sv},
|
||||
{u8"utf_32_le"sv, "UTF-32LE"sv},
|
||||
{u8"utf_16"sv, "UTF16"sv},
|
||||
{u8"utf_16_be"sv, "UTF-16BE"sv},
|
||||
{u8"utf_16_le"sv, "UTF-16LE"sv},
|
||||
{u8"utf_7"sv, "UTF-7"sv},
|
||||
{u8"utf_8"sv, "UTF-8"sv},
|
||||
};
|
||||
|
||||
#endif
|
||||
static bool fetch_iconv_name(const std::u8string_view& enc_name, std::string& out_code) {
|
||||
// resolve alias
|
||||
std::u8string resolved_name = resolve_encoding_alias(enc_name);
|
||||
// find code page
|
||||
op::lower(resolved_name);
|
||||
auto finder = ICONV_MAP.find(resolved_name);
|
||||
if (finder == ICONV_MAP.end()) return false;
|
||||
// okey, we found it.
|
||||
out_code = finder->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#pragma endregion
|
||||
|
||||
@ -362,12 +373,27 @@ namespace yycc::encoding::pycodec {
|
||||
ConvError::ConvError(const ConvError::Error& err) : inner(err) {}
|
||||
|
||||
bool is_valid_encoding_name(const EncodingName& name) {
|
||||
|
||||
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
|
||||
CodePage unused;
|
||||
return fetch_code_page(name, unused);
|
||||
#else
|
||||
std::string unused;
|
||||
return fetch_iconv_name(name, unused);
|
||||
#endif
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region
|
||||
#pragma region Char -> UTF8
|
||||
|
||||
// Construct a Char -> UTF8 converter from a universal encoding name.
// The member being initialized depends on the backend: `code_page` when
// YYCC_PYCODEC_WIN32_BACKEND is defined, otherwise `inner`.
// NOTE(review): both initializers look unfinished -- `fetch` is not a complete call,
// and `fetch_iconv_name()` is invoked without the (name, output) arguments its
// definition requires and returns bool rather than an encoding name. `name` is never
// used. Confirm the intended initialization and the handling of unknown names.
CharToUtf8::CharToUtf8(const EncodingName& name) :
|
||||
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
|
||||
code_page(fetch)
|
||||
#else
|
||||
inner(fetch_iconv_name())
|
||||
#endif
|
||||
{}
|
||||
|
||||
|
||||
#pragma endregion
|
||||
|
||||
|
@ -1,37 +1,43 @@
|
||||
#pragma once
|
||||
#include "../macro/os_detector.hpp"
|
||||
#include "../macro/stl_detector.hpp"
|
||||
#include "../macro/class_copy_move.hpp"
|
||||
#include "../patch/expected.hpp"
|
||||
#include "../string.hpp"
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <expected>
|
||||
|
||||
// Choose the backend of PyCodec module
|
||||
#if defined(YYCC_OS_WINDOWS)
|
||||
#if defined(YYCC_OS_WINDOWS) && defined(YYCC_STL_MSSTL)
|
||||
#include "windows.hpp"
|
||||
#define YYCC_PYCODEC_WIN32_BACKEND
|
||||
#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::windows
|
||||
#else
|
||||
#define YYCC_PYCODEC_BACKEND_NS ::yycc::encoding::windows
|
||||
#elif YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS)
|
||||
#include "iconv.hpp"
|
||||
#define YYCC_PYCODEC_ICONV_BACKEND
|
||||
#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::iconv
|
||||
#define YYCC_PYCODEC_BACKEND_NS ::yycc::encoding::iconv
|
||||
#else
|
||||
#error "Can not find viable encoding convertion solution in current environment for PyCodec module."
|
||||
#endif
|
||||
|
||||
#define NS_YYCC_STRING ::yycc::string
|
||||
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
|
||||
|
||||
namespace yycc::encoding::pycodec {
|
||||
|
||||
using EncodingName = NS_YYCC_STRING::u8string_view;
|
||||
/// @brief The universal name of encoding.
|
||||
using EncodingName = std::u8string_view;
|
||||
|
||||
/// @private
|
||||
struct ConvError {
|
||||
using Error = NS_YYCC_ENCODING_BACKEND::ConvError;
|
||||
/// @brief The possible error occurs in this module.
|
||||
class ConvError {
|
||||
public:
|
||||
using Error = YYCC_PYCODEC_BACKEND_NS::ConvError;
|
||||
ConvError(const Error& err);
|
||||
YYCC_DEFAULT_COPY_MOVE(ConvError)
|
||||
|
||||
private:
|
||||
Error inner;
|
||||
};
|
||||
|
||||
/// @private
|
||||
/// @brief The result type of this module.
|
||||
template<typename T>
|
||||
using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected<T, ConvError>;
|
||||
using ConvResult = std::expected<T, ConvError>;
|
||||
|
||||
/**
|
||||
* @brief Check whether given name is a valid encoding name in PyCodec.
|
||||
@ -40,7 +46,7 @@ namespace yycc::encoding::pycodec {
|
||||
*/
|
||||
bool is_valid_encoding_name(const EncodingName& name);
|
||||
|
||||
// Char -> UTF8
|
||||
/// @brief Char -> UTF8
|
||||
class CharToUtf8 {
|
||||
public:
|
||||
CharToUtf8(const EncodingName& name);
|
||||
@ -49,19 +55,17 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(CharToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src);
|
||||
bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst);
|
||||
NS_YYCC_STRING::u8string to_utf8(const std::string_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::string_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::CodePage code_page;
|
||||
YYCC_PYCODEC_BACKEND_NS::CodePage code_page;
|
||||
#else
|
||||
NS_YYCC_ENCODING_BACKEND::CharToUtf8 inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::CharToUtf8 inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
// UTF8 -> Char
|
||||
/// @brief UTF8 -> Char
|
||||
class Utf8ToChar {
|
||||
public:
|
||||
Utf8ToChar(const EncodingName& name);
|
||||
@ -70,19 +74,17 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToChar)
|
||||
|
||||
public:
|
||||
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src);
|
||||
bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst);
|
||||
std::string to_char(const NS_YYCC_STRING::u8string_view& src);
|
||||
ConvResult<std::string> to_char(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::CodePage code_page;
|
||||
YYCC_PYCODEC_BACKEND_NS::CodePage code_page;
|
||||
#else
|
||||
NS_YYCC_ENCODING_BACKEND::Utf8ToChar inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::Utf8ToChar inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
// WChar -> UTF8
|
||||
/// @brief WChar -> UTF8
|
||||
class WcharToUtf8 {
|
||||
public:
|
||||
WcharToUtf8();
|
||||
@ -91,17 +93,15 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(WcharToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::wstring_view& src);
|
||||
bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst);
|
||||
NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::wstring_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::WcharToUtf8 inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::WcharToUtf8 inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
// UTF8 -> WChar
|
||||
/// @brief UTF8 -> WChar
|
||||
class Utf8ToWchar {
|
||||
public:
|
||||
Utf8ToWchar();
|
||||
@ -110,17 +110,15 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToWchar)
|
||||
|
||||
public:
|
||||
ConvResult<std::wstring> priv_to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||
bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst);
|
||||
std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||
ConvResult<std::wstring> to_wchar(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::Utf8ToWchar inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::Utf8ToWchar inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
// UTF8 -> UTF16
|
||||
/// @brief UTF8 -> UTF16
|
||||
class Utf8ToUtf16 {
|
||||
public:
|
||||
Utf8ToUtf16();
|
||||
@ -129,17 +127,15 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToUtf16)
|
||||
|
||||
public:
|
||||
ConvResult<std::u16string> priv_to_utf16(const NS_YYCC_STRING::u8string_view& src);
|
||||
bool to_utf16(const NS_YYCC_STRING::u8string_view& src, std::u16string& dst);
|
||||
std::u16string to_utf16(const NS_YYCC_STRING::u8string_view& src);
|
||||
ConvResult<std::u16string> to_utf16(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::Utf8ToUtf16 inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::Utf8ToUtf16 inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
// UTF16 -> UTF8
|
||||
/// @brief UTF16 -> UTF8
|
||||
class Utf16ToUtf8 {
|
||||
public:
|
||||
Utf16ToUtf8();
|
||||
@ -148,17 +144,15 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(Utf16ToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::u16string_view& src);
|
||||
bool to_utf8(const std::u16string_view& src, NS_YYCC_STRING::u8string& dst);
|
||||
NS_YYCC_STRING::u8string to_utf8(const std::u16string_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::u16string_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::Utf16ToUtf8 inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::Utf16ToUtf8 inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
// UTF8 -> UTF32
|
||||
/// @brief UTF8 -> UTF32
|
||||
class Utf8ToUtf32 {
|
||||
public:
|
||||
Utf8ToUtf32();
|
||||
@ -167,17 +161,15 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(Utf8ToUtf32)
|
||||
|
||||
public:
|
||||
ConvResult<std::u32string> priv_to_utf32(const NS_YYCC_STRING::u8string_view& src);
|
||||
bool to_utf32(const NS_YYCC_STRING::u8string_view& src, std::u32string& dst);
|
||||
std::u32string to_utf32(const NS_YYCC_STRING::u8string_view& src);
|
||||
ConvResult<std::u32string> to_utf32(const std::u8string_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::Utf8ToUtf32 inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::Utf8ToUtf32 inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
// UTF32 -> UTF8
|
||||
/// @brief UTF32 -> UTF8
|
||||
class Utf32ToUtf8 {
|
||||
public:
|
||||
Utf32ToUtf8();
|
||||
@ -186,17 +178,12 @@ namespace yycc::encoding::pycodec {
|
||||
YYCC_DEFAULT_MOVE(Utf32ToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::u32string_view& src);
|
||||
bool to_utf8(const std::u32string_view& src, NS_YYCC_STRING::u8string& dst);
|
||||
NS_YYCC_STRING::u8string to_utf8(const std::u32string_view& src);
|
||||
ConvResult<std::u8string> to_utf8(const std::u32string_view& src);
|
||||
|
||||
private:
|
||||
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
|
||||
NS_YYCC_ENCODING_BACKEND::Utf32ToUtf8 inner;
|
||||
YYCC_PYCODEC_BACKEND_NS::Utf32ToUtf8 inner;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#undef NS_YYCC_PATCH_EXPECTED
|
||||
#undef NS_YYCC_STRING
|
||||
} // namespace yycc::encoding::pycodec
|
||||
|
@ -120,6 +120,8 @@ namespace yycc::encoding::windows {
|
||||
// * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170
|
||||
// * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170
|
||||
|
||||
#if defined(YYCC_STL_MSSTL)
|
||||
|
||||
// 1 UTF32 unit can produce 4 UTF8 units or 2 UTF16 units in theory.
|
||||
// So we pre-allocate memory for the result to prevent allocating memory multiple times.
|
||||
constexpr size_t MULTIPLE_UTF8_TO_UTF16 = 1u;
|
||||
@ -206,6 +208,8 @@ namespace yycc::encoding::windows {
|
||||
return dst;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#pragma endregion
|
||||
|
||||
} // namespace yycc::encoding::windows
|
||||
|
@ -1,8 +1,6 @@
|
||||
#pragma once
|
||||
#include "../macro/os_detector.hpp"
|
||||
|
||||
#if defined(YYCC_OS_WINDOWS)
|
||||
|
||||
#include "../macro/stl_detector.hpp"
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <expected>
|
||||
@ -28,6 +26,8 @@ namespace yycc::encoding::windows {
|
||||
template<typename T>
|
||||
using ConvResult = std::expected<T, ConvError>;
|
||||
|
||||
#if defined(YYCC_OS_WINDOWS)
|
||||
|
||||
/**
|
||||
* @brief WChar -> Char
|
||||
* @param src
|
||||
@ -88,6 +88,12 @@ namespace yycc::encoding::windows {
|
||||
*/
|
||||
ConvResult<std::string> to_char(const std::u8string_view& src, CodePage code_page);
|
||||
|
||||
// YYC MARK:
|
||||
// UTF conversion only works on Microsoft STL.
|
||||
// See implementation for more details
|
||||
|
||||
#if defined(YYCC_STL_MSSTL)
|
||||
|
||||
/**
|
||||
* @brief UTF8 -> UTF16
|
||||
* @param src
|
||||
@ -116,6 +122,8 @@ namespace yycc::encoding::windows {
|
||||
*/
|
||||
ConvResult<std::u8string> to_utf8(const std::u32string_view& src);
|
||||
|
||||
} // namespace yycc::encoding::windows
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace yycc::encoding::windows
|
||||
|
14
src/yycc/macro/stl_detector.hpp
Normal file
14
src/yycc/macro/stl_detector.hpp
Normal file
@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
// Include a commonly used STL header so that the STL identification macros tested below are defined.
|
||||
#include <cinttypes>
|
||||
|
||||
#if defined(_MSVC_STL_VERSION)
|
||||
#define YYCC_STL_MSSTL
|
||||
#elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
|
||||
#define YYCC_STL_GNUSTL
|
||||
#elif defined(_LIBCPP_VERSION)
|
||||
#define YYCC_STL_CLANGSTL
|
||||
#else
|
||||
#error "Current STL is not supported!"
|
||||
#endif
|
@ -40,7 +40,7 @@ namespace yycc::num::parse {
|
||||
*/
|
||||
template<typename T>
|
||||
requires(std::is_floating_point_v<T>)
|
||||
ParseResult<T> parse(const std::u8string_view& strl, std::chars_format fmt) {
|
||||
ParseResult<T> parse(const std::u8string_view& strl, std::chars_format fmt = std::chars_format::general) {
|
||||
namespace reinterpret = NS_YYCC_STRING_REINTERPRET;
|
||||
|
||||
T rv;
|
||||
@ -75,7 +75,7 @@ namespace yycc::num::parse {
|
||||
*/
|
||||
template<typename T>
|
||||
requires(std::is_integral_v<T> && !std::is_same_v<T, bool>)
|
||||
ParseResult<T> parse(const std::u8string_view& strl, int base) {
|
||||
ParseResult<T> parse(const std::u8string_view& strl, int base = 10) {
|
||||
namespace reinterpret = NS_YYCC_STRING_REINTERPRET;
|
||||
|
||||
T rv;
|
||||
@ -115,7 +115,7 @@ namespace yycc::num::parse {
|
||||
// Compare result
|
||||
if (lower_case == u8"true") return true;
|
||||
else if (lower_case == u8"false") return false;
|
||||
else return ParseError::InvalidString;
|
||||
else return std::unexpected(ParseError::InvalidString);
|
||||
}
|
||||
|
||||
} // namespace yycc::num::parse
|
||||
|
Reference in New Issue
Block a user