From ce2b411b0b83c1a80f6808136173125615eb2701 Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Thu, 31 Jul 2025 22:25:14 +0800 Subject: [PATCH] refactor: continue refactor to make the project can be built --- .gitignore | 13 +- README.md | 5 + script/gen_build_script.py | 2 +- script/pycodec/README.md | 7 + script/pycodec/conv_encoding_table.py | 87 +-- script/pycodec/encoding_table.cpp.jinja | 23 + script/pycodec/encoding_table.csv | 195 +++-- src/CMakeLists.txt | 15 +- src/yycc/encoding/iconv.cpp | 16 +- src/yycc/encoding/iconv.hpp | 25 +- src/yycc/encoding/pycodec.cpp | 700 +++++++++--------- src/yycc/encoding/pycodec.hpp | 109 ++- src/yycc/encoding/windows.cpp | 4 + src/yycc/encoding/windows.hpp | 16 +- src/yycc/macro/stl_detector.hpp | 14 + src/yycc/num/parse.hpp | 6 +- testbench/CMakeLists.txt | 8 +- testbench/shared/parse_template.hpp | 74 -- testbench/shared/stringify_template.hpp | 39 - testbench/yycc/constraint.cpp | 3 +- testbench/yycc/constraint/builder.cpp | 19 +- .../{shared => yycc/encoding}/utf_literal.hpp | 0 testbench/yycc/num/parse.cpp | 74 +- testbench/yycc/num/stringify.cpp | 33 +- testbench/yycc/patch/contains.cpp | 58 -- testbench/yycc/patch/starts_ends_with.cpp | 59 -- testbench/yycc/rust/num/parse.cpp | 35 - testbench/yycc/rust/num/stringify.cpp | 22 - testbench/yycc/string/op.cpp | 65 +- testbench/yycc/string/reinterpret.cpp | 8 +- 30 files changed, 791 insertions(+), 943 deletions(-) create mode 100644 script/pycodec/README.md create mode 100644 script/pycodec/encoding_table.cpp.jinja create mode 100644 src/yycc/macro/stl_detector.hpp delete mode 100644 testbench/shared/parse_template.hpp delete mode 100644 testbench/shared/stringify_template.hpp rename testbench/{shared => yycc/encoding}/utf_literal.hpp (100%) delete mode 100644 testbench/yycc/patch/contains.cpp delete mode 100644 testbench/yycc/patch/starts_ends_with.cpp delete mode 100644 testbench/yycc/rust/num/parse.cpp delete mode 100644 testbench/yycc/rust/num/stringify.cpp diff --git 
a/.gitignore b/.gitignore index c8df3b4..721225e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,17 @@ -# -------------------- Output -------------------- +## ===== Personal ===== +# Ignore build resources out/ +build/ +install/ + +# Ignore CMake generated stuff src/yycc/version.hpp CMakeSettings.json -# -------------------- VSCode -------------------- +## ===== VSCode ===== .vscode/ -# -------------------- CMake -------------------- +## ===== CMake ===== CMakeLists.txt.user CMakeCache.txt CMakeFiles @@ -19,7 +24,7 @@ compile_commands.json CTestTestfile.cmake _deps -# -------------------- Visual Studio -------------------- +## ===== Visual Studio ===== ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. ## diff --git a/README.md b/README.md index f9323c2..e2ed719 100644 --- a/README.md +++ b/README.md @@ -15,3 +15,8 @@ However, the documentation need CMake to build and you may don't know how to use This project require at least CMake 3.23 to build. We suggest that you only use stable version (tagged commit). The latest commit may still work in progress and not stable. See documentation for how to build this project. + +> [!INFO] +> When building with the testbench, you may encounter link errors with GoogleTest. This happens because the binary provided by your package manager is built as C++ 17, and its ABI is incompatible with C++ 23. The solution is to download the GoogleTest source code and build it as C++ 23 yourself. See this [GitHub Issue](https://github.com/google/googletest/issues/4591) for more information. +> Conversely, you don't need to care about this issue if you only want to build YYCC itself. 
+ diff --git a/script/gen_build_script.py b/script/gen_build_script.py index af73feb..368f079 100644 --- a/script/gen_build_script.py +++ b/script/gen_build_script.py @@ -25,7 +25,7 @@ def escape_cmd_argument(arg): def escape_sh_argument(arg): return shlex.quote(arg) -@dataclass +@dataclass(frozen=True) class ScriptSettings: cpp_version: str build_doc: bool diff --git a/script/pycodec/README.md b/script/pycodec/README.md new file mode 100644 index 0000000..9871166 --- /dev/null +++ b/script/pycodec/README.md @@ -0,0 +1,7 @@ +# PyCodec + +This directory contains everything related to PyCodec. + +PyCodec uses a different encoding backend depending on the OS: on Windows it uses Win32 functions, and on other OSes it uses Iconv. So we need a table that maps each universal PyCodec encoding name to a Windows Code Page or an Iconv Code Name. These mappings are stored in a CSV file, and a Python script renders them into C++ source code. + +As for the format of the CSV file, each line is a tab-separated record. The first item in a record is the standard PyCodec name. The second item is the corresponding Windows Code Page; if there is no corresponding Code Page, it can be empty. The third item is the corresponding Iconv Code Name; likewise, it can be empty if there is none. The number of remaining columns, from the fourth item onward (inclusive), is variable. All of them are aliases of this standard PyCodec name. 
diff --git a/script/pycodec/conv_encoding_table.py b/script/pycodec/conv_encoding_table.py index d7b6fc1..dd5dde0 100644 --- a/script/pycodec/conv_encoding_table.py +++ b/script/pycodec/conv_encoding_table.py @@ -1,6 +1,7 @@ -import typing +import csv from pathlib import Path -import os +import jinja2 + class LanguageToken: name: str @@ -8,56 +9,46 @@ class LanguageToken: code_page: str | None iconv_code: str | None - def __init__(self, name: str, alias: typing.Iterator[str], code_page: str, iconv_code: str): - self.name = name.lower() - self.alias = tuple(map(lambda x: x.lower(), alias)) - self.code_page = None if code_page == '' else code_page - self.iconv_code = None if iconv_code == '' else iconv_code + def __init__(self, row: list[str]): + """Init language token from CSV row.""" + self.name = row[0].lower() + code_page = row[1] + self.code_page = None if len(code_page) == 0 else code_page + iconv_code = row[2] + self.iconv_code = None if len(iconv_code) == 0 else iconv_code + # For alias, we strip and to lower them first, and remove all empty entries + alias = row[3:] + self.alias = tuple( + filter(lambda x: len(x) != 0, + map(lambda x: x.strip().lower(), alias))) -def extract_data(fs: typing.TextIO) -> list[str]: - # remove first line to remove table header - return fs.readlines()[1:] -def extract_token(csv_data: list[str]) -> tuple[LanguageToken, ...]: - ret: list[LanguageToken] = [] - for line in csv_data: - line = line.strip('\n') - line_sp = line.split('\t') - alias_sp = filter(lambda x: len(x) != 0, map(lambda x: x.strip(), line_sp[1].split(','))) - ret.append(LanguageToken(line_sp[0], alias_sp, line_sp[2], line_sp[3])) - return tuple(ret) +def _get_self_dir() -> Path: + return Path(__file__).resolve().parent -def write_alias_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None: - fs.write('static const std::map ALISA_MAP {\n') - for i in data: - for j in i.alias: - fs.write(f'\t{{ YYCC_U8("{j}"), YYCC_U8("{i.name}") }},\n') - fs.write('};\n') 
-def write_win_cp_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None: - fs.write('static const std::map WINCP_MAP {\n') - for i in data: - if i.code_page is not None: - fs.write(f'\t{{ YYCC_U8("{i.name}"), static_cast({i.code_page}u) }},\n') - fs.write('};\n') +def _extract_tokens() -> list[LanguageToken]: + rv: list[LanguageToken] = [] + csv_file = _get_self_dir() / 'encoding_table.csv' + + with open(csv_file, 'r', encoding='utf-8', newline='') as f: + reader = csv.reader(f, delimiter='\t') + for row in reader: + rv.append(LanguageToken(row)) + + return rv + + +def _render_cpp(tokens: list[LanguageToken]) -> None: + loader = jinja2.FileSystemLoader(_get_self_dir()) + environment = jinja2.Environment(loader=loader) + template = environment.get_template('encoding_table.cpp.jinja') + + cpp_file = _get_self_dir() / 'encoding_table.cpp' + with open(cpp_file, 'w', encoding='utf-8') as f: + f.write(template.render(tokens=tokens)) -def write_iconv_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None: - fs.write('static const std::map ICONV_MAP {\n') - for i in data: - if i.iconv_code is not None: - fs.write(f'\t{{ YYCC_U8("{i.name}"), "{i.iconv_code}" }},\n') - fs.write('};\n') if __name__ == '__main__': - # get file path - self_path = Path(__file__).resolve().parent - csv_file = self_path / 'encoding_table.csv' - cpp_file = self_path / 'encoding_table.cpp' - # process files - with open(csv_file, 'r', encoding='utf-8') as fr: - with open(cpp_file, 'w', encoding='utf-8') as fw: - data = extract_data(fr) - token = extract_token(data) - write_alias_map(fw, token) - write_win_cp_map(fw, token) - write_iconv_map(fw, token) + tokens = _extract_tokens() + _render_cpp(tokens) diff --git a/script/pycodec/encoding_table.cpp.jinja b/script/pycodec/encoding_table.cpp.jinja new file mode 100644 index 0000000..361b7e4 --- /dev/null +++ b/script/pycodec/encoding_table.cpp.jinja @@ -0,0 +1,23 @@ +static const std::map ALIAS_MAP { +{% for token in tokens -%} +{% 
for alias in token.alias -%} + { u8"{{ alias }}"sv, u8"{{ token.name }}"sv }, +{% endfor -%} +{% endfor -%} +}; + +static const std::map WINCP_MAP { +{% for token in tokens -%} +{% if token.code_page is not none -%} + { u8"{{ token.name }}"sv, static_cast({{ token.code_page }}u) }, +{% endif -%} +{% endfor -%} +}; + +static const std::map ICONV_MAP { +{% for token in tokens -%} +{% if token.iconv_code is not none -%} + { u8"{{ token.name }}"sv, "{{ token.iconv_code }}"sv }, +{% endif -%} +{% endfor -%} +}; diff --git a/script/pycodec/encoding_table.csv b/script/pycodec/encoding_table.csv index ffe38df..adeda8b 100644 --- a/script/pycodec/encoding_table.csv +++ b/script/pycodec/encoding_table.csv @@ -1,98 +1,97 @@ -Encoding Alias Code Page Iconv Identifier -ascii 646, us-ascii 437 ASCII -big5 big5-tw, csbig5 950 BIG5 -big5hkscs big5-hkscs, hkscs BIG5-HKSCS -cp037 IBM037, IBM039 037 -cp273 273, IBM273, csIBM273 -cp424 EBCDIC-CP-HE, IBM424 -cp437 437, IBM437 437 -cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 500 -cp720 720 -cp737 737 -cp775 IBM775 775 -cp850 850, IBM850 850 CP850 -cp852 852, IBM852 852 -cp855 855, IBM855 855 -cp856 -cp857 857, IBM857 857 -cp858 858, IBM858 858 -cp860 860, IBM860 860 -cp861 861, CP-IS, IBM861 861 -cp862 862, IBM862 862 CP862 -cp863 863, IBM863 863 -cp864 IBM864 864 -cp865 865, IBM865 865 -cp866 866, IBM866 866 CP866 -cp869 869, CP-GR, IBM869 869 -cp874 874 CP874 -cp875 875 -cp932 932, ms932, mskanji, ms-kanji, windows-31j 932 CP932 -cp949 949, ms949, uhc 949 CP949 -cp950 950, ms950 950 CP950 -cp1006 -cp1026 ibm1026 1026 -cp1125 1125, ibm1125, cp866u, ruscii -cp1140 ibm1140 1140 -cp1250 windows-1250 1250 CP1250 -cp1251 windows-1251 1251 CP1251 -cp1252 windows-1252 1252 CP1252 -cp1253 windows-1253 1253 CP1253 -cp1254 windows-1254 1254 CP1254 -cp1255 windows-1255 1255 CP1255 -cp1256 windows-1256 1256 CP1256 -cp1257 windows-1257 1257 CP1257 -cp1258 windows-1258 1258 CP1258 -euc_jp eucjp, ujis, u-jis 20932 EUC-JP -euc_jis_2004 jisx0213, 
eucjis2004 -euc_jisx0213 eucjisx0213 -euc_kr euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 51949 EUC-KR -gb2312 chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, iso-ir-58 936 CP936 -gbk 936, cp936, ms936 936 GBK -gb18030 gb18030-2000 54936 GB18030 -hz hzgb, hz-gb, hz-gb-2312 52936 HZ -iso2022_jp csiso2022jp, iso2022jp, iso-2022-jp 50220 ISO-2022-JP -iso2022_jp_1 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-1 -iso2022_jp_2 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2 -iso2022_jp_2004 iso2022jp-2004, iso-2022-jp-2004 -iso2022_jp_3 iso2022jp-3, iso-2022-jp-3 -iso2022_jp_ext iso2022jp-ext, iso-2022-jp-ext -iso2022_kr csiso2022kr, iso2022kr, iso-2022-kr 50225 ISO-2022-KR -latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28591 ISO-8859-1 -iso8859_2 iso-8859-2, latin2, L2 28592 ISO-8859-2 -iso8859_3 iso-8859-3, latin3, L3 28593 ISO-8859-3 -iso8859_4 iso-8859-4, latin4, L4 28594 ISO-8859-4 -iso8859_5 iso-8859-5, cyrillic 28595 ISO-8859-5 -iso8859_6 iso-8859-6, arabic 28596 ISO-8859-6 -iso8859_7 iso-8859-7, greek, greek8 28597 ISO-8859-7 -iso8859_8 iso-8859-8, hebrew 28598 ISO-8859-8 -iso8859_9 iso-8859-9, latin5, L5 28599 ISO-8859-9 -iso8859_10 iso-8859-10, latin6, L6 ISO-8859-10 -iso8859_11 iso-8859-11, thai ISO-8859-11 -iso8859_13 iso-8859-13, latin7, L7 28603 ISO-8859-13 -iso8859_14 iso-8859-14, latin8, L8 ISO-8859-14 -iso8859_15 iso-8859-15, latin9, L9 28605 ISO-8859-15 -iso8859_16 iso-8859-16, latin10, L10 ISO-8859-16 -johab cp1361, ms1361 1361 JOHAB -koi8_r -koi8_t KOI8-T -koi8_u -kz1048 kz_1048, strk1048_2002, rk1048 -mac_cyrillic maccyrillic 10007 MacCyrillic -mac_greek macgreek 10006 MacGreek -mac_iceland maciceland 10079 MacIceland -mac_latin2 maclatin2, maccentraleurope, mac_centeuro -mac_roman macroman, macintosh MacRoman -mac_turkish macturkish 10081 MacTurkish -ptcp154 csptcp154, pt154, cp154, cyrillic-asian PT154 -shift_jis csshiftjis, shiftjis, sjis, s_jis 932 SHIFT_JIS -shift_jis_2004 shiftjis2004, 
sjis_2004, sjis2004 -shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213 -utf_32 U32, utf32 UTF-32 -utf_32_be UTF-32BE UTF-32BE -utf_32_le UTF-32LE UTF-32LE -utf_16 U16, utf16 UTF16 -utf_16_be UTF-16BE UTF-16BE -utf_16_le UTF-16LE UTF-16LE -utf_7 U7, unicode-1-1-utf-7 65000 UTF-7 -utf_8 U8, UTF, utf8, utf-8, cp65001 65001 UTF-8 -utf_8_sig +ascii 437 ASCII 646 us-ascii +big5 950 BIG5 big5-tw csbig5 +big5hkscs BIG5-HKSCS big5-hkscs hkscs +cp037 037 IBM037 IBM039 +cp273 273 IBM273 csIBM273 +cp424 EBCDIC-CP-HE IBM424 +cp437 437 437 IBM437 +cp500 500 EBCDIC-CP-BE EBCDIC-CP-CH IBM500 +cp720 720 +cp737 737 +cp775 775 IBM775 +cp850 850 CP850 850 IBM850 +cp852 852 852 IBM852 +cp855 855 855 IBM855 +cp856 +cp857 857 857 IBM857 +cp858 858 858 IBM858 +cp860 860 860 IBM860 +cp861 861 861 CP-IS IBM861 +cp862 862 CP862 862 IBM862 +cp863 863 863 IBM863 +cp864 864 IBM864 +cp865 865 865 IBM865 +cp866 866 CP866 866 IBM866 +cp869 869 869 CP-GR IBM869 +cp874 874 CP874 +cp875 875 +cp932 932 CP932 932 ms932 mskanji ms-kanji windows-31j +cp949 949 CP949 949 ms949 uhc +cp950 950 CP950 950 ms950 +cp1006 +cp1026 1026 ibm1026 +cp1125 1125 ibm1125 cp866u ruscii +cp1140 1140 ibm1140 +cp1250 1250 CP1250 windows-1250 +cp1251 1251 CP1251 windows-1251 +cp1252 1252 CP1252 windows-1252 +cp1253 1253 CP1253 windows-1253 +cp1254 1254 CP1254 windows-1254 +cp1255 1255 CP1255 windows-1255 +cp1256 1256 CP1256 windows-1256 +cp1257 1257 CP1257 windows-1257 +cp1258 1258 CP1258 windows-1258 +euc_jp 20932 EUC-JP eucjp ujis u-jis +euc_jis_2004 jisx0213 eucjis2004 +euc_jisx0213 eucjisx0213 +euc_kr 51949 EUC-KR euckr korean ksc5601 ks_c-5601 ks_c-5601-1987 ksx1001 ks_x-1001 +gb2312 936 CP936 chinese csiso58gb231280 euc-cn euccn eucgb2312-cn gb2312-1980 gb2312-80 iso-ir-58 +gbk 936 GBK 936 cp936 ms936 +gb18030 54936 GB18030 gb18030-2000 +hz 52936 HZ hzgb hz-gb hz-gb-2312 +iso2022_jp 50220 ISO-2022-JP csiso2022jp iso2022jp iso-2022-jp +iso2022_jp_1 ISO-2022-JP-1 iso2022jp-1 iso-2022-jp-1 +iso2022_jp_2 ISO-2022-JP-2 
iso2022jp-2 iso-2022-jp-2 +iso2022_jp_2004 iso2022jp-2004 iso-2022-jp-2004 +iso2022_jp_3 iso2022jp-3 iso-2022-jp-3 +iso2022_jp_ext iso2022jp-ext iso-2022-jp-ext +iso2022_kr 50225 ISO-2022-KR csiso2022kr iso2022kr iso-2022-kr +latin_1 28591 ISO-8859-1 iso-8859-1 iso8859-1 8859 cp819 latin latin1 L1 +iso8859_2 28592 ISO-8859-2 iso-8859-2 latin2 L2 +iso8859_3 28593 ISO-8859-3 iso-8859-3 latin3 L3 +iso8859_4 28594 ISO-8859-4 iso-8859-4 latin4 L4 +iso8859_5 28595 ISO-8859-5 iso-8859-5 cyrillic +iso8859_6 28596 ISO-8859-6 iso-8859-6 arabic +iso8859_7 28597 ISO-8859-7 iso-8859-7 greek greek8 +iso8859_8 28598 ISO-8859-8 iso-8859-8 hebrew +iso8859_9 28599 ISO-8859-9 iso-8859-9 latin5 L5 +iso8859_10 ISO-8859-10 iso-8859-10 latin6 L6 +iso8859_11 ISO-8859-11 iso-8859-11 thai +iso8859_13 28603 ISO-8859-13 iso-8859-13 latin7 L7 +iso8859_14 ISO-8859-14 iso-8859-14 latin8 L8 +iso8859_15 28605 ISO-8859-15 iso-8859-15 latin9 L9 +iso8859_16 ISO-8859-16 iso-8859-16 latin10 L10 +johab 1361 JOHAB cp1361 ms1361 +koi8_r +koi8_t KOI8-T +koi8_u +kz1048 kz_1048 strk1048_2002 rk1048 +mac_cyrillic 10007 MacCyrillic maccyrillic +mac_greek 10006 MacGreek macgreek +mac_iceland 10079 MacIceland maciceland +mac_latin2 maclatin2 maccentraleurope mac_centeuro +mac_roman MacRoman macroman macintosh +mac_turkish 10081 MacTurkish macturkish +ptcp154 PT154 csptcp154 pt154 cp154 cyrillic-asian +shift_jis 932 SHIFT_JIS csshiftjis shiftjis sjis s_jis +shift_jis_2004 shiftjis2004 sjis_2004 sjis2004 +shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 +utf_32 UTF-32 U32 utf32 +utf_32_be UTF-32BE UTF-32BE +utf_32_le UTF-32LE UTF-32LE +utf_16 UTF16 U16 utf16 +utf_16_be UTF-16BE UTF-16BE +utf_16_le UTF-16LE UTF-16LE +utf_7 65000 UTF-7 U7 unicode-1-1-utf-7 +utf_8 65001 UTF-8 U8 UTF utf8 utf-8 cp65001 +utf_8_sig diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cc064a3..4dd0a8a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,7 +17,7 @@ PRIVATE yycc/encoding/stlcvt.cpp 
yycc/encoding/windows.cpp yycc/encoding/iconv.cpp - yycc/encoding/pycodec.cpp + #yycc/encoding/pycodec.cpp ) target_sources(YYCCommonplace PUBLIC @@ -28,6 +28,7 @@ FILES yycc/version.hpp yycc/macro/version_cmp.hpp yycc/macro/os_detector.hpp + yycc/macro/stl_detector.hpp yycc/macro/endian_detector.hpp yycc/macro/compiler_detector.hpp yycc/macro/class_copy_move.hpp @@ -73,8 +74,9 @@ PUBLIC # Iconv environment macro $<$:YYCC_FEAT_ICONV> # OS macro - $<$:YYCC_OS_WINDOWS> + $<$:YYCC_OS_WINDOWS> $<$:YYCC_OS_LINUX> + $<$:YYCC_OS_MACOS> # Compiler macro $<$:YYCC_CC_GCC> $<$:YYCC_CC_CLANG> @@ -100,7 +102,14 @@ PUBLIC $<$:/utf-8> ) -# TODO: Fix GCC stacktrace link issue +# Fix GCC std::stacktrace link error +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14) + target_link_libraries(YYCCommonplace PRIVATE stdc++exp) + else () + target_link_libraries(YYCCommonplace PRIVATE stdc++_libbacktrace) + endif () +endif () # Install binary and headers install(TARGETS YYCCommonplace diff --git a/src/yycc/encoding/iconv.cpp b/src/yycc/encoding/iconv.cpp index 24d468d..18becda 100644 --- a/src/yycc/encoding/iconv.cpp +++ b/src/yycc/encoding/iconv.cpp @@ -229,7 +229,7 @@ namespace yycc::encoding::iconv { CharToUtf8::~CharToUtf8() {} - ConvResult CharToUtf8::priv_to_utf8(const std::string_view& src) { + ConvResult CharToUtf8::to_utf8(const std::string_view& src) { USER_CONVFN(char, char8_t); } @@ -241,7 +241,7 @@ namespace yycc::encoding::iconv { Utf8ToChar::~Utf8ToChar() {} - ConvResult Utf8ToChar::priv_to_char(const std::u8string_view& src) { + ConvResult Utf8ToChar::to_char(const std::u8string_view& src) { USER_CONVFN(char8_t, char); } @@ -253,7 +253,7 @@ namespace yycc::encoding::iconv { WcharToUtf8::~WcharToUtf8() {} - ConvResult WcharToUtf8::priv_to_utf8(const std::wstring_view& src) { + ConvResult WcharToUtf8::to_utf8(const std::wstring_view& src) { USER_CONVFN(wchar_t, char8_t); } @@ -265,7 +265,7 @@ namespace yycc::encoding::iconv { 
Utf8ToWchar::~Utf8ToWchar() {} - ConvResult Utf8ToWchar::priv_to_wchar(const std::u8string_view& src) { + ConvResult Utf8ToWchar::to_wchar(const std::u8string_view& src) { USER_CONVFN(char8_t, wchar_t); } @@ -277,7 +277,7 @@ namespace yycc::encoding::iconv { Utf8ToUtf16::~Utf8ToUtf16() {} - ConvResult Utf8ToUtf16::priv_to_utf16(const std::u8string_view& src) { + ConvResult Utf8ToUtf16::to_utf16(const std::u8string_view& src) { USER_CONVFN(char8_t, char16_t); } @@ -289,7 +289,7 @@ namespace yycc::encoding::iconv { Utf16ToUtf8::~Utf16ToUtf8() {} - ConvResult Utf16ToUtf8::priv_to_utf8(const std::u16string_view& src) { + ConvResult Utf16ToUtf8::to_utf8(const std::u16string_view& src) { USER_CONVFN(char16_t, char8_t); } @@ -301,7 +301,7 @@ namespace yycc::encoding::iconv { Utf8ToUtf32::~Utf8ToUtf32() {} - ConvResult Utf8ToUtf32::priv_to_utf32(const std::u8string_view& src) { + ConvResult Utf8ToUtf32::to_utf32(const std::u8string_view& src) { USER_CONVFN(char8_t, char32_t); } @@ -313,7 +313,7 @@ namespace yycc::encoding::iconv { Utf32ToUtf8::~Utf32ToUtf8() {} - ConvResult Utf32ToUtf8::priv_to_utf8(const std::u32string_view& src) { + ConvResult Utf32ToUtf8::to_utf8(const std::u32string_view& src) { USER_CONVFN(char32_t, char8_t); } diff --git a/src/yycc/encoding/iconv.hpp b/src/yycc/encoding/iconv.hpp index 5171e7c..00bb7c2 100644 --- a/src/yycc/encoding/iconv.hpp +++ b/src/yycc/encoding/iconv.hpp @@ -1,8 +1,5 @@ #pragma once #include "../macro/os_detector.hpp" - -#if YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS) - #include "../macro/class_copy_move.hpp" #include #include @@ -51,6 +48,8 @@ namespace yycc::encoding::iconv { template using ConvResult = std::expected; +#if YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS) + /// @brief Char -> UTF8 class CharToUtf8 { public: @@ -60,7 +59,7 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(CharToUtf8) public: - ConvResult priv_to_utf8(const std::string_view& src); + ConvResult to_utf8(const std::string_view& src); private: 
Token token; @@ -75,7 +74,7 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(Utf8ToChar) public: - ConvResult priv_to_char(const std::u8string_view& src); + ConvResult to_char(const std::u8string_view& src); private: Token token; @@ -90,7 +89,7 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(WcharToUtf8) public: - ConvResult priv_to_utf8(const std::wstring_view& src); + ConvResult to_utf8(const std::wstring_view& src); private: Token token; @@ -105,7 +104,7 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(Utf8ToWchar) public: - ConvResult priv_to_wchar(const std::u8string_view& src); + ConvResult to_wchar(const std::u8string_view& src); private: Token token; @@ -120,7 +119,7 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(Utf8ToUtf16) public: - ConvResult priv_to_utf16(const std::u8string_view& src); + ConvResult to_utf16(const std::u8string_view& src); private: Token token; @@ -135,7 +134,7 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(Utf16ToUtf8) public: - ConvResult priv_to_utf8(const std::u16string_view& src); + ConvResult to_utf8(const std::u16string_view& src); private: Token token; @@ -150,7 +149,7 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(Utf8ToUtf32) public: - ConvResult priv_to_utf32(const std::u8string_view& src); + ConvResult to_utf32(const std::u8string_view& src); private: Token token; @@ -165,12 +164,12 @@ namespace yycc::encoding::iconv { YYCC_DEFAULT_MOVE(Utf32ToUtf8) public: - ConvResult priv_to_utf8(const std::u32string_view& src); + ConvResult to_utf8(const std::u32string_view& src); private: Token token; }; -} // namespace yycc::encoding::iconv - #endif + +} // namespace yycc::encoding::iconv diff --git a/src/yycc/encoding/pycodec.cpp b/src/yycc/encoding/pycodec.cpp index ad69fca..59d6564 100644 --- a/src/yycc/encoding/pycodec.cpp +++ b/src/yycc/encoding/pycodec.cpp @@ -1,359 +1,370 @@ #include "pycodec.hpp" + +#include "../string/op.hpp" #include -#define NS_YYCC_STRING ::yycc::string -#define 
NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected +using namespace std::literals::string_view_literals; +namespace op = ::yycc::string::op; namespace yycc::encoding::pycodec { #pragma region Encoding Name - static const std::map ALISA_MAP { - { YYCC_U8("646"), YYCC_U8("ascii") }, - { YYCC_U8("us-ascii"), YYCC_U8("ascii") }, - { YYCC_U8("big5-tw"), YYCC_U8("big5") }, - { YYCC_U8("csbig5"), YYCC_U8("big5") }, - { YYCC_U8("big5-hkscs"), YYCC_U8("big5hkscs") }, - { YYCC_U8("hkscs"), YYCC_U8("big5hkscs") }, - { YYCC_U8("ibm037"), YYCC_U8("cp037") }, - { YYCC_U8("ibm039"), YYCC_U8("cp037") }, - { YYCC_U8("273"), YYCC_U8("cp273") }, - { YYCC_U8("ibm273"), YYCC_U8("cp273") }, - { YYCC_U8("csibm273"), YYCC_U8("cp273") }, - { YYCC_U8("ebcdic-cp-he"), YYCC_U8("cp424") }, - { YYCC_U8("ibm424"), YYCC_U8("cp424") }, - { YYCC_U8("437"), YYCC_U8("cp437") }, - { YYCC_U8("ibm437"), YYCC_U8("cp437") }, - { YYCC_U8("ebcdic-cp-be"), YYCC_U8("cp500") }, - { YYCC_U8("ebcdic-cp-ch"), YYCC_U8("cp500") }, - { YYCC_U8("ibm500"), YYCC_U8("cp500") }, - { YYCC_U8("ibm775"), YYCC_U8("cp775") }, - { YYCC_U8("850"), YYCC_U8("cp850") }, - { YYCC_U8("ibm850"), YYCC_U8("cp850") }, - { YYCC_U8("852"), YYCC_U8("cp852") }, - { YYCC_U8("ibm852"), YYCC_U8("cp852") }, - { YYCC_U8("855"), YYCC_U8("cp855") }, - { YYCC_U8("ibm855"), YYCC_U8("cp855") }, - { YYCC_U8("857"), YYCC_U8("cp857") }, - { YYCC_U8("ibm857"), YYCC_U8("cp857") }, - { YYCC_U8("858"), YYCC_U8("cp858") }, - { YYCC_U8("ibm858"), YYCC_U8("cp858") }, - { YYCC_U8("860"), YYCC_U8("cp860") }, - { YYCC_U8("ibm860"), YYCC_U8("cp860") }, - { YYCC_U8("861"), YYCC_U8("cp861") }, - { YYCC_U8("cp-is"), YYCC_U8("cp861") }, - { YYCC_U8("ibm861"), YYCC_U8("cp861") }, - { YYCC_U8("862"), YYCC_U8("cp862") }, - { YYCC_U8("ibm862"), YYCC_U8("cp862") }, - { YYCC_U8("863"), YYCC_U8("cp863") }, - { YYCC_U8("ibm863"), YYCC_U8("cp863") }, - { YYCC_U8("ibm864"), YYCC_U8("cp864") }, - { YYCC_U8("865"), YYCC_U8("cp865") }, - { YYCC_U8("ibm865"), YYCC_U8("cp865") }, - { 
YYCC_U8("866"), YYCC_U8("cp866") }, - { YYCC_U8("ibm866"), YYCC_U8("cp866") }, - { YYCC_U8("869"), YYCC_U8("cp869") }, - { YYCC_U8("cp-gr"), YYCC_U8("cp869") }, - { YYCC_U8("ibm869"), YYCC_U8("cp869") }, - { YYCC_U8("932"), YYCC_U8("cp932") }, - { YYCC_U8("ms932"), YYCC_U8("cp932") }, - { YYCC_U8("mskanji"), YYCC_U8("cp932") }, - { YYCC_U8("ms-kanji"), YYCC_U8("cp932") }, - { YYCC_U8("windows-31j"), YYCC_U8("cp932") }, - { YYCC_U8("949"), YYCC_U8("cp949") }, - { YYCC_U8("ms949"), YYCC_U8("cp949") }, - { YYCC_U8("uhc"), YYCC_U8("cp949") }, - { YYCC_U8("950"), YYCC_U8("cp950") }, - { YYCC_U8("ms950"), YYCC_U8("cp950") }, - { YYCC_U8("ibm1026"), YYCC_U8("cp1026") }, - { YYCC_U8("1125"), YYCC_U8("cp1125") }, - { YYCC_U8("ibm1125"), YYCC_U8("cp1125") }, - { YYCC_U8("cp866u"), YYCC_U8("cp1125") }, - { YYCC_U8("ruscii"), YYCC_U8("cp1125") }, - { YYCC_U8("ibm1140"), YYCC_U8("cp1140") }, - { YYCC_U8("windows-1250"), YYCC_U8("cp1250") }, - { YYCC_U8("windows-1251"), YYCC_U8("cp1251") }, - { YYCC_U8("windows-1252"), YYCC_U8("cp1252") }, - { YYCC_U8("windows-1253"), YYCC_U8("cp1253") }, - { YYCC_U8("windows-1254"), YYCC_U8("cp1254") }, - { YYCC_U8("windows-1255"), YYCC_U8("cp1255") }, - { YYCC_U8("windows-1256"), YYCC_U8("cp1256") }, - { YYCC_U8("windows-1257"), YYCC_U8("cp1257") }, - { YYCC_U8("windows-1258"), YYCC_U8("cp1258") }, - { YYCC_U8("eucjp"), YYCC_U8("euc_jp") }, - { YYCC_U8("ujis"), YYCC_U8("euc_jp") }, - { YYCC_U8("u-jis"), YYCC_U8("euc_jp") }, - { YYCC_U8("jisx0213"), YYCC_U8("euc_jis_2004") }, - { YYCC_U8("eucjis2004"), YYCC_U8("euc_jis_2004") }, - { YYCC_U8("eucjisx0213"), YYCC_U8("euc_jisx0213") }, - { YYCC_U8("euckr"), YYCC_U8("euc_kr") }, - { YYCC_U8("korean"), YYCC_U8("euc_kr") }, - { YYCC_U8("ksc5601"), YYCC_U8("euc_kr") }, - { YYCC_U8("ks_c-5601"), YYCC_U8("euc_kr") }, - { YYCC_U8("ks_c-5601-1987"), YYCC_U8("euc_kr") }, - { YYCC_U8("ksx1001"), YYCC_U8("euc_kr") }, - { YYCC_U8("ks_x-1001"), YYCC_U8("euc_kr") }, - { YYCC_U8("chinese"), YYCC_U8("gb2312") }, 
- { YYCC_U8("csiso58gb231280"), YYCC_U8("gb2312") }, - { YYCC_U8("euc-cn"), YYCC_U8("gb2312") }, - { YYCC_U8("euccn"), YYCC_U8("gb2312") }, - { YYCC_U8("eucgb2312-cn"), YYCC_U8("gb2312") }, - { YYCC_U8("gb2312-1980"), YYCC_U8("gb2312") }, - { YYCC_U8("gb2312-80"), YYCC_U8("gb2312") }, - { YYCC_U8("iso-ir-58"), YYCC_U8("gb2312") }, - { YYCC_U8("936"), YYCC_U8("gbk") }, - { YYCC_U8("cp936"), YYCC_U8("gbk") }, - { YYCC_U8("ms936"), YYCC_U8("gbk") }, - { YYCC_U8("gb18030-2000"), YYCC_U8("gb18030") }, - { YYCC_U8("hzgb"), YYCC_U8("hz") }, - { YYCC_U8("hz-gb"), YYCC_U8("hz") }, - { YYCC_U8("hz-gb-2312"), YYCC_U8("hz") }, - { YYCC_U8("csiso2022jp"), YYCC_U8("iso2022_jp") }, - { YYCC_U8("iso2022jp"), YYCC_U8("iso2022_jp") }, - { YYCC_U8("iso-2022-jp"), YYCC_U8("iso2022_jp") }, - { YYCC_U8("iso2022jp-1"), YYCC_U8("iso2022_jp_1") }, - { YYCC_U8("iso-2022-jp-1"), YYCC_U8("iso2022_jp_1") }, - { YYCC_U8("iso2022jp-2"), YYCC_U8("iso2022_jp_2") }, - { YYCC_U8("iso-2022-jp-2"), YYCC_U8("iso2022_jp_2") }, - { YYCC_U8("iso2022jp-2004"), YYCC_U8("iso2022_jp_2004") }, - { YYCC_U8("iso-2022-jp-2004"), YYCC_U8("iso2022_jp_2004") }, - { YYCC_U8("iso2022jp-3"), YYCC_U8("iso2022_jp_3") }, - { YYCC_U8("iso-2022-jp-3"), YYCC_U8("iso2022_jp_3") }, - { YYCC_U8("iso2022jp-ext"), YYCC_U8("iso2022_jp_ext") }, - { YYCC_U8("iso-2022-jp-ext"), YYCC_U8("iso2022_jp_ext") }, - { YYCC_U8("csiso2022kr"), YYCC_U8("iso2022_kr") }, - { YYCC_U8("iso2022kr"), YYCC_U8("iso2022_kr") }, - { YYCC_U8("iso-2022-kr"), YYCC_U8("iso2022_kr") }, - { YYCC_U8("iso-8859-1"), YYCC_U8("latin_1") }, - { YYCC_U8("iso8859-1"), YYCC_U8("latin_1") }, - { YYCC_U8("8859"), YYCC_U8("latin_1") }, - { YYCC_U8("cp819"), YYCC_U8("latin_1") }, - { YYCC_U8("latin"), YYCC_U8("latin_1") }, - { YYCC_U8("latin1"), YYCC_U8("latin_1") }, - { YYCC_U8("l1"), YYCC_U8("latin_1") }, - { YYCC_U8("iso-8859-2"), YYCC_U8("iso8859_2") }, - { YYCC_U8("latin2"), YYCC_U8("iso8859_2") }, - { YYCC_U8("l2"), YYCC_U8("iso8859_2") }, - { YYCC_U8("iso-8859-3"), 
YYCC_U8("iso8859_3") }, - { YYCC_U8("latin3"), YYCC_U8("iso8859_3") }, - { YYCC_U8("l3"), YYCC_U8("iso8859_3") }, - { YYCC_U8("iso-8859-4"), YYCC_U8("iso8859_4") }, - { YYCC_U8("latin4"), YYCC_U8("iso8859_4") }, - { YYCC_U8("l4"), YYCC_U8("iso8859_4") }, - { YYCC_U8("iso-8859-5"), YYCC_U8("iso8859_5") }, - { YYCC_U8("cyrillic"), YYCC_U8("iso8859_5") }, - { YYCC_U8("iso-8859-6"), YYCC_U8("iso8859_6") }, - { YYCC_U8("arabic"), YYCC_U8("iso8859_6") }, - { YYCC_U8("iso-8859-7"), YYCC_U8("iso8859_7") }, - { YYCC_U8("greek"), YYCC_U8("iso8859_7") }, - { YYCC_U8("greek8"), YYCC_U8("iso8859_7") }, - { YYCC_U8("iso-8859-8"), YYCC_U8("iso8859_8") }, - { YYCC_U8("hebrew"), YYCC_U8("iso8859_8") }, - { YYCC_U8("iso-8859-9"), YYCC_U8("iso8859_9") }, - { YYCC_U8("latin5"), YYCC_U8("iso8859_9") }, - { YYCC_U8("l5"), YYCC_U8("iso8859_9") }, - { YYCC_U8("iso-8859-10"), YYCC_U8("iso8859_10") }, - { YYCC_U8("latin6"), YYCC_U8("iso8859_10") }, - { YYCC_U8("l6"), YYCC_U8("iso8859_10") }, - { YYCC_U8("iso-8859-11"), YYCC_U8("iso8859_11") }, - { YYCC_U8("thai"), YYCC_U8("iso8859_11") }, - { YYCC_U8("iso-8859-13"), YYCC_U8("iso8859_13") }, - { YYCC_U8("latin7"), YYCC_U8("iso8859_13") }, - { YYCC_U8("l7"), YYCC_U8("iso8859_13") }, - { YYCC_U8("iso-8859-14"), YYCC_U8("iso8859_14") }, - { YYCC_U8("latin8"), YYCC_U8("iso8859_14") }, - { YYCC_U8("l8"), YYCC_U8("iso8859_14") }, - { YYCC_U8("iso-8859-15"), YYCC_U8("iso8859_15") }, - { YYCC_U8("latin9"), YYCC_U8("iso8859_15") }, - { YYCC_U8("l9"), YYCC_U8("iso8859_15") }, - { YYCC_U8("iso-8859-16"), YYCC_U8("iso8859_16") }, - { YYCC_U8("latin10"), YYCC_U8("iso8859_16") }, - { YYCC_U8("l10"), YYCC_U8("iso8859_16") }, - { YYCC_U8("cp1361"), YYCC_U8("johab") }, - { YYCC_U8("ms1361"), YYCC_U8("johab") }, - { YYCC_U8("kz_1048"), YYCC_U8("kz1048") }, - { YYCC_U8("strk1048_2002"), YYCC_U8("kz1048") }, - { YYCC_U8("rk1048"), YYCC_U8("kz1048") }, - { YYCC_U8("maccyrillic"), YYCC_U8("mac_cyrillic") }, - { YYCC_U8("macgreek"), YYCC_U8("mac_greek") }, - { 
YYCC_U8("maciceland"), YYCC_U8("mac_iceland") }, - { YYCC_U8("maclatin2"), YYCC_U8("mac_latin2") }, - { YYCC_U8("maccentraleurope"), YYCC_U8("mac_latin2") }, - { YYCC_U8("mac_centeuro"), YYCC_U8("mac_latin2") }, - { YYCC_U8("macroman"), YYCC_U8("mac_roman") }, - { YYCC_U8("macintosh"), YYCC_U8("mac_roman") }, - { YYCC_U8("macturkish"), YYCC_U8("mac_turkish") }, - { YYCC_U8("csptcp154"), YYCC_U8("ptcp154") }, - { YYCC_U8("pt154"), YYCC_U8("ptcp154") }, - { YYCC_U8("cp154"), YYCC_U8("ptcp154") }, - { YYCC_U8("cyrillic-asian"), YYCC_U8("ptcp154") }, - { YYCC_U8("csshiftjis"), YYCC_U8("shift_jis") }, - { YYCC_U8("shiftjis"), YYCC_U8("shift_jis") }, - { YYCC_U8("sjis"), YYCC_U8("shift_jis") }, - { YYCC_U8("s_jis"), YYCC_U8("shift_jis") }, - { YYCC_U8("shiftjis2004"), YYCC_U8("shift_jis_2004") }, - { YYCC_U8("sjis_2004"), YYCC_U8("shift_jis_2004") }, - { YYCC_U8("sjis2004"), YYCC_U8("shift_jis_2004") }, - { YYCC_U8("shiftjisx0213"), YYCC_U8("shift_jisx0213") }, - { YYCC_U8("sjisx0213"), YYCC_U8("shift_jisx0213") }, - { YYCC_U8("s_jisx0213"), YYCC_U8("shift_jisx0213") }, - { YYCC_U8("u32"), YYCC_U8("utf_32") }, - { YYCC_U8("utf32"), YYCC_U8("utf_32") }, - { YYCC_U8("utf-32be"), YYCC_U8("utf_32_be") }, - { YYCC_U8("utf-32le"), YYCC_U8("utf_32_le") }, - { YYCC_U8("u16"), YYCC_U8("utf_16") }, - { YYCC_U8("utf16"), YYCC_U8("utf_16") }, - { YYCC_U8("utf-16be"), YYCC_U8("utf_16_be") }, - { YYCC_U8("utf-16le"), YYCC_U8("utf_16_le") }, - { YYCC_U8("u7"), YYCC_U8("utf_7") }, - { YYCC_U8("unicode-1-1-utf-7"), YYCC_U8("utf_7") }, - { YYCC_U8("u8"), YYCC_U8("utf_8") }, - { YYCC_U8("utf"), YYCC_U8("utf_8") }, - { YYCC_U8("utf8"), YYCC_U8("utf_8") }, - { YYCC_U8("utf-8"), YYCC_U8("utf_8") }, - { YYCC_U8("cp65001"), YYCC_U8("utf_8") }, + static const std::map ALIAS_MAP{ + {u8"646"sv, u8"ascii"sv}, + {u8"us-ascii"sv, u8"ascii"sv}, + {u8"big5-tw"sv, u8"big5"sv}, + {u8"csbig5"sv, u8"big5"sv}, + {u8"big5-hkscs"sv, u8"big5hkscs"sv}, + {u8"hkscs"sv, u8"big5hkscs"sv}, + {u8"ibm037"sv, 
u8"cp037"sv}, + {u8"ibm039"sv, u8"cp037"sv}, + {u8"273"sv, u8"cp273"sv}, + {u8"ibm273"sv, u8"cp273"sv}, + {u8"csibm273"sv, u8"cp273"sv}, + {u8"ebcdic-cp-he"sv, u8"cp424"sv}, + {u8"ibm424"sv, u8"cp424"sv}, + {u8"437"sv, u8"cp437"sv}, + {u8"ibm437"sv, u8"cp437"sv}, + {u8"ebcdic-cp-be"sv, u8"cp500"sv}, + {u8"ebcdic-cp-ch"sv, u8"cp500"sv}, + {u8"ibm500"sv, u8"cp500"sv}, + {u8"ibm775"sv, u8"cp775"sv}, + {u8"850"sv, u8"cp850"sv}, + {u8"ibm850"sv, u8"cp850"sv}, + {u8"852"sv, u8"cp852"sv}, + {u8"ibm852"sv, u8"cp852"sv}, + {u8"855"sv, u8"cp855"sv}, + {u8"ibm855"sv, u8"cp855"sv}, + {u8"857"sv, u8"cp857"sv}, + {u8"ibm857"sv, u8"cp857"sv}, + {u8"858"sv, u8"cp858"sv}, + {u8"ibm858"sv, u8"cp858"sv}, + {u8"860"sv, u8"cp860"sv}, + {u8"ibm860"sv, u8"cp860"sv}, + {u8"861"sv, u8"cp861"sv}, + {u8"cp-is"sv, u8"cp861"sv}, + {u8"ibm861"sv, u8"cp861"sv}, + {u8"862"sv, u8"cp862"sv}, + {u8"ibm862"sv, u8"cp862"sv}, + {u8"863"sv, u8"cp863"sv}, + {u8"ibm863"sv, u8"cp863"sv}, + {u8"ibm864"sv, u8"cp864"sv}, + {u8"865"sv, u8"cp865"sv}, + {u8"ibm865"sv, u8"cp865"sv}, + {u8"866"sv, u8"cp866"sv}, + {u8"ibm866"sv, u8"cp866"sv}, + {u8"869"sv, u8"cp869"sv}, + {u8"cp-gr"sv, u8"cp869"sv}, + {u8"ibm869"sv, u8"cp869"sv}, + {u8"932"sv, u8"cp932"sv}, + {u8"ms932"sv, u8"cp932"sv}, + {u8"mskanji"sv, u8"cp932"sv}, + {u8"ms-kanji"sv, u8"cp932"sv}, + {u8"windows-31j"sv, u8"cp932"sv}, + {u8"949"sv, u8"cp949"sv}, + {u8"ms949"sv, u8"cp949"sv}, + {u8"uhc"sv, u8"cp949"sv}, + {u8"950"sv, u8"cp950"sv}, + {u8"ms950"sv, u8"cp950"sv}, + {u8"ibm1026"sv, u8"cp1026"sv}, + {u8"1125"sv, u8"cp1125"sv}, + {u8"ibm1125"sv, u8"cp1125"sv}, + {u8"cp866u"sv, u8"cp1125"sv}, + {u8"ruscii"sv, u8"cp1125"sv}, + {u8"ibm1140"sv, u8"cp1140"sv}, + {u8"windows-1250"sv, u8"cp1250"sv}, + {u8"windows-1251"sv, u8"cp1251"sv}, + {u8"windows-1252"sv, u8"cp1252"sv}, + {u8"windows-1253"sv, u8"cp1253"sv}, + {u8"windows-1254"sv, u8"cp1254"sv}, + {u8"windows-1255"sv, u8"cp1255"sv}, + {u8"windows-1256"sv, u8"cp1256"sv}, + {u8"windows-1257"sv, u8"cp1257"sv}, 
+ {u8"windows-1258"sv, u8"cp1258"sv}, + {u8"eucjp"sv, u8"euc_jp"sv}, + {u8"ujis"sv, u8"euc_jp"sv}, + {u8"u-jis"sv, u8"euc_jp"sv}, + {u8"jisx0213"sv, u8"euc_jis_2004"sv}, + {u8"eucjis2004"sv, u8"euc_jis_2004"sv}, + {u8"eucjisx0213"sv, u8"euc_jisx0213"sv}, + {u8"euckr"sv, u8"euc_kr"sv}, + {u8"korean"sv, u8"euc_kr"sv}, + {u8"ksc5601"sv, u8"euc_kr"sv}, + {u8"ks_c-5601"sv, u8"euc_kr"sv}, + {u8"ks_c-5601-1987"sv, u8"euc_kr"sv}, + {u8"ksx1001"sv, u8"euc_kr"sv}, + {u8"ks_x-1001"sv, u8"euc_kr"sv}, + {u8"chinese"sv, u8"gb2312"sv}, + {u8"csiso58gb231280"sv, u8"gb2312"sv}, + {u8"euc-cn"sv, u8"gb2312"sv}, + {u8"euccn"sv, u8"gb2312"sv}, + {u8"eucgb2312-cn"sv, u8"gb2312"sv}, + {u8"gb2312-1980"sv, u8"gb2312"sv}, + {u8"gb2312-80"sv, u8"gb2312"sv}, + {u8"iso-ir-58"sv, u8"gb2312"sv}, + {u8"936"sv, u8"gbk"sv}, + {u8"cp936"sv, u8"gbk"sv}, + {u8"ms936"sv, u8"gbk"sv}, + {u8"gb18030-2000"sv, u8"gb18030"sv}, + {u8"hzgb"sv, u8"hz"sv}, + {u8"hz-gb"sv, u8"hz"sv}, + {u8"hz-gb-2312"sv, u8"hz"sv}, + {u8"csiso2022jp"sv, u8"iso2022_jp"sv}, + {u8"iso2022jp"sv, u8"iso2022_jp"sv}, + {u8"iso-2022-jp"sv, u8"iso2022_jp"sv}, + {u8"iso2022jp-1"sv, u8"iso2022_jp_1"sv}, + {u8"iso-2022-jp-1"sv, u8"iso2022_jp_1"sv}, + {u8"iso2022jp-2"sv, u8"iso2022_jp_2"sv}, + {u8"iso-2022-jp-2"sv, u8"iso2022_jp_2"sv}, + {u8"iso2022jp-2004"sv, u8"iso2022_jp_2004"sv}, + {u8"iso-2022-jp-2004"sv, u8"iso2022_jp_2004"sv}, + {u8"iso2022jp-3"sv, u8"iso2022_jp_3"sv}, + {u8"iso-2022-jp-3"sv, u8"iso2022_jp_3"sv}, + {u8"iso2022jp-ext"sv, u8"iso2022_jp_ext"sv}, + {u8"iso-2022-jp-ext"sv, u8"iso2022_jp_ext"sv}, + {u8"csiso2022kr"sv, u8"iso2022_kr"sv}, + {u8"iso2022kr"sv, u8"iso2022_kr"sv}, + {u8"iso-2022-kr"sv, u8"iso2022_kr"sv}, + {u8"iso-8859-1"sv, u8"latin_1"sv}, + {u8"iso8859-1"sv, u8"latin_1"sv}, + {u8"8859"sv, u8"latin_1"sv}, + {u8"cp819"sv, u8"latin_1"sv}, + {u8"latin"sv, u8"latin_1"sv}, + {u8"latin1"sv, u8"latin_1"sv}, + {u8"l1"sv, u8"latin_1"sv}, + {u8"iso-8859-2"sv, u8"iso8859_2"sv}, + {u8"latin2"sv, u8"iso8859_2"sv}, + 
{u8"l2"sv, u8"iso8859_2"sv}, + {u8"iso-8859-3"sv, u8"iso8859_3"sv}, + {u8"latin3"sv, u8"iso8859_3"sv}, + {u8"l3"sv, u8"iso8859_3"sv}, + {u8"iso-8859-4"sv, u8"iso8859_4"sv}, + {u8"latin4"sv, u8"iso8859_4"sv}, + {u8"l4"sv, u8"iso8859_4"sv}, + {u8"iso-8859-5"sv, u8"iso8859_5"sv}, + {u8"cyrillic"sv, u8"iso8859_5"sv}, + {u8"iso-8859-6"sv, u8"iso8859_6"sv}, + {u8"arabic"sv, u8"iso8859_6"sv}, + {u8"iso-8859-7"sv, u8"iso8859_7"sv}, + {u8"greek"sv, u8"iso8859_7"sv}, + {u8"greek8"sv, u8"iso8859_7"sv}, + {u8"iso-8859-8"sv, u8"iso8859_8"sv}, + {u8"hebrew"sv, u8"iso8859_8"sv}, + {u8"iso-8859-9"sv, u8"iso8859_9"sv}, + {u8"latin5"sv, u8"iso8859_9"sv}, + {u8"l5"sv, u8"iso8859_9"sv}, + {u8"iso-8859-10"sv, u8"iso8859_10"sv}, + {u8"latin6"sv, u8"iso8859_10"sv}, + {u8"l6"sv, u8"iso8859_10"sv}, + {u8"iso-8859-11"sv, u8"iso8859_11"sv}, + {u8"thai"sv, u8"iso8859_11"sv}, + {u8"iso-8859-13"sv, u8"iso8859_13"sv}, + {u8"latin7"sv, u8"iso8859_13"sv}, + {u8"l7"sv, u8"iso8859_13"sv}, + {u8"iso-8859-14"sv, u8"iso8859_14"sv}, + {u8"latin8"sv, u8"iso8859_14"sv}, + {u8"l8"sv, u8"iso8859_14"sv}, + {u8"iso-8859-15"sv, u8"iso8859_15"sv}, + {u8"latin9"sv, u8"iso8859_15"sv}, + {u8"l9"sv, u8"iso8859_15"sv}, + {u8"iso-8859-16"sv, u8"iso8859_16"sv}, + {u8"latin10"sv, u8"iso8859_16"sv}, + {u8"l10"sv, u8"iso8859_16"sv}, + {u8"cp1361"sv, u8"johab"sv}, + {u8"ms1361"sv, u8"johab"sv}, + {u8"kz_1048"sv, u8"kz1048"sv}, + {u8"strk1048_2002"sv, u8"kz1048"sv}, + {u8"rk1048"sv, u8"kz1048"sv}, + {u8"maccyrillic"sv, u8"mac_cyrillic"sv}, + {u8"macgreek"sv, u8"mac_greek"sv}, + {u8"maciceland"sv, u8"mac_iceland"sv}, + {u8"maclatin2"sv, u8"mac_latin2"sv}, + {u8"maccentraleurope"sv, u8"mac_latin2"sv}, + {u8"mac_centeuro"sv, u8"mac_latin2"sv}, + {u8"macroman"sv, u8"mac_roman"sv}, + {u8"macintosh"sv, u8"mac_roman"sv}, + {u8"macturkish"sv, u8"mac_turkish"sv}, + {u8"csptcp154"sv, u8"ptcp154"sv}, + {u8"pt154"sv, u8"ptcp154"sv}, + {u8"cp154"sv, u8"ptcp154"sv}, + {u8"cyrillic-asian"sv, u8"ptcp154"sv}, + {u8"csshiftjis"sv, 
u8"shift_jis"sv}, + {u8"shiftjis"sv, u8"shift_jis"sv}, + {u8"sjis"sv, u8"shift_jis"sv}, + {u8"s_jis"sv, u8"shift_jis"sv}, + {u8"shiftjis2004"sv, u8"shift_jis_2004"sv}, + {u8"sjis_2004"sv, u8"shift_jis_2004"sv}, + {u8"sjis2004"sv, u8"shift_jis_2004"sv}, + {u8"shiftjisx0213"sv, u8"shift_jisx0213"sv}, + {u8"sjisx0213"sv, u8"shift_jisx0213"sv}, + {u8"s_jisx0213"sv, u8"shift_jisx0213"sv}, + {u8"u32"sv, u8"utf_32"sv}, + {u8"utf32"sv, u8"utf_32"sv}, + {u8"utf-32be"sv, u8"utf_32_be"sv}, + {u8"utf-32le"sv, u8"utf_32_le"sv}, + {u8"u16"sv, u8"utf_16"sv}, + {u8"utf16"sv, u8"utf_16"sv}, + {u8"utf-16be"sv, u8"utf_16_be"sv}, + {u8"utf-16le"sv, u8"utf_16_le"sv}, + {u8"u7"sv, u8"utf_7"sv}, + {u8"unicode-1-1-utf-7"sv, u8"utf_7"sv}, + {u8"u8"sv, u8"utf_8"sv}, + {u8"utf"sv, u8"utf_8"sv}, + {u8"utf8"sv, u8"utf_8"sv}, + {u8"utf-8"sv, u8"utf_8"sv}, + {u8"cp65001"sv, u8"utf_8"sv}, }; + /** + * @brief Resolve encoding name alias and fetch real encoding name. + * @param[in] lang The encoding name for finding. + * @return + * The given encoding name if given name not present in alias map. + * Otherwise the found encoding name by given name. + */ + static std::u8string resolve_encoding_alias(const std::u8string_view& enc_name) { + auto name = op::to_lower(enc_name); + auto finder = ALIAS_MAP.find(name); + if (finder == ALIAS_MAP.end()) { + // not found, use original encoding name. + return std::u8string(enc_name); + } else { + // found, use found encoding name. 
+ return std::u8string(finder->second); + } + } #if defined(YYCC_PYCODEC_WIN32_BACKEND) - using CodePage = NS_YYCC_ENCODING_BACKEND::CodePage; + using CodePage = YYCC_PYCODEC_BACKEND_NS::CodePage; - static const std::map WINCP_MAP { - { YYCC_U8("ascii"), static_cast(437u) }, - { YYCC_U8("big5"), static_cast(950u) }, - { YYCC_U8("cp037"), static_cast(037u) }, - { YYCC_U8("cp437"), static_cast(437u) }, - { YYCC_U8("cp500"), static_cast(500u) }, - { YYCC_U8("cp720"), static_cast(720u) }, - { YYCC_U8("cp737"), static_cast(737u) }, - { YYCC_U8("cp775"), static_cast(775u) }, - { YYCC_U8("cp850"), static_cast(850u) }, - { YYCC_U8("cp852"), static_cast(852u) }, - { YYCC_U8("cp855"), static_cast(855u) }, - { YYCC_U8("cp857"), static_cast(857u) }, - { YYCC_U8("cp858"), static_cast(858u) }, - { YYCC_U8("cp860"), static_cast(860u) }, - { YYCC_U8("cp861"), static_cast(861u) }, - { YYCC_U8("cp862"), static_cast(862u) }, - { YYCC_U8("cp863"), static_cast(863u) }, - { YYCC_U8("cp864"), static_cast(864u) }, - { YYCC_U8("cp865"), static_cast(865u) }, - { YYCC_U8("cp866"), static_cast(866u) }, - { YYCC_U8("cp869"), static_cast(869u) }, - { YYCC_U8("cp874"), static_cast(874u) }, - { YYCC_U8("cp875"), static_cast(875u) }, - { YYCC_U8("cp932"), static_cast(932u) }, - { YYCC_U8("cp949"), static_cast(949u) }, - { YYCC_U8("cp950"), static_cast(950u) }, - { YYCC_U8("cp1026"), static_cast(1026u) }, - { YYCC_U8("cp1140"), static_cast(1140u) }, - { YYCC_U8("cp1250"), static_cast(1250u) }, - { YYCC_U8("cp1251"), static_cast(1251u) }, - { YYCC_U8("cp1252"), static_cast(1252u) }, - { YYCC_U8("cp1253"), static_cast(1253u) }, - { YYCC_U8("cp1254"), static_cast(1254u) }, - { YYCC_U8("cp1255"), static_cast(1255u) }, - { YYCC_U8("cp1256"), static_cast(1256u) }, - { YYCC_U8("cp1257"), static_cast(1257u) }, - { YYCC_U8("cp1258"), static_cast(1258u) }, - { YYCC_U8("euc_jp"), static_cast(20932u) }, - { YYCC_U8("euc_kr"), static_cast(51949u) }, - { YYCC_U8("gb2312"), static_cast(936u) }, - { 
YYCC_U8("gbk"), static_cast(936u) }, - { YYCC_U8("gb18030"), static_cast(54936u) }, - { YYCC_U8("hz"), static_cast(52936u) }, - { YYCC_U8("iso2022_jp"), static_cast(50220u) }, - { YYCC_U8("iso2022_kr"), static_cast(50225u) }, - { YYCC_U8("latin_1"), static_cast(28591u) }, - { YYCC_U8("iso8859_2"), static_cast(28592u) }, - { YYCC_U8("iso8859_3"), static_cast(28593u) }, - { YYCC_U8("iso8859_4"), static_cast(28594u) }, - { YYCC_U8("iso8859_5"), static_cast(28595u) }, - { YYCC_U8("iso8859_6"), static_cast(28596u) }, - { YYCC_U8("iso8859_7"), static_cast(28597u) }, - { YYCC_U8("iso8859_8"), static_cast(28598u) }, - { YYCC_U8("iso8859_9"), static_cast(28599u) }, - { YYCC_U8("iso8859_13"), static_cast(28603u) }, - { YYCC_U8("iso8859_15"), static_cast(28605u) }, - { YYCC_U8("johab"), static_cast(1361u) }, - { YYCC_U8("mac_cyrillic"), static_cast(10007u) }, - { YYCC_U8("mac_greek"), static_cast(10006u) }, - { YYCC_U8("mac_iceland"), static_cast(10079u) }, - { YYCC_U8("mac_turkish"), static_cast(10081u) }, - { YYCC_U8("shift_jis"), static_cast(932u) }, - { YYCC_U8("utf_7"), static_cast(65000u) }, - { YYCC_U8("utf_8"), static_cast(65001u) }, + static const std::map WINCP_MAP{ + {u8"ascii"sv, static_cast(437u)}, {u8"big5"sv, static_cast(950u)}, + {u8"cp037"sv, static_cast(037u)}, {u8"cp437"sv, static_cast(437u)}, + {u8"cp500"sv, static_cast(500u)}, {u8"cp720"sv, static_cast(720u)}, + {u8"cp737"sv, static_cast(737u)}, {u8"cp775"sv, static_cast(775u)}, + {u8"cp850"sv, static_cast(850u)}, {u8"cp852"sv, static_cast(852u)}, + {u8"cp855"sv, static_cast(855u)}, {u8"cp857"sv, static_cast(857u)}, + {u8"cp858"sv, static_cast(858u)}, {u8"cp860"sv, static_cast(860u)}, + {u8"cp861"sv, static_cast(861u)}, {u8"cp862"sv, static_cast(862u)}, + {u8"cp863"sv, static_cast(863u)}, {u8"cp864"sv, static_cast(864u)}, + {u8"cp865"sv, static_cast(865u)}, {u8"cp866"sv, static_cast(866u)}, + {u8"cp869"sv, static_cast(869u)}, {u8"cp874"sv, static_cast(874u)}, + {u8"cp875"sv, static_cast(875u)}, 
{u8"cp932"sv, static_cast(932u)}, + {u8"cp949"sv, static_cast(949u)}, {u8"cp950"sv, static_cast(950u)}, + {u8"cp1026"sv, static_cast(1026u)}, {u8"cp1140"sv, static_cast(1140u)}, + {u8"cp1250"sv, static_cast(1250u)}, {u8"cp1251"sv, static_cast(1251u)}, + {u8"cp1252"sv, static_cast(1252u)}, {u8"cp1253"sv, static_cast(1253u)}, + {u8"cp1254"sv, static_cast(1254u)}, {u8"cp1255"sv, static_cast(1255u)}, + {u8"cp1256"sv, static_cast(1256u)}, {u8"cp1257"sv, static_cast(1257u)}, + {u8"cp1258"sv, static_cast(1258u)}, {u8"euc_jp"sv, static_cast(20932u)}, + {u8"euc_kr"sv, static_cast(51949u)}, {u8"gb2312"sv, static_cast(936u)}, + {u8"gbk"sv, static_cast(936u)}, {u8"gb18030"sv, static_cast(54936u)}, + {u8"hz"sv, static_cast(52936u)}, {u8"iso2022_jp"sv, static_cast(50220u)}, + {u8"iso2022_kr"sv, static_cast(50225u)}, {u8"latin_1"sv, static_cast(28591u)}, + {u8"iso8859_2"sv, static_cast(28592u)}, {u8"iso8859_3"sv, static_cast(28593u)}, + {u8"iso8859_4"sv, static_cast(28594u)}, {u8"iso8859_5"sv, static_cast(28595u)}, + {u8"iso8859_6"sv, static_cast(28596u)}, {u8"iso8859_7"sv, static_cast(28597u)}, + {u8"iso8859_8"sv, static_cast(28598u)}, {u8"iso8859_9"sv, static_cast(28599u)}, + {u8"iso8859_13"sv, static_cast(28603u)}, {u8"iso8859_15"sv, static_cast(28605u)}, + {u8"johab"sv, static_cast(1361u)}, {u8"mac_cyrillic"sv, static_cast(10007u)}, + {u8"mac_greek"sv, static_cast(10006u)}, {u8"mac_iceland"sv, static_cast(10079u)}, + {u8"mac_turkish"sv, static_cast(10081u)}, {u8"shift_jis"sv, static_cast(932u)}, + {u8"utf_7"sv, static_cast(65000u)}, {u8"utf_8"sv, static_cast(65001u)}, }; + static bool fetch_code_page(const std::u8string_view& enc_name, CodePage& out_cp) { + // resolve alias + std::u8string resolved_name = resolve_encoding_alias(enc_name); + // find code page + op::lower(resolved_name); + auto finder = WINCP_MAP.find(resolved_name); + if (finder == WINCP_MAP.end()) return false; + // okey, we found it. 
+ out_cp = finder->second; + return true; + } + #else - static const std::map ICONV_MAP { - { YYCC_U8("ascii"), "ASCII" }, - { YYCC_U8("big5"), "BIG5" }, - { YYCC_U8("big5hkscs"), "BIG5-HKSCS" }, - { YYCC_U8("cp850"), "CP850" }, - { YYCC_U8("cp862"), "CP862" }, - { YYCC_U8("cp866"), "CP866" }, - { YYCC_U8("cp874"), "CP874" }, - { YYCC_U8("cp932"), "CP932" }, - { YYCC_U8("cp949"), "CP949" }, - { YYCC_U8("cp950"), "CP950" }, - { YYCC_U8("cp1250"), "CP1250" }, - { YYCC_U8("cp1251"), "CP1251" }, - { YYCC_U8("cp1252"), "CP1252" }, - { YYCC_U8("cp1253"), "CP1253" }, - { YYCC_U8("cp1254"), "CP1254" }, - { YYCC_U8("cp1255"), "CP1255" }, - { YYCC_U8("cp1256"), "CP1256" }, - { YYCC_U8("cp1257"), "CP1257" }, - { YYCC_U8("cp1258"), "CP1258" }, - { YYCC_U8("euc_jp"), "EUC-JP" }, - { YYCC_U8("euc_kr"), "EUC-KR" }, - { YYCC_U8("gb2312"), "CP936" }, - { YYCC_U8("gbk"), "GBK" }, - { YYCC_U8("gb18030"), "GB18030" }, - { YYCC_U8("hz"), "HZ" }, - { YYCC_U8("iso2022_jp"), "ISO-2022-JP" }, - { YYCC_U8("iso2022_jp_1"), "ISO-2022-JP-1" }, - { YYCC_U8("iso2022_jp_2"), "ISO-2022-JP-2" }, - { YYCC_U8("iso2022_kr"), "ISO-2022-KR" }, - { YYCC_U8("latin_1"), "ISO-8859-1" }, - { YYCC_U8("iso8859_2"), "ISO-8859-2" }, - { YYCC_U8("iso8859_3"), "ISO-8859-3" }, - { YYCC_U8("iso8859_4"), "ISO-8859-4" }, - { YYCC_U8("iso8859_5"), "ISO-8859-5" }, - { YYCC_U8("iso8859_6"), "ISO-8859-6" }, - { YYCC_U8("iso8859_7"), "ISO-8859-7" }, - { YYCC_U8("iso8859_8"), "ISO-8859-8" }, - { YYCC_U8("iso8859_9"), "ISO-8859-9" }, - { YYCC_U8("iso8859_10"), "ISO-8859-10" }, - { YYCC_U8("iso8859_11"), "ISO-8859-11" }, - { YYCC_U8("iso8859_13"), "ISO-8859-13" }, - { YYCC_U8("iso8859_14"), "ISO-8859-14" }, - { YYCC_U8("iso8859_15"), "ISO-8859-15" }, - { YYCC_U8("iso8859_16"), "ISO-8859-16" }, - { YYCC_U8("johab"), "JOHAB" }, - { YYCC_U8("koi8_t"), "KOI8-T" }, - { YYCC_U8("mac_cyrillic"), "MacCyrillic" }, - { YYCC_U8("mac_greek"), "MacGreek" }, - { YYCC_U8("mac_iceland"), "MacIceland" }, - { YYCC_U8("mac_roman"), "MacRoman" 
}, - { YYCC_U8("mac_turkish"), "MacTurkish" }, - { YYCC_U8("ptcp154"), "PT154" }, - { YYCC_U8("shift_jis"), "SHIFT_JIS" }, - { YYCC_U8("utf_32"), "UTF-32" }, - { YYCC_U8("utf_32_be"), "UTF-32BE" }, - { YYCC_U8("utf_32_le"), "UTF-32LE" }, - { YYCC_U8("utf_16"), "UTF16" }, - { YYCC_U8("utf_16_be"), "UTF-16BE" }, - { YYCC_U8("utf_16_le"), "UTF-16LE" }, - { YYCC_U8("utf_7"), "UTF-7" }, - { YYCC_U8("utf_8"), "UTF-8" }, + static const std::map ICONV_MAP{ + {u8"ascii"sv, "ASCII"sv}, + {u8"big5"sv, "BIG5"sv}, + {u8"big5hkscs"sv, "BIG5-HKSCS"sv}, + {u8"cp850"sv, "CP850"sv}, + {u8"cp862"sv, "CP862"sv}, + {u8"cp866"sv, "CP866"sv}, + {u8"cp874"sv, "CP874"sv}, + {u8"cp932"sv, "CP932"sv}, + {u8"cp949"sv, "CP949"sv}, + {u8"cp950"sv, "CP950"sv}, + {u8"cp1250"sv, "CP1250"sv}, + {u8"cp1251"sv, "CP1251"sv}, + {u8"cp1252"sv, "CP1252"sv}, + {u8"cp1253"sv, "CP1253"sv}, + {u8"cp1254"sv, "CP1254"sv}, + {u8"cp1255"sv, "CP1255"sv}, + {u8"cp1256"sv, "CP1256"sv}, + {u8"cp1257"sv, "CP1257"sv}, + {u8"cp1258"sv, "CP1258"sv}, + {u8"euc_jp"sv, "EUC-JP"sv}, + {u8"euc_kr"sv, "EUC-KR"sv}, + {u8"gb2312"sv, "CP936"sv}, + {u8"gbk"sv, "GBK"sv}, + {u8"gb18030"sv, "GB18030"sv}, + {u8"hz"sv, "HZ"sv}, + {u8"iso2022_jp"sv, "ISO-2022-JP"sv}, + {u8"iso2022_jp_1"sv, "ISO-2022-JP-1"sv}, + {u8"iso2022_jp_2"sv, "ISO-2022-JP-2"sv}, + {u8"iso2022_kr"sv, "ISO-2022-KR"sv}, + {u8"latin_1"sv, "ISO-8859-1"sv}, + {u8"iso8859_2"sv, "ISO-8859-2"sv}, + {u8"iso8859_3"sv, "ISO-8859-3"sv}, + {u8"iso8859_4"sv, "ISO-8859-4"sv}, + {u8"iso8859_5"sv, "ISO-8859-5"sv}, + {u8"iso8859_6"sv, "ISO-8859-6"sv}, + {u8"iso8859_7"sv, "ISO-8859-7"sv}, + {u8"iso8859_8"sv, "ISO-8859-8"sv}, + {u8"iso8859_9"sv, "ISO-8859-9"sv}, + {u8"iso8859_10"sv, "ISO-8859-10"sv}, + {u8"iso8859_11"sv, "ISO-8859-11"sv}, + {u8"iso8859_13"sv, "ISO-8859-13"sv}, + {u8"iso8859_14"sv, "ISO-8859-14"sv}, + {u8"iso8859_15"sv, "ISO-8859-15"sv}, + {u8"iso8859_16"sv, "ISO-8859-16"sv}, + {u8"johab"sv, "JOHAB"sv}, + {u8"koi8_t"sv, "KOI8-T"sv}, + {u8"mac_cyrillic"sv, 
"MacCyrillic"sv}, + {u8"mac_greek"sv, "MacGreek"sv}, + {u8"mac_iceland"sv, "MacIceland"sv}, + {u8"mac_roman"sv, "MacRoman"sv}, + {u8"mac_turkish"sv, "MacTurkish"sv}, + {u8"ptcp154"sv, "PT154"sv}, + {u8"shift_jis"sv, "SHIFT_JIS"sv}, + {u8"utf_32"sv, "UTF-32"sv}, + {u8"utf_32_be"sv, "UTF-32BE"sv}, + {u8"utf_32_le"sv, "UTF-32LE"sv}, + {u8"utf_16"sv, "UTF16"sv}, + {u8"utf_16_be"sv, "UTF-16BE"sv}, + {u8"utf_16_le"sv, "UTF-16LE"sv}, + {u8"utf_7"sv, "UTF-7"sv}, + {u8"utf_8"sv, "UTF-8"sv}, }; -#endif + static bool fetch_iconv_name(const std::u8string_view& enc_name, std::string& out_code) { + // resolve alias + std::u8string resolved_name = resolve_encoding_alias(enc_name); + // find code page + op::lower(resolved_name); + auto finder = ICONV_MAP.find(resolved_name); + if (finder == ICONV_MAP.end()) return false; + // okey, we found it. + out_code = finder->second; + return true; + } +#endif #pragma endregion @@ -362,12 +373,27 @@ namespace yycc::encoding::pycodec { ConvError::ConvError(const ConvError::Error& err) : inner(err) {} bool is_valid_encoding_name(const EncodingName& name) { - +#if defined(YYCC_PYCODEC_WIN32_BACKEND) + CodePage unused; + return fetch_code_page(name, unused); +#else + std::string unused; + return fetch_iconv_name(name, unused); +#endif } #pragma endregion -#pragma region +#pragma region Char -> UTF8 + + CharToUtf8::CharToUtf8(const EncodingName& name) : +#if defined(YYCC_PYCODEC_WIN32_BACKEND) + code_page(fetch) +#else + inner(fetch_iconv_name()) +#endif + {} + #pragma endregion diff --git a/src/yycc/encoding/pycodec.hpp b/src/yycc/encoding/pycodec.hpp index ea15f8a..0c52e11 100644 --- a/src/yycc/encoding/pycodec.hpp +++ b/src/yycc/encoding/pycodec.hpp @@ -1,37 +1,43 @@ #pragma once #include "../macro/os_detector.hpp" +#include "../macro/stl_detector.hpp" #include "../macro/class_copy_move.hpp" -#include "../patch/expected.hpp" -#include "../string.hpp" +#include +#include +#include // Choose the backend of PyCodec module -#if 
defined(YYCC_OS_WINDOWS) +#if defined(YYCC_OS_WINDOWS) && defined(YYCC_STL_MSSTL) #include "windows.hpp" #define YYCC_PYCODEC_WIN32_BACKEND -#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::windows -#else +#define YYCC_PYCODEC_BACKEND_NS ::yycc::encoding::windows +#elif YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS) #include "iconv.hpp" #define YYCC_PYCODEC_ICONV_BACKEND -#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::iconv +#define YYCC_PYCODEC_BACKEND_NS ::yycc::encoding::iconv +#else +#error "Can not find viable encoding convertion solution in current environment for PyCodec module." #endif -#define NS_YYCC_STRING ::yycc::string -#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected - namespace yycc::encoding::pycodec { - using EncodingName = NS_YYCC_STRING::u8string_view; + /// @brief The universal name of encoding. + using EncodingName = std::u8string_view; - /// @private - struct ConvError { - using Error = NS_YYCC_ENCODING_BACKEND::ConvError; + /// @brief The possible error occurs in this module. + class ConvError { + public: + using Error = YYCC_PYCODEC_BACKEND_NS::ConvError; ConvError(const Error& err); + YYCC_DEFAULT_COPY_MOVE(ConvError) + + private: Error inner; }; - /// @private + /// @brief The result type of this module. template - using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected; + using ConvResult = std::expected; /** * @brief Check whether given name is a valid encoding name in PyCodec. 
@@ -40,7 +46,7 @@ namespace yycc::encoding::pycodec { */ bool is_valid_encoding_name(const EncodingName& name); - // Char -> UTF8 + /// @brief Char -> UTF8 class CharToUtf8 { public: CharToUtf8(const EncodingName& name); @@ -49,19 +55,17 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(CharToUtf8) public: - ConvResult priv_to_utf8(const std::string_view& src); - bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst); - NS_YYCC_STRING::u8string to_utf8(const std::string_view& src); + ConvResult to_utf8(const std::string_view& src); private: #if defined(YYCC_PYCODEC_WIN32_BACKEND) - NS_YYCC_ENCODING_BACKEND::CodePage code_page; + YYCC_PYCODEC_BACKEND_NS::CodePage code_page; #else - NS_YYCC_ENCODING_BACKEND::CharToUtf8 inner; + YYCC_PYCODEC_BACKEND_NS::CharToUtf8 inner; #endif }; - // UTF8 -> Char + /// @brief UTF8 -> Char class Utf8ToChar { public: Utf8ToChar(const EncodingName& name); @@ -70,19 +74,17 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(Utf8ToChar) public: - ConvResult priv_to_char(const NS_YYCC_STRING::u8string_view& src); - bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst); - std::string to_char(const NS_YYCC_STRING::u8string_view& src); + ConvResult to_char(const std::u8string_view& src); private: #if defined(YYCC_PYCODEC_WIN32_BACKEND) - NS_YYCC_ENCODING_BACKEND::CodePage code_page; + YYCC_PYCODEC_BACKEND_NS::CodePage code_page; #else - NS_YYCC_ENCODING_BACKEND::Utf8ToChar inner; + YYCC_PYCODEC_BACKEND_NS::Utf8ToChar inner; #endif }; - // WChar -> UTF8 + /// @brief WChar -> UTF8 class WcharToUtf8 { public: WcharToUtf8(); @@ -91,17 +93,15 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(WcharToUtf8) public: - ConvResult priv_to_utf8(const std::wstring_view& src); - bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst); - NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src); + ConvResult to_utf8(const std::wstring_view& src); private: #if 
defined(YYCC_PYCODEC_ICONV_BACKEND) - NS_YYCC_ENCODING_BACKEND::WcharToUtf8 inner; + YYCC_PYCODEC_BACKEND_NS::WcharToUtf8 inner; #endif }; - // UTF8 -> WChar + /// @brief UTF8 -> WChar class Utf8ToWchar { public: Utf8ToWchar(); @@ -110,17 +110,15 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(Utf8ToWchar) public: - ConvResult priv_to_wchar(const NS_YYCC_STRING::u8string_view& src); - bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst); - std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src); + ConvResult to_wchar(const std::u8string_view& src); private: #if defined(YYCC_PYCODEC_ICONV_BACKEND) - NS_YYCC_ENCODING_BACKEND::Utf8ToWchar inner; + YYCC_PYCODEC_BACKEND_NS::Utf8ToWchar inner; #endif }; - // UTF8 -> UTF16 + /// @brief UTF8 -> UTF16 class Utf8ToUtf16 { public: Utf8ToUtf16(); @@ -129,17 +127,15 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(Utf8ToUtf16) public: - ConvResult priv_to_utf16(const NS_YYCC_STRING::u8string_view& src); - bool to_utf16(const NS_YYCC_STRING::u8string_view& src, std::u16string& dst); - std::u16string to_utf16(const NS_YYCC_STRING::u8string_view& src); + ConvResult to_utf16(const std::u8string_view& src); private: #if defined(YYCC_PYCODEC_ICONV_BACKEND) - NS_YYCC_ENCODING_BACKEND::Utf8ToUtf16 inner; + YYCC_PYCODEC_BACKEND_NS::Utf8ToUtf16 inner; #endif }; - // UTF16 -> UTF8 + /// @brief UTF16 -> UTF8 class Utf16ToUtf8 { public: Utf16ToUtf8(); @@ -148,17 +144,15 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(Utf16ToUtf8) public: - ConvResult priv_to_utf8(const std::u16string_view& src); - bool to_utf8(const std::u16string_view& src, NS_YYCC_STRING::u8string& dst); - NS_YYCC_STRING::u8string to_utf8(const std::u16string_view& src); + ConvResult to_utf8(const std::u16string_view& src); private: #if defined(YYCC_PYCODEC_ICONV_BACKEND) - NS_YYCC_ENCODING_BACKEND::Utf16ToUtf8 inner; + YYCC_PYCODEC_BACKEND_NS::Utf16ToUtf8 inner; #endif }; - // UTF8 -> UTF32 + /// @brief UTF8 -> 
UTF32 class Utf8ToUtf32 { public: Utf8ToUtf32(); @@ -167,17 +161,15 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(Utf8ToUtf32) public: - ConvResult priv_to_utf32(const NS_YYCC_STRING::u8string_view& src); - bool to_utf32(const NS_YYCC_STRING::u8string_view& src, std::u32string& dst); - std::u32string to_utf32(const NS_YYCC_STRING::u8string_view& src); + ConvResult to_utf32(const std::u8string_view& src); private: #if defined(YYCC_PYCODEC_ICONV_BACKEND) - NS_YYCC_ENCODING_BACKEND::Utf8ToUtf32 inner; + YYCC_PYCODEC_BACKEND_NS::Utf8ToUtf32 inner; #endif }; - // UTF32 -> UTF8 + /// @brief UTF32 -> UTF8 class Utf32ToUtf8 { public: Utf32ToUtf8(); @@ -186,17 +178,12 @@ namespace yycc::encoding::pycodec { YYCC_DEFAULT_MOVE(Utf32ToUtf8) public: - ConvResult priv_to_utf8(const std::u32string_view& src); - bool to_utf8(const std::u32string_view& src, NS_YYCC_STRING::u8string& dst); - NS_YYCC_STRING::u8string to_utf8(const std::u32string_view& src); + ConvResult to_utf8(const std::u32string_view& src); private: #if defined(YYCC_PYCODEC_ICONV_BACKEND) - NS_YYCC_ENCODING_BACKEND::Utf32ToUtf8 inner; + YYCC_PYCODEC_BACKEND_NS::Utf32ToUtf8 inner; #endif }; -} - -#undef NS_YYCC_PATCH_EXPECTED -#undef NS_YYCC_STRING +} // namespace yycc::encoding::pycodec diff --git a/src/yycc/encoding/windows.cpp b/src/yycc/encoding/windows.cpp index 474113e..bcda2dd 100644 --- a/src/yycc/encoding/windows.cpp +++ b/src/yycc/encoding/windows.cpp @@ -120,6 +120,8 @@ namespace yycc::encoding::windows { // * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170 // * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170 +#if defined(YYCC_STL_MSSTL) + // 1 UTF32 unit can produe 4 UTF8 units or 2 UTF16 units in theory. // So we pre-allocate memory for the result to prevent allocating memory multiple times. 
constexpr size_t MULTIPLE_UTF8_TO_UTF16 = 1u; @@ -206,6 +208,8 @@ namespace yycc::encoding::windows { return dst; } +#endif + #pragma endregion } // namespace yycc::encoding::windows diff --git a/src/yycc/encoding/windows.hpp b/src/yycc/encoding/windows.hpp index 762372b..1a11d54 100644 --- a/src/yycc/encoding/windows.hpp +++ b/src/yycc/encoding/windows.hpp @@ -1,8 +1,6 @@ #pragma once #include "../macro/os_detector.hpp" - -#if defined(YYCC_OS_WINDOWS) - +#include "../macro/stl_detector.hpp" #include #include #include @@ -28,6 +26,8 @@ namespace yycc::encoding::windows { template using ConvResult = std::expected; +#if defined(YYCC_OS_WINDOWS) + /** * @brief WChar -> Char * @param src @@ -88,6 +88,12 @@ namespace yycc::encoding::windows { */ ConvResult to_char(const std::u8string_view& src, CodePage code_page); + // YYC MARK: + // UTF convertion only works on Microsoft STL. + // See implementation for more details + +#if defined(YYCC_STL_MSSTL) + /** * @brief UTF8 -> UTF16 * @param src @@ -116,6 +122,8 @@ namespace yycc::encoding::windows { */ ConvResult to_utf8(const std::u32string_view& src); -} // namespace yycc::encoding::windows +#endif #endif + +} // namespace yycc::encoding::windows diff --git a/src/yycc/macro/stl_detector.hpp b/src/yycc/macro/stl_detector.hpp new file mode 100644 index 0000000..100333f --- /dev/null +++ b/src/yycc/macro/stl_detector.hpp @@ -0,0 +1,14 @@ +#pragma once + +// Include a common used STL header for convenient test. +#include + +#if defined(_MSVC_STL_VERSION) +#define YYCC_STL_MSSTL +#elif defined(__GLIBCXX__) || defined(__GLIBCPP__) +#define YYCC_STL_GNUSTL +#elif defined(_LIBCPP_VERSION) +#define YYCC_STL_CLANGSTL +#else +#error "Current STL is not supported!" 
+#endif diff --git a/src/yycc/num/parse.hpp b/src/yycc/num/parse.hpp index ef2a1dc..795f91d 100644 --- a/src/yycc/num/parse.hpp +++ b/src/yycc/num/parse.hpp @@ -40,7 +40,7 @@ namespace yycc::num::parse { */ template requires(std::is_floating_point_v) - ParseResult parse(const std::u8string_view& strl, std::chars_format fmt) { + ParseResult parse(const std::u8string_view& strl, std::chars_format fmt = std::chars_format::general) { namespace reinterpret = NS_YYCC_STRING_REINTERPRET; T rv; @@ -75,7 +75,7 @@ namespace yycc::num::parse { */ template requires(std::is_integral_v && !std::is_same_v) - ParseResult parse(const std::u8string_view& strl, int base) { + ParseResult parse(const std::u8string_view& strl, int base = 10) { namespace reinterpret = NS_YYCC_STRING_REINTERPRET; T rv; @@ -115,7 +115,7 @@ namespace yycc::num::parse { // Compare result if (lower_case == u8"true") return true; else if (lower_case == u8"false") return false; - else return ParseError::InvalidString; + else return std::unexpected(ParseError::InvalidString); } } // namespace yycc::num::parse diff --git a/testbench/CMakeLists.txt b/testbench/CMakeLists.txt index b004aff..e4e97e4 100644 --- a/testbench/CMakeLists.txt +++ b/testbench/CMakeLists.txt @@ -10,18 +10,12 @@ PRIVATE yycc/string/reinterpret.cpp yycc/num/parse.cpp yycc/num/stringify.cpp - yycc/rust/num/parse.cpp - yycc/rust/num/stringify.cpp - yycc/patch/contains.cpp - yycc/patch/starts_ends_with.cpp ) target_sources(YYCCTestbench PRIVATE FILE_SET HEADERS FILES - shared/parse_template.hpp - shared/stringify_template.hpp - shared/utf_literal.hpp + yycc/encoding/utf_literal.hpp ) # Setup headers target_include_directories(YYCCTestbench diff --git a/testbench/shared/parse_template.hpp b/testbench/shared/parse_template.hpp deleted file mode 100644 index 0bf2b18..0000000 --- a/testbench/shared/parse_template.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * \file - * This file is a template for Parse function testing. 
- * - * As you seen that there is 2 style Parse function locate in main namespace and Rust namespace respectively. - * Both of them share the exactly same test data sheet. - * So it is good idea to extract these common part and put them into a place, and include it in respectively testbench file. - * That what this file does. - * - * Before including this template file, you must make sure that: - * \li Have include - * \li Have include - * \li Have define a macro named \c TEST_NS which indicate the testbench namespace passed to gtest. - * \li Have define a macro with syntax TEST_SUCCESS(type_t, value, string_value, ...). - * This macro will be called for those success case. \c type_t is the generic type of Parse function. - * \c value is the expected value after parse and \c string_value is the string value to be parsed. - * Other arguments should be redirect to corresponding Parse function. - * \li Have define a macro with syntax TEST_FAIL(type_t, string_value, ...). - * Opposite with \c TEST_SUCCESS, this macro is for those bad case testing. - * All arguments have the same meaning with \c TEST_SUCCESS. 
- * -*/ - -TEST(TEST_NS, Common) { - TEST_SUCCESS(i8, INT8_C(-61), "-61"); - TEST_SUCCESS(u8, UINT8_C(200), "200"); - TEST_SUCCESS(i16, INT16_C(6161), "6161"); - TEST_SUCCESS(u16, UINT16_C(32800), "32800"); - TEST_SUCCESS(i32, INT32_C(61616161), "61616161"); - TEST_SUCCESS(u32, UINT32_C(4294967293), "4294967293"); - TEST_SUCCESS(i64, INT64_C(616161616161), "616161616161"); - TEST_SUCCESS(u64, UINT64_C(9223372036854775807), "9223372036854775807"); - - TEST_SUCCESS(float, 1.0f, "1.0"); - TEST_SUCCESS(double, 1.0, "1.0"); - - TEST_SUCCESS(bool, true, "true"); - TEST_SUCCESS(bool, false, "false"); -} - -TEST(TEST_NS, Radix) { - TEST_SUCCESS(u32, UINT32_C(0xffff), "ffff", 16); - TEST_SUCCESS(u32, UINT32_C(032), "032", 8); - TEST_SUCCESS(u32, UINT32_C(0B1011), "1011", 2); -} - -TEST(TEST_NS, CaseInsensitive) { - TEST_SUCCESS(bool, true, "tRUE"); -} - -TEST(TEST_NS, Overflow) { - TEST_FAIL(i8, "6161"); - TEST_FAIL(u8, "32800"); - TEST_FAIL(i16, "61616161"); - TEST_FAIL(u16, "4294967293"); - TEST_FAIL(i32, "616161616161"); - TEST_FAIL(u32, "9223372036854775807"); - TEST_FAIL(i64, "616161616161616161616161"); - TEST_FAIL(u64, "92233720368547758079223372036854775807"); - - TEST_FAIL(float, "1e40"); - TEST_FAIL(double, "1e114514"); -} - -TEST(TEST_NS, BadRadix) { - TEST_FAIL(u32, "fghj", 16); - TEST_FAIL(u32, "099", 8); - TEST_FAIL(u32, "12345", 2); -} - -TEST(TEST_NS, InvalidWords) { - TEST_FAIL(u32, "hello, world!"); - TEST_FAIL(bool, "hello, world!"); -} diff --git a/testbench/shared/stringify_template.hpp b/testbench/shared/stringify_template.hpp deleted file mode 100644 index 9cd259c..0000000 --- a/testbench/shared/stringify_template.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/** - * \file - * This file is a template for Stringify function testing. - * - * Same as parse_template.hpp . 
- * - * Before including this template file, you must make sure that: - * \li Have include - * \li Have include - * \li Have define a macro named \c TEST_NS which indicate the testbench namespace passed to gtest. - * \li Have define a macro with syntax TEST_SUCCESS(type_t, value, string_value, ...). - * This macro will be called for those success case. \c type_t is the generic type of Stringify function. - * \c value is the value will be stringified and \c string_value is the expected string. - * Other arguments should be redirect to corresponding Stringify function. - * -*/ - -TEST(TEST_NS, Common) { - TEST_SUCCESS(i8, INT8_C(-61), "-61"); - TEST_SUCCESS(u8, UINT8_C(200), "200"); - TEST_SUCCESS(i16, INT16_C(6161), "6161"); - TEST_SUCCESS(u16, UINT16_C(32800), "32800"); - TEST_SUCCESS(i32, INT32_C(61616161), "61616161"); - TEST_SUCCESS(u32, UINT32_C(4294967293), "4294967293"); - TEST_SUCCESS(i64, INT64_C(616161616161), "616161616161"); - TEST_SUCCESS(u64, UINT64_C(9223372036854775807), "9223372036854775807"); - - TEST_SUCCESS(float, 1.0f, "1.0", std::chars_format::fixed, 1); - TEST_SUCCESS(double, 1.0, "1.0", std::chars_format::fixed, 1); - - TEST_SUCCESS(bool, true, "true"); - TEST_SUCCESS(bool, false, "false"); -} - -TEST(TEST_NS, Radix) { - TEST_SUCCESS(u32, UINT32_C(0xffff), "ffff", 16); - TEST_SUCCESS(u32, UINT32_C(032), "32", 8); - TEST_SUCCESS(u32, UINT32_C(0B1011), "1011", 2); -} diff --git a/testbench/yycc/constraint.cpp b/testbench/yycc/constraint.cpp index 66509e4..4f3c245 100644 --- a/testbench/yycc/constraint.cpp +++ b/testbench/yycc/constraint.cpp @@ -1,7 +1,8 @@ #include #include #include -#include + +#include #define CONSTRAINT ::yycc::constraint::Constraint diff --git a/testbench/yycc/constraint/builder.cpp b/testbench/yycc/constraint/builder.cpp index f043533..e007267 100644 --- a/testbench/yycc/constraint/builder.cpp +++ b/testbench/yycc/constraint/builder.cpp @@ -2,9 +2,10 @@ #include #include -#include +#include #define BUILDER 
::yycc::constraint::builder +using namespace std::literals::string_view_literals; namespace yycctest::constraint::builder { @@ -56,8 +57,7 @@ namespace yycctest::constraint::builder { enum class TestEnum : u8 { Entry1 = 0, Entry2 = 1, Entry3 = 2 }; TEST(ConstraintBuilder, EnumConstraint) { - auto c = BUILDER::enum_constraint({TestEnum::Entry1, TestEnum::Entry2, TestEnum::Entry3}, - 1u); + auto c = BUILDER::enum_constraint({TestEnum::Entry1, TestEnum::Entry2, TestEnum::Entry3}, 1u); ASSERT_TRUE(c.support_check()); ASSERT_TRUE(c.support_clamp()); TEST_SUCCESS(c, TestEnum::Entry1); @@ -67,16 +67,13 @@ namespace yycctest::constraint::builder { } TEST(ConstraintBuilder, StrEnumConstraint) { - auto c = BUILDER::strenum_constraint({YYCC_U8("first-entry"), - YYCC_U8("second-entry"), - YYCC_U8("third-entry")}, - 1u); + auto c = BUILDER::strenum_constraint({u8"first-entry"sv, u8"second-entry"sv, u8"third-entry"sv}, 1u); ASSERT_TRUE(c.support_check()); ASSERT_TRUE(c.support_clamp()); - TEST_SUCCESS(c, YYCC_U8("first-entry")); - TEST_SUCCESS(c, YYCC_U8("second-entry")); - TEST_SUCCESS(c, YYCC_U8("third-entry")); - TEST_FAIL(c, YYCC_U8("wtf?"), YYCC_U8("second-entry")); + TEST_SUCCESS(c, u8"first-entry"); + TEST_SUCCESS(c, u8"second-entry"); + TEST_SUCCESS(c, u8"third-entry"); + TEST_FAIL(c, u8"wtf?", u8"second-entry"); } } // namespace yycctest::constraint::builder diff --git a/testbench/shared/utf_literal.hpp b/testbench/yycc/encoding/utf_literal.hpp similarity index 100% rename from testbench/shared/utf_literal.hpp rename to testbench/yycc/encoding/utf_literal.hpp diff --git a/testbench/yycc/num/parse.cpp b/testbench/yycc/num/parse.cpp index 71982a6..8ce15b3 100644 --- a/testbench/yycc/num/parse.cpp +++ b/testbench/yycc/num/parse.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #define PARSE ::yycc::num::parse @@ -11,25 +11,71 @@ namespace yycctest::num::parse { // These 2 test macros build string container via given string. 
// Check `try_parse` first, and then check `parse`. -#define TEST_NS NumParse - -#define TEST_SUCCESS(type_t, value, string_value, ...) \ +#define TEST_SUCCESS(type_t, expected_value, string_value, ...) \ { \ - u8string cache_string(YYCC_U8(string_value)); \ - type_t cache; \ - ASSERT_TRUE(PARSE::try_parse(cache_string, cache, ##__VA_ARGS__)); \ - EXPECT_EQ(cache, value); \ - EXPECT_EQ(PARSE::parse(cache_string, ##__VA_ARGS__), value); \ + std::u8string cache_string(string_value); \ + auto rv = PARSE::parse(cache_string __VA_OPT__(, ) __VA_ARGS__); \ + ASSERT_TRUE(rv.has_value()); \ + EXPECT_EQ(rv.value(), expected_value); \ } #define TEST_FAIL(type_t, string_value, ...) \ { \ - u8string cache_string(YYCC_U8(string_value)); \ - type_t cache; \ - EXPECT_FALSE(PARSE::try_parse(cache_string, cache, ##__VA_ARGS__)); \ - EXPECT_ANY_THROW(PARSE::parse(cache_string, ##__VA_ARGS__)); \ + std::u8string cache_string(string_value); \ + auto rv = PARSE::parse(cache_string __VA_OPT__(, ) __VA_ARGS__); \ + EXPECT_FALSE(rv.has_value()); \ } -#include "../../shared/parse_template.hpp" + TEST(NumParse, Common) { + TEST_SUCCESS(i8, INT8_C(-61), u8"-61"); + TEST_SUCCESS(u8, UINT8_C(200), u8"200"); + TEST_SUCCESS(i16, INT16_C(6161), u8"6161"); + TEST_SUCCESS(u16, UINT16_C(32800), u8"32800"); + TEST_SUCCESS(i32, INT32_C(61616161), u8"61616161"); + TEST_SUCCESS(u32, UINT32_C(4294967293), u8"4294967293"); + TEST_SUCCESS(i64, INT64_C(616161616161), u8"616161616161"); + TEST_SUCCESS(u64, UINT64_C(9223372036854775807), u8"9223372036854775807"); + + TEST_SUCCESS(float, 1.0f, u8"1.0"); + TEST_SUCCESS(double, 1.0, u8"1.0"); + + TEST_SUCCESS(bool, true, u8"true"); + TEST_SUCCESS(bool, false, u8"false"); + } + + TEST(NumParse, Radix) { + TEST_SUCCESS(u32, UINT32_C(0xffff), u8"ffff", 16); + TEST_SUCCESS(u32, UINT32_C(032), u8"032", 8); + TEST_SUCCESS(u32, UINT32_C(0B1011), u8"1011", 2); + } + + TEST(NumParse, CaseInsensitive) { + TEST_SUCCESS(bool, true, u8"tRUE"); + } + + TEST(NumParse, 
Overflow) { + TEST_FAIL(i8, u8"6161"); + TEST_FAIL(u8, u8"32800"); + TEST_FAIL(i16, u8"61616161"); + TEST_FAIL(u16, u8"4294967293"); + TEST_FAIL(i32, u8"616161616161"); + TEST_FAIL(u32, u8"9223372036854775807"); + TEST_FAIL(i64, u8"616161616161616161616161"); + TEST_FAIL(u64, u8"92233720368547758079223372036854775807"); + + TEST_FAIL(float, u8"1e40"); + TEST_FAIL(double, u8"1e114514"); + } + + TEST(NumParse, BadRadix) { + TEST_FAIL(u32, u8"fghj", 16); + TEST_FAIL(u32, u8"099", 8); + TEST_FAIL(u32, u8"12345", 2); + } + + TEST(NumParse, InvalidWords) { + TEST_FAIL(u32, u8"hello, world!"); + TEST_FAIL(bool, u8"hello, world!"); + } } // namespace yycctest::num::parse diff --git a/testbench/yycc/num/stringify.cpp b/testbench/yycc/num/stringify.cpp index 909fe12..896d182 100644 --- a/testbench/yycc/num/stringify.cpp +++ b/testbench/yycc/num/stringify.cpp @@ -2,21 +2,40 @@ #include #include -#include +#include #define STRINGIFY ::yycc::num::stringify namespace yycctest::num::stringify { -#define TEST_NS NumStringify - #define TEST_SUCCESS(type_t, value, string_value, ...) 
\ { \ type_t cache = value; \ - u8string ret = STRINGIFY::stringify(cache, ##__VA_ARGS__); \ - EXPECT_EQ(ret, YYCC_U8(string_value)); \ + std::u8string ret = STRINGIFY::stringify(cache __VA_OPT__(, ) __VA_ARGS__); \ + EXPECT_EQ(ret, string_value); \ } -#include "../../shared/stringify_template.hpp" + TEST(NumStringify, Common) { + TEST_SUCCESS(i8, INT8_C(-61), u8"-61"); + TEST_SUCCESS(u8, UINT8_C(200), u8"200"); + TEST_SUCCESS(i16, INT16_C(6161), u8"6161"); + TEST_SUCCESS(u16, UINT16_C(32800), u8"32800"); + TEST_SUCCESS(i32, INT32_C(61616161), u8"61616161"); + TEST_SUCCESS(u32, UINT32_C(4294967293), u8"4294967293"); + TEST_SUCCESS(i64, INT64_C(616161616161), u8"616161616161"); + TEST_SUCCESS(u64, UINT64_C(9223372036854775807), u8"9223372036854775807"); -} // namespace yycctest::string::stringify + TEST_SUCCESS(float, 1.0f, u8"1.0", std::chars_format::fixed, 1); + TEST_SUCCESS(double, 1.0, u8"1.0", std::chars_format::fixed, 1); + + TEST_SUCCESS(bool, true, u8"true"); + TEST_SUCCESS(bool, false, u8"false"); + } + + TEST(NumStringify, Radix) { + TEST_SUCCESS(u32, UINT32_C(0xffff), u8"ffff", 16); + TEST_SUCCESS(u32, UINT32_C(032), u8"32", 8); + TEST_SUCCESS(u32, UINT32_C(0B1011), u8"1011", 2); + } + +} // namespace yycctest::num::stringify diff --git a/testbench/yycc/patch/contains.cpp b/testbench/yycc/patch/contains.cpp deleted file mode 100644 index 2bb3fe7..0000000 --- a/testbench/yycc/patch/contains.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include - -#define FN_CONTAINS ::yycc::patch::contains::contains - -namespace yycctest::patch::contains { - - TEST(PatchContains, Contains) { - // Set - { - std::set collection{ - UINT32_C(1), - UINT32_C(3), - UINT32_C(5), - }; - EXPECT_TRUE(FN_CONTAINS(collection, 1)); - EXPECT_FALSE(FN_CONTAINS(collection, 2)); - } - // Unordered set - { - std::unordered_set collection{ - UINT32_C(1), - UINT32_C(3), - UINT32_C(5), - }; - EXPECT_TRUE(FN_CONTAINS(collection, 1)); 
- EXPECT_FALSE(FN_CONTAINS(collection, 2)); - } - // Map - { - std::map collection{ - {UINT32_C(1), UINT32_C(2)}, - {UINT32_C(3), UINT32_C(4)}, - {UINT32_C(5), UINT32_C(6)}, - }; - EXPECT_TRUE(FN_CONTAINS(collection, 1)); - EXPECT_FALSE(FN_CONTAINS(collection, 2)); - } - // Unordered Map - { - std::unordered_map collection{ - {UINT32_C(1), UINT32_C(2)}, - {UINT32_C(3), UINT32_C(4)}, - {UINT32_C(5), UINT32_C(6)}, - }; - EXPECT_TRUE(FN_CONTAINS(collection, 1)); - EXPECT_FALSE(FN_CONTAINS(collection, 2)); - } - } - -} // namespace yycctest::patch::contains diff --git a/testbench/yycc/patch/starts_ends_with.cpp b/testbench/yycc/patch/starts_ends_with.cpp deleted file mode 100644 index efe34ae..0000000 --- a/testbench/yycc/patch/starts_ends_with.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#include - -#define FN_STARTS_WITH ::yycc::patch::starts_ends_with::starts_with -#define FN_ENDS_WITH ::yycc::patch::starts_ends_with::ends_with -using namespace std::literals; - -namespace yycctest::patch::starts_ends_with { - -#define TEST_STARTS_WITH(prefix) \ - { \ - const auto that = prefix##"Hello, C++20!"s; \ - EXPECT_TRUE(FN_STARTS_WITH(that, prefix##"He"sv)); \ - EXPECT_FALSE(FN_STARTS_WITH(that, prefix##"he"sv)); \ - EXPECT_TRUE(FN_STARTS_WITH(that, prefix##'H')); \ - EXPECT_FALSE(FN_STARTS_WITH(that, prefix##'h')); \ - EXPECT_TRUE(FN_STARTS_WITH(that, prefix##"He")); \ - EXPECT_FALSE(FN_STARTS_WITH(that, prefix##"he")); \ - } - - TEST(PatchStartsEndsWith, StartsWith) { - // Ordinary string - TEST_STARTS_WITH(); - // Wide string - TEST_STARTS_WITH(L); - // UTF8 string - TEST_STARTS_WITH(u8); - // UTF-16 string - TEST_STARTS_WITH(u); - // UTF-32 string - TEST_STARTS_WITH(U); - } - -#define TEST_ENDS_WITH(prefix) \ - { \ - const auto that = prefix##"Hello, C++20!"s; \ - EXPECT_TRUE(FN_ENDS_WITH(that, prefix##"C++20!"sv)); \ - EXPECT_FALSE(FN_ENDS_WITH(that, prefix##"c++20!"sv)); \ - EXPECT_TRUE(FN_ENDS_WITH(that, prefix##'!')); \ - EXPECT_FALSE(FN_ENDS_WITH(that, 
prefix##'?')); \ - EXPECT_TRUE(FN_ENDS_WITH(that, prefix##"C++20!")); \ - EXPECT_FALSE(FN_ENDS_WITH(that, prefix##"c++20!")); \ - } - - TEST(PatchStartsEndsWith, EndsWith) { - // Ordinary string - TEST_ENDS_WITH(); - // Wide string - TEST_ENDS_WITH(L); - // UTF8 string - TEST_ENDS_WITH(u8); - // UTF-16 string - TEST_ENDS_WITH(u); - // UTF-32 string - TEST_ENDS_WITH(U); - } - -} // namespace yycctest::patch::starts_ends_with diff --git a/testbench/yycc/rust/num/parse.cpp b/testbench/yycc/rust/num/parse.cpp deleted file mode 100644 index 96225b4..0000000 --- a/testbench/yycc/rust/num/parse.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include - -#include - -#define PARSE ::yycc::rust::num::parse - -namespace yycctest::rust::num::parse { - - // We only want to test it if C++ support it. -#if defined(YYCC_CPPFEAT_EXPECTED) - -#define TEST_NS RustNumParse - -#define TEST_SUCCESS(type_t, expected_value, string_value, ...) \ - { \ - u8string cache_string(YYCC_U8(string_value)); \ - auto rv = PARSE::parse(cache_string, ##__VA_ARGS__); \ - ASSERT_TRUE(rv.has_value()); \ - EXPECT_EQ(rv.value(), expected_value); \ - } - -#define TEST_FAIL(type_t, string_value, ...) \ - { \ - u8string cache_string(YYCC_U8(string_value)); \ - auto rv = PARSE::parse(cache_string, ##__VA_ARGS__); \ - EXPECT_FALSE(rv.has_value()); \ - } - -#include "../../../shared/parse_template.hpp" - -#endif - -} // namespace yycctest::rust::parse diff --git a/testbench/yycc/rust/num/stringify.cpp b/testbench/yycc/rust/num/stringify.cpp deleted file mode 100644 index 0683023..0000000 --- a/testbench/yycc/rust/num/stringify.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include - -#include - -#define STRINGIFY ::yycc::rust::num::stringify - -namespace yycctest::rust::num::stringify { - -#define TEST_NS RustNumStringify - -#define TEST_SUCCESS(type_t, value, string_value, ...) 
\ - { \ - type_t cache = value; \ - u8string ret = STRINGIFY::stringify(cache, ##__VA_ARGS__); \ - EXPECT_EQ(ret, YYCC_U8(string_value)); \ - } - -#include "../../../shared/stringify_template.hpp" - -} diff --git a/testbench/yycc/string/op.cpp b/testbench/yycc/string/op.cpp index f74638d..f7ed3ba 100644 --- a/testbench/yycc/string/op.cpp +++ b/testbench/yycc/string/op.cpp @@ -2,91 +2,92 @@ #include #include -#include +#include #define OP ::yycc::string::op +using namespace std::literals::string_view_literals; namespace yycctest::string::op { TEST(StringOp, Printf) { - auto rv = OP::printf(YYCC_U8("%s == %s"), YYCC_U8("Hello World"), YYCC_U8("Hello, world")); - EXPECT_EQ(rv, YYCC_U8("Hello World == Hello, world")); + auto rv = OP::printf(u8"%s == %s", u8"Hello World", u8"Hello, world"); + EXPECT_EQ(rv, u8"Hello World == Hello, world"); } TEST(StringOp, Replace) { // Normal case { - auto rv = OP::replace(YYCC_U8("aabbcc"), YYCC_U8("bb"), YYCC_U8("dd")); - EXPECT_EQ(rv, YYCC_U8("aaddcc")); + auto rv = OP::replace(u8"aabbcc", u8"bb", u8"dd"); + EXPECT_EQ(rv, u8"aaddcc"); } // No matched expected string { - auto rv = OP::replace(YYCC_U8("aabbcc"), YYCC_U8("zz"), YYCC_U8("yy")); - EXPECT_EQ(rv, YYCC_U8("aabbcc")); + auto rv = OP::replace(u8"aabbcc", u8"zz", u8"yy"); + EXPECT_EQ(rv, u8"aabbcc"); } // Empty expected string { - auto rv = OP::replace(YYCC_U8("aabbcc"), u8string_view(), YYCC_U8("zz")); - EXPECT_EQ(rv, YYCC_U8("aabbcc")); + auto rv = OP::replace(u8"aabbcc", std::u8string_view(), u8"zz"); + EXPECT_EQ(rv, u8"aabbcc"); } // Empty replace string { - auto rv = OP::replace(YYCC_U8("aaaabbaa"), YYCC_U8("aa"), YYCC_U8("")); - EXPECT_EQ(rv, YYCC_U8("bb")); + auto rv = OP::replace(u8"aaaabbaa", u8"aa", u8""); + EXPECT_EQ(rv, u8"bb"); } // Nested replacing { - auto rv = OP::replace(YYCC_U8("aaxcc"), YYCC_U8("x"), YYCC_U8("yx")); - EXPECT_EQ(rv, YYCC_U8("aayxcc")); + auto rv = OP::replace(u8"aaxcc", u8"x", u8"yx"); + EXPECT_EQ(rv, u8"aayxcc"); } // Empty source string { - 
auto rv = OP::replace(u8string_view(), YYCC_U8(""), YYCC_U8("xy")); - EXPECT_EQ(rv, YYCC_U8("")); + auto rv = OP::replace(std::u8string_view(), u8"", u8"xy"); + EXPECT_EQ(rv, u8""); } } TEST(StringOp, Lower) { - auto rv = OP::to_lower(YYCC_U8("LOWER")); - EXPECT_EQ(rv, YYCC_U8("lower")); + auto rv = OP::to_lower(u8"LOWER"); + EXPECT_EQ(rv, u8"lower"); } TEST(StringOp, Upper) { - auto rv = OP::to_upper(YYCC_U8("upper")); - EXPECT_EQ(rv, YYCC_U8("UPPER")); + auto rv = OP::to_upper(u8"upper"); + EXPECT_EQ(rv, u8"UPPER"); } TEST(StringOp, Join) { - std::vector datas{YYCC_U8(""), YYCC_U8("1"), YYCC_U8("2"), YYCC_U8("")}; - auto rv = OP::join(datas.begin(), datas.end(), YYCC_U8(", ")); - EXPECT_EQ(rv, YYCC_U8(", 1, 2, ")); + std::vector datas{u8""sv, u8"1"sv, u8"2"sv, u8""sv}; + auto rv = OP::join(datas.begin(), datas.end(), u8", "); + EXPECT_EQ(rv, u8", 1, 2, "); } TEST(StringOp, Split) { // Normal { - auto rv = OP::split(YYCC_U8(", 1, 2, "), YYCC_U8(", ")); + auto rv = OP::split(u8", 1, 2, ", u8", "); ASSERT_EQ(rv.size(), 4u); - EXPECT_EQ(rv[0], YYCC_U8("")); - EXPECT_EQ(rv[1], YYCC_U8("1")); - EXPECT_EQ(rv[2], YYCC_U8("2")); - EXPECT_EQ(rv[3], YYCC_U8("")); + EXPECT_EQ(rv[0], u8""); + EXPECT_EQ(rv[1], u8"1"); + EXPECT_EQ(rv[2], u8"2"); + EXPECT_EQ(rv[3], u8""); } // No matched delimiter { - auto rv = OP::split(YYCC_U8("test"), YYCC_U8("-")); + auto rv = OP::split(u8"test", u8"-"); ASSERT_EQ(rv.size(), 1u); - EXPECT_EQ(rv[0], YYCC_U8("test")); + EXPECT_EQ(rv[0], u8"test"); } // Empty delimiter { - auto rv = OP::split(YYCC_U8("test"), u8string_view()); + auto rv = OP::split(u8"test", std::u8string_view()); ASSERT_EQ(rv.size(), 1u); - EXPECT_EQ(rv[0], YYCC_U8("test")); + EXPECT_EQ(rv[0], u8"test"); } // Empty source string { - auto rv = OP::split(u8string_view(), YYCC_U8("")); + auto rv = OP::split(std::u8string_view(), u8""); ASSERT_EQ(rv.size(), 1u); EXPECT_TRUE(rv[0].empty()); } diff --git a/testbench/yycc/string/reinterpret.cpp b/testbench/yycc/string/reinterpret.cpp 
index 946c58b..4297fae 100644 --- a/testbench/yycc/string/reinterpret.cpp +++ b/testbench/yycc/string/reinterpret.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #define REINTERPRET ::yycc::string::reinterpret #define CONST_VOID_PTR(p) reinterpret_cast(p) @@ -11,7 +11,7 @@ namespace yycctest::string::reinterpret { - static u8string PROBE(YYCC_U8("Test")); + static std::u8string PROBE(u8"Test"); TEST(StringReinterpret, ConstPointer) { const auto* src = PROBE.data(); @@ -34,7 +34,7 @@ namespace yycctest::string::reinterpret { } TEST(StringReinterpret, String) { - auto src = u8string(PROBE); + auto src = std::u8string(PROBE); auto dst = REINTERPRET::as_ordinary(src); auto new_src = REINTERPRET::as_utf8(dst); @@ -46,7 +46,7 @@ namespace yycctest::string::reinterpret { } TEST(StringReinterpret, StringView) { - auto src = u8string_view(PROBE); + auto src = std::u8string_view(PROBE); auto dst = REINTERPRET::as_ordinary_view(src); auto new_src = REINTERPRET::as_utf8_view(dst);