refactor: continue refactor to make the project can be built

This commit is contained in:
2025-07-31 22:25:14 +08:00
parent 5372af79f8
commit ce2b411b0b
30 changed files with 791 additions and 943 deletions

13
.gitignore vendored
View File

@ -1,12 +1,17 @@
# -------------------- Output -------------------- ## ===== Personal =====
# Ignore build resources
out/ out/
build/
install/
# Ignore CMake generated stuff
src/yycc/version.hpp src/yycc/version.hpp
CMakeSettings.json CMakeSettings.json
# -------------------- VSCode -------------------- ## ===== VSCode =====
.vscode/ .vscode/
# -------------------- CMake -------------------- ## ===== CMake =====
CMakeLists.txt.user CMakeLists.txt.user
CMakeCache.txt CMakeCache.txt
CMakeFiles CMakeFiles
@ -19,7 +24,7 @@ compile_commands.json
CTestTestfile.cmake CTestTestfile.cmake
_deps _deps
# -------------------- Visual Studio -------------------- ## ===== Visual Studio =====
## Ignore Visual Studio temporary files, build results, and ## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons. ## files generated by popular Visual Studio add-ons.
## ##

View File

@ -15,3 +15,8 @@ However, the documentation need CMake to build and you may don't know how to use
This project require at least CMake 3.23 to build. We suggest that you only use stable version (tagged commit). The latest commit may still work in progress and not stable. This project require at least CMake 3.23 to build. We suggest that you only use stable version (tagged commit). The latest commit may still work in progress and not stable.
See documentation for how to build this project. See documentation for how to build this project.
> [!NOTE]
> When building with the testbench, you may encounter link errors with GoogleTest. This happens because the binary provided by your package manager is built with C++ 17 and its ABI is incompatible with C++ 23. The solution is to download the GoogleTest source code and build it with C++ 23 yourself. See this [GitHub Issue](https://github.com/google/googletest/issues/4591) for more information.
> Conversely, you don't need to care about this issue if you just want to build YYCC itself.

View File

@ -25,7 +25,7 @@ def escape_cmd_argument(arg):
def escape_sh_argument(arg): def escape_sh_argument(arg):
return shlex.quote(arg) return shlex.quote(arg)
@dataclass @dataclass(frozen=True)
class ScriptSettings: class ScriptSettings:
cpp_version: str cpp_version: str
build_doc: bool build_doc: bool

7
script/pycodec/README.md Normal file
View File

@ -0,0 +1,7 @@
# PyCodec
This directory contains everything related to PyCodec.
PyCodec uses a different encoding backend depending on the OS: on Windows it uses Win32 functions, and on other OSes it uses Iconv. So we need a table that converts PyCodec's universal encoding names to Windows Code Pages or Iconv Code Names. These relations are stored in a CSV file, and a Python script renders them into C++ source code.
As for the format of the CSV file, each line is a record. The first item in a record is the standard PyCodec name. The second item is the corresponding Windows Code Page; if there is no corresponding Code Page, it can be empty. The third item is the corresponding Iconv Code Name; likewise, it can be empty if there is none. The number of remaining columns, from the fourth item onward (inclusive), is variable. All of them are aliases of this standard PyCodec name.

View File

@ -1,6 +1,7 @@
import typing import csv
from pathlib import Path from pathlib import Path
import os import jinja2
class LanguageToken: class LanguageToken:
name: str name: str
@ -8,56 +9,46 @@ class LanguageToken:
code_page: str | None code_page: str | None
iconv_code: str | None iconv_code: str | None
def __init__(self, name: str, alias: typing.Iterator[str], code_page: str, iconv_code: str): def __init__(self, row: list[str]):
self.name = name.lower() """Init language token from CSV row."""
self.alias = tuple(map(lambda x: x.lower(), alias)) self.name = row[0].lower()
self.code_page = None if code_page == '' else code_page code_page = row[1]
self.iconv_code = None if iconv_code == '' else iconv_code self.code_page = None if len(code_page) == 0 else code_page
iconv_code = row[2]
self.iconv_code = None if len(iconv_code) == 0 else iconv_code
# For alias, we strip and to lower them first, and remove all empty entries
alias = row[3:]
self.alias = tuple(
filter(lambda x: len(x) != 0,
map(lambda x: x.strip().lower(), alias)))
def extract_data(fs: typing.TextIO) -> list[str]:
    """Return every line of the table except its header.

    :param fs: Readable text stream positioned at the start of the table.
    :return: All lines after the first one, with line endings kept as read.
        The result is empty when the stream holds zero or one line.
    """
    rows = fs.readlines()
    # The first line is the table header, not a record; drop it.
    del rows[:1]
    return rows
def extract_token(csv_data: list[str]) -> tuple[LanguageToken, ...]: def _get_self_dir() -> Path:
ret: list[LanguageToken] = [] return Path(__file__).resolve().parent
for line in csv_data:
line = line.strip('\n')
line_sp = line.split('\t')
alias_sp = filter(lambda x: len(x) != 0, map(lambda x: x.strip(), line_sp[1].split(',')))
ret.append(LanguageToken(line_sp[0], alias_sp, line_sp[2], line_sp[3]))
return tuple(ret)
def write_alias_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """Write the C++ alias-to-name map initializer to ``fs``.

    One entry is written per alias of every token, in input order, mapping
    the alias to the token's ``name``.

    :param fs: Writable text stream receiving the C++ source.
    :param data: Tokens exposing ``name`` and an iterable ``alias``.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, NS_YYCC_STRING::u8string> ALISA_MAP {\n')
    fs.writelines(
        f'\t{{ YYCC_U8("{alias}"), YYCC_U8("{token.name}") }},\n'
        for token in data
        for alias in token.alias
    )
    fs.write('};\n')
def write_win_cp_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None: def _extract_tokens() -> list[LanguageToken]:
fs.write('static const std::map<NS_YYCC_STRING::u8string, CodePage> WINCP_MAP {\n') rv: list[LanguageToken] = []
for i in data: csv_file = _get_self_dir() / 'encoding_table.csv'
if i.code_page is not None:
fs.write(f'\t{{ YYCC_U8("{i.name}"), static_cast<CodePage>({i.code_page}u) }},\n') with open(csv_file, 'r', encoding='utf-8', newline='') as f:
fs.write('};\n') reader = csv.reader(f, delimiter='\t')
for row in reader:
rv.append(LanguageToken(row))
return rv
def _render_cpp(tokens: list[LanguageToken]) -> None:
    """Render the encoding tables into ``encoding_table.cpp``.

    The ``encoding_table.cpp.jinja`` template is loaded from the directory
    returned by ``_get_self_dir()`` and the rendered result is written, as
    UTF-8, to ``encoding_table.cpp`` in that same directory.

    :param tokens: Tokens passed to the template under the name ``tokens``.
    """
    self_dir = _get_self_dir()
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(self_dir))
    template = env.get_template('encoding_table.cpp.jinja')

    # Rendering happens while the output file is already open, as before.
    with open(self_dir / 'encoding_table.cpp', 'w', encoding='utf-8') as out:
        out.write(template.render(tokens=tokens))
def write_iconv_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """Write the C++ name-to-Iconv-code map initializer to ``fs``.

    Tokens whose ``iconv_code`` is ``None`` are left out of the map.

    :param fs: Writable text stream receiving the C++ source.
    :param data: Tokens exposing ``name`` and ``iconv_code``.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, std::string> ICONV_MAP {\n')
    fs.writelines(
        f'\t{{ YYCC_U8("{token.name}"), "{token.iconv_code}" }},\n'
        for token in data
        if token.iconv_code is not None
    )
    fs.write('};\n')
if __name__ == '__main__': if __name__ == '__main__':
# get file path tokens = _extract_tokens()
self_path = Path(__file__).resolve().parent _render_cpp(tokens)
csv_file = self_path / 'encoding_table.csv'
cpp_file = self_path / 'encoding_table.cpp'
# process files
with open(csv_file, 'r', encoding='utf-8') as fr:
with open(cpp_file, 'w', encoding='utf-8') as fw:
data = extract_data(fr)
token = extract_token(data)
write_alias_map(fw, token)
write_win_cp_map(fw, token)
write_iconv_map(fw, token)

View File

@ -0,0 +1,23 @@
static const std::map<std::u8string_view, std::u8string_view> ALIAS_MAP {
{% for token in tokens -%}
{% for alias in token.alias -%}
{ u8"{{ alias }}"sv, u8"{{ token.name }}"sv },
{% endfor -%}
{% endfor -%}
};
static const std::map<std::u8string_view, CodePage> WINCP_MAP {
{% for token in tokens -%}
{% if token.code_page is not none -%}
{ u8"{{ token.name }}"sv, static_cast<CodePage>({{ token.code_page }}u) },
{% endif -%}
{% endfor -%}
};
static const std::map<std::u8string_view, std::string_view> ICONV_MAP {
{% for token in tokens -%}
{% if token.iconv_code is not none -%}
{ u8"{{ token.name }}"sv, "{{ token.iconv_code }}"sv },
{% endif -%}
{% endfor -%}
};

View File

@ -1,98 +1,97 @@
Encoding Alias Code Page Iconv Identifier ascii 437 ASCII 646 us-ascii
ascii 646, us-ascii 437 ASCII big5 950 BIG5 big5-tw csbig5
big5 big5-tw, csbig5 950 BIG5 big5hkscs BIG5-HKSCS big5-hkscs hkscs
big5hkscs big5-hkscs, hkscs BIG5-HKSCS cp037 037 IBM037 IBM039
cp037 IBM037, IBM039 037 cp273 273 IBM273 csIBM273
cp273 273, IBM273, csIBM273 cp424 EBCDIC-CP-HE IBM424
cp424 EBCDIC-CP-HE, IBM424 cp437 437 437 IBM437
cp437 437, IBM437 437 cp500 500 EBCDIC-CP-BE EBCDIC-CP-CH IBM500
cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 500
cp720 720 cp720 720
cp737 737 cp737 737
cp775 IBM775 775 cp775 775 IBM775
cp850 850, IBM850 850 CP850 cp850 850 CP850 850 IBM850
cp852 852, IBM852 852 cp852 852 852 IBM852
cp855 855, IBM855 855 cp855 855 855 IBM855
cp856 cp856
cp857 857, IBM857 857 cp857 857 857 IBM857
cp858 858, IBM858 858 cp858 858 858 IBM858
cp860 860, IBM860 860 cp860 860 860 IBM860
cp861 861, CP-IS, IBM861 861 cp861 861 861 CP-IS IBM861
cp862 862, IBM862 862 CP862 cp862 862 CP862 862 IBM862
cp863 863, IBM863 863 cp863 863 863 IBM863
cp864 IBM864 864 cp864 864 IBM864
cp865 865, IBM865 865 cp865 865 865 IBM865
cp866 866, IBM866 866 CP866 cp866 866 CP866 866 IBM866
cp869 869, CP-GR, IBM869 869 cp869 869 869 CP-GR IBM869
cp874 874 CP874 cp874 874 CP874
cp875 875 cp875 875
cp932 932, ms932, mskanji, ms-kanji, windows-31j 932 CP932 cp932 932 CP932 932 ms932 mskanji ms-kanji windows-31j
cp949 949, ms949, uhc 949 CP949 cp949 949 CP949 949 ms949 uhc
cp950 950, ms950 950 CP950 cp950 950 CP950 950 ms950
cp1006 cp1006
cp1026 ibm1026 1026 cp1026 1026 ibm1026
cp1125 1125, ibm1125, cp866u, ruscii cp1125 1125 ibm1125 cp866u ruscii
cp1140 ibm1140 1140 cp1140 1140 ibm1140
cp1250 windows-1250 1250 CP1250 cp1250 1250 CP1250 windows-1250
cp1251 windows-1251 1251 CP1251 cp1251 1251 CP1251 windows-1251
cp1252 windows-1252 1252 CP1252 cp1252 1252 CP1252 windows-1252
cp1253 windows-1253 1253 CP1253 cp1253 1253 CP1253 windows-1253
cp1254 windows-1254 1254 CP1254 cp1254 1254 CP1254 windows-1254
cp1255 windows-1255 1255 CP1255 cp1255 1255 CP1255 windows-1255
cp1256 windows-1256 1256 CP1256 cp1256 1256 CP1256 windows-1256
cp1257 windows-1257 1257 CP1257 cp1257 1257 CP1257 windows-1257
cp1258 windows-1258 1258 CP1258 cp1258 1258 CP1258 windows-1258
euc_jp eucjp, ujis, u-jis 20932 EUC-JP euc_jp 20932 EUC-JP eucjp ujis u-jis
euc_jis_2004 jisx0213, eucjis2004 euc_jis_2004 jisx0213 eucjis2004
euc_jisx0213 eucjisx0213 euc_jisx0213 eucjisx0213
euc_kr euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 51949 EUC-KR euc_kr 51949 EUC-KR euckr korean ksc5601 ks_c-5601 ks_c-5601-1987 ksx1001 ks_x-1001
gb2312 chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, iso-ir-58 936 CP936 gb2312 936 CP936 chinese csiso58gb231280 euc-cn euccn eucgb2312-cn gb2312-1980 gb2312-80 iso-ir-58
gbk 936, cp936, ms936 936 GBK gbk 936 GBK 936 cp936 ms936
gb18030 gb18030-2000 54936 GB18030 gb18030 54936 GB18030 gb18030-2000
hz hzgb, hz-gb, hz-gb-2312 52936 HZ hz 52936 HZ hzgb hz-gb hz-gb-2312
iso2022_jp csiso2022jp, iso2022jp, iso-2022-jp 50220 ISO-2022-JP iso2022_jp 50220 ISO-2022-JP csiso2022jp iso2022jp iso-2022-jp
iso2022_jp_1 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-1 iso2022_jp_1 ISO-2022-JP-1 iso2022jp-1 iso-2022-jp-1
iso2022_jp_2 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2 iso2022_jp_2 ISO-2022-JP-2 iso2022jp-2 iso-2022-jp-2
iso2022_jp_2004 iso2022jp-2004, iso-2022-jp-2004 iso2022_jp_2004 iso2022jp-2004 iso-2022-jp-2004
iso2022_jp_3 iso2022jp-3, iso-2022-jp-3 iso2022_jp_3 iso2022jp-3 iso-2022-jp-3
iso2022_jp_ext iso2022jp-ext, iso-2022-jp-ext iso2022_jp_ext iso2022jp-ext iso-2022-jp-ext
iso2022_kr csiso2022kr, iso2022kr, iso-2022-kr 50225 ISO-2022-KR iso2022_kr 50225 ISO-2022-KR csiso2022kr iso2022kr iso-2022-kr
latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28591 ISO-8859-1 latin_1 28591 ISO-8859-1 iso-8859-1 iso8859-1 8859 cp819 latin latin1 L1
iso8859_2 iso-8859-2, latin2, L2 28592 ISO-8859-2 iso8859_2 28592 ISO-8859-2 iso-8859-2 latin2 L2
iso8859_3 iso-8859-3, latin3, L3 28593 ISO-8859-3 iso8859_3 28593 ISO-8859-3 iso-8859-3 latin3 L3
iso8859_4 iso-8859-4, latin4, L4 28594 ISO-8859-4 iso8859_4 28594 ISO-8859-4 iso-8859-4 latin4 L4
iso8859_5 iso-8859-5, cyrillic 28595 ISO-8859-5 iso8859_5 28595 ISO-8859-5 iso-8859-5 cyrillic
iso8859_6 iso-8859-6, arabic 28596 ISO-8859-6 iso8859_6 28596 ISO-8859-6 iso-8859-6 arabic
iso8859_7 iso-8859-7, greek, greek8 28597 ISO-8859-7 iso8859_7 28597 ISO-8859-7 iso-8859-7 greek greek8
iso8859_8 iso-8859-8, hebrew 28598 ISO-8859-8 iso8859_8 28598 ISO-8859-8 iso-8859-8 hebrew
iso8859_9 iso-8859-9, latin5, L5 28599 ISO-8859-9 iso8859_9 28599 ISO-8859-9 iso-8859-9 latin5 L5
iso8859_10 iso-8859-10, latin6, L6 ISO-8859-10 iso8859_10 ISO-8859-10 iso-8859-10 latin6 L6
iso8859_11 iso-8859-11, thai ISO-8859-11 iso8859_11 ISO-8859-11 iso-8859-11 thai
iso8859_13 iso-8859-13, latin7, L7 28603 ISO-8859-13 iso8859_13 28603 ISO-8859-13 iso-8859-13 latin7 L7
iso8859_14 iso-8859-14, latin8, L8 ISO-8859-14 iso8859_14 ISO-8859-14 iso-8859-14 latin8 L8
iso8859_15 iso-8859-15, latin9, L9 28605 ISO-8859-15 iso8859_15 28605 ISO-8859-15 iso-8859-15 latin9 L9
iso8859_16 iso-8859-16, latin10, L10 ISO-8859-16 iso8859_16 ISO-8859-16 iso-8859-16 latin10 L10
johab cp1361, ms1361 1361 JOHAB johab 1361 JOHAB cp1361 ms1361
koi8_r koi8_r
koi8_t KOI8-T koi8_t KOI8-T
koi8_u koi8_u
kz1048 kz_1048, strk1048_2002, rk1048 kz1048 kz_1048 strk1048_2002 rk1048
mac_cyrillic maccyrillic 10007 MacCyrillic mac_cyrillic 10007 MacCyrillic maccyrillic
mac_greek macgreek 10006 MacGreek mac_greek 10006 MacGreek macgreek
mac_iceland maciceland 10079 MacIceland mac_iceland 10079 MacIceland maciceland
mac_latin2 maclatin2, maccentraleurope, mac_centeuro mac_latin2 maclatin2 maccentraleurope mac_centeuro
mac_roman macroman, macintosh MacRoman mac_roman MacRoman macroman macintosh
mac_turkish macturkish 10081 MacTurkish mac_turkish 10081 MacTurkish macturkish
ptcp154 csptcp154, pt154, cp154, cyrillic-asian PT154 ptcp154 PT154 csptcp154 pt154 cp154 cyrillic-asian
shift_jis csshiftjis, shiftjis, sjis, s_jis 932 SHIFT_JIS shift_jis 932 SHIFT_JIS csshiftjis shiftjis sjis s_jis
shift_jis_2004 shiftjis2004, sjis_2004, sjis2004 shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213
utf_32 U32, utf32 UTF-32 utf_32 UTF-32 U32 utf32
utf_32_be UTF-32BE UTF-32BE utf_32_be UTF-32BE UTF-32BE
utf_32_le UTF-32LE UTF-32LE utf_32_le UTF-32LE UTF-32LE
utf_16 U16, utf16 UTF16 utf_16 UTF16 U16 utf16
utf_16_be UTF-16BE UTF-16BE utf_16_be UTF-16BE UTF-16BE
utf_16_le UTF-16LE UTF-16LE utf_16_le UTF-16LE UTF-16LE
utf_7 U7, unicode-1-1-utf-7 65000 UTF-7 utf_7 65000 UTF-7 U7 unicode-1-1-utf-7
utf_8 U8, UTF, utf8, utf-8, cp65001 65001 UTF-8 utf_8 65001 UTF-8 U8 UTF utf8 utf-8 cp65001
utf_8_sig utf_8_sig

1 Encoding ascii Alias 437 Code Page ASCII Iconv Identifier 646 us-ascii
2 ascii big5 646, us-ascii 950 437 BIG5 ASCII big5-tw csbig5
3 big5 big5hkscs big5-tw, csbig5 950 BIG5-HKSCS BIG5 big5-hkscs hkscs
4 big5hkscs cp037 big5-hkscs, hkscs 037 BIG5-HKSCS IBM037 IBM039
5 cp037 cp273 IBM037, IBM039 037 273 IBM273 csIBM273
6 cp273 cp424 273, IBM273, csIBM273 EBCDIC-CP-HE IBM424
7 cp424 cp437 EBCDIC-CP-HE, IBM424 437 437 IBM437
8 cp437 cp500 437, IBM437 500 437 EBCDIC-CP-BE EBCDIC-CP-CH IBM500
cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 500
9 cp720 cp720 720 720
10 cp737 cp737 737 737
11 cp775 cp775 IBM775 775 775 IBM775
12 cp850 cp850 850, IBM850 850 850 CP850 CP850 850 IBM850
13 cp852 cp852 852, IBM852 852 852 852 IBM852
14 cp855 cp855 855, IBM855 855 855 855 IBM855
15 cp856 cp856
16 cp857 cp857 857, IBM857 857 857 857 IBM857
17 cp858 cp858 858, IBM858 858 858 858 IBM858
18 cp860 cp860 860, IBM860 860 860 860 IBM860
19 cp861 cp861 861, CP-IS, IBM861 861 861 861 CP-IS IBM861
20 cp862 cp862 862, IBM862 862 862 CP862 CP862 862 IBM862
21 cp863 cp863 863, IBM863 863 863 863 IBM863
22 cp864 cp864 IBM864 864 864 IBM864
23 cp865 cp865 865, IBM865 865 865 865 IBM865
24 cp866 cp866 866, IBM866 866 866 CP866 CP866 866 IBM866
25 cp869 cp869 869, CP-GR, IBM869 869 869 869 CP-GR IBM869
26 cp874 cp874 874 874 CP874 CP874
27 cp875 cp875 875 875
28 cp932 cp932 932, ms932, mskanji, ms-kanji, windows-31j 932 932 CP932 CP932 932 ms932 mskanji ms-kanji windows-31j
29 cp949 cp949 949, ms949, uhc 949 949 CP949 CP949 949 ms949 uhc
30 cp950 cp950 950, ms950 950 950 CP950 CP950 950 ms950
31 cp1006 cp1006
32 cp1026 cp1026 ibm1026 1026 1026 ibm1026
33 cp1125 cp1125 1125, ibm1125, cp866u, ruscii 1125 ibm1125 cp866u ruscii
34 cp1140 cp1140 ibm1140 1140 1140 ibm1140
35 cp1250 cp1250 windows-1250 1250 1250 CP1250 CP1250 windows-1250
36 cp1251 cp1251 windows-1251 1251 1251 CP1251 CP1251 windows-1251
37 cp1252 cp1252 windows-1252 1252 1252 CP1252 CP1252 windows-1252
38 cp1253 cp1253 windows-1253 1253 1253 CP1253 CP1253 windows-1253
39 cp1254 cp1254 windows-1254 1254 1254 CP1254 CP1254 windows-1254
40 cp1255 cp1255 windows-1255 1255 1255 CP1255 CP1255 windows-1255
41 cp1256 cp1256 windows-1256 1256 1256 CP1256 CP1256 windows-1256
42 cp1257 cp1257 windows-1257 1257 1257 CP1257 CP1257 windows-1257
43 cp1258 cp1258 windows-1258 1258 1258 CP1258 CP1258 windows-1258
44 euc_jp euc_jp eucjp, ujis, u-jis 20932 20932 EUC-JP EUC-JP eucjp ujis u-jis
45 euc_jis_2004 euc_jis_2004 jisx0213, eucjis2004 jisx0213 eucjis2004
46 euc_jisx0213 euc_jisx0213 eucjisx0213 eucjisx0213
47 euc_kr euc_kr euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 51949 51949 EUC-KR EUC-KR euckr korean ksc5601 ks_c-5601 ks_c-5601-1987 ksx1001 ks_x-1001
48 gb2312 gb2312 chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, iso-ir-58 936 936 CP936 CP936 chinese csiso58gb231280 euc-cn euccn eucgb2312-cn gb2312-1980 gb2312-80 iso-ir-58
49 gbk gbk 936, cp936, ms936 936 936 GBK GBK 936 cp936 ms936
50 gb18030 gb18030 gb18030-2000 54936 54936 GB18030 GB18030 gb18030-2000
51 hz hz hzgb, hz-gb, hz-gb-2312 52936 52936 HZ HZ hzgb hz-gb hz-gb-2312
52 iso2022_jp iso2022_jp csiso2022jp, iso2022jp, iso-2022-jp 50220 50220 ISO-2022-JP ISO-2022-JP csiso2022jp iso2022jp iso-2022-jp
53 iso2022_jp_1 iso2022_jp_1 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-1 ISO-2022-JP-1 iso2022jp-1 iso-2022-jp-1
54 iso2022_jp_2 iso2022_jp_2 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2 ISO-2022-JP-2 iso2022jp-2 iso-2022-jp-2
55 iso2022_jp_2004 iso2022_jp_2004 iso2022jp-2004, iso-2022-jp-2004 iso2022jp-2004 iso-2022-jp-2004
56 iso2022_jp_3 iso2022_jp_3 iso2022jp-3, iso-2022-jp-3 iso2022jp-3 iso-2022-jp-3
57 iso2022_jp_ext iso2022_jp_ext iso2022jp-ext, iso-2022-jp-ext iso2022jp-ext iso-2022-jp-ext
58 iso2022_kr iso2022_kr csiso2022kr, iso2022kr, iso-2022-kr 50225 50225 ISO-2022-KR ISO-2022-KR csiso2022kr iso2022kr iso-2022-kr
59 latin_1 latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28591 28591 ISO-8859-1 ISO-8859-1 iso-8859-1 iso8859-1 8859 cp819 latin latin1 L1
60 iso8859_2 iso8859_2 iso-8859-2, latin2, L2 28592 28592 ISO-8859-2 ISO-8859-2 iso-8859-2 latin2 L2
61 iso8859_3 iso8859_3 iso-8859-3, latin3, L3 28593 28593 ISO-8859-3 ISO-8859-3 iso-8859-3 latin3 L3
62 iso8859_4 iso8859_4 iso-8859-4, latin4, L4 28594 28594 ISO-8859-4 ISO-8859-4 iso-8859-4 latin4 L4
63 iso8859_5 iso8859_5 iso-8859-5, cyrillic 28595 28595 ISO-8859-5 ISO-8859-5 iso-8859-5 cyrillic
64 iso8859_6 iso8859_6 iso-8859-6, arabic 28596 28596 ISO-8859-6 ISO-8859-6 iso-8859-6 arabic
65 iso8859_7 iso8859_7 iso-8859-7, greek, greek8 28597 28597 ISO-8859-7 ISO-8859-7 iso-8859-7 greek greek8
66 iso8859_8 iso8859_8 iso-8859-8, hebrew 28598 28598 ISO-8859-8 ISO-8859-8 iso-8859-8 hebrew
67 iso8859_9 iso8859_9 iso-8859-9, latin5, L5 28599 28599 ISO-8859-9 ISO-8859-9 iso-8859-9 latin5 L5
68 iso8859_10 iso8859_10 iso-8859-10, latin6, L6 ISO-8859-10 ISO-8859-10 iso-8859-10 latin6 L6
69 iso8859_11 iso8859_11 iso-8859-11, thai ISO-8859-11 ISO-8859-11 iso-8859-11 thai
70 iso8859_13 iso8859_13 iso-8859-13, latin7, L7 28603 28603 ISO-8859-13 ISO-8859-13 iso-8859-13 latin7 L7
71 iso8859_14 iso8859_14 iso-8859-14, latin8, L8 ISO-8859-14 ISO-8859-14 iso-8859-14 latin8 L8
72 iso8859_15 iso8859_15 iso-8859-15, latin9, L9 28605 28605 ISO-8859-15 ISO-8859-15 iso-8859-15 latin9 L9
73 iso8859_16 iso8859_16 iso-8859-16, latin10, L10 ISO-8859-16 ISO-8859-16 iso-8859-16 latin10 L10
74 johab johab cp1361, ms1361 1361 1361 JOHAB JOHAB cp1361 ms1361
75 koi8_r koi8_r
76 koi8_t koi8_t KOI8-T KOI8-T
77 koi8_u koi8_u
78 kz1048 kz1048 kz_1048, strk1048_2002, rk1048 kz_1048 strk1048_2002 rk1048
79 mac_cyrillic mac_cyrillic maccyrillic 10007 10007 MacCyrillic MacCyrillic maccyrillic
80 mac_greek mac_greek macgreek 10006 10006 MacGreek MacGreek macgreek
81 mac_iceland mac_iceland maciceland 10079 10079 MacIceland MacIceland maciceland
82 mac_latin2 mac_latin2 maclatin2, maccentraleurope, mac_centeuro maclatin2 maccentraleurope mac_centeuro
83 mac_roman mac_roman macroman, macintosh MacRoman MacRoman macroman macintosh
84 mac_turkish mac_turkish macturkish 10081 10081 MacTurkish MacTurkish macturkish
85 ptcp154 ptcp154 csptcp154, pt154, cp154, cyrillic-asian PT154 PT154 csptcp154 pt154 cp154 cyrillic-asian
86 shift_jis shift_jis csshiftjis, shiftjis, sjis, s_jis 932 932 SHIFT_JIS SHIFT_JIS csshiftjis shiftjis sjis s_jis
87 shift_jis_2004 shift_jis_2004 shiftjis2004, sjis_2004, sjis2004 shiftjis2004 sjis_2004 sjis2004
88 shift_jisx0213 shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213
89 utf_32 utf_32 U32, utf32 UTF-32 UTF-32 U32 utf32
90 utf_32_be utf_32_be UTF-32BE UTF-32BE UTF-32BE UTF-32BE
91 utf_32_le utf_32_le UTF-32LE UTF-32LE UTF-32LE UTF-32LE
92 utf_16 utf_16 U16, utf16 UTF16 UTF16 U16 utf16
93 utf_16_be utf_16_be UTF-16BE UTF-16BE UTF-16BE UTF-16BE
94 utf_16_le utf_16_le UTF-16LE UTF-16LE UTF-16LE UTF-16LE
95 utf_7 utf_7 U7, unicode-1-1-utf-7 65000 65000 UTF-7 UTF-7 U7 unicode-1-1-utf-7
96 utf_8 utf_8 U8, UTF, utf8, utf-8, cp65001 65001 65001 UTF-8 UTF-8 U8 UTF utf8 utf-8 cp65001
97 utf_8_sig utf_8_sig

View File

@ -17,7 +17,7 @@ PRIVATE
yycc/encoding/stlcvt.cpp yycc/encoding/stlcvt.cpp
yycc/encoding/windows.cpp yycc/encoding/windows.cpp
yycc/encoding/iconv.cpp yycc/encoding/iconv.cpp
yycc/encoding/pycodec.cpp #yycc/encoding/pycodec.cpp
) )
target_sources(YYCCommonplace target_sources(YYCCommonplace
PUBLIC PUBLIC
@ -28,6 +28,7 @@ FILES
yycc/version.hpp yycc/version.hpp
yycc/macro/version_cmp.hpp yycc/macro/version_cmp.hpp
yycc/macro/os_detector.hpp yycc/macro/os_detector.hpp
yycc/macro/stl_detector.hpp
yycc/macro/endian_detector.hpp yycc/macro/endian_detector.hpp
yycc/macro/compiler_detector.hpp yycc/macro/compiler_detector.hpp
yycc/macro/class_copy_move.hpp yycc/macro/class_copy_move.hpp
@ -73,8 +74,9 @@ PUBLIC
# Iconv environment macro # Iconv environment macro
$<$<BOOL:${YYCC_ENFORCE_ICONV}>:YYCC_FEAT_ICONV> $<$<BOOL:${YYCC_ENFORCE_ICONV}>:YYCC_FEAT_ICONV>
# OS macro # OS macro
$<$<BOOL:${WIN32}>:YYCC_OS_WINDOWS> $<$<PLATFORM_ID:Windows>:YYCC_OS_WINDOWS>
$<$<PLATFORM_ID:Linux>:YYCC_OS_LINUX> $<$<PLATFORM_ID:Linux>:YYCC_OS_LINUX>
$<$<PLATFORM_ID:Darwin>:YYCC_OS_MACOS>
# Compiler macro # Compiler macro
$<$<CXX_COMPILER_ID:GNU>:YYCC_CC_GCC> $<$<CXX_COMPILER_ID:GNU>:YYCC_CC_GCC>
$<$<CXX_COMPILER_ID:Clang>:YYCC_CC_CLANG> $<$<CXX_COMPILER_ID:Clang>:YYCC_CC_CLANG>
@ -100,7 +102,14 @@ PUBLIC
$<$<CXX_COMPILER_ID:MSVC>:/utf-8> $<$<CXX_COMPILER_ID:MSVC>:/utf-8>
) )
# TODO: Fix GCC stacktrace link issue # Fix GCC std::stacktrace link error
# Only GCC gets an extra library here; other compilers are left untouched.
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# The library to link depends on the GCC version.
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14)
# GCC 14 and newer: link stdc++exp.
target_link_libraries(YYCCommonplace PRIVATE stdc++exp)
else ()
# GCC older than 14: link stdc++_libbacktrace instead.
target_link_libraries(YYCCommonplace PRIVATE stdc++_libbacktrace)
endif ()
endif ()
# Install binary and headers # Install binary and headers
install(TARGETS YYCCommonplace install(TARGETS YYCCommonplace

View File

@ -229,7 +229,7 @@ namespace yycc::encoding::iconv {
CharToUtf8::~CharToUtf8() {} CharToUtf8::~CharToUtf8() {}
ConvResult<std::u8string> CharToUtf8::priv_to_utf8(const std::string_view& src) { ConvResult<std::u8string> CharToUtf8::to_utf8(const std::string_view& src) {
USER_CONVFN(char, char8_t); USER_CONVFN(char, char8_t);
} }
@ -241,7 +241,7 @@ namespace yycc::encoding::iconv {
Utf8ToChar::~Utf8ToChar() {} Utf8ToChar::~Utf8ToChar() {}
ConvResult<std::string> Utf8ToChar::priv_to_char(const std::u8string_view& src) { ConvResult<std::string> Utf8ToChar::to_char(const std::u8string_view& src) {
USER_CONVFN(char8_t, char); USER_CONVFN(char8_t, char);
} }
@ -253,7 +253,7 @@ namespace yycc::encoding::iconv {
WcharToUtf8::~WcharToUtf8() {} WcharToUtf8::~WcharToUtf8() {}
ConvResult<std::u8string> WcharToUtf8::priv_to_utf8(const std::wstring_view& src) { ConvResult<std::u8string> WcharToUtf8::to_utf8(const std::wstring_view& src) {
USER_CONVFN(wchar_t, char8_t); USER_CONVFN(wchar_t, char8_t);
} }
@ -265,7 +265,7 @@ namespace yycc::encoding::iconv {
Utf8ToWchar::~Utf8ToWchar() {} Utf8ToWchar::~Utf8ToWchar() {}
ConvResult<std::wstring> Utf8ToWchar::priv_to_wchar(const std::u8string_view& src) { ConvResult<std::wstring> Utf8ToWchar::to_wchar(const std::u8string_view& src) {
USER_CONVFN(char8_t, wchar_t); USER_CONVFN(char8_t, wchar_t);
} }
@ -277,7 +277,7 @@ namespace yycc::encoding::iconv {
Utf8ToUtf16::~Utf8ToUtf16() {} Utf8ToUtf16::~Utf8ToUtf16() {}
ConvResult<std::u16string> Utf8ToUtf16::priv_to_utf16(const std::u8string_view& src) { ConvResult<std::u16string> Utf8ToUtf16::to_utf16(const std::u8string_view& src) {
USER_CONVFN(char8_t, char16_t); USER_CONVFN(char8_t, char16_t);
} }
@ -289,7 +289,7 @@ namespace yycc::encoding::iconv {
Utf16ToUtf8::~Utf16ToUtf8() {} Utf16ToUtf8::~Utf16ToUtf8() {}
ConvResult<std::u8string> Utf16ToUtf8::priv_to_utf8(const std::u16string_view& src) { ConvResult<std::u8string> Utf16ToUtf8::to_utf8(const std::u16string_view& src) {
USER_CONVFN(char16_t, char8_t); USER_CONVFN(char16_t, char8_t);
} }
@ -301,7 +301,7 @@ namespace yycc::encoding::iconv {
Utf8ToUtf32::~Utf8ToUtf32() {} Utf8ToUtf32::~Utf8ToUtf32() {}
ConvResult<std::u32string> Utf8ToUtf32::priv_to_utf32(const std::u8string_view& src) { ConvResult<std::u32string> Utf8ToUtf32::to_utf32(const std::u8string_view& src) {
USER_CONVFN(char8_t, char32_t); USER_CONVFN(char8_t, char32_t);
} }
@ -313,7 +313,7 @@ namespace yycc::encoding::iconv {
Utf32ToUtf8::~Utf32ToUtf8() {} Utf32ToUtf8::~Utf32ToUtf8() {}
ConvResult<std::u8string> Utf32ToUtf8::priv_to_utf8(const std::u32string_view& src) { ConvResult<std::u8string> Utf32ToUtf8::to_utf8(const std::u32string_view& src) {
USER_CONVFN(char32_t, char8_t); USER_CONVFN(char32_t, char8_t);
} }

View File

@ -1,8 +1,5 @@
#pragma once #pragma once
#include "../macro/os_detector.hpp" #include "../macro/os_detector.hpp"
#if YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS)
#include "../macro/class_copy_move.hpp" #include "../macro/class_copy_move.hpp"
#include <string> #include <string>
#include <string_view> #include <string_view>
@ -51,6 +48,8 @@ namespace yycc::encoding::iconv {
template<typename T> template<typename T>
using ConvResult = std::expected<T, ConvError>; using ConvResult = std::expected<T, ConvError>;
#if YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS)
/// @brief Char -> UTF8 /// @brief Char -> UTF8
class CharToUtf8 { class CharToUtf8 {
public: public:
@ -60,7 +59,7 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(CharToUtf8) YYCC_DEFAULT_MOVE(CharToUtf8)
public: public:
ConvResult<std::u8string> priv_to_utf8(const std::string_view& src); ConvResult<std::u8string> to_utf8(const std::string_view& src);
private: private:
Token token; Token token;
@ -75,7 +74,7 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(Utf8ToChar) YYCC_DEFAULT_MOVE(Utf8ToChar)
public: public:
ConvResult<std::string> priv_to_char(const std::u8string_view& src); ConvResult<std::string> to_char(const std::u8string_view& src);
private: private:
Token token; Token token;
@ -90,7 +89,7 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(WcharToUtf8) YYCC_DEFAULT_MOVE(WcharToUtf8)
public: public:
ConvResult<std::u8string> priv_to_utf8(const std::wstring_view& src); ConvResult<std::u8string> to_utf8(const std::wstring_view& src);
private: private:
Token token; Token token;
@ -105,7 +104,7 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(Utf8ToWchar) YYCC_DEFAULT_MOVE(Utf8ToWchar)
public: public:
ConvResult<std::wstring> priv_to_wchar(const std::u8string_view& src); ConvResult<std::wstring> to_wchar(const std::u8string_view& src);
private: private:
Token token; Token token;
@ -120,7 +119,7 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(Utf8ToUtf16) YYCC_DEFAULT_MOVE(Utf8ToUtf16)
public: public:
ConvResult<std::u16string> priv_to_utf16(const std::u8string_view& src); ConvResult<std::u16string> to_utf16(const std::u8string_view& src);
private: private:
Token token; Token token;
@ -135,7 +134,7 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(Utf16ToUtf8) YYCC_DEFAULT_MOVE(Utf16ToUtf8)
public: public:
ConvResult<std::u8string> priv_to_utf8(const std::u16string_view& src); ConvResult<std::u8string> to_utf8(const std::u16string_view& src);
private: private:
Token token; Token token;
@ -150,7 +149,7 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(Utf8ToUtf32) YYCC_DEFAULT_MOVE(Utf8ToUtf32)
public: public:
ConvResult<std::u32string> priv_to_utf32(const std::u8string_view& src); ConvResult<std::u32string> to_utf32(const std::u8string_view& src);
private: private:
Token token; Token token;
@ -165,12 +164,12 @@ namespace yycc::encoding::iconv {
YYCC_DEFAULT_MOVE(Utf32ToUtf8) YYCC_DEFAULT_MOVE(Utf32ToUtf8)
public: public:
ConvResult<std::u8string> priv_to_utf8(const std::u32string_view& src); ConvResult<std::u8string> to_utf8(const std::u32string_view& src);
private: private:
Token token; Token token;
}; };
} // namespace yycc::encoding::iconv
#endif #endif
} // namespace yycc::encoding::iconv

View File

@ -1,359 +1,370 @@
#include "pycodec.hpp" #include "pycodec.hpp"
#include "../string/op.hpp"
#include <map> #include <map>
#define NS_YYCC_STRING ::yycc::string using namespace std::literals::string_view_literals;
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected namespace op = ::yycc::string::op;
namespace yycc::encoding::pycodec { namespace yycc::encoding::pycodec {
#pragma region Encoding Name #pragma region Encoding Name
static const std::map<NS_YYCC_STRING::u8string, NS_YYCC_STRING::u8string> ALISA_MAP { static const std::map<std::u8string_view, std::u8string_view> ALIAS_MAP{
{ YYCC_U8("646"), YYCC_U8("ascii") }, {u8"646"sv, u8"ascii"sv},
{ YYCC_U8("us-ascii"), YYCC_U8("ascii") }, {u8"us-ascii"sv, u8"ascii"sv},
{ YYCC_U8("big5-tw"), YYCC_U8("big5") }, {u8"big5-tw"sv, u8"big5"sv},
{ YYCC_U8("csbig5"), YYCC_U8("big5") }, {u8"csbig5"sv, u8"big5"sv},
{ YYCC_U8("big5-hkscs"), YYCC_U8("big5hkscs") }, {u8"big5-hkscs"sv, u8"big5hkscs"sv},
{ YYCC_U8("hkscs"), YYCC_U8("big5hkscs") }, {u8"hkscs"sv, u8"big5hkscs"sv},
{ YYCC_U8("ibm037"), YYCC_U8("cp037") }, {u8"ibm037"sv, u8"cp037"sv},
{ YYCC_U8("ibm039"), YYCC_U8("cp037") }, {u8"ibm039"sv, u8"cp037"sv},
{ YYCC_U8("273"), YYCC_U8("cp273") }, {u8"273"sv, u8"cp273"sv},
{ YYCC_U8("ibm273"), YYCC_U8("cp273") }, {u8"ibm273"sv, u8"cp273"sv},
{ YYCC_U8("csibm273"), YYCC_U8("cp273") }, {u8"csibm273"sv, u8"cp273"sv},
{ YYCC_U8("ebcdic-cp-he"), YYCC_U8("cp424") }, {u8"ebcdic-cp-he"sv, u8"cp424"sv},
{ YYCC_U8("ibm424"), YYCC_U8("cp424") }, {u8"ibm424"sv, u8"cp424"sv},
{ YYCC_U8("437"), YYCC_U8("cp437") }, {u8"437"sv, u8"cp437"sv},
{ YYCC_U8("ibm437"), YYCC_U8("cp437") }, {u8"ibm437"sv, u8"cp437"sv},
{ YYCC_U8("ebcdic-cp-be"), YYCC_U8("cp500") }, {u8"ebcdic-cp-be"sv, u8"cp500"sv},
{ YYCC_U8("ebcdic-cp-ch"), YYCC_U8("cp500") }, {u8"ebcdic-cp-ch"sv, u8"cp500"sv},
{ YYCC_U8("ibm500"), YYCC_U8("cp500") }, {u8"ibm500"sv, u8"cp500"sv},
{ YYCC_U8("ibm775"), YYCC_U8("cp775") }, {u8"ibm775"sv, u8"cp775"sv},
{ YYCC_U8("850"), YYCC_U8("cp850") }, {u8"850"sv, u8"cp850"sv},
{ YYCC_U8("ibm850"), YYCC_U8("cp850") }, {u8"ibm850"sv, u8"cp850"sv},
{ YYCC_U8("852"), YYCC_U8("cp852") }, {u8"852"sv, u8"cp852"sv},
{ YYCC_U8("ibm852"), YYCC_U8("cp852") }, {u8"ibm852"sv, u8"cp852"sv},
{ YYCC_U8("855"), YYCC_U8("cp855") }, {u8"855"sv, u8"cp855"sv},
{ YYCC_U8("ibm855"), YYCC_U8("cp855") }, {u8"ibm855"sv, u8"cp855"sv},
{ YYCC_U8("857"), YYCC_U8("cp857") }, {u8"857"sv, u8"cp857"sv},
{ YYCC_U8("ibm857"), YYCC_U8("cp857") }, {u8"ibm857"sv, u8"cp857"sv},
{ YYCC_U8("858"), YYCC_U8("cp858") }, {u8"858"sv, u8"cp858"sv},
{ YYCC_U8("ibm858"), YYCC_U8("cp858") }, {u8"ibm858"sv, u8"cp858"sv},
{ YYCC_U8("860"), YYCC_U8("cp860") }, {u8"860"sv, u8"cp860"sv},
{ YYCC_U8("ibm860"), YYCC_U8("cp860") }, {u8"ibm860"sv, u8"cp860"sv},
{ YYCC_U8("861"), YYCC_U8("cp861") }, {u8"861"sv, u8"cp861"sv},
{ YYCC_U8("cp-is"), YYCC_U8("cp861") }, {u8"cp-is"sv, u8"cp861"sv},
{ YYCC_U8("ibm861"), YYCC_U8("cp861") }, {u8"ibm861"sv, u8"cp861"sv},
{ YYCC_U8("862"), YYCC_U8("cp862") }, {u8"862"sv, u8"cp862"sv},
{ YYCC_U8("ibm862"), YYCC_U8("cp862") }, {u8"ibm862"sv, u8"cp862"sv},
{ YYCC_U8("863"), YYCC_U8("cp863") }, {u8"863"sv, u8"cp863"sv},
{ YYCC_U8("ibm863"), YYCC_U8("cp863") }, {u8"ibm863"sv, u8"cp863"sv},
{ YYCC_U8("ibm864"), YYCC_U8("cp864") }, {u8"ibm864"sv, u8"cp864"sv},
{ YYCC_U8("865"), YYCC_U8("cp865") }, {u8"865"sv, u8"cp865"sv},
{ YYCC_U8("ibm865"), YYCC_U8("cp865") }, {u8"ibm865"sv, u8"cp865"sv},
{ YYCC_U8("866"), YYCC_U8("cp866") }, {u8"866"sv, u8"cp866"sv},
{ YYCC_U8("ibm866"), YYCC_U8("cp866") }, {u8"ibm866"sv, u8"cp866"sv},
{ YYCC_U8("869"), YYCC_U8("cp869") }, {u8"869"sv, u8"cp869"sv},
{ YYCC_U8("cp-gr"), YYCC_U8("cp869") }, {u8"cp-gr"sv, u8"cp869"sv},
{ YYCC_U8("ibm869"), YYCC_U8("cp869") }, {u8"ibm869"sv, u8"cp869"sv},
{ YYCC_U8("932"), YYCC_U8("cp932") }, {u8"932"sv, u8"cp932"sv},
{ YYCC_U8("ms932"), YYCC_U8("cp932") }, {u8"ms932"sv, u8"cp932"sv},
{ YYCC_U8("mskanji"), YYCC_U8("cp932") }, {u8"mskanji"sv, u8"cp932"sv},
{ YYCC_U8("ms-kanji"), YYCC_U8("cp932") }, {u8"ms-kanji"sv, u8"cp932"sv},
{ YYCC_U8("windows-31j"), YYCC_U8("cp932") }, {u8"windows-31j"sv, u8"cp932"sv},
{ YYCC_U8("949"), YYCC_U8("cp949") }, {u8"949"sv, u8"cp949"sv},
{ YYCC_U8("ms949"), YYCC_U8("cp949") }, {u8"ms949"sv, u8"cp949"sv},
{ YYCC_U8("uhc"), YYCC_U8("cp949") }, {u8"uhc"sv, u8"cp949"sv},
{ YYCC_U8("950"), YYCC_U8("cp950") }, {u8"950"sv, u8"cp950"sv},
{ YYCC_U8("ms950"), YYCC_U8("cp950") }, {u8"ms950"sv, u8"cp950"sv},
{ YYCC_U8("ibm1026"), YYCC_U8("cp1026") }, {u8"ibm1026"sv, u8"cp1026"sv},
{ YYCC_U8("1125"), YYCC_U8("cp1125") }, {u8"1125"sv, u8"cp1125"sv},
{ YYCC_U8("ibm1125"), YYCC_U8("cp1125") }, {u8"ibm1125"sv, u8"cp1125"sv},
{ YYCC_U8("cp866u"), YYCC_U8("cp1125") }, {u8"cp866u"sv, u8"cp1125"sv},
{ YYCC_U8("ruscii"), YYCC_U8("cp1125") }, {u8"ruscii"sv, u8"cp1125"sv},
{ YYCC_U8("ibm1140"), YYCC_U8("cp1140") }, {u8"ibm1140"sv, u8"cp1140"sv},
{ YYCC_U8("windows-1250"), YYCC_U8("cp1250") }, {u8"windows-1250"sv, u8"cp1250"sv},
{ YYCC_U8("windows-1251"), YYCC_U8("cp1251") }, {u8"windows-1251"sv, u8"cp1251"sv},
{ YYCC_U8("windows-1252"), YYCC_U8("cp1252") }, {u8"windows-1252"sv, u8"cp1252"sv},
{ YYCC_U8("windows-1253"), YYCC_U8("cp1253") }, {u8"windows-1253"sv, u8"cp1253"sv},
{ YYCC_U8("windows-1254"), YYCC_U8("cp1254") }, {u8"windows-1254"sv, u8"cp1254"sv},
{ YYCC_U8("windows-1255"), YYCC_U8("cp1255") }, {u8"windows-1255"sv, u8"cp1255"sv},
{ YYCC_U8("windows-1256"), YYCC_U8("cp1256") }, {u8"windows-1256"sv, u8"cp1256"sv},
{ YYCC_U8("windows-1257"), YYCC_U8("cp1257") }, {u8"windows-1257"sv, u8"cp1257"sv},
{ YYCC_U8("windows-1258"), YYCC_U8("cp1258") }, {u8"windows-1258"sv, u8"cp1258"sv},
{ YYCC_U8("eucjp"), YYCC_U8("euc_jp") }, {u8"eucjp"sv, u8"euc_jp"sv},
{ YYCC_U8("ujis"), YYCC_U8("euc_jp") }, {u8"ujis"sv, u8"euc_jp"sv},
{ YYCC_U8("u-jis"), YYCC_U8("euc_jp") }, {u8"u-jis"sv, u8"euc_jp"sv},
{ YYCC_U8("jisx0213"), YYCC_U8("euc_jis_2004") }, {u8"jisx0213"sv, u8"euc_jis_2004"sv},
{ YYCC_U8("eucjis2004"), YYCC_U8("euc_jis_2004") }, {u8"eucjis2004"sv, u8"euc_jis_2004"sv},
{ YYCC_U8("eucjisx0213"), YYCC_U8("euc_jisx0213") }, {u8"eucjisx0213"sv, u8"euc_jisx0213"sv},
{ YYCC_U8("euckr"), YYCC_U8("euc_kr") }, {u8"euckr"sv, u8"euc_kr"sv},
{ YYCC_U8("korean"), YYCC_U8("euc_kr") }, {u8"korean"sv, u8"euc_kr"sv},
{ YYCC_U8("ksc5601"), YYCC_U8("euc_kr") }, {u8"ksc5601"sv, u8"euc_kr"sv},
{ YYCC_U8("ks_c-5601"), YYCC_U8("euc_kr") }, {u8"ks_c-5601"sv, u8"euc_kr"sv},
{ YYCC_U8("ks_c-5601-1987"), YYCC_U8("euc_kr") }, {u8"ks_c-5601-1987"sv, u8"euc_kr"sv},
{ YYCC_U8("ksx1001"), YYCC_U8("euc_kr") }, {u8"ksx1001"sv, u8"euc_kr"sv},
{ YYCC_U8("ks_x-1001"), YYCC_U8("euc_kr") }, {u8"ks_x-1001"sv, u8"euc_kr"sv},
{ YYCC_U8("chinese"), YYCC_U8("gb2312") }, {u8"chinese"sv, u8"gb2312"sv},
{ YYCC_U8("csiso58gb231280"), YYCC_U8("gb2312") }, {u8"csiso58gb231280"sv, u8"gb2312"sv},
{ YYCC_U8("euc-cn"), YYCC_U8("gb2312") }, {u8"euc-cn"sv, u8"gb2312"sv},
{ YYCC_U8("euccn"), YYCC_U8("gb2312") }, {u8"euccn"sv, u8"gb2312"sv},
{ YYCC_U8("eucgb2312-cn"), YYCC_U8("gb2312") }, {u8"eucgb2312-cn"sv, u8"gb2312"sv},
{ YYCC_U8("gb2312-1980"), YYCC_U8("gb2312") }, {u8"gb2312-1980"sv, u8"gb2312"sv},
{ YYCC_U8("gb2312-80"), YYCC_U8("gb2312") }, {u8"gb2312-80"sv, u8"gb2312"sv},
{ YYCC_U8("iso-ir-58"), YYCC_U8("gb2312") }, {u8"iso-ir-58"sv, u8"gb2312"sv},
{ YYCC_U8("936"), YYCC_U8("gbk") }, {u8"936"sv, u8"gbk"sv},
{ YYCC_U8("cp936"), YYCC_U8("gbk") }, {u8"cp936"sv, u8"gbk"sv},
{ YYCC_U8("ms936"), YYCC_U8("gbk") }, {u8"ms936"sv, u8"gbk"sv},
{ YYCC_U8("gb18030-2000"), YYCC_U8("gb18030") }, {u8"gb18030-2000"sv, u8"gb18030"sv},
{ YYCC_U8("hzgb"), YYCC_U8("hz") }, {u8"hzgb"sv, u8"hz"sv},
{ YYCC_U8("hz-gb"), YYCC_U8("hz") }, {u8"hz-gb"sv, u8"hz"sv},
{ YYCC_U8("hz-gb-2312"), YYCC_U8("hz") }, {u8"hz-gb-2312"sv, u8"hz"sv},
{ YYCC_U8("csiso2022jp"), YYCC_U8("iso2022_jp") }, {u8"csiso2022jp"sv, u8"iso2022_jp"sv},
{ YYCC_U8("iso2022jp"), YYCC_U8("iso2022_jp") }, {u8"iso2022jp"sv, u8"iso2022_jp"sv},
{ YYCC_U8("iso-2022-jp"), YYCC_U8("iso2022_jp") }, {u8"iso-2022-jp"sv, u8"iso2022_jp"sv},
{ YYCC_U8("iso2022jp-1"), YYCC_U8("iso2022_jp_1") }, {u8"iso2022jp-1"sv, u8"iso2022_jp_1"sv},
{ YYCC_U8("iso-2022-jp-1"), YYCC_U8("iso2022_jp_1") }, {u8"iso-2022-jp-1"sv, u8"iso2022_jp_1"sv},
{ YYCC_U8("iso2022jp-2"), YYCC_U8("iso2022_jp_2") }, {u8"iso2022jp-2"sv, u8"iso2022_jp_2"sv},
{ YYCC_U8("iso-2022-jp-2"), YYCC_U8("iso2022_jp_2") }, {u8"iso-2022-jp-2"sv, u8"iso2022_jp_2"sv},
{ YYCC_U8("iso2022jp-2004"), YYCC_U8("iso2022_jp_2004") }, {u8"iso2022jp-2004"sv, u8"iso2022_jp_2004"sv},
{ YYCC_U8("iso-2022-jp-2004"), YYCC_U8("iso2022_jp_2004") }, {u8"iso-2022-jp-2004"sv, u8"iso2022_jp_2004"sv},
{ YYCC_U8("iso2022jp-3"), YYCC_U8("iso2022_jp_3") }, {u8"iso2022jp-3"sv, u8"iso2022_jp_3"sv},
{ YYCC_U8("iso-2022-jp-3"), YYCC_U8("iso2022_jp_3") }, {u8"iso-2022-jp-3"sv, u8"iso2022_jp_3"sv},
{ YYCC_U8("iso2022jp-ext"), YYCC_U8("iso2022_jp_ext") }, {u8"iso2022jp-ext"sv, u8"iso2022_jp_ext"sv},
{ YYCC_U8("iso-2022-jp-ext"), YYCC_U8("iso2022_jp_ext") }, {u8"iso-2022-jp-ext"sv, u8"iso2022_jp_ext"sv},
{ YYCC_U8("csiso2022kr"), YYCC_U8("iso2022_kr") }, {u8"csiso2022kr"sv, u8"iso2022_kr"sv},
{ YYCC_U8("iso2022kr"), YYCC_U8("iso2022_kr") }, {u8"iso2022kr"sv, u8"iso2022_kr"sv},
{ YYCC_U8("iso-2022-kr"), YYCC_U8("iso2022_kr") }, {u8"iso-2022-kr"sv, u8"iso2022_kr"sv},
{ YYCC_U8("iso-8859-1"), YYCC_U8("latin_1") }, {u8"iso-8859-1"sv, u8"latin_1"sv},
{ YYCC_U8("iso8859-1"), YYCC_U8("latin_1") }, {u8"iso8859-1"sv, u8"latin_1"sv},
{ YYCC_U8("8859"), YYCC_U8("latin_1") }, {u8"8859"sv, u8"latin_1"sv},
{ YYCC_U8("cp819"), YYCC_U8("latin_1") }, {u8"cp819"sv, u8"latin_1"sv},
{ YYCC_U8("latin"), YYCC_U8("latin_1") }, {u8"latin"sv, u8"latin_1"sv},
{ YYCC_U8("latin1"), YYCC_U8("latin_1") }, {u8"latin1"sv, u8"latin_1"sv},
{ YYCC_U8("l1"), YYCC_U8("latin_1") }, {u8"l1"sv, u8"latin_1"sv},
{ YYCC_U8("iso-8859-2"), YYCC_U8("iso8859_2") }, {u8"iso-8859-2"sv, u8"iso8859_2"sv},
{ YYCC_U8("latin2"), YYCC_U8("iso8859_2") }, {u8"latin2"sv, u8"iso8859_2"sv},
{ YYCC_U8("l2"), YYCC_U8("iso8859_2") }, {u8"l2"sv, u8"iso8859_2"sv},
{ YYCC_U8("iso-8859-3"), YYCC_U8("iso8859_3") }, {u8"iso-8859-3"sv, u8"iso8859_3"sv},
{ YYCC_U8("latin3"), YYCC_U8("iso8859_3") }, {u8"latin3"sv, u8"iso8859_3"sv},
{ YYCC_U8("l3"), YYCC_U8("iso8859_3") }, {u8"l3"sv, u8"iso8859_3"sv},
{ YYCC_U8("iso-8859-4"), YYCC_U8("iso8859_4") }, {u8"iso-8859-4"sv, u8"iso8859_4"sv},
{ YYCC_U8("latin4"), YYCC_U8("iso8859_4") }, {u8"latin4"sv, u8"iso8859_4"sv},
{ YYCC_U8("l4"), YYCC_U8("iso8859_4") }, {u8"l4"sv, u8"iso8859_4"sv},
{ YYCC_U8("iso-8859-5"), YYCC_U8("iso8859_5") }, {u8"iso-8859-5"sv, u8"iso8859_5"sv},
{ YYCC_U8("cyrillic"), YYCC_U8("iso8859_5") }, {u8"cyrillic"sv, u8"iso8859_5"sv},
{ YYCC_U8("iso-8859-6"), YYCC_U8("iso8859_6") }, {u8"iso-8859-6"sv, u8"iso8859_6"sv},
{ YYCC_U8("arabic"), YYCC_U8("iso8859_6") }, {u8"arabic"sv, u8"iso8859_6"sv},
{ YYCC_U8("iso-8859-7"), YYCC_U8("iso8859_7") }, {u8"iso-8859-7"sv, u8"iso8859_7"sv},
{ YYCC_U8("greek"), YYCC_U8("iso8859_7") }, {u8"greek"sv, u8"iso8859_7"sv},
{ YYCC_U8("greek8"), YYCC_U8("iso8859_7") }, {u8"greek8"sv, u8"iso8859_7"sv},
{ YYCC_U8("iso-8859-8"), YYCC_U8("iso8859_8") }, {u8"iso-8859-8"sv, u8"iso8859_8"sv},
{ YYCC_U8("hebrew"), YYCC_U8("iso8859_8") }, {u8"hebrew"sv, u8"iso8859_8"sv},
{ YYCC_U8("iso-8859-9"), YYCC_U8("iso8859_9") }, {u8"iso-8859-9"sv, u8"iso8859_9"sv},
{ YYCC_U8("latin5"), YYCC_U8("iso8859_9") }, {u8"latin5"sv, u8"iso8859_9"sv},
{ YYCC_U8("l5"), YYCC_U8("iso8859_9") }, {u8"l5"sv, u8"iso8859_9"sv},
{ YYCC_U8("iso-8859-10"), YYCC_U8("iso8859_10") }, {u8"iso-8859-10"sv, u8"iso8859_10"sv},
{ YYCC_U8("latin6"), YYCC_U8("iso8859_10") }, {u8"latin6"sv, u8"iso8859_10"sv},
{ YYCC_U8("l6"), YYCC_U8("iso8859_10") }, {u8"l6"sv, u8"iso8859_10"sv},
{ YYCC_U8("iso-8859-11"), YYCC_U8("iso8859_11") }, {u8"iso-8859-11"sv, u8"iso8859_11"sv},
{ YYCC_U8("thai"), YYCC_U8("iso8859_11") }, {u8"thai"sv, u8"iso8859_11"sv},
{ YYCC_U8("iso-8859-13"), YYCC_U8("iso8859_13") }, {u8"iso-8859-13"sv, u8"iso8859_13"sv},
{ YYCC_U8("latin7"), YYCC_U8("iso8859_13") }, {u8"latin7"sv, u8"iso8859_13"sv},
{ YYCC_U8("l7"), YYCC_U8("iso8859_13") }, {u8"l7"sv, u8"iso8859_13"sv},
{ YYCC_U8("iso-8859-14"), YYCC_U8("iso8859_14") }, {u8"iso-8859-14"sv, u8"iso8859_14"sv},
{ YYCC_U8("latin8"), YYCC_U8("iso8859_14") }, {u8"latin8"sv, u8"iso8859_14"sv},
{ YYCC_U8("l8"), YYCC_U8("iso8859_14") }, {u8"l8"sv, u8"iso8859_14"sv},
{ YYCC_U8("iso-8859-15"), YYCC_U8("iso8859_15") }, {u8"iso-8859-15"sv, u8"iso8859_15"sv},
{ YYCC_U8("latin9"), YYCC_U8("iso8859_15") }, {u8"latin9"sv, u8"iso8859_15"sv},
{ YYCC_U8("l9"), YYCC_U8("iso8859_15") }, {u8"l9"sv, u8"iso8859_15"sv},
{ YYCC_U8("iso-8859-16"), YYCC_U8("iso8859_16") }, {u8"iso-8859-16"sv, u8"iso8859_16"sv},
{ YYCC_U8("latin10"), YYCC_U8("iso8859_16") }, {u8"latin10"sv, u8"iso8859_16"sv},
{ YYCC_U8("l10"), YYCC_U8("iso8859_16") }, {u8"l10"sv, u8"iso8859_16"sv},
{ YYCC_U8("cp1361"), YYCC_U8("johab") }, {u8"cp1361"sv, u8"johab"sv},
{ YYCC_U8("ms1361"), YYCC_U8("johab") }, {u8"ms1361"sv, u8"johab"sv},
{ YYCC_U8("kz_1048"), YYCC_U8("kz1048") }, {u8"kz_1048"sv, u8"kz1048"sv},
{ YYCC_U8("strk1048_2002"), YYCC_U8("kz1048") }, {u8"strk1048_2002"sv, u8"kz1048"sv},
{ YYCC_U8("rk1048"), YYCC_U8("kz1048") }, {u8"rk1048"sv, u8"kz1048"sv},
{ YYCC_U8("maccyrillic"), YYCC_U8("mac_cyrillic") }, {u8"maccyrillic"sv, u8"mac_cyrillic"sv},
{ YYCC_U8("macgreek"), YYCC_U8("mac_greek") }, {u8"macgreek"sv, u8"mac_greek"sv},
{ YYCC_U8("maciceland"), YYCC_U8("mac_iceland") }, {u8"maciceland"sv, u8"mac_iceland"sv},
{ YYCC_U8("maclatin2"), YYCC_U8("mac_latin2") }, {u8"maclatin2"sv, u8"mac_latin2"sv},
{ YYCC_U8("maccentraleurope"), YYCC_U8("mac_latin2") }, {u8"maccentraleurope"sv, u8"mac_latin2"sv},
{ YYCC_U8("mac_centeuro"), YYCC_U8("mac_latin2") }, {u8"mac_centeuro"sv, u8"mac_latin2"sv},
{ YYCC_U8("macroman"), YYCC_U8("mac_roman") }, {u8"macroman"sv, u8"mac_roman"sv},
{ YYCC_U8("macintosh"), YYCC_U8("mac_roman") }, {u8"macintosh"sv, u8"mac_roman"sv},
{ YYCC_U8("macturkish"), YYCC_U8("mac_turkish") }, {u8"macturkish"sv, u8"mac_turkish"sv},
{ YYCC_U8("csptcp154"), YYCC_U8("ptcp154") }, {u8"csptcp154"sv, u8"ptcp154"sv},
{ YYCC_U8("pt154"), YYCC_U8("ptcp154") }, {u8"pt154"sv, u8"ptcp154"sv},
{ YYCC_U8("cp154"), YYCC_U8("ptcp154") }, {u8"cp154"sv, u8"ptcp154"sv},
{ YYCC_U8("cyrillic-asian"), YYCC_U8("ptcp154") }, {u8"cyrillic-asian"sv, u8"ptcp154"sv},
{ YYCC_U8("csshiftjis"), YYCC_U8("shift_jis") }, {u8"csshiftjis"sv, u8"shift_jis"sv},
{ YYCC_U8("shiftjis"), YYCC_U8("shift_jis") }, {u8"shiftjis"sv, u8"shift_jis"sv},
{ YYCC_U8("sjis"), YYCC_U8("shift_jis") }, {u8"sjis"sv, u8"shift_jis"sv},
{ YYCC_U8("s_jis"), YYCC_U8("shift_jis") }, {u8"s_jis"sv, u8"shift_jis"sv},
{ YYCC_U8("shiftjis2004"), YYCC_U8("shift_jis_2004") }, {u8"shiftjis2004"sv, u8"shift_jis_2004"sv},
{ YYCC_U8("sjis_2004"), YYCC_U8("shift_jis_2004") }, {u8"sjis_2004"sv, u8"shift_jis_2004"sv},
{ YYCC_U8("sjis2004"), YYCC_U8("shift_jis_2004") }, {u8"sjis2004"sv, u8"shift_jis_2004"sv},
{ YYCC_U8("shiftjisx0213"), YYCC_U8("shift_jisx0213") }, {u8"shiftjisx0213"sv, u8"shift_jisx0213"sv},
{ YYCC_U8("sjisx0213"), YYCC_U8("shift_jisx0213") }, {u8"sjisx0213"sv, u8"shift_jisx0213"sv},
{ YYCC_U8("s_jisx0213"), YYCC_U8("shift_jisx0213") }, {u8"s_jisx0213"sv, u8"shift_jisx0213"sv},
{ YYCC_U8("u32"), YYCC_U8("utf_32") }, {u8"u32"sv, u8"utf_32"sv},
{ YYCC_U8("utf32"), YYCC_U8("utf_32") }, {u8"utf32"sv, u8"utf_32"sv},
{ YYCC_U8("utf-32be"), YYCC_U8("utf_32_be") }, {u8"utf-32be"sv, u8"utf_32_be"sv},
{ YYCC_U8("utf-32le"), YYCC_U8("utf_32_le") }, {u8"utf-32le"sv, u8"utf_32_le"sv},
{ YYCC_U8("u16"), YYCC_U8("utf_16") }, {u8"u16"sv, u8"utf_16"sv},
{ YYCC_U8("utf16"), YYCC_U8("utf_16") }, {u8"utf16"sv, u8"utf_16"sv},
{ YYCC_U8("utf-16be"), YYCC_U8("utf_16_be") }, {u8"utf-16be"sv, u8"utf_16_be"sv},
{ YYCC_U8("utf-16le"), YYCC_U8("utf_16_le") }, {u8"utf-16le"sv, u8"utf_16_le"sv},
{ YYCC_U8("u7"), YYCC_U8("utf_7") }, {u8"u7"sv, u8"utf_7"sv},
{ YYCC_U8("unicode-1-1-utf-7"), YYCC_U8("utf_7") }, {u8"unicode-1-1-utf-7"sv, u8"utf_7"sv},
{ YYCC_U8("u8"), YYCC_U8("utf_8") }, {u8"u8"sv, u8"utf_8"sv},
{ YYCC_U8("utf"), YYCC_U8("utf_8") }, {u8"utf"sv, u8"utf_8"sv},
{ YYCC_U8("utf8"), YYCC_U8("utf_8") }, {u8"utf8"sv, u8"utf_8"sv},
{ YYCC_U8("utf-8"), YYCC_U8("utf_8") }, {u8"utf-8"sv, u8"utf_8"sv},
{ YYCC_U8("cp65001"), YYCC_U8("utf_8") }, {u8"cp65001"sv, u8"utf_8"sv},
}; };
/**
 * @brief Translate an encoding name alias into the encoding name it stands for.
 * @details
 * The alias map is searched with the result of op::to_lower() applied to the given name
 * (presumably a lower-cased copy -- confirm against string/op.hpp).
 * @param[in] enc_name The encoding name to look up.
 * @return
 * The encoding name registered for the alias if the lookup hits.
 * Otherwise a copy of the given name, exactly as it was passed in.
 */
static std::u8string resolve_encoding_alias(const std::u8string_view& enc_name) {
    const auto lowered = op::to_lower(enc_name);
    const auto hit = ALIAS_MAP.find(lowered);
    // Hit: hand back the mapped name. Miss: hand back the caller's own spelling untouched.
    return std::u8string(hit != ALIAS_MAP.end() ? hit->second : enc_name);
}
#if defined(YYCC_PYCODEC_WIN32_BACKEND) #if defined(YYCC_PYCODEC_WIN32_BACKEND)
using CodePage = NS_YYCC_ENCODING_BACKEND::CodePage; using CodePage = YYCC_PYCODEC_BACKEND_NS::CodePage;
static const std::map<NS_YYCC_STRING::u8string, CodePage> WINCP_MAP { static const std::map<std::u8string_view, CodePage> WINCP_MAP{
{ YYCC_U8("ascii"), static_cast<CodePage>(437u) }, {u8"ascii"sv, static_cast<CodePage>(437u)}, {u8"big5"sv, static_cast<CodePage>(950u)},
{ YYCC_U8("big5"), static_cast<CodePage>(950u) }, {u8"cp037"sv, static_cast<CodePage>(037u)}, {u8"cp437"sv, static_cast<CodePage>(437u)},
{ YYCC_U8("cp037"), static_cast<CodePage>(037u) }, {u8"cp500"sv, static_cast<CodePage>(500u)}, {u8"cp720"sv, static_cast<CodePage>(720u)},
{ YYCC_U8("cp437"), static_cast<CodePage>(437u) }, {u8"cp737"sv, static_cast<CodePage>(737u)}, {u8"cp775"sv, static_cast<CodePage>(775u)},
{ YYCC_U8("cp500"), static_cast<CodePage>(500u) }, {u8"cp850"sv, static_cast<CodePage>(850u)}, {u8"cp852"sv, static_cast<CodePage>(852u)},
{ YYCC_U8("cp720"), static_cast<CodePage>(720u) }, {u8"cp855"sv, static_cast<CodePage>(855u)}, {u8"cp857"sv, static_cast<CodePage>(857u)},
{ YYCC_U8("cp737"), static_cast<CodePage>(737u) }, {u8"cp858"sv, static_cast<CodePage>(858u)}, {u8"cp860"sv, static_cast<CodePage>(860u)},
{ YYCC_U8("cp775"), static_cast<CodePage>(775u) }, {u8"cp861"sv, static_cast<CodePage>(861u)}, {u8"cp862"sv, static_cast<CodePage>(862u)},
{ YYCC_U8("cp850"), static_cast<CodePage>(850u) }, {u8"cp863"sv, static_cast<CodePage>(863u)}, {u8"cp864"sv, static_cast<CodePage>(864u)},
{ YYCC_U8("cp852"), static_cast<CodePage>(852u) }, {u8"cp865"sv, static_cast<CodePage>(865u)}, {u8"cp866"sv, static_cast<CodePage>(866u)},
{ YYCC_U8("cp855"), static_cast<CodePage>(855u) }, {u8"cp869"sv, static_cast<CodePage>(869u)}, {u8"cp874"sv, static_cast<CodePage>(874u)},
{ YYCC_U8("cp857"), static_cast<CodePage>(857u) }, {u8"cp875"sv, static_cast<CodePage>(875u)}, {u8"cp932"sv, static_cast<CodePage>(932u)},
{ YYCC_U8("cp858"), static_cast<CodePage>(858u) }, {u8"cp949"sv, static_cast<CodePage>(949u)}, {u8"cp950"sv, static_cast<CodePage>(950u)},
{ YYCC_U8("cp860"), static_cast<CodePage>(860u) }, {u8"cp1026"sv, static_cast<CodePage>(1026u)}, {u8"cp1140"sv, static_cast<CodePage>(1140u)},
{ YYCC_U8("cp861"), static_cast<CodePage>(861u) }, {u8"cp1250"sv, static_cast<CodePage>(1250u)}, {u8"cp1251"sv, static_cast<CodePage>(1251u)},
{ YYCC_U8("cp862"), static_cast<CodePage>(862u) }, {u8"cp1252"sv, static_cast<CodePage>(1252u)}, {u8"cp1253"sv, static_cast<CodePage>(1253u)},
{ YYCC_U8("cp863"), static_cast<CodePage>(863u) }, {u8"cp1254"sv, static_cast<CodePage>(1254u)}, {u8"cp1255"sv, static_cast<CodePage>(1255u)},
{ YYCC_U8("cp864"), static_cast<CodePage>(864u) }, {u8"cp1256"sv, static_cast<CodePage>(1256u)}, {u8"cp1257"sv, static_cast<CodePage>(1257u)},
{ YYCC_U8("cp865"), static_cast<CodePage>(865u) }, {u8"cp1258"sv, static_cast<CodePage>(1258u)}, {u8"euc_jp"sv, static_cast<CodePage>(20932u)},
{ YYCC_U8("cp866"), static_cast<CodePage>(866u) }, {u8"euc_kr"sv, static_cast<CodePage>(51949u)}, {u8"gb2312"sv, static_cast<CodePage>(936u)},
{ YYCC_U8("cp869"), static_cast<CodePage>(869u) }, {u8"gbk"sv, static_cast<CodePage>(936u)}, {u8"gb18030"sv, static_cast<CodePage>(54936u)},
{ YYCC_U8("cp874"), static_cast<CodePage>(874u) }, {u8"hz"sv, static_cast<CodePage>(52936u)}, {u8"iso2022_jp"sv, static_cast<CodePage>(50220u)},
{ YYCC_U8("cp875"), static_cast<CodePage>(875u) }, {u8"iso2022_kr"sv, static_cast<CodePage>(50225u)}, {u8"latin_1"sv, static_cast<CodePage>(28591u)},
{ YYCC_U8("cp932"), static_cast<CodePage>(932u) }, {u8"iso8859_2"sv, static_cast<CodePage>(28592u)}, {u8"iso8859_3"sv, static_cast<CodePage>(28593u)},
{ YYCC_U8("cp949"), static_cast<CodePage>(949u) }, {u8"iso8859_4"sv, static_cast<CodePage>(28594u)}, {u8"iso8859_5"sv, static_cast<CodePage>(28595u)},
{ YYCC_U8("cp950"), static_cast<CodePage>(950u) }, {u8"iso8859_6"sv, static_cast<CodePage>(28596u)}, {u8"iso8859_7"sv, static_cast<CodePage>(28597u)},
{ YYCC_U8("cp1026"), static_cast<CodePage>(1026u) }, {u8"iso8859_8"sv, static_cast<CodePage>(28598u)}, {u8"iso8859_9"sv, static_cast<CodePage>(28599u)},
{ YYCC_U8("cp1140"), static_cast<CodePage>(1140u) }, {u8"iso8859_13"sv, static_cast<CodePage>(28603u)}, {u8"iso8859_15"sv, static_cast<CodePage>(28605u)},
{ YYCC_U8("cp1250"), static_cast<CodePage>(1250u) }, {u8"johab"sv, static_cast<CodePage>(1361u)}, {u8"mac_cyrillic"sv, static_cast<CodePage>(10007u)},
{ YYCC_U8("cp1251"), static_cast<CodePage>(1251u) }, {u8"mac_greek"sv, static_cast<CodePage>(10006u)}, {u8"mac_iceland"sv, static_cast<CodePage>(10079u)},
{ YYCC_U8("cp1252"), static_cast<CodePage>(1252u) }, {u8"mac_turkish"sv, static_cast<CodePage>(10081u)}, {u8"shift_jis"sv, static_cast<CodePage>(932u)},
{ YYCC_U8("cp1253"), static_cast<CodePage>(1253u) }, {u8"utf_7"sv, static_cast<CodePage>(65000u)}, {u8"utf_8"sv, static_cast<CodePage>(65001u)},
{ YYCC_U8("cp1254"), static_cast<CodePage>(1254u) },
{ YYCC_U8("cp1255"), static_cast<CodePage>(1255u) },
{ YYCC_U8("cp1256"), static_cast<CodePage>(1256u) },
{ YYCC_U8("cp1257"), static_cast<CodePage>(1257u) },
{ YYCC_U8("cp1258"), static_cast<CodePage>(1258u) },
{ YYCC_U8("euc_jp"), static_cast<CodePage>(20932u) },
{ YYCC_U8("euc_kr"), static_cast<CodePage>(51949u) },
{ YYCC_U8("gb2312"), static_cast<CodePage>(936u) },
{ YYCC_U8("gbk"), static_cast<CodePage>(936u) },
{ YYCC_U8("gb18030"), static_cast<CodePage>(54936u) },
{ YYCC_U8("hz"), static_cast<CodePage>(52936u) },
{ YYCC_U8("iso2022_jp"), static_cast<CodePage>(50220u) },
{ YYCC_U8("iso2022_kr"), static_cast<CodePage>(50225u) },
{ YYCC_U8("latin_1"), static_cast<CodePage>(28591u) },
{ YYCC_U8("iso8859_2"), static_cast<CodePage>(28592u) },
{ YYCC_U8("iso8859_3"), static_cast<CodePage>(28593u) },
{ YYCC_U8("iso8859_4"), static_cast<CodePage>(28594u) },
{ YYCC_U8("iso8859_5"), static_cast<CodePage>(28595u) },
{ YYCC_U8("iso8859_6"), static_cast<CodePage>(28596u) },
{ YYCC_U8("iso8859_7"), static_cast<CodePage>(28597u) },
{ YYCC_U8("iso8859_8"), static_cast<CodePage>(28598u) },
{ YYCC_U8("iso8859_9"), static_cast<CodePage>(28599u) },
{ YYCC_U8("iso8859_13"), static_cast<CodePage>(28603u) },
{ YYCC_U8("iso8859_15"), static_cast<CodePage>(28605u) },
{ YYCC_U8("johab"), static_cast<CodePage>(1361u) },
{ YYCC_U8("mac_cyrillic"), static_cast<CodePage>(10007u) },
{ YYCC_U8("mac_greek"), static_cast<CodePage>(10006u) },
{ YYCC_U8("mac_iceland"), static_cast<CodePage>(10079u) },
{ YYCC_U8("mac_turkish"), static_cast<CodePage>(10081u) },
{ YYCC_U8("shift_jis"), static_cast<CodePage>(932u) },
{ YYCC_U8("utf_7"), static_cast<CodePage>(65000u) },
{ YYCC_U8("utf_8"), static_cast<CodePage>(65001u) },
}; };
static bool fetch_code_page(const std::u8string_view& enc_name, CodePage& out_cp) {
// resolve alias
std::u8string resolved_name = resolve_encoding_alias(enc_name);
// find code page
op::lower(resolved_name);
auto finder = WINCP_MAP.find(resolved_name);
if (finder == WINCP_MAP.end()) return false;
// okey, we found it.
out_cp = finder->second;
return true;
}
#else #else
static const std::map<NS_YYCC_STRING::u8string, std::string> ICONV_MAP { static const std::map<std::u8string_view, std::string_view> ICONV_MAP{
{ YYCC_U8("ascii"), "ASCII" }, {u8"ascii"sv, "ASCII"sv},
{ YYCC_U8("big5"), "BIG5" }, {u8"big5"sv, "BIG5"sv},
{ YYCC_U8("big5hkscs"), "BIG5-HKSCS" }, {u8"big5hkscs"sv, "BIG5-HKSCS"sv},
{ YYCC_U8("cp850"), "CP850" }, {u8"cp850"sv, "CP850"sv},
{ YYCC_U8("cp862"), "CP862" }, {u8"cp862"sv, "CP862"sv},
{ YYCC_U8("cp866"), "CP866" }, {u8"cp866"sv, "CP866"sv},
{ YYCC_U8("cp874"), "CP874" }, {u8"cp874"sv, "CP874"sv},
{ YYCC_U8("cp932"), "CP932" }, {u8"cp932"sv, "CP932"sv},
{ YYCC_U8("cp949"), "CP949" }, {u8"cp949"sv, "CP949"sv},
{ YYCC_U8("cp950"), "CP950" }, {u8"cp950"sv, "CP950"sv},
{ YYCC_U8("cp1250"), "CP1250" }, {u8"cp1250"sv, "CP1250"sv},
{ YYCC_U8("cp1251"), "CP1251" }, {u8"cp1251"sv, "CP1251"sv},
{ YYCC_U8("cp1252"), "CP1252" }, {u8"cp1252"sv, "CP1252"sv},
{ YYCC_U8("cp1253"), "CP1253" }, {u8"cp1253"sv, "CP1253"sv},
{ YYCC_U8("cp1254"), "CP1254" }, {u8"cp1254"sv, "CP1254"sv},
{ YYCC_U8("cp1255"), "CP1255" }, {u8"cp1255"sv, "CP1255"sv},
{ YYCC_U8("cp1256"), "CP1256" }, {u8"cp1256"sv, "CP1256"sv},
{ YYCC_U8("cp1257"), "CP1257" }, {u8"cp1257"sv, "CP1257"sv},
{ YYCC_U8("cp1258"), "CP1258" }, {u8"cp1258"sv, "CP1258"sv},
{ YYCC_U8("euc_jp"), "EUC-JP" }, {u8"euc_jp"sv, "EUC-JP"sv},
{ YYCC_U8("euc_kr"), "EUC-KR" }, {u8"euc_kr"sv, "EUC-KR"sv},
{ YYCC_U8("gb2312"), "CP936" }, {u8"gb2312"sv, "CP936"sv},
{ YYCC_U8("gbk"), "GBK" }, {u8"gbk"sv, "GBK"sv},
{ YYCC_U8("gb18030"), "GB18030" }, {u8"gb18030"sv, "GB18030"sv},
{ YYCC_U8("hz"), "HZ" }, {u8"hz"sv, "HZ"sv},
{ YYCC_U8("iso2022_jp"), "ISO-2022-JP" }, {u8"iso2022_jp"sv, "ISO-2022-JP"sv},
{ YYCC_U8("iso2022_jp_1"), "ISO-2022-JP-1" }, {u8"iso2022_jp_1"sv, "ISO-2022-JP-1"sv},
{ YYCC_U8("iso2022_jp_2"), "ISO-2022-JP-2" }, {u8"iso2022_jp_2"sv, "ISO-2022-JP-2"sv},
{ YYCC_U8("iso2022_kr"), "ISO-2022-KR" }, {u8"iso2022_kr"sv, "ISO-2022-KR"sv},
{ YYCC_U8("latin_1"), "ISO-8859-1" }, {u8"latin_1"sv, "ISO-8859-1"sv},
{ YYCC_U8("iso8859_2"), "ISO-8859-2" }, {u8"iso8859_2"sv, "ISO-8859-2"sv},
{ YYCC_U8("iso8859_3"), "ISO-8859-3" }, {u8"iso8859_3"sv, "ISO-8859-3"sv},
{ YYCC_U8("iso8859_4"), "ISO-8859-4" }, {u8"iso8859_4"sv, "ISO-8859-4"sv},
{ YYCC_U8("iso8859_5"), "ISO-8859-5" }, {u8"iso8859_5"sv, "ISO-8859-5"sv},
{ YYCC_U8("iso8859_6"), "ISO-8859-6" }, {u8"iso8859_6"sv, "ISO-8859-6"sv},
{ YYCC_U8("iso8859_7"), "ISO-8859-7" }, {u8"iso8859_7"sv, "ISO-8859-7"sv},
{ YYCC_U8("iso8859_8"), "ISO-8859-8" }, {u8"iso8859_8"sv, "ISO-8859-8"sv},
{ YYCC_U8("iso8859_9"), "ISO-8859-9" }, {u8"iso8859_9"sv, "ISO-8859-9"sv},
{ YYCC_U8("iso8859_10"), "ISO-8859-10" }, {u8"iso8859_10"sv, "ISO-8859-10"sv},
{ YYCC_U8("iso8859_11"), "ISO-8859-11" }, {u8"iso8859_11"sv, "ISO-8859-11"sv},
{ YYCC_U8("iso8859_13"), "ISO-8859-13" }, {u8"iso8859_13"sv, "ISO-8859-13"sv},
{ YYCC_U8("iso8859_14"), "ISO-8859-14" }, {u8"iso8859_14"sv, "ISO-8859-14"sv},
{ YYCC_U8("iso8859_15"), "ISO-8859-15" }, {u8"iso8859_15"sv, "ISO-8859-15"sv},
{ YYCC_U8("iso8859_16"), "ISO-8859-16" }, {u8"iso8859_16"sv, "ISO-8859-16"sv},
{ YYCC_U8("johab"), "JOHAB" }, {u8"johab"sv, "JOHAB"sv},
{ YYCC_U8("koi8_t"), "KOI8-T" }, {u8"koi8_t"sv, "KOI8-T"sv},
{ YYCC_U8("mac_cyrillic"), "MacCyrillic" }, {u8"mac_cyrillic"sv, "MacCyrillic"sv},
{ YYCC_U8("mac_greek"), "MacGreek" }, {u8"mac_greek"sv, "MacGreek"sv},
{ YYCC_U8("mac_iceland"), "MacIceland" }, {u8"mac_iceland"sv, "MacIceland"sv},
{ YYCC_U8("mac_roman"), "MacRoman" }, {u8"mac_roman"sv, "MacRoman"sv},
{ YYCC_U8("mac_turkish"), "MacTurkish" }, {u8"mac_turkish"sv, "MacTurkish"sv},
{ YYCC_U8("ptcp154"), "PT154" }, {u8"ptcp154"sv, "PT154"sv},
{ YYCC_U8("shift_jis"), "SHIFT_JIS" }, {u8"shift_jis"sv, "SHIFT_JIS"sv},
{ YYCC_U8("utf_32"), "UTF-32" }, {u8"utf_32"sv, "UTF-32"sv},
{ YYCC_U8("utf_32_be"), "UTF-32BE" }, {u8"utf_32_be"sv, "UTF-32BE"sv},
{ YYCC_U8("utf_32_le"), "UTF-32LE" }, {u8"utf_32_le"sv, "UTF-32LE"sv},
{ YYCC_U8("utf_16"), "UTF16" }, {u8"utf_16"sv, "UTF16"sv},
{ YYCC_U8("utf_16_be"), "UTF-16BE" }, {u8"utf_16_be"sv, "UTF-16BE"sv},
{ YYCC_U8("utf_16_le"), "UTF-16LE" }, {u8"utf_16_le"sv, "UTF-16LE"sv},
{ YYCC_U8("utf_7"), "UTF-7" }, {u8"utf_7"sv, "UTF-7"sv},
{ YYCC_U8("utf_8"), "UTF-8" }, {u8"utf_8"sv, "UTF-8"sv},
}; };
#endif static bool fetch_iconv_name(const std::u8string_view& enc_name, std::string& out_code) {
// resolve alias
std::u8string resolved_name = resolve_encoding_alias(enc_name);
// find code page
op::lower(resolved_name);
auto finder = ICONV_MAP.find(resolved_name);
if (finder == ICONV_MAP.end()) return false;
// okey, we found it.
out_code = finder->second;
return true;
}
#endif
#pragma endregion #pragma endregion
@ -362,12 +373,27 @@ namespace yycc::encoding::pycodec {
ConvError::ConvError(const ConvError::Error& err) : inner(err) {} ConvError::ConvError(const ConvError::Error& err) : inner(err) {}
bool is_valid_encoding_name(const EncodingName& name) { bool is_valid_encoding_name(const EncodingName& name) {
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
CodePage unused;
return fetch_code_page(name, unused);
#else
std::string unused;
return fetch_iconv_name(name, unused);
#endif
} }
#pragma endregion #pragma endregion
#pragma region #pragma region Char -> UTF8
// Constructor of CharToUtf8. It initializes the backend-specific member
// (code_page for the Win32 backend, inner for the iconv backend).
// NOTE(review): both initializers look unfinished and `name` is never used:
// - `fetch` is not a name declared in the visible part of this file.
// - fetch_iconv_name is defined above with two parameters (encoding name, output string)
//   and returns bool, but it is called here without any argument.
// TODO: confirm the intended initialization and how a lookup failure should be reported.
CharToUtf8::CharToUtf8(const EncodingName& name) :
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
code_page(fetch)
#else
inner(fetch_iconv_name())
#endif
{}
#pragma endregion #pragma endregion

View File

@ -1,37 +1,43 @@
#pragma once #pragma once
#include "../macro/os_detector.hpp" #include "../macro/os_detector.hpp"
#include "../macro/stl_detector.hpp"
#include "../macro/class_copy_move.hpp" #include "../macro/class_copy_move.hpp"
#include "../patch/expected.hpp" #include <string>
#include "../string.hpp" #include <string_view>
#include <expected>
// Choose the backend of PyCodec module // Choose the backend of PyCodec module
#if defined(YYCC_OS_WINDOWS) #if defined(YYCC_OS_WINDOWS) && defined(YYCC_STL_MSSTL)
#include "windows.hpp" #include "windows.hpp"
#define YYCC_PYCODEC_WIN32_BACKEND #define YYCC_PYCODEC_WIN32_BACKEND
#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::windows #define YYCC_PYCODEC_BACKEND_NS ::yycc::encoding::windows
#else #elif YYCC_FEAT_ICONV || !defined(YYCC_OS_WINDOWS)
#include "iconv.hpp" #include "iconv.hpp"
#define YYCC_PYCODEC_ICONV_BACKEND #define YYCC_PYCODEC_ICONV_BACKEND
#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::iconv #define YYCC_PYCODEC_BACKEND_NS ::yycc::encoding::iconv
#else
#error "Can not find viable encoding convertion solution in current environment for PyCodec module."
#endif #endif
#define NS_YYCC_STRING ::yycc::string
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
namespace yycc::encoding::pycodec { namespace yycc::encoding::pycodec {
using EncodingName = NS_YYCC_STRING::u8string_view; /// @brief The universal name of encoding.
using EncodingName = std::u8string_view;
/// @private /// @brief The possible error occurs in this module.
struct ConvError { class ConvError {
using Error = NS_YYCC_ENCODING_BACKEND::ConvError; public:
using Error = YYCC_PYCODEC_BACKEND_NS::ConvError;
ConvError(const Error& err); ConvError(const Error& err);
YYCC_DEFAULT_COPY_MOVE(ConvError)
private:
Error inner; Error inner;
}; };
/// @private /// @brief The result type of this module.
template<typename T> template<typename T>
using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected<T, ConvError>; using ConvResult = std::expected<T, ConvError>;
/** /**
* @brief Check whether given name is a valid encoding name in PyCodec. * @brief Check whether given name is a valid encoding name in PyCodec.
@ -40,7 +46,7 @@ namespace yycc::encoding::pycodec {
*/ */
bool is_valid_encoding_name(const EncodingName& name); bool is_valid_encoding_name(const EncodingName& name);
// Char -> UTF8 /// @brief Char -> UTF8
class CharToUtf8 { class CharToUtf8 {
public: public:
CharToUtf8(const EncodingName& name); CharToUtf8(const EncodingName& name);
@ -49,19 +55,17 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(CharToUtf8) YYCC_DEFAULT_MOVE(CharToUtf8)
public: public:
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src); ConvResult<std::u8string> to_utf8(const std::string_view& src);
bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst);
NS_YYCC_STRING::u8string to_utf8(const std::string_view& src);
private: private:
#if defined(YYCC_PYCODEC_WIN32_BACKEND) #if defined(YYCC_PYCODEC_WIN32_BACKEND)
NS_YYCC_ENCODING_BACKEND::CodePage code_page; YYCC_PYCODEC_BACKEND_NS::CodePage code_page;
#else #else
NS_YYCC_ENCODING_BACKEND::CharToUtf8 inner; YYCC_PYCODEC_BACKEND_NS::CharToUtf8 inner;
#endif #endif
}; };
// UTF8 -> Char /// @brief UTF8 -> Char
class Utf8ToChar { class Utf8ToChar {
public: public:
Utf8ToChar(const EncodingName& name); Utf8ToChar(const EncodingName& name);
@ -70,19 +74,17 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(Utf8ToChar) YYCC_DEFAULT_MOVE(Utf8ToChar)
public: public:
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src); ConvResult<std::string> to_char(const std::u8string_view& src);
bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst);
std::string to_char(const NS_YYCC_STRING::u8string_view& src);
private: private:
#if defined(YYCC_PYCODEC_WIN32_BACKEND) #if defined(YYCC_PYCODEC_WIN32_BACKEND)
NS_YYCC_ENCODING_BACKEND::CodePage code_page; YYCC_PYCODEC_BACKEND_NS::CodePage code_page;
#else #else
NS_YYCC_ENCODING_BACKEND::Utf8ToChar inner; YYCC_PYCODEC_BACKEND_NS::Utf8ToChar inner;
#endif #endif
}; };
// WChar -> UTF8 /// @brief WChar -> UTF8
class WcharToUtf8 { class WcharToUtf8 {
public: public:
WcharToUtf8(); WcharToUtf8();
@ -91,17 +93,15 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(WcharToUtf8) YYCC_DEFAULT_MOVE(WcharToUtf8)
public: public:
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::wstring_view& src); ConvResult<std::u8string> to_utf8(const std::wstring_view& src);
bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst);
NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src);
private: private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND) #if defined(YYCC_PYCODEC_ICONV_BACKEND)
NS_YYCC_ENCODING_BACKEND::WcharToUtf8 inner; YYCC_PYCODEC_BACKEND_NS::WcharToUtf8 inner;
#endif #endif
}; };
// UTF8 -> WChar /// @brief UTF8 -> WChar
class Utf8ToWchar { class Utf8ToWchar {
public: public:
Utf8ToWchar(); Utf8ToWchar();
@ -110,17 +110,15 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(Utf8ToWchar) YYCC_DEFAULT_MOVE(Utf8ToWchar)
public: public:
ConvResult<std::wstring> priv_to_wchar(const NS_YYCC_STRING::u8string_view& src); ConvResult<std::wstring> to_wchar(const std::u8string_view& src);
bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst);
std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src);
private: private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND) #if defined(YYCC_PYCODEC_ICONV_BACKEND)
NS_YYCC_ENCODING_BACKEND::Utf8ToWchar inner; YYCC_PYCODEC_BACKEND_NS::Utf8ToWchar inner;
#endif #endif
}; };
// UTF8 -> UTF16 /// @brief UTF8 -> UTF16
class Utf8ToUtf16 { class Utf8ToUtf16 {
public: public:
Utf8ToUtf16(); Utf8ToUtf16();
@ -129,17 +127,15 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(Utf8ToUtf16) YYCC_DEFAULT_MOVE(Utf8ToUtf16)
public: public:
ConvResult<std::u16string> priv_to_utf16(const NS_YYCC_STRING::u8string_view& src); ConvResult<std::u16string> to_utf16(const std::u8string_view& src);
bool to_utf16(const NS_YYCC_STRING::u8string_view& src, std::u16string& dst);
std::u16string to_utf16(const NS_YYCC_STRING::u8string_view& src);
private: private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND) #if defined(YYCC_PYCODEC_ICONV_BACKEND)
NS_YYCC_ENCODING_BACKEND::Utf8ToUtf16 inner; YYCC_PYCODEC_BACKEND_NS::Utf8ToUtf16 inner;
#endif #endif
}; };
// UTF16 -> UTF8 /// @brief UTF16 -> UTF8
class Utf16ToUtf8 { class Utf16ToUtf8 {
public: public:
Utf16ToUtf8(); Utf16ToUtf8();
@ -148,17 +144,15 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(Utf16ToUtf8) YYCC_DEFAULT_MOVE(Utf16ToUtf8)
public: public:
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::u16string_view& src); ConvResult<std::u8string> to_utf8(const std::u16string_view& src);
bool to_utf8(const std::u16string_view& src, NS_YYCC_STRING::u8string& dst);
NS_YYCC_STRING::u8string to_utf8(const std::u16string_view& src);
private: private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND) #if defined(YYCC_PYCODEC_ICONV_BACKEND)
NS_YYCC_ENCODING_BACKEND::Utf16ToUtf8 inner; YYCC_PYCODEC_BACKEND_NS::Utf16ToUtf8 inner;
#endif #endif
}; };
// UTF8 -> UTF32 /// @brief UTF8 -> UTF32
class Utf8ToUtf32 { class Utf8ToUtf32 {
public: public:
Utf8ToUtf32(); Utf8ToUtf32();
@ -167,17 +161,15 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(Utf8ToUtf32) YYCC_DEFAULT_MOVE(Utf8ToUtf32)
public: public:
ConvResult<std::u32string> priv_to_utf32(const NS_YYCC_STRING::u8string_view& src); ConvResult<std::u32string> to_utf32(const std::u8string_view& src);
bool to_utf32(const NS_YYCC_STRING::u8string_view& src, std::u32string& dst);
std::u32string to_utf32(const NS_YYCC_STRING::u8string_view& src);
private: private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND) #if defined(YYCC_PYCODEC_ICONV_BACKEND)
NS_YYCC_ENCODING_BACKEND::Utf8ToUtf32 inner; YYCC_PYCODEC_BACKEND_NS::Utf8ToUtf32 inner;
#endif #endif
}; };
// UTF32 -> UTF8 /// @brief UTF32 -> UTF8
class Utf32ToUtf8 { class Utf32ToUtf8 {
public: public:
Utf32ToUtf8(); Utf32ToUtf8();
@ -186,17 +178,12 @@ namespace yycc::encoding::pycodec {
YYCC_DEFAULT_MOVE(Utf32ToUtf8) YYCC_DEFAULT_MOVE(Utf32ToUtf8)
public: public:
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::u32string_view& src); ConvResult<std::u8string> to_utf8(const std::u32string_view& src);
bool to_utf8(const std::u32string_view& src, NS_YYCC_STRING::u8string& dst);
NS_YYCC_STRING::u8string to_utf8(const std::u32string_view& src);
private: private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND) #if defined(YYCC_PYCODEC_ICONV_BACKEND)
NS_YYCC_ENCODING_BACKEND::Utf32ToUtf8 inner; YYCC_PYCODEC_BACKEND_NS::Utf32ToUtf8 inner;
#endif #endif
}; };
} } // namespace yycc::encoding::pycodec
#undef NS_YYCC_PATCH_EXPECTED
#undef NS_YYCC_STRING

View File

@ -120,6 +120,8 @@ namespace yycc::encoding::windows {
// * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170 // * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170
// * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170 // * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170
#if defined(YYCC_STL_MSSTL)
// 1 UTF32 unit can produce 4 UTF8 units or 2 UTF16 units in theory. // 1 UTF32 unit can produce 4 UTF8 units or 2 UTF16 units in theory.
// So we pre-allocate memory for the result to prevent allocating memory multiple times. // So we pre-allocate memory for the result to prevent allocating memory multiple times.
constexpr size_t MULTIPLE_UTF8_TO_UTF16 = 1u; constexpr size_t MULTIPLE_UTF8_TO_UTF16 = 1u;
@ -206,6 +208,8 @@ namespace yycc::encoding::windows {
return dst; return dst;
} }
#endif
#pragma endregion #pragma endregion
} // namespace yycc::encoding::windows } // namespace yycc::encoding::windows

View File

@ -1,8 +1,6 @@
#pragma once #pragma once
#include "../macro/os_detector.hpp" #include "../macro/os_detector.hpp"
#include "../macro/stl_detector.hpp"
#if defined(YYCC_OS_WINDOWS)
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <expected> #include <expected>
@ -28,6 +26,8 @@ namespace yycc::encoding::windows {
template<typename T> template<typename T>
using ConvResult = std::expected<T, ConvError>; using ConvResult = std::expected<T, ConvError>;
#if defined(YYCC_OS_WINDOWS)
/** /**
* @brief WChar -> Char * @brief WChar -> Char
* @param src * @param src
@ -88,6 +88,12 @@ namespace yycc::encoding::windows {
*/ */
ConvResult<std::string> to_char(const std::u8string_view& src, CodePage code_page); ConvResult<std::string> to_char(const std::u8string_view& src, CodePage code_page);
// YYC MARK:
// UTF conversion only works on Microsoft STL.
// See implementation for more details
#if defined(YYCC_STL_MSSTL)
/** /**
* @brief UTF8 -> UTF16 * @brief UTF8 -> UTF16
* @param src * @param src
@ -116,6 +122,8 @@ namespace yycc::encoding::windows {
*/ */
ConvResult<std::u8string> to_utf8(const std::u32string_view& src); ConvResult<std::u8string> to_utf8(const std::u32string_view& src);
} // namespace yycc::encoding::windows #endif
#endif #endif
} // namespace yycc::encoding::windows

View File

@ -0,0 +1,14 @@
#pragma once
// Include a commonly used STL header so that the vendor macros tested below are defined.
#include <cinttypes>
#if defined(_MSVC_STL_VERSION)
#define YYCC_STL_MSSTL
#elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
#define YYCC_STL_GNUSTL
#elif defined(_LIBCPP_VERSION)
#define YYCC_STL_CLANGSTL
#else
#error "Current STL is not supported!"
#endif

View File

@ -40,7 +40,7 @@ namespace yycc::num::parse {
*/ */
template<typename T> template<typename T>
requires(std::is_floating_point_v<T>) requires(std::is_floating_point_v<T>)
ParseResult<T> parse(const std::u8string_view& strl, std::chars_format fmt) { ParseResult<T> parse(const std::u8string_view& strl, std::chars_format fmt = std::chars_format::general) {
namespace reinterpret = NS_YYCC_STRING_REINTERPRET; namespace reinterpret = NS_YYCC_STRING_REINTERPRET;
T rv; T rv;
@ -75,7 +75,7 @@ namespace yycc::num::parse {
*/ */
template<typename T> template<typename T>
requires(std::is_integral_v<T> && !std::is_same_v<T, bool>) requires(std::is_integral_v<T> && !std::is_same_v<T, bool>)
ParseResult<T> parse(const std::u8string_view& strl, int base) { ParseResult<T> parse(const std::u8string_view& strl, int base = 10) {
namespace reinterpret = NS_YYCC_STRING_REINTERPRET; namespace reinterpret = NS_YYCC_STRING_REINTERPRET;
T rv; T rv;
@ -115,7 +115,7 @@ namespace yycc::num::parse {
// Compare result // Compare result
if (lower_case == u8"true") return true; if (lower_case == u8"true") return true;
else if (lower_case == u8"false") return false; else if (lower_case == u8"false") return false;
else return ParseError::InvalidString; else return std::unexpected(ParseError::InvalidString);
} }
} // namespace yycc::num::parse } // namespace yycc::num::parse

View File

@ -10,18 +10,12 @@ PRIVATE
yycc/string/reinterpret.cpp yycc/string/reinterpret.cpp
yycc/num/parse.cpp yycc/num/parse.cpp
yycc/num/stringify.cpp yycc/num/stringify.cpp
yycc/rust/num/parse.cpp
yycc/rust/num/stringify.cpp
yycc/patch/contains.cpp
yycc/patch/starts_ends_with.cpp
) )
target_sources(YYCCTestbench target_sources(YYCCTestbench
PRIVATE PRIVATE
FILE_SET HEADERS FILE_SET HEADERS
FILES FILES
shared/parse_template.hpp yycc/encoding/utf_literal.hpp
shared/stringify_template.hpp
shared/utf_literal.hpp
) )
# Setup headers # Setup headers
target_include_directories(YYCCTestbench target_include_directories(YYCCTestbench

View File

@ -1,74 +0,0 @@
/**
* \file
* This file is a template for Parse function testing.
*
* As you can see, there are two styles of Parse function, located in the main namespace and the Rust namespace respectively.
* Both of them share exactly the same test data sheet.
* So it is a good idea to extract the common part into one place and include it in each testbench file.
* That is what this file does.
*
* Before including this template file, you must make sure that:
* \li Have include <gtest/gtest.h>
* \li Have include <yycc/prelude/rust.hpp>
* \li Have define a macro named \c TEST_NS which indicate the testbench namespace passed to gtest.
* \li Have define a macro with syntax <TT>TEST_SUCCESS(type_t, value, string_value, ...)</TT>.
* This macro will be called for those success case. \c type_t is the generic type of Parse function.
* \c value is the expected value after parse and \c string_value is the string value to be parsed.
* Other arguments should be redirect to corresponding Parse function.
* \li Have define a macro with syntax <TT>TEST_FAIL(type_t, string_value, ...)</TT>.
* Opposite with \c TEST_SUCCESS, this macro is for those bad case testing.
* All arguments have the same meaning with \c TEST_SUCCESS.
*
*/
TEST(TEST_NS, Common) {
TEST_SUCCESS(i8, INT8_C(-61), "-61");
TEST_SUCCESS(u8, UINT8_C(200), "200");
TEST_SUCCESS(i16, INT16_C(6161), "6161");
TEST_SUCCESS(u16, UINT16_C(32800), "32800");
TEST_SUCCESS(i32, INT32_C(61616161), "61616161");
TEST_SUCCESS(u32, UINT32_C(4294967293), "4294967293");
TEST_SUCCESS(i64, INT64_C(616161616161), "616161616161");
TEST_SUCCESS(u64, UINT64_C(9223372036854775807), "9223372036854775807");
TEST_SUCCESS(float, 1.0f, "1.0");
TEST_SUCCESS(double, 1.0, "1.0");
TEST_SUCCESS(bool, true, "true");
TEST_SUCCESS(bool, false, "false");
}
TEST(TEST_NS, Radix) {
TEST_SUCCESS(u32, UINT32_C(0xffff), "ffff", 16);
TEST_SUCCESS(u32, UINT32_C(032), "032", 8);
TEST_SUCCESS(u32, UINT32_C(0B1011), "1011", 2);
}
TEST(TEST_NS, CaseInsensitive) {
TEST_SUCCESS(bool, true, "tRUE");
}
TEST(TEST_NS, Overflow) {
TEST_FAIL(i8, "6161");
TEST_FAIL(u8, "32800");
TEST_FAIL(i16, "61616161");
TEST_FAIL(u16, "4294967293");
TEST_FAIL(i32, "616161616161");
TEST_FAIL(u32, "9223372036854775807");
TEST_FAIL(i64, "616161616161616161616161");
TEST_FAIL(u64, "92233720368547758079223372036854775807");
TEST_FAIL(float, "1e40");
TEST_FAIL(double, "1e114514");
}
TEST(TEST_NS, BadRadix) {
TEST_FAIL(u32, "fghj", 16);
TEST_FAIL(u32, "099", 8);
TEST_FAIL(u32, "12345", 2);
}
TEST(TEST_NS, InvalidWords) {
TEST_FAIL(u32, "hello, world!");
TEST_FAIL(bool, "hello, world!");
}

View File

@ -1,39 +0,0 @@
/**
* \file
* This file is a template for Stringify function testing.
*
* Same as parse_template.hpp .
*
* Before including this template file, you must make sure that:
* \li Have include <gtest/gtest.h>
* \li Have include <yycc/prelude/rust.hpp>
* \li Have define a macro named \c TEST_NS which indicate the testbench namespace passed to gtest.
* \li Have define a macro with syntax <TT>TEST_SUCCESS(type_t, value, string_value, ...)</TT>.
* This macro will be called for those success case. \c type_t is the generic type of Stringify function.
* \c value is the value will be stringified and \c string_value is the expected string.
* Other arguments should be redirect to corresponding Stringify function.
*
*/
TEST(TEST_NS, Common) {
TEST_SUCCESS(i8, INT8_C(-61), "-61");
TEST_SUCCESS(u8, UINT8_C(200), "200");
TEST_SUCCESS(i16, INT16_C(6161), "6161");
TEST_SUCCESS(u16, UINT16_C(32800), "32800");
TEST_SUCCESS(i32, INT32_C(61616161), "61616161");
TEST_SUCCESS(u32, UINT32_C(4294967293), "4294967293");
TEST_SUCCESS(i64, INT64_C(616161616161), "616161616161");
TEST_SUCCESS(u64, UINT64_C(9223372036854775807), "9223372036854775807");
TEST_SUCCESS(float, 1.0f, "1.0", std::chars_format::fixed, 1);
TEST_SUCCESS(double, 1.0, "1.0", std::chars_format::fixed, 1);
TEST_SUCCESS(bool, true, "true");
TEST_SUCCESS(bool, false, "false");
}
TEST(TEST_NS, Radix) {
TEST_SUCCESS(u32, UINT32_C(0xffff), "ffff", 16);
TEST_SUCCESS(u32, UINT32_C(032), "32", 8);
TEST_SUCCESS(u32, UINT32_C(0B1011), "1011", 2);
}

View File

@ -1,7 +1,8 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <yycc.hpp> #include <yycc.hpp>
#include <yycc/constraint.hpp> #include <yycc/constraint.hpp>
#include <yycc/prelude/rust.hpp>
#include <yycc/rust/prelude.hpp>
#define CONSTRAINT ::yycc::constraint::Constraint #define CONSTRAINT ::yycc::constraint::Constraint

View File

@ -2,9 +2,10 @@
#include <yycc.hpp> #include <yycc.hpp>
#include <yycc/constraint/builder.hpp> #include <yycc/constraint/builder.hpp>
#include <yycc/prelude/rust.hpp> #include <yycc/rust/prelude.hpp>
#define BUILDER ::yycc::constraint::builder #define BUILDER ::yycc::constraint::builder
using namespace std::literals::string_view_literals;
namespace yycctest::constraint::builder { namespace yycctest::constraint::builder {
@ -56,8 +57,7 @@ namespace yycctest::constraint::builder {
enum class TestEnum : u8 { Entry1 = 0, Entry2 = 1, Entry3 = 2 }; enum class TestEnum : u8 { Entry1 = 0, Entry2 = 1, Entry3 = 2 };
TEST(ConstraintBuilder, EnumConstraint) { TEST(ConstraintBuilder, EnumConstraint) {
auto c = BUILDER::enum_constraint({TestEnum::Entry1, TestEnum::Entry2, TestEnum::Entry3}, auto c = BUILDER::enum_constraint({TestEnum::Entry1, TestEnum::Entry2, TestEnum::Entry3}, 1u);
1u);
ASSERT_TRUE(c.support_check()); ASSERT_TRUE(c.support_check());
ASSERT_TRUE(c.support_clamp()); ASSERT_TRUE(c.support_clamp());
TEST_SUCCESS(c, TestEnum::Entry1); TEST_SUCCESS(c, TestEnum::Entry1);
@ -67,16 +67,13 @@ namespace yycctest::constraint::builder {
} }
TEST(ConstraintBuilder, StrEnumConstraint) { TEST(ConstraintBuilder, StrEnumConstraint) {
auto c = BUILDER::strenum_constraint({YYCC_U8("first-entry"), auto c = BUILDER::strenum_constraint({u8"first-entry"sv, u8"second-entry"sv, u8"third-entry"sv}, 1u);
YYCC_U8("second-entry"),
YYCC_U8("third-entry")},
1u);
ASSERT_TRUE(c.support_check()); ASSERT_TRUE(c.support_check());
ASSERT_TRUE(c.support_clamp()); ASSERT_TRUE(c.support_clamp());
TEST_SUCCESS(c, YYCC_U8("first-entry")); TEST_SUCCESS(c, u8"first-entry");
TEST_SUCCESS(c, YYCC_U8("second-entry")); TEST_SUCCESS(c, u8"second-entry");
TEST_SUCCESS(c, YYCC_U8("third-entry")); TEST_SUCCESS(c, u8"third-entry");
TEST_FAIL(c, YYCC_U8("wtf?"), YYCC_U8("second-entry")); TEST_FAIL(c, u8"wtf?", u8"second-entry");
} }
} // namespace yycctest::constraint::builder } // namespace yycctest::constraint::builder

View File

@ -2,7 +2,7 @@
#include <yycc.hpp> #include <yycc.hpp>
#include <yycc/num/parse.hpp> #include <yycc/num/parse.hpp>
#include <yycc/prelude/rust.hpp> #include <yycc/rust/prelude.hpp>
#define PARSE ::yycc::num::parse #define PARSE ::yycc::num::parse
@ -11,25 +11,71 @@ namespace yycctest::num::parse {
// These 2 test macros build string container via given string. // These 2 test macros build string container via given string.
// Check `try_parse` first, and then check `parse`. // Check `try_parse` first, and then check `parse`.
#define TEST_NS NumParse #define TEST_SUCCESS(type_t, expected_value, string_value, ...) \
#define TEST_SUCCESS(type_t, value, string_value, ...) \
{ \ { \
u8string cache_string(YYCC_U8(string_value)); \ std::u8string cache_string(string_value); \
type_t cache; \ auto rv = PARSE::parse<type_t>(cache_string __VA_OPT__(, ) __VA_ARGS__); \
ASSERT_TRUE(PARSE::try_parse<type_t>(cache_string, cache, ##__VA_ARGS__)); \ ASSERT_TRUE(rv.has_value()); \
EXPECT_EQ(cache, value); \ EXPECT_EQ(rv.value(), expected_value); \
EXPECT_EQ(PARSE::parse<type_t>(cache_string, ##__VA_ARGS__), value); \
} }
#define TEST_FAIL(type_t, string_value, ...) \ #define TEST_FAIL(type_t, string_value, ...) \
{ \ { \
u8string cache_string(YYCC_U8(string_value)); \ std::u8string cache_string(string_value); \
type_t cache; \ auto rv = PARSE::parse<type_t>(cache_string __VA_OPT__(, ) __VA_ARGS__); \
EXPECT_FALSE(PARSE::try_parse<type_t>(cache_string, cache, ##__VA_ARGS__)); \ EXPECT_FALSE(rv.has_value()); \
EXPECT_ANY_THROW(PARSE::parse<type_t>(cache_string, ##__VA_ARGS__)); \
} }
#include "../../shared/parse_template.hpp" TEST(NumParse, Common) {
TEST_SUCCESS(i8, INT8_C(-61), u8"-61");
TEST_SUCCESS(u8, UINT8_C(200), u8"200");
TEST_SUCCESS(i16, INT16_C(6161), u8"6161");
TEST_SUCCESS(u16, UINT16_C(32800), u8"32800");
TEST_SUCCESS(i32, INT32_C(61616161), u8"61616161");
TEST_SUCCESS(u32, UINT32_C(4294967293), u8"4294967293");
TEST_SUCCESS(i64, INT64_C(616161616161), u8"616161616161");
TEST_SUCCESS(u64, UINT64_C(9223372036854775807), u8"9223372036854775807");
TEST_SUCCESS(float, 1.0f, u8"1.0");
TEST_SUCCESS(double, 1.0, u8"1.0");
TEST_SUCCESS(bool, true, u8"true");
TEST_SUCCESS(bool, false, u8"false");
}
TEST(NumParse, Radix) {
TEST_SUCCESS(u32, UINT32_C(0xffff), u8"ffff", 16);
TEST_SUCCESS(u32, UINT32_C(032), u8"032", 8);
TEST_SUCCESS(u32, UINT32_C(0B1011), u8"1011", 2);
}
TEST(NumParse, CaseInsensitive) {
TEST_SUCCESS(bool, true, u8"tRUE");
}
TEST(NumParse, Overflow) {
TEST_FAIL(i8, u8"6161");
TEST_FAIL(u8, u8"32800");
TEST_FAIL(i16, u8"61616161");
TEST_FAIL(u16, u8"4294967293");
TEST_FAIL(i32, u8"616161616161");
TEST_FAIL(u32, u8"9223372036854775807");
TEST_FAIL(i64, u8"616161616161616161616161");
TEST_FAIL(u64, u8"92233720368547758079223372036854775807");
TEST_FAIL(float, u8"1e40");
TEST_FAIL(double, u8"1e114514");
}
TEST(NumParse, BadRadix) {
TEST_FAIL(u32, u8"fghj", 16);
TEST_FAIL(u32, u8"099", 8);
TEST_FAIL(u32, u8"12345", 2);
}
TEST(NumParse, InvalidWords) {
TEST_FAIL(u32, u8"hello, world!");
TEST_FAIL(bool, u8"hello, world!");
}
} // namespace yycctest::num::parse } // namespace yycctest::num::parse

View File

@ -2,21 +2,40 @@
#include <yycc.hpp> #include <yycc.hpp>
#include <yycc/num/stringify.hpp> #include <yycc/num/stringify.hpp>
#include <yycc/prelude/rust.hpp> #include <yycc/rust/prelude.hpp>
#define STRINGIFY ::yycc::num::stringify #define STRINGIFY ::yycc::num::stringify
namespace yycctest::num::stringify { namespace yycctest::num::stringify {
#define TEST_NS NumStringify
#define TEST_SUCCESS(type_t, value, string_value, ...) \ #define TEST_SUCCESS(type_t, value, string_value, ...) \
{ \ { \
type_t cache = value; \ type_t cache = value; \
u8string ret = STRINGIFY::stringify<type_t>(cache, ##__VA_ARGS__); \ std::u8string ret = STRINGIFY::stringify<type_t>(cache __VA_OPT__(, ) __VA_ARGS__); \
EXPECT_EQ(ret, YYCC_U8(string_value)); \ EXPECT_EQ(ret, string_value); \
} }
#include "../../shared/stringify_template.hpp" TEST(NumStringify, Common) {
TEST_SUCCESS(i8, INT8_C(-61), u8"-61");
TEST_SUCCESS(u8, UINT8_C(200), u8"200");
TEST_SUCCESS(i16, INT16_C(6161), u8"6161");
TEST_SUCCESS(u16, UINT16_C(32800), u8"32800");
TEST_SUCCESS(i32, INT32_C(61616161), u8"61616161");
TEST_SUCCESS(u32, UINT32_C(4294967293), u8"4294967293");
TEST_SUCCESS(i64, INT64_C(616161616161), u8"616161616161");
TEST_SUCCESS(u64, UINT64_C(9223372036854775807), u8"9223372036854775807");
} // namespace yycctest::string::stringify TEST_SUCCESS(float, 1.0f, u8"1.0", std::chars_format::fixed, 1);
TEST_SUCCESS(double, 1.0, u8"1.0", std::chars_format::fixed, 1);
TEST_SUCCESS(bool, true, u8"true");
TEST_SUCCESS(bool, false, u8"false");
}
TEST(NumStringify, Radix) {
TEST_SUCCESS(u32, UINT32_C(0xffff), u8"ffff", 16);
TEST_SUCCESS(u32, UINT32_C(032), u8"32", 8);
TEST_SUCCESS(u32, UINT32_C(0B1011), u8"1011", 2);
}
} // namespace yycctest::num::stringify

View File

@ -1,58 +0,0 @@
#include <gtest/gtest.h>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <yycc.hpp>
#include <yycc/patch/contains.hpp>
#include <yycc/prelude/rust.hpp>
#define FN_CONTAINS ::yycc::patch::contains::contains
namespace yycctest::patch::contains {
TEST(PatchContains, Contains) {
// Set
{
std::set<u32> collection{
UINT32_C(1),
UINT32_C(3),
UINT32_C(5),
};
EXPECT_TRUE(FN_CONTAINS(collection, 1));
EXPECT_FALSE(FN_CONTAINS(collection, 2));
}
// Unordered set
{
std::unordered_set<u32> collection{
UINT32_C(1),
UINT32_C(3),
UINT32_C(5),
};
EXPECT_TRUE(FN_CONTAINS(collection, 1));
EXPECT_FALSE(FN_CONTAINS(collection, 2));
}
// Map
{
std::map<u32, u32> collection{
{UINT32_C(1), UINT32_C(2)},
{UINT32_C(3), UINT32_C(4)},
{UINT32_C(5), UINT32_C(6)},
};
EXPECT_TRUE(FN_CONTAINS(collection, 1));
EXPECT_FALSE(FN_CONTAINS(collection, 2));
}
// Unordered Map
{
std::unordered_map<u32, u32> collection{
{UINT32_C(1), UINT32_C(2)},
{UINT32_C(3), UINT32_C(4)},
{UINT32_C(5), UINT32_C(6)},
};
EXPECT_TRUE(FN_CONTAINS(collection, 1));
EXPECT_FALSE(FN_CONTAINS(collection, 2));
}
}
} // namespace yycctest::patch::contains

View File

@ -1,59 +0,0 @@
#include <gtest/gtest.h>
#include <yycc.hpp>
#include <yycc/patch/starts_ends_with.hpp>
#define FN_STARTS_WITH ::yycc::patch::starts_ends_with::starts_with
#define FN_ENDS_WITH ::yycc::patch::starts_ends_with::ends_with
using namespace std::literals;
namespace yycctest::patch::starts_ends_with {
#define TEST_STARTS_WITH(prefix) \
{ \
const auto that = prefix##"Hello, C++20!"s; \
EXPECT_TRUE(FN_STARTS_WITH(that, prefix##"He"sv)); \
EXPECT_FALSE(FN_STARTS_WITH(that, prefix##"he"sv)); \
EXPECT_TRUE(FN_STARTS_WITH(that, prefix##'H')); \
EXPECT_FALSE(FN_STARTS_WITH(that, prefix##'h')); \
EXPECT_TRUE(FN_STARTS_WITH(that, prefix##"He")); \
EXPECT_FALSE(FN_STARTS_WITH(that, prefix##"he")); \
}
TEST(PatchStartsEndsWith, StartsWith) {
// Ordinary string
TEST_STARTS_WITH();
// Wide string
TEST_STARTS_WITH(L);
// UTF8 string
TEST_STARTS_WITH(u8);
// UTF-16 string
TEST_STARTS_WITH(u);
// UTF-32 string
TEST_STARTS_WITH(U);
}
#define TEST_ENDS_WITH(prefix) \
{ \
const auto that = prefix##"Hello, C++20!"s; \
EXPECT_TRUE(FN_ENDS_WITH(that, prefix##"C++20!"sv)); \
EXPECT_FALSE(FN_ENDS_WITH(that, prefix##"c++20!"sv)); \
EXPECT_TRUE(FN_ENDS_WITH(that, prefix##'!')); \
EXPECT_FALSE(FN_ENDS_WITH(that, prefix##'?')); \
EXPECT_TRUE(FN_ENDS_WITH(that, prefix##"C++20!")); \
EXPECT_FALSE(FN_ENDS_WITH(that, prefix##"c++20!")); \
}
TEST(PatchStartsEndsWith, EndsWith) {
// Ordinary string
TEST_ENDS_WITH();
// Wide string
TEST_ENDS_WITH(L);
// UTF8 string
TEST_ENDS_WITH(u8);
// UTF-16 string
TEST_ENDS_WITH(u);
// UTF-32 string
TEST_ENDS_WITH(U);
}
} // namespace yycctest::patch::starts_ends_with

View File

@ -1,35 +0,0 @@
#include <gtest/gtest.h>
#include <yycc.hpp>
#include <yycc/rust/num/parse.hpp>
#include <yycc/prelude/rust.hpp>
#define PARSE ::yycc::rust::num::parse
namespace yycctest::rust::num::parse {
// We only want to test it if C++ support it.
#if defined(YYCC_CPPFEAT_EXPECTED)
#define TEST_NS RustNumParse
#define TEST_SUCCESS(type_t, expected_value, string_value, ...) \
{ \
u8string cache_string(YYCC_U8(string_value)); \
auto rv = PARSE::parse<type_t>(cache_string, ##__VA_ARGS__); \
ASSERT_TRUE(rv.has_value()); \
EXPECT_EQ(rv.value(), expected_value); \
}
#define TEST_FAIL(type_t, string_value, ...) \
{ \
u8string cache_string(YYCC_U8(string_value)); \
auto rv = PARSE::parse<type_t>(cache_string, ##__VA_ARGS__); \
EXPECT_FALSE(rv.has_value()); \
}
#include "../../../shared/parse_template.hpp"
#endif
} // namespace yycctest::rust::parse

View File

@ -1,22 +0,0 @@
#include <gtest/gtest.h>
#include <yycc.hpp>
#include <yycc/rust/num/stringify.hpp>
#include <yycc/prelude/rust.hpp>
#define STRINGIFY ::yycc::rust::num::stringify
namespace yycctest::rust::num::stringify {
#define TEST_NS RustNumStringify
#define TEST_SUCCESS(type_t, value, string_value, ...) \
{ \
type_t cache = value; \
u8string ret = STRINGIFY::stringify<type_t>(cache, ##__VA_ARGS__); \
EXPECT_EQ(ret, YYCC_U8(string_value)); \
}
#include "../../../shared/stringify_template.hpp"
}

View File

@ -2,91 +2,92 @@
#include <yycc.hpp> #include <yycc.hpp>
#include <yycc/string/op.hpp> #include <yycc/string/op.hpp>
#include <yycc/prelude/core.hpp> #include <yycc/rust/prelude.hpp>
#define OP ::yycc::string::op #define OP ::yycc::string::op
using namespace std::literals::string_view_literals;
namespace yycctest::string::op { namespace yycctest::string::op {
TEST(StringOp, Printf) { TEST(StringOp, Printf) {
auto rv = OP::printf(YYCC_U8("%s == %s"), YYCC_U8("Hello World"), YYCC_U8("Hello, world")); auto rv = OP::printf(u8"%s == %s", u8"Hello World", u8"Hello, world");
EXPECT_EQ(rv, YYCC_U8("Hello World == Hello, world")); EXPECT_EQ(rv, u8"Hello World == Hello, world");
} }
TEST(StringOp, Replace) { TEST(StringOp, Replace) {
// Normal case // Normal case
{ {
auto rv = OP::replace(YYCC_U8("aabbcc"), YYCC_U8("bb"), YYCC_U8("dd")); auto rv = OP::replace(u8"aabbcc", u8"bb", u8"dd");
EXPECT_EQ(rv, YYCC_U8("aaddcc")); EXPECT_EQ(rv, u8"aaddcc");
} }
// No matched expected string // No matched expected string
{ {
auto rv = OP::replace(YYCC_U8("aabbcc"), YYCC_U8("zz"), YYCC_U8("yy")); auto rv = OP::replace(u8"aabbcc", u8"zz", u8"yy");
EXPECT_EQ(rv, YYCC_U8("aabbcc")); EXPECT_EQ(rv, u8"aabbcc");
} }
// Empty expected string // Empty expected string
{ {
auto rv = OP::replace(YYCC_U8("aabbcc"), u8string_view(), YYCC_U8("zz")); auto rv = OP::replace(u8"aabbcc", std::u8string_view(), u8"zz");
EXPECT_EQ(rv, YYCC_U8("aabbcc")); EXPECT_EQ(rv, u8"aabbcc");
} }
// Empty replace string // Empty replace string
{ {
auto rv = OP::replace(YYCC_U8("aaaabbaa"), YYCC_U8("aa"), YYCC_U8("")); auto rv = OP::replace(u8"aaaabbaa", u8"aa", u8"");
EXPECT_EQ(rv, YYCC_U8("bb")); EXPECT_EQ(rv, u8"bb");
} }
// Nested replacing // Nested replacing
{ {
auto rv = OP::replace(YYCC_U8("aaxcc"), YYCC_U8("x"), YYCC_U8("yx")); auto rv = OP::replace(u8"aaxcc", u8"x", u8"yx");
EXPECT_EQ(rv, YYCC_U8("aayxcc")); EXPECT_EQ(rv, u8"aayxcc");
} }
// Empty source string // Empty source string
{ {
auto rv = OP::replace(u8string_view(), YYCC_U8(""), YYCC_U8("xy")); auto rv = OP::replace(std::u8string_view(), u8"", u8"xy");
EXPECT_EQ(rv, YYCC_U8("")); EXPECT_EQ(rv, u8"");
} }
} }
TEST(StringOp, Lower) { TEST(StringOp, Lower) {
auto rv = OP::to_lower(YYCC_U8("LOWER")); auto rv = OP::to_lower(u8"LOWER");
EXPECT_EQ(rv, YYCC_U8("lower")); EXPECT_EQ(rv, u8"lower");
} }
TEST(StringOp, Upper) { TEST(StringOp, Upper) {
auto rv = OP::to_upper(YYCC_U8("upper")); auto rv = OP::to_upper(u8"upper");
EXPECT_EQ(rv, YYCC_U8("UPPER")); EXPECT_EQ(rv, u8"UPPER");
} }
TEST(StringOp, Join) { TEST(StringOp, Join) {
std::vector<u8string> datas{YYCC_U8(""), YYCC_U8("1"), YYCC_U8("2"), YYCC_U8("")}; std::vector<std::u8string_view> datas{u8""sv, u8"1"sv, u8"2"sv, u8""sv};
auto rv = OP::join(datas.begin(), datas.end(), YYCC_U8(", ")); auto rv = OP::join(datas.begin(), datas.end(), u8", ");
EXPECT_EQ(rv, YYCC_U8(", 1, 2, ")); EXPECT_EQ(rv, u8", 1, 2, ");
} }
TEST(StringOp, Split) { TEST(StringOp, Split) {
// Normal // Normal
{ {
auto rv = OP::split(YYCC_U8(", 1, 2, "), YYCC_U8(", ")); auto rv = OP::split(u8", 1, 2, ", u8", ");
ASSERT_EQ(rv.size(), 4u); ASSERT_EQ(rv.size(), 4u);
EXPECT_EQ(rv[0], YYCC_U8("")); EXPECT_EQ(rv[0], u8"");
EXPECT_EQ(rv[1], YYCC_U8("1")); EXPECT_EQ(rv[1], u8"1");
EXPECT_EQ(rv[2], YYCC_U8("2")); EXPECT_EQ(rv[2], u8"2");
EXPECT_EQ(rv[3], YYCC_U8("")); EXPECT_EQ(rv[3], u8"");
} }
// No matched delimiter // No matched delimiter
{ {
auto rv = OP::split(YYCC_U8("test"), YYCC_U8("-")); auto rv = OP::split(u8"test", u8"-");
ASSERT_EQ(rv.size(), 1u); ASSERT_EQ(rv.size(), 1u);
EXPECT_EQ(rv[0], YYCC_U8("test")); EXPECT_EQ(rv[0], u8"test");
} }
// Empty delimiter // Empty delimiter
{ {
auto rv = OP::split(YYCC_U8("test"), u8string_view()); auto rv = OP::split(u8"test", std::u8string_view());
ASSERT_EQ(rv.size(), 1u); ASSERT_EQ(rv.size(), 1u);
EXPECT_EQ(rv[0], YYCC_U8("test")); EXPECT_EQ(rv[0], u8"test");
} }
// Empty source string // Empty source string
{ {
auto rv = OP::split(u8string_view(), YYCC_U8("")); auto rv = OP::split(std::u8string_view(), u8"");
ASSERT_EQ(rv.size(), 1u); ASSERT_EQ(rv.size(), 1u);
EXPECT_TRUE(rv[0].empty()); EXPECT_TRUE(rv[0].empty());
} }

View File

@ -3,7 +3,7 @@
#include <yycc.hpp> #include <yycc.hpp>
#include <yycc/string/reinterpret.hpp> #include <yycc/string/reinterpret.hpp>
#include <yycc/prelude/core.hpp> #include <yycc/rust/prelude.hpp>
#define REINTERPRET ::yycc::string::reinterpret #define REINTERPRET ::yycc::string::reinterpret
#define CONST_VOID_PTR(p) reinterpret_cast<const void*>(p) #define CONST_VOID_PTR(p) reinterpret_cast<const void*>(p)
@ -11,7 +11,7 @@
namespace yycctest::string::reinterpret { namespace yycctest::string::reinterpret {
static u8string PROBE(YYCC_U8("Test")); static std::u8string PROBE(u8"Test");
TEST(StringReinterpret, ConstPointer) { TEST(StringReinterpret, ConstPointer) {
const auto* src = PROBE.data(); const auto* src = PROBE.data();
@ -34,7 +34,7 @@ namespace yycctest::string::reinterpret {
} }
TEST(StringReinterpret, String) { TEST(StringReinterpret, String) {
auto src = u8string(PROBE); auto src = std::u8string(PROBE);
auto dst = REINTERPRET::as_ordinary(src); auto dst = REINTERPRET::as_ordinary(src);
auto new_src = REINTERPRET::as_utf8(dst); auto new_src = REINTERPRET::as_utf8(dst);
@ -46,7 +46,7 @@ namespace yycctest::string::reinterpret {
} }
TEST(StringReinterpret, StringView) { TEST(StringReinterpret, StringView) {
auto src = u8string_view(PROBE); auto src = std::u8string_view(PROBE);
auto dst = REINTERPRET::as_ordinary_view(src); auto dst = REINTERPRET::as_ordinary_view(src);
auto new_src = REINTERPRET::as_utf8_view(dst); auto new_src = REINTERPRET::as_utf8_view(dst);