feat: update pycodec.

- rename encoding::utf to encoding::stlcvt.
- use uv to manage script and add pycodec generator script.
- update script in modern python.
- fix added pycodec generator.
This commit is contained in:
2025-07-23 16:05:53 +08:00
parent 821a592f02
commit f014e54604
15 changed files with 1148 additions and 41 deletions

215
script/.gitignore vendored
View File

@ -1,3 +1,216 @@
# -------------------- Output --------------------
## ===== Myself =====
# Exclude VSCode
.vscode/
# Exclude generated files
win_build.bat
linux_build.sh
## ===== Python =====
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

View File

@ -1,15 +1,16 @@
import jinja2
import argparse
import os
import io
import typing
import re
import shlex
from pathlib import Path
from dataclasses import dataclass
import jinja2
def validate_cpp_ver(ver: str) -> str:
    """
    argparse type-checker for the C++ standard version argument.

    :param ver: The raw text given on the command line.
    :return: ``ver`` unchanged when it consists of decimal digits only.
    :raises argparse.ArgumentTypeError: If ``ver`` is not made of decimal digits.
    """
    # Guard clause: reject anything that is not a pure run of digits.
    if re.match(r'^[0-9]+$', ver) is None:
        raise argparse.ArgumentTypeError('invalid version of C++ standard.')
    return ver
def write_line(f: typing.TextIO, val: str) -> None:
    """
    Write ``val`` to ``f`` followed by a single line feed.

    The parameter is typed as ``typing.TextIO`` so that any text stream is
    accepted, not only ``io.TextIOWrapper`` instances.

    :param f: The text stream to write into.
    :param val: The line content, without its terminating line feed.
    """
    f.write(val)
    f.write('\n')
@ -24,55 +25,51 @@ def escape_cmd_argument(arg):
def escape_sh_argument(arg):
    """
    Quote ``arg`` with ``shlex.quote`` for use as a single shell argument.

    :param arg: The raw argument text.
    :return: The quoted argument text.
    """
    quoted = shlex.quote(arg)
    return quoted
@dataclass
class ScriptSettings:
    """
    Options which are handed to the build script templates.

    The constructor is generated by ``@dataclass`` and takes the fields
    in declaration order: ``cpp_version``, ``build_doc``, ``pic``.
    """
    # Passed to the templates as ``cpp_version``.
    cpp_version: str
    # Passed to the templates as ``build_doc``.
    build_doc: bool
    # Passed to the templates as ``pic``.
    pic: bool
class TemplateRender:
    """
    Render the Windows and Linux build scripts from Jinja templates.

    Templates are loaded from the directory containing this script,
    and the rendered scripts are written into that same directory.
    """
    loader: jinja2.BaseLoader
    environment: jinja2.Environment

    win_template: jinja2.Template
    linux_template: jinja2.Template

    settings: ScriptSettings

    def __init__(self, settings: ScriptSettings) -> None:
        """
        Load both script templates and remember the settings to render with.

        :param settings: The options passed to the templates on rendering.
        """
        self.loader = jinja2.FileSystemLoader(self.__get_dir())
        self.environment = jinja2.Environment(loader=self.loader)

        self.win_template = self.environment.get_template('win_build.bat.jinja')
        self.linux_template = self.environment.get_template('linux_build.sh.jinja')

        self.settings = settings

    def __get_dir(self) -> Path:
        """Return the resolved directory which contains this script file."""
        return Path(__file__).resolve().parent

    def __escape_path(self, val: str, is_win: bool) -> str:
        """
        Escape a path for the target script flavour.

        :param val: The path to escape.
        :param is_win: True to escape for the Windows batch script, False for the shell script.
        :return: The escaped path.
        """
        if is_win: return escape_cmd_argument(val)
        else: return escape_sh_argument(val)

    def __render(self, template: jinja2.Template, dest_file: str, is_win: bool) -> None:
        """
        Render ``template`` and write the result beside this script.

        :param template: The template to render.
        :param dest_file: The output file name, relative to this script's directory.
        :param is_win: True if the output is the Windows batch script.
        """
        with open(self.__get_dir() / dest_file, 'w', encoding='utf-8') as f:
            f.write(template.render(
                # The repository root is the parent of this script's directory.
                repo_root_dir = self.__escape_path(str(self.__get_dir().parent), is_win),
                cpp_version = self.settings.cpp_version,
                build_doc = self.settings.build_doc,
                # Read from the instance settings like the other options, not from
                # whatever module-level ``settings`` name happens to exist.
                pic = self.settings.pic
            ))

    def render_win_script(self) -> None:
        """Generate ``win_build.bat``."""
        self.__render(self.win_template, 'win_build.bat', True)

    def render_linux_script(self) -> None:
        """Generate ``linux_build.sh``."""
        self.__render(self.linux_template, 'linux_build.sh', False)
if __name__ == '__main__':

2
script/pycodec/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Exclude result
*.cpp

View File

@ -0,0 +1,63 @@
import typing
from pathlib import Path
import os
class LanguageToken:
name: str
alias: tuple[str, ...]
code_page: str | None
iconv_code: str | None
def __init__(self, name: str, alias: typing.Iterator[str], code_page: str, iconv_code: str):
self.name = name.lower()
self.alias = tuple(map(lambda x: x.lower(), alias))
self.code_page = None if code_page == '' else code_page
self.iconv_code = None if iconv_code == '' else iconv_code
def extract_data(fs: typing.TextIO) -> list[str]:
    """
    Read every line of the table file except the header.

    :param fs: The text stream of the table file.
    :return: All lines after the first one, with their line endings kept.
    """
    rows = fs.readlines()
    # The first line is the table header, so it is dropped.
    del rows[:1]
    return rows
def extract_token(csv_data: list[str]) -> tuple[LanguageToken, ...]:
    """
    Parse tab-separated table rows into tokens.

    Each row has four cells: encoding name, comma-separated alias list,
    code page and iconv identifier. Blank lines are skipped, and a row
    with fewer than four cells is treated as if the missing cells were empty.

    :param csv_data: The table rows without the header line.
    :return: One token per non-blank row, in the given order.
    """
    ret: list[LanguageToken] = []
    for line in csv_data:
        line = line.strip('\n')
        # A blank line (e.g. a trailing one at the end of file) is not a row.
        if line.strip() == '':
            continue
        line_sp = line.split('\t')
        # Trailing empty cells may have been lost together with their tabs,
        # so pad the row to four cells instead of failing on the index.
        line_sp += [''] * (4 - len(line_sp))
        # Split the alias cell by comma, trim every entry and drop empty ones.
        alias_sp = filter(lambda x: len(x) != 0, map(lambda x: x.strip(), line_sp[1].split(',')))
        ret.append(LanguageToken(line_sp[0], alias_sp, line_sp[2], line_sp[3]))
    return tuple(ret)
def write_alias_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """
    Write the C++ map from every alias name to its encoding name.

    The emitted identifier is spelled ``ALISA_MAP``, as the C++ source does.

    :param fs: The text stream receiving the C++ code.
    :param data: The tokens to write.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, NS_YYCC_STRING::u8string> ALISA_MAP {\n')
    # One entry per alias, token by token in the given order.
    fs.writelines(
        f'\t{{ YYCC_U8("{alias_name}"), YYCC_U8("{token.name}") }},\n'
        for token in data
        for alias_name in token.alias
    )
    fs.write('};\n')
def write_win_cp_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """
    Write the C++ map from encoding name to Windows code page.

    Tokens without a code page are skipped. Leading zeros of the code page
    text are removed before writing, because a C++ integer literal starting
    with ``0`` is octal: ``037u`` would mean 31 rather than code page 37.

    :param fs: The text stream receiving the C++ code.
    :param data: The tokens to write.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, CodePage> WINCP_MAP {\n')
    for i in data:
        if i.code_page is not None:
            # Keep a single "0" if the text consists of zeros only.
            code_page = i.code_page.lstrip('0') or '0'
            fs.write(f'\t{{ YYCC_U8("{i.name}"), static_cast<CodePage>({code_page}u) }},\n')
    fs.write('};\n')
def write_iconv_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """
    Write the C++ map from encoding name to iconv identifier.

    Tokens without an iconv identifier are skipped.

    :param fs: The text stream receiving the C++ code.
    :param data: The tokens to write.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, std::string> ICONV_MAP {\n')
    fs.writelines(
        f'\t{{ YYCC_U8("{token.name}"), "{token.iconv_code}" }},\n'
        for token in data
        if token.iconv_code is not None
    )
    fs.write('};\n')
if __name__ == '__main__':
    # Both the input table and the generated C++ file live beside this script.
    script_dir = Path(__file__).resolve().parent
    csv_file = script_dir / 'encoding_table.csv'
    cpp_file = script_dir / 'encoding_table.cpp'

    # Read the table, then write the three maps one after another.
    with open(csv_file, 'r', encoding='utf-8') as fr, \
         open(cpp_file, 'w', encoding='utf-8') as fw:
        rows = extract_data(fr)
        tokens = extract_token(rows)
        write_alias_map(fw, tokens)
        write_win_cp_map(fw, tokens)
        write_iconv_map(fw, tokens)

View File

@ -0,0 +1,98 @@
Encoding Alias Code Page Iconv Identifier
ascii 646, us-ascii 437 ASCII
big5 big5-tw, csbig5 950 BIG5
big5hkscs big5-hkscs, hkscs BIG5-HKSCS
cp037 IBM037, IBM039 037
cp273 273, IBM273, csIBM273
cp424 EBCDIC-CP-HE, IBM424
cp437 437, IBM437 437
cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 500
cp720 720
cp737 737
cp775 IBM775 775
cp850 850, IBM850 850 CP850
cp852 852, IBM852 852
cp855 855, IBM855 855
cp856
cp857 857, IBM857 857
cp858 858, IBM858 858
cp860 860, IBM860 860
cp861 861, CP-IS, IBM861 861
cp862 862, IBM862 862 CP862
cp863 863, IBM863 863
cp864 IBM864 864
cp865 865, IBM865 865
cp866 866, IBM866 866 CP866
cp869 869, CP-GR, IBM869 869
cp874 874 CP874
cp875 875
cp932 932, ms932, mskanji, ms-kanji, windows-31j 932 CP932
cp949 949, ms949, uhc 949 CP949
cp950 950, ms950 950 CP950
cp1006
cp1026 ibm1026 1026
cp1125 1125, ibm1125, cp866u, ruscii
cp1140 ibm1140 1140
cp1250 windows-1250 1250 CP1250
cp1251 windows-1251 1251 CP1251
cp1252 windows-1252 1252 CP1252
cp1253 windows-1253 1253 CP1253
cp1254 windows-1254 1254 CP1254
cp1255 windows-1255 1255 CP1255
cp1256 windows-1256 1256 CP1256
cp1257 windows-1257 1257 CP1257
cp1258 windows-1258 1258 CP1258
euc_jp eucjp, ujis, u-jis 20932 EUC-JP
euc_jis_2004 jisx0213, eucjis2004
euc_jisx0213 eucjisx0213
euc_kr euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 51949 EUC-KR
gb2312 chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, iso-ir-58 936 CP936
gbk 936, cp936, ms936 936 GBK
gb18030 gb18030-2000 54936 GB18030
hz hzgb, hz-gb, hz-gb-2312 52936 HZ
iso2022_jp csiso2022jp, iso2022jp, iso-2022-jp 50220 ISO-2022-JP
iso2022_jp_1 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-1
iso2022_jp_2 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2
iso2022_jp_2004 iso2022jp-2004, iso-2022-jp-2004
iso2022_jp_3 iso2022jp-3, iso-2022-jp-3
iso2022_jp_ext iso2022jp-ext, iso-2022-jp-ext
iso2022_kr csiso2022kr, iso2022kr, iso-2022-kr 50225 ISO-2022-KR
latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28591 ISO-8859-1
iso8859_2 iso-8859-2, latin2, L2 28592 ISO-8859-2
iso8859_3 iso-8859-3, latin3, L3 28593 ISO-8859-3
iso8859_4 iso-8859-4, latin4, L4 28594 ISO-8859-4
iso8859_5 iso-8859-5, cyrillic 28595 ISO-8859-5
iso8859_6 iso-8859-6, arabic 28596 ISO-8859-6
iso8859_7 iso-8859-7, greek, greek8 28597 ISO-8859-7
iso8859_8 iso-8859-8, hebrew 28598 ISO-8859-8
iso8859_9 iso-8859-9, latin5, L5 28599 ISO-8859-9
iso8859_10 iso-8859-10, latin6, L6 ISO-8859-10
iso8859_11 iso-8859-11, thai ISO-8859-11
iso8859_13 iso-8859-13, latin7, L7 28603 ISO-8859-13
iso8859_14 iso-8859-14, latin8, L8 ISO-8859-14
iso8859_15 iso-8859-15, latin9, L9 28605 ISO-8859-15
iso8859_16 iso-8859-16, latin10, L10 ISO-8859-16
johab cp1361, ms1361 1361 JOHAB
koi8_r
koi8_t KOI8-T
koi8_u
kz1048 kz_1048, strk1048_2002, rk1048
mac_cyrillic maccyrillic 10007 MacCyrillic
mac_greek macgreek 10006 MacGreek
mac_iceland maciceland 10079 MacIceland
mac_latin2 maclatin2, maccentraleurope, mac_centeuro
mac_roman macroman, macintosh MacRoman
mac_turkish macturkish 10081 MacTurkish
ptcp154 csptcp154, pt154, cp154, cyrillic-asian PT154
shift_jis csshiftjis, shiftjis, sjis, s_jis 932 SHIFT_JIS
shift_jis_2004 shiftjis2004, sjis_2004, sjis2004
shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213
utf_32 U32, utf32 UTF-32
utf_32_be UTF-32BE UTF-32BE
utf_32_le UTF-32LE UTF-32LE
utf_16 U16, utf16 UTF-16
utf_16_be UTF-16BE UTF-16BE
utf_16_le UTF-16LE UTF-16LE
utf_7 U7, unicode-1-1-utf-7 65000 UTF-7
utf_8 U8, UTF, utf8, utf-8, cp65001 65001 UTF-8
utf_8_sig
1 Encoding Alias Code Page Iconv Identifier
2 ascii 646, us-ascii 437 ASCII
3 big5 big5-tw, csbig5 950 BIG5
4 big5hkscs big5-hkscs, hkscs BIG5-HKSCS
5 cp037 IBM037, IBM039 037
6 cp273 273, IBM273, csIBM273
7 cp424 EBCDIC-CP-HE, IBM424
8 cp437 437, IBM437 437
9 cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 500
10 cp720 720
11 cp737 737
12 cp775 IBM775 775
13 cp850 850, IBM850 850 CP850
14 cp852 852, IBM852 852
15 cp855 855, IBM855 855
16 cp856
17 cp857 857, IBM857 857
18 cp858 858, IBM858 858
19 cp860 860, IBM860 860
20 cp861 861, CP-IS, IBM861 861
21 cp862 862, IBM862 862 CP862
22 cp863 863, IBM863 863
23 cp864 IBM864 864
24 cp865 865, IBM865 865
25 cp866 866, IBM866 866 CP866
26 cp869 869, CP-GR, IBM869 869
27 cp874 874 CP874
28 cp875 875
29 cp932 932, ms932, mskanji, ms-kanji, windows-31j 932 CP932
30 cp949 949, ms949, uhc 949 CP949
31 cp950 950, ms950 950 CP950
32 cp1006
33 cp1026 ibm1026 1026
34 cp1125 1125, ibm1125, cp866u, ruscii
35 cp1140 ibm1140 1140
36 cp1250 windows-1250 1250 CP1250
37 cp1251 windows-1251 1251 CP1251
38 cp1252 windows-1252 1252 CP1252
39 cp1253 windows-1253 1253 CP1253
40 cp1254 windows-1254 1254 CP1254
41 cp1255 windows-1255 1255 CP1255
42 cp1256 windows-1256 1256 CP1256
43 cp1257 windows-1257 1257 CP1257
44 cp1258 windows-1258 1258 CP1258
45 euc_jp eucjp, ujis, u-jis 20932 EUC-JP
46 euc_jis_2004 jisx0213, eucjis2004
47 euc_jisx0213 eucjisx0213
48 euc_kr euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 51949 EUC-KR
49 gb2312 chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, iso-ir-58 936 CP936
50 gbk 936, cp936, ms936 936 GBK
51 gb18030 gb18030-2000 54936 GB18030
52 hz hzgb, hz-gb, hz-gb-2312 52936 HZ
53 iso2022_jp csiso2022jp, iso2022jp, iso-2022-jp 50220 ISO-2022-JP
54 iso2022_jp_1 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-1
55 iso2022_jp_2 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2
56 iso2022_jp_2004 iso2022jp-2004, iso-2022-jp-2004
57 iso2022_jp_3 iso2022jp-3, iso-2022-jp-3
58 iso2022_jp_ext iso2022jp-ext, iso-2022-jp-ext
59 iso2022_kr csiso2022kr, iso2022kr, iso-2022-kr 50225 ISO-2022-KR
60 latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28591 ISO-8859-1
61 iso8859_2 iso-8859-2, latin2, L2 28592 ISO-8859-2
62 iso8859_3 iso-8859-3, latin3, L3 28593 ISO-8859-3
63 iso8859_4 iso-8859-4, latin4, L4 28594 ISO-8859-4
64 iso8859_5 iso-8859-5, cyrillic 28595 ISO-8859-5
65 iso8859_6 iso-8859-6, arabic 28596 ISO-8859-6
66 iso8859_7 iso-8859-7, greek, greek8 28597 ISO-8859-7
67 iso8859_8 iso-8859-8, hebrew 28598 ISO-8859-8
68 iso8859_9 iso-8859-9, latin5, L5 28599 ISO-8859-9
69 iso8859_10 iso-8859-10, latin6, L6 ISO-8859-10
70 iso8859_11 iso-8859-11, thai ISO-8859-11
71 iso8859_13 iso-8859-13, latin7, L7 28603 ISO-8859-13
72 iso8859_14 iso-8859-14, latin8, L8 ISO-8859-14
73 iso8859_15 iso-8859-15, latin9, L9 28605 ISO-8859-15
74 iso8859_16 iso-8859-16, latin10, L10 ISO-8859-16
75 johab cp1361, ms1361 1361 JOHAB
76 koi8_r
77 koi8_t KOI8-T
78 koi8_u
79 kz1048 kz_1048, strk1048_2002, rk1048
80 mac_cyrillic maccyrillic 10007 MacCyrillic
81 mac_greek macgreek 10006 MacGreek
82 mac_iceland maciceland 10079 MacIceland
83 mac_latin2 maclatin2, maccentraleurope, mac_centeuro
84 mac_roman macroman, macintosh MacRoman
85 mac_turkish macturkish 10081 MacTurkish
86 ptcp154 csptcp154, pt154, cp154, cyrillic-asian PT154
87 shift_jis csshiftjis, shiftjis, sjis, s_jis 932 SHIFT_JIS
88 shift_jis_2004 shiftjis2004, sjis_2004, sjis2004
89 shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213
90 utf_32 U32, utf32 UTF-32
91 utf_32_be UTF-32BE UTF-32BE
92 utf_32_le UTF-32LE UTF-32LE
93 utf_16 U16, utf16 UTF-16
94 utf_16_be UTF-16BE UTF-16BE
95 utf_16_le UTF-16LE UTF-16LE
96 utf_7 U7, unicode-1-1-utf-7 65000 UTF-7
97 utf_8 U8, UTF, utf8, utf-8, cp65001 65001 UTF-8
98 utf_8_sig

7
script/pyproject.toml Normal file
View File

@ -0,0 +1,7 @@
[project]
name = "script"
version = "0.1.0"
requires-python = ">=3.11"
dependencies = [
"jinja2==3.1.2",
]

74
script/uv.lock generated Normal file
View File

@ -0,0 +1,74 @@
version = 1
revision = 2
requires-python = ">=3.11"
[[package]]
name = "jinja2"
version = "3.1.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markupsafe" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7a/ff/75c28576a1d900e87eb6335b063fab47a8ef3c8b4d88524c4bf78f670cce/Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", size = 268239, upload-time = "2022-04-28T17:21:27.579Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", size = 133101, upload-time = "2022-04-28T17:21:25.336Z" },
]
[[package]]
name = "markupsafe"
version = "3.0.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353, upload-time = "2024-10-18T15:21:02.187Z" },
{ url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392, upload-time = "2024-10-18T15:21:02.941Z" },
{ url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984, upload-time = "2024-10-18T15:21:03.953Z" },
{ url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120, upload-time = "2024-10-18T15:21:06.495Z" },
{ url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032, upload-time = "2024-10-18T15:21:07.295Z" },
{ url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057, upload-time = "2024-10-18T15:21:08.073Z" },
{ url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359, upload-time = "2024-10-18T15:21:09.318Z" },
{ url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306, upload-time = "2024-10-18T15:21:10.185Z" },
{ url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094, upload-time = "2024-10-18T15:21:11.005Z" },
{ url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521, upload-time = "2024-10-18T15:21:12.911Z" },
{ url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload-time = "2024-10-18T15:21:13.777Z" },
{ url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload-time = "2024-10-18T15:21:14.822Z" },
{ url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload-time = "2024-10-18T15:21:15.642Z" },
{ url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload-time = "2024-10-18T15:21:17.133Z" },
{ url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload-time = "2024-10-18T15:21:18.064Z" },
{ url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload-time = "2024-10-18T15:21:18.859Z" },
{ url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload-time = "2024-10-18T15:21:19.671Z" },
{ url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload-time = "2024-10-18T15:21:20.971Z" },
{ url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload-time = "2024-10-18T15:21:22.646Z" },
{ url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" },
{ url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload-time = "2024-10-18T15:21:24.577Z" },
{ url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352, upload-time = "2024-10-18T15:21:25.382Z" },
{ url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122, upload-time = "2024-10-18T15:21:26.199Z" },
{ url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085, upload-time = "2024-10-18T15:21:27.029Z" },
{ url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978, upload-time = "2024-10-18T15:21:27.846Z" },
{ url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208, upload-time = "2024-10-18T15:21:28.744Z" },
{ url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357, upload-time = "2024-10-18T15:21:29.545Z" },
{ url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344, upload-time = "2024-10-18T15:21:30.366Z" },
{ url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101, upload-time = "2024-10-18T15:21:31.207Z" },
{ url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603, upload-time = "2024-10-18T15:21:32.032Z" },
{ url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510, upload-time = "2024-10-18T15:21:33.625Z" },
{ url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486, upload-time = "2024-10-18T15:21:34.611Z" },
{ url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480, upload-time = "2024-10-18T15:21:35.398Z" },
{ url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914, upload-time = "2024-10-18T15:21:36.231Z" },
{ url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796, upload-time = "2024-10-18T15:21:37.073Z" },
{ url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473, upload-time = "2024-10-18T15:21:37.932Z" },
{ url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114, upload-time = "2024-10-18T15:21:39.799Z" },
{ url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098, upload-time = "2024-10-18T15:21:40.813Z" },
{ url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208, upload-time = "2024-10-18T15:21:41.814Z" },
{ url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload-time = "2024-10-18T15:21:42.784Z" },
]
[[package]]
name = "script"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "jinja2" },
]
[package.metadata]
requires-dist = [{ name = "jinja2", specifier = "==3.1.2" }]

View File

@ -15,7 +15,7 @@ PRIVATE
yycc/string/op.cpp
yycc/rust/panic.cpp
yycc/patch/path.cpp
yycc/encoding/utf.cpp
yycc/encoding/stlcvt.cpp
yycc/encoding/windows.cpp
yycc/encoding/iconv.cpp
yycc/encoding/pycodec.cpp
@ -68,7 +68,7 @@ FILES
yycc/patch/contains.hpp
yycc/patch/starts_ends_with.hpp
yycc/patch/expected.hpp
yycc/encoding/utf.hpp
yycc/encoding/stlcvt.hpp
yycc/encoding/windows.hpp
yycc/encoding/iconv.hpp
yycc/encoding/pycodec.hpp

View File

@ -0,0 +1,446 @@
#include "pycodec.hpp"
#include <map>
#define NS_YYCC_STRING ::yycc::string
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
namespace yycc::encoding::pycodec {
#pragma region Encoding Name
// Alias table: maps an alternative spelling of an encoding name (key)
// to the canonical name (value) that the backend tables in this file are keyed by.
// Every key and value in this table is written in lowercase.
// The entries presumably mirror the alias list of Python's standard encodings -- TODO confirm against the generator script.
// NOTE(review): the identifier looks like a typo of "ALIAS_MAP"; it is left unchanged here.
static const std::map<NS_YYCC_STRING::u8string, NS_YYCC_STRING::u8string> ALISA_MAP {
{ YYCC_U8("646"), YYCC_U8("ascii") },
{ YYCC_U8("us-ascii"), YYCC_U8("ascii") },
{ YYCC_U8("big5-tw"), YYCC_U8("big5") },
{ YYCC_U8("csbig5"), YYCC_U8("big5") },
{ YYCC_U8("big5-hkscs"), YYCC_U8("big5hkscs") },
{ YYCC_U8("hkscs"), YYCC_U8("big5hkscs") },
{ YYCC_U8("ibm037"), YYCC_U8("cp037") },
{ YYCC_U8("ibm039"), YYCC_U8("cp037") },
{ YYCC_U8("273"), YYCC_U8("cp273") },
{ YYCC_U8("ibm273"), YYCC_U8("cp273") },
{ YYCC_U8("csibm273"), YYCC_U8("cp273") },
{ YYCC_U8("ebcdic-cp-he"), YYCC_U8("cp424") },
{ YYCC_U8("ibm424"), YYCC_U8("cp424") },
{ YYCC_U8("437"), YYCC_U8("cp437") },
{ YYCC_U8("ibm437"), YYCC_U8("cp437") },
{ YYCC_U8("ebcdic-cp-be"), YYCC_U8("cp500") },
{ YYCC_U8("ebcdic-cp-ch"), YYCC_U8("cp500") },
{ YYCC_U8("ibm500"), YYCC_U8("cp500") },
{ YYCC_U8("ibm775"), YYCC_U8("cp775") },
{ YYCC_U8("850"), YYCC_U8("cp850") },
{ YYCC_U8("ibm850"), YYCC_U8("cp850") },
{ YYCC_U8("852"), YYCC_U8("cp852") },
{ YYCC_U8("ibm852"), YYCC_U8("cp852") },
{ YYCC_U8("855"), YYCC_U8("cp855") },
{ YYCC_U8("ibm855"), YYCC_U8("cp855") },
{ YYCC_U8("857"), YYCC_U8("cp857") },
{ YYCC_U8("ibm857"), YYCC_U8("cp857") },
{ YYCC_U8("858"), YYCC_U8("cp858") },
{ YYCC_U8("ibm858"), YYCC_U8("cp858") },
{ YYCC_U8("860"), YYCC_U8("cp860") },
{ YYCC_U8("ibm860"), YYCC_U8("cp860") },
{ YYCC_U8("861"), YYCC_U8("cp861") },
{ YYCC_U8("cp-is"), YYCC_U8("cp861") },
{ YYCC_U8("ibm861"), YYCC_U8("cp861") },
{ YYCC_U8("862"), YYCC_U8("cp862") },
{ YYCC_U8("ibm862"), YYCC_U8("cp862") },
{ YYCC_U8("863"), YYCC_U8("cp863") },
{ YYCC_U8("ibm863"), YYCC_U8("cp863") },
{ YYCC_U8("ibm864"), YYCC_U8("cp864") },
{ YYCC_U8("865"), YYCC_U8("cp865") },
{ YYCC_U8("ibm865"), YYCC_U8("cp865") },
{ YYCC_U8("866"), YYCC_U8("cp866") },
{ YYCC_U8("ibm866"), YYCC_U8("cp866") },
{ YYCC_U8("869"), YYCC_U8("cp869") },
{ YYCC_U8("cp-gr"), YYCC_U8("cp869") },
{ YYCC_U8("ibm869"), YYCC_U8("cp869") },
{ YYCC_U8("932"), YYCC_U8("cp932") },
{ YYCC_U8("ms932"), YYCC_U8("cp932") },
{ YYCC_U8("mskanji"), YYCC_U8("cp932") },
{ YYCC_U8("ms-kanji"), YYCC_U8("cp932") },
{ YYCC_U8("windows-31j"), YYCC_U8("cp932") },
{ YYCC_U8("949"), YYCC_U8("cp949") },
{ YYCC_U8("ms949"), YYCC_U8("cp949") },
{ YYCC_U8("uhc"), YYCC_U8("cp949") },
{ YYCC_U8("950"), YYCC_U8("cp950") },
{ YYCC_U8("ms950"), YYCC_U8("cp950") },
{ YYCC_U8("ibm1026"), YYCC_U8("cp1026") },
{ YYCC_U8("1125"), YYCC_U8("cp1125") },
{ YYCC_U8("ibm1125"), YYCC_U8("cp1125") },
{ YYCC_U8("cp866u"), YYCC_U8("cp1125") },
{ YYCC_U8("ruscii"), YYCC_U8("cp1125") },
{ YYCC_U8("ibm1140"), YYCC_U8("cp1140") },
{ YYCC_U8("windows-1250"), YYCC_U8("cp1250") },
{ YYCC_U8("windows-1251"), YYCC_U8("cp1251") },
{ YYCC_U8("windows-1252"), YYCC_U8("cp1252") },
{ YYCC_U8("windows-1253"), YYCC_U8("cp1253") },
{ YYCC_U8("windows-1254"), YYCC_U8("cp1254") },
{ YYCC_U8("windows-1255"), YYCC_U8("cp1255") },
{ YYCC_U8("windows-1256"), YYCC_U8("cp1256") },
{ YYCC_U8("windows-1257"), YYCC_U8("cp1257") },
{ YYCC_U8("windows-1258"), YYCC_U8("cp1258") },
{ YYCC_U8("eucjp"), YYCC_U8("euc_jp") },
{ YYCC_U8("ujis"), YYCC_U8("euc_jp") },
{ YYCC_U8("u-jis"), YYCC_U8("euc_jp") },
{ YYCC_U8("jisx0213"), YYCC_U8("euc_jis_2004") },
{ YYCC_U8("eucjis2004"), YYCC_U8("euc_jis_2004") },
{ YYCC_U8("eucjisx0213"), YYCC_U8("euc_jisx0213") },
{ YYCC_U8("euckr"), YYCC_U8("euc_kr") },
{ YYCC_U8("korean"), YYCC_U8("euc_kr") },
{ YYCC_U8("ksc5601"), YYCC_U8("euc_kr") },
{ YYCC_U8("ks_c-5601"), YYCC_U8("euc_kr") },
{ YYCC_U8("ks_c-5601-1987"), YYCC_U8("euc_kr") },
{ YYCC_U8("ksx1001"), YYCC_U8("euc_kr") },
{ YYCC_U8("ks_x-1001"), YYCC_U8("euc_kr") },
{ YYCC_U8("chinese"), YYCC_U8("gb2312") },
{ YYCC_U8("csiso58gb231280"), YYCC_U8("gb2312") },
{ YYCC_U8("euc-cn"), YYCC_U8("gb2312") },
{ YYCC_U8("euccn"), YYCC_U8("gb2312") },
{ YYCC_U8("eucgb2312-cn"), YYCC_U8("gb2312") },
{ YYCC_U8("gb2312-1980"), YYCC_U8("gb2312") },
{ YYCC_U8("gb2312-80"), YYCC_U8("gb2312") },
{ YYCC_U8("iso-ir-58"), YYCC_U8("gb2312") },
{ YYCC_U8("936"), YYCC_U8("gbk") },
{ YYCC_U8("cp936"), YYCC_U8("gbk") },
{ YYCC_U8("ms936"), YYCC_U8("gbk") },
{ YYCC_U8("gb18030-2000"), YYCC_U8("gb18030") },
{ YYCC_U8("hzgb"), YYCC_U8("hz") },
{ YYCC_U8("hz-gb"), YYCC_U8("hz") },
{ YYCC_U8("hz-gb-2312"), YYCC_U8("hz") },
{ YYCC_U8("csiso2022jp"), YYCC_U8("iso2022_jp") },
{ YYCC_U8("iso2022jp"), YYCC_U8("iso2022_jp") },
{ YYCC_U8("iso-2022-jp"), YYCC_U8("iso2022_jp") },
{ YYCC_U8("iso2022jp-1"), YYCC_U8("iso2022_jp_1") },
{ YYCC_U8("iso-2022-jp-1"), YYCC_U8("iso2022_jp_1") },
{ YYCC_U8("iso2022jp-2"), YYCC_U8("iso2022_jp_2") },
{ YYCC_U8("iso-2022-jp-2"), YYCC_U8("iso2022_jp_2") },
{ YYCC_U8("iso2022jp-2004"), YYCC_U8("iso2022_jp_2004") },
{ YYCC_U8("iso-2022-jp-2004"), YYCC_U8("iso2022_jp_2004") },
{ YYCC_U8("iso2022jp-3"), YYCC_U8("iso2022_jp_3") },
{ YYCC_U8("iso-2022-jp-3"), YYCC_U8("iso2022_jp_3") },
{ YYCC_U8("iso2022jp-ext"), YYCC_U8("iso2022_jp_ext") },
{ YYCC_U8("iso-2022-jp-ext"), YYCC_U8("iso2022_jp_ext") },
{ YYCC_U8("csiso2022kr"), YYCC_U8("iso2022_kr") },
{ YYCC_U8("iso2022kr"), YYCC_U8("iso2022_kr") },
{ YYCC_U8("iso-2022-kr"), YYCC_U8("iso2022_kr") },
{ YYCC_U8("iso-8859-1"), YYCC_U8("latin_1") },
{ YYCC_U8("iso8859-1"), YYCC_U8("latin_1") },
{ YYCC_U8("8859"), YYCC_U8("latin_1") },
{ YYCC_U8("cp819"), YYCC_U8("latin_1") },
{ YYCC_U8("latin"), YYCC_U8("latin_1") },
{ YYCC_U8("latin1"), YYCC_U8("latin_1") },
{ YYCC_U8("l1"), YYCC_U8("latin_1") },
{ YYCC_U8("iso-8859-2"), YYCC_U8("iso8859_2") },
{ YYCC_U8("latin2"), YYCC_U8("iso8859_2") },
{ YYCC_U8("l2"), YYCC_U8("iso8859_2") },
{ YYCC_U8("iso-8859-3"), YYCC_U8("iso8859_3") },
{ YYCC_U8("latin3"), YYCC_U8("iso8859_3") },
{ YYCC_U8("l3"), YYCC_U8("iso8859_3") },
{ YYCC_U8("iso-8859-4"), YYCC_U8("iso8859_4") },
{ YYCC_U8("latin4"), YYCC_U8("iso8859_4") },
{ YYCC_U8("l4"), YYCC_U8("iso8859_4") },
{ YYCC_U8("iso-8859-5"), YYCC_U8("iso8859_5") },
{ YYCC_U8("cyrillic"), YYCC_U8("iso8859_5") },
{ YYCC_U8("iso-8859-6"), YYCC_U8("iso8859_6") },
{ YYCC_U8("arabic"), YYCC_U8("iso8859_6") },
{ YYCC_U8("iso-8859-7"), YYCC_U8("iso8859_7") },
{ YYCC_U8("greek"), YYCC_U8("iso8859_7") },
{ YYCC_U8("greek8"), YYCC_U8("iso8859_7") },
{ YYCC_U8("iso-8859-8"), YYCC_U8("iso8859_8") },
{ YYCC_U8("hebrew"), YYCC_U8("iso8859_8") },
{ YYCC_U8("iso-8859-9"), YYCC_U8("iso8859_9") },
{ YYCC_U8("latin5"), YYCC_U8("iso8859_9") },
{ YYCC_U8("l5"), YYCC_U8("iso8859_9") },
{ YYCC_U8("iso-8859-10"), YYCC_U8("iso8859_10") },
{ YYCC_U8("latin6"), YYCC_U8("iso8859_10") },
{ YYCC_U8("l6"), YYCC_U8("iso8859_10") },
{ YYCC_U8("iso-8859-11"), YYCC_U8("iso8859_11") },
{ YYCC_U8("thai"), YYCC_U8("iso8859_11") },
{ YYCC_U8("iso-8859-13"), YYCC_U8("iso8859_13") },
{ YYCC_U8("latin7"), YYCC_U8("iso8859_13") },
{ YYCC_U8("l7"), YYCC_U8("iso8859_13") },
{ YYCC_U8("iso-8859-14"), YYCC_U8("iso8859_14") },
{ YYCC_U8("latin8"), YYCC_U8("iso8859_14") },
{ YYCC_U8("l8"), YYCC_U8("iso8859_14") },
{ YYCC_U8("iso-8859-15"), YYCC_U8("iso8859_15") },
{ YYCC_U8("latin9"), YYCC_U8("iso8859_15") },
{ YYCC_U8("l9"), YYCC_U8("iso8859_15") },
{ YYCC_U8("iso-8859-16"), YYCC_U8("iso8859_16") },
{ YYCC_U8("latin10"), YYCC_U8("iso8859_16") },
{ YYCC_U8("l10"), YYCC_U8("iso8859_16") },
{ YYCC_U8("cp1361"), YYCC_U8("johab") },
{ YYCC_U8("ms1361"), YYCC_U8("johab") },
{ YYCC_U8("kz_1048"), YYCC_U8("kz1048") },
{ YYCC_U8("strk1048_2002"), YYCC_U8("kz1048") },
{ YYCC_U8("rk1048"), YYCC_U8("kz1048") },
{ YYCC_U8("maccyrillic"), YYCC_U8("mac_cyrillic") },
{ YYCC_U8("macgreek"), YYCC_U8("mac_greek") },
{ YYCC_U8("maciceland"), YYCC_U8("mac_iceland") },
{ YYCC_U8("maclatin2"), YYCC_U8("mac_latin2") },
{ YYCC_U8("maccentraleurope"), YYCC_U8("mac_latin2") },
{ YYCC_U8("mac_centeuro"), YYCC_U8("mac_latin2") },
{ YYCC_U8("macroman"), YYCC_U8("mac_roman") },
{ YYCC_U8("macintosh"), YYCC_U8("mac_roman") },
{ YYCC_U8("macturkish"), YYCC_U8("mac_turkish") },
{ YYCC_U8("csptcp154"), YYCC_U8("ptcp154") },
{ YYCC_U8("pt154"), YYCC_U8("ptcp154") },
{ YYCC_U8("cp154"), YYCC_U8("ptcp154") },
{ YYCC_U8("cyrillic-asian"), YYCC_U8("ptcp154") },
{ YYCC_U8("csshiftjis"), YYCC_U8("shift_jis") },
{ YYCC_U8("shiftjis"), YYCC_U8("shift_jis") },
{ YYCC_U8("sjis"), YYCC_U8("shift_jis") },
{ YYCC_U8("s_jis"), YYCC_U8("shift_jis") },
{ YYCC_U8("shiftjis2004"), YYCC_U8("shift_jis_2004") },
{ YYCC_U8("sjis_2004"), YYCC_U8("shift_jis_2004") },
{ YYCC_U8("sjis2004"), YYCC_U8("shift_jis_2004") },
{ YYCC_U8("shiftjisx0213"), YYCC_U8("shift_jisx0213") },
{ YYCC_U8("sjisx0213"), YYCC_U8("shift_jisx0213") },
{ YYCC_U8("s_jisx0213"), YYCC_U8("shift_jisx0213") },
{ YYCC_U8("u32"), YYCC_U8("utf_32") },
{ YYCC_U8("utf32"), YYCC_U8("utf_32") },
{ YYCC_U8("utf-32be"), YYCC_U8("utf_32_be") },
{ YYCC_U8("utf-32le"), YYCC_U8("utf_32_le") },
{ YYCC_U8("u16"), YYCC_U8("utf_16") },
{ YYCC_U8("utf16"), YYCC_U8("utf_16") },
{ YYCC_U8("utf-16be"), YYCC_U8("utf_16_be") },
{ YYCC_U8("utf-16le"), YYCC_U8("utf_16_le") },
{ YYCC_U8("u7"), YYCC_U8("utf_7") },
{ YYCC_U8("unicode-1-1-utf-7"), YYCC_U8("utf_7") },
{ YYCC_U8("u8"), YYCC_U8("utf_8") },
{ YYCC_U8("utf"), YYCC_U8("utf_8") },
{ YYCC_U8("utf8"), YYCC_U8("utf_8") },
{ YYCC_U8("utf-8"), YYCC_U8("utf_8") },
{ YYCC_U8("cp65001"), YYCC_U8("utf_8") },
};
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
using CodePage = NS_YYCC_ENCODING_BACKEND::CodePage;
// Win32 backend table: maps a canonical (lowercase) encoding name to the
// Windows code page identifier handed to the Win32 conversion backend.
// All identifiers are written as plain decimal literals. Never add a leading zero:
// a literal such as 037u is parsed as octal (decimal 31) and selects the wrong code page.
static const std::map<NS_YYCC_STRING::u8string, CodePage> WINCP_MAP {
// NOTE(review): Windows also provides a dedicated US-ASCII code page (20127);
// 437 is kept here to preserve the existing behavior.
{ YYCC_U8("ascii"), static_cast<CodePage>(437u) },
{ YYCC_U8("big5"), static_cast<CodePage>(950u) },
// IBM037 is code page 37 (decimal).
{ YYCC_U8("cp037"), static_cast<CodePage>(37u) },
{ YYCC_U8("cp437"), static_cast<CodePage>(437u) },
{ YYCC_U8("cp500"), static_cast<CodePage>(500u) },
{ YYCC_U8("cp720"), static_cast<CodePage>(720u) },
{ YYCC_U8("cp737"), static_cast<CodePage>(737u) },
{ YYCC_U8("cp775"), static_cast<CodePage>(775u) },
{ YYCC_U8("cp850"), static_cast<CodePage>(850u) },
{ YYCC_U8("cp852"), static_cast<CodePage>(852u) },
{ YYCC_U8("cp855"), static_cast<CodePage>(855u) },
{ YYCC_U8("cp857"), static_cast<CodePage>(857u) },
{ YYCC_U8("cp858"), static_cast<CodePage>(858u) },
{ YYCC_U8("cp860"), static_cast<CodePage>(860u) },
{ YYCC_U8("cp861"), static_cast<CodePage>(861u) },
{ YYCC_U8("cp862"), static_cast<CodePage>(862u) },
{ YYCC_U8("cp863"), static_cast<CodePage>(863u) },
{ YYCC_U8("cp864"), static_cast<CodePage>(864u) },
{ YYCC_U8("cp865"), static_cast<CodePage>(865u) },
{ YYCC_U8("cp866"), static_cast<CodePage>(866u) },
{ YYCC_U8("cp869"), static_cast<CodePage>(869u) },
{ YYCC_U8("cp874"), static_cast<CodePage>(874u) },
{ YYCC_U8("cp875"), static_cast<CodePage>(875u) },
{ YYCC_U8("cp932"), static_cast<CodePage>(932u) },
{ YYCC_U8("cp949"), static_cast<CodePage>(949u) },
{ YYCC_U8("cp950"), static_cast<CodePage>(950u) },
{ YYCC_U8("cp1026"), static_cast<CodePage>(1026u) },
{ YYCC_U8("cp1140"), static_cast<CodePage>(1140u) },
{ YYCC_U8("cp1250"), static_cast<CodePage>(1250u) },
{ YYCC_U8("cp1251"), static_cast<CodePage>(1251u) },
{ YYCC_U8("cp1252"), static_cast<CodePage>(1252u) },
{ YYCC_U8("cp1253"), static_cast<CodePage>(1253u) },
{ YYCC_U8("cp1254"), static_cast<CodePage>(1254u) },
{ YYCC_U8("cp1255"), static_cast<CodePage>(1255u) },
{ YYCC_U8("cp1256"), static_cast<CodePage>(1256u) },
{ YYCC_U8("cp1257"), static_cast<CodePage>(1257u) },
{ YYCC_U8("cp1258"), static_cast<CodePage>(1258u) },
{ YYCC_U8("euc_jp"), static_cast<CodePage>(20932u) },
{ YYCC_U8("euc_kr"), static_cast<CodePage>(51949u) },
{ YYCC_U8("gb2312"), static_cast<CodePage>(936u) },
{ YYCC_U8("gbk"), static_cast<CodePage>(936u) },
{ YYCC_U8("gb18030"), static_cast<CodePage>(54936u) },
{ YYCC_U8("hz"), static_cast<CodePage>(52936u) },
{ YYCC_U8("iso2022_jp"), static_cast<CodePage>(50220u) },
{ YYCC_U8("iso2022_kr"), static_cast<CodePage>(50225u) },
{ YYCC_U8("latin_1"), static_cast<CodePage>(28591u) },
{ YYCC_U8("iso8859_2"), static_cast<CodePage>(28592u) },
{ YYCC_U8("iso8859_3"), static_cast<CodePage>(28593u) },
{ YYCC_U8("iso8859_4"), static_cast<CodePage>(28594u) },
{ YYCC_U8("iso8859_5"), static_cast<CodePage>(28595u) },
{ YYCC_U8("iso8859_6"), static_cast<CodePage>(28596u) },
{ YYCC_U8("iso8859_7"), static_cast<CodePage>(28597u) },
{ YYCC_U8("iso8859_8"), static_cast<CodePage>(28598u) },
{ YYCC_U8("iso8859_9"), static_cast<CodePage>(28599u) },
{ YYCC_U8("iso8859_13"), static_cast<CodePage>(28603u) },
{ YYCC_U8("iso8859_15"), static_cast<CodePage>(28605u) },
{ YYCC_U8("johab"), static_cast<CodePage>(1361u) },
{ YYCC_U8("mac_cyrillic"), static_cast<CodePage>(10007u) },
{ YYCC_U8("mac_greek"), static_cast<CodePage>(10006u) },
{ YYCC_U8("mac_iceland"), static_cast<CodePage>(10079u) },
{ YYCC_U8("mac_turkish"), static_cast<CodePage>(10081u) },
{ YYCC_U8("shift_jis"), static_cast<CodePage>(932u) },
{ YYCC_U8("utf_7"), static_cast<CodePage>(65000u) },
{ YYCC_U8("utf_8"), static_cast<CodePage>(65001u) },
};
#else
// Iconv backend table: maps a canonical (lowercase) encoding name to the
// encoding name string handed to the iconv conversion backend.
// Names use the hyphenated canonical iconv spelling (e.g. "UTF-16", not "UTF16"),
// because not every iconv implementation accepts the un-hyphenated aliases.
static const std::map<NS_YYCC_STRING::u8string, std::string> ICONV_MAP {
{ YYCC_U8("ascii"), "ASCII" },
{ YYCC_U8("big5"), "BIG5" },
{ YYCC_U8("big5hkscs"), "BIG5-HKSCS" },
{ YYCC_U8("cp850"), "CP850" },
{ YYCC_U8("cp862"), "CP862" },
{ YYCC_U8("cp866"), "CP866" },
{ YYCC_U8("cp874"), "CP874" },
{ YYCC_U8("cp932"), "CP932" },
{ YYCC_U8("cp949"), "CP949" },
{ YYCC_U8("cp950"), "CP950" },
{ YYCC_U8("cp1250"), "CP1250" },
{ YYCC_U8("cp1251"), "CP1251" },
{ YYCC_U8("cp1252"), "CP1252" },
{ YYCC_U8("cp1253"), "CP1253" },
{ YYCC_U8("cp1254"), "CP1254" },
{ YYCC_U8("cp1255"), "CP1255" },
{ YYCC_U8("cp1256"), "CP1256" },
{ YYCC_U8("cp1257"), "CP1257" },
{ YYCC_U8("cp1258"), "CP1258" },
{ YYCC_U8("euc_jp"), "EUC-JP" },
{ YYCC_U8("euc_kr"), "EUC-KR" },
{ YYCC_U8("gb2312"), "CP936" },
{ YYCC_U8("gbk"), "GBK" },
{ YYCC_U8("gb18030"), "GB18030" },
{ YYCC_U8("hz"), "HZ" },
{ YYCC_U8("iso2022_jp"), "ISO-2022-JP" },
{ YYCC_U8("iso2022_jp_1"), "ISO-2022-JP-1" },
{ YYCC_U8("iso2022_jp_2"), "ISO-2022-JP-2" },
{ YYCC_U8("iso2022_kr"), "ISO-2022-KR" },
{ YYCC_U8("latin_1"), "ISO-8859-1" },
{ YYCC_U8("iso8859_2"), "ISO-8859-2" },
{ YYCC_U8("iso8859_3"), "ISO-8859-3" },
{ YYCC_U8("iso8859_4"), "ISO-8859-4" },
{ YYCC_U8("iso8859_5"), "ISO-8859-5" },
{ YYCC_U8("iso8859_6"), "ISO-8859-6" },
{ YYCC_U8("iso8859_7"), "ISO-8859-7" },
{ YYCC_U8("iso8859_8"), "ISO-8859-8" },
{ YYCC_U8("iso8859_9"), "ISO-8859-9" },
{ YYCC_U8("iso8859_10"), "ISO-8859-10" },
{ YYCC_U8("iso8859_11"), "ISO-8859-11" },
{ YYCC_U8("iso8859_13"), "ISO-8859-13" },
{ YYCC_U8("iso8859_14"), "ISO-8859-14" },
{ YYCC_U8("iso8859_15"), "ISO-8859-15" },
{ YYCC_U8("iso8859_16"), "ISO-8859-16" },
{ YYCC_U8("johab"), "JOHAB" },
{ YYCC_U8("koi8_t"), "KOI8-T" },
{ YYCC_U8("mac_cyrillic"), "MacCyrillic" },
{ YYCC_U8("mac_greek"), "MacGreek" },
{ YYCC_U8("mac_iceland"), "MacIceland" },
{ YYCC_U8("mac_roman"), "MacRoman" },
{ YYCC_U8("mac_turkish"), "MacTurkish" },
{ YYCC_U8("ptcp154"), "PT154" },
{ YYCC_U8("shift_jis"), "SHIFT_JIS" },
{ YYCC_U8("utf_32"), "UTF-32" },
{ YYCC_U8("utf_32_be"), "UTF-32BE" },
{ YYCC_U8("utf_32_le"), "UTF-32LE" },
{ YYCC_U8("utf_16"), "UTF-16" },
{ YYCC_U8("utf_16_be"), "UTF-16BE" },
{ YYCC_U8("utf_16_le"), "UTF-16LE" },
{ YYCC_U8("utf_7"), "UTF-7" },
{ YYCC_U8("utf_8"), "UTF-8" },
};
#endif
#pragma endregion
#pragma region Misc
ConvError::ConvError(const ConvError::Error& err) : inner(err) {}
bool is_valid_encoding_name(const EncodingName& name) {
}
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
#pragma region
#pragma endregion
} // namespace yycc::encoding::pycodec

View File

@ -0,0 +1,202 @@
#pragma once
#include "../macro/os_detector.hpp"
#include "../macro/class_copy_move.hpp"
#include "../patch/expected.hpp"
#include "../string.hpp"
// Choose the backend of PyCodec module
#if defined(YYCC_OS_WINDOWS)
#include "windows.hpp"
#define YYCC_PYCODEC_WIN32_BACKEND
#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::windows
#else
#include "iconv.hpp"
#define YYCC_PYCODEC_ICONV_BACKEND
#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::iconv
#endif
#define NS_YYCC_STRING ::yycc::string
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
namespace yycc::encoding::pycodec {
using EncodingName = NS_YYCC_STRING::u8string_view;
/// @private
/// Error type of this module. It wraps the error type of the backend selected
/// at compile time (NS_YYCC_ENCODING_BACKEND::ConvError).
struct ConvError {
/// The error type of the active backend.
using Error = NS_YYCC_ENCODING_BACKEND::ConvError;
/// Construct from a backend error. The constructor is not explicit,
/// so a backend error converts implicitly to this type.
ConvError(const Error& err);
/// The wrapped backend error.
Error inner;
};
/// @private
/// Result type of the `priv_*` conversion functions in this module:
/// an Expected parameterized with the produced type T and this module's ConvError.
template<typename T>
using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected<T, ConvError>;
/**
 * @brief Check whether the given name is a valid encoding name in PyCodec.
 * @param[in] name The name to be checked.
 * @return True if it is valid, otherwise false.
 */
bool is_valid_encoding_name(const EncodingName& name);
// Char -> UTF8
/**
 * @brief Converter from char-based text in a named encoding to UTF-8.
 * @details Copy and move behavior is declared through the YYCC_DELETE_COPY and
 * YYCC_DEFAULT_MOVE macros (presumably deleted copy and defaulted move, judging
 * by their names; their expansion is not visible in this file).
 */
class CharToUtf8 {
public:
/**
 * @brief Construct the converter.
 * @param[in] name The encoding name of the char-based text.
 */
CharToUtf8(const EncodingName& name);
~CharToUtf8();
YYCC_DELETE_COPY(CharToUtf8)
YYCC_DEFAULT_MOVE(CharToUtf8)
public:
/// Convert \c src to UTF-8 and return the outcome as a ConvResult.
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src);
/// Convert \c src to UTF-8 and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst);
/// Convert \c src to UTF-8 and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
NS_YYCC_STRING::u8string to_utf8(const std::string_view& src);
private:
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
// Win32 backend: the converter state is a code page value.
NS_YYCC_ENCODING_BACKEND::CodePage code_page;
#else
// Iconv backend: the converter state is the backend's converter object.
NS_YYCC_ENCODING_BACKEND::CharToUtf8 inner;
#endif
};
// UTF8 -> Char
/**
 * @brief Converter from UTF-8 to char-based text in a named encoding.
 * @details Copy and move behavior is declared through the YYCC_DELETE_COPY and
 * YYCC_DEFAULT_MOVE macros (presumably deleted copy and defaulted move, judging
 * by their names; their expansion is not visible in this file).
 */
class Utf8ToChar {
public:
/**
 * @brief Construct the converter.
 * @param[in] name The encoding name of the char-based text.
 */
Utf8ToChar(const EncodingName& name);
~Utf8ToChar();
YYCC_DELETE_COPY(Utf8ToChar)
YYCC_DEFAULT_MOVE(Utf8ToChar)
public:
/// Convert UTF-8 \c src to char-based text and return the outcome as a ConvResult.
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src);
/// Convert UTF-8 \c src to char-based text and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst);
/// Convert UTF-8 \c src to char-based text and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
std::string to_char(const NS_YYCC_STRING::u8string_view& src);
private:
#if defined(YYCC_PYCODEC_WIN32_BACKEND)
// Win32 backend: the converter state is a code page value.
NS_YYCC_ENCODING_BACKEND::CodePage code_page;
#else
// Iconv backend: the converter state is the backend's converter object.
NS_YYCC_ENCODING_BACKEND::Utf8ToChar inner;
#endif
};
// WChar -> UTF8
/**
 * @brief Converter from wchar_t-based text to UTF-8.
 * @details Takes no encoding name. With the Win32 backend this class declares
 * no data member; with the iconv backend it holds the backend's converter object.
 */
class WcharToUtf8 {
public:
WcharToUtf8();
~WcharToUtf8();
YYCC_DELETE_COPY(WcharToUtf8)
YYCC_DEFAULT_MOVE(WcharToUtf8)
public:
/// Convert \c src to UTF-8 and return the outcome as a ConvResult.
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::wstring_view& src);
/// Convert \c src to UTF-8 and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst);
/// Convert \c src to UTF-8 and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src);
private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
// Iconv backend only: the backend's converter object.
NS_YYCC_ENCODING_BACKEND::WcharToUtf8 inner;
#endif
};
// UTF8 -> WChar
/**
 * @brief Converter from UTF-8 to wchar_t-based text.
 * @details Takes no encoding name. With the Win32 backend this class declares
 * no data member; with the iconv backend it holds the backend's converter object.
 */
class Utf8ToWchar {
public:
Utf8ToWchar();
~Utf8ToWchar();
YYCC_DELETE_COPY(Utf8ToWchar)
YYCC_DEFAULT_MOVE(Utf8ToWchar)
public:
/// Convert UTF-8 \c src to a wide string and return the outcome as a ConvResult.
ConvResult<std::wstring> priv_to_wchar(const NS_YYCC_STRING::u8string_view& src);
/// Convert UTF-8 \c src to a wide string and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst);
/// Convert UTF-8 \c src to a wide string and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src);
private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
// Iconv backend only: the backend's converter object.
NS_YYCC_ENCODING_BACKEND::Utf8ToWchar inner;
#endif
};
// UTF8 -> UTF16
/**
 * @brief Converter from UTF-8 to UTF-16.
 * @details Takes no encoding name. With the Win32 backend this class declares
 * no data member; with the iconv backend it holds the backend's converter object.
 */
class Utf8ToUtf16 {
public:
Utf8ToUtf16();
~Utf8ToUtf16();
YYCC_DELETE_COPY(Utf8ToUtf16)
YYCC_DEFAULT_MOVE(Utf8ToUtf16)
public:
/// Convert UTF-8 \c src to UTF-16 and return the outcome as a ConvResult.
ConvResult<std::u16string> priv_to_utf16(const NS_YYCC_STRING::u8string_view& src);
/// Convert UTF-8 \c src to UTF-16 and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_utf16(const NS_YYCC_STRING::u8string_view& src, std::u16string& dst);
/// Convert UTF-8 \c src to UTF-16 and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
std::u16string to_utf16(const NS_YYCC_STRING::u8string_view& src);
private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
// Iconv backend only: the backend's converter object.
NS_YYCC_ENCODING_BACKEND::Utf8ToUtf16 inner;
#endif
};
// UTF16 -> UTF8
/**
 * @brief Converter from UTF-16 to UTF-8.
 * @details Takes no encoding name. With the Win32 backend this class declares
 * no data member; with the iconv backend it holds the backend's converter object.
 */
class Utf16ToUtf8 {
public:
Utf16ToUtf8();
~Utf16ToUtf8();
YYCC_DELETE_COPY(Utf16ToUtf8)
YYCC_DEFAULT_MOVE(Utf16ToUtf8)
public:
/// Convert UTF-16 \c src to UTF-8 and return the outcome as a ConvResult.
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::u16string_view& src);
/// Convert UTF-16 \c src to UTF-8 and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_utf8(const std::u16string_view& src, NS_YYCC_STRING::u8string& dst);
/// Convert UTF-16 \c src to UTF-8 and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
NS_YYCC_STRING::u8string to_utf8(const std::u16string_view& src);
private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
// Iconv backend only: the backend's converter object.
NS_YYCC_ENCODING_BACKEND::Utf16ToUtf8 inner;
#endif
};
// UTF8 -> UTF32
/**
 * @brief Converter from UTF-8 to UTF-32.
 * @details Takes no encoding name. With the Win32 backend this class declares
 * no data member; with the iconv backend it holds the backend's converter object.
 */
class Utf8ToUtf32 {
public:
Utf8ToUtf32();
~Utf8ToUtf32();
YYCC_DELETE_COPY(Utf8ToUtf32)
YYCC_DEFAULT_MOVE(Utf8ToUtf32)
public:
/// Convert UTF-8 \c src to UTF-32 and return the outcome as a ConvResult.
ConvResult<std::u32string> priv_to_utf32(const NS_YYCC_STRING::u8string_view& src);
/// Convert UTF-8 \c src to UTF-32 and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_utf32(const NS_YYCC_STRING::u8string_view& src, std::u32string& dst);
/// Convert UTF-8 \c src to UTF-32 and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
std::u32string to_utf32(const NS_YYCC_STRING::u8string_view& src);
private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
// Iconv backend only: the backend's converter object.
NS_YYCC_ENCODING_BACKEND::Utf8ToUtf32 inner;
#endif
};
// UTF32 -> UTF8
/**
 * @brief Converter from UTF-32 to UTF-8.
 * @details Takes no encoding name. With the Win32 backend this class declares
 * no data member; with the iconv backend it holds the backend's converter object.
 */
class Utf32ToUtf8 {
public:
Utf32ToUtf8();
~Utf32ToUtf8();
YYCC_DELETE_COPY(Utf32ToUtf8)
YYCC_DEFAULT_MOVE(Utf32ToUtf8)
public:
/// Convert UTF-32 \c src to UTF-8 and return the outcome as a ConvResult.
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::u32string_view& src);
/// Convert UTF-32 \c src to UTF-8 and store it in \c dst.
/// NOTE(review): the bool presumably reports success; the implementation is not visible here.
bool to_utf8(const std::u32string_view& src, NS_YYCC_STRING::u8string& dst);
/// Convert UTF-32 \c src to UTF-8 and return it by value.
/// NOTE(review): the failure behavior of this overload is not visible in this header.
NS_YYCC_STRING::u8string to_utf8(const std::u32string_view& src);
private:
#if defined(YYCC_PYCODEC_ICONV_BACKEND)
// Iconv backend only: the backend's converter object.
NS_YYCC_ENCODING_BACKEND::Utf32ToUtf8 inner;
#endif
};
}
#undef NS_YYCC_PATCH_EXPECTED
#undef NS_YYCC_STRING

View File

@ -1,10 +1,10 @@
#include "utf.hpp"
#include "stlcvt.hpp"
#include <locale>
#define NS_YYCC_STRING ::yycc::string
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
namespace yycc::encoding::utf {
namespace yycc::encoding::stlcvt {
#pragma region Generic Converter

View File

@ -5,7 +5,7 @@
#define NS_YYCC_STRING ::yycc::string
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
namespace yycc::encoding::utf {
namespace yycc::encoding::stlcvt {
/// @private
struct ConvError {};

View File

@ -228,6 +228,9 @@ namespace yycc::encoding::windows {
// The conversion between UTF encodings is implemented by c16rtomb, c32rtomb, mbrtoc16 and mbrtoc32.
// These functions are locale-dependent in the C++ standard, but in Microsoft STL they only work with UTF-8.
// So we can use them safely in a Win32 environment.
// Reference:
// * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170
// * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170
// In theory, 1 UTF32 unit can produce at most 4 UTF8 units or 2 UTF16 units.
// So we pre-allocate memory for the result to avoid allocating memory multiple times.
@ -280,8 +283,9 @@ namespace yycc::encoding::windows {
std::mbstate_t state{};
char mbout[MB_LEN_MAX]{};
for (char16_t c : src) {
std::size_t rc = std::c16rtomb(mbout, c, &state);
if (rc != (std::size_t) -1) dst.append(reinterpret_cast<NS_YYCC_STRING::u8char*>(mbout), rc);
size_t rc = std::c16rtomb(mbout, c, &state);
if (rc != (size_t) -1) dst.append(reinterpret_cast<NS_YYCC_STRING::u8char*>(mbout), rc);
else return ConvError::InvalidUtf16;
}
return dst;
@ -340,8 +344,9 @@ namespace yycc::encoding::windows {
std::mbstate_t state{};
char mbout[MB_LEN_MAX]{};
for (char32_t c : src) {
std::size_t rc = std::c32rtomb(mbout, c, &state);
if (rc != (std::size_t) -1) dst.append(reinterpret_cast<NS_YYCC_STRING::u8char*>(mbout), rc);
size_t rc = std::c32rtomb(mbout, c, &state);
if (rc != (size_t) -1) dst.append(reinterpret_cast<NS_YYCC_STRING::u8char*>(mbout), rc);
else return ConvError::InvalidUtf32;
}
return dst;