diff --git a/script/.gitignore b/script/.gitignore index dabf011..8877f27 100644 --- a/script/.gitignore +++ b/script/.gitignore @@ -1,3 +1,216 @@ -# -------------------- Output -------------------- +## ===== Myself ===== +# Exclude VSCode +.vscode/ + +# Exclude generated files win_build.bat linux_build.sh + +## ===== Python ===== +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. 
Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ diff --git a/script/gen_build_script.py b/script/gen_build_script.py index a61a620..af73feb 100644 --- a/script/gen_build_script.py +++ b/script/gen_build_script.py @@ -1,15 +1,16 @@ -import jinja2 import argparse -import os -import io +import typing import re import shlex +from pathlib import Path +from dataclasses import dataclass +import jinja2 def validate_cpp_ver(ver: str) -> str: if re.match(r'^[0-9]+$', ver) is not None: return ver else: raise argparse.ArgumentTypeError('invalid version of C++ standard.') -def write_line(f: io.TextIOWrapper, val: str) -> None: +def write_line(f: typing.TextIO, val: str) -> None: f.write(val) f.write('\n') @@ -24,55 +25,51 @@ def escape_cmd_argument(arg): def escape_sh_argument(arg): return shlex.quote(arg) +@dataclass class ScriptSettings: - m_CppVersion: str - m_BuildDoc: bool - m_PIC: bool - - def __init__(self, cpp_ver: str, build_doc: bool, pic: bool): - self.m_CppVersion = cpp_ver - self.m_BuildDoc = build_doc - self.m_PIC = pic + cpp_version: str + build_doc: bool + pic: bool class TemplateRender: - m_Loader: jinja2.BaseLoader - m_Environment: jinja2.Environment + loader: jinja2.BaseLoader + environment: jinja2.Environment - m_WinTemplate: jinja2.Template - m_LinuxTemplate: jinja2.Template + win_template: jinja2.Template + linux_template: jinja2.Template - m_Settings: ScriptSettings + settings: ScriptSettings def __init__(self, settings: ScriptSettings) -> None: - self.m_Loader = jinja2.FileSystemLoader(self.__get_dir()) - self.m_Environment = jinja2.Environment(loader=self.m_Loader) + self.loader = jinja2.FileSystemLoader(self.__get_dir()) + self.environment = jinja2.Environment(loader=self.loader) - self.m_WinTemplate = self.m_Environment.get_template('win_build.template.bat') - self.m_LinuxTemplate = 
self.m_Environment.get_template('linux_build.template.sh') + self.win_template = self.environment.get_template('win_build.bat.jinja') + self.linux_template = self.environment.get_template('linux_build.sh.jinja') - self.m_Settings = settings + self.settings = settings - def __get_dir(self) -> str: - return os.path.dirname(__file__) + def __get_dir(self) -> Path: + return Path(__file__).resolve().parent def __escape_path(self, val: str, is_win: bool) -> str: if is_win: return escape_cmd_argument(val) else: return escape_sh_argument(val) def __render(self, template: jinja2.Template, dest_file: str, is_win: bool) -> None: - with open(os.path.join(self.__get_dir(), dest_file), 'w', encoding='utf-8') as f: + with open(self.__get_dir() / dest_file, 'w', encoding='utf-8') as f: f.write(template.render( - repo_root_dir = self.__escape_path(os.path.dirname(self.__get_dir()), is_win), - cpp_version = self.m_Settings.m_CppVersion, - build_doc = self.m_Settings.m_BuildDoc, - pic = settings.m_PIC + repo_root_dir = self.__escape_path(str(self.__get_dir().parent), is_win), + cpp_version = self.settings.cpp_version, + build_doc = self.settings.build_doc, + pic = self.settings.pic )) def render_win_script(self) -> None: - self.__render(self.m_WinTemplate, 'win_build.bat', True) + self.__render(self.win_template, 'win_build.bat', True) def render_linux_script(self) -> None: - self.__render(self.m_LinuxTemplate, 'linux_build.sh', False) + self.__render(self.linux_template, 'linux_build.sh', False) if __name__ == '__main__': diff --git a/script/linux_build.template.sh b/script/linux_build.sh.jinja similarity index 100% rename from script/linux_build.template.sh rename to script/linux_build.sh.jinja diff --git a/script/pycodec/.gitignore b/script/pycodec/.gitignore new file mode 100644 index 0000000..59501ae --- /dev/null +++ b/script/pycodec/.gitignore @@ -0,0 +1,2 @@ +# Exclude result +*.cpp diff --git a/script/pycodec/conv_encoding_table.py b/script/pycodec/conv_encoding_table.py new file
mode 100644 index 0000000..d7b6fc1 --- /dev/null +++ b/script/pycodec/conv_encoding_table.py @@ -0,0 +1,63 @@ +import typing +from pathlib import Path +import os + +class LanguageToken: + name: str + alias: tuple[str, ...] + code_page: str | None + iconv_code: str | None + + def __init__(self, name: str, alias: typing.Iterator[str], code_page: str, iconv_code: str): + self.name = name.lower() + self.alias = tuple(map(lambda x: x.lower(), alias)) + self.code_page = None if code_page == '' else code_page + self.iconv_code = None if iconv_code == '' else iconv_code + +def extract_data(fs: typing.TextIO) -> list[str]: + # remove first line to remove table header + return fs.readlines()[1:] + +def extract_token(csv_data: list[str]) -> tuple[LanguageToken, ...]: + ret: list[LanguageToken] = [] + for line in csv_data: + line = line.strip('\n') + line_sp = line.split('\t') + alias_sp = filter(lambda x: len(x) != 0, map(lambda x: x.strip(), line_sp[1].split(','))) + ret.append(LanguageToken(line_sp[0], alias_sp, line_sp[2], line_sp[3])) + return tuple(ret) + +def write_alias_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None: + fs.write('static const std::map ALISA_MAP {\n') + for i in data: + for j in i.alias: + fs.write(f'\t{{ YYCC_U8("{j}"), YYCC_U8("{i.name}") }},\n') + fs.write('};\n') + +def write_win_cp_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None: + fs.write('static const std::map WINCP_MAP {\n') + for i in data: + if i.code_page is not None: + fs.write(f'\t{{ YYCC_U8("{i.name}"), static_cast({i.code_page}u) }},\n') + fs.write('};\n') + +def write_iconv_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None: + fs.write('static const std::map ICONV_MAP {\n') + for i in data: + if i.iconv_code is not None: + fs.write(f'\t{{ YYCC_U8("{i.name}"), "{i.iconv_code}" }},\n') + fs.write('};\n') + +if __name__ == '__main__': + # get file path + self_path = Path(__file__).resolve().parent + csv_file = self_path / 'encoding_table.csv' + 
cpp_file = self_path / 'encoding_table.cpp' + # process files + with open(csv_file, 'r', encoding='utf-8') as fr: + with open(cpp_file, 'w', encoding='utf-8') as fw: + data = extract_data(fr) + token = extract_token(data) + write_alias_map(fw, token) + write_win_cp_map(fw, token) + write_iconv_map(fw, token) diff --git a/script/pycodec/encoding_table.csv b/script/pycodec/encoding_table.csv new file mode 100644 index 0000000..ffe38df --- /dev/null +++ b/script/pycodec/encoding_table.csv @@ -0,0 +1,98 @@ +Encoding Alias Code Page Iconv Identifier +ascii 646, us-ascii 437 ASCII +big5 big5-tw, csbig5 950 BIG5 +big5hkscs big5-hkscs, hkscs BIG5-HKSCS +cp037 IBM037, IBM039 037 +cp273 273, IBM273, csIBM273 +cp424 EBCDIC-CP-HE, IBM424 +cp437 437, IBM437 437 +cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 500 +cp720 720 +cp737 737 +cp775 IBM775 775 +cp850 850, IBM850 850 CP850 +cp852 852, IBM852 852 +cp855 855, IBM855 855 +cp856 +cp857 857, IBM857 857 +cp858 858, IBM858 858 +cp860 860, IBM860 860 +cp861 861, CP-IS, IBM861 861 +cp862 862, IBM862 862 CP862 +cp863 863, IBM863 863 +cp864 IBM864 864 +cp865 865, IBM865 865 +cp866 866, IBM866 866 CP866 +cp869 869, CP-GR, IBM869 869 +cp874 874 CP874 +cp875 875 +cp932 932, ms932, mskanji, ms-kanji, windows-31j 932 CP932 +cp949 949, ms949, uhc 949 CP949 +cp950 950, ms950 950 CP950 +cp1006 +cp1026 ibm1026 1026 +cp1125 1125, ibm1125, cp866u, ruscii +cp1140 ibm1140 1140 +cp1250 windows-1250 1250 CP1250 +cp1251 windows-1251 1251 CP1251 +cp1252 windows-1252 1252 CP1252 +cp1253 windows-1253 1253 CP1253 +cp1254 windows-1254 1254 CP1254 +cp1255 windows-1255 1255 CP1255 +cp1256 windows-1256 1256 CP1256 +cp1257 windows-1257 1257 CP1257 +cp1258 windows-1258 1258 CP1258 +euc_jp eucjp, ujis, u-jis 20932 EUC-JP +euc_jis_2004 jisx0213, eucjis2004 +euc_jisx0213 eucjisx0213 +euc_kr euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 51949 EUC-KR +gb2312 chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, 
iso-ir-58 936 CP936 +gbk 936, cp936, ms936 936 GBK +gb18030 gb18030-2000 54936 GB18030 +hz hzgb, hz-gb, hz-gb-2312 52936 HZ +iso2022_jp csiso2022jp, iso2022jp, iso-2022-jp 50220 ISO-2022-JP +iso2022_jp_1 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-1 +iso2022_jp_2 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2 +iso2022_jp_2004 iso2022jp-2004, iso-2022-jp-2004 +iso2022_jp_3 iso2022jp-3, iso-2022-jp-3 +iso2022_jp_ext iso2022jp-ext, iso-2022-jp-ext +iso2022_kr csiso2022kr, iso2022kr, iso-2022-kr 50225 ISO-2022-KR +latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28591 ISO-8859-1 +iso8859_2 iso-8859-2, latin2, L2 28592 ISO-8859-2 +iso8859_3 iso-8859-3, latin3, L3 28593 ISO-8859-3 +iso8859_4 iso-8859-4, latin4, L4 28594 ISO-8859-4 +iso8859_5 iso-8859-5, cyrillic 28595 ISO-8859-5 +iso8859_6 iso-8859-6, arabic 28596 ISO-8859-6 +iso8859_7 iso-8859-7, greek, greek8 28597 ISO-8859-7 +iso8859_8 iso-8859-8, hebrew 28598 ISO-8859-8 +iso8859_9 iso-8859-9, latin5, L5 28599 ISO-8859-9 +iso8859_10 iso-8859-10, latin6, L6 ISO-8859-10 +iso8859_11 iso-8859-11, thai ISO-8859-11 +iso8859_13 iso-8859-13, latin7, L7 28603 ISO-8859-13 +iso8859_14 iso-8859-14, latin8, L8 ISO-8859-14 +iso8859_15 iso-8859-15, latin9, L9 28605 ISO-8859-15 +iso8859_16 iso-8859-16, latin10, L10 ISO-8859-16 +johab cp1361, ms1361 1361 JOHAB +koi8_r +koi8_t KOI8-T +koi8_u +kz1048 kz_1048, strk1048_2002, rk1048 +mac_cyrillic maccyrillic 10007 MacCyrillic +mac_greek macgreek 10006 MacGreek +mac_iceland maciceland 10079 MacIceland +mac_latin2 maclatin2, maccentraleurope, mac_centeuro +mac_roman macroman, macintosh MacRoman +mac_turkish macturkish 10081 MacTurkish +ptcp154 csptcp154, pt154, cp154, cyrillic-asian PT154 +shift_jis csshiftjis, shiftjis, sjis, s_jis 932 SHIFT_JIS +shift_jis_2004 shiftjis2004, sjis_2004, sjis2004 +shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213 +utf_32 U32, utf32 UTF-32 +utf_32_be UTF-32BE UTF-32BE +utf_32_le UTF-32LE UTF-32LE +utf_16 U16, utf16 UTF16 +utf_16_be UTF-16BE UTF-16BE 
+utf_16_le UTF-16LE UTF-16LE +utf_7 U7, unicode-1-1-utf-7 65000 UTF-7 +utf_8 U8, UTF, utf8, utf-8, cp65001 65001 UTF-8 +utf_8_sig diff --git a/script/pyproject.toml b/script/pyproject.toml new file mode 100644 index 0000000..8141c06 --- /dev/null +++ b/script/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "script" +version = "0.1.0" +requires-python = ">=3.11" +dependencies = [ + "jinja2==3.1.2", +] diff --git a/script/uv.lock b/script/uv.lock new file mode 100644 index 0000000..3f0527a --- /dev/null +++ b/script/uv.lock @@ -0,0 +1,74 @@ +version = 1 +revision = 2 +requires-python = ">=3.11" + +[[package]] +name = "jinja2" +version = "3.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/ff/75c28576a1d900e87eb6335b063fab47a8ef3c8b4d88524c4bf78f670cce/Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", size = 268239, upload-time = "2022-04-28T17:21:27.579Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", size = 133101, upload-time = "2022-04-28T17:21:25.336Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353, upload-time = "2024-10-18T15:21:02.187Z" }, + { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392, upload-time = "2024-10-18T15:21:02.941Z" }, + { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984, upload-time = "2024-10-18T15:21:03.953Z" }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120, upload-time = "2024-10-18T15:21:06.495Z" }, + { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032, upload-time = "2024-10-18T15:21:07.295Z" }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057, upload-time = "2024-10-18T15:21:08.073Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359, upload-time = "2024-10-18T15:21:09.318Z" }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306, upload-time = "2024-10-18T15:21:10.185Z" }, + { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094, upload-time = "2024-10-18T15:21:11.005Z" }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521, upload-time = "2024-10-18T15:21:12.911Z" }, + { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload-time = "2024-10-18T15:21:13.777Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload-time = "2024-10-18T15:21:14.822Z" }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload-time = 
"2024-10-18T15:21:15.642Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload-time = "2024-10-18T15:21:17.133Z" }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload-time = "2024-10-18T15:21:18.064Z" }, + { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload-time = "2024-10-18T15:21:18.859Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload-time = "2024-10-18T15:21:19.671Z" }, + { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload-time = "2024-10-18T15:21:20.971Z" }, + { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload-time = "2024-10-18T15:21:22.646Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" }, + { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload-time = "2024-10-18T15:21:24.577Z" }, + { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352, upload-time = "2024-10-18T15:21:25.382Z" }, + { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122, upload-time = "2024-10-18T15:21:26.199Z" }, + { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085, upload-time = "2024-10-18T15:21:27.029Z" }, + { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978, upload-time = "2024-10-18T15:21:27.846Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208, upload-time = "2024-10-18T15:21:28.744Z" }, + { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357, upload-time = "2024-10-18T15:21:29.545Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344, upload-time = "2024-10-18T15:21:30.366Z" }, + { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101, upload-time = "2024-10-18T15:21:31.207Z" }, + { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603, upload-time = "2024-10-18T15:21:32.032Z" }, + { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510, upload-time = "2024-10-18T15:21:33.625Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486, upload-time = "2024-10-18T15:21:34.611Z" }, + { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480, upload-time = "2024-10-18T15:21:35.398Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914, upload-time = "2024-10-18T15:21:36.231Z" }, + { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796, upload-time = "2024-10-18T15:21:37.073Z" }, + { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473, upload-time = "2024-10-18T15:21:37.932Z" }, + { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114, upload-time = "2024-10-18T15:21:39.799Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098, upload-time = "2024-10-18T15:21:40.813Z" }, + { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208, upload-time = "2024-10-18T15:21:41.814Z" }, + { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload-time = "2024-10-18T15:21:42.784Z" }, +] + +[[package]] +name = "script" +version = "0.1.0" +source = { virtual = "." } +dependencies = [ + { name = "jinja2" }, +] + +[package.metadata] +requires-dist = [{ name = "jinja2", specifier = "==3.1.2" }] diff --git a/script/win_build.template.bat b/script/win_build.bat.jinja similarity index 100% rename from script/win_build.template.bat rename to script/win_build.bat.jinja diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b168862..cd84c60 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,7 +15,7 @@ PRIVATE yycc/string/op.cpp yycc/rust/panic.cpp yycc/patch/path.cpp - yycc/encoding/utf.cpp + yycc/encoding/stlcvt.cpp yycc/encoding/windows.cpp yycc/encoding/iconv.cpp yycc/encoding/pycodec.cpp @@ -68,7 +68,7 @@ FILES yycc/patch/contains.hpp yycc/patch/starts_ends_with.hpp yycc/patch/expected.hpp - yycc/encoding/utf.hpp + yycc/encoding/stlcvt.hpp yycc/encoding/windows.hpp yycc/encoding/iconv.hpp yycc/encoding/pycodec.hpp diff --git a/src/yycc/encoding/pycodec.cpp b/src/yycc/encoding/pycodec.cpp index e69de29..ad69fca 100644 --- 
a/src/yycc/encoding/pycodec.cpp +++ b/src/yycc/encoding/pycodec.cpp @@ -0,0 +1,446 @@ +#include "pycodec.hpp" +#include + +#define NS_YYCC_STRING ::yycc::string +#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected + +namespace yycc::encoding::pycodec { + +#pragma region Encoding Name + + static const std::map ALISA_MAP { + { YYCC_U8("646"), YYCC_U8("ascii") }, + { YYCC_U8("us-ascii"), YYCC_U8("ascii") }, + { YYCC_U8("big5-tw"), YYCC_U8("big5") }, + { YYCC_U8("csbig5"), YYCC_U8("big5") }, + { YYCC_U8("big5-hkscs"), YYCC_U8("big5hkscs") }, + { YYCC_U8("hkscs"), YYCC_U8("big5hkscs") }, + { YYCC_U8("ibm037"), YYCC_U8("cp037") }, + { YYCC_U8("ibm039"), YYCC_U8("cp037") }, + { YYCC_U8("273"), YYCC_U8("cp273") }, + { YYCC_U8("ibm273"), YYCC_U8("cp273") }, + { YYCC_U8("csibm273"), YYCC_U8("cp273") }, + { YYCC_U8("ebcdic-cp-he"), YYCC_U8("cp424") }, + { YYCC_U8("ibm424"), YYCC_U8("cp424") }, + { YYCC_U8("437"), YYCC_U8("cp437") }, + { YYCC_U8("ibm437"), YYCC_U8("cp437") }, + { YYCC_U8("ebcdic-cp-be"), YYCC_U8("cp500") }, + { YYCC_U8("ebcdic-cp-ch"), YYCC_U8("cp500") }, + { YYCC_U8("ibm500"), YYCC_U8("cp500") }, + { YYCC_U8("ibm775"), YYCC_U8("cp775") }, + { YYCC_U8("850"), YYCC_U8("cp850") }, + { YYCC_U8("ibm850"), YYCC_U8("cp850") }, + { YYCC_U8("852"), YYCC_U8("cp852") }, + { YYCC_U8("ibm852"), YYCC_U8("cp852") }, + { YYCC_U8("855"), YYCC_U8("cp855") }, + { YYCC_U8("ibm855"), YYCC_U8("cp855") }, + { YYCC_U8("857"), YYCC_U8("cp857") }, + { YYCC_U8("ibm857"), YYCC_U8("cp857") }, + { YYCC_U8("858"), YYCC_U8("cp858") }, + { YYCC_U8("ibm858"), YYCC_U8("cp858") }, + { YYCC_U8("860"), YYCC_U8("cp860") }, + { YYCC_U8("ibm860"), YYCC_U8("cp860") }, + { YYCC_U8("861"), YYCC_U8("cp861") }, + { YYCC_U8("cp-is"), YYCC_U8("cp861") }, + { YYCC_U8("ibm861"), YYCC_U8("cp861") }, + { YYCC_U8("862"), YYCC_U8("cp862") }, + { YYCC_U8("ibm862"), YYCC_U8("cp862") }, + { YYCC_U8("863"), YYCC_U8("cp863") }, + { YYCC_U8("ibm863"), YYCC_U8("cp863") }, + { YYCC_U8("ibm864"), YYCC_U8("cp864") }, + 
{ YYCC_U8("865"), YYCC_U8("cp865") }, + { YYCC_U8("ibm865"), YYCC_U8("cp865") }, + { YYCC_U8("866"), YYCC_U8("cp866") }, + { YYCC_U8("ibm866"), YYCC_U8("cp866") }, + { YYCC_U8("869"), YYCC_U8("cp869") }, + { YYCC_U8("cp-gr"), YYCC_U8("cp869") }, + { YYCC_U8("ibm869"), YYCC_U8("cp869") }, + { YYCC_U8("932"), YYCC_U8("cp932") }, + { YYCC_U8("ms932"), YYCC_U8("cp932") }, + { YYCC_U8("mskanji"), YYCC_U8("cp932") }, + { YYCC_U8("ms-kanji"), YYCC_U8("cp932") }, + { YYCC_U8("windows-31j"), YYCC_U8("cp932") }, + { YYCC_U8("949"), YYCC_U8("cp949") }, + { YYCC_U8("ms949"), YYCC_U8("cp949") }, + { YYCC_U8("uhc"), YYCC_U8("cp949") }, + { YYCC_U8("950"), YYCC_U8("cp950") }, + { YYCC_U8("ms950"), YYCC_U8("cp950") }, + { YYCC_U8("ibm1026"), YYCC_U8("cp1026") }, + { YYCC_U8("1125"), YYCC_U8("cp1125") }, + { YYCC_U8("ibm1125"), YYCC_U8("cp1125") }, + { YYCC_U8("cp866u"), YYCC_U8("cp1125") }, + { YYCC_U8("ruscii"), YYCC_U8("cp1125") }, + { YYCC_U8("ibm1140"), YYCC_U8("cp1140") }, + { YYCC_U8("windows-1250"), YYCC_U8("cp1250") }, + { YYCC_U8("windows-1251"), YYCC_U8("cp1251") }, + { YYCC_U8("windows-1252"), YYCC_U8("cp1252") }, + { YYCC_U8("windows-1253"), YYCC_U8("cp1253") }, + { YYCC_U8("windows-1254"), YYCC_U8("cp1254") }, + { YYCC_U8("windows-1255"), YYCC_U8("cp1255") }, + { YYCC_U8("windows-1256"), YYCC_U8("cp1256") }, + { YYCC_U8("windows-1257"), YYCC_U8("cp1257") }, + { YYCC_U8("windows-1258"), YYCC_U8("cp1258") }, + { YYCC_U8("eucjp"), YYCC_U8("euc_jp") }, + { YYCC_U8("ujis"), YYCC_U8("euc_jp") }, + { YYCC_U8("u-jis"), YYCC_U8("euc_jp") }, + { YYCC_U8("jisx0213"), YYCC_U8("euc_jis_2004") }, + { YYCC_U8("eucjis2004"), YYCC_U8("euc_jis_2004") }, + { YYCC_U8("eucjisx0213"), YYCC_U8("euc_jisx0213") }, + { YYCC_U8("euckr"), YYCC_U8("euc_kr") }, + { YYCC_U8("korean"), YYCC_U8("euc_kr") }, + { YYCC_U8("ksc5601"), YYCC_U8("euc_kr") }, + { YYCC_U8("ks_c-5601"), YYCC_U8("euc_kr") }, + { YYCC_U8("ks_c-5601-1987"), YYCC_U8("euc_kr") }, + { YYCC_U8("ksx1001"), YYCC_U8("euc_kr") }, + { 
YYCC_U8("ks_x-1001"), YYCC_U8("euc_kr") }, + { YYCC_U8("chinese"), YYCC_U8("gb2312") }, + { YYCC_U8("csiso58gb231280"), YYCC_U8("gb2312") }, + { YYCC_U8("euc-cn"), YYCC_U8("gb2312") }, + { YYCC_U8("euccn"), YYCC_U8("gb2312") }, + { YYCC_U8("eucgb2312-cn"), YYCC_U8("gb2312") }, + { YYCC_U8("gb2312-1980"), YYCC_U8("gb2312") }, + { YYCC_U8("gb2312-80"), YYCC_U8("gb2312") }, + { YYCC_U8("iso-ir-58"), YYCC_U8("gb2312") }, + { YYCC_U8("936"), YYCC_U8("gbk") }, + { YYCC_U8("cp936"), YYCC_U8("gbk") }, + { YYCC_U8("ms936"), YYCC_U8("gbk") }, + { YYCC_U8("gb18030-2000"), YYCC_U8("gb18030") }, + { YYCC_U8("hzgb"), YYCC_U8("hz") }, + { YYCC_U8("hz-gb"), YYCC_U8("hz") }, + { YYCC_U8("hz-gb-2312"), YYCC_U8("hz") }, + { YYCC_U8("csiso2022jp"), YYCC_U8("iso2022_jp") }, + { YYCC_U8("iso2022jp"), YYCC_U8("iso2022_jp") }, + { YYCC_U8("iso-2022-jp"), YYCC_U8("iso2022_jp") }, + { YYCC_U8("iso2022jp-1"), YYCC_U8("iso2022_jp_1") }, + { YYCC_U8("iso-2022-jp-1"), YYCC_U8("iso2022_jp_1") }, + { YYCC_U8("iso2022jp-2"), YYCC_U8("iso2022_jp_2") }, + { YYCC_U8("iso-2022-jp-2"), YYCC_U8("iso2022_jp_2") }, + { YYCC_U8("iso2022jp-2004"), YYCC_U8("iso2022_jp_2004") }, + { YYCC_U8("iso-2022-jp-2004"), YYCC_U8("iso2022_jp_2004") }, + { YYCC_U8("iso2022jp-3"), YYCC_U8("iso2022_jp_3") }, + { YYCC_U8("iso-2022-jp-3"), YYCC_U8("iso2022_jp_3") }, + { YYCC_U8("iso2022jp-ext"), YYCC_U8("iso2022_jp_ext") }, + { YYCC_U8("iso-2022-jp-ext"), YYCC_U8("iso2022_jp_ext") }, + { YYCC_U8("csiso2022kr"), YYCC_U8("iso2022_kr") }, + { YYCC_U8("iso2022kr"), YYCC_U8("iso2022_kr") }, + { YYCC_U8("iso-2022-kr"), YYCC_U8("iso2022_kr") }, + { YYCC_U8("iso-8859-1"), YYCC_U8("latin_1") }, + { YYCC_U8("iso8859-1"), YYCC_U8("latin_1") }, + { YYCC_U8("8859"), YYCC_U8("latin_1") }, + { YYCC_U8("cp819"), YYCC_U8("latin_1") }, + { YYCC_U8("latin"), YYCC_U8("latin_1") }, + { YYCC_U8("latin1"), YYCC_U8("latin_1") }, + { YYCC_U8("l1"), YYCC_U8("latin_1") }, + { YYCC_U8("iso-8859-2"), YYCC_U8("iso8859_2") }, + { YYCC_U8("latin2"), 
YYCC_U8("iso8859_2") }, + { YYCC_U8("l2"), YYCC_U8("iso8859_2") }, + { YYCC_U8("iso-8859-3"), YYCC_U8("iso8859_3") }, + { YYCC_U8("latin3"), YYCC_U8("iso8859_3") }, + { YYCC_U8("l3"), YYCC_U8("iso8859_3") }, + { YYCC_U8("iso-8859-4"), YYCC_U8("iso8859_4") }, + { YYCC_U8("latin4"), YYCC_U8("iso8859_4") }, + { YYCC_U8("l4"), YYCC_U8("iso8859_4") }, + { YYCC_U8("iso-8859-5"), YYCC_U8("iso8859_5") }, + { YYCC_U8("cyrillic"), YYCC_U8("iso8859_5") }, + { YYCC_U8("iso-8859-6"), YYCC_U8("iso8859_6") }, + { YYCC_U8("arabic"), YYCC_U8("iso8859_6") }, + { YYCC_U8("iso-8859-7"), YYCC_U8("iso8859_7") }, + { YYCC_U8("greek"), YYCC_U8("iso8859_7") }, + { YYCC_U8("greek8"), YYCC_U8("iso8859_7") }, + { YYCC_U8("iso-8859-8"), YYCC_U8("iso8859_8") }, + { YYCC_U8("hebrew"), YYCC_U8("iso8859_8") }, + { YYCC_U8("iso-8859-9"), YYCC_U8("iso8859_9") }, + { YYCC_U8("latin5"), YYCC_U8("iso8859_9") }, + { YYCC_U8("l5"), YYCC_U8("iso8859_9") }, + { YYCC_U8("iso-8859-10"), YYCC_U8("iso8859_10") }, + { YYCC_U8("latin6"), YYCC_U8("iso8859_10") }, + { YYCC_U8("l6"), YYCC_U8("iso8859_10") }, + { YYCC_U8("iso-8859-11"), YYCC_U8("iso8859_11") }, + { YYCC_U8("thai"), YYCC_U8("iso8859_11") }, + { YYCC_U8("iso-8859-13"), YYCC_U8("iso8859_13") }, + { YYCC_U8("latin7"), YYCC_U8("iso8859_13") }, + { YYCC_U8("l7"), YYCC_U8("iso8859_13") }, + { YYCC_U8("iso-8859-14"), YYCC_U8("iso8859_14") }, + { YYCC_U8("latin8"), YYCC_U8("iso8859_14") }, + { YYCC_U8("l8"), YYCC_U8("iso8859_14") }, + { YYCC_U8("iso-8859-15"), YYCC_U8("iso8859_15") }, + { YYCC_U8("latin9"), YYCC_U8("iso8859_15") }, + { YYCC_U8("l9"), YYCC_U8("iso8859_15") }, + { YYCC_U8("iso-8859-16"), YYCC_U8("iso8859_16") }, + { YYCC_U8("latin10"), YYCC_U8("iso8859_16") }, + { YYCC_U8("l10"), YYCC_U8("iso8859_16") }, + { YYCC_U8("cp1361"), YYCC_U8("johab") }, + { YYCC_U8("ms1361"), YYCC_U8("johab") }, + { YYCC_U8("kz_1048"), YYCC_U8("kz1048") }, + { YYCC_U8("strk1048_2002"), YYCC_U8("kz1048") }, + { YYCC_U8("rk1048"), YYCC_U8("kz1048") }, + { 
YYCC_U8("maccyrillic"), YYCC_U8("mac_cyrillic") }, + { YYCC_U8("macgreek"), YYCC_U8("mac_greek") }, + { YYCC_U8("maciceland"), YYCC_U8("mac_iceland") }, + { YYCC_U8("maclatin2"), YYCC_U8("mac_latin2") }, + { YYCC_U8("maccentraleurope"), YYCC_U8("mac_latin2") }, + { YYCC_U8("mac_centeuro"), YYCC_U8("mac_latin2") }, + { YYCC_U8("macroman"), YYCC_U8("mac_roman") }, + { YYCC_U8("macintosh"), YYCC_U8("mac_roman") }, + { YYCC_U8("macturkish"), YYCC_U8("mac_turkish") }, + { YYCC_U8("csptcp154"), YYCC_U8("ptcp154") }, + { YYCC_U8("pt154"), YYCC_U8("ptcp154") }, + { YYCC_U8("cp154"), YYCC_U8("ptcp154") }, + { YYCC_U8("cyrillic-asian"), YYCC_U8("ptcp154") }, + { YYCC_U8("csshiftjis"), YYCC_U8("shift_jis") }, + { YYCC_U8("shiftjis"), YYCC_U8("shift_jis") }, + { YYCC_U8("sjis"), YYCC_U8("shift_jis") }, + { YYCC_U8("s_jis"), YYCC_U8("shift_jis") }, + { YYCC_U8("shiftjis2004"), YYCC_U8("shift_jis_2004") }, + { YYCC_U8("sjis_2004"), YYCC_U8("shift_jis_2004") }, + { YYCC_U8("sjis2004"), YYCC_U8("shift_jis_2004") }, + { YYCC_U8("shiftjisx0213"), YYCC_U8("shift_jisx0213") }, + { YYCC_U8("sjisx0213"), YYCC_U8("shift_jisx0213") }, + { YYCC_U8("s_jisx0213"), YYCC_U8("shift_jisx0213") }, + { YYCC_U8("u32"), YYCC_U8("utf_32") }, + { YYCC_U8("utf32"), YYCC_U8("utf_32") }, + { YYCC_U8("utf-32be"), YYCC_U8("utf_32_be") }, + { YYCC_U8("utf-32le"), YYCC_U8("utf_32_le") }, + { YYCC_U8("u16"), YYCC_U8("utf_16") }, + { YYCC_U8("utf16"), YYCC_U8("utf_16") }, + { YYCC_U8("utf-16be"), YYCC_U8("utf_16_be") }, + { YYCC_U8("utf-16le"), YYCC_U8("utf_16_le") }, + { YYCC_U8("u7"), YYCC_U8("utf_7") }, + { YYCC_U8("unicode-1-1-utf-7"), YYCC_U8("utf_7") }, + { YYCC_U8("u8"), YYCC_U8("utf_8") }, + { YYCC_U8("utf"), YYCC_U8("utf_8") }, + { YYCC_U8("utf8"), YYCC_U8("utf_8") }, + { YYCC_U8("utf-8"), YYCC_U8("utf_8") }, + { YYCC_U8("cp65001"), YYCC_U8("utf_8") }, + }; + + +#if defined(YYCC_PYCODEC_WIN32_BACKEND) + + using CodePage = NS_YYCC_ENCODING_BACKEND::CodePage; + + static const std::map WINCP_MAP { + { 
YYCC_U8("ascii"), static_cast(437u) }, /* NOTE(review): Windows lists 20127 as its US-ASCII code page; confirm that OEM code page 437 is the intended stand-in. */ + { YYCC_U8("big5"), static_cast(950u) }, + { YYCC_U8("cp037"), static_cast(37u) }, /* Decimal 37 (IBM037). A leading zero makes an integer literal octal, so the former 037u evaluated to 31. */ + { YYCC_U8("cp437"), static_cast(437u) }, + { YYCC_U8("cp500"), static_cast(500u) }, + { YYCC_U8("cp720"), static_cast(720u) }, + { YYCC_U8("cp737"), static_cast(737u) }, + { YYCC_U8("cp775"), static_cast(775u) }, + { YYCC_U8("cp850"), static_cast(850u) }, + { YYCC_U8("cp852"), static_cast(852u) }, + { YYCC_U8("cp855"), static_cast(855u) }, + { YYCC_U8("cp857"), static_cast(857u) }, + { YYCC_U8("cp858"), static_cast(858u) }, + { YYCC_U8("cp860"), static_cast(860u) }, + { YYCC_U8("cp861"), static_cast(861u) }, + { YYCC_U8("cp862"), static_cast(862u) }, + { YYCC_U8("cp863"), static_cast(863u) }, + { YYCC_U8("cp864"), static_cast(864u) }, + { YYCC_U8("cp865"), static_cast(865u) }, + { YYCC_U8("cp866"), static_cast(866u) }, + { YYCC_U8("cp869"), static_cast(869u) }, + { YYCC_U8("cp874"), static_cast(874u) }, + { YYCC_U8("cp875"), static_cast(875u) }, + { YYCC_U8("cp932"), static_cast(932u) }, + { YYCC_U8("cp949"), static_cast(949u) }, + { YYCC_U8("cp950"), static_cast(950u) }, + { YYCC_U8("cp1026"), static_cast(1026u) }, + { YYCC_U8("cp1140"), static_cast(1140u) }, + { YYCC_U8("cp1250"), static_cast(1250u) }, + { YYCC_U8("cp1251"), static_cast(1251u) }, + { YYCC_U8("cp1252"), static_cast(1252u) }, + { YYCC_U8("cp1253"), static_cast(1253u) }, + { YYCC_U8("cp1254"), static_cast(1254u) }, + { YYCC_U8("cp1255"), static_cast(1255u) }, + { YYCC_U8("cp1256"), static_cast(1256u) }, + { YYCC_U8("cp1257"), static_cast(1257u) }, + { YYCC_U8("cp1258"), static_cast(1258u) }, + { YYCC_U8("euc_jp"), static_cast(20932u) }, + { YYCC_U8("euc_kr"), static_cast(51949u) }, + { YYCC_U8("gb2312"), static_cast(936u) }, + { YYCC_U8("gbk"), static_cast(936u) }, + { YYCC_U8("gb18030"), static_cast(54936u) }, + { YYCC_U8("hz"), static_cast(52936u) }, + { YYCC_U8("iso2022_jp"), static_cast(50220u) }, + { YYCC_U8("iso2022_kr"), static_cast(50225u) }, + { YYCC_U8("latin_1"),
static_cast(28591u) }, + { YYCC_U8("iso8859_2"), static_cast(28592u) }, + { YYCC_U8("iso8859_3"), static_cast(28593u) }, + { YYCC_U8("iso8859_4"), static_cast(28594u) }, + { YYCC_U8("iso8859_5"), static_cast(28595u) }, + { YYCC_U8("iso8859_6"), static_cast(28596u) }, + { YYCC_U8("iso8859_7"), static_cast(28597u) }, + { YYCC_U8("iso8859_8"), static_cast(28598u) }, + { YYCC_U8("iso8859_9"), static_cast(28599u) }, + { YYCC_U8("iso8859_13"), static_cast(28603u) }, + { YYCC_U8("iso8859_15"), static_cast(28605u) }, + { YYCC_U8("johab"), static_cast(1361u) }, + { YYCC_U8("mac_cyrillic"), static_cast(10007u) }, + { YYCC_U8("mac_greek"), static_cast(10006u) }, + { YYCC_U8("mac_iceland"), static_cast(10079u) }, + { YYCC_U8("mac_turkish"), static_cast(10081u) }, + { YYCC_U8("shift_jis"), static_cast(932u) }, + { YYCC_U8("utf_7"), static_cast(65000u) }, + { YYCC_U8("utf_8"), static_cast(65001u) }, + }; + +#else + + static const std::map ICONV_MAP { + { YYCC_U8("ascii"), "ASCII" }, + { YYCC_U8("big5"), "BIG5" }, + { YYCC_U8("big5hkscs"), "BIG5-HKSCS" }, + { YYCC_U8("cp850"), "CP850" }, + { YYCC_U8("cp862"), "CP862" }, + { YYCC_U8("cp866"), "CP866" }, + { YYCC_U8("cp874"), "CP874" }, + { YYCC_U8("cp932"), "CP932" }, + { YYCC_U8("cp949"), "CP949" }, + { YYCC_U8("cp950"), "CP950" }, + { YYCC_U8("cp1250"), "CP1250" }, + { YYCC_U8("cp1251"), "CP1251" }, + { YYCC_U8("cp1252"), "CP1252" }, + { YYCC_U8("cp1253"), "CP1253" }, + { YYCC_U8("cp1254"), "CP1254" }, + { YYCC_U8("cp1255"), "CP1255" }, + { YYCC_U8("cp1256"), "CP1256" }, + { YYCC_U8("cp1257"), "CP1257" }, + { YYCC_U8("cp1258"), "CP1258" }, + { YYCC_U8("euc_jp"), "EUC-JP" }, + { YYCC_U8("euc_kr"), "EUC-KR" }, + { YYCC_U8("gb2312"), "CP936" }, + { YYCC_U8("gbk"), "GBK" }, + { YYCC_U8("gb18030"), "GB18030" }, + { YYCC_U8("hz"), "HZ" }, + { YYCC_U8("iso2022_jp"), "ISO-2022-JP" }, + { YYCC_U8("iso2022_jp_1"), "ISO-2022-JP-1" }, + { YYCC_U8("iso2022_jp_2"), "ISO-2022-JP-2" }, + { YYCC_U8("iso2022_kr"), "ISO-2022-KR" }, + { 
YYCC_U8("latin_1"), "ISO-8859-1" }, + { YYCC_U8("iso8859_2"), "ISO-8859-2" }, + { YYCC_U8("iso8859_3"), "ISO-8859-3" }, + { YYCC_U8("iso8859_4"), "ISO-8859-4" }, + { YYCC_U8("iso8859_5"), "ISO-8859-5" }, + { YYCC_U8("iso8859_6"), "ISO-8859-6" }, + { YYCC_U8("iso8859_7"), "ISO-8859-7" }, + { YYCC_U8("iso8859_8"), "ISO-8859-8" }, + { YYCC_U8("iso8859_9"), "ISO-8859-9" }, + { YYCC_U8("iso8859_10"), "ISO-8859-10" }, + { YYCC_U8("iso8859_11"), "ISO-8859-11" }, + { YYCC_U8("iso8859_13"), "ISO-8859-13" }, + { YYCC_U8("iso8859_14"), "ISO-8859-14" }, + { YYCC_U8("iso8859_15"), "ISO-8859-15" }, + { YYCC_U8("iso8859_16"), "ISO-8859-16" }, + { YYCC_U8("johab"), "JOHAB" }, + { YYCC_U8("koi8_t"), "KOI8-T" }, + { YYCC_U8("mac_cyrillic"), "MacCyrillic" }, + { YYCC_U8("mac_greek"), "MacGreek" }, + { YYCC_U8("mac_iceland"), "MacIceland" }, + { YYCC_U8("mac_roman"), "MacRoman" }, + { YYCC_U8("mac_turkish"), "MacTurkish" }, + { YYCC_U8("ptcp154"), "PT154" }, + { YYCC_U8("shift_jis"), "SHIFT_JIS" }, + { YYCC_U8("utf_32"), "UTF-32" }, + { YYCC_U8("utf_32_be"), "UTF-32BE" }, + { YYCC_U8("utf_32_le"), "UTF-32LE" }, + { YYCC_U8("utf_16"), "UTF16" }, + { YYCC_U8("utf_16_be"), "UTF-16BE" }, + { YYCC_U8("utf_16_le"), "UTF-16LE" }, + { YYCC_U8("utf_7"), "UTF-7" }, + { YYCC_U8("utf_8"), "UTF-8" }, + }; + +#endif + + +#pragma endregion + +#pragma region Misc + + ConvError::ConvError(const ConvError::Error& err) : inner(err) {} + + bool is_valid_encoding_name(const EncodingName& name) { + + } + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + 
+#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +#pragma region + +#pragma endregion + +} // namespace yycc::encoding::pycodec diff --git a/src/yycc/encoding/pycodec.hpp b/src/yycc/encoding/pycodec.hpp index e69de29..ea15f8a 100644 --- a/src/yycc/encoding/pycodec.hpp +++ b/src/yycc/encoding/pycodec.hpp @@ -0,0 +1,202 @@ +#pragma once +#include "../macro/os_detector.hpp" +#include "../macro/class_copy_move.hpp" +#include "../patch/expected.hpp" +#include "../string.hpp" + +// Choose the backend of PyCodec module +#if defined(YYCC_OS_WINDOWS) +#include "windows.hpp" +#define YYCC_PYCODEC_WIN32_BACKEND +#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::windows +#else +#include "iconv.hpp" +#define YYCC_PYCODEC_ICONV_BACKEND +#define NS_YYCC_ENCODING_BACKEND ::yycc::encoding::iconv +#endif + +#define NS_YYCC_STRING ::yycc::string +#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected + +namespace yycc::encoding::pycodec { + + using EncodingName = NS_YYCC_STRING::u8string_view; + + /// @private + struct ConvError { + using Error = NS_YYCC_ENCODING_BACKEND::ConvError; + ConvError(const Error& err); + Error inner; + }; + + /// @private + template + using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected; + + /** + * @brief Check whether given name is a valid encoding name in PyCodec. + * @param[in] name The name to be checked. + * @return True if it is valid, otherwise false. 
+ */ + bool is_valid_encoding_name(const EncodingName& name); + + // Char -> UTF8 + class CharToUtf8 { + public: + CharToUtf8(const EncodingName& name); + ~CharToUtf8(); + YYCC_DELETE_COPY(CharToUtf8) + YYCC_DEFAULT_MOVE(CharToUtf8) + + public: + ConvResult priv_to_utf8(const std::string_view& src); + bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst); + NS_YYCC_STRING::u8string to_utf8(const std::string_view& src); + + private: +#if defined(YYCC_PYCODEC_WIN32_BACKEND) + NS_YYCC_ENCODING_BACKEND::CodePage code_page; +#else + NS_YYCC_ENCODING_BACKEND::CharToUtf8 inner; +#endif + }; + + // UTF8 -> Char + class Utf8ToChar { + public: + Utf8ToChar(const EncodingName& name); + ~Utf8ToChar(); + YYCC_DELETE_COPY(Utf8ToChar) + YYCC_DEFAULT_MOVE(Utf8ToChar) + + public: + ConvResult priv_to_char(const NS_YYCC_STRING::u8string_view& src); + bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst); + std::string to_char(const NS_YYCC_STRING::u8string_view& src); + + private: +#if defined(YYCC_PYCODEC_WIN32_BACKEND) + NS_YYCC_ENCODING_BACKEND::CodePage code_page; +#else + NS_YYCC_ENCODING_BACKEND::Utf8ToChar inner; +#endif + }; + + // WChar -> UTF8 + class WcharToUtf8 { + public: + WcharToUtf8(); + ~WcharToUtf8(); + YYCC_DELETE_COPY(WcharToUtf8) + YYCC_DEFAULT_MOVE(WcharToUtf8) + + public: + ConvResult priv_to_utf8(const std::wstring_view& src); + bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst); + NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src); + + private: +#if defined(YYCC_PYCODEC_ICONV_BACKEND) + NS_YYCC_ENCODING_BACKEND::WcharToUtf8 inner; +#endif + }; + + // UTF8 -> WChar + class Utf8ToWchar { + public: + Utf8ToWchar(); + ~Utf8ToWchar(); + YYCC_DELETE_COPY(Utf8ToWchar) + YYCC_DEFAULT_MOVE(Utf8ToWchar) + + public: + ConvResult priv_to_wchar(const NS_YYCC_STRING::u8string_view& src); + bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst); + std::wstring to_wchar(const 
NS_YYCC_STRING::u8string_view& src); + + private: +#if defined(YYCC_PYCODEC_ICONV_BACKEND) + NS_YYCC_ENCODING_BACKEND::Utf8ToWchar inner; +#endif + }; + + // UTF8 -> UTF16 + class Utf8ToUtf16 { + public: + Utf8ToUtf16(); + ~Utf8ToUtf16(); + YYCC_DELETE_COPY(Utf8ToUtf16) + YYCC_DEFAULT_MOVE(Utf8ToUtf16) + + public: + ConvResult priv_to_utf16(const NS_YYCC_STRING::u8string_view& src); + bool to_utf16(const NS_YYCC_STRING::u8string_view& src, std::u16string& dst); + std::u16string to_utf16(const NS_YYCC_STRING::u8string_view& src); + + private: +#if defined(YYCC_PYCODEC_ICONV_BACKEND) + NS_YYCC_ENCODING_BACKEND::Utf8ToUtf16 inner; +#endif + }; + + // UTF16 -> UTF8 + class Utf16ToUtf8 { + public: + Utf16ToUtf8(); + ~Utf16ToUtf8(); + YYCC_DELETE_COPY(Utf16ToUtf8) + YYCC_DEFAULT_MOVE(Utf16ToUtf8) + + public: + ConvResult priv_to_utf8(const std::u16string_view& src); + bool to_utf8(const std::u16string_view& src, NS_YYCC_STRING::u8string& dst); + NS_YYCC_STRING::u8string to_utf8(const std::u16string_view& src); + + private: +#if defined(YYCC_PYCODEC_ICONV_BACKEND) + NS_YYCC_ENCODING_BACKEND::Utf16ToUtf8 inner; +#endif + }; + + // UTF8 -> UTF32 + class Utf8ToUtf32 { + public: + Utf8ToUtf32(); + ~Utf8ToUtf32(); + YYCC_DELETE_COPY(Utf8ToUtf32) + YYCC_DEFAULT_MOVE(Utf8ToUtf32) + + public: + ConvResult priv_to_utf32(const NS_YYCC_STRING::u8string_view& src); + bool to_utf32(const NS_YYCC_STRING::u8string_view& src, std::u32string& dst); + std::u32string to_utf32(const NS_YYCC_STRING::u8string_view& src); + + private: +#if defined(YYCC_PYCODEC_ICONV_BACKEND) + NS_YYCC_ENCODING_BACKEND::Utf8ToUtf32 inner; +#endif + }; + + // UTF32 -> UTF8 + class Utf32ToUtf8 { + public: + Utf32ToUtf8(); + ~Utf32ToUtf8(); + YYCC_DELETE_COPY(Utf32ToUtf8) + YYCC_DEFAULT_MOVE(Utf32ToUtf8) + + public: + ConvResult priv_to_utf8(const std::u32string_view& src); + bool to_utf8(const std::u32string_view& src, NS_YYCC_STRING::u8string& dst); + NS_YYCC_STRING::u8string to_utf8(const std::u32string_view& 
src); + + private: +#if defined(YYCC_PYCODEC_ICONV_BACKEND) + NS_YYCC_ENCODING_BACKEND::Utf32ToUtf8 inner; +#endif + }; + +} + +#undef NS_YYCC_PATCH_EXPECTED +#undef NS_YYCC_STRING diff --git a/src/yycc/encoding/utf.cpp b/src/yycc/encoding/stlcvt.cpp similarity index 99% rename from src/yycc/encoding/utf.cpp rename to src/yycc/encoding/stlcvt.cpp index fc8d80b..7f438b1 100644 --- a/src/yycc/encoding/utf.cpp +++ b/src/yycc/encoding/stlcvt.cpp @@ -1,10 +1,10 @@ -#include "utf.hpp" +#include "stlcvt.hpp" #include #define NS_YYCC_STRING ::yycc::string #define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected -namespace yycc::encoding::utf { +namespace yycc::encoding::stlcvt { #pragma region Generic Converter diff --git a/src/yycc/encoding/utf.hpp b/src/yycc/encoding/stlcvt.hpp similarity index 97% rename from src/yycc/encoding/utf.hpp rename to src/yycc/encoding/stlcvt.hpp index 8cac4cc..4f16a1b 100644 --- a/src/yycc/encoding/utf.hpp +++ b/src/yycc/encoding/stlcvt.hpp @@ -5,7 +5,7 @@ #define NS_YYCC_STRING ::yycc::string #define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected -namespace yycc::encoding::utf { +namespace yycc::encoding::stlcvt { /// @private struct ConvError {}; diff --git a/src/yycc/encoding/windows.cpp b/src/yycc/encoding/windows.cpp index 346b2ea..43ef690 100644 --- a/src/yycc/encoding/windows.cpp +++ b/src/yycc/encoding/windows.cpp @@ -228,6 +228,9 @@ namespace yycc::encoding::windows { // The convertion between UTF is implemented by c16rtomb, c32rtomb, mbrtoc16 and mbrtoc32. // These function is locale related in C++ standard, but in Microsoft STL, it's only for UTF8. // So we can use them safely in Win32 environment. + // Reference: + // * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/c16rtomb-c32rtomb1?view=msvc-170 + // * https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/mbrtoc16-mbrtoc323?view=msvc-170 // 1 UTF32 unit can produe 4 UTF8 units or 2 UTF16 units in theory. 
// So we pre-allocate memory for the result to prevent allocating memory multiple times. @@ -280,8 +283,9 @@ namespace yycc::encoding::windows { std::mbstate_t state{}; char mbout[MB_LEN_MAX]{}; for (char16_t c : src) { - std::size_t rc = std::c16rtomb(mbout, c, &state); - if (rc != (std::size_t) -1) dst.append(reinterpret_cast(mbout), rc); + size_t rc = std::c16rtomb(mbout, c, &state); + + if (rc != (size_t) -1) dst.append(reinterpret_cast(mbout), rc); else return ConvError::InvalidUtf16; } return dst; @@ -340,8 +344,9 @@ namespace yycc::encoding::windows { std::mbstate_t state{}; char mbout[MB_LEN_MAX]{}; for (char32_t c : src) { - std::size_t rc = std::c32rtomb(mbout, c, &state); - if (rc != (std::size_t) -1) dst.append(reinterpret_cast(mbout), rc); + size_t rc = std::c32rtomb(mbout, c, &state); + + if (rc != (size_t) -1) dst.append(reinterpret_cast(mbout), rc); else return ConvError::InvalidUtf32; } return dst;