refactor: continue refactor to make the project can be built

This commit is contained in:
2025-07-31 22:25:14 +08:00
parent 5372af79f8
commit ce2b411b0b
30 changed files with 791 additions and 943 deletions

7
script/pycodec/README.md Normal file
View File

@ -0,0 +1,7 @@
# PyCodec
This directory contains everything related to PyCodec.
PyCodec uses a different encoding backend depending on the OS: on Windows it uses Win32 functions, and on other OSes it uses Iconv. So we need a table that converts a PyCodec universal encoding name to a Windows Code Page or an Iconv Code Name. These relations are stored in a CSV file, and a Python script renders them into C++ source code.
In the CSV file, each line is one record whose items are separated by tabs. The first item in a record is the standard PyCodec name. The second item is the corresponding Windows Code Page; if there is no corresponding Code Page, it can be empty. The third item is the corresponding Iconv Code Name; it can be empty in the same way. The number of remaining columns, from the fourth item onward (inclusive), is variable. Each of them is an alias of this standard PyCodec name.

View File

@ -1,6 +1,7 @@
import typing
import csv
from pathlib import Path
import os
import jinja2
class LanguageToken:
name: str
@ -8,56 +9,46 @@ class LanguageToken:
code_page: str | None
iconv_code: str | None
def __init__(self, name: str, alias: typing.Iterator[str], code_page: str, iconv_code: str):
    """Build a token from fields that have already been split apart.

    The name and every alias are lower-cased. An empty ``code_page`` or
    ``iconv_code`` string is stored as ``None``; any other value is kept
    as given.
    """
    self.name = name.lower()
    self.alias = tuple(entry.lower() for entry in alias)
    self.code_page = code_page if code_page != '' else None
    self.iconv_code = iconv_code if iconv_code != '' else None
def __init__(self, row: list[str]):
    """Init language token from CSV row.

    Row layout: standard name, Windows code page, Iconv code name, then
    any number of alias columns.

    The name and aliases are stripped and lower-cased, and empty aliases
    are dropped. The code page and Iconv code are stripped; when empty
    (or when the column is missing altogether) they are stored as
    ``None``.

    Raises:
        IndexError: if ``row`` has no columns at all.
    """
    # An entirely empty row still fails loudly here.
    name = row[0]
    # Pad the two optional columns: tools that trim trailing whitespace
    # can remove the empty trailing fields of a record.
    code_page, iconv_code = (*row[1:3], '', '')[:2]

    self.name = name.strip().lower()
    self.code_page = code_page.strip() or None
    self.iconv_code = iconv_code.strip() or None
    # For alias, we strip and to lower them first, and remove all empty entries
    normalized = (entry.strip().lower() for entry in row[3:])
    self.alias = tuple(entry for entry in normalized if entry)
def extract_data(fs: typing.TextIO) -> list[str]:
    """Read all lines from ``fs`` and return them without the first one.

    The first line is the table header. Line endings are left on the
    returned lines. An empty or header-only stream yields an empty list.
    """
    every_line = fs.readlines()
    # Slicing (rather than popping) keeps the empty-file case harmless.
    return every_line[1:]
def extract_token(csv_data: list[str]) -> tuple[LanguageToken, ...]:
    """Turn raw tab-separated lines into LanguageToken objects.

    Each line is split on tabs into: name, comma-separated alias list,
    code page, Iconv code. Aliases are stripped and empty ones dropped
    before being handed to LanguageToken.
    """
    tokens: list[LanguageToken] = []
    for record in csv_data:
        columns = record.strip('\n').split('\t')
        trimmed = (piece.strip() for piece in columns[1].split(','))
        aliases = (piece for piece in trimmed if len(piece) != 0)
        tokens.append(LanguageToken(columns[0], aliases, columns[2], columns[3]))
    return tuple(tokens)
def _get_self_dir() -> Path:
    """Return the resolved directory that contains this script file."""
    this_file = Path(__file__).resolve()
    return this_file.parent
def write_alias_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """Write the C++ map from every alias to its token's standard name.

    One entry is emitted per alias of every token, in input order,
    between the map's opening line and the closing ``};``.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, NS_YYCC_STRING::u8string> ALISA_MAP {\n')
    fs.writelines(
        f'\t{{ YYCC_U8("{alias}"), YYCC_U8("{token.name}") }},\n'
        for token in data
        for alias in token.alias
    )
    fs.write('};\n')
def write_win_cp_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """Write the C++ map from a token's standard name to its code page.

    Tokens whose ``code_page`` is ``None`` produce no entry.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, CodePage> WINCP_MAP {\n')
    for token in data:
        if token.code_page is None:
            continue
        fs.write(f'\t{{ YYCC_U8("{token.name}"), static_cast<CodePage>({token.code_page}u) }},\n')
    fs.write('};\n')
def _extract_tokens() -> list[LanguageToken]:
    """Load ``encoding_table.csv`` from this script's directory.

    The file is read as UTF-8, tab-delimited CSV. Every non-blank record
    becomes one LanguageToken, in file order.

    Returns:
        The list of parsed tokens.
    """
    csv_file = _get_self_dir() / 'encoding_table.csv'
    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_file, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f, delimiter='\t')
        # csv.reader yields an empty list for a blank line. Skip such rows
        # (and rows made only of whitespace) so a stray blank line in the
        # table does not crash token construction with IndexError.
        return [
            LanguageToken(row)
            for row in reader
            if any(field.strip() for field in row)
        ]
def _render_cpp(tokens: list[LanguageToken]) -> None:
    """Render ``encoding_table.cpp.jinja`` into ``encoding_table.cpp``.

    Both the template and the output file live in this script's
    directory. The template receives the tokens under the name ``tokens``.
    """
    base_dir = _get_self_dir()
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(base_dir))
    cpp_template = env.get_template('encoding_table.cpp.jinja')

    # The output is opened before rendering, as in the original flow.
    with open(base_dir / 'encoding_table.cpp', 'w', encoding='utf-8') as out:
        out.write(cpp_template.render(tokens=tokens))
def write_iconv_map(fs: typing.TextIO, data: tuple[LanguageToken, ...]) -> None:
    """Write the C++ map from a token's standard name to its Iconv code.

    Tokens whose ``iconv_code`` is ``None`` produce no entry.
    """
    fs.write('static const std::map<NS_YYCC_STRING::u8string, std::string> ICONV_MAP {\n')
    with_iconv = (token for token in data if token.iconv_code is not None)
    for token in with_iconv:
        fs.write(f'\t{{ YYCC_U8("{token.name}"), "{token.iconv_code}" }},\n')
    fs.write('};\n')
if __name__ == '__main__':
    # Input table and generated source both sit next to this script.
    self_path = Path(__file__).resolve().parent
    csv_file = self_path / 'encoding_table.csv'
    cpp_file = self_path / 'encoding_table.cpp'

    # Writer-based path: parse the table, then emit the three maps in order.
    with open(csv_file, 'r', encoding='utf-8') as fr, \
            open(cpp_file, 'w', encoding='utf-8') as fw:
        token = extract_token(extract_data(fr))
        for emit in (write_alias_map, write_win_cp_map, write_iconv_map):
            emit(fw, token)

    # Template-based path.
    # NOTE(review): assumed to run after the files above are closed; confirm.
    tokens = _extract_tokens()
    _render_cpp(tokens)

View File

@ -0,0 +1,23 @@
{# Alias -> standard name: one entry per alias of every token. -#}
static const std::map<std::u8string_view, std::u8string_view> ALIAS_MAP {
{% for token in tokens -%}
{% for alias in token.alias -%}
{ u8"{{ alias }}"sv, u8"{{ token.name }}"sv },
{% endfor -%}
{% endfor -%}
};
{# Standard name -> Windows code page: tokens whose code_page is none are skipped. -#}
static const std::map<std::u8string_view, CodePage> WINCP_MAP {
{% for token in tokens -%}
{% if token.code_page is not none -%}
{ u8"{{ token.name }}"sv, static_cast<CodePage>({{ token.code_page }}u) },
{% endif -%}
{% endfor -%}
};
{# Standard name -> Iconv code name: tokens whose iconv_code is none are skipped. -#}
static const std::map<std::u8string_view, std::string_view> ICONV_MAP {
{% for token in tokens -%}
{% if token.iconv_code is not none -%}
{ u8"{{ token.name }}"sv, "{{ token.iconv_code }}"sv },
{% endif -%}
{% endfor -%}
};

View File

@ -1,98 +1,97 @@
Encoding Alias Code Page Iconv Identifier
ascii 646, us-ascii 437 ASCII
big5 big5-tw, csbig5 950 BIG5
big5hkscs big5-hkscs, hkscs BIG5-HKSCS
cp037 IBM037, IBM039 037
cp273 273, IBM273, csIBM273
cp424 EBCDIC-CP-HE, IBM424
cp437 437, IBM437 437
cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 500
cp720 720
cp737 737
cp775 IBM775 775
cp850 850, IBM850 850 CP850
cp852 852, IBM852 852
cp855 855, IBM855 855
cp856
cp857 857, IBM857 857
cp858 858, IBM858 858
cp860 860, IBM860 860
cp861 861, CP-IS, IBM861 861
cp862 862, IBM862 862 CP862
cp863 863, IBM863 863
cp864 IBM864 864
cp865 865, IBM865 865
cp866 866, IBM866 866 CP866
cp869 869, CP-GR, IBM869 869
cp874 874 CP874
cp875 875
cp932 932, ms932, mskanji, ms-kanji, windows-31j 932 CP932
cp949 949, ms949, uhc 949 CP949
cp950 950, ms950 950 CP950
cp1006
cp1026 ibm1026 1026
cp1125 1125, ibm1125, cp866u, ruscii
cp1140 ibm1140 1140
cp1250 windows-1250 1250 CP1250
cp1251 windows-1251 1251 CP1251
cp1252 windows-1252 1252 CP1252
cp1253 windows-1253 1253 CP1253
cp1254 windows-1254 1254 CP1254
cp1255 windows-1255 1255 CP1255
cp1256 windows-1256 1256 CP1256
cp1257 windows-1257 1257 CP1257
cp1258 windows-1258 1258 CP1258
euc_jp eucjp, ujis, u-jis 20932 EUC-JP
euc_jis_2004 jisx0213, eucjis2004
euc_jisx0213 eucjisx0213
euc_kr euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 51949 EUC-KR
gb2312 chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, iso-ir-58 936 CP936
gbk 936, cp936, ms936 936 GBK
gb18030 gb18030-2000 54936 GB18030
hz hzgb, hz-gb, hz-gb-2312 52936 HZ
iso2022_jp csiso2022jp, iso2022jp, iso-2022-jp 50220 ISO-2022-JP
iso2022_jp_1 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-1
iso2022_jp_2 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2
iso2022_jp_2004 iso2022jp-2004, iso-2022-jp-2004
iso2022_jp_3 iso2022jp-3, iso-2022-jp-3
iso2022_jp_ext iso2022jp-ext, iso-2022-jp-ext
iso2022_kr csiso2022kr, iso2022kr, iso-2022-kr 50225 ISO-2022-KR
latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28591 ISO-8859-1
iso8859_2 iso-8859-2, latin2, L2 28592 ISO-8859-2
iso8859_3 iso-8859-3, latin3, L3 28593 ISO-8859-3
iso8859_4 iso-8859-4, latin4, L4 28594 ISO-8859-4
iso8859_5 iso-8859-5, cyrillic 28595 ISO-8859-5
iso8859_6 iso-8859-6, arabic 28596 ISO-8859-6
iso8859_7 iso-8859-7, greek, greek8 28597 ISO-8859-7
iso8859_8 iso-8859-8, hebrew 28598 ISO-8859-8
iso8859_9 iso-8859-9, latin5, L5 28599 ISO-8859-9
iso8859_10 iso-8859-10, latin6, L6 ISO-8859-10
iso8859_11 iso-8859-11, thai ISO-8859-11
iso8859_13 iso-8859-13, latin7, L7 28603 ISO-8859-13
iso8859_14 iso-8859-14, latin8, L8 ISO-8859-14
iso8859_15 iso-8859-15, latin9, L9 28605 ISO-8859-15
iso8859_16 iso-8859-16, latin10, L10 ISO-8859-16
johab cp1361, ms1361 1361 JOHAB
koi8_r
koi8_t KOI8-T
koi8_u
kz1048 kz_1048, strk1048_2002, rk1048
mac_cyrillic maccyrillic 10007 MacCyrillic
mac_greek macgreek 10006 MacGreek
mac_iceland maciceland 10079 MacIceland
mac_latin2 maclatin2, maccentraleurope, mac_centeuro
mac_roman macroman, macintosh MacRoman
mac_turkish macturkish 10081 MacTurkish
ptcp154 csptcp154, pt154, cp154, cyrillic-asian PT154
shift_jis csshiftjis, shiftjis, sjis, s_jis 932 SHIFT_JIS
shift_jis_2004 shiftjis2004, sjis_2004, sjis2004
shift_jisx0213 shiftjisx0213, sjisx0213, s_jisx0213
utf_32 U32, utf32 UTF-32
utf_32_be UTF-32BE UTF-32BE
utf_32_le UTF-32LE UTF-32LE
utf_16 U16, utf16 UTF16
utf_16_be UTF-16BE UTF-16BE
utf_16_le UTF-16LE UTF-16LE
utf_7 U7, unicode-1-1-utf-7 65000 UTF-7
utf_8 U8, UTF, utf8, utf-8, cp65001 65001 UTF-8
utf_8_sig
ascii 437 ASCII 646 us-ascii
big5 950 BIG5 big5-tw csbig5
big5hkscs BIG5-HKSCS big5-hkscs hkscs
cp037 037 IBM037 IBM039
cp273 273 IBM273 csIBM273
cp424 EBCDIC-CP-HE IBM424
cp437 437 437 IBM437
cp500 500 EBCDIC-CP-BE EBCDIC-CP-CH IBM500
cp720 720
cp737 737
cp775 775 IBM775
cp850 850 CP850 850 IBM850
cp852 852 852 IBM852
cp855 855 855 IBM855
cp856
cp857 857 857 IBM857
cp858 858 858 IBM858
cp860 860 860 IBM860
cp861 861 861 CP-IS IBM861
cp862 862 CP862 862 IBM862
cp863 863 863 IBM863
cp864 864 IBM864
cp865 865 865 IBM865
cp866 866 CP866 866 IBM866
cp869 869 869 CP-GR IBM869
cp874 874 CP874
cp875 875
cp932 932 CP932 932 ms932 mskanji ms-kanji windows-31j
cp949 949 CP949 949 ms949 uhc
cp950 950 CP950 950 ms950
cp1006
cp1026 1026 ibm1026
cp1125 1125 ibm1125 cp866u ruscii
cp1140 1140 ibm1140
cp1250 1250 CP1250 windows-1250
cp1251 1251 CP1251 windows-1251
cp1252 1252 CP1252 windows-1252
cp1253 1253 CP1253 windows-1253
cp1254 1254 CP1254 windows-1254
cp1255 1255 CP1255 windows-1255
cp1256 1256 CP1256 windows-1256
cp1257 1257 CP1257 windows-1257
cp1258 1258 CP1258 windows-1258
euc_jp 20932 EUC-JP eucjp ujis u-jis
euc_jis_2004 jisx0213 eucjis2004
euc_jisx0213 eucjisx0213
euc_kr 51949 EUC-KR euckr korean ksc5601 ks_c-5601 ks_c-5601-1987 ksx1001 ks_x-1001
gb2312 936 CP936 chinese csiso58gb231280 euc-cn euccn eucgb2312-cn gb2312-1980 gb2312-80 iso-ir-58
gbk 936 GBK 936 cp936 ms936
gb18030 54936 GB18030 gb18030-2000
hz 52936 HZ hzgb hz-gb hz-gb-2312
iso2022_jp 50220 ISO-2022-JP csiso2022jp iso2022jp iso-2022-jp
iso2022_jp_1 ISO-2022-JP-1 iso2022jp-1 iso-2022-jp-1
iso2022_jp_2 ISO-2022-JP-2 iso2022jp-2 iso-2022-jp-2
iso2022_jp_2004 iso2022jp-2004 iso-2022-jp-2004
iso2022_jp_3 iso2022jp-3 iso-2022-jp-3
iso2022_jp_ext iso2022jp-ext iso-2022-jp-ext
iso2022_kr 50225 ISO-2022-KR csiso2022kr iso2022kr iso-2022-kr
latin_1 28591 ISO-8859-1 iso-8859-1 iso8859-1 8859 cp819 latin latin1 L1
iso8859_2 28592 ISO-8859-2 iso-8859-2 latin2 L2
iso8859_3 28593 ISO-8859-3 iso-8859-3 latin3 L3
iso8859_4 28594 ISO-8859-4 iso-8859-4 latin4 L4
iso8859_5 28595 ISO-8859-5 iso-8859-5 cyrillic
iso8859_6 28596 ISO-8859-6 iso-8859-6 arabic
iso8859_7 28597 ISO-8859-7 iso-8859-7 greek greek8
iso8859_8 28598 ISO-8859-8 iso-8859-8 hebrew
iso8859_9 28599 ISO-8859-9 iso-8859-9 latin5 L5
iso8859_10 ISO-8859-10 iso-8859-10 latin6 L6
iso8859_11 ISO-8859-11 iso-8859-11 thai
iso8859_13 28603 ISO-8859-13 iso-8859-13 latin7 L7
iso8859_14 ISO-8859-14 iso-8859-14 latin8 L8
iso8859_15 28605 ISO-8859-15 iso-8859-15 latin9 L9
iso8859_16 ISO-8859-16 iso-8859-16 latin10 L10
johab 1361 JOHAB cp1361 ms1361
koi8_r
koi8_t KOI8-T
koi8_u
kz1048 kz_1048 strk1048_2002 rk1048
mac_cyrillic 10007 MacCyrillic maccyrillic
mac_greek 10006 MacGreek macgreek
mac_iceland 10079 MacIceland maciceland
mac_latin2 maclatin2 maccentraleurope mac_centeuro
mac_roman MacRoman macroman macintosh
mac_turkish 10081 MacTurkish macturkish
ptcp154 PT154 csptcp154 pt154 cp154 cyrillic-asian
shift_jis 932 SHIFT_JIS csshiftjis shiftjis sjis s_jis
shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213
utf_32 UTF-32 U32 utf32
utf_32_be UTF-32BE UTF-32BE
utf_32_le UTF-32LE UTF-32LE
utf_16 UTF16 U16 utf16
utf_16_be UTF-16BE UTF-16BE
utf_16_le UTF-16LE UTF-16LE
utf_7 65000 UTF-7 U7 unicode-1-1-utf-7
utf_8 65001 UTF-8 U8 UTF utf8 utf-8 cp65001
utf_8_sig

1 Encoding ascii Alias 437 Code Page ASCII Iconv Identifier 646 us-ascii
2 ascii big5 646, us-ascii 950 437 BIG5 ASCII big5-tw csbig5
3 big5 big5hkscs big5-tw, csbig5 950 BIG5-HKSCS BIG5 big5-hkscs hkscs
4 big5hkscs cp037 big5-hkscs, hkscs 037 BIG5-HKSCS IBM037 IBM039
5 cp037 cp273 IBM037, IBM039 037 273 IBM273 csIBM273
6 cp273 cp424 273, IBM273, csIBM273 EBCDIC-CP-HE IBM424
7 cp424 cp437 EBCDIC-CP-HE, IBM424 437 437 IBM437
8 cp437 cp500 437, IBM437 500 437 EBCDIC-CP-BE EBCDIC-CP-CH IBM500
9 cp500 cp720 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 720 500
10 cp720 cp737 737 720
11 cp737 cp775 775 737 IBM775
12 cp775 cp850 IBM775 850 775 CP850 850 IBM850
13 cp850 cp852 850, IBM850 852 850 CP850 852 IBM852
14 cp852 cp855 852, IBM852 855 852 855 IBM855
15 cp855 cp856 855, IBM855 855
16 cp856 cp857 857 857 IBM857
17 cp857 cp858 857, IBM857 858 857 858 IBM858
18 cp858 cp860 858, IBM858 860 858 860 IBM860
19 cp860 cp861 860, IBM860 861 860 861 CP-IS IBM861
20 cp861 cp862 861, CP-IS, IBM861 862 861 CP862 862 IBM862
21 cp862 cp863 862, IBM862 863 862 CP862 863 IBM863
22 cp863 cp864 863, IBM863 864 863 IBM864
23 cp864 cp865 IBM864 865 864 865 IBM865
24 cp865 cp866 865, IBM865 866 865 CP866 866 IBM866
25 cp866 cp869 866, IBM866 869 866 CP866 869 CP-GR IBM869
26 cp869 cp874 869, CP-GR, IBM869 874 869 CP874
27 cp874 cp875 875 874 CP874
28 cp875 cp932 932 875 CP932 932 ms932 mskanji ms-kanji windows-31j
29 cp932 cp949 932, ms932, mskanji, ms-kanji, windows-31j 949 932 CP949 CP932 949 ms949 uhc
30 cp949 cp950 949, ms949, uhc 950 949 CP950 CP949 950 ms950
31 cp950 cp1006 950, ms950 950 CP950
32 cp1006 cp1026 1026 ibm1026
33 cp1026 cp1125 ibm1026 1026 1125 ibm1125 cp866u ruscii
34 cp1125 cp1140 1125, ibm1125, cp866u, ruscii 1140 ibm1140
35 cp1140 cp1250 ibm1140 1250 1140 CP1250 windows-1250
36 cp1250 cp1251 windows-1250 1251 1250 CP1251 CP1250 windows-1251
37 cp1251 cp1252 windows-1251 1252 1251 CP1252 CP1251 windows-1252
38 cp1252 cp1253 windows-1252 1253 1252 CP1253 CP1252 windows-1253
39 cp1253 cp1254 windows-1253 1254 1253 CP1254 CP1253 windows-1254
40 cp1254 cp1255 windows-1254 1255 1254 CP1255 CP1254 windows-1255
41 cp1255 cp1256 windows-1255 1256 1255 CP1256 CP1255 windows-1256
42 cp1256 cp1257 windows-1256 1257 1256 CP1257 CP1256 windows-1257
43 cp1257 cp1258 windows-1257 1258 1257 CP1258 CP1257 windows-1258
44 cp1258 euc_jp windows-1258 20932 1258 EUC-JP CP1258 eucjp ujis u-jis
45 euc_jp euc_jis_2004 eucjp, ujis, u-jis 20932 EUC-JP jisx0213 eucjis2004
46 euc_jis_2004 euc_jisx0213 jisx0213, eucjis2004 eucjisx0213
47 euc_jisx0213 euc_kr eucjisx0213 51949 EUC-KR euckr korean ksc5601 ks_c-5601 ks_c-5601-1987 ksx1001 ks_x-1001
48 euc_kr gb2312 euckr, korean, ksc5601, ks_c-5601, ks_c-5601-1987, ksx1001, ks_x-1001 936 51949 CP936 EUC-KR chinese csiso58gb231280 euc-cn euccn eucgb2312-cn gb2312-1980 gb2312-80 iso-ir-58
49 gb2312 gbk chinese, csiso58gb231280, euc-cn, euccn, eucgb2312-cn, gb2312-1980, gb2312-80, iso-ir-58 936 936 GBK CP936 936 cp936 ms936
50 gbk gb18030 936, cp936, ms936 54936 936 GB18030 GBK gb18030-2000
51 gb18030 hz gb18030-2000 52936 54936 HZ GB18030 hzgb hz-gb hz-gb-2312
52 hz iso2022_jp hzgb, hz-gb, hz-gb-2312 50220 52936 ISO-2022-JP HZ csiso2022jp iso2022jp iso-2022-jp
53 iso2022_jp iso2022_jp_1 csiso2022jp, iso2022jp, iso-2022-jp 50220 ISO-2022-JP-1 ISO-2022-JP iso2022jp-1 iso-2022-jp-1
54 iso2022_jp_1 iso2022_jp_2 iso2022jp-1, iso-2022-jp-1 ISO-2022-JP-2 ISO-2022-JP-1 iso2022jp-2 iso-2022-jp-2
55 iso2022_jp_2 iso2022_jp_2004 iso2022jp-2, iso-2022-jp-2 ISO-2022-JP-2 iso2022jp-2004 iso-2022-jp-2004
56 iso2022_jp_2004 iso2022_jp_3 iso2022jp-2004, iso-2022-jp-2004 iso2022jp-3 iso-2022-jp-3
57 iso2022_jp_3 iso2022_jp_ext iso2022jp-3, iso-2022-jp-3 iso2022jp-ext iso-2022-jp-ext
58 iso2022_jp_ext iso2022_kr iso2022jp-ext, iso-2022-jp-ext 50225 ISO-2022-KR csiso2022kr iso2022kr iso-2022-kr
59 iso2022_kr latin_1 csiso2022kr, iso2022kr, iso-2022-kr 28591 50225 ISO-8859-1 ISO-2022-KR iso-8859-1 iso8859-1 8859 cp819 latin latin1 L1
60 latin_1 iso8859_2 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 28592 28591 ISO-8859-2 ISO-8859-1 iso-8859-2 latin2 L2
61 iso8859_2 iso8859_3 iso-8859-2, latin2, L2 28593 28592 ISO-8859-3 ISO-8859-2 iso-8859-3 latin3 L3
62 iso8859_3 iso8859_4 iso-8859-3, latin3, L3 28594 28593 ISO-8859-4 ISO-8859-3 iso-8859-4 latin4 L4
63 iso8859_4 iso8859_5 iso-8859-4, latin4, L4 28595 28594 ISO-8859-5 ISO-8859-4 iso-8859-5 cyrillic
64 iso8859_5 iso8859_6 iso-8859-5, cyrillic 28596 28595 ISO-8859-6 ISO-8859-5 iso-8859-6 arabic
65 iso8859_6 iso8859_7 iso-8859-6, arabic 28597 28596 ISO-8859-7 ISO-8859-6 iso-8859-7 greek greek8
66 iso8859_7 iso8859_8 iso-8859-7, greek, greek8 28598 28597 ISO-8859-8 ISO-8859-7 iso-8859-8 hebrew
67 iso8859_8 iso8859_9 iso-8859-8, hebrew 28599 28598 ISO-8859-9 ISO-8859-8 iso-8859-9 latin5 L5
68 iso8859_9 iso8859_10 iso-8859-9, latin5, L5 28599 ISO-8859-10 ISO-8859-9 iso-8859-10 latin6 L6
69 iso8859_10 iso8859_11 iso-8859-10, latin6, L6 ISO-8859-11 ISO-8859-10 iso-8859-11 thai
70 iso8859_11 iso8859_13 iso-8859-11, thai 28603 ISO-8859-13 ISO-8859-11 iso-8859-13 latin7 L7
71 iso8859_13 iso8859_14 iso-8859-13, latin7, L7 28603 ISO-8859-14 ISO-8859-13 iso-8859-14 latin8 L8
72 iso8859_14 iso8859_15 iso-8859-14, latin8, L8 28605 ISO-8859-15 ISO-8859-14 iso-8859-15 latin9 L9
73 iso8859_15 iso8859_16 iso-8859-15, latin9, L9 28605 ISO-8859-16 ISO-8859-15 iso-8859-16 latin10 L10
74 iso8859_16 johab iso-8859-16, latin10, L10 1361 JOHAB ISO-8859-16 cp1361 ms1361
75 johab koi8_r cp1361, ms1361 1361 JOHAB
76 koi8_r koi8_t KOI8-T
77 koi8_t koi8_u KOI8-T
78 koi8_u kz1048 kz_1048 strk1048_2002 rk1048
79 kz1048 mac_cyrillic kz_1048, strk1048_2002, rk1048 10007 MacCyrillic maccyrillic
80 mac_cyrillic mac_greek maccyrillic 10006 10007 MacGreek MacCyrillic macgreek
81 mac_greek mac_iceland macgreek 10079 10006 MacIceland MacGreek maciceland
82 mac_iceland mac_latin2 maciceland 10079 MacIceland maclatin2 maccentraleurope mac_centeuro
83 mac_latin2 mac_roman maclatin2, maccentraleurope, mac_centeuro MacRoman macroman macintosh
84 mac_roman mac_turkish macroman, macintosh 10081 MacTurkish MacRoman macturkish
85 mac_turkish ptcp154 macturkish 10081 PT154 MacTurkish csptcp154 pt154 cp154 cyrillic-asian
86 ptcp154 shift_jis csptcp154, pt154, cp154, cyrillic-asian 932 SHIFT_JIS PT154 csshiftjis shiftjis sjis s_jis
87 shift_jis shift_jis_2004 csshiftjis, shiftjis, sjis, s_jis 932 SHIFT_JIS shiftjis2004 sjis_2004 sjis2004
88 shift_jis_2004 shift_jisx0213 shiftjis2004, sjis_2004, sjis2004 shiftjisx0213 sjisx0213 s_jisx0213
89 shift_jisx0213 utf_32 shiftjisx0213, sjisx0213, s_jisx0213 UTF-32 U32 utf32
90 utf_32 utf_32_be U32, utf32 UTF-32BE UTF-32 UTF-32BE
91 utf_32_be utf_32_le UTF-32BE UTF-32LE UTF-32BE UTF-32LE
92 utf_32_le utf_16 UTF-32LE UTF16 UTF-32LE U16 utf16
93 utf_16 utf_16_be U16, utf16 UTF-16BE UTF16 UTF-16BE
94 utf_16_be utf_16_le UTF-16BE UTF-16LE UTF-16BE UTF-16LE
95 utf_16_le utf_7 UTF-16LE 65000 UTF-7 UTF-16LE U7 unicode-1-1-utf-7
96 utf_7 utf_8 U7, unicode-1-1-utf-7 65001 65000 UTF-8 UTF-7 U8 UTF utf8 utf-8 cp65001
97 utf_8 utf_8_sig U8, UTF, utf8, utf-8, cp65001 65001 UTF-8
utf_8_sig