fix issue that vt does not recognize the output nlp file
parent b740e95a62
commit 799ec37b65
@@ -38,6 +38,7 @@ if __name__ == "__main__":

     # pick data from pre-loaded dict
     diffIdxData = preLoadedData[ver]
+    plainKeys = diffIdxData.plainKeys

     # load lang file
     # and only keeps its value.

@@ -59,6 +60,8 @@ if __name__ == "__main__":
     # convert plain json to nlp json
     nlpJson = NlpUtils.PlainJson2NlpJson(plainKeys, plainValues)

+    NlpUtils.DumpJson(GetRawNlpPath(ver, lang, '')[:-5] + '.json', nlpJson)
+
     # write into file with different encoding
     lang_macro, encs = g_SupportedEncoding[lang]
     for enc in encs:
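The net effect of these two hunks is that the encoder now also writes the intermediate `nlpJson` tree as a plain `.json` file next to the raw NLP output, which makes the converted data easy to inspect. The snippet below only illustrates the path rewrite performed by the added `DumpJson` call; the sample path and its 5-character `.utf8` suffix are assumptions, since the diff does not show what `GetRawNlpPath` returns.

```python
# Hypothetical illustration of the "[:-5] + '.json'" path rewrite.
raw_path = "out/VT25/zh-cn.utf8"      # assumed shape of GetRawNlpPath(ver, lang, '')
json_path = raw_path[:-5] + ".json"   # drop the 5-character suffix, append '.json'
print(json_path)                      # out/VT25/zh-cn.json
```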
@@ -4,6 +4,8 @@ import io
 import json
 import re

+g_EnableDebugging = False
+
 g_VirtoolsVersion: tuple[str] = (
     '25', '35', '40', '50',
 )
@@ -11,9 +13,11 @@ g_SupportedLangs: tuple[str] = (
     'zh-cn',
 )

+# ========== Basic File RW Functions ==========
+
 def DumpJson(filepath: str, jsonData: dict):
     with open(filepath, 'w', encoding='utf-8') as f:
-        json.dump(jsonData, f, indent=4, sort_keys=False)
+        json.dump(jsonData, f, indent=(2 if g_EnableDebugging else None), sort_keys=False)

 def LoadJson(filepath: str) -> dict:
     with open(filepath, 'r', encoding='utf-8') as f:
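The `g_EnableDebugging` flag added above only changes how `DumpJson` formats its output: compact single-line JSON by default, pretty-printed while debugging. A minimal standard-library sketch of that difference, with made-up sample data:

```python
import json

data = {"language": "English", "entries": ["Hello", {"section": "Menu", "entries": []}]}

# indent=None, the new default: everything on one compact line, smallest file
print(json.dumps(data, indent=None, sort_keys=False))

# indent=2, used when g_EnableDebugging is True: readable multi-line output
print(json.dumps(data, indent=2, sort_keys=False))
```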
@@ -122,24 +126,34 @@ def InternalNlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList:
         InternalNlpJson2PlainJson(entry, stack, keyList, valueList)
         stack.pop()

+# ========== Json Converter ==========
+
 def PlainJson2NlpJson(keyList: list[str], valueList: list[str]) -> dict:
     # create the base section
-    # each section will have 3 k-v pair. language/section and entities are existed in original nlp json
+    # each section will have 3 k-v pair. language/section and entries are existed in original nlp json
     # and key_map is served for path finding and convenient for looking for sub section.
     result: dict = {
         "language": "English",
-        "entities": [],
+        "entries": [],
         "key_map": {}
     }
     # inerate list and construct dict
     for k, v in zip(keyList, valueList):
         InternalPlainJson2NlpJson(result, k, v)
+    # remove useless key map
+    InternalDelNlpJsonKeyMap(result)
     return result
+def InternalDelNlpJsonKeyMap(nlpJson: dict):
+    # recursively calling self
+    for v in nlpJson['key_map'].values():
+        InternalDelNlpJsonKeyMap(v)
+    # then delete self
+    del nlpJson['key_map']
 def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
     keypath = pairKey.split('/')
     # confirm last node is number and remove it
     assert keypath[-1].isdecimal()
-    keypath = keypath[0:-1]
+    keypath = keypath[:-1]

     # move to correct sub section
     for pathpart in keypath:
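The converter keeps a `key_map` index in every section while building the tree so `InternalPlainJson2NlpJson` can find an existing sub-section by name without scanning `entries`; the new `InternalDelNlpJsonKeyMap` pass strips that index once construction is done, presumably so the dumped `.json` does not repeat every sub-section. A small self-contained sketch of the same idea, using made-up data and a stand-in for the repo's helper:

```python
# 'key_map' references the very dicts stored in 'entries', so leaving it in
# place would make json.dump serialize each sub-section a second time.
menu = {"section": "Menu", "entries": ["OK", "Cancel"], "key_map": {}}
root = {"language": "English", "entries": [menu], "key_map": {"Menu": menu}}

def strip_key_map(node: dict) -> None:
    # recurse into the indexed sub-sections first, then drop this node's index
    for child in node["key_map"].values():
        strip_key_map(child)
    del node["key_map"]

strip_key_map(root)
print(root)   # {'language': 'English', 'entries': [{'section': 'Menu', 'entries': ['OK', 'Cancel']}]}
```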
@@ -150,21 +164,21 @@ def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
             # create a new one
             sub_section = {
                 'section': pathpart,
-                'entities': [],
+                'entries': [],
                 'key_map': {}
             }

             # add into current section
-            nlpJson['entities'].append(sub_section)
+            nlpJson['entries'].append(sub_section)
             nlpJson['key_map'][pathpart] = sub_section

             # move to the new created sub section
             nlpJson = sub_section

     # insert data
-    nlpJson['entities'].append(pairVal)
+    nlpJson['entries'].append(pairVal)

-
+# ========== Raw Nlp Text Writer ==========

 def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
     # write in wb mode because we need explicitly write \r\n, not \n
@@ -172,16 +186,16 @@ def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
         f.write(f'Language:{lang_macro}\r\n'.encode(encoding, errors='ignore'))
         InternalDumpNlpJson(f, encoding, 0, nlpJson)

-g_NlpJsonStrRepl1 = re.compile('\\\\')
+# g_NlpJsonStrRepl1 = re.compile('\\\\')
 g_NlpJsonStrRepl2 = re.compile('\"')
 def NlpJsonStringProcessor(strl: str) -> str:
     return g_NlpJsonStrRepl2.sub('\"\"', strl)

 def InternalDumpNlpJson(f: io.BufferedWriter, encoding: str, depth: int, nlpJson: dict):
-    assert 'entities' in nlpJson
+    assert 'entries' in nlpJson

     is_first: bool = True
-    for entity in nlpJson['entities']:
+    for entity in nlpJson['entries']:
         if isinstance(entity, str):
             # write comma if not the first element
             if not is_first: f.write(','.encode(encoding))
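`NlpJsonStringProcessor` now only doubles embedded double quotes, and the backslash replacement `g_NlpJsonStrRepl1` is commented out; this suggests values are written inside quotes in the raw NLP text and use quote doubling rather than backslash escapes. A standalone check of what the kept rule does, with a made-up string:

```python
import re

# Same substitution as the kept g_NlpJsonStrRepl2 rule: double every embedded quote.
repl_quote = re.compile('\"')

def process(text: str) -> str:
    return repl_quote.sub('\"\"', text)

print(process('Press "OK" to continue'))   # Press ""OK"" to continue
```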
@@ -3,3 +3,4 @@
 Example:

 Create templates: `py NlpJsonDecoder.py`
+Compile translations: `py NlpJsonEncoder.py`