update python script

This commit is contained in:
yyc12345 2023-07-10 21:42:52 +08:00
parent 6eaa91fc9a
commit 4651110885
8 changed files with 78 additions and 47 deletions

View File

@ -2,11 +2,11 @@ import NlpUtils
import jsondiff
import sys
json1 = NlpUtils.LoadJsonFromFile(sys.argv[1])
json2 = NlpUtils.LoadJsonFromFile(sys.argv[2])
json1 = NlpUtils.LoadJson(sys.argv[1])
json2 = NlpUtils.LoadJson(sys.argv[2])
(_, value1, ) = NlpUtils.NlpJson2PlainJsonWrapper(json1)
(_, value2, ) = NlpUtils.NlpJson2PlainJsonWrapper(json2)
(_, value1, ) = NlpUtils.NlpJson2PlainJson(json1)
(_, value2, ) = NlpUtils.NlpJson2PlainJson(json2)
diff = jsondiff.diff(value1, value2)
print(diff)

View File

@ -1,34 +1,25 @@
import NlpUtils
import jsondiff
import sys, collections
import collections
CmdArgvPair = collections.namedtuple('CmdArgvPair', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex'))
def CmdArgvAnalyzer() -> tuple[CmdArgvPair]:
ls: list[CmdArgvPair] = []
VtTrDataTuple = collections.namedtuple('VtTrDataTuple', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex'))
def ConstructVtTrDataTuple() -> tuple[VtTrDataTuple, ...]:
    """Build one VtTrDataTuple of file paths per Virtools version.

    A record is produced for every entry of NlpUtils.g_VirtoolsVersion, in
    the same order. Each record holds the paths of the NLP json
    (`../NlpSrc/VT<ver>.json`) and of the template, diff and index files
    (`../NlpTr/VT<ver>.template.json`, `.diff`, `.index`).

    Returns:
        A tuple with one VtTrDataTuple per version.
    """
    # NOTE: `tuple[X, ...]` is the annotation for a tuple of arbitrary length;
    # plain `tuple[X]` would describe a tuple holding exactly one element.
    return tuple(
        VtTrDataTuple(
            nlpJson=f'../NlpSrc/VT{i}.json',
            trTemplate=f'../NlpTr/VT{i}.template.json',
            trDiff=f'../NlpTr/VT{i}.diff',
            trIndex=f'../NlpTr/VT{i}.index',
        )
        for i in NlpUtils.g_VirtoolsVersion
    )
argc = len(sys.argv) - 1
if argc % 4 != 0:
print("invalid parameter.")
sys.exit(1)
count = argc // 4
return tuple(CmdArgvPair._make(sys.argv[1 + i * 4:5 + i * 4]) for i in range(count))
# The script requires 1 file as its reference input:
# 0. the NLP json file
# The script outputs 3 files for each version's translation:
# 0. the translation template json
# 0. the diff result compared with the previous version
# 0. a list containing the key of each value in the template json
# So for a single Virtools version, 4 arguments need to be passed.
if __name__ == "__main__":
resolvedArgv = CmdArgvAnalyzer()
prevJson = None
for vtVer in resolvedArgv:
for vtVer in ConstructVtTrDataTuple():
print(f'Processing {vtVer.nlpJson}...')
# read nlp json and convert it into plain json
nlpJson = NlpUtils.LoadJson(vtVer.nlpJson)
(plainKeys, plainValues, ) = NlpUtils.NlpJson2PlainJsonWrapper(nlpJson)
(plainKeys, plainValues, ) = NlpUtils.NlpJson2PlainJson(nlpJson)
# write index file
NlpUtils.DumpTrIndex(vtVer.trIndex, plainKeys)
@ -48,7 +39,7 @@ if __name__ == "__main__":
# write diff
NlpUtils.DumpTrDiff(vtVer.trDiff, insertedKey, deletedKey)
# write template with special treatment
NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict((i, j) for i, j in enumerate(insertedVal)))
NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict((plainKeys[insertedKey[i]], insertedVal[i]) for i in range(len(insertedKey))))
# assign prev json
prevJson = plainValues

33
NlpProc/NlpJsonEncoder.py Normal file
View File

@ -0,0 +1,33 @@
import NlpUtils
import jsondiff
import collections
# Maps a language code to the encodings iterated for that language by the
# main loop of this script.
g_SupportedEncoding = {
    'zh-cn': ('utf-8', 'gb2312'),
}

# Record with four named fields: rawNlp, trTemplate, trDiff and trIndex.
VtTrDataTuple = collections.namedtuple(
    'VtTrDataTuple',
    ['rawNlp', 'trTemplate', 'trDiff', 'trIndex'],
)
def GetRawNlpPath(ver: str, lang: str) -> str:
    """Return the relative path `../NlpTr/out/VT<ver>.<lang>.txt`.

    Args:
        ver: Version identifier inserted after the `VT` prefix.
        lang: Language code inserted before the `.txt` extension.
    """
    rawNlpPath: str = f'../NlpTr/out/VT{ver}.{lang}.txt'
    return rawNlpPath
def GetTrPath(ver: str, lang: str) -> str:
    """Return the relative path `../NlpTr/VT<ver>.<lang>.json`.

    Args:
        ver: Version identifier inserted after the `VT` prefix.
        lang: Language code inserted before the `.json` extension.
    """
    trPath: str = f'../NlpTr/VT{ver}.{lang}.json'
    return trPath
def GetTrDiffPath(ver: str) -> str:
    """Return the relative path `../NlpTr/VT<ver>.diff`.

    Args:
        ver: Version identifier inserted after the `VT` prefix.
    """
    trDiffPath: str = f'../NlpTr/VT{ver}.diff'
    return trDiffPath
def GetTrIndexPath(ver: str) -> str:
    """Return the relative path `../NlpTr/VT<ver>.index`.

    Args:
        ver: Version identifier inserted after the `VT` prefix.
    """
    trIndexPath: str = f'../NlpTr/VT{ver}.index'
    return trIndexPath
if __name__ == "__main__":
    # Visit every (version, language, encoding) combination. The steps named
    # in the comments below have no code in this block yet; only a progress
    # line is printed for each combination.
    for ver in NlpUtils.g_VirtoolsVersion:
        # step: load diff and index data

        for lang in NlpUtils.g_SupportedLangs:
            # step: load lang file
            # step: patch it
            # step: convert plain json to nested json

            # step: write into file with different encoding
            encodings = g_SupportedEncoding[lang]
            for enc in encodings:
                print(f'Process {ver}.{lang}.{enc}...')

View File

@ -1,2 +0,0 @@
import NlpUtils
import sys, collections

View File

@ -1,4 +0,0 @@
import NlpUtils
import sys, collections

View File

@ -3,6 +3,13 @@ import collections
import io
import json
# Version identifiers iterated by the scripts that import this module.
# Annotated as `tuple[str, ...]` (any length); `tuple[str]` would mean a
# tuple holding exactly one string.
g_VirtoolsVersion: tuple[str, ...] = (
    '25', '35', '40', '50',
)

# Language codes iterated by the scripts that import this module.
g_SupportedLangs: tuple[str, ...] = (
    'zh-cn',
)
def DumpJson(filepath: str, jsonData: dict):
    """Write jsonData as JSON text to the file at filepath.

    The file is opened in write mode with UTF-8 encoding. The output is
    indented by 4 spaces and keys are not sorted.

    Args:
        filepath: Path of the file to write.
        jsonData: The data handed to json.dump.
    """
    dumpOptions = {'indent': 4, 'sort_keys': False}
    with open(filepath, mode='w', encoding='utf-8') as outFile:
        json.dump(jsonData, outFile, **dumpOptions)
@ -60,7 +67,7 @@ def LoadTrDiff(filepath: str) -> dict:
# return a tuple. (insertedKey, deletedKey, insertedVal)
def SeperatePlainJsonDiff(diffData: dict) -> tuple:
insertedKey: list[str] = []
insertedKey: list[int] = []
insertedVal: list[str] = []
if jsondiff.insert in diffData:
@ -75,14 +82,28 @@ def SeperatePlainJsonDiff(diffData: dict) -> tuple:
return (insertedKey, deletedKey, insertedVal)
def CombinePlainJsonDiff(insertedKey: list[int], deletedKey: list[int], insertedVal: list[str]) -> dict:
    """Assemble a diff dict from separate insert/delete lists.

    Args:
        insertedKey: Keys of the inserted entries.
        deletedKey: Keys of the deleted entries.
        insertedVal: Values of the inserted entries; must have the same
            length as insertedKey.

    Returns:
        A dict that maps jsondiff.insert to a list of (key, value) tuples
        when insertedKey is non-empty, and jsondiff.delete to a copy of
        deletedKey when deletedKey is non-empty. Empty inputs yield an
        empty dict.
    """
    assert len(insertedKey) == len(insertedVal)

    combined: dict = {}
    if len(insertedKey) > 0:
        # pair every inserted key with the value at the same position
        combined[jsondiff.insert] = list(zip(insertedKey, insertedVal))
    if len(deletedKey) > 0:
        # store a copy so the caller's list is not shared with the result
        combined[jsondiff.delete] = deletedKey[:]
    return combined
# return a tuple. (keyList, valueList)
def NlpJson2PlainJsonWrapper(nlpJson: dict) -> tuple:
def NlpJson2PlainJson(nlpJson: dict) -> tuple:
keyList: list[str] = []
valueList: list[str] = []
stack: collections.deque = collections.deque()
NlpJson2PlainJson(nlpJson, stack, keyList, valueList)
InternalNlpJson2PlainJson(nlpJson, stack, keyList, valueList)
return (keyList, valueList, )
def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]):
def InternalNlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]):
assert isinstance(nlpJson, dict)
assert 'entries' in nlpJson
@ -97,6 +118,6 @@ def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str
# is a sub section
# push section name and recursive calling this function
stack.append(entry['section'])
NlpJson2PlainJson(entry, stack, keyList, valueList)
InternalNlpJson2PlainJson(entry, stack, keyList, valueList)
stack.pop()

View File

@ -2,12 +2,4 @@
Example:
`py NlpJsonDecoder.py ../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index ../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index ../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index ../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index`
```
py NlpJsonDecoder.py
../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index
../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index
../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index
../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index
```
Create templates: `py NlpJsonDecoder.py`