diff --git a/NlpProc/NlpDirectCmp.py b/NlpProc/NlpDirectCmp.py index 4441894..6fc0ac7 100644 --- a/NlpProc/NlpDirectCmp.py +++ b/NlpProc/NlpDirectCmp.py @@ -2,11 +2,11 @@ import NlpUtils import jsondiff import sys -json1 = NlpUtils.LoadJsonFromFile(sys.argv[1]) -json2 = NlpUtils.LoadJsonFromFile(sys.argv[2]) +json1 = NlpUtils.LoadJson(sys.argv[1]) +json2 = NlpUtils.LoadJson(sys.argv[2]) -(_, value1, ) = NlpUtils.NlpJson2PlainJsonWrapper(json1) -(_, value2, ) = NlpUtils.NlpJson2PlainJsonWrapper(json2) +(_, value1, ) = NlpUtils.NlpJson2PlainJson(json1) +(_, value2, ) = NlpUtils.NlpJson2PlainJson(json2) diff = jsondiff.diff(value1, value2) print(diff) diff --git a/NlpProc/NlpJsonDecoder.py b/NlpProc/NlpJsonDecoder.py index e1b4d6b..6d0de6b 100644 --- a/NlpProc/NlpJsonDecoder.py +++ b/NlpProc/NlpJsonDecoder.py @@ -1,34 +1,25 @@ import NlpUtils import jsondiff -import sys, collections +import collections -CmdArgvPair = collections.namedtuple('CmdArgvPair', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex')) -def CmdArgvAnalyzer() -> tuple[CmdArgvPair]: - ls: list[CmdArgvPair] = [] +VtTrDataTuple = collections.namedtuple('VtTrDataTuple', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex')) +def ConstructVtTrDataTuple() -> tuple[VtTrDataTuple]: + return tuple(VtTrDataTuple._make(( + f'../NlpSrc/VT{i}.json', + f'../NlpTr/VT{i}.template.json', + f'../NlpTr/VT{i}.diff', + f'../NlpTr/VT{i}.index', + ))for i in NlpUtils.g_VirtoolsVersion) - argc = len(sys.argv) - 1 - if argc % 4 != 0: - print("invalid parameter.") - sys.exit(1) - - count = argc // 4 - return tuple(CmdArgvPair._make(sys.argv[1 + i * 4:5 + i * 4]) for i in range(count)) - -# script will order 1 file as reference -# 0. the nlp json file -# script will output 3 files for each version translation. -# 0. translation template json -# 0. the diff result comparing with the previous version -# 0. a list including the key of each value in template json -# so for a single version virtools, we need input 4 arguments if __name__ == "__main__": - resolvedArgv = CmdArgvAnalyzer() prevJson = None - for vtVer in resolvedArgv: + for vtVer in ConstructVtTrDataTuple(): + print(f'Processing {vtVer.nlpJson}...') + # read nlp json and convert it into plain json nlpJson = NlpUtils.LoadJson(vtVer.nlpJson) - (plainKeys, plainValues, ) = NlpUtils.NlpJson2PlainJsonWrapper(nlpJson) + (plainKeys, plainValues, ) = NlpUtils.NlpJson2PlainJson(nlpJson) # write index file NlpUtils.DumpTrIndex(vtVer.trIndex, plainKeys) @@ -48,7 +39,7 @@ if __name__ == "__main__": # write diff NlpUtils.DumpTrDiff(vtVer.trDiff, insertedKey, deletedKey) # write template with special treat - NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict((i, j) for i, j in enumerate(insertedVal))) + NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict((plainKeys[insertedKey[i]], insertedVal[i]) for i in range(len(insertedKey)))) # assign prev json prevJson = plainValues diff --git a/NlpProc/NlpJsonEncoder.py b/NlpProc/NlpJsonEncoder.py new file mode 100644 index 0000000..4901183 --- /dev/null +++ b/NlpProc/NlpJsonEncoder.py @@ -0,0 +1,33 @@ +import NlpUtils +import jsondiff +import collections + +g_SupportedEncoding = { + 'zh-cn': ('utf-8', 'gb2312', ) +} + +VtTrDataTuple = collections.namedtuple('VtTrDataTuple', ('rawNlp', 'trTemplate', 'trDiff', 'trIndex')) +def GetRawNlpPath(ver: str, lang: str) -> str: + return f'../NlpTr/out/VT{ver}.{lang}.txt' +def GetTrPath(ver: str, lang: str) -> str: + return f'../NlpTr/VT{ver}.{lang}.json' +def GetTrDiffPath(ver: str) -> str: + return f'../NlpTr/VT{ver}.diff' +def GetTrIndexPath(ver: str) -> str: + return f'../NlpTr/VT{ver}.index' + +if __name__ == "__main__": + + for ver in NlpUtils.g_VirtoolsVersion: + # load diff and index data + + for lang in NlpUtils.g_SupportedLangs: + # load lang file + + # patch it + + # convert plain json to nested json + + # write into file with different encoding + for enc in g_SupportedEncoding[lang]: + print(f'Process {ver}.{lang}.{enc}...') diff --git a/NlpProc/NlpTrBaseCompiler.py b/NlpProc/NlpTrBaseCompiler.py deleted file mode 100644 index 86c0f4a..0000000 --- a/NlpProc/NlpTrBaseCompiler.py +++ /dev/null @@ -1,2 +0,0 @@ -import NlpUtils -import sys, collections diff --git a/NlpProc/NlpTrPatchCompiler.py b/NlpProc/NlpTrPatchCompiler.py deleted file mode 100644 index e69de29..0000000 diff --git a/NlpProc/NlpTrPatchCreator.py b/NlpProc/NlpTrPatchCreator.py deleted file mode 100644 index 70f3dc2..0000000 --- a/NlpProc/NlpTrPatchCreator.py +++ /dev/null @@ -1,4 +0,0 @@ -import NlpUtils -import sys, collections - - diff --git a/NlpProc/NlpUtils.py b/NlpProc/NlpUtils.py index 8cd45bf..6adb18d 100644 --- a/NlpProc/NlpUtils.py +++ b/NlpProc/NlpUtils.py @@ -3,6 +3,13 @@ import collections import io import json +g_VirtoolsVersion: tuple[str] = ( + '25', '35', '40', '50', +) +g_SupportedLangs: tuple[str] = ( + 'zh-cn', +) + def DumpJson(filepath: str, jsonData: dict): with open(filepath, 'w', encoding='utf-8') as f: json.dump(jsonData, f, indent=4, sort_keys=False) @@ -60,7 +67,7 @@ def LoadTrDiff(filepath: str) -> dict: # return a tuple. (insertedKey, deletedKey, insertedVal) def SeperatePlainJsonDiff(diffData: dict) -> tuple: - insertedKey: list[str] = [] + insertedKey: list[int] = [] insertedVal: list[str] = [] if jsondiff.insert in diffData: @@ -75,14 +82,28 @@ def SeperatePlainJsonDiff(diffData: dict) -> tuple: return (insertedKey, deletedKey, insertedVal) +def CombinePlainJsonDiff(insertedKey: list[int], deletedKey: list[int], insertedVal: list[str]) -> dict: + assert len(insertedKey) == len(insertedVal) + + result: dict = {} + if len(insertedKey) != 0: + result[jsondiff.insert] = [] + for k, v in zip(insertedKey, insertedVal): + result[jsondiff.insert].append((k, v)) + + if len(deletedKey) != 0: + result[jsondiff.delete] = deletedKey[:] + + return result + # return a tuple. (keyList, valueList) -def NlpJson2PlainJsonWrapper(nlpJson: dict) -> tuple: +def NlpJson2PlainJson(nlpJson: dict) -> tuple: keyList: list[str] = [] valueList: list[str] = [] stack: collections.deque = collections.deque() - NlpJson2PlainJson(nlpJson, stack, keyList, valueList) + InternalNlpJson2PlainJson(nlpJson, stack, keyList, valueList) return (keyList, valueList, ) -def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]): +def InternalNlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]): assert isinstance(nlpJson, dict) assert 'entries' in nlpJson @@ -97,6 +118,6 @@ def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str # is a sub section # push section name and recursive calling this function stack.append(entry['section']) - NlpJson2PlainJson(entry, stack, keyList, valueList) + InternalNlpJson2PlainJson(entry, stack, keyList, valueList) stack.pop() diff --git a/NlpProc/README.md b/NlpProc/README.md index c71b6b0..f72bc21 100644 --- a/NlpProc/README.md +++ b/NlpProc/README.md @@ -2,12 +2,4 @@ Example: -`py NlpJsonDecoder.py ../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index ../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index ../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index ../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index` - -``` -py NlpJsonDecoder.py -../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index -../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index -../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index -../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index -``` +Create templates: `py NlpJsonDecoder.py`