From 6eaa91fc9ad89b75b922342a31c1ce30cd5e422e Mon Sep 17 00:00:00 2001
From: yyc12345
Date: Sun, 9 Jul 2023 22:32:23 +0800
Subject: [PATCH] update python script

---
 .gitignore                  |   1 +
 NlpProc/NlpDirectCmp.py     |  12 ++++
 NlpProc/NlpJsonDecoder.py   |  55 +++++++++++++++++
 NlpProc/NlpTrBaseCreator.py |  26 --------
 NlpProc/NlpUtils.py         | 118 ++++++++++++++++++++++++++----------
 NlpProc/README.md           |  13 ++++
 6 files changed, 168 insertions(+), 57 deletions(-)
 create mode 100644 NlpProc/NlpDirectCmp.py
 create mode 100644 NlpProc/NlpJsonDecoder.py
 delete mode 100644 NlpProc/NlpTrBaseCreator.py
 create mode 100644 NlpProc/README.md

diff --git a/.gitignore b/.gitignore
index 14bc2dd..8249019 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ NlpParser/*
 !NlpParser/NlpRunner.java
 !NlpParser/testbench.txt
 !NlpParser/README.md
+NlpTr/*
 
 
 .vscode/
diff --git a/NlpProc/NlpDirectCmp.py b/NlpProc/NlpDirectCmp.py
new file mode 100644
index 0000000..4441894
--- /dev/null
+++ b/NlpProc/NlpDirectCmp.py
@@ -0,0 +1,12 @@
+import NlpUtils
+import jsondiff
+import sys
+
+json1 = NlpUtils.LoadJson(sys.argv[1])
+json2 = NlpUtils.LoadJson(sys.argv[2])
+
+(_, value1, ) = NlpUtils.NlpJson2PlainJsonWrapper(json1)
+(_, value2, ) = NlpUtils.NlpJson2PlainJsonWrapper(json2)
+
+diff = jsondiff.diff(value1, value2)
+print(diff)
diff --git a/NlpProc/NlpJsonDecoder.py b/NlpProc/NlpJsonDecoder.py
new file mode 100644
index 0000000..e1b4d6b
--- /dev/null
+++ b/NlpProc/NlpJsonDecoder.py
@@ -0,0 +1,55 @@
+import NlpUtils
+import jsondiff
+import sys, collections
+
+CmdArgvPair = collections.namedtuple('CmdArgvPair', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex'))
+def CmdArgvAnalyzer() -> tuple[CmdArgvPair, ...]:
+    """Group sys.argv[1:] into CmdArgvPair tuples, 4 arguments each; exit with status 1 if the count is not a multiple of 4."""
+
+    argc = len(sys.argv) - 1
+    if argc % 4 != 0:
+        print("invalid parameter.")
+        sys.exit(1)
+
+    count = argc // 4
+    return tuple(CmdArgvPair._make(sys.argv[1 + i * 4:5 + i * 4]) for i in range(count))
+
+# the script requires 1 file as reference (input):
+#   0. the nlp json file
+# the script outputs 3 files for each translated version:
+#   0. translation template json
+#   0. the diff result comparing with the previous version
+#   0. a list including the key of each value in template json
+# so each Virtools version needs 4 command line arguments
+if __name__ == "__main__":
+    resolvedArgv = CmdArgvAnalyzer()
+
+    prevJson = None
+    for vtVer in resolvedArgv:
+        # read nlp json and convert it into plain json
+        nlpJson = NlpUtils.LoadJson(vtVer.nlpJson)
+        (plainKeys, plainValues, ) = NlpUtils.NlpJson2PlainJsonWrapper(nlpJson)
+
+        # write index file
+        NlpUtils.DumpTrIndex(vtVer.trIndex, plainKeys)
+
+        # compare with previous one
+        if prevJson is None:
+            # this is the first json, so there is nothing to diff against.
+            # write a blank diff and write the whole set of translation values
+            NlpUtils.DumpTrDiff(vtVer.trDiff, [], [])
+            NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict(zip(plainKeys, plainValues)))
+        else:
+            # compare with prev json
+            cmpResult = jsondiff.diff(prevJson, plainValues)
+            # separate diff result
+            (insertedKey, deletedKey, insertedVal) = NlpUtils.SeperatePlainJsonDiff(cmpResult)
+
+            # write diff
+            NlpUtils.DumpTrDiff(vtVer.trDiff, insertedKey, deletedKey)
+            # special treatment: template holds only inserted values, keyed by their position in insertedVal
+            NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict((i, j) for i, j in enumerate(insertedVal)))
+
+        # assign prev json
+        prevJson = plainValues
+
diff --git a/NlpProc/NlpTrBaseCreator.py b/NlpProc/NlpTrBaseCreator.py
deleted file mode 100644
index 44c0cc6..0000000
--- a/NlpProc/NlpTrBaseCreator.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import NlpUtils
-import sys, collections
-
-def CreateBaseJsonWrapper(baseJson: dict) -> dict[str, str]:
-    result: dict[str, str] = {}
-    stack: collections.deque = collections.deque()
-    CreateBaseJson(baseJson, stack, result)
-    return result
-def CreateBaseJson(baseJson: dict, stack: collections.deque, result: dict[str, str]):
-    assert isinstance(baseJson, dict)
-    assert 'entries' in baseJson
-
-    counter = 0
-    for entry in baseJson['entries']:
-        if isinstance(entry, str):
-            result['.'.join(tuple(stack) + (str(counter), ))] = entry
-            counter += 1
-        else:
-            stack.append(entry['section'])
-            CreateBaseJson(entry, stack, result)
-            stack.pop()
-
-if __name__ == "__main__":
-    baseJson = NlpUtils.LoadJsonFromFile(sys.argv[1])
-    trJson = CreateBaseJsonWrapper(baseJson)
-    NlpUtils.WriteJsonToFile(sys.argv[2], trJson)
diff --git a/NlpProc/NlpUtils.py b/NlpProc/NlpUtils.py
index 5849316..8cd45bf 100644
--- a/NlpProc/NlpUtils.py
+++ b/NlpProc/NlpUtils.py
@@ -3,44 +3,100 @@ import collections
 import io
 import json
 
-def WriteJsonToFile(filepath: str, jsonData: dict):
+def DumpJson(filepath: str, jsonData: dict):
     with open(filepath, 'w', encoding='utf-8') as f:
         json.dump(jsonData, f, indent=4, sort_keys=False)
 
-def LoadJsonFromFile(filepath: str) -> dict:
+def LoadJson(filepath: str) -> dict:
     with open(filepath, 'r', encoding='utf-8') as f:
         return json.load(f)
 
-def SaveDiffToFileWrapper(jsonDiffData: dict, filepath: str) -> dict[str, str]:
-    result: dict[tuple[int], str] = {}
+def DumpTrIndex(filepath: str, indexData: list[str]):
+    with open(filepath, 'w', encoding='utf-8') as f:
+        for item in indexData:
+            f.write(item)
+            f.write('\n')
+
+def LoadTrIndex(filepath: str) -> list[str]:
+    data: list[str] = []
+    with open(filepath, 'r', encoding='utf-8') as f:
+        while True:
+            ln = f.readline()
+            if ln == '': break
+            data.append(ln.strip('\n'))
+
+    return data
+
+def DumpTrTemplate(filepath: str, templateData: dict[str, str]):
+    DumpJson(filepath, templateData)
+
+def LoadTrTemplate(filepath: str) -> dict[str, str]:
+    return LoadJson(filepath)
+
+def DumpTrDiff(filepath: str, insertedKey: list[str], deletedKey: list[str]):
+    with open(filepath, 'w', encoding='utf-8') as f:
+        for entryIdx in insertedKey:
+            f.write(f'i/{entryIdx}\n')
+
+        for entryIdx in deletedKey:
+            f.write(f'd/{entryIdx}\n')
+
+# return a tuple. (insertedKey, deletedKey)
+def LoadTrDiff(filepath: str) -> tuple[list[str], list[str]]:
+    insertedKey: list[str] = []
+    deletedKey: list[str] = []
+    with open(filepath, 'r', encoding='utf-8') as f:
+        while True:
+            ln = f.readline()
+            if ln == '': break
+
+            sp = ln.strip('\n').split('/')
+            if sp[0] == 'i':
+                insertedKey.append(sp[1])
+            else:
+                deletedKey.append(sp[1])
+
+    return (insertedKey, deletedKey)
+
+# return a tuple. (insertedKey, deletedKey, insertedVal)
+def SeperatePlainJsonDiff(diffData: dict) -> tuple:
+    insertedKey: list[str] = []
+    insertedVal: list[str] = []
+
+    if jsondiff.insert in diffData:
+        for (entryIdx, entryVal, ) in diffData[jsondiff.insert]:
+            insertedKey.append(entryIdx)
+            insertedVal.append(entryVal)
+
+    if jsondiff.delete in diffData:
+        deletedKey = diffData[jsondiff.delete][:]
+    else:
+        deletedKey = []
+
+    return (insertedKey, deletedKey, insertedVal)
+
+# return a tuple. (keyList, valueList)
+def NlpJson2PlainJsonWrapper(nlpJson: dict) -> tuple:
+    keyList: list[str] = []
+    valueList: list[str] = []
     stack: collections.deque = collections.deque()
-    with open(filepath, 'w', encoding='utf-8') as fdiff:
-        SaveDiffToFile(jsonDiffData, fdiff, stack, result)
-    return result
+    NlpJson2PlainJson(nlpJson, stack, keyList, valueList)
+    return (keyList, valueList, )
 
+def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]):
+    assert isinstance(nlpJson, dict)
+    assert 'entries' in nlpJson
 
-def SaveDiffToFile(jsonDiffData: dict, fs: io.TextIOWrapper, stack: collections.deque, result: dict[tuple[int], str]):
-    assert isinstance(jsonDiffData, dict)
-    assert len(jsonDiffData) == 1
-    assert "entries" in jsonDiffData
-    assert isinstance(jsonDiffData["entries"], dict)
-
-    for key, item in jsonDiffData["entries"].items():
-        if isinstance(key, int):
-            stack.append(key)
-            SaveDiffToFile(item, fs, stack, result)
-            stack.pop()
-        elif key == jsondiff.symbols.insert:
-            for (modIdx, modEntry) in item:
-                stridx = ".".join(tuple(stack) + (modIdx, ))
-                result[stridx] = modEntry
-                fs.write(f'i {stridx}\n')
-        elif key == jsondiff.symbols.delete:
-            for delIdx in item:
-                stridx = ".".join(tuple(stack) + (delIdx, ))
-                fs.write(f'd {stridx}\n')
+    counter = 0
+    for entry in nlpJson['entries']:
+        if isinstance(entry, str):
+            # is a data node. add it into the result
+            keyList.append('/'.join(tuple(stack) + (str(counter), )))
+            valueList.append(entry)
+            counter += 1
         else:
-            raise Exception("invalid key type")
-
-def ReadDiffFromFile(translations: tuple[str], filepath: str, result: dict):
-    pass
+            # is a sub section
+            # push the section name and recursively call this function
+            stack.append(entry['section'])
+            NlpJson2PlainJson(entry, stack, keyList, valueList)
+            stack.pop()
diff --git a/NlpProc/README.md b/NlpProc/README.md
new file mode 100644
index 0000000..c71b6b0
--- /dev/null
+++ b/NlpProc/README.md
@@ -0,0 +1,13 @@
+# Nlp Proc
+
+Example:
+
+`py NlpJsonDecoder.py ../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index ../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index ../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index ../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index`
+
+```
+py NlpJsonDecoder.py
+../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index
+../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index
+../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index
+../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index
+```