update python script

This commit is contained in:
yyc12345 2023-07-09 22:32:23 +08:00
parent b62f6c2fa4
commit 6eaa91fc9a
6 changed files with 168 additions and 57 deletions

1
.gitignore vendored
View File

@ -9,6 +9,7 @@ NlpParser/*
!NlpParser/NlpRunner.java
!NlpParser/testbench.txt
!NlpParser/README.md
NlpTr/*
.vscode/

12
NlpProc/NlpDirectCmp.py Normal file
View File

@ -0,0 +1,12 @@
import NlpUtils
import jsondiff
import sys
# Load the two NLP JSON files named on the command line.
# NlpUtils defines its loader as LoadJson (renamed from LoadJsonFromFile),
# so the old name would fail with AttributeError.
json1 = NlpUtils.LoadJson(sys.argv[1])
json2 = NlpUtils.LoadJson(sys.argv[2])

# Flatten both documents; only the value lists are compared, the key lists
# (first tuple element) are discarded.
(_, value1, ) = NlpUtils.NlpJson2PlainJsonWrapper(json1)
(_, value2, ) = NlpUtils.NlpJson2PlainJsonWrapper(json2)

# Print the jsondiff result for the two flattened value lists.
diff = jsondiff.diff(value1, value2)
print(diff)

55
NlpProc/NlpJsonDecoder.py Normal file
View File

@ -0,0 +1,55 @@
import NlpUtils
import jsondiff
import sys, collections
# One group of command-line arguments describing a single Virtools version:
# the source NLP JSON followed by the template, diff and index paths.
CmdArgvPair = collections.namedtuple('CmdArgvPair', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex'))


def CmdArgvAnalyzer() -> tuple[CmdArgvPair, ...]:
    """Split the command-line arguments into consecutive ``CmdArgvPair`` groups.

    Returns:
        A tuple holding one ``CmdArgvPair`` per group of arguments, in
        command-line order. It is empty when no arguments were passed.

    Prints ``invalid parameter.`` and exits the process with status 1 when
    the number of arguments is not a multiple of the group size.
    """
    # The group size follows the namedtuple definition instead of a
    # separately maintained magic number.
    groupSize = len(CmdArgvPair._fields)
    args = sys.argv[1:]
    if len(args) % groupSize != 0:
        print("invalid parameter.")
        sys.exit(1)
    return tuple(
        CmdArgvPair._make(args[start:start + groupSize])
        for start in range(0, len(args), groupSize)
    )
# For each Virtools version the script reads 1 file as its reference:
# 0. the nlp json file
# and writes 3 files for that version's translation:
# 0. the translation template json
# 0. the diff result compared with the previous version
# 0. a list containing the key of each value in the template json
# So every single Virtools version needs 4 command-line arguments.
if __name__ == "__main__":
    # Flattened values of the previously processed version; stays None until
    # the first version has been handled.
    lastValues = None
    for version in CmdArgvAnalyzer():
        # Load this version's NLP JSON and flatten it into key / value lists.
        (keys, values, ) = NlpUtils.NlpJson2PlainJsonWrapper(NlpUtils.LoadJson(version.nlpJson))

        # The index file always lists every key of this version.
        NlpUtils.DumpTrIndex(version.trIndex, keys)

        if lastValues is not None:
            # Later versions: diff against the previous version's values and
            # split the result into inserted / deleted parts.
            (addedKeys, removedKeys, addedValues) = NlpUtils.SeperatePlainJsonDiff(
                jsondiff.diff(lastValues, values)
            )
            NlpUtils.DumpTrDiff(version.trDiff, addedKeys, removedKeys)
            # The template only holds the inserted values, keyed by their
            # position within the inserted list.
            NlpUtils.DumpTrTemplate(version.trTemplate, dict(enumerate(addedValues)))
        else:
            # First version: there is nothing to diff against, so write an
            # empty diff and a template containing every key / value pair.
            NlpUtils.DumpTrDiff(version.trDiff, [], [])
            NlpUtils.DumpTrTemplate(version.trTemplate, dict(zip(keys, values)))

        # Remember this version for the next iteration's comparison.
        lastValues = values

View File

@ -1,26 +0,0 @@
import NlpUtils
import sys, collections
def CreateBaseJsonWrapper(baseJson: dict) -> dict[str, str]:
    """Flatten ``baseJson`` into a new dict of dotted path -> string entry.

    Starts ``CreateBaseJson`` with an empty section stack and an empty
    result dict, then returns that dict.
    """
    flattened: dict[str, str] = {}
    CreateBaseJson(baseJson, collections.deque(), flattened)
    return flattened
def CreateBaseJson(baseJson: dict, stack: collections.deque, result: dict[str, str]):
    """Recursively collect the string entries of ``baseJson`` into ``result``.

    ``baseJson`` must be a dict with an ``'entries'`` list. String entries
    are stored under a key made of the section names currently on ``stack``
    followed by the entry's ordinal among the strings of its section, all
    joined with ``'.'``. Any other entry is treated as a sub-section: its
    ``'section'`` name is pushed on ``stack`` for the recursive call and
    popped afterwards.
    """
    assert isinstance(baseJson, dict)
    assert 'entries' in baseJson

    # Only string entries advance this index; sub-sections do not.
    stringIndex = 0
    for child in baseJson['entries']:
        if not isinstance(child, str):
            stack.append(child['section'])
            CreateBaseJson(child, stack, result)
            stack.pop()
            continue
        pathParts = tuple(stack) + (str(stringIndex), )
        result['.'.join(pathParts)] = child
        stringIndex += 1
if __name__ == "__main__":
    # argv[1] is the NLP JSON to read; argv[2] receives the flattened JSON.
    flattened = CreateBaseJsonWrapper(NlpUtils.LoadJsonFromFile(sys.argv[1]))
    NlpUtils.WriteJsonToFile(sys.argv[2], flattened)

View File

@ -3,44 +3,100 @@ import collections
import io
import json
def WriteJsonToFile(filepath: str, jsonData: dict):
def DumpJson(filepath: str, jsonData: dict):
    """Serialize ``jsonData`` as JSON into the file at ``filepath``.

    The file is opened for writing as UTF-8 and the JSON is emitted with a
    four-space indent and without sorting keys.
    """
    with open(filepath, mode='w', encoding='utf-8') as stream:
        json.dump(jsonData, stream, sort_keys=False, indent=4)
def LoadJsonFromFile(filepath: str) -> dict:
def LoadJson(filepath: str) -> dict:
    """Parse the UTF-8 encoded JSON file at ``filepath`` and return the result."""
    with open(filepath, mode='r', encoding='utf-8') as stream:
        parsed = json.load(stream)
    return parsed
def SaveDiffToFileWrapper(jsonDiffData: dict, filepath: str) -> dict[str, str]:
    """Write the diff described by ``jsonDiffData`` to ``filepath``.

    Opens ``filepath`` for writing as UTF-8 and runs ``SaveDiffToFile`` on it
    with an empty path stack. Returns the dict that ``SaveDiffToFile``
    filled in during the walk.
    """
    collected = {}
    pathStack: collections.deque = collections.deque()
    with open(filepath, mode='w', encoding='utf-8') as diffStream:
        SaveDiffToFile(jsonDiffData, diffStream, pathStack, collected)
    return collected
def DumpTrIndex(filepath: str, indexData: list[str]):
    """Write each entry of ``indexData`` to ``filepath``, one entry per line.

    The file is opened for writing as UTF-8; every entry is followed by a
    newline character.
    """
    with open(filepath, mode='w', encoding='utf-8') as stream:
        stream.writelines(entry + '\n' for entry in indexData)
def SaveDiffToFile(jsonDiffData: dict, fs: io.TextIOWrapper, stack: collections.deque, result: dict[tuple[int], str]):
    """Walk a nested diff dict, logging inserts and deletes to ``fs``.

    ``jsonDiffData`` must be a dict whose only key is ``"entries"`` and whose
    value is itself a dict. For every item of that dict:

    * an integer key is pushed on ``stack`` while the item is processed
      recursively;
    * the jsondiff ``insert`` symbol maps to ``(index, entry)`` pairs; each
      entry is stored in ``result`` under its dotted path and an
      ``i <path>`` line is written to ``fs``;
    * the jsondiff ``delete`` symbol maps to indices; a ``d <path>`` line is
      written to ``fs`` for each of them.
    """
    assert isinstance(jsonDiffData, dict)
    assert len(jsonDiffData) == 1
    assert "entries" in jsonDiffData
    assert isinstance(jsonDiffData["entries"], dict)

    for key, item in jsonDiffData["entries"].items():
        if isinstance(key, int):
            stack.append(key)
            SaveDiffToFile(item, fs, stack, result)
            stack.pop()
        elif key == jsondiff.symbols.insert:
            for (modIdx, modEntry) in item:
                # The stack holds integer keys (and the index may be an
                # integer too), so every part is converted before joining;
                # str.join rejects non-string items.
                stridx = ".".join(str(part) for part in (*stack, modIdx))
                result[stridx] = modEntry
                fs.write(f'i {stridx}\n')
        elif key == jsondiff.symbols.delete:
            for delIdx in item:
                stridx = ".".join(str(part) for part in (*stack, delIdx))
                fs.write(f'd {stridx}\n')


def LoadTrIndex(filepath: str) -> list[str]:
    """Read the UTF-8 file at ``filepath`` and return its lines as a list.

    Newline characters are stripped from each line; blank lines are kept as
    empty strings.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return [ln.strip('\n') for ln in f]
def DumpTrTemplate(filepath: str, templateData: dict[str, str]):
    """Write the translation template ``templateData`` to ``filepath``.

    Delegates to ``DumpJson``, so the template is stored as a JSON file.
    """
    DumpJson(filepath, templateData)
def LoadTrTemplate(filepath: str) -> dict[str, str]:
    """Read a translation template from the JSON file at ``filepath``.

    Delegates to ``LoadJson`` and returns whatever it parsed.
    """
    template = LoadJson(filepath)
    return template
def DumpTrDiff(filepath: str, insertedKey: list[str], deletedKey: list[str]):
    """Write a diff file listing inserted keys, then deleted keys.

    Each inserted key becomes an ``i/<key>`` line and each deleted key a
    ``d/<key>`` line; the file is written as UTF-8.
    """
    with open(filepath, mode='w', encoding='utf-8') as stream:
        stream.writelines(f'i/{key}\n' for key in insertedKey)
        stream.writelines(f'd/{key}\n' for key in deletedKey)
def LoadTrDiff(filepath: str) -> tuple[list[str], list[str]]:
    """Read a diff file made of ``i/<key>`` and ``d/<key>`` lines.

    Returns:
        A tuple ``(insertedKey, deletedKey)`` holding the keys of the ``i``
        and ``d`` lines respectively, each in file order.

    Raises:
        ValueError: if a non-blank line has no ``/`` separator or its tag is
            neither ``i`` nor ``d``.
    """
    insertedKey: list[str] = []
    deletedKey: list[str] = []
    with open(filepath, 'r', encoding='utf-8') as f:
        for ln in f:
            ln = ln.strip('\n')
            if ln == '':
                # Tolerate blank lines (e.g. a trailing one) instead of
                # failing on a missing field.
                continue
            # Split on the first '/' only, so a key that itself contains
            # '/' is returned whole.
            (tag, sep, key) = ln.partition('/')
            if sep == '' or tag not in ('i', 'd'):
                raise ValueError("invalid key type")
            if tag == 'i':
                insertedKey.append(key)
            else:
                deletedKey.append(key)
    return (insertedKey, deletedKey)


def ReadDiffFromFile(translations: tuple[str], filepath: str, result: dict):
    # Placeholder: the body performs no work.
    pass
def SeperatePlainJsonDiff(diffData: dict) -> tuple:
    """Split a jsondiff result into ``(insertedKey, deletedKey, insertedVal)``.

    When ``diffData`` contains the jsondiff ``insert`` symbol, each of its
    ``(index, value)`` pairs contributes to ``insertedKey`` and
    ``insertedVal`` in order. When it contains the ``delete`` symbol, a copy
    of that entry is returned as ``deletedKey``. A missing symbol yields
    empty lists.
    """
    insertedKey: list[str] = []
    insertedVal: list[str] = []
    deletedKey = []

    if jsondiff.insert in diffData:
        for pair in diffData[jsondiff.insert]:
            (entryIdx, entryVal, ) = pair
            insertedKey.append(entryIdx)
            insertedVal.append(entryVal)

    if jsondiff.delete in diffData:
        # Slice-copy so the caller's diff data is not aliased.
        deletedKey = diffData[jsondiff.delete][:]

    return (insertedKey, deletedKey, insertedVal)
def NlpJson2PlainJsonWrapper(nlpJson: dict) -> tuple:
    """Flatten ``nlpJson`` and return the tuple ``(keyList, valueList)``.

    Runs ``NlpJson2PlainJson`` with an empty section stack and two fresh
    lists, which it fills in parallel.
    """
    keys: list[str] = []
    values: list[str] = []
    NlpJson2PlainJson(nlpJson, collections.deque(), keys, values)
    return (keys, values, )
def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]):
    """Recursively flatten ``nlpJson`` into parallel key and value lists.

    ``nlpJson`` must be a dict with an ``'entries'`` list. Every string
    entry is a data node: its value is appended to ``valueList`` and its key
    (the section names on ``stack`` plus the entry's ordinal among the
    strings of its section, joined with ``'/'``) to ``keyList``. Any other
    entry is a sub-section whose ``'section'`` name is pushed on ``stack``
    for the recursive call and popped afterwards.
    """
    assert isinstance(nlpJson, dict)
    assert 'entries' in nlpJson

    # Counts only the data nodes of this section; sub-sections are skipped.
    dataIndex = 0
    for node in nlpJson['entries']:
        if not isinstance(node, str):
            # Sub-section: descend with its name on the stack.
            stack.append(node['section'])
            NlpJson2PlainJson(node, stack, keyList, valueList)
            stack.pop()
            continue
        # Data node: record its path and value.
        pathParts = tuple(stack) + (str(dataIndex), )
        keyList.append('/'.join(pathParts))
        valueList.append(node)
        dataIndex += 1

13
NlpProc/README.md Normal file
View File

@ -0,0 +1,13 @@
# Nlp Proc
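Example (each Virtools version takes a group of four arguments: the source NLP JSON, followed by the template, diff and index output paths):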
`py NlpJsonDecoder.py ../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index ../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index ../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index ../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index`
```
py NlpJsonDecoder.py
../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index
../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index
../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index
../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index
```