update python script
This commit is contained in:
parent
6eaa91fc9a
commit
4651110885
@ -2,11 +2,11 @@ import NlpUtils
|
||||
import jsondiff
|
||||
import sys
|
||||
|
||||
json1 = NlpUtils.LoadJsonFromFile(sys.argv[1])
|
||||
json2 = NlpUtils.LoadJsonFromFile(sys.argv[2])
|
||||
json1 = NlpUtils.LoadJson(sys.argv[1])
|
||||
json2 = NlpUtils.LoadJson(sys.argv[2])
|
||||
|
||||
(_, value1, ) = NlpUtils.NlpJson2PlainJsonWrapper(json1)
|
||||
(_, value2, ) = NlpUtils.NlpJson2PlainJsonWrapper(json2)
|
||||
(_, value1, ) = NlpUtils.NlpJson2PlainJson(json1)
|
||||
(_, value2, ) = NlpUtils.NlpJson2PlainJson(json2)
|
||||
|
||||
diff = jsondiff.diff(value1, value2)
|
||||
print(diff)
|
||||
|
@ -1,34 +1,25 @@
|
||||
import NlpUtils
|
||||
import jsondiff
|
||||
import sys, collections
|
||||
import collections
|
||||
|
||||
CmdArgvPair = collections.namedtuple('CmdArgvPair', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex'))
|
||||
def CmdArgvAnalyzer() -> tuple[CmdArgvPair]:
|
||||
ls: list[CmdArgvPair] = []
|
||||
VtTrDataTuple = collections.namedtuple('VtTrDataTuple', ('nlpJson', 'trTemplate', 'trDiff', 'trIndex'))
|
||||
def ConstructVtTrDataTuple() -> tuple[VtTrDataTuple, ...]:
    """Build the file-path record for every known Virtools version.

    One VtTrDataTuple is produced per entry of NlpUtils.g_VirtoolsVersion,
    in the same order. Each record holds four relative paths: the NLP json
    source, the translation template, the diff file and the index file.

    Returns:
        A tuple of VtTrDataTuple, one per version.
    """
    return tuple(
        VtTrDataTuple(
            nlpJson=f'../NlpSrc/VT{i}.json',
            trTemplate=f'../NlpTr/VT{i}.template.json',
            trDiff=f'../NlpTr/VT{i}.diff',
            trIndex=f'../NlpTr/VT{i}.index',
        )
        for i in NlpUtils.g_VirtoolsVersion
    )
|
||||
|
||||
argc = len(sys.argv) - 1
|
||||
if argc % 4 != 0:
|
||||
print("invalid parameter.")
|
||||
sys.exit(1)
|
||||
|
||||
count = argc // 4
|
||||
return tuple(CmdArgvPair._make(sys.argv[1 + i * 4:5 + i * 4]) for i in range(count))
|
||||
|
||||
# script will order 1 file as reference
|
||||
# 0. the nlp json file
|
||||
# script will output 3 files for each version translation.
|
||||
# 0. translation template json
|
||||
# 0. the diff result comparing with the previous version
|
||||
# 0. a list including the key of each value in template json
|
||||
# so for a single Virtools version, we need 4 input arguments
|
||||
if __name__ == "__main__":
|
||||
resolvedArgv = CmdArgvAnalyzer()
|
||||
|
||||
prevJson = None
|
||||
for vtVer in resolvedArgv:
|
||||
for vtVer in ConstructVtTrDataTuple():
|
||||
print(f'Processing {vtVer.nlpJson}...')
|
||||
|
||||
# read nlp json and convert it into plain json
|
||||
nlpJson = NlpUtils.LoadJson(vtVer.nlpJson)
|
||||
(plainKeys, plainValues, ) = NlpUtils.NlpJson2PlainJsonWrapper(nlpJson)
|
||||
(plainKeys, plainValues, ) = NlpUtils.NlpJson2PlainJson(nlpJson)
|
||||
|
||||
# write index file
|
||||
NlpUtils.DumpTrIndex(vtVer.trIndex, plainKeys)
|
||||
@ -48,7 +39,7 @@ if __name__ == "__main__":
|
||||
# write diff
|
||||
NlpUtils.DumpTrDiff(vtVer.trDiff, insertedKey, deletedKey)
|
||||
# write template with special treatment
|
||||
NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict((i, j) for i, j in enumerate(insertedVal)))
|
||||
NlpUtils.DumpTrTemplate(vtVer.trTemplate, dict((plainKeys[insertedKey[i]], insertedVal[i]) for i in range(len(insertedKey))))
|
||||
|
||||
# assign prev json
|
||||
prevJson = plainValues
|
||||
|
33
NlpProc/NlpJsonEncoder.py
Normal file
33
NlpProc/NlpJsonEncoder.py
Normal file
@ -0,0 +1,33 @@
|
||||
import NlpUtils
|
||||
import jsondiff
|
||||
import collections
|
||||
|
||||
g_SupportedEncoding = {
|
||||
'zh-cn': ('utf-8', 'gb2312', )
|
||||
}
|
||||
|
||||
VtTrDataTuple = collections.namedtuple('VtTrDataTuple', ('rawNlp', 'trTemplate', 'trDiff', 'trIndex'))
|
||||
def GetRawNlpPath(ver: str, lang: str) -> str:
    """Return the relative path of the raw NLP text file for a version and language.

    Args:
        ver: Version string inserted after the ``VT`` prefix.
        lang: Language code inserted before the ``.txt`` suffix.
    """
    return f'../NlpTr/out/VT{ver}.{lang}.txt'
|
||||
def GetTrPath(ver: str, lang: str) -> str:
    """Return the relative path of the translation json for a version and language.

    Args:
        ver: Version string inserted after the ``VT`` prefix.
        lang: Language code inserted before the ``.json`` suffix.
    """
    return f'../NlpTr/VT{ver}.{lang}.json'
|
||||
def GetTrDiffPath(ver: str) -> str:
    """Return the relative path of the diff file for a version.

    Args:
        ver: Version string inserted after the ``VT`` prefix.
    """
    return f'../NlpTr/VT{ver}.diff'
|
||||
def GetTrIndexPath(ver: str) -> str:
    """Return the relative path of the index file for a version.

    Args:
        ver: Version string inserted after the ``VT`` prefix.
    """
    return f'../NlpTr/VT{ver}.index'
|
||||
|
||||
if __name__ == "__main__":
    # NOTE: the processing steps below are only outlined as comments;
    # the sole statement executed per (version, language, encoding) is the print.
    for ver in NlpUtils.g_VirtoolsVersion:
        # load diff and index data

        for lang in NlpUtils.g_SupportedLangs:
            # load lang file

            # patch it

            # convert plain json to nested json

            # write into file with different encoding
            for enc in g_SupportedEncoding[lang]:
                print(f'Process {ver}.{lang}.{enc}...')
|
@ -1,2 +0,0 @@
|
||||
import NlpUtils
|
||||
import sys, collections
|
@ -1,4 +0,0 @@
|
||||
import NlpUtils
|
||||
import sys, collections
|
||||
|
||||
|
@ -3,6 +3,13 @@ import collections
|
||||
import io
|
||||
import json
|
||||
|
||||
# Version strings iterated by the scripts; annotated as a variable-length
# tuple because `tuple[str]` would describe a tuple of exactly one element.
g_VirtoolsVersion: tuple[str, ...] = (
    '25', '35', '40', '50',
)
# Language codes iterated by the scripts; variable-length so that more
# languages can be appended without touching the annotation.
g_SupportedLangs: tuple[str, ...] = (
    'zh-cn',
)
|
||||
|
||||
def DumpJson(filepath: str, jsonData: dict):
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump(jsonData, f, indent=4, sort_keys=False)
|
||||
@ -60,7 +67,7 @@ def LoadTrDiff(filepath: str) -> dict:
|
||||
|
||||
# return a tuple. (insertedKey, deletedKey, insertedVal)
|
||||
def SeperatePlainJsonDiff(diffData: dict) -> tuple:
|
||||
insertedKey: list[str] = []
|
||||
insertedKey: list[int] = []
|
||||
insertedVal: list[str] = []
|
||||
|
||||
if jsondiff.insert in diffData:
|
||||
@ -75,14 +82,28 @@ def SeperatePlainJsonDiff(diffData: dict) -> tuple:
|
||||
|
||||
return (insertedKey, deletedKey, insertedVal)
|
||||
|
||||
def CombinePlainJsonDiff(insertedKey: list[int], deletedKey: list[int], insertedVal: list[str]) -> dict:
    """Assemble a diff dict from separate insert/delete lists.

    Presumably the inverse of SeperatePlainJsonDiff, which returns the same
    three lists -- confirm against that function's full body.

    Args:
        insertedKey: Keys of the inserted entries.
        deletedKey: Keys of the deleted entries.
        insertedVal: Inserted values, parallel to insertedKey.

    Returns:
        A dict that maps ``jsondiff.insert`` to a list of ``(key, value)``
        pairs and ``jsondiff.delete`` to a copy of deletedKey. A symbol is
        omitted when its list is empty, so two empty inputs give ``{}``.

    Raises:
        AssertionError: If insertedKey and insertedVal differ in length.
    """
    assert len(insertedKey) == len(insertedVal), \
        'insertedKey and insertedVal must have the same length'

    result: dict = {}
    if insertedKey:
        # pair each inserted key with its value, preserving order
        result[jsondiff.insert] = list(zip(insertedKey, insertedVal))
    if deletedKey:
        # copy so the result does not alias the caller's list
        result[jsondiff.delete] = deletedKey[:]
    return result
|
||||
|
||||
# return a tuple. (keyList, valueList)
|
||||
def NlpJson2PlainJsonWrapper(nlpJson: dict) -> tuple:
|
||||
def NlpJson2PlainJson(nlpJson: dict) -> tuple:
|
||||
keyList: list[str] = []
|
||||
valueList: list[str] = []
|
||||
stack: collections.deque = collections.deque()
|
||||
NlpJson2PlainJson(nlpJson, stack, keyList, valueList)
|
||||
InternalNlpJson2PlainJson(nlpJson, stack, keyList, valueList)
|
||||
return (keyList, valueList, )
|
||||
def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]):
|
||||
def InternalNlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]):
|
||||
assert isinstance(nlpJson, dict)
|
||||
assert 'entries' in nlpJson
|
||||
|
||||
@ -97,6 +118,6 @@ def NlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str
|
||||
# is a sub section
|
||||
# push the section name and recursively call this function
|
||||
stack.append(entry['section'])
|
||||
NlpJson2PlainJson(entry, stack, keyList, valueList)
|
||||
InternalNlpJson2PlainJson(entry, stack, keyList, valueList)
|
||||
stack.pop()
|
||||
|
||||
|
@ -2,12 +2,4 @@
|
||||
|
||||
Example:
|
||||
|
||||
`py NlpJsonDecoder.py ../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index ../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index ../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index ../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index`
|
||||
|
||||
```
|
||||
py NlpJsonDecoder.py
|
||||
../NlpSrc/VT25.json ../NlpTr/VT25.template.json ../NlpTr/VT25.diff ../NlpTr/VT25.index
|
||||
../NlpSrc/VT35.json ../NlpTr/VT35.template.json ../NlpTr/VT35.diff ../NlpTr/VT35.index
|
||||
../NlpSrc/VT40.json ../NlpTr/VT40.template.json ../NlpTr/VT40.diff ../NlpTr/VT40.index
|
||||
../NlpSrc/VT50.json ../NlpTr/VT50.template.json ../NlpTr/VT50.diff ../NlpTr/VT50.index
|
||||
```
|
||||
Create templates: `py NlpJsonDecoder.py`
|
||||
|
Loading…
Reference in New Issue
Block a user