VirtoolsTranslation/NlpProc/NlpJsonDecoder.py

47 lines
1.7 KiB
Python
Raw Permalink Normal View History

2023-07-09 22:32:23 +08:00
import NlpUtils
import jsondiff
2023-07-10 21:42:52 +08:00
import collections
# Bundle of the four per-version values used by this script: the source NLP
# JSON, and the translation template, diff and index generated from it.
VtTrDataTuple = collections.namedtuple(
    'VtTrDataTuple',
    ['nlpJson', 'trTemplate', 'trDiff', 'trIndex'],
)
def ConstructVtTrDataTuple() -> tuple[VtTrDataTuple, ...]:
    """Build the path bundle for every known Virtools version.

    One VtTrDataTuple is produced for each entry of
    NlpUtils.g_VirtoolsVersion, in iteration order. The paths are relative
    strings; nothing is opened or checked here.

    Returns:
        A tuple with one VtTrDataTuple per version (variable length, hence
        the ``tuple[VtTrDataTuple, ...]`` annotation).
    """
    return tuple(
        VtTrDataTuple(
            nlpJson=f'../NlpSrc/VT{i}.json',
            trTemplate=f'../NlpTr/VT{i}.template.json',
            trDiff=f'../NlpTr/VT{i}.diff',
            trIndex=f'../NlpTr/VT{i}.index',
        )
        for i in NlpUtils.g_VirtoolsVersion
    )
2023-07-09 22:32:23 +08:00
if __name__ == "__main__":
    # Plain values of the previously processed version; None until the first
    # version has been handled.
    previousValues = None

    for entry in ConstructVtTrDataTuple():
        print(f'Processing {entry.nlpJson}...')

        # Read the NLP JSON and flatten it into parallel key / value sequences.
        loadedNlp = NlpUtils.LoadJson(entry.nlpJson)
        flatKeys, flatValues = NlpUtils.NlpJson2PlainJson(loadedNlp)

        # Write the index file.
        NlpUtils.DumpTrIndex(entry.trIndex, flatKeys)

        if previousValues is None:
            # First JSON: there is nothing to compare against, so write a
            # blank diff and put every value into the translation template.
            NlpUtils.DumpTrDiff(entry.trDiff, [], [])
            templateEntries = dict(zip(flatKeys, flatValues))
            NlpUtils.DumpTrTemplate(entry.trTemplate, templateEntries)
        else:
            # Compare with the previous version's values.
            diffResult = jsondiff.diff(previousValues, flatValues)
            # Separate the diff result into its parts.
            addedIdx, removedIdx, addedValues = NlpUtils.SeperatePlainJsonDiff(diffResult)
            # Write the diff.
            NlpUtils.DumpTrDiff(entry.trDiff, addedIdx, removedIdx)
            # Write the template with special treatment: only the inserted
            # entries are emitted, keyed by the plain key found at each
            # inserted index.
            templateEntries = {
                flatKeys[idx]: addedValues[pos]
                for pos, idx in enumerate(addedIdx)
            }
            NlpUtils.DumpTrTemplate(entry.trTemplate, templateEntries)

        # Remember this version's values for the next comparison.
        previousValues = flatValues
2023-07-09 22:32:23 +08:00