diff --git a/NlpParser/NlpRunner.java b/NlpParser/NlpRunner.java index 1e20bc0..af1467f 100644 --- a/NlpParser/NlpRunner.java +++ b/NlpParser/NlpRunner.java @@ -23,7 +23,7 @@ import java.nio.charset.StandardCharsets; public class NlpRunner { public static class NlpJsonConverter extends NlpBaseListener { public NlpJsonConverter() { - mGsonInstance = new GsonBuilder().setPrettyPrinting().create(); + mGsonInstance = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create(); mRoot = new JsonObject(); mSection = new JsonArray(); mSectionStack = new Stack(); diff --git a/NlpProc/NlpJsonEncoder.py b/NlpProc/NlpJsonEncoder.py index 05d8f8f..2391d4a 100644 --- a/NlpProc/NlpJsonEncoder.py +++ b/NlpProc/NlpJsonEncoder.py @@ -2,11 +2,18 @@ import NlpUtils import jsondiff import collections -g_SupportedEncoding = { - 'zh-cn': ('Chinese', ('utf-8', 'gb2312', ), ) -} +if NlpUtils.g_EnableDebugging: + g_SupportedEncoding = { + 'template': ('English', ('ascii', ), ) + } +else: + g_SupportedEncoding = { + 'zh-cn': ('Chinese', ('utf-8', 'gb2312', ), ) + } VtTrDataTuple = collections.namedtuple('VtTrDataTuple', ('rawNlp', 'trTemplate', 'trDiff', 'trIndex')) +def GetNlpJsonPath(ver: str, lang: str) -> str: + return f'../NlpTr/out/VT{ver}.{lang}.json' def GetRawNlpPath(ver: str, lang: str, enc: str) -> str: return f'../NlpTr/out/VT{ver}.{lang}.{enc}.txt' def GetTrPath(ver: str, lang: str) -> str: @@ -60,7 +67,8 @@ if __name__ == "__main__": # convert plain json to nlp json nlpJson = NlpUtils.PlainJson2NlpJson(plainKeys, plainValues) - NlpUtils.DumpJson(GetRawNlpPath(ver, lang, '')[:-5] + '.json', nlpJson) + if NlpUtils.g_EnableDebugging: + NlpUtils.DumpJson(GetNlpJsonPath(ver, lang), nlpJson) # write into file with different encoding lang_macro, encs = g_SupportedEncoding[lang] diff --git a/NlpProc/NlpUtils.py b/NlpProc/NlpUtils.py index 38f92fc..62437d0 100644 --- a/NlpProc/NlpUtils.py +++ b/NlpProc/NlpUtils.py @@ -4,20 +4,30 @@ import io import json import re -g_EnableDebugging = False +g_EnableDebugging = True g_VirtoolsVersion: tuple[str] = ( '25', '35', '40', '50', ) -g_SupportedLangs: tuple[str] = ( - 'zh-cn', -) + +if g_EnableDebugging: + g_SupportedLangs: tuple[str] = ( + 'template', + ) +else: + g_SupportedLangs: tuple[str] = ( + 'zh-cn', + ) # ========== Basic File RW Functions ========== def DumpJson(filepath: str, jsonData: dict): with open(filepath, 'w', encoding='utf-8') as f: - json.dump(jsonData, f, indent=(2 if g_EnableDebugging else None), sort_keys=False) + json.dump(jsonData, f, + indent=(2 if g_EnableDebugging else None), + sort_keys=False, + ensure_ascii=False + ) def LoadJson(filepath: str) -> dict: with open(filepath, 'r', encoding='utf-8') as f: diff --git a/NlpSrc/README.md b/NlpSrc/README.md index 0887f92..9e7d353 100644 --- a/NlpSrc/README.md +++ b/NlpSrc/README.md @@ -2,5 +2,5 @@ Useful comparing differences command: -`diff -u VT25.json VT50.json` -`diff -u VT25.txt VT50.txt` +`diff -u --strip-trailing-cr VT25.json VT50.json` +`diff -u --strip-trailing-cr VT25.txt VT50.txt`