From 799ec37b65c25e3637508b7c6f5aac03e471b993 Mon Sep 17 00:00:00 2001
From: yyc12345 <yyc12321@outlook.com>
Date: Wed, 12 Jul 2023 22:42:58 +0800
Subject: [PATCH] fix issue that vt does not recognize the output nlp file

---
 NlpProc/NlpJsonEncoder.py |  3 +++
 NlpProc/NlpUtils.py       | 36 +++++++++++++++++++++++++-----------
 NlpProc/README.md         |  1 +
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/NlpProc/NlpJsonEncoder.py b/NlpProc/NlpJsonEncoder.py
index fa6d6a1..05d8f8f 100644
--- a/NlpProc/NlpJsonEncoder.py
+++ b/NlpProc/NlpJsonEncoder.py
@@ -38,6 +38,7 @@ if __name__ == "__main__":
 
             # pick data from pre-loaded dict
             diffIdxData = preLoadedData[ver]
+            plainKeys = diffIdxData.plainKeys
 
             # load lang file
             # and only keeps its value.
@@ -59,6 +60,8 @@ if __name__ == "__main__":
             # convert plain json to nlp json
             nlpJson = NlpUtils.PlainJson2NlpJson(plainKeys, plainValues)
 
+            NlpUtils.DumpJson(GetRawNlpPath(ver, lang, '')[:-5] + '.json', nlpJson)
+
             # write into file with different encoding
             lang_macro, encs = g_SupportedEncoding[lang]
             for enc in encs:
diff --git a/NlpProc/NlpUtils.py b/NlpProc/NlpUtils.py
index 4abc24d..38f92fc 100644
--- a/NlpProc/NlpUtils.py
+++ b/NlpProc/NlpUtils.py
@@ -4,6 +4,8 @@ import io
 import json
 import re
 
+g_EnableDebugging = False
+
 g_VirtoolsVersion: tuple[str] = (
     '25', '35', '40', '50',
 )
@@ -11,9 +13,11 @@ g_SupportedLangs: tuple[str] = (
     'zh-cn', 
 )
 
+# ========== Basic File RW Functions ==========
+
 def DumpJson(filepath: str, jsonData: dict):
     with open(filepath, 'w', encoding='utf-8') as f:
-        json.dump(jsonData, f, indent=4, sort_keys=False)
+        json.dump(jsonData, f, indent=(2 if g_EnableDebugging else None), sort_keys=False)
 
 def LoadJson(filepath: str) -> dict:
     with open(filepath, 'r', encoding='utf-8') as f:
@@ -122,24 +126,34 @@ def InternalNlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList:
             InternalNlpJson2PlainJson(entry, stack, keyList, valueList)
             stack.pop()
 
+# ========== Json Converter ==========
+
 def PlainJson2NlpJson(keyList: list[str], valueList: list[str]) -> dict:
     # create the base section
-    # each section will have 3 k-v pair. language/section and entities are existed in original nlp json
+    # each section has 3 k-v pairs. language/section and entries exist in the original nlp json
     # and key_map is served for path finding and convenient for looking for sub section.
     result: dict = {
         "language": "English",
-        "entities": [],
+        "entries": [],
         "key_map": {}
     }
     # inerate list and construct dict
     for k, v in zip(keyList, valueList):
         InternalPlainJson2NlpJson(result, k, v)
+    # remove useless key map
+    InternalDelNlpJsonKeyMap(result)
     return result
+def InternalDelNlpJsonKeyMap(nlpJson: dict):
+    # recursively calling self
+    for v in nlpJson['key_map'].values():
+        InternalDelNlpJsonKeyMap(v)
+    # then delete self
+    del nlpJson['key_map']
 def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
     keypath = pairKey.split('/')
     # confirm last node is number and remove it
     assert keypath[-1].isdecimal()
-    keypath = keypath[0:-1]
+    keypath = keypath[:-1]
 
     # move to correct sub section
     for pathpart in keypath:
@@ -150,21 +164,21 @@ def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
             # create a new one
             sub_section = {
                 'section': pathpart,
-                'entities': [],
+                'entries': [],
                 'key_map': {}
             }
 
             # add into current section
-            nlpJson['entities'].append(sub_section)
+            nlpJson['entries'].append(sub_section)
             nlpJson['key_map'][pathpart] = sub_section
 
             # move to the new created sub section
             nlpJson = sub_section
 
     # insert data
-    nlpJson['entities'].append(pairVal)
-
+    nlpJson['entries'].append(pairVal)
 
+# ========== Raw Nlp Text Writer ==========
 
 def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
     # write in wb mode because we need explicitly write \r\n, not \n
@@ -172,16 +186,16 @@ def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
         f.write(f'Language:{lang_macro}\r\n'.encode(encoding, errors='ignore'))
         InternalDumpNlpJson(f, encoding, 0, nlpJson)
 
-g_NlpJsonStrRepl1 = re.compile('\\\\')
+# g_NlpJsonStrRepl1 = re.compile('\\\\')
 g_NlpJsonStrRepl2 = re.compile('\"')
 def NlpJsonStringProcessor(strl: str) -> str:
     return g_NlpJsonStrRepl2.sub('\"\"', strl)
 
 def InternalDumpNlpJson(f: io.BufferedWriter, encoding: str, depth: int, nlpJson: dict):
-    assert 'entities' in nlpJson
+    assert 'entries' in nlpJson
 
     is_first: bool = True
-    for entity in nlpJson['entities']:
+    for entity in nlpJson['entries']:
         if isinstance(entity, str):
             # write comma if not the first element
             if not is_first: f.write(','.encode(encoding))
diff --git a/NlpProc/README.md b/NlpProc/README.md
index f72bc21..f41fd20 100644
--- a/NlpProc/README.md
+++ b/NlpProc/README.md
@@ -3,3 +3,4 @@
 Example:
 
 Create templates: `py NlpJsonDecoder.py`
+Compile translations: `py NlpJsonEncoder.py`