fix issue where vt does not recognize the output nlp file
This commit is contained in:
		@ -38,6 +38,7 @@ if __name__ == "__main__":
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            # pick data from pre-loaded dict
 | 
					            # pick data from pre-loaded dict
 | 
				
			||||||
            diffIdxData = preLoadedData[ver]
 | 
					            diffIdxData = preLoadedData[ver]
 | 
				
			||||||
 | 
					            plainKeys = diffIdxData.plainKeys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # load lang file
 | 
					            # load lang file
 | 
				
			||||||
            # and only keeps its value.
 | 
					            # and only keeps its value.
 | 
				
			||||||
@ -59,6 +60,8 @@ if __name__ == "__main__":
 | 
				
			|||||||
            # convert plain json to nlp json
 | 
					            # convert plain json to nlp json
 | 
				
			||||||
            nlpJson = NlpUtils.PlainJson2NlpJson(plainKeys, plainValues)
 | 
					            nlpJson = NlpUtils.PlainJson2NlpJson(plainKeys, plainValues)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            NlpUtils.DumpJson(GetRawNlpPath(ver, lang, '')[:-5] + '.json', nlpJson)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # write into file with different encoding
 | 
					            # write into file with different encoding
 | 
				
			||||||
            lang_macro, encs = g_SupportedEncoding[lang]
 | 
					            lang_macro, encs = g_SupportedEncoding[lang]
 | 
				
			||||||
            for enc in encs:
 | 
					            for enc in encs:
 | 
				
			||||||
 | 
				
			|||||||
@ -4,6 +4,8 @@ import io
 | 
				
			|||||||
import json
 | 
					import json
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					g_EnableDebugging = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
g_VirtoolsVersion: tuple[str] = (
 | 
					g_VirtoolsVersion: tuple[str] = (
 | 
				
			||||||
    '25', '35', '40', '50',
 | 
					    '25', '35', '40', '50',
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
@ -11,9 +13,11 @@ g_SupportedLangs: tuple[str] = (
 | 
				
			|||||||
    'zh-cn', 
 | 
					    'zh-cn', 
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ========== Basic File RW Functions ==========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def DumpJson(filepath: str, jsonData: dict):
 | 
					def DumpJson(filepath: str, jsonData: dict):
 | 
				
			||||||
    with open(filepath, 'w', encoding='utf-8') as f:
 | 
					    with open(filepath, 'w', encoding='utf-8') as f:
 | 
				
			||||||
        json.dump(jsonData, f, indent=4, sort_keys=False)
 | 
					        json.dump(jsonData, f, indent=(2 if g_EnableDebugging else None), sort_keys=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def LoadJson(filepath: str) -> dict:
 | 
					def LoadJson(filepath: str) -> dict:
 | 
				
			||||||
    with open(filepath, 'r', encoding='utf-8') as f:
 | 
					    with open(filepath, 'r', encoding='utf-8') as f:
 | 
				
			||||||
@ -122,24 +126,34 @@ def InternalNlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList:
 | 
				
			|||||||
            InternalNlpJson2PlainJson(entry, stack, keyList, valueList)
 | 
					            InternalNlpJson2PlainJson(entry, stack, keyList, valueList)
 | 
				
			||||||
            stack.pop()
 | 
					            stack.pop()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ========== Json Converter ==========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def PlainJson2NlpJson(keyList: list[str], valueList: list[str]) -> dict:
 | 
					def PlainJson2NlpJson(keyList: list[str], valueList: list[str]) -> dict:
 | 
				
			||||||
    # create the base section
 | 
					    # create the base section
 | 
				
			||||||
    # each section will have 3 k-v pair. language/section and entities are existed in original nlp json
 | 
					    # each section will have 3 k-v pair. language/section and entries are existed in original nlp json
 | 
				
			||||||
    # and key_map is served for path finding and convenient for looking for sub section.
 | 
					    # and key_map is served for path finding and convenient for looking for sub section.
 | 
				
			||||||
    result: dict = {
 | 
					    result: dict = {
 | 
				
			||||||
        "language": "English",
 | 
					        "language": "English",
 | 
				
			||||||
        "entities": [],
 | 
					        "entries": [],
 | 
				
			||||||
        "key_map": {}
 | 
					        "key_map": {}
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    # inerate list and construct dict
 | 
					    # inerate list and construct dict
 | 
				
			||||||
    for k, v in zip(keyList, valueList):
 | 
					    for k, v in zip(keyList, valueList):
 | 
				
			||||||
        InternalPlainJson2NlpJson(result, k, v)
 | 
					        InternalPlainJson2NlpJson(result, k, v)
 | 
				
			||||||
 | 
					    # remove useless key map
 | 
				
			||||||
 | 
					    InternalDelNlpJsonKeyMap(result)
 | 
				
			||||||
    return result
 | 
					    return result
 | 
				
			||||||
 | 
					def InternalDelNlpJsonKeyMap(nlpJson: dict):
 | 
				
			||||||
 | 
					    # recursively calling self
 | 
				
			||||||
 | 
					    for v in nlpJson['key_map'].values():
 | 
				
			||||||
 | 
					        InternalDelNlpJsonKeyMap(v)
 | 
				
			||||||
 | 
					    # then delete self
 | 
				
			||||||
 | 
					    del nlpJson['key_map']
 | 
				
			||||||
def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
 | 
					def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
 | 
				
			||||||
    keypath = pairKey.split('/')
 | 
					    keypath = pairKey.split('/')
 | 
				
			||||||
    # confirm last node is number and remove it
 | 
					    # confirm last node is number and remove it
 | 
				
			||||||
    assert keypath[-1].isdecimal()
 | 
					    assert keypath[-1].isdecimal()
 | 
				
			||||||
    keypath = keypath[0:-1]
 | 
					    keypath = keypath[:-1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # move to correct sub section
 | 
					    # move to correct sub section
 | 
				
			||||||
    for pathpart in keypath:
 | 
					    for pathpart in keypath:
 | 
				
			||||||
@ -150,21 +164,21 @@ def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
 | 
				
			|||||||
            # create a new one
 | 
					            # create a new one
 | 
				
			||||||
            sub_section = {
 | 
					            sub_section = {
 | 
				
			||||||
                'section': pathpart,
 | 
					                'section': pathpart,
 | 
				
			||||||
                'entities': [],
 | 
					                'entries': [],
 | 
				
			||||||
                'key_map': {}
 | 
					                'key_map': {}
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # add into current section
 | 
					            # add into current section
 | 
				
			||||||
            nlpJson['entities'].append(sub_section)
 | 
					            nlpJson['entries'].append(sub_section)
 | 
				
			||||||
            nlpJson['key_map'][pathpart] = sub_section
 | 
					            nlpJson['key_map'][pathpart] = sub_section
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # move to the new created sub section
 | 
					            # move to the new created sub section
 | 
				
			||||||
            nlpJson = sub_section
 | 
					            nlpJson = sub_section
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # insert data
 | 
					    # insert data
 | 
				
			||||||
    nlpJson['entities'].append(pairVal)
 | 
					    nlpJson['entries'].append(pairVal)
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ========== Raw Nlp Text Writer ==========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
 | 
					def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
 | 
				
			||||||
    # write in wb mode because we need explicitly write \r\n, not \n
 | 
					    # write in wb mode because we need explicitly write \r\n, not \n
 | 
				
			||||||
@ -172,16 +186,16 @@ def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
 | 
				
			|||||||
        f.write(f'Language:{lang_macro}\r\n'.encode(encoding, errors='ignore'))
 | 
					        f.write(f'Language:{lang_macro}\r\n'.encode(encoding, errors='ignore'))
 | 
				
			||||||
        InternalDumpNlpJson(f, encoding, 0, nlpJson)
 | 
					        InternalDumpNlpJson(f, encoding, 0, nlpJson)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
g_NlpJsonStrRepl1 = re.compile('\\\\')
 | 
					# g_NlpJsonStrRepl1 = re.compile('\\\\')
 | 
				
			||||||
g_NlpJsonStrRepl2 = re.compile('\"')
 | 
					g_NlpJsonStrRepl2 = re.compile('\"')
 | 
				
			||||||
def NlpJsonStringProcessor(strl: str) -> str:
 | 
					def NlpJsonStringProcessor(strl: str) -> str:
 | 
				
			||||||
    return g_NlpJsonStrRepl2.sub('\"\"', strl)
 | 
					    return g_NlpJsonStrRepl2.sub('\"\"', strl)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def InternalDumpNlpJson(f: io.BufferedWriter, encoding: str, depth: int, nlpJson: dict):
 | 
					def InternalDumpNlpJson(f: io.BufferedWriter, encoding: str, depth: int, nlpJson: dict):
 | 
				
			||||||
    assert 'entities' in nlpJson
 | 
					    assert 'entries' in nlpJson
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    is_first: bool = True
 | 
					    is_first: bool = True
 | 
				
			||||||
    for entity in nlpJson['entities']:
 | 
					    for entity in nlpJson['entries']:
 | 
				
			||||||
        if isinstance(entity, str):
 | 
					        if isinstance(entity, str):
 | 
				
			||||||
            # write comma if not the first element
 | 
					            # write comma if not the first element
 | 
				
			||||||
            if not is_first: f.write(','.encode(encoding))
 | 
					            if not is_first: f.write(','.encode(encoding))
 | 
				
			||||||
 | 
				
			|||||||
@ -3,3 +3,4 @@
 | 
				
			|||||||
Example:
 | 
					Example:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Create templates: `py NlpJsonDecoder.py`
 | 
					Create templates: `py NlpJsonDecoder.py`
 | 
				
			||||||
 | 
					Compile translations: `py NlpJsonEncoder.py`
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user