import collections
import io
import json
import re

import jsondiff

g_EnableDebugging = False

g_VirtoolsVersion: tuple[str, ...] = (
    '25', '30', '35', '40', '50',
)

# When g_EnableDebugging is set only the 'template' language is listed;
# otherwise only 'zh-cn' is.
g_SupportedLangs: tuple[str, ...] = ('template', ) if g_EnableDebugging else ('zh-cn', )


# ========== Basic File RW Functions ==========

def DumpJson(filepath: str, jsonData: dict) -> None:
    """Write jsonData to filepath as UTF-8 JSON.

    The output is indented by 2 spaces, keeps the dict's own key order and
    writes non-ASCII characters verbatim instead of escaping them.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(jsonData, f, indent=2, sort_keys=False, ensure_ascii=False)


def LoadJson(filepath: str) -> dict:
    """Read and return the JSON document stored at filepath (UTF-8)."""
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)


def DumpTrIndex(filepath: str, indexData: list[str]) -> None:
    """Write every item of indexData to filepath, one item per line."""
    with open(filepath, 'w', encoding='utf-8') as f:
        for item in indexData:
            f.write(item)
            f.write('\n')


def LoadTrIndex(filepath: str) -> list[str]:
    """Read filepath and return its lines with the line terminator removed."""
    with open(filepath, 'r', encoding='utf-8') as f:
        return [ln.rstrip('\n') for ln in f]


def DumpTrTemplate(filepath: str, templateData: dict[str, str]) -> None:
    """Write templateData to filepath; same on-disk format as DumpJson."""
    DumpJson(filepath, templateData)


def LoadTrTemplate(filepath: str) -> dict[str, str]:
    """Read a template file from filepath; same on-disk format as LoadJson."""
    return LoadJson(filepath)


def DumpTrDiff(filepath: str, insertedKey: list[int], deletedKey: list[int]) -> None:
    """Write a diff file: one 'i/<index>' line per inserted index followed by
    one 'd/<index>' line per deleted index."""
    with open(filepath, 'w', encoding='utf-8') as f:
        for entryIdx in insertedKey:
            f.write(f'i/{entryIdx}\n')
        for entryIdx in deletedKey:
            f.write(f'd/{entryIdx}\n')


def LoadTrDiff(filepath: str) -> tuple[list[int], list[int]]:
    """Read a diff file written by DumpTrDiff.

    Returns a tuple (insertedKey, deletedKey).

    Blank lines are skipped. Raises ValueError when a record's tag is
    neither 'i' nor 'd', or when its index is not an integer.
    """
    insertedKey: list[int] = []
    deletedKey: list[int] = []
    with open(filepath, 'r', encoding='utf-8') as f:
        for lineno, ln in enumerate(f, start=1):
            record = ln.rstrip('\n')
            if not record:
                # tolerate empty lines, e.g. a trailing blank line
                continue
            tag, _, index = record.partition('/')
            if tag == 'i':
                insertedKey.append(int(index))
            elif tag == 'd':
                deletedKey.append(int(index))
            else:
                # an unknown tag must not be silently counted as a deletion
                raise ValueError(
                    f'{filepath}:{lineno}: unknown diff record {record!r}'
                )
    return (insertedKey, deletedKey)
def SeperatePlainJsonDiff(diffData: dict) -> tuple:
    """Split a diff dict into parallel lists.

    Returns a tuple (insertedKey, deletedKey, insertedVal).

    The records stored under jsondiff.insert are unpacked as (index, value)
    pairs; the value stored under jsondiff.delete is copied as-is. A missing
    key yields an empty list.
    """
    insertedKey: list[int] = []
    insertedVal: list[str] = []
    for entryIdx, entryVal in diffData.get(jsondiff.insert, ()):
        insertedKey.append(entryIdx)
        insertedVal.append(entryVal)

    if jsondiff.delete in diffData:
        deletedKey = diffData[jsondiff.delete][:]
    else:
        deletedKey = []

    return (insertedKey, deletedKey, insertedVal)


def CombinePlainJsonDiff(insertedKey: list[int], deletedKey: list[int], insertedVal: list[str]) -> dict:
    """Build a diff dict from the lists produced by SeperatePlainJsonDiff.

    insertedKey and insertedVal must have the same length. The
    jsondiff.insert / jsondiff.delete keys are only present in the result
    when the corresponding list is not empty.
    """
    assert len(insertedKey) == len(insertedVal), \
        'insertedKey and insertedVal must have the same length'

    result: dict = {}
    if insertedKey:
        result[jsondiff.insert] = list(zip(insertedKey, insertedVal))
    if deletedKey:
        result[jsondiff.delete] = deletedKey[:]
    return result


def NlpJson2PlainJson(nlpJson: dict) -> tuple[list[str], list[str]]:
    """Flatten a nested nlp json into parallel key and value lists.

    Returns a tuple (keyList, valueList). Each key is the '/'-joined list of
    enclosing section names followed by the index of the string among the
    string entries of its section.
    """
    keyList: list[str] = []
    valueList: list[str] = []
    stack: collections.deque = collections.deque()
    InternalNlpJson2PlainJson(nlpJson, stack, keyList, valueList)
    return (keyList, valueList, )


def InternalNlpJson2PlainJson(nlpJson: dict, stack: collections.deque, keyList: list[str], valueList: list[str]):
    """Recursive worker of NlpJson2PlainJson.

    stack holds the names of the sections enclosing nlpJson; results are
    appended to keyList and valueList in place.
    """
    assert isinstance(nlpJson, dict), 'a section must be a dict'
    assert 'entries' in nlpJson, "a section must have an 'entries' list"

    # counter only advances on data nodes, so sub sections do not consume an index
    counter = 0
    for entry in nlpJson['entries']:
        if isinstance(entry, str):
            # data node: record its path and value
            keyList.append('/'.join((*stack, str(counter))))
            valueList.append(entry)
            counter += 1
        else:
            # sub section: push its name, recurse, then restore the stack
            stack.append(entry['section'])
            InternalNlpJson2PlainJson(entry, stack, keyList, valueList)
            stack.pop()


# ========== Json Converter ==========

def PlainJson2NlpJson(keyList: list[str], valueList: list[str]) -> dict:
    """Rebuild a nested nlp json from parallel key and value lists.

    keyList and valueList must have the same length. Every section of the
    returned dict carries an extra 'key_map' entry mapping sub section names
    to the sub section dicts; use RemoveKeyMapInGeneratedNlpJson to strip it.
    """
    # zip() would silently drop the tail of the longer list
    assert len(keyList) == len(valueList), \
        'keyList and valueList must have the same length'

    # Root section. 'language' and 'entries' mirror the nlp json layout,
    # 'key_map' is a helper index used to find sub sections by name.
    result: dict = {
        "language": "English",
        "entries": [],
        "key_map": {}
    }
    for k, v in zip(keyList, valueList):
        InternalPlainJson2NlpJson(result, k, v)
    return result


def RemoveKeyMapInGeneratedNlpJson(nlpJson: dict) -> dict:
    """Strip the helper 'key_map' entries in place and return nlpJson itself."""
    InternalDelNlpJsonKeyMap(nlpJson)
    return nlpJson


def InternalDelNlpJsonKeyMap(nlpJson: dict):
    """Remove 'key_map' from nlpJson and from every section reachable through it.

    A section without 'key_map' is left untouched, so the call is safe to repeat.
    """
    key_map = nlpJson.pop('key_map', None)
    if key_map is None:
        return
    for sub_section in key_map.values():
        InternalDelNlpJsonKeyMap(sub_section)


def InternalPlainJson2NlpJson(nlpJson: dict, pairKey: str, pairVal: str):
    """Insert pairVal into nlpJson at the section path described by pairKey.

    pairKey is a '/'-separated path whose last part must be a decimal number;
    that number is discarded and the value is appended to the section named by
    the remaining parts. Missing sections are created on the way.
    """
    keypath = pairKey.split('/')
    # the trailing index is implied by insertion order, only check and drop it
    assert keypath[-1].isdecimal(), 'the last part of a key must be a decimal index'
    section_names = keypath[:-1]

    for pathpart in section_names:
        existing = nlpJson['key_map'].get(pathpart)
        if existing is None:
            # create the sub section and register it in both containers
            existing = {
                'section': pathpart,
                'entries': [],
                'key_map': {}
            }
            nlpJson['entries'].append(existing)
            nlpJson['key_map'][pathpart] = existing
        nlpJson = existing

    nlpJson['entries'].append(pairVal)


# ========== Raw Nlp Text Writer ==========

def DumpNlpJson(filepath: str, encoding: str, lang_macro: str, nlpJson: dict):
    """Write nlpJson to filepath as raw nlp text in the given encoding.

    The file starts with a 'Language:<lang_macro>' line followed by the
    entries. Characters the encoding cannot represent are dropped.
    """
    # binary mode: line endings are written explicitly as \r\n, never translated
    with open(filepath, 'wb') as f:
        f.write(f'Language:{lang_macro}\r\n'.encode(encoding, errors='ignore'))
        InternalDumpNlpJson(f, encoding, 0, nlpJson)


# g_NlpJsonStrRepl1 = re.compile('\\\\')
g_NlpJsonStrRepl2 = re.compile('"')


def NlpJsonStringProcessor(strl: str) -> str:
    """Escape strl for the nlp text format by doubling every double quote."""
    return g_NlpJsonStrRepl2.sub('""', strl)


def InternalDumpNlpJson(f: io.BufferedWriter, encoding: str, depth: int, nlpJson: dict):
    """Recursive worker of DumpNlpJson.

    String entries are written double-quoted and comma separated. A sub
    section is introduced by a header on its own line: '[name]' when written
    from depth 0, '<name>' from any deeper level.
    """
    assert 'entries' in nlpJson, "a section must have an 'entries' list"

    is_first: bool = True
    for entity in nlpJson['entries']:
        if isinstance(entity, str):
            # separate from the previous string of this section, if any
            if is_first:
                is_first = False
            else:
                f.write(','.encode(encoding))
            # quote the value; embedded " become ""
            f.write('"{0}"'.format(NlpJsonStringProcessor(entity)).encode(encoding, errors='ignore'))
        else:
            # sub section: write its header, then its content one level deeper
            if depth == 0:
                f.write(f'\r\n[{entity["section"]}]\r\n'.encode(encoding, errors='ignore'))
            else:
                f.write(f'\r\n<{entity["section"]}>\r\n'.encode(encoding, errors='ignore'))
            InternalDumpNlpJson(f, encoding, depth + 1, entity)