diff --git a/NlpEncoder/CMakeLists.txt b/NlpCodec/CMakeLists.txt similarity index 58% rename from NlpEncoder/CMakeLists.txt rename to NlpCodec/CMakeLists.txt index df0b344..a48049c 100644 --- a/NlpEncoder/CMakeLists.txt +++ b/NlpCodec/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.12) -project(NlpEncoder LANGUAGES CXX) +project(NlpCodec LANGUAGES CXX) # find packages find_package(ZLIB REQUIRED) @@ -8,12 +8,12 @@ find_package(ZLIB REQUIRED) set(CMAKE_CXX_STANDARD 17) # generate program -add_executable(NlpEncoder NlpEncoder.cpp) -target_link_libraries(NlpEncoder +add_executable(NlpCodec NlpCodec.cpp) +target_link_libraries(NlpCodec PRIVATE ${ZLIB_LIBRARIES} ) -target_include_directories(NlpEncoder +target_include_directories(NlpCodec PRIVATE ${ZLIB_INCLUDE_DIRS} ) diff --git a/NlpEncoder/NlpEncoder.cpp b/NlpCodec/NlpCodec.cpp similarity index 94% rename from NlpEncoder/NlpEncoder.cpp rename to NlpCodec/NlpCodec.cpp index 3b2de3f..d6263d2 100644 --- a/NlpEncoder/NlpEncoder.cpp +++ b/NlpCodec/NlpCodec.cpp @@ -7,7 +7,7 @@ #include #include -namespace NlpEncoder { +namespace NlpCodec { constexpr const uint8_t g_XorArray[] { 0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37, @@ -176,11 +176,11 @@ namespace NlpEncoder { } static void PrintHelp(void) { - fputs("NlpEncoder Usage\n", stdout); + fputs("NlpCodec Usage\n", stdout); fputs("\n", stdout); - fputs("NlpEncoder [compress | uncompress] \n", stdout); - fputs("compress - compress text file into nlp file.\n", stdout); - fputs("uncompress - decompress nlp file into text file.\n", stdout); + fputs("NlpCodec [encode | decode] \n", stdout); + fputs("encode - encode text file into nlp file.\n", stdout); + fputs("decode - decompress nlp file into text file.\n", stdout); fputs(" - the source file. text file in compress mode. nlp file in uncompress mode.\n", stdout); fputs(" - the destination file. nlp file in compress mode. 
text file in uncompress mode.\n", stdout); } @@ -195,7 +195,7 @@ int main(int argc, char* argv[]) { } std::string mode(argv[1]); - if (mode != "compress" && mode != "uncompress") { + if (mode != "encode" && mode != "decode") { fputs("[ERR] Unknow operation!\n", stdout); PrintHelp(); return 1; @@ -214,10 +214,10 @@ int main(int argc, char* argv[]) { // do real work bool result = true; - if (mode == "compress") { - result = NlpEncoder::EncodeNlp(infile, outfile); + if (mode == "encode") { + result = NlpCodec::EncodeNlp(infile, outfile); } else { - result = NlpEncoder::DecodeNlp(infile, outfile); + result = NlpCodec::DecodeNlp(infile, outfile); } // free resources and report diff --git a/NlpCodec/README.md b/NlpCodec/README.md new file mode 100644 index 0000000..85dadc2 --- /dev/null +++ b/NlpCodec/README.md @@ -0,0 +1,24 @@ +# Nlp Codec + +## Requirements + +* C++ 17 standard libs. +* zlib + +## Linux + +``` +mkdir out +cd out +cmake .. +make +``` + +## Windows MSYS2 + +``` +mkdir out +cd out +cmake -G "Unix Makefiles" .. +make +``` diff --git a/NlpEncoder/README.md b/NlpEncoder/README.md deleted file mode 100644 index 16a6300..0000000 --- a/NlpEncoder/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Nlp Encoder - -Requirements: C++ 17 standard. 
diff --git a/NlpParser/NlpRunner.java b/NlpParser/NlpRunner.java index af1467f..6f0ec35 100644 --- a/NlpParser/NlpRunner.java +++ b/NlpParser/NlpRunner.java @@ -19,6 +19,7 @@ import java.io.FileOutputStream; import java.io.FileInputStream; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; +import java.nio.charset.Charset; public class NlpRunner { public static class NlpJsonConverter extends NlpBaseListener { @@ -177,7 +178,7 @@ public class NlpRunner { } // start lex and parse - CharStream input = CharStreams.fromStream(fin, StandardCharsets.UTF_8); + CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252")); NlpLexer lexer = new NlpLexer(input); CommonTokenStream tokens = new CommonTokenStream(lexer); NlpParser parser = new NlpParser(tokens); diff --git a/NlpProc/NlpJsonEncoder.py b/NlpProc/NlpJsonEncoder.py index 2391d4a..17d932f 100644 --- a/NlpProc/NlpJsonEncoder.py +++ b/NlpProc/NlpJsonEncoder.py @@ -4,7 +4,7 @@ import collections if NlpUtils.g_EnableDebugging: g_SupportedEncoding = { - 'template': ('English', ('ascii', ), ) + 'template': ('English', ('windows-1252', ), ) } else: g_SupportedEncoding = { @@ -36,12 +36,12 @@ if __name__ == "__main__": preLoadedData[ver] = PreLoadedDiffIdxTuple._make((insertedKey, deletedKey, plainKeys)) # iterate lang first - # because we use progressive patch. we need iterate vt ver in order + # because we use progressive patch. 
we need to iterate the Virtools versions in order for each language for lang in NlpUtils.g_SupportedLangs: prevPlainValues: list[str] = None for ver in NlpUtils.g_VirtoolsVersion: - print(f'Processing {ver}.{lang}...') + print(f'Loading {ver}.{lang}...') # pick data from pre-loaded dict diffIdxData = preLoadedData[ver] diff --git a/NlpProc/NlpUtils.py b/NlpProc/NlpUtils.py index 62437d0..9bd15fa 100644 --- a/NlpProc/NlpUtils.py +++ b/NlpProc/NlpUtils.py @@ -4,7 +4,7 @@ import io import json import re -g_EnableDebugging = True +g_EnableDebugging = False g_VirtoolsVersion: tuple[str] = ( '25', '35', '40', '50', diff --git a/NlpProc/README.md b/NlpProc/README.md index f41fd20..4d612b3 100644 --- a/NlpProc/README.md +++ b/NlpProc/README.md @@ -4,3 +4,5 @@ Example: Create templates: `py NlpJsonDecoder.py` Compile translations: `py NlpJsonEncoder.py` + +NOTE: all Python scripts must be run from this folder (NlpProc), not from the repository root. diff --git a/Scripts/compile_tr.sh b/Scripts/compile_tr.sh new file mode 100644 index 0000000..7d77ec3 --- /dev/null +++ b/Scripts/compile_tr.sh @@ -0,0 +1,16 @@ +cd NlpTr +mkdir -p out +cd ../NlpProc +python3 NlpJsonEncoder.py +cd .. + +cd NlpTr/out +for file in *.txt +do + if test -f "$file" + then + txt_file="$file" + nlp_file="$(basename "$file" .txt).nlp" + ../../NlpCodec/out/NlpCodec encode "$txt_file" "$nlp_file" + fi +done diff --git a/Scripts/create_new_tr.sh b/Scripts/create_new_tr.sh new file mode 100644 index 0000000..8402642 --- /dev/null +++ b/Scripts/create_new_tr.sh @@ -0,0 +1,18 @@ +if [ $# -ne 1 ] +then + echo "[ERR] invalid arguments" + echo "Syntax" + echo "" + echo "./create_new_tr.sh " + echo ": your preferred language symbol. such as en, de, zh-cn..." + # stop here: without exactly one language symbol the copies below would get bogus names such as "VT25..json" + exit 1 +fi + +cd NlpTr +cp VT25.template.json "VT25.$1.json" +cp VT35.template.json "VT35.$1.json" +cp VT40.template.json "VT40.$1.json" +cp VT50.template.json "VT50.$1.json" +cd .. 
+echo "DONE" diff --git a/Scripts/generate_source.sh b/Scripts/generate_source.sh index a8861e7..85d91b6 100644 --- a/Scripts/generate_source.sh +++ b/Scripts/generate_source.sh @@ -1,11 +1,15 @@ -./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT25.nlp NlpSrc/VT25.txt -./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT35.nlp NlpSrc/VT35.txt -./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT40.nlp NlpSrc/VT40.txt -./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT50.nlp NlpSrc/VT50.txt +./NlpCodec/out/NlpCodec decode NlpSrc/VT25.nlp NlpSrc/VT25.txt +./NlpCodec/out/NlpCodec decode NlpSrc/VT35.nlp NlpSrc/VT35.txt +./NlpCodec/out/NlpCodec decode NlpSrc/VT40.nlp NlpSrc/VT40.txt +./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt cd NlpParser java NlpRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json java NlpRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json java NlpRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json java NlpRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json -cd .. \ No newline at end of file +cd .. + +cd NlpProc +python3 NlpJsonDecoder.py +cd ..