add scripts. rename NlpEncoder to NlpCodec. update documents
This commit is contained in:
parent
7078341556
commit
150466966b
@ -1,5 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.12)
|
cmake_minimum_required(VERSION 3.12)
|
||||||
project(NlpEncoder LANGUAGES CXX)
|
project(NlpCodec LANGUAGES CXX)
|
||||||
|
|
||||||
# find packages
|
# find packages
|
||||||
find_package(ZLIB REQUIRED)
|
find_package(ZLIB REQUIRED)
|
||||||
@ -8,12 +8,12 @@ find_package(ZLIB REQUIRED)
|
|||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
# generate program
|
# generate program
|
||||||
add_executable(NlpEncoder NlpEncoder.cpp)
|
add_executable(NlpCodec NlpCodec.cpp)
|
||||||
target_link_libraries(NlpEncoder
|
target_link_libraries(NlpCodec
|
||||||
PRIVATE
|
PRIVATE
|
||||||
${ZLIB_LIBRARIES}
|
${ZLIB_LIBRARIES}
|
||||||
)
|
)
|
||||||
target_include_directories(NlpEncoder
|
target_include_directories(NlpCodec
|
||||||
PRIVATE
|
PRIVATE
|
||||||
${ZLIB_INCLUDE_DIRS}
|
${ZLIB_INCLUDE_DIRS}
|
||||||
)
|
)
|
@ -7,7 +7,7 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
namespace NlpEncoder {
|
namespace NlpCodec {
|
||||||
|
|
||||||
constexpr const uint8_t g_XorArray[] {
|
constexpr const uint8_t g_XorArray[] {
|
||||||
0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37,
|
0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37,
|
||||||
@ -176,11 +176,11 @@ namespace NlpEncoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void PrintHelp(void) {
|
static void PrintHelp(void) {
|
||||||
fputs("NlpEncoder Usage\n", stdout);
|
fputs("NlpCodec Usage\n", stdout);
|
||||||
fputs("\n", stdout);
|
fputs("\n", stdout);
|
||||||
fputs("NlpEncoder [compress | uncompress] <src> <dest>\n", stdout);
|
fputs("NlpCodec [encode | decode] <src> <dest>\n", stdout);
|
||||||
fputs("compress - compress text file into nlp file.\n", stdout);
|
fputs("encode - encode text file into nlp file.\n", stdout);
|
||||||
fputs("uncompress - decompress nlp file into text file.\n", stdout);
|
fputs("decode - decompress nlp file into text file.\n", stdout);
|
||||||
fputs("<src> - the source file. text file in compress mode. nlp file in uncompress mode.\n", stdout);
|
fputs("<src> - the source file. text file in compress mode. nlp file in uncompress mode.\n", stdout);
|
||||||
fputs("<dest> - the destination file. nlp file in compress mode. text file in uncompress mode.\n", stdout);
|
fputs("<dest> - the destination file. nlp file in compress mode. text file in uncompress mode.\n", stdout);
|
||||||
}
|
}
|
||||||
@ -195,7 +195,7 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string mode(argv[1]);
|
std::string mode(argv[1]);
|
||||||
if (mode != "compress" && mode != "uncompress") {
|
if (mode != "encode" && mode != "decode") {
|
||||||
fputs("[ERR] Unknow operation!\n", stdout);
|
fputs("[ERR] Unknow operation!\n", stdout);
|
||||||
PrintHelp();
|
PrintHelp();
|
||||||
return 1;
|
return 1;
|
||||||
@ -214,10 +214,10 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
// do real work
|
// do real work
|
||||||
bool result = true;
|
bool result = true;
|
||||||
if (mode == "compress") {
|
if (mode == "encode") {
|
||||||
result = NlpEncoder::EncodeNlp(infile, outfile);
|
result = NlpCodec::EncodeNlp(infile, outfile);
|
||||||
} else {
|
} else {
|
||||||
result = NlpEncoder::DecodeNlp(infile, outfile);
|
result = NlpCodec::DecodeNlp(infile, outfile);
|
||||||
}
|
}
|
||||||
|
|
||||||
// free resources and report
|
// free resources and report
|
24
NlpCodec/README.md
Normal file
24
NlpCodec/README.md
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# Nlp Codec
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
* C++ 17 standard libs.
|
||||||
|
* zlib
|
||||||
|
|
||||||
|
## Linux
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir out
|
||||||
|
cd out
|
||||||
|
cmake ..
|
||||||
|
make
|
||||||
|
```
|
||||||
|
|
||||||
|
## Windows MSYS2
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir out
|
||||||
|
cd out
|
||||||
|
cmake -G "Unix Makefiles" ..
|
||||||
|
make
|
||||||
|
```
|
@ -1,3 +0,0 @@
|
|||||||
# Nlp Encoder
|
|
||||||
|
|
||||||
Requirements: C++ 17 standard.
|
|
@ -19,6 +19,7 @@ import java.io.FileOutputStream;
|
|||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.OutputStreamWriter;
|
import java.io.OutputStreamWriter;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
|
||||||
public class NlpRunner {
|
public class NlpRunner {
|
||||||
public static class NlpJsonConverter extends NlpBaseListener {
|
public static class NlpJsonConverter extends NlpBaseListener {
|
||||||
@ -177,7 +178,7 @@ public class NlpRunner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// start lex and parse
|
// start lex and parse
|
||||||
CharStream input = CharStreams.fromStream(fin, StandardCharsets.UTF_8);
|
CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252"));
|
||||||
NlpLexer lexer = new NlpLexer(input);
|
NlpLexer lexer = new NlpLexer(input);
|
||||||
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||||
NlpParser parser = new NlpParser(tokens);
|
NlpParser parser = new NlpParser(tokens);
|
||||||
|
@ -4,7 +4,7 @@ import collections
|
|||||||
|
|
||||||
if NlpUtils.g_EnableDebugging:
|
if NlpUtils.g_EnableDebugging:
|
||||||
g_SupportedEncoding = {
|
g_SupportedEncoding = {
|
||||||
'template': ('English', ('ascii', ), )
|
'template': ('English', ('windows-1252', ), )
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
g_SupportedEncoding = {
|
g_SupportedEncoding = {
|
||||||
@ -36,12 +36,12 @@ if __name__ == "__main__":
|
|||||||
preLoadedData[ver] = PreLoadedDiffIdxTuple._make((insertedKey, deletedKey, plainKeys))
|
preLoadedData[ver] = PreLoadedDiffIdxTuple._make((insertedKey, deletedKey, plainKeys))
|
||||||
|
|
||||||
# iterate lang first
|
# iterate lang first
|
||||||
# because we use progressive patch. we need iterate vt ver in order
|
# because we use progressive patches, we need to iterate the Virtools versions in order for each language
|
||||||
for lang in NlpUtils.g_SupportedLangs:
|
for lang in NlpUtils.g_SupportedLangs:
|
||||||
|
|
||||||
prevPlainValues: list[str] = None
|
prevPlainValues: list[str] = None
|
||||||
for ver in NlpUtils.g_VirtoolsVersion:
|
for ver in NlpUtils.g_VirtoolsVersion:
|
||||||
print(f'Processing {ver}.{lang}...')
|
print(f'Loading {ver}.{lang}...')
|
||||||
|
|
||||||
# pick data from pre-loaded dict
|
# pick data from pre-loaded dict
|
||||||
diffIdxData = preLoadedData[ver]
|
diffIdxData = preLoadedData[ver]
|
||||||
|
@ -4,7 +4,7 @@ import io
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
g_EnableDebugging = True
|
g_EnableDebugging = False
|
||||||
|
|
||||||
g_VirtoolsVersion: tuple[str] = (
|
g_VirtoolsVersion: tuple[str] = (
|
||||||
'25', '35', '40', '50',
|
'25', '35', '40', '50',
|
||||||
|
@ -4,3 +4,5 @@ Example:
|
|||||||
|
|
||||||
Create templates: `py NlpJsonDecoder.py`
|
Create templates: `py NlpJsonDecoder.py`
|
||||||
Compile translations: `py NlpJsonEncoder.py`
|
Compile translations: `py NlpJsonEncoder.py`
|
||||||
|
|
||||||
|
NOTE: all Python files should be executed from within this folder, NOT from the root folder.
|
||||||
|
16
Scripts/compile_tr.sh
Normal file
16
Scripts/compile_tr.sh
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# Compile translations, then encode every resulting text file into an nlp file.
# NOTE(review): all paths are relative, so this presumably has to be launched
# from the repository root -- confirm.

# Abort if a directory is missing instead of running later steps in the wrong place.
cd NlpTr || exit 1
# -p: do not fail when the output directory is left over from a previous run.
mkdir -p out
cd ../NlpProc || exit 1
# NOTE(review): presumably writes the *.txt files into NlpTr/out -- confirm.
python3 NlpJsonEncoder.py
cd ..

cd NlpTr/out || exit 1
for file in *.txt
do
    # When nothing matches, the pattern stays literal ("*.txt"); test -f skips it.
    if test -f "$file"
    then
        txt_file="$file"
        # Same base name with the .txt suffix swapped for .nlp.
        nlp_file="$(basename "$file" .txt).nlp"
        ../../NlpCodec/out/NlpCodec encode "$txt_file" "$nlp_file"
    fi
done
|
16
Scripts/create_new_tr.sh
Normal file
16
Scripts/create_new_tr.sh
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# Create the translation files for a new language by copying each
# VT<version>.template.json in NlpTr to VT<version>.<lang-symbol>.json.
# Usage: ./create_new_tr.sh <lang-symbol>

if [ $# -ne 1 ]
then
    echo "[ERR] invalid arguments"
    echo "Syntax"
    echo ""
    echo "./create_new_tr.sh <lang-symbol>"
    echo "<lang-symbol>: your preferred language symbol. such as en, de, zh-cn..."
    # Stop here: without a language symbol the copies below would create
    # bogus files such as "VT25..json".
    exit 1
fi

# Abort if the directory is missing rather than copying in the wrong place.
cd NlpTr || exit 1
for ver in 25 35 40 50
do
    cp "VT${ver}.template.json" "VT${ver}.$1.json"
done
cd ..
echo "DONE"
|
@ -1,11 +1,15 @@
|
|||||||
./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT25.nlp NlpSrc/VT25.txt
|
./NlpCodec/out/NlpCodec decode NlpSrc/VT25.nlp NlpSrc/VT25.txt
|
||||||
./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT35.nlp NlpSrc/VT35.txt
|
./NlpCodec/out/NlpCodec decode NlpSrc/VT35.nlp NlpSrc/VT35.txt
|
||||||
./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT40.nlp NlpSrc/VT40.txt
|
./NlpCodec/out/NlpCodec decode NlpSrc/VT40.nlp NlpSrc/VT40.txt
|
||||||
./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT50.nlp NlpSrc/VT50.txt
|
./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt
|
||||||
|
|
||||||
cd NlpParser
|
cd NlpParser
|
||||||
java NlpRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json
|
java NlpRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json
|
||||||
java NlpRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json
|
java NlpRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json
|
||||||
java NlpRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json
|
java NlpRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json
|
||||||
java NlpRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json
|
java NlpRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
|
cd NlpProc
|
||||||
|
python3 NlpJsonDecoder.py
|
||||||
|
cd ..
|
||||||
|
Loading…
Reference in New Issue
Block a user