improve parser. add some readme.

This commit is contained in:
yyc12345 2023-06-30 22:12:44 +08:00
parent b6558a718d
commit faddd7ccc5
14 changed files with 88 additions and 5309 deletions

2
.gitattributes vendored
View File

@ -1,3 +1 @@
NlpSrc/*.txt text eol=crlf
NlpSrc/*.nlp binary NlpSrc/*.nlp binary

4
.gitignore vendored
View File

@ -1,11 +1,13 @@
## my ban ## my ban
out/ out/
temp/ temp/
NlpSrc/*
!NlpSrc/*.nlp
NlpParser/* NlpParser/*
!NlpParser/Nlp.g4 !NlpParser/Nlp.g4
!NlpParser/NlpRunner.java !NlpParser/NlpRunner.java
!NlpParser/testbench.txt !NlpParser/testbench.txt
!NlpParser/compile.bat !NlpParser/README.md
.vscode/ .vscode/

3
NlpEncoder/README.md Normal file
View File

@ -0,0 +1,3 @@
# Nlp Encoder
Requirements: a compiler that supports the C++17 standard.

View File

@ -14,6 +14,11 @@ import com.google.gson.GsonBuilder;
// import regex // import regex
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.Matcher; import java.util.regex.Matcher;
// import io related
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
public class NlpRunner { public class NlpRunner {
public static class NlpJsonConverter extends NlpBaseListener { public static class NlpJsonConverter extends NlpBaseListener {
@ -26,8 +31,8 @@ public class NlpRunner {
/* JSON related stuff */ /* JSON related stuff */
Gson mGsonInstance; Gson mGsonInstance;
public void printJson() { public String buildJsonString() {
System.out.print(mGsonInstance.toJson(mRoot));; return mGsonInstance.toJson(mRoot);
} }
/* String related stuff */ /* String related stuff */
@ -137,17 +142,54 @@ public class NlpRunner {
} }
} }
private static void printHelp() {
System.out.println("NlpRunner <src> <dest>");
System.out.println();
System.out.println("<src> - the decoded nlp text file.");
System.out.println("<dest> - the output json file.");
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
ANTLRInputStream input = new ANTLRInputStream(System.in); // check parameter
if (args.length != 2) {
System.out.println("[ERR] Invalid arguments!");
printHelp();
System.exit(1);
}
// open file stream
FileInputStream fin = null;
FileOutputStream fout = null;
try {
fin = new FileInputStream(args[0]);
fout = new FileOutputStream(args[1]);
} catch (Exception e) {
if (fin != null) fin.close();
if (fout != null) fout.close();
System.out.println("[ERR] Fail to open file!");
printHelp();
System.exit(1);
}
// start lex and parse
CharStream input = CharStreams.fromStream(fin, StandardCharsets.UTF_8);
NlpLexer lexer = new NlpLexer(input); NlpLexer lexer = new NlpLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer); CommonTokenStream tokens = new CommonTokenStream(lexer);
NlpParser parser = new NlpParser(tokens); NlpParser parser = new NlpParser(tokens);
// walk tree to build json
ParseTree tree = parser.document(); ParseTree tree = parser.document();
ParseTreeWalker walker = new ParseTreeWalker(); ParseTreeWalker walker = new ParseTreeWalker();
NlpJsonConverter converter = new NlpJsonConverter(); NlpJsonConverter converter = new NlpJsonConverter();
walker.walk(converter, tree); walker.walk(converter, tree);
converter.printJson();
System.out.println(); // write json
OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);
fw.write(converter.buildJsonString());
// close file stream
fin.close();
fw.close();
} }
} }

16
NlpParser/README.md Normal file
View File

@ -0,0 +1,16 @@
# Nlp Parser
Requirements:
* Antlr4
* Gson
Useful commands:
* `antlr4 Nlp.g4`: Generate the lexer, parser and listener Java sources from the ANTLR grammar file.
* `javac Nlp*.java`: Compile the generated sources together with `NlpRunner.java` into a runnable application.
* Run the testbench:
- `grun Nlp document -tree < testbench.txt`
- `grun Nlp document -gui < testbench.txt`
- `java NlpRunner testbench.txt result.json`

View File

@ -1,4 +0,0 @@
antlr4 Nlp.g4
javac Nlp*.java
;grun Nlp document -tree < testbench.txt
java NlpRunner < ../NlpSrc/VT50.txt > result.json

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

4
Scripts/README.md Normal file
View File

@ -0,0 +1,4 @@
# Scripts Readme
WARNING!
ALL of these scripts must be run from the repository ROOT folder, not from THIS folder, because they use paths relative to the repository root!

View File

@ -0,0 +1,5 @@
cd NlpEncoder
mkdir out
cd out
cmake ..
make

View File

@ -0,0 +1,3 @@
cd NlpParser
antlr4 Nlp.g4
javac Nlp*.java

View File

@ -2,3 +2,10 @@
./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT35.nlp NlpSrc/VT35.txt ./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT35.nlp NlpSrc/VT35.txt
./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT40.nlp NlpSrc/VT40.txt ./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT40.nlp NlpSrc/VT40.txt
./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT50.nlp NlpSrc/VT50.txt ./NlpEncoder/out/NlpEncoder uncompress NlpSrc/VT50.nlp NlpSrc/VT50.txt
cd NlpParser
java NlpRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json
java NlpRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json
java NlpRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json
java NlpRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json
cd ..