refactor: update NlpParser project

This commit is contained in:
yyc12345 2024-12-13 15:46:28 +08:00
parent b71f6867c5
commit 60fca862f3
4 changed files with 250 additions and 187 deletions

View File

@ -0,0 +1,100 @@
import java.util.Stack;
import java.util.stream.Collectors;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
/**
 * Parse-tree listener that rebuilds a walked NLP document as a nested JSON tree.
 *
 * <p>The root object receives a {@code "language"} property when the document is entered and an
 * {@code "entries"} array when it is exited. Every section and sub-section becomes an object
 * with a {@code "section"} name and its own {@code "entries"} array; string and integer entries
 * are appended to the array of whichever section is currently open.
 */
public class JsonConverter extends NlpBaseListener {

    /* ========== JSON related stuff ========== */

    /** Serializer used by {@link #buildJsonString()}; pretty printing on, HTML escaping off. */
    Gson mGsonInstance;

    /* ========== Section layout related stuff ========== */

    /** Top-level JSON object of the document. */
    JsonObject mRoot;
    /** Entry array of the section currently being walked. */
    JsonArray mSection;
    /** Entry arrays of the enclosing sections, innermost on top. */
    Stack<JsonArray> mSectionStack;

    /** Creates a converter with an empty root object and an empty top-level entry array. */
    public JsonConverter() {
        this.mGsonInstance = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
        this.mRoot = new JsonObject();
        this.mSection = new JsonArray();
        this.mSectionStack = new Stack<>();
    }

    /**
     * Serializes the root object with the configured Gson instance.
     *
     * <p>The {@code "entries"} property is only attached in {@link #exitDocument}, so the text
     * is complete only after the whole tree has been walked.
     *
     * @return the JSON text of the root object
     */
    public String buildJsonString() {
        return mGsonInstance.toJson(mRoot);
    }

    /** Parks the current entry array on the stack and starts a fresh one for a nested section. */
    private void openSection() {
        mSectionStack.push(mSection);
        mSection = new JsonArray();
    }

    /**
     * Wraps the entries collected for the section being left into an object, restores the
     * enclosing section's array and appends the object to it.
     *
     * @param rawHead the section head token text, still carrying its delimiters
     */
    private void closeSection(String rawHead) {
        final JsonObject wrapper = new JsonObject();
        wrapper.addProperty("section", StringHelper.cutSectionHead(rawHead));
        wrapper.add("entries", mSection);
        // Only after the wrapper holds the child array may the parent array be restored.
        mSection = mSectionStack.pop();
        mSection.add(wrapper);
    }

    /* ========== Listener ========== */

    /** Records the document language taken from the language header token. */
    @Override
    public void enterDocument(NlpParser.DocumentContext ctx) {
        final String header = ctx.LANG_HEADER().getText();
        mRoot.addProperty("language", StringHelper.cutLanguageHead(header));
    }

    /** Attaches the top-level entry array to the root object. */
    @Override
    public void exitDocument(NlpParser.DocumentContext ctx) {
        mRoot.add("entries", mSection);
    }

    /** Starts collecting entries for a section. */
    @Override
    public void enterSection(NlpParser.SectionContext ctx) {
        openSection();
    }

    /** Emits the finished section into its parent's entry array. */
    @Override
    public void exitSection(NlpParser.SectionContext ctx) {
        closeSection(ctx.SECTION_HEAD().getText());
    }

    /** Starts collecting entries for a sub-section. */
    @Override
    public void enterSubSection(NlpParser.SubSectionContext ctx) {
        openSection();
    }

    /** Emits the finished sub-section into its parent's entry array. */
    @Override
    public void exitSubSection(NlpParser.SubSectionContext ctx) {
        closeSection(ctx.SUB_SECTION_HEAD().getText());
    }

    /** Appends a single string entry after processing it with {@code StringHelper}. */
    @Override
    public void enterEntryString(NlpParser.EntryStringContext ctx) {
        final String raw = ctx.ENTRY_STRING().getText();
        mSection.add(StringHelper.processString(raw));
    }

    /** Appends one string entry built from all string tokens of a concatenated entry. */
    @Override
    public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
        final java.util.List<String> pieces = ctx.ENTRY_STRING()
                .stream()
                .map(node -> node.getText())
                .collect(Collectors.toList());
        mSection.add(StringHelper.processConcatedString(pieces));
    }

    /** Appends an integer entry parsed from the token text. */
    @Override
    public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
        final String digits = ctx.ENTRY_INTEGER().getText();
        mSection.add(Integer.parseInt(digits));
    }
}

View File

@ -1,20 +1,6 @@
// import antlr stuff
import org.antlr.v4.runtime.*; import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*; import org.antlr.v4.runtime.tree.*;
// import container
import java.util.Stack;
import java.util.stream.Collectors;
import java.util.List;
import java.lang.StringBuilder;
// import json
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
// import regex
import java.util.regex.Pattern;
import java.util.regex.Matcher;
// import io related
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
@ -22,179 +8,84 @@ import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset; import java.nio.charset.Charset;
public class MainRunner { public class MainRunner {
public static class NlpJsonConverter extends NlpBaseListener {
public NlpJsonConverter() {
mGsonInstance = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
mRoot = new JsonObject();
mSection = new JsonArray();
mSectionStack = new Stack<JsonArray>();
}
/* JSON related stuff */
Gson mGsonInstance;
public String buildJsonString() {
return mGsonInstance.toJson(mRoot);
}
/* String related stuff */
// \\\\[^\\rn] match the concator. concator must not be appended with \n \r or \\
// [^\\r\\n]*[\\r\\n]+ is match to line breaker.
private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\"");
// private static final Pattern mRegEscSlash = Pattern.compile("\\\\\\\\");
private static final Pattern mRegEscTab = Pattern.compile("\\t");
private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n");
private String cutLangHead(String strl) {
return strl.substring("Language:".length());
}
private String cutSectionHead(String strl) {
return strl.substring(1, strl.length() - 1);
}
private String cutString(String strl) {
return strl.substring(1, strl.length() - 1);
}
private String regulateString(String strl) {
strl = mRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement("")); // remove string concator
strl = mRegDoubleQuote.matcher(strl).replaceAll(Matcher.quoteReplacement("\""));// replace "" with "
// strl = mRegEscSlash.matcher(strl).replaceAll(Matcher.quoteReplacement("\\"));// leave double back slash alone. we still need it.
strl = mRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t")); // replace real escape to escape char
strl = mRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n"));
return strl;
}
private String processString(String strl) {
return regulateString(cutString(strl));
}
private String processConcatedString(List<String> ls) {
StringBuilder sb = new StringBuilder();
for (String node : ls) {
sb.append(regulateString(cutString(node)));
}
return sb.toString();
}
/* Section layout related stuff */
JsonObject mRoot;
JsonArray mSection;
Stack<JsonArray> mSectionStack;
private void pushSection() {
mSectionStack.push(mSection);
mSection = new JsonArray();
}
private void popSection() {
mSection = mSectionStack.pop();
}
/* Listener */
@Override
public void enterDocument(NlpParser.DocumentContext ctx) {
// insert language prop
mRoot.addProperty("language", cutLangHead(ctx.LANG_HEADER().getText()));
}
@Override
public void exitDocument(NlpParser.DocumentContext ctx) {
// insert document prop
mRoot.add("entries", mSection);
}
@Override
public void enterSection(NlpParser.SectionContext ctx) {
pushSection();
}
@Override
public void exitSection(NlpParser.SectionContext ctx) {
// create new object
JsonObject objSection = new JsonObject();
objSection.addProperty("section", cutSectionHead(ctx.SECTION_HEAD().getText()));
objSection.add("entries", mSection);
// pop and insert
popSection();
mSection.add(objSection);
}
@Override
public void enterSubSection(NlpParser.SubSectionContext ctx) {
pushSection();
}
@Override
public void exitSubSection(NlpParser.SubSectionContext ctx) {
// create new object
JsonObject objSubSection = new JsonObject();
objSubSection.addProperty("section", cutSectionHead(ctx.SUB_SECTION_HEAD().getText()));
objSubSection.add("entries", mSection);
// pop and insert
popSection();
mSection.add(objSubSection);
}
@Override
public void enterEntryString(NlpParser.EntryStringContext ctx) {
mSection.add(processString(ctx.ENTRY_STRING().getText()));
}
@Override
public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
mSection.add(processConcatedString(
ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList())
));
}
@Override
public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
}
}
private static void printHelp() { private static void printHelp() {
System.out.println("NlpParser Usage");
System.out.println("NlpParser <src> <dest>"); System.out.println("NlpParser <src> <dest>");
System.out.println(); System.out.println();
System.out.println("<src> - the decoded nlp text file."); System.out.println("<src> - the decoded NLP text file.");
System.out.println("<dest> - the output json file."); System.out.println("<dest> - the output json file.");
} }
public static void main(String[] args) throws Exception { private static class UserRequest {
// check parameter public UserRequest(String input_filepath, String output_filepath) {
this.mInputFilePath = input_filepath;
this.mOutputFilePath = output_filepath;
}
String mInputFilePath;
String mOutputFilePath;
public String getInputFilePath() {
return this.mInputFilePath;
}
public String getOutputFilePath() {
return this.mOutputFilePath;
}
}
private static UserRequest resolveArguments(String[] args) throws Exception {
// Check parameter
if (args.length != 2) { if (args.length != 2) {
System.out.println("[ERR] Invalid arguments!"); throw new Exception("Invalid arguments count!");
printHelp(); }
System.exit(1); // Return fetched arguments
return new UserRequest(args[0], args[1]);
} }
// open file stream private static void executeWorker(UserRequest user_request) throws Exception {
FileInputStream fin = null; // Use try-with-resources to safely manage file stream.
FileOutputStream fout = null; try (FileInputStream fin = new FileInputStream(user_request.getInputFilePath());
try { FileOutputStream fout = new FileOutputStream(user_request.getOutputFilePath());
fin = new FileInputStream(args[0]); OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);) {
fout = new FileOutputStream(args[1]); // Start lex and parse
} catch (Exception e) {
if (fin != null) fin.close();
if (fout != null) fout.close();
System.out.println("[ERR] Fail to open file!");
printHelp();
System.exit(1);
}
// start lex and parse
CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252")); CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252"));
NlpLexer lexer = new NlpLexer(input); NlpLexer lexer = new NlpLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer); CommonTokenStream tokens = new CommonTokenStream(lexer);
NlpParser parser = new NlpParser(tokens); NlpParser parser = new NlpParser(tokens);
// walk tree to build json // Walk tree to build json
ParseTree tree = parser.document(); ParseTree tree = parser.document();
ParseTreeWalker walker = new ParseTreeWalker(); ParseTreeWalker walker = new ParseTreeWalker();
NlpJsonConverter converter = new NlpJsonConverter(); JsonConverter converter = new JsonConverter();
walker.walk(converter, tree); walker.walk(converter, tree);
// write json // Write json
OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);
fw.write(converter.buildJsonString()); fw.write(converter.buildJsonString());
}
}
// close file stream public static void main(String[] args) throws Exception {
fin.close(); // Check argument
fw.close(); UserRequest user_request = null;
try {
user_request = resolveArguments(args);
} catch (Exception e) {
System.out.print("[Argument Error] ");
System.out.println(e.getMessage());
printHelp();
return;
}
// Call converter
try {
executeWorker(user_request);
} catch (Exception e) {
System.out.print("[Converter Error] ");
System.out.println(e.getMessage());
return;
}
} }
} }

View File

@ -0,0 +1,72 @@
import java.util.List;
import java.lang.StringBuilder;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Static helpers that strip the delimiters from raw NLP token text and normalise string bodies.
 */
public class StringHelper {
    /** Prefix whose length is dropped by {@link #cutLanguageHead(String)}. */
    private static final String LANGUAGE_PREFIX = "Language:";

    /*
     * Regex constants.
     *
     * CONCATENATOR: a backslash followed by one character that is not a backslash, 'r' or 'n',
     * then the remainder of that line and the run of CR/LF characters that ends it.
     * DOUBLED_QUOTE: two consecutive double quotes.
     * TAB: a real tab character.
     * LINE_BREAK: a real line feed, optionally preceded by a carriage return.
     */
    private static final Pattern CONCATENATOR = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
    private static final Pattern DOUBLED_QUOTE = Pattern.compile("\\\"\\\"");
    private static final Pattern TAB = Pattern.compile("\\t");
    private static final Pattern LINE_BREAK = Pattern.compile("\\r?\\n");

    /**
     * Drops as many leading characters as {@code "Language:"} is long. The prefix itself is not
     * checked.
     *
     * @param strl the language header text
     * @return the text after the header prefix
     */
    public static String cutLanguageHead(String strl) {
        final int prefixLength = LANGUAGE_PREFIX.length();
        return strl.substring(prefixLength);
    }

    /**
     * Removes the first and the last character of a section head.
     *
     * @param strl the section head text including its two delimiters
     * @return the text between the delimiters
     */
    public static String cutSectionHead(String strl) {
        return stripEnds(strl);
    }

    /**
     * Removes the first and the last character of a string token.
     *
     * @param strl the string token text including its two delimiters
     * @return the text between the delimiters
     */
    public static String cutString(String strl) {
        return stripEnds(strl);
    }

    /**
     * Normalises a string body: removes line concatenators, collapses doubled quotes into one,
     * and turns real tabs and line breaks into the two-character sequences {@code \t} and
     * {@code \n}.
     *
     * @param strl the string body without its surrounding delimiters
     * @return the normalised text
     */
    public static String regulateString(String strl) {
        final String joined = swap(CONCATENATOR, strl, "");
        final String unquoted = swap(DOUBLED_QUOTE, joined, "\"");
        // Double backslashes are deliberately left untouched; they are still needed later.
        final String tabsEscaped = swap(TAB, unquoted, "\\t");
        return swap(LINE_BREAK, tabsEscaped, "\\n");
    }

    /**
     * Strips the delimiters of one string token and normalises what is left.
     *
     * @param strl the string token text including its two delimiters
     * @return the normalised string body
     */
    public static String processString(String strl) {
        return regulateString(cutString(strl));
    }

    /**
     * Processes every token like {@link #processString(String)} and joins the results in order.
     *
     * @param ls the string token texts of one concatenated entry
     * @return the joined, normalised text; empty when {@code ls} is empty
     */
    public static String processConcatedString(List<String> ls) {
        return ls.stream()
                .map(StringHelper::processString)
                .collect(java.util.stream.Collectors.joining());
    }

    /** Returns {@code text} without its first and last character. */
    private static String stripEnds(String text) {
        final int lastIndex = text.length() - 1;
        return text.substring(1, lastIndex);
    }

    /** Replaces every match of {@code pattern} in {@code text} with {@code literal} taken verbatim. */
    private static String swap(Pattern pattern, String text, String literal) {
        return pattern.matcher(text).replaceAll(Matcher.quoteReplacement(literal));
    }
}

View File

@ -5,11 +5,11 @@
./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt ./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt
cd NlpParser cd NlpParser
java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.nested.json
java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.json java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.nested.json
java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.nested.json
java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.nested.json
java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.nested.json
cd .. cd ..
cd NlpProc cd NlpProc