refactor: update NlpParser project
This commit is contained in:
parent
b71f6867c5
commit
60fca862f3
100
NlpParser/JsonConverter.java
Normal file
100
NlpParser/JsonConverter.java
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
import java.util.Stack;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.google.gson.JsonArray;
|
||||||
|
import com.google.gson.JsonObject;
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.GsonBuilder;
|
||||||
|
|
||||||
|
public class JsonConverter extends NlpBaseListener {
|
||||||
|
public JsonConverter() {
|
||||||
|
mGsonInstance = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
|
||||||
|
mRoot = new JsonObject();
|
||||||
|
mSection = new JsonArray();
|
||||||
|
mSectionStack = new Stack<JsonArray>();
|
||||||
|
}
|
||||||
|
/* ========== JSON related stuff ========== */
|
||||||
|
|
||||||
|
Gson mGsonInstance;
|
||||||
|
|
||||||
|
public String buildJsonString() {
|
||||||
|
return mGsonInstance.toJson(mRoot);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ========== Section layout related stuff ========== */
|
||||||
|
|
||||||
|
JsonObject mRoot;
|
||||||
|
JsonArray mSection;
|
||||||
|
Stack<JsonArray> mSectionStack;
|
||||||
|
|
||||||
|
private void pushSection() {
|
||||||
|
mSectionStack.push(mSection);
|
||||||
|
mSection = new JsonArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void popSection() {
|
||||||
|
mSection = mSectionStack.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ========== Listener ========== */
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void enterDocument(NlpParser.DocumentContext ctx) {
|
||||||
|
// insert language prop
|
||||||
|
mRoot.addProperty("language", StringHelper.cutLanguageHead(ctx.LANG_HEADER().getText()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void exitDocument(NlpParser.DocumentContext ctx) {
|
||||||
|
// insert document prop
|
||||||
|
mRoot.add("entries", mSection);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void enterSection(NlpParser.SectionContext ctx) {
|
||||||
|
pushSection();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void exitSection(NlpParser.SectionContext ctx) {
|
||||||
|
// create new object
|
||||||
|
JsonObject objSection = new JsonObject();
|
||||||
|
objSection.addProperty("section", StringHelper.cutSectionHead(ctx.SECTION_HEAD().getText()));
|
||||||
|
objSection.add("entries", mSection);
|
||||||
|
// pop and insert
|
||||||
|
popSection();
|
||||||
|
mSection.add(objSection);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void enterSubSection(NlpParser.SubSectionContext ctx) {
|
||||||
|
pushSection();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void exitSubSection(NlpParser.SubSectionContext ctx) {
|
||||||
|
// create new object
|
||||||
|
JsonObject objSubSection = new JsonObject();
|
||||||
|
objSubSection.addProperty("section", StringHelper.cutSectionHead(ctx.SUB_SECTION_HEAD().getText()));
|
||||||
|
objSubSection.add("entries", mSection);
|
||||||
|
// pop and insert
|
||||||
|
popSection();
|
||||||
|
mSection.add(objSubSection);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void enterEntryString(NlpParser.EntryStringContext ctx) {
|
||||||
|
mSection.add(StringHelper.processString(ctx.ENTRY_STRING().getText()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
|
||||||
|
mSection.add(StringHelper.processConcatedString(
|
||||||
|
ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList())));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
|
||||||
|
mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
|
||||||
|
}
|
||||||
|
}
|
@ -1,20 +1,6 @@
|
|||||||
// import antlr stuff
|
|
||||||
import org.antlr.v4.runtime.*;
|
import org.antlr.v4.runtime.*;
|
||||||
import org.antlr.v4.runtime.tree.*;
|
import org.antlr.v4.runtime.tree.*;
|
||||||
// import container
|
|
||||||
import java.util.Stack;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.List;
|
|
||||||
import java.lang.StringBuilder;
|
|
||||||
// import json
|
|
||||||
import com.google.gson.JsonArray;
|
|
||||||
import com.google.gson.JsonObject;
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import com.google.gson.GsonBuilder;
|
|
||||||
// import regex
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
// import io related
|
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.OutputStreamWriter;
|
import java.io.OutputStreamWriter;
|
||||||
@ -22,179 +8,84 @@ import java.nio.charset.StandardCharsets;
|
|||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
|
||||||
public class MainRunner {
|
public class MainRunner {
|
||||||
public static class NlpJsonConverter extends NlpBaseListener {
|
|
||||||
public NlpJsonConverter() {
|
|
||||||
mGsonInstance = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
|
|
||||||
mRoot = new JsonObject();
|
|
||||||
mSection = new JsonArray();
|
|
||||||
mSectionStack = new Stack<JsonArray>();
|
|
||||||
}
|
|
||||||
/* JSON related stuff */
|
|
||||||
|
|
||||||
Gson mGsonInstance;
|
|
||||||
public String buildJsonString() {
|
|
||||||
return mGsonInstance.toJson(mRoot);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* String related stuff */
|
|
||||||
|
|
||||||
// \\\\[^\\rn] match the concator. concator must not be appended with \n \r or \\
|
|
||||||
// [^\\r\\n]*[\\r\\n]+ is match to line breaker.
|
|
||||||
private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
|
|
||||||
private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\"");
|
|
||||||
// private static final Pattern mRegEscSlash = Pattern.compile("\\\\\\\\");
|
|
||||||
private static final Pattern mRegEscTab = Pattern.compile("\\t");
|
|
||||||
private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n");
|
|
||||||
private String cutLangHead(String strl) {
|
|
||||||
return strl.substring("Language:".length());
|
|
||||||
}
|
|
||||||
private String cutSectionHead(String strl) {
|
|
||||||
return strl.substring(1, strl.length() - 1);
|
|
||||||
}
|
|
||||||
private String cutString(String strl) {
|
|
||||||
return strl.substring(1, strl.length() - 1);
|
|
||||||
}
|
|
||||||
private String regulateString(String strl) {
|
|
||||||
strl = mRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement("")); // remove string concator
|
|
||||||
strl = mRegDoubleQuote.matcher(strl).replaceAll(Matcher.quoteReplacement("\""));// replace "" with "
|
|
||||||
// strl = mRegEscSlash.matcher(strl).replaceAll(Matcher.quoteReplacement("\\"));// leave double back slash alone. we still need it.
|
|
||||||
strl = mRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t")); // replace real escape to escape char
|
|
||||||
strl = mRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n"));
|
|
||||||
|
|
||||||
return strl;
|
|
||||||
}
|
|
||||||
private String processString(String strl) {
|
|
||||||
return regulateString(cutString(strl));
|
|
||||||
}
|
|
||||||
private String processConcatedString(List<String> ls) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
for (String node : ls) {
|
|
||||||
sb.append(regulateString(cutString(node)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Section layout related stuff */
|
|
||||||
|
|
||||||
JsonObject mRoot;
|
|
||||||
JsonArray mSection;
|
|
||||||
Stack<JsonArray> mSectionStack;
|
|
||||||
private void pushSection() {
|
|
||||||
mSectionStack.push(mSection);
|
|
||||||
mSection = new JsonArray();
|
|
||||||
}
|
|
||||||
private void popSection() {
|
|
||||||
mSection = mSectionStack.pop();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Listener */
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void enterDocument(NlpParser.DocumentContext ctx) {
|
|
||||||
// insert language prop
|
|
||||||
mRoot.addProperty("language", cutLangHead(ctx.LANG_HEADER().getText()));
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void exitDocument(NlpParser.DocumentContext ctx) {
|
|
||||||
// insert document prop
|
|
||||||
mRoot.add("entries", mSection);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void enterSection(NlpParser.SectionContext ctx) {
|
|
||||||
pushSection();
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void exitSection(NlpParser.SectionContext ctx) {
|
|
||||||
// create new object
|
|
||||||
JsonObject objSection = new JsonObject();
|
|
||||||
objSection.addProperty("section", cutSectionHead(ctx.SECTION_HEAD().getText()));
|
|
||||||
objSection.add("entries", mSection);
|
|
||||||
// pop and insert
|
|
||||||
popSection();
|
|
||||||
mSection.add(objSection);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void enterSubSection(NlpParser.SubSectionContext ctx) {
|
|
||||||
pushSection();
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void exitSubSection(NlpParser.SubSectionContext ctx) {
|
|
||||||
// create new object
|
|
||||||
JsonObject objSubSection = new JsonObject();
|
|
||||||
objSubSection.addProperty("section", cutSectionHead(ctx.SUB_SECTION_HEAD().getText()));
|
|
||||||
objSubSection.add("entries", mSection);
|
|
||||||
// pop and insert
|
|
||||||
popSection();
|
|
||||||
mSection.add(objSubSection);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void enterEntryString(NlpParser.EntryStringContext ctx) {
|
|
||||||
mSection.add(processString(ctx.ENTRY_STRING().getText()));
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
|
|
||||||
mSection.add(processConcatedString(
|
|
||||||
ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList())
|
|
||||||
));
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
|
|
||||||
mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void printHelp() {
|
private static void printHelp() {
|
||||||
|
System.out.println("NlpParser Usage");
|
||||||
System.out.println("NlpParser <src> <dest>");
|
System.out.println("NlpParser <src> <dest>");
|
||||||
System.out.println();
|
System.out.println();
|
||||||
System.out.println("<src> - the decoded nlp text file.");
|
System.out.println("<src> - the decoded NLP text file.");
|
||||||
System.out.println("<dest> - the output json file.");
|
System.out.println("<dest> - the output json file.");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
private static class UserRequest {
|
||||||
// check parameter
|
public UserRequest(String input_filepath, String output_filepath) {
|
||||||
|
this.mInputFilePath = input_filepath;
|
||||||
|
this.mOutputFilePath = output_filepath;
|
||||||
|
}
|
||||||
|
|
||||||
|
String mInputFilePath;
|
||||||
|
String mOutputFilePath;
|
||||||
|
|
||||||
|
public String getInputFilePath() {
|
||||||
|
return this.mInputFilePath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOutputFilePath() {
|
||||||
|
return this.mOutputFilePath;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static UserRequest resolveArguments(String[] args) throws Exception {
|
||||||
|
// Check parameter
|
||||||
if (args.length != 2) {
|
if (args.length != 2) {
|
||||||
System.out.println("[ERR] Invalid arguments!");
|
throw new Exception("Invalid arguments count!");
|
||||||
printHelp();
|
}
|
||||||
System.exit(1);
|
// Return fetched arguments
|
||||||
|
return new UserRequest(args[0], args[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// open file stream
|
private static void executeWorker(UserRequest user_request) throws Exception {
|
||||||
FileInputStream fin = null;
|
// Use try-with-resources to safely manage file stream.
|
||||||
FileOutputStream fout = null;
|
try (FileInputStream fin = new FileInputStream(user_request.getInputFilePath());
|
||||||
try {
|
FileOutputStream fout = new FileOutputStream(user_request.getOutputFilePath());
|
||||||
fin = new FileInputStream(args[0]);
|
OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);) {
|
||||||
fout = new FileOutputStream(args[1]);
|
// Start lex and parse
|
||||||
} catch (Exception e) {
|
|
||||||
if (fin != null) fin.close();
|
|
||||||
if (fout != null) fout.close();
|
|
||||||
|
|
||||||
System.out.println("[ERR] Fail to open file!");
|
|
||||||
printHelp();
|
|
||||||
System.exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// start lex and parse
|
|
||||||
CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252"));
|
CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252"));
|
||||||
NlpLexer lexer = new NlpLexer(input);
|
NlpLexer lexer = new NlpLexer(input);
|
||||||
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||||
NlpParser parser = new NlpParser(tokens);
|
NlpParser parser = new NlpParser(tokens);
|
||||||
|
|
||||||
// walk tree to build json
|
// Walk tree to build json
|
||||||
ParseTree tree = parser.document();
|
ParseTree tree = parser.document();
|
||||||
ParseTreeWalker walker = new ParseTreeWalker();
|
ParseTreeWalker walker = new ParseTreeWalker();
|
||||||
NlpJsonConverter converter = new NlpJsonConverter();
|
JsonConverter converter = new JsonConverter();
|
||||||
walker.walk(converter, tree);
|
walker.walk(converter, tree);
|
||||||
|
|
||||||
// write json
|
// Write json
|
||||||
OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);
|
|
||||||
fw.write(converter.buildJsonString());
|
fw.write(converter.buildJsonString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// close file stream
|
public static void main(String[] args) throws Exception {
|
||||||
fin.close();
|
// Check argument
|
||||||
fw.close();
|
UserRequest user_request = null;
|
||||||
|
try {
|
||||||
|
user_request = resolveArguments(args);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.print("[Argument Error] ");
|
||||||
|
System.out.println(e.getMessage());
|
||||||
|
printHelp();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call converter
|
||||||
|
try {
|
||||||
|
executeWorker(user_request);
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.print("[Converter Error] ");
|
||||||
|
System.out.println(e.getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
72
NlpParser/StringHelper.java
Normal file
72
NlpParser/StringHelper.java
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
import java.util.List;
|
||||||
|
import java.lang.StringBuilder;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
|
/**
 * Static helpers that turn raw token text into the strings stored in the
 * generated JSON.
 *
 * The class only holds static methods and is not meant to be instantiated.
 */
public final class StringHelper {

    /*
     * Regex constants.
     *
     * gRegStrCctor matches a line concatenator: a backslash, followed by one
     * character that is not a backslash, 'r' or 'n', followed by the rest of
     * that line and one or more CR / LF characters.
     *
     * gRegDoubleQuote matches two consecutive double quotes.
     *
     * gRegEscTab matches a real tab character, gRegEscEol a real LF or CRLF.
     *
     * A doubled backslash is deliberately left alone; it is still needed in
     * the output.
     */
    private static final Pattern gRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
    private static final Pattern gRegDoubleQuote = Pattern.compile("\\\"\\\"");
    private static final Pattern gRegEscTab = Pattern.compile("\\t");
    private static final Pattern gRegEscEol = Pattern.compile("\\r?\\n");

    /*
     * Replacement strings, quoted once instead of on every call. Only the two
     * that contain a backslash actually need quoting.
     */
    private static final String gReplEscTab = Matcher.quoteReplacement("\\t");
    private static final String gReplEscEol = Matcher.quoteReplacement("\\n");

    /** Not instantiable: every member is static. */
    private StringHelper() {
    }

    /**
     * Drops the first {@code "Language:".length()} characters of the given text.
     * The prefix itself is not checked.
     *
     * @param strl the language header text
     * @return everything after the header prefix
     * @throws StringIndexOutOfBoundsException if the text is shorter than the prefix
     */
    public static String cutLanguageHead(String strl) {
        return strl.substring("Language:".length());
    }

    /**
     * Strips the first and the last character of a section header.
     *
     * @param strl the section header text including its two delimiters
     * @return the text between the delimiters
     * @throws StringIndexOutOfBoundsException if the text has fewer than two characters
     */
    public static String cutSectionHead(String strl) {
        return strl.substring(1, strl.length() - 1);
    }

    /**
     * Strips the first and the last character of a string token.
     *
     * @param strl the string token text including its two delimiters
     * @return the text between the delimiters
     * @throws StringIndexOutOfBoundsException if the text has fewer than two characters
     */
    public static String cutString(String strl) {
        return strl.substring(1, strl.length() - 1);
    }

    /**
     * Normalizes the body of a string token. The steps run in this order:
     * line concatenators are removed, doubled double quotes collapse into a
     * single one, and real tab / line break characters are rewritten as the
     * two-character sequences backslash-t and backslash-n.
     *
     * @param strl the string body without its surrounding delimiters
     * @return the normalized text
     */
    public static String regulateString(String strl) {
        // Remove line concatenators first so the line breaks they cover are
        // not turned into escape sequences below.
        strl = gRegStrCctor.matcher(strl).replaceAll("");

        // Replace "" with "
        strl = gRegDoubleQuote.matcher(strl).replaceAll("\"");

        // Replace real control characters with their escape sequences.
        strl = gRegEscTab.matcher(strl).replaceAll(gReplEscTab);
        strl = gRegEscEol.matcher(strl).replaceAll(gReplEscEol);

        return strl;
    }

    /**
     * Strips the delimiters of a string token and normalizes its body.
     *
     * @param strl the string token text including its two delimiters
     * @return the normalized body
     */
    public static String processString(String strl) {
        return regulateString(cutString(strl));
    }

    /**
     * Processes every string token like {@link #processString(String)} and
     * joins the results, in order, without any separator.
     *
     * @param ls the string token texts, each including its two delimiters
     * @return the concatenation of all normalized bodies; empty for an empty list
     */
    public static String processConcatedString(List<String> ls) {
        StringBuilder sb = new StringBuilder();
        for (String node : ls) {
            sb.append(processString(node));
        }
        return sb.toString();
    }

}
|
@ -5,11 +5,11 @@
|
|||||||
./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt
|
./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt
|
||||||
|
|
||||||
cd NlpParser
|
cd NlpParser
|
||||||
java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json
|
java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.nested.json
|
||||||
java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.json
|
java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.nested.json
|
||||||
java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json
|
java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.nested.json
|
||||||
java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json
|
java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.nested.json
|
||||||
java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json
|
java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.nested.json
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
cd NlpProc
|
cd NlpProc
|
||||||
|
Loading…
Reference in New Issue
Block a user