refactor: update NlpParser project

2024-12-13 15:46:28 +08:00
parent b71f6867c5
commit 60fca862f3
4 changed files with 250 additions and 187 deletions
--- a/NlpParser/JsonConverter.java
+++ b/NlpParser/JsonConverter.java
@@ -0,0 +1,100 @@
 import java.util.Stack;
 import java.util.stream.Collectors;
 import com.google.gson.JsonArray;
 import com.google.gson.JsonObject;
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 /**
  * Parse tree listener that assembles a nested JSON document from an NLP parse tree.
  * <p>
  * Every section or sub section becomes an object with a "section" name and an
  * "entries" array; plain entries are appended to the array of the innermost
  * section that is currently open. The finished tree is rendered by
  * {@link #buildJsonString()}.
  */
 public class JsonConverter extends NlpBaseListener {
 	/* ========== State ========== */
 	/** Serializer configured in the constructor (pretty printing, no HTML escaping). */
 	Gson mGsonInstance;
 	/** Root object that receives the "language" and "entries" members. */
 	JsonObject mRoot;
 	/** Entry array of the section currently being filled. */
 	JsonArray mSection;
 	/** Entry arrays of the enclosing sections, innermost on top. */
 	Stack<JsonArray> mSectionStack;
 	public JsonConverter() {
 		GsonBuilder builder = new GsonBuilder();
 		builder.setPrettyPrinting();
 		builder.disableHtmlEscaping();
 		mGsonInstance = builder.create();
 		mRoot = new JsonObject();
 		mSection = new JsonArray();
 		mSectionStack = new Stack<JsonArray>();
 	}
 	/* ========== JSON related stuff ========== */
 	/**
 	 * Serializes the root object with the configured Gson instance.
 	 *
 	 * @return the JSON text of the root object
 	 */
 	public String buildJsonString() {
 		String rendered = mGsonInstance.toJson(mRoot);
 		return rendered;
 	}
 	/* ========== Section layout related stuff ========== */
 	/** Parks the current entry array on the stack and starts a fresh one. */
 	private void pushSection() {
 		JsonArray outer = mSection;
 		mSectionStack.push(outer);
 		mSection = new JsonArray();
 	}
 	/** Makes the most recently parked entry array the current one again. */
 	private void popSection() {
 		JsonArray outer = mSectionStack.pop();
 		mSection = outer;
 	}
 	/**
 	 * Wraps the current entry array into a section object and appends that
 	 * object to the enclosing entry array.
 	 *
 	 * @param rawHead the section header token text, still carrying its delimiters
 	 */
 	private void closeSection(String rawHead) {
 		JsonObject wrapper = new JsonObject();
 		wrapper.addProperty("section", StringHelper.cutSectionHead(rawHead));
 		wrapper.add("entries", mSection);
 		// Only after the wrapper holds the finished array may the outer array come back.
 		popSection();
 		mSection.add(wrapper);
 	}
 	/* ========== Listener ========== */
 	@Override
 	public void enterDocument(NlpParser.DocumentContext ctx) {
 		// The language property is taken from the document header token.
 		String header = ctx.LANG_HEADER().getText();
 		mRoot.addProperty("language", StringHelper.cutLanguageHead(header));
 	}
 	@Override
 	public void exitDocument(NlpParser.DocumentContext ctx) {
 		// Whatever is current at this point becomes the document's entry list.
 		mRoot.add("entries", mSection);
 	}
 	@Override
 	public void enterSection(NlpParser.SectionContext ctx) {
 		pushSection();
 	}
 	@Override
 	public void exitSection(NlpParser.SectionContext ctx) {
 		closeSection(ctx.SECTION_HEAD().getText());
 	}
 	@Override
 	public void enterSubSection(NlpParser.SubSectionContext ctx) {
 		pushSection();
 	}
 	@Override
 	public void exitSubSection(NlpParser.SubSectionContext ctx) {
 		closeSection(ctx.SUB_SECTION_HEAD().getText());
 	}
 	@Override
 	public void enterEntryString(NlpParser.EntryStringContext ctx) {
 		String raw = ctx.ENTRY_STRING().getText();
 		mSection.add(StringHelper.processString(raw));
 	}
 	@Override
 	public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
 		// Collect the text of every string token, then let the helper merge them.
 		mSection.add(StringHelper.processConcatedString(
 				ctx.ENTRY_STRING().stream()
 						.map(token -> token.getText())
 						.collect(Collectors.toList())));
 	}
 	@Override
 	public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
 		int parsed = Integer.parseInt(ctx.ENTRY_INTEGER().getText());
 		mSection.add(parsed);
 	}
 }
--- a/NlpParser/MainRunner.java
+++ b/NlpParser/MainRunner.java
@@ -1,20 +1,6 @@
 // import antlr stuff
 import org.antlr.v4.runtime.*;
 import org.antlr.v4.runtime.tree.*;
-// import container
+
 import java.util.Stack;
 import java.util.stream.Collectors;
 import java.util.List;
 import java.lang.StringBuilder;
 // import json
 import com.google.gson.JsonArray;
 import com.google.gson.JsonObject;
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 // import regex
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 // import io related
 import java.io.FileOutputStream;
 import java.io.FileInputStream;
 import java.io.OutputStreamWriter;
@@ -22,179 +8,84 @@ import java.nio.charset.StandardCharsets;
 import java.nio.charset.Charset;
 public class MainRunner {
 	/**
 	 * Listener that assembles a nested JSON document while the NLP parse tree is walked.
 	 * Sections and sub sections become objects holding a "section" name and an
 	 * "entries" array; plain entries go into the array of the innermost open section.
 	 */
 	public static class NlpJsonConverter extends NlpBaseListener {
 		/* Regex constants */
 		// Concatenator: a backslash followed by one character that is not a backslash,
 		// 'r' or 'n', then the rest of that line and the line break character(s).
 		private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
 		// Two consecutive double quotes.
 		private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\"");
 		// private static final Pattern mRegEscSlash = Pattern.compile("\\\\\\\\");
 		// A real tab character.
 		private static final Pattern mRegEscTab = Pattern.compile("\\t");
 		// A real line break, with or without a preceding carriage return.
 		private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n");
 		/* State */
 		Gson mGsonInstance;
 		JsonObject mRoot;
 		JsonArray mSection;
 		Stack<JsonArray> mSectionStack;
 		public NlpJsonConverter() {
 			GsonBuilder builder = new GsonBuilder();
 			builder.setPrettyPrinting();
 			builder.disableHtmlEscaping();
 			mGsonInstance = builder.create();
 			mRoot = new JsonObject();
 			mSection = new JsonArray();
 			mSectionStack = new Stack<JsonArray>();
 		}
 		/* JSON related stuff */
 		/** Serializes the root object with the configured Gson instance. */
 		public String buildJsonString() {
 			String rendered = mGsonInstance.toJson(mRoot);
 			return rendered;
 		}
 		/* String related stuff */
 		/** Drops as many leading characters as the text "Language:" is long. */
 		private String cutLangHead(String strl) {
 			final int prefixLength = "Language:".length();
 			return strl.substring(prefixLength);
 		}
 		/** Drops the first and the last character of a section header. */
 		private String cutSectionHead(String strl) {
 			int last = strl.length() - 1;
 			return strl.substring(1, last);
 		}
 		/** Drops the first and the last character of a string token. */
 		private String cutString(String strl) {
 			int last = strl.length() - 1;
 			return strl.substring(1, last);
 		}
 		/** Applies the four text replacements below, in this exact order. */
 		private String regulateString(String strl) {
 			// Remove every string concatenator.
 			String joined = mRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement(""));
 			// Collapse "" into ".
 			String unquoted = mRegDoubleQuote.matcher(joined).replaceAll(Matcher.quoteReplacement("\""));
 			// Double backslashes are deliberately left alone; they are still needed.
 			// Turn real tab and line break characters into their two-character escapes.
 			String tabbed = mRegEscTab.matcher(unquoted).replaceAll(Matcher.quoteReplacement("\\t"));
 			return mRegEscEol.matcher(tabbed).replaceAll(Matcher.quoteReplacement("\\n"));
 		}
 		/** Strips the delimiters of one string token and regulates its content. */
 		private String processString(String strl) {
 			String inner = cutString(strl);
 			return regulateString(inner);
 		}
 		/** Processes every token like a single string and joins the results in order. */
 		private String processConcatedString(List<String> ls) {
 			return ls.stream()
 					.map(piece -> regulateString(cutString(piece)))
 					.collect(Collectors.joining());
 		}
 		/* Section layout related stuff */
 		/** Parks the current entry array on the stack and starts a fresh one. */
 		private void pushSection() {
 			JsonArray outer = mSection;
 			mSectionStack.push(outer);
 			mSection = new JsonArray();
 		}
 		/** Makes the most recently parked entry array the current one again. */
 		private void popSection() {
 			JsonArray outer = mSectionStack.pop();
 			mSection = outer;
 		}
 		/** Wraps the current entry array into a section object and appends it to the enclosing array. */
 		private void closeSection(String rawHead) {
 			JsonObject wrapper = new JsonObject();
 			wrapper.addProperty("section", cutSectionHead(rawHead));
 			wrapper.add("entries", mSection);
 			// Only after the wrapper holds the finished array may the outer array come back.
 			popSection();
 			mSection.add(wrapper);
 		}
 		/* Listener */
 		@Override
 		public void enterDocument(NlpParser.DocumentContext ctx) {
 			// The language property is taken from the document header token.
 			String header = ctx.LANG_HEADER().getText();
 			mRoot.addProperty("language", cutLangHead(header));
 		}
 		@Override
 		public void exitDocument(NlpParser.DocumentContext ctx) {
 			// Whatever is current at this point becomes the document's entry list.
 			mRoot.add("entries", mSection);
 		}
 		@Override
 		public void enterSection(NlpParser.SectionContext ctx) {
 			pushSection();
 		}
 		@Override
 		public void exitSection(NlpParser.SectionContext ctx) {
 			closeSection(ctx.SECTION_HEAD().getText());
 		}
 		@Override
 		public void enterSubSection(NlpParser.SubSectionContext ctx) {
 			pushSection();
 		}
 		@Override
 		public void exitSubSection(NlpParser.SubSectionContext ctx) {
 			closeSection(ctx.SUB_SECTION_HEAD().getText());
 		}
 		@Override
 		public void enterEntryString(NlpParser.EntryStringContext ctx) {
 			String raw = ctx.ENTRY_STRING().getText();
 			mSection.add(processString(raw));
 		}
 		@Override
 		public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
 			// Collect the text of every string token, then merge them into one entry.
 			mSection.add(processConcatedString(
 					ctx.ENTRY_STRING().stream()
 							.map(token -> token.getText())
 							.collect(Collectors.toList())));
 		}
 		@Override
 		public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
 			int parsed = Integer.parseInt(ctx.ENTRY_INTEGER().getText());
 			mSection.add(parsed);
 		}
 	}
 	/** Prints the usage text (program name, argument order and the meaning of both arguments) to standard output. */
 	private static void printHelp() {
 		System.out.println("NlpParser Usage");
 		System.out.println("NlpParser <src> <dest>");
 		System.out.println();
-		System.out.println("<src> - the decoded nlp text file.");
+		System.out.println("<src>  - the decoded NLP text file.");
 		System.out.println("<dest> - the output json file.");
 	}
-	public static void main(String[] args) throws Exception {
+	private static class UserRequest {
 		// Holder for the input and output file paths handed to the constructor.
-		// check parameter
+		public UserRequest(String input_filepath, String output_filepath) {
 			this.mInputFilePath = input_filepath;
 			this.mOutputFilePath = output_filepath;
 		}
 		String mInputFilePath;
 		String mOutputFilePath;
 		// Returns the input file path stored at construction.
 		public String getInputFilePath() {
 			return this.mInputFilePath;
 		}
 		// Returns the output file path stored at construction.
 		public String getOutputFilePath() {
 			return this.mOutputFilePath;
 		}
 	}
 	/**
 	 * Builds a UserRequest from the raw command line arguments.
 	 * The first argument is passed as the input file path, the second as the output file path.
 	 *
 	 * @param args the command line arguments
 	 * @return the request holding both paths
 	 * @throws Exception if the argument count is not exactly two (behavior of the added "+" line)
 	 */
 	private static UserRequest resolveArguments(String[] args) throws Exception {
 		// Check parameter
 		if (args.length != 2) {
-			System.out.println("[ERR] Invalid arguments!");
+			throw new Exception("Invalid arguments count!");
-			printHelp();
+		}
-			System.exit(1);
+		// Return fetched arguments
 		return new UserRequest(args[0], args[1]);
 	}
-		// open file stream
+	private static void executeWorker(UserRequest user_request) throws Exception {
 		// Reads the input file, parses it, walks the tree with the converter and writes the
 		// resulting JSON text to the output file.
 		// NOTE(review): removed (-) and added (+) lines are interleaved in this hunk; the catch
 		// block and the standalone OutputStreamWriter declaration further down look like
 		// leftovers of the previous version shown as context - confirm against the real file.
-		FileInputStream fin = null;
+		// Use try-with-resources to safely manage file stream.
-		FileOutputStream fout = null;
+		try (FileInputStream fin = new FileInputStream(user_request.getInputFilePath());
-		try {
+				FileOutputStream fout = new FileOutputStream(user_request.getOutputFilePath());
-			fin = new FileInputStream(args[0]);
+				OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);) {
-			fout = new FileOutputStream(args[1]);
+			// Start lex and parse
 		} catch (Exception e) {
 			if (fin != null) fin.close();
 			if (fout != null) fout.close();
 			System.out.println("[ERR] Fail to open file!");
 			printHelp();
 			System.exit(1);
 		}
 		// start lex and parse
 			// The input bytes are decoded as windows-1252; the output writer uses UTF-8.
 			CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252"));
 			NlpLexer lexer = new NlpLexer(input);
 			CommonTokenStream tokens = new CommonTokenStream(lexer);
 			NlpParser parser = new NlpParser(tokens);
-		// walk tree to build json
+			// Walk tree to build json
 			ParseTree tree = parser.document();
 			ParseTreeWalker walker = new ParseTreeWalker();
-		NlpJsonConverter converter = new NlpJsonConverter();
+			JsonConverter converter = new JsonConverter();
 			walker.walk(converter, tree);
-		// write json
+			// Write json
 		OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);
 			fw.write(converter.buildJsonString());
 		}
 	}
-		// close file stream
+	public static void main(String[] args) throws Exception {
 		// Entry point: resolves the arguments first, then runs the conversion. A failure in
 		// either step is printed to standard output with a prefix and ends the run by returning.
-		fin.close();
+		// Check argument
-		fw.close();
+		UserRequest user_request = null;
 		try {
 			user_request = resolveArguments(args);
 		} catch (Exception e) {
 			// Argument problems additionally print the usage text.
 			System.out.print("[Argument Error] ");
 			System.out.println(e.getMessage());
 			printHelp();
 			return;
 		}
 		// Call converter
 		try {
 			executeWorker(user_request);
 		} catch (Exception e) {
 			// Only the exception message is reported.
 			System.out.print("[Converter Error] ");
 			System.out.println(e.getMessage());
 			return;
 		}
 	}
 }
--- a/NlpParser/StringHelper.java
+++ b/NlpParser/StringHelper.java
@@ -0,0 +1,72 @@
 import java.util.List;
 import java.lang.StringBuilder;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 /**
  * Static helpers that trim token delimiters and normalize the text of string entries.
  */
 public class StringHelper {
 	/*
 	 * Regex constants.
 	 *
 	 * gRegStrCctor matches the string concatenator: a backslash followed by one
 	 * character that is not a backslash, 'r' or 'n', then the rest of that line
 	 * and the line break character(s).
 	 *
 	 * gRegDoubleQuote matches two consecutive double quotes.
 	 *
 	 * gRegEscTab matches a real tab character, gRegEscEol a real line break with
 	 * or without a preceding carriage return.
 	 */
 	private static final Pattern gRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
 	private static final Pattern gRegDoubleQuote = Pattern.compile("\\\"\\\"");
 	// private static final Pattern gRegEscSlash = Pattern.compile("\\\\\\\\");
 	private static final Pattern gRegEscTab = Pattern.compile("\\t");
 	private static final Pattern gRegEscEol = Pattern.compile("\\r?\\n");
 	/**
 	 * Drops as many leading characters as the text "Language:" is long.
 	 *
 	 * @param strl the language header text
 	 * @return the remainder after the header prefix
 	 */
 	public static String cutLanguageHead(String strl) {
 		final int prefixLength = "Language:".length();
 		return strl.substring(prefixLength);
 	}
 	/**
 	 * Drops the first and the last character of a section header.
 	 *
 	 * @param strl the section header text
 	 * @return the text between the first and the last character
 	 */
 	public static String cutSectionHead(String strl) {
 		int last = strl.length() - 1;
 		return strl.substring(1, last);
 	}
 	/**
 	 * Drops the first and the last character of a string token.
 	 *
 	 * @param strl the string token text
 	 * @return the text between the first and the last character
 	 */
 	public static String cutString(String strl) {
 		int last = strl.length() - 1;
 		return strl.substring(1, last);
 	}
 	/**
 	 * Normalizes string content by applying four replacements in a fixed order:
 	 * concatenators are removed, doubled quotes become a single quote, and real
 	 * tab and line break characters become the two-character escapes \t and \n.
 	 *
 	 * @param strl the string content without its delimiters
 	 * @return the normalized content
 	 */
 	public static String regulateString(String strl) {
 		// Remove every string concatenator.
 		String joined = gRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement(""));
 		// Collapse "" into ".
 		String unquoted = gRegDoubleQuote.matcher(joined).replaceAll(Matcher.quoteReplacement("\""));
 		// Double backslashes are deliberately left alone; they are still needed.
 //		strl = gRegEscSlash.matcher(strl).replaceAll(Matcher.quoteReplacement("\\"));
 		// Turn real tab and line break characters into their two-character escapes.
 		String tabbed = gRegEscTab.matcher(unquoted).replaceAll(Matcher.quoteReplacement("\\t"));
 		return gRegEscEol.matcher(tabbed).replaceAll(Matcher.quoteReplacement("\\n"));
 	}
 	/**
 	 * Strips the delimiters of one string token and normalizes its content.
 	 *
 	 * @param strl the string token text including its delimiters
 	 * @return the normalized content
 	 */
 	public static String processString(String strl) {
 		String inner = cutString(strl);
 		return regulateString(inner);
 	}
 	/**
 	 * Processes every token like a single string and joins the results in list order.
 	 *
 	 * @param ls the string token texts, each including its delimiters
 	 * @return the concatenation of all normalized contents
 	 */
 	public static String processConcatedString(List<String> ls) {
 		StringBuilder merged = new StringBuilder();
 		for (String piece : ls) {
 			merged.append(processString(piece));
 		}
 		return merged.toString();
 	}
 }
--- a/Scripts/generate_source.sh
+++ b/Scripts/generate_source.sh
@@ -5,11 +5,11 @@
 ./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt
 cd NlpParser
-java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json
+java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.nested.json
-java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.json
+java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.nested.json
-java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json
+java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.nested.json
-java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json
+java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.nested.json
-java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json
+java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.nested.json
 cd ..
 cd NlpProc