refactor: update NlpParser project

2024-12-13 15:46:28 +08:00
parent b71f6867c5
commit 60fca862f3
4 changed files with 250 additions and 187 deletions
--- a/NlpParser/JsonConverter.java
+++ b/NlpParser/JsonConverter.java
@@ -0,0 +1,100 @@
+import java.util.Stack;
+import java.util.stream.Collectors;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+
+public class JsonConverter extends NlpBaseListener {
+	public JsonConverter() {
+		mGsonInstance = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
+		mRoot = new JsonObject();
+		mSection = new JsonArray();
+		mSectionStack = new Stack<JsonArray>();
+	}
+	/* ========== JSON related stuff ========== */
+
+	Gson mGsonInstance;
+
+	public String buildJsonString() {
+		return mGsonInstance.toJson(mRoot);
+	}
+
+	/* ========== Section layout related stuff ========== */
+
+	JsonObject mRoot;
+	JsonArray mSection;
+	Stack<JsonArray> mSectionStack;
+
+	private void pushSection() {
+		mSectionStack.push(mSection);
+		mSection = new JsonArray();
+	}
+
+	private void popSection() {
+		mSection = mSectionStack.pop();
+	}
+
+	/* ========== Listener ========== */
+
+	@Override
+	public void enterDocument(NlpParser.DocumentContext ctx) {
+		// insert language prop
+		mRoot.addProperty("language", StringHelper.cutLanguageHead(ctx.LANG_HEADER().getText()));
+	}
+
+	@Override
+	public void exitDocument(NlpParser.DocumentContext ctx) {
+		// insert document prop
+		mRoot.add("entries", mSection);
+	}
+
+	@Override
+	public void enterSection(NlpParser.SectionContext ctx) {
+		pushSection();
+	}
+
+	@Override
+	public void exitSection(NlpParser.SectionContext ctx) {
+		// create new object
+		JsonObject objSection = new JsonObject();
+		objSection.addProperty("section", StringHelper.cutSectionHead(ctx.SECTION_HEAD().getText()));
+		objSection.add("entries", mSection);
+		// pop and insert
+		popSection();
+		mSection.add(objSection);
+	}
+
+	@Override
+	public void enterSubSection(NlpParser.SubSectionContext ctx) {
+		pushSection();
+	}
+
+	@Override
+	public void exitSubSection(NlpParser.SubSectionContext ctx) {
+		// create new object
+		JsonObject objSubSection = new JsonObject();
+		objSubSection.addProperty("section", StringHelper.cutSectionHead(ctx.SUB_SECTION_HEAD().getText()));
+		objSubSection.add("entries", mSection);
+		// pop and insert
+		popSection();
+		mSection.add(objSubSection);
+	}
+
+	@Override
+	public void enterEntryString(NlpParser.EntryStringContext ctx) {
+		mSection.add(StringHelper.processString(ctx.ENTRY_STRING().getText()));
+	}
+
+	@Override
+	public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
+		mSection.add(StringHelper.processConcatedString(
+				ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList())));
+	}
+
+	@Override
+	public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
+		mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
+	}
+}
--- a/NlpParser/MainRunner.java
+++ b/NlpParser/MainRunner.java
@@ -1,20 +1,6 @@
-// import antlr stuff
 import org.antlr.v4.runtime.*;
 import org.antlr.v4.runtime.tree.*;
-// import container
-import java.util.Stack;
-import java.util.stream.Collectors;
-import java.util.List;
-import java.lang.StringBuilder;
-// import json
-import com.google.gson.JsonArray;
-import com.google.gson.JsonObject;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-// import regex
-import java.util.regex.Pattern;
-import java.util.regex.Matcher;
-// import io related
+
 import java.io.FileOutputStream;
 import java.io.FileInputStream;
 import java.io.OutputStreamWriter;
@@ -22,179 +8,84 @@ import java.nio.charset.StandardCharsets;
 import java.nio.charset.Charset;

 public class MainRunner {
-	public static class NlpJsonConverter extends NlpBaseListener {
-		public NlpJsonConverter() {
-			mGsonInstance = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
-			mRoot = new JsonObject();
-			mSection = new JsonArray();
-			mSectionStack = new Stack<JsonArray>();
-		}
-		/* JSON related stuff */
-		
-		Gson mGsonInstance;
-		public String buildJsonString() {
-			return mGsonInstance.toJson(mRoot);
-		}
-		
-		/* String related stuff */
-		
-		// \\\\[^\\rn] match the concator. concator must not be appended with \n \r or \\ 
-		// [^\\r\\n]*[\\r\\n]+ is match to line breaker.
-		private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
-		private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\"");
-		// private static final Pattern mRegEscSlash = Pattern.compile("\\\\\\\\");
-		private static final Pattern mRegEscTab = Pattern.compile("\\t");
-		private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n");
-		private String cutLangHead(String strl) {
-			return strl.substring("Language:".length());
-		}
-		private String cutSectionHead(String strl) {
-			return strl.substring(1, strl.length() - 1);
-		}
-		private String cutString(String strl) {
-			return strl.substring(1, strl.length() - 1);
-		}
-		private String regulateString(String strl) {
-			strl = mRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement(""));		// remove string concator
-			strl = mRegDoubleQuote.matcher(strl).replaceAll(Matcher.quoteReplacement("\""));// replace "" with "
-			// strl = mRegEscSlash.matcher(strl).replaceAll(Matcher.quoteReplacement("\\"));// leave double back slash alone. we still need it.
-			strl = mRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t"));	// replace real escape to escape char
-			strl = mRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n"));
-			
-			return strl;			
-		}
-		private String processString(String strl) {
-			return regulateString(cutString(strl));
-		}
-		private String processConcatedString(List<String> ls) {
-			StringBuilder sb = new StringBuilder();
-			for (String node : ls) {
-				sb.append(regulateString(cutString(node)));
-			}
-			
-			return sb.toString();
-		}
-		
-		/* Section layout related stuff */
-		
-		JsonObject mRoot;
-		JsonArray mSection;
-		Stack<JsonArray> mSectionStack;
-		private void pushSection() {
-			mSectionStack.push(mSection);
-			mSection = new JsonArray();
-		}
-		private void popSection() {
-			mSection = mSectionStack.pop();
-		}
-		
-		/* Listener */
-		
-		@Override
-		public void enterDocument(NlpParser.DocumentContext ctx) {
-			// insert language prop
-			mRoot.addProperty("language", cutLangHead(ctx.LANG_HEADER().getText()));
-		}
-		@Override
-		public void exitDocument(NlpParser.DocumentContext ctx) {
-			// insert document prop
-			mRoot.add("entries", mSection);
-		}
-		
-		@Override 
-		public void enterSection(NlpParser.SectionContext ctx) { 
-			pushSection();
-		}
-		@Override 
-		public void exitSection(NlpParser.SectionContext ctx) { 
-			// create new object
-			JsonObject objSection = new JsonObject();
-			objSection.addProperty("section", cutSectionHead(ctx.SECTION_HEAD().getText()));
-			objSection.add("entries", mSection);
-			// pop and insert
-			popSection();
-			mSection.add(objSection);
-		}
-		
-		@Override 
-		public void enterSubSection(NlpParser.SubSectionContext ctx) { 
-			pushSection();
-		}
-		@Override 
-		public void exitSubSection(NlpParser.SubSectionContext ctx) {
-			// create new object
-			JsonObject objSubSection = new JsonObject();
-			objSubSection.addProperty("section", cutSectionHead(ctx.SUB_SECTION_HEAD().getText()));
-			objSubSection.add("entries", mSection);
-			// pop and insert
-			popSection();
-			mSection.add(objSubSection);
-		}
-		
-		@Override 
-		public void enterEntryString(NlpParser.EntryStringContext ctx) {
-			mSection.add(processString(ctx.ENTRY_STRING().getText()));
-		}
-		@Override 
-		public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
-			mSection.add(processConcatedString(
-					ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList())
-					));
-		}
-		@Override 
-		public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) { 
-			mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
-		}
-	}

 	private static void printHelp() {
+		System.out.println("NlpParser Usage");
 		System.out.println("NlpParser <src> <dest>");
 		System.out.println();
-		System.out.println("<src> - the decoded nlp text file.");
+		System.out.println("<src>  - the decoded NLP text file.");
 		System.out.println("<dest> - the output json file.");
 	}

-	public static void main(String[] args) throws Exception {
-		// check parameter
+	private static class UserRequest {
+		public UserRequest(String input_filepath, String output_filepath) {
+			this.mInputFilePath = input_filepath;
+			this.mOutputFilePath = output_filepath;
+		}
+
+		String mInputFilePath;
+		String mOutputFilePath;
+
+		public String getInputFilePath() {
+			return this.mInputFilePath;
+		}
+
+		public String getOutputFilePath() {
+			return this.mOutputFilePath;
+		}
+
+	}
+
+	private static UserRequest resolveArguments(String[] args) throws Exception {
+		// Check parameter
 		if (args.length != 2) {
-			System.out.println("[ERR] Invalid arguments!");
-			printHelp();
-			System.exit(1);
+			throw new Exception("Invalid arguments count!");
+		}
+		// Return fetched arguments
+		return new UserRequest(args[0], args[1]);
 	}

-		// open file stream
-		FileInputStream fin = null;
-		FileOutputStream fout = null;
-		try {
-			fin = new FileInputStream(args[0]);
-			fout = new FileOutputStream(args[1]);
-		} catch (Exception e) {
-			if (fin != null) fin.close();
-			if (fout != null) fout.close();
-			
-			System.out.println("[ERR] Fail to open file!");
-			printHelp();
-			System.exit(1);
-		}
-
-		// start lex and parse
+	private static void executeWorker(UserRequest user_request) throws Exception {
+		// Use try-with-resources to safely manage file stream.
+		try (FileInputStream fin = new FileInputStream(user_request.getInputFilePath());
+				FileOutputStream fout = new FileOutputStream(user_request.getOutputFilePath());
+				OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);) {
+			// Start lex and parse
 			CharStream input = CharStreams.fromStream(fin, Charset.forName("windows-1252"));
 			NlpLexer lexer = new NlpLexer(input);
 			CommonTokenStream tokens = new CommonTokenStream(lexer);
 			NlpParser parser = new NlpParser(tokens);

-		// walk tree to build json
+			// Walk tree to build json
 			ParseTree tree = parser.document();
 			ParseTreeWalker walker = new ParseTreeWalker();
-		NlpJsonConverter converter = new NlpJsonConverter();
+			JsonConverter converter = new JsonConverter();
 			walker.walk(converter, tree);

-		// write json
-		OutputStreamWriter fw = new OutputStreamWriter(fout, StandardCharsets.UTF_8);
+			// Write json
 			fw.write(converter.buildJsonString());
+		}
+	}

-		// close file stream
-		fin.close();
-		fw.close();
+	public static void main(String[] args) throws Exception {
+		// Check argument
+		UserRequest user_request = null;
+		try {
+			user_request = resolveArguments(args);
+		} catch (Exception e) {
+			System.out.print("[Argument Error] ");
+			System.out.println(e.getMessage());
+			printHelp();
+			return;
+		}
+
+		// Call converter
+		try {
+			executeWorker(user_request);
+		} catch (Exception e) {
+			System.out.print("[Converter Error] ");
+			System.out.println(e.getMessage());
+			return;
+		}
 	}
 }
--- a/NlpParser/StringHelper.java
+++ b/NlpParser/StringHelper.java
@@ -0,0 +1,72 @@
+import java.util.List;
+import java.lang.StringBuilder;
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+/**
+ * String related stuff
+ */
+public class StringHelper {
+
+	/*
+	 * Regex Constants.
+	 * 
+	 * Hints:
+	 * 
+	 * \\\\[^\\\\rn] matches the concatenator: a backslash that is not followed by
+	 * n, r or another backslash (so it is not a \n, \r or \\ escape).
+	 * 
+	 * [^\\r\\n]*[\\r\\n]+ matches the rest of that line and its line break(s).
+	 * 
+	 */
+
+	private static final Pattern gRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");
+	private static final Pattern gRegDoubleQuote = Pattern.compile("\\\"\\\"");
+	// private static final Pattern gRegEscSlash = Pattern.compile("\\\\\\\\");
+	private static final Pattern gRegEscTab = Pattern.compile("\\t");
+	private static final Pattern gRegEscEol = Pattern.compile("\\r?\\n");
+
+	public static String cutLanguageHead(String strl) {
+		return strl.substring("Language:".length());
+	}
+
+	public static String cutSectionHead(String strl) {
+		return strl.substring(1, strl.length() - 1);
+	}
+
+	public static String cutString(String strl) {
+		return strl.substring(1, strl.length() - 1);
+	}
+
+	public static String regulateString(String strl) {
+		// remove string concator
+		strl = gRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement(""));
+
+		// replace "" with "
+		strl = gRegDoubleQuote.matcher(strl).replaceAll(Matcher.quoteReplacement("\""));
+
+		// leave double back slash alone. we still need it.
+//		strl = gRegEscSlash.matcher(strl).replaceAll(Matcher.quoteReplacement("\\"));
+
+		// replace real escape to escape char
+		strl = gRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t"));
+		strl = gRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n"));
+
+		return strl;
+	}
+
+	public static String processString(String strl) {
+		return regulateString(cutString(strl));
+	}
+
+	public static String processConcatedString(List<String> ls) {
+		StringBuilder sb = new StringBuilder();
+		for (String node : ls) {
+			sb.append(regulateString(cutString(node)));
+		}
+
+		return sb.toString();
+	}
+
+}
--- a/Scripts/generate_source.sh
+++ b/Scripts/generate_source.sh
@@ -5,11 +5,11 @@
 ./NlpCodec/out/NlpCodec decode NlpSrc/VT50.nlp NlpSrc/VT50.txt

 cd NlpParser
-java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.json
-java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.json
-java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.json
-java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.json
-java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.json
+java MainRunner ../NlpSrc/VT25.txt ../NlpSrc/VT25.nested.json
+java MainRunner ../NlpSrc/VT30.txt ../NlpSrc/VT30.nested.json
+java MainRunner ../NlpSrc/VT35.txt ../NlpSrc/VT35.nested.json
+java MainRunner ../NlpSrc/VT40.txt ../NlpSrc/VT40.nested.json
+java MainRunner ../NlpSrc/VT50.txt ../NlpSrc/VT50.nested.json
 cd ..

 cd NlpProc