From b6558a718de432b7cc101fd45a212b3334f2b505 Mon Sep 17 00:00:00 2001
From: yyc12345 <yyc12321@outlook.com>
Date: Fri, 30 Jun 2023 17:19:08 +0800
Subject: [PATCH] fix parser error

---
 NlpParser/Nlp.g4         |  12 ++-
 NlpParser/NlpRunner.java | 163 ++++++++++++++++++---------------------
 NlpParser/compile.bat    |   2 +-
 NlpParser/testbench.txt  |  93 ++++++++++------------
 4 files changed, 125 insertions(+), 145 deletions(-)
diff --git a/NlpParser/Nlp.g4 b/NlpParser/Nlp.g4
index b543bd6..949fc62 100644
--- a/NlpParser/Nlp.g4
+++ b/NlpParser/Nlp.g4
@@ -1,5 +1,7 @@
 grammar Nlp;
 
+// ===== Parser =====
+
 document: LANG_HEADER (section)* ;
 
 section: SECTION_HEAD (subSection | entry)* ;
@@ -7,22 +9,24 @@ section: SECTION_HEAD (subSection | entry)* ;
 subSection: SUB_SECTION_HEAD (entry)* ;
 
 entry: ENTRY_STRING                             # entryString
-| ENTRY_STRING (LINE_CONCAT ENTRY_STRING)+      # entryConcatedString
+| ENTRY_STRING (LINE_CONCATOR ENTRY_STRING)+    # entryConcatedString
 | ENTRY_INTEGER                                 # entryInteger
 ;
 
+// ===== Lexer =====
+
 LANG_HEADER: 'Language:' [a-zA-Z]+ ;
 
 SECTION_HEAD: '[' NAME_SECTION ']' ;
 SUB_SECTION_HEAD: '<' NAME_SECTION '>' ;
 fragment NAME_SECTION: [ a-zA-Z0-9]+ ;   // section name are consisted of space, char and number
 
-ENTRY_STRING: '"' (STRING_ESC|.)*? '"' ;
-fragment STRING_ESC: '\\"' | '\\\\' ; 
+ENTRY_STRING: '"' (ENTRY_STRING_ESC| ~'"' )* '"' ;
+fragment ENTRY_STRING_ESC: '""' | '\\\\' | '\\t' | '\\n' ; 
 
 ENTRY_INTEGER: [1-9][0-9]+ ;
 
+LINE_CONCATOR: '\\';
 SPLITTOR: [ ,;\r\n]+ -> skip;               // ignore all splittor and space
-LINE_CONCAT: '\\' ;
 LINE_COMMENT: '//' ~[\r\n]* -> skip ;       // consume all non-line-breaker. because we need line breaker.
 BLOCK_COMMENT: '/*' .*? '*/' -> skip ;
diff --git a/NlpParser/NlpRunner.java b/NlpParser/NlpRunner.java
index 63fb006..1a509ea 100644
--- a/NlpParser/NlpRunner.java
+++ b/NlpParser/NlpRunner.java
@@ -1,20 +1,41 @@
+// import antlr stuff
 import org.antlr.v4.runtime.*;
 import org.antlr.v4.runtime.tree.*;
+// import container
 import java.util.Stack;
+import java.util.stream.Collectors;
 import java.util.List;
 import java.lang.StringBuilder;
+// import json
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+// import regex
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 
 public class NlpRunner {
-	public static class ConvertToJson extends NlpBaseListener {
-		public ConvertToJson() {
-			mIndent = 0;
-			
-			mEntryIndex = 0;
-			mEntryIndexStack = new Stack<Integer>();
-			mIsFirst = true;
-			mIsFirstStack = new Stack<Boolean>();
+	public static class NlpJsonConverter extends NlpBaseListener {
+		public NlpJsonConverter() {
+			mGsonInstance = new GsonBuilder().setPrettyPrinting().create();
+			mRoot = new JsonObject();
+			mSection = new JsonArray();
+			mSectionStack = new Stack<JsonArray>();
+		}
+		/* JSON related stuff */
+		
+		Gson mGsonInstance;
+		public void printJson() {	// write the collected tree (mRoot) to stdout via the pretty-printing Gson instance
+			System.out.print(mGsonInstance.toJson(mRoot));
 		}
 		
+		/* String related stuff */
+		
+		private static final Pattern mRegStrCctor = Pattern.compile("(\\\\[\\\\tn])|\\\\[^\\r\\n]*[\\r\\n]+");	// group 1 = escape pair (backslash + backslash/t/n) to keep; other alternative = line concatenator through the line break(s)
+		private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\"");
+		private static final Pattern mRegEscTab = Pattern.compile("\\t");
+		private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n");
 		private String cutLangHead(String strl) {
 			return strl.substring("Language:".length());
 		}
@@ -24,129 +45,95 @@ public class NlpRunner {
 		private String cutString(String strl) {
 			return strl.substring(1, strl.length() - 1);
 		}
-		private String joinConcatedString(List<TerminalNode> ls) {
+		private String regulateString(String strl) {	// normalize the inner text of one quoted entry; the order of the four steps matters
+			strl = mRegStrCctor.matcher(strl).replaceAll("$1");	// drop concatenator + rest of line + line break, but re-emit escape pairs ($1 is empty when the concatenator alternative matched)
+			strl = mRegDoubleQuote.matcher(strl).replaceAll(Matcher.quoteReplacement("\""));// replace "" with "
+			strl = mRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t"));	// replace real \t to escape char
+			strl = mRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n"));	// replace all real \n to escape char
+			
+			return strl;
+		}
+		private String processString(String strl) {
+			return regulateString(cutString(strl));
+		}
+		private String processConcatedString(List<String> ls) {
 			StringBuilder sb = new StringBuilder();
-			for (TerminalNode node : ls) {
-				sb.append(cutString(node.getText()));
+			for (String node : ls) {
+				sb.append(regulateString(cutString(node)));
 			}
 			
 			return sb.toString();
 		}
 		
-		int mEntryIndex;
-		boolean mIsFirst;
-		Stack<Integer> mEntryIndexStack;
-		Stack<Boolean> mIsFirstStack;
+		/* Section layout related stuff */
+		
+		JsonObject mRoot;
+		JsonArray mSection;
+		Stack<JsonArray> mSectionStack;
 		private void pushSection() {
-			mEntryIndexStack.push(mEntryIndex);
-			mEntryIndex = 0;
-			mIsFirstStack.push(mIsFirst);
-			mIsFirst = true;
+			mSectionStack.push(mSection);
+			mSection = new JsonArray();
 		}
 		private void popSection() {
-			mEntryIndex = mEntryIndexStack.pop();
-			mIsFirst = mIsFirstStack.pop();
-		}
-		private void printComma() {
-			// only the first entry do not need comma
-			if (mIsFirst) {
-				mIsFirst = false;
-			} else {
-				System.out.print(',');
-			}
+			mSection = mSectionStack.pop();
 		}
 		
-		int mIndent;
-		private void printIndent() {
-			for(int i = 0; i < mIndent; ++i) {
-				System.out.print('\t');
-			}
-		}
-		private void printEOL() {
-			System.out.print('\n');
-		}
-		private void printIndentLn(String strl) {
-			// call this when writting tail bracket
-			printEOL();
-			printIndent();
-			System.out.print(strl);
-		}
-		private void printIndentCommaLn(String strl) {
-			// call this when writting anything else.
-			printComma();
-			printEOL();
-			printIndent();
-			System.out.print(strl);
-		}
-		
-		private void printStrEntry(String val) {
-			printIndentCommaLn(String.format("\"%s\": \"%s\"", mEntryIndex++, val));
-		}
-		private void printIntEntry(int val) {
-			printIndentCommaLn(String.format("\"%s\": %d", mEntryIndex++, val));
-		}
-
+		/* Listener */
 		
 		@Override
 		public void enterDocument(NlpParser.DocumentContext ctx) {
-			printIndentCommaLn("{");
-			pushSection();
-			++mIndent;
-			
-			printIndentCommaLn(String.format("\"Language\": \"%s\"", cutLangHead(ctx.LANG_HEADER().getText())));
-			
-			printIndentCommaLn("\"document\": {");
-			pushSection();
-			++mIndent;
+			// insert language prop
+			mRoot.addProperty("language", cutLangHead(ctx.LANG_HEADER().getText()));
 		}
 		@Override
 		public void exitDocument(NlpParser.DocumentContext ctx) {
-			--mIndent;
-			popSection();
-			printIndentLn("}");
-			
-			--mIndent;
-			popSection();
-			printIndentLn("}");
+			// insert entries prop
+			mRoot.add("entries", mSection);
 		}
 		
 		@Override 
 		public void enterSection(NlpParser.SectionContext ctx) { 
-			printIndentCommaLn(String.format("\"%s\": {", cutSectionHead(ctx.SECTION_HEAD().getText())));
 			pushSection();
-			++mIndent;
 		}
 		@Override 
 		public void exitSection(NlpParser.SectionContext ctx) { 
-			--mIndent;
+			// create new object
+			JsonObject objSection = new JsonObject();
+			objSection.addProperty("section", cutSectionHead(ctx.SECTION_HEAD().getText()));
+			objSection.add("entries", mSection);
+			// pop and insert
 			popSection();
-			printIndentLn("}");
+			mSection.add(objSection);
 		}
 		
 		@Override 
 		public void enterSubSection(NlpParser.SubSectionContext ctx) { 
-			printIndentCommaLn(String.format("\"%s\": {", cutSectionHead(ctx.SUB_SECTION_HEAD().getText())));
 			pushSection();
-			++mIndent;
 		}
 		@Override 
 		public void exitSubSection(NlpParser.SubSectionContext ctx) {
-			--mIndent;
+			// create new object
+			JsonObject objSubSection = new JsonObject();
+			objSubSection.addProperty("section", cutSectionHead(ctx.SUB_SECTION_HEAD().getText()));
+			objSubSection.add("entries", mSection);
+			// pop and insert
 			popSection();
-			printIndentLn("}");
+			mSection.add(objSubSection);
 		}
 		
 		@Override 
 		public void enterEntryString(NlpParser.EntryStringContext ctx) {
-			printStrEntry(cutString(ctx.ENTRY_STRING().getText()));
+			mSection.add(processString(ctx.ENTRY_STRING().getText()));
 		}
 		@Override 
 		public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
-			printStrEntry(joinConcatedString(ctx.ENTRY_STRING()));
+			mSection.add(processConcatedString(
+					ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList())
+					));
 		}
 		@Override 
 		public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) { 
-			printIntEntry(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
+			mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
 		}
 	}
 	
@@ -158,7 +145,9 @@ public class NlpRunner {
 		
 		ParseTree tree = parser.document();
 		ParseTreeWalker walker = new ParseTreeWalker();
-		walker.walk(new ConvertToJson(), tree);
+		NlpJsonConverter converter = new NlpJsonConverter();
+		walker.walk(converter, tree);
+		converter.printJson();
 		System.out.println();
 	}
 }
diff --git a/NlpParser/compile.bat b/NlpParser/compile.bat
index 4305e92..9748f2e 100644
--- a/NlpParser/compile.bat
+++ b/NlpParser/compile.bat
@@ -1,4 +1,4 @@
 antlr4 Nlp.g4
 javac Nlp*.java
 ;grun Nlp document -tree < testbench.txt
-java NlpRunner < ../NlpSrc/VT50.txt > result.txt
\ No newline at end of file
+java NlpRunner < ../NlpSrc/VT50.txt > result.json
\ No newline at end of file
diff --git a/NlpParser/testbench.txt b/NlpParser/testbench.txt
index b76e217..4ef8688 100644
--- a/NlpParser/testbench.txt
+++ b/NlpParser/testbench.txt
@@ -1,71 +1,58 @@
 Language:English
 
 /*
-			----===[[[[ Virtools Language Pack File ]]]]===----
-
-   Rules:
-   First line must contain the string 'Language:' followed by the language name.
-   The file may be seen as sections, each section name is enclosed in brackets ([,])
-   Sections can have sub-sections, with names enclosed in brackets (<,>)
-   For every section or sub-section, you can have as many entries as you want,
-   which can be:	- strings (optionaly enclosed in quotes)
-			- numbers
-   Separators for entries are ',',';' or LF (line feed)
-   Entries belong to the last section or sub-section specified, until
-   a new section is reached. The first entry of the section has an index of 0,
-   the next one, an index of 1, and so on.
-   Comments can be added using the standard ANSI C++ scheme.
-   Also, dont remove %s,%d and %f tags, those will be replaced at runtime by strings or numbers.
-   A string can take multiple lines, adding \ at the end of the line means that the entry
-   continues on the next line.
-
-   NOTE FOR TRANSLATORS: DO NOT CHANGE ENTRIES ORDER IN THIS FILE!!!
-
+long comments
 */
 
-[Version]  
-"Virtools Dev","Virtools Crea"
-"Virtools Dev Evaluation","Virtools Crea Evaluation"
-"Virtools Dev Education","Virtools Crea Education"
+// short comments
 
+[Layout 0]
 
-[Profiler]
-"Profiler"
+"entry 0"
 
-"Database","Framerate","Objects Drawn","Faces Drawn","Lines Drawn"
-"Behavior Code Execution Details...","Collisions Management","Parametric Operations",
-"IK","Floors Management","Other Code"
+<layout 0 0>
+"Virtools Dev1","Virtools Dev2"
+"Virtools Dev3"
+"Virtools Dev1","Virtools Dev2",
+"Virtools Dev3"
 
-[Common]
+<layout 0 1>
+"Virtools Dev"
 
-"MultiSelection"
+[Layout 1]
 
-// -- DONT CHANGE THIS
+<layout 1 0>
+<layout 1 1>
 
-<Registry>
-"Software\\Virtools\\Global","Usage Count"
+[String Escape]
 
-<Timebomb>
-"Key1","Key2","Key3","SYSINFO.SysInfo32\\CLSID","\\csrsrv32.dll"
+<String Escape 0>
 
-// -- ENDOF DONT CHANGE
+"foo bar"
+"\\foo\tbar\n"
+"foo	barr	"
+"\\""foo""\t""bar"""
 
-<Timebomb Messages>
-"Your license has expired.\n\nYou must either stop evaluating Virtools or\nextend it by clicking the 'Extend' button and following the instructions.\n\nAlso, check out our web resources for more information:"
-"- Main Web Site","- Virtools mailing list, additional resources, discussions and more."
-"Customer Key:","Check Key:"
-"HOWTO:\n\t- A new customer key will be generated.\n\t- Send it by e-mail to our support service at:  support@Virtools.com\n"\
-"\t  You will receive back a valid check key according to your customer key.\n\t- Copy/Paste the key into corresponding field above.\n"\
-"\t- Your license will be extended to as many days as specified in your agreement.\n\n"\
-"WARNING: Once generated, the customer key is valid for 5 days only."
+<String Escape 1>
 
+"foo"\      // short comments
+"bar"
 
-[3D Layout]
-"3D Layout"
+"foo\       // short comments
+bar"
 
-<Names>
-"New 3D Frame","New 2D Frame","New Camera","New Light","New Curve","New Grid","New Material","New Texture","New Portal"
- 
-<Actions>
-"--- Out of range ---"
-"%s '%s' created." // on creation (ie. TargetLight 'light0000' created.)
+"foo"\      // short comments
+"bar"\      // short comments
+"bar"
+
+"foo\       // short comments
+bar\        // short comments
+bar"
+
+<String Escape 2>
+
+"foo""\\"\
+"""bar"
+
+"foo""\\\
+""bar"