From b6558a718de432b7cc101fd45a212b3334f2b505 Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Fri, 30 Jun 2023 17:19:08 +0800 Subject: [PATCH] fix parser error --- NlpParser/Nlp.g4 | 12 ++- NlpParser/NlpRunner.java | 163 ++++++++++++++++++--------------------- NlpParser/compile.bat | 2 +- NlpParser/testbench.txt | 93 ++++++++++------------ 4 files changed, 125 insertions(+), 145 deletions(-) diff --git a/NlpParser/Nlp.g4 b/NlpParser/Nlp.g4 index b543bd6..949fc62 100644 --- a/NlpParser/Nlp.g4 +++ b/NlpParser/Nlp.g4 @@ -1,5 +1,7 @@ grammar Nlp; +// ===== Parser ===== + document: LANG_HEADER (section)* ; section: SECTION_HEAD (subSection | entry)* ; @@ -7,22 +9,24 @@ section: SECTION_HEAD (subSection | entry)* ; subSection: SUB_SECTION_HEAD (entry)* ; entry: ENTRY_STRING # entryString -| ENTRY_STRING (LINE_CONCAT ENTRY_STRING)+ # entryConcatedString +| ENTRY_STRING (LINE_CONCATOR ENTRY_STRING)+ # entryConcatedString | ENTRY_INTEGER # entryInteger ; +// ===== Lexer ===== + LANG_HEADER: 'Language:' [a-zA-Z]+ ; SECTION_HEAD: '[' NAME_SECTION ']' ; SUB_SECTION_HEAD: '<' NAME_SECTION '>' ; fragment NAME_SECTION: [ a-zA-Z0-9]+ ; // section name are consisted of space, char and number -ENTRY_STRING: '"' (STRING_ESC|.)*? '"' ; -fragment STRING_ESC: '\\"' | '\\\\' ; +ENTRY_STRING: '"' (ENTRY_STRING_ESC| ~'"' )* '"' ; +fragment ENTRY_STRING_ESC: '""' | '\\\\' | '\\t' | '\\n' ; ENTRY_INTEGER: [1-9][0-9]+ ; +LINE_CONCATOR: '\\'; SPLITTOR: [ ,;\r\n]+ -> skip; // ignore all splittor and space -LINE_CONCAT: '\\' ; LINE_COMMENT: '//' ~[\r\n]* -> skip ; // consume all non-line-breaker. because we need line breaker. BLOCK_COMMENT: '/*' .*? '*/' -> skip ; diff --git a/NlpParser/NlpRunner.java b/NlpParser/NlpRunner.java index 63fb006..1a509ea 100644 --- a/NlpParser/NlpRunner.java +++ b/NlpParser/NlpRunner.java @@ -1,20 +1,41 @@ +// import antlr stuff import org.antlr.v4.runtime.*; import org.antlr.v4.runtime.tree.*; +// import container import java.util.Stack; +import java.util.stream.Collectors; import java.util.List; import java.lang.StringBuilder; +// import json +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +// import regex +import java.util.regex.Pattern; +import java.util.regex.Matcher; public class NlpRunner { - public static class ConvertToJson extends NlpBaseListener { - public ConvertToJson() { - mIndent = 0; - - mEntryIndex = 0; - mEntryIndexStack = new Stack(); - mIsFirst = true; - mIsFirstStack = new Stack(); + public static class NlpJsonConverter extends NlpBaseListener { + public NlpJsonConverter() { + mGsonInstance = new GsonBuilder().setPrettyPrinting().create(); + mRoot = new JsonObject(); + mSection = new JsonArray(); + mSectionStack = new Stack(); + } + /* JSON related stuff */ + + Gson mGsonInstance; + public void printJson() { + System.out.print(mGsonInstance.toJson(mRoot));; } + /* String related stuff */ + + private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\r\\n]*[\\r\\n]+"); + private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\""); + private static final Pattern mRegEscTab = Pattern.compile("\\t"); + private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n"); private String cutLangHead(String strl) { return strl.substring("Language:".length()); } @@ -24,129 +45,95 @@ public class NlpRunner { private String cutString(String strl) { return strl.substring(1, strl.length() - 1); } - private String joinConcatedString(List ls) { + private String regulateString(String strl) { + strl = mRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement("")); // remove string concator \\[^\r\n]*[\r\n]+ + strl = mRegDoubleQuote.matcher(strl).replaceAll(Matcher.quoteReplacement("\""));// replace "" with " + strl = mRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t")); // replace real \t to escape char + strl = mRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n")); // replace all real \n to escape char + + return strl; + } + private String processString(String strl) { + return regulateString(cutString(strl)); + } + private String processConcatedString(List ls) { StringBuilder sb = new StringBuilder(); - for (TerminalNode node : ls) { - sb.append(cutString(node.getText())); + for (String node : ls) { + sb.append(regulateString(cutString(node))); } return sb.toString(); } - int mEntryIndex; - boolean mIsFirst; - Stack mEntryIndexStack; - Stack mIsFirstStack; + /* Section layout related stuff */ + + JsonObject mRoot; + JsonArray mSection; + Stack mSectionStack; private void pushSection() { - mEntryIndexStack.push(mEntryIndex); - mEntryIndex = 0; - mIsFirstStack.push(mIsFirst); - mIsFirst = true; + mSectionStack.push(mSection); + mSection = new JsonArray(); } private void popSection() { - mEntryIndex = mEntryIndexStack.pop(); - mIsFirst = mIsFirstStack.pop(); - } - private void printComma() { - // only the first entry do not need comma - if (mIsFirst) { - mIsFirst = false; - } else { - System.out.print(','); - } + mSection = mSectionStack.pop(); } - int mIndent; - private void printIndent() { - for(int i = 0; i < mIndent; ++i) { - System.out.print('\t'); - } - } - private void printEOL() { - System.out.print('\n'); - } - private void printIndentLn(String strl) { - // call this when writting tail bracket - printEOL(); - printIndent(); - System.out.print(strl); - } - private void printIndentCommaLn(String strl) { - // call this when writting anything else. - printComma(); - printEOL(); - printIndent(); - System.out.print(strl); - } - - private void printStrEntry(String val) { - printIndentCommaLn(String.format("\"%s\": \"%s\"", mEntryIndex++, val)); - } - private void printIntEntry(int val) { - printIndentCommaLn(String.format("\"%s\": %d", mEntryIndex++, val)); - } - + /* Listener */ @Override public void enterDocument(NlpParser.DocumentContext ctx) { - printIndentCommaLn("{"); - pushSection(); - ++mIndent; - - printIndentCommaLn(String.format("\"Language\": \"%s\"", cutLangHead(ctx.LANG_HEADER().getText()))); - - printIndentCommaLn("\"document\": {"); - pushSection(); - ++mIndent; + // insert language prop + mRoot.addProperty("language", cutLangHead(ctx.LANG_HEADER().getText())); } @Override public void exitDocument(NlpParser.DocumentContext ctx) { - --mIndent; - popSection(); - printIndentLn("}"); - - --mIndent; - popSection(); - printIndentLn("}"); + // insert document prop + mRoot.add("entries", mSection); } @Override public void enterSection(NlpParser.SectionContext ctx) { - printIndentCommaLn(String.format("\"%s\": {", cutSectionHead(ctx.SECTION_HEAD().getText()))); pushSection(); - ++mIndent; } @Override public void exitSection(NlpParser.SectionContext ctx) { - --mIndent; + // create new object + JsonObject objSection = new JsonObject(); + objSection.addProperty("section", cutSectionHead(ctx.SECTION_HEAD().getText())); + objSection.add("entries", mSection); + // pop and insert popSection(); - printIndentLn("}"); + mSection.add(objSection); } @Override public void enterSubSection(NlpParser.SubSectionContext ctx) { - printIndentCommaLn(String.format("\"%s\": {", cutSectionHead(ctx.SUB_SECTION_HEAD().getText()))); pushSection(); - ++mIndent; } @Override public void exitSubSection(NlpParser.SubSectionContext ctx) { - --mIndent; + // create new object + JsonObject objSubSection = new JsonObject(); + objSubSection.addProperty("section", cutSectionHead(ctx.SUB_SECTION_HEAD().getText())); + objSubSection.add("entries", mSection); + // pop and insert popSection(); - printIndentLn("}"); + mSection.add(objSubSection); } @Override public void enterEntryString(NlpParser.EntryStringContext ctx) { - printStrEntry(cutString(ctx.ENTRY_STRING().getText())); + mSection.add(processString(ctx.ENTRY_STRING().getText())); } @Override public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) { - printStrEntry(joinConcatedString(ctx.ENTRY_STRING())); + mSection.add(processConcatedString( + ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList()) + )); } @Override public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) { - printIntEntry(Integer.parseInt(ctx.ENTRY_INTEGER().getText())); + mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText())); } } @@ -158,7 +145,9 @@ public class NlpRunner { ParseTree tree = parser.document(); ParseTreeWalker walker = new ParseTreeWalker(); - walker.walk(new ConvertToJson(), tree); + NlpJsonConverter converter = new NlpJsonConverter(); + walker.walk(converter, tree); + converter.printJson(); System.out.println(); } } diff --git a/NlpParser/compile.bat b/NlpParser/compile.bat index 4305e92..9748f2e 100644 --- a/NlpParser/compile.bat +++ b/NlpParser/compile.bat @@ -1,4 +1,4 @@ antlr4 Nlp.g4 javac Nlp*.java ;grun Nlp document -tree < testbench.txt -java NlpRunner < ../NlpSrc/VT50.txt > result.txt \ No newline at end of file +java NlpRunner < ../NlpSrc/VT50.txt > result.json \ No newline at end of file diff --git a/NlpParser/testbench.txt b/NlpParser/testbench.txt index b76e217..4ef8688 100644 --- a/NlpParser/testbench.txt +++ b/NlpParser/testbench.txt @@ -1,71 +1,58 @@ Language:English /* - ----===[[[[ Virtools Language Pack File ]]]]===---- - - Rules: - First line must contain the string 'Language:' followed by the language name. - The file may be seen as sections, each section name is enclosed in brackets ([,]) - Sections can have sub-sections, with names enclosed in brackets (<,>) - For every section or sub-section, you can have as many entries as you want, - which can be: - strings (optionaly enclosed in quotes) - - numbers - Separators for entries are ',',';' or LF (line feed) - Entries belong to the last section or sub-section specified, until - a new section is reached. The first entry of the section has an index of 0, - the next one, an index of 1, and so on. - Comments can be added using the standard ANSI C++ scheme. - Also, dont remove %s,%d and %f tags, those will be replaced at runtime by strings or numbers. - A string can take multiple lines, adding \ at the end of the line means that the entry - continues on the next line. - - NOTE FOR TRANSLATORS: DO NOT CHANGE ENTRIES ORDER IN THIS FILE!!! - +long comments */ -[Version] -"Virtools Dev","Virtools Crea" -"Virtools Dev Evaluation","Virtools Crea Evaluation" -"Virtools Dev Education","Virtools Crea Education" +// short comments +[Layout 0] -[Profiler] -"Profiler" +"entry 0" -"Database","Framerate","Objects Drawn","Faces Drawn","Lines Drawn" -"Behavior Code Execution Details...","Collisions Management","Parametric Operations", -"IK","Floors Management","Other Code" + +"Virtools Dev1","Virtools Dev2" +"Virtools Dev3" +"Virtools Dev1","Virtools Dev2", +"Virtools Dev3" -[Common] + +"Virtools Dev" -"MultiSelection" +[Layout 1] -// -- DONT CHANGE THIS + + - -"Software\\Virtools\\Global","Usage Count" +[String Escape] - -"Key1","Key2","Key3","SYSINFO.SysInfo32\\CLSID","\\csrsrv32.dll" + -// -- ENDOF DONT CHANGE +"foo bar" +"\\foo\tbar\n" +"foo barr " +"\\""foo""\t""bar""" - -"Your license has expired.\n\nYou must either stop evaluating Virtools or\nextend it by clicking the 'Extend' button and following the instructions.\n\nAlso, check out our web resources for more information:" -"- Main Web Site","- Virtools mailing list, additional resources, discussions and more." -"Customer Key:","Check Key:" -"HOWTO:\n\t- A new customer key will be generated.\n\t- Send it by e-mail to our support service at: support@Virtools.com\n"\ -"\t You will receive back a valid check key according to your customer key.\n\t- Copy/Paste the key into corresponding field above.\n"\ -"\t- Your license will be extended to as many days as specified in your agreement.\n\n"\ -"WARNING: Once generated, the customer key is valid for 5 days only." + +"foo"\ // short comments +"bar" -[3D Layout] -"3D Layout" +"foo\ // short comments +bar" - -"New 3D Frame","New 2D Frame","New Camera","New Light","New Curve","New Grid","New Material","New Texture","New Portal" - - -"--- Out of range ---" -"%s '%s' created." // on creation (ie. TargetLight 'light0000' created.) +"foo"\ // short comments +"bar"\ // short comments +"bar" + +"foo\ // short comments +bar\ // short comments +bar" + + + +"foo""\\"\ +"""bar" + +"foo""\\\ +""bar"