fix parser error

This commit is contained in:
yyc12345 2023-06-30 17:19:08 +08:00
parent 2525cf402d
commit b6558a718d
4 changed files with 125 additions and 145 deletions

View File

@ -1,5 +1,7 @@
grammar Nlp;
// ===== Parser =====
document: LANG_HEADER (section)* ;
section: SECTION_HEAD (subSection | entry)* ;
@ -7,22 +9,24 @@ section: SECTION_HEAD (subSection | entry)* ;
subSection: SUB_SECTION_HEAD (entry)* ;
entry: ENTRY_STRING # entryString
| ENTRY_STRING (LINE_CONCAT ENTRY_STRING)+ # entryConcatedString
| ENTRY_STRING (LINE_CONCATOR ENTRY_STRING)+ # entryConcatedString
| ENTRY_INTEGER # entryInteger
;
// ===== Lexer =====
// File header: the literal 'Language:' immediately followed by a language name made of ASCII letters.
LANG_HEADER: 'Language:' [a-zA-Z]+ ;
// Section header: a section name enclosed in square brackets.
SECTION_HEAD: '[' NAME_SECTION ']' ;
// Sub-section header: a section name enclosed in angle brackets.
SUB_SECTION_HEAD: '<' NAME_SECTION '>' ;
fragment NAME_SECTION: [ a-zA-Z0-9]+ ; // a section name consists of spaces, ASCII letters and digits
ENTRY_STRING: '"' (STRING_ESC|.)*? '"' ;
fragment STRING_ESC: '\\"' | '\\\\' ;
ENTRY_STRING: '"' (ENTRY_STRING_ESC| ~'"' )* '"' ;
fragment ENTRY_STRING_ESC: '""' | '\\\\' | '\\t' | '\\n' ;
// Integer entry: one or more decimal digits, so a single digit such as 0 or 7 is a valid entry.
ENTRY_INTEGER: [0-9]+ ;
LINE_CONCATOR: '\\';
SPLITTOR: [ ,;\r\n]+ -> skip; // skip entry separators (',' and ';'), spaces and line breaks
LINE_CONCAT: '\\' ;
LINE_COMMENT: '//' ~[\r\n]* -> skip ; // consume everything up to, but not including, the line break; the line break itself stays in the input
// Block comment: '/*' up to the nearest '*/' (non-greedy), discarded.
BLOCK_COMMENT: '/*' .*? '*/' -> skip ;

View File

@ -1,20 +1,41 @@
// import antlr stuff
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*;
// import container
import java.util.Stack;
import java.util.stream.Collectors;
import java.util.List;
import java.lang.StringBuilder;
// import json
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
// import regex
import java.util.regex.Pattern;
import java.util.regex.Matcher;
public class NlpRunner {
public static class ConvertToJson extends NlpBaseListener {
public ConvertToJson() {
mIndent = 0;
mEntryIndex = 0;
mEntryIndexStack = new Stack<Integer>();
mIsFirst = true;
mIsFirstStack = new Stack<Boolean>();
public static class NlpJsonConverter extends NlpBaseListener {
public NlpJsonConverter() {
mGsonInstance = new GsonBuilder().setPrettyPrinting().create();
mRoot = new JsonObject();
mSection = new JsonArray();
mSectionStack = new Stack<JsonArray>();
}
/* JSON related stuff */
Gson mGsonInstance;
/**
 * Serializes the collected JSON tree ({@code mRoot}) with {@code mGsonInstance}
 * and writes it to standard output. No trailing line break is written.
 */
public void printJson() {
    System.out.print(mGsonInstance.toJson(mRoot));
}
/* String related stuff */
// String concatenator inside a string: a backslash, then any characters up to the end of
// the line, then one or more line-break characters.
// NOTE(review): the match starts at the first backslash found on a line that ends with a
// line break, so an earlier backslash escape on a continued line is swallowed as well -
// confirm this is intended.
private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\r\\n]*[\\r\\n]+");
// Two consecutive double quotes.
private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\"");
// A real tab character (the regex is \t).
private static final Pattern mRegEscTab = Pattern.compile("\\t");
// A real line break: an optional carriage return followed by a line feed.
private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n");
/**
 * Drops the leading "Language:" marker from a language header token.
 *
 * @param strl the full header text, starting with the marker
 * @return everything after the marker
 */
private String cutLangHead(String strl) {
    final int markerLength = "Language:".length();
    return strl.substring(markerLength);
}
@ -24,129 +45,95 @@ public class NlpRunner {
/**
 * Removes the first and the last character of the given text.
 *
 * @param strl the text to trim by one character on each side
 * @return the text between the first and the last character
 */
private String cutString(String strl) {
    final int lastIndex = strl.length() - 1;
    return strl.substring(1, lastIndex);
}
private String joinConcatedString(List<TerminalNode> ls) {
/**
 * Normalizes the body of a string entry in four ordered passes.
 *
 * @param strl the string body to normalize
 * @return the normalized text
 */
private String regulateString(String strl) {
    // pass 1: drop every string concatenator match (pattern: \\[^\r\n]*[\r\n]+)
    final String emptyReplacement = Matcher.quoteReplacement("");
    final String joined = mRegStrCctor.matcher(strl).replaceAll(emptyReplacement);

    // pass 2: collapse "" into a single "
    final String quoteReplacement = Matcher.quoteReplacement("\"");
    final String unquoted = mRegDoubleQuote.matcher(joined).replaceAll(quoteReplacement);

    // pass 3: turn each real tab into the two-character escape \t
    final String tabReplacement = Matcher.quoteReplacement("\\t");
    final String tabsEscaped = mRegEscTab.matcher(unquoted).replaceAll(tabReplacement);

    // pass 4: turn each real line break into the two-character escape \n
    final String eolReplacement = Matcher.quoteReplacement("\\n");
    return mRegEscEol.matcher(tabsEscaped).replaceAll(eolReplacement);
}
/**
 * Converts a string token into its stored form: the first and last characters
 * are cut off, then the remainder is normalized.
 *
 * @param strl the full token text
 * @return the cut and normalized text
 */
private String processString(String strl) {
    final String inner = cutString(strl);
    return regulateString(inner);
}
private String processConcatedString(List<String> ls) {
StringBuilder sb = new StringBuilder();
for (TerminalNode node : ls) {
sb.append(cutString(node.getText()));
for (String node : ls) {
sb.append(regulateString(cutString(node)));
}
return sb.toString();
}
int mEntryIndex;
boolean mIsFirst;
Stack<Integer> mEntryIndexStack;
Stack<Boolean> mIsFirstStack;
/* Section layout related stuff */
JsonObject mRoot;
JsonArray mSection;
Stack<JsonArray> mSectionStack;
private void pushSection() {
mEntryIndexStack.push(mEntryIndex);
mEntryIndex = 0;
mIsFirstStack.push(mIsFirst);
mIsFirst = true;
mSectionStack.push(mSection);
mSection = new JsonArray();
}
private void popSection() {
mEntryIndex = mEntryIndexStack.pop();
mIsFirst = mIsFirstStack.pop();
}
private void printComma() {
// only the first entry do not need comma
if (mIsFirst) {
mIsFirst = false;
} else {
System.out.print(',');
}
mSection = mSectionStack.pop();
}
int mIndent;
private void printIndent() {
for(int i = 0; i < mIndent; ++i) {
System.out.print('\t');
}
}
private void printEOL() {
System.out.print('\n');
}
private void printIndentLn(String strl) {
// call this when writting tail bracket
printEOL();
printIndent();
System.out.print(strl);
}
private void printIndentCommaLn(String strl) {
// call this when writting anything else.
printComma();
printEOL();
printIndent();
System.out.print(strl);
}
private void printStrEntry(String val) {
printIndentCommaLn(String.format("\"%s\": \"%s\"", mEntryIndex++, val));
}
private void printIntEntry(int val) {
printIndentCommaLn(String.format("\"%s\": %d", mEntryIndex++, val));
}
/* Listener */
@Override
public void enterDocument(NlpParser.DocumentContext ctx) {
printIndentCommaLn("{");
pushSection();
++mIndent;
printIndentCommaLn(String.format("\"Language\": \"%s\"", cutLangHead(ctx.LANG_HEADER().getText())));
printIndentCommaLn("\"document\": {");
pushSection();
++mIndent;
// insert language prop
mRoot.addProperty("language", cutLangHead(ctx.LANG_HEADER().getText()));
}
@Override
public void exitDocument(NlpParser.DocumentContext ctx) {
--mIndent;
popSection();
printIndentLn("}");
--mIndent;
popSection();
printIndentLn("}");
// insert document prop
mRoot.add("entries", mSection);
}
@Override
public void enterSection(NlpParser.SectionContext ctx) {
printIndentCommaLn(String.format("\"%s\": {", cutSectionHead(ctx.SECTION_HEAD().getText())));
pushSection();
++mIndent;
}
@Override
public void exitSection(NlpParser.SectionContext ctx) {
--mIndent;
// create new object
JsonObject objSection = new JsonObject();
objSection.addProperty("section", cutSectionHead(ctx.SECTION_HEAD().getText()));
objSection.add("entries", mSection);
// pop and insert
popSection();
printIndentLn("}");
mSection.add(objSection);
}
@Override
public void enterSubSection(NlpParser.SubSectionContext ctx) {
printIndentCommaLn(String.format("\"%s\": {", cutSectionHead(ctx.SUB_SECTION_HEAD().getText())));
pushSection();
++mIndent;
}
@Override
public void exitSubSection(NlpParser.SubSectionContext ctx) {
--mIndent;
// create new object
JsonObject objSubSection = new JsonObject();
objSubSection.addProperty("section", cutSectionHead(ctx.SUB_SECTION_HEAD().getText()));
objSubSection.add("entries", mSection);
// pop and insert
popSection();
printIndentLn("}");
mSection.add(objSubSection);
}
@Override
public void enterEntryString(NlpParser.EntryStringContext ctx) {
printStrEntry(cutString(ctx.ENTRY_STRING().getText()));
mSection.add(processString(ctx.ENTRY_STRING().getText()));
}
@Override
public void enterEntryConcatedString(NlpParser.EntryConcatedStringContext ctx) {
printStrEntry(joinConcatedString(ctx.ENTRY_STRING()));
mSection.add(processConcatedString(
ctx.ENTRY_STRING().stream().map(value -> value.getText()).collect(Collectors.toList())
));
}
@Override
public void enterEntryInteger(NlpParser.EntryIntegerContext ctx) {
printIntEntry(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
mSection.add(Integer.parseInt(ctx.ENTRY_INTEGER().getText()));
}
}
@ -158,7 +145,9 @@ public class NlpRunner {
ParseTree tree = parser.document();
ParseTreeWalker walker = new ParseTreeWalker();
walker.walk(new ConvertToJson(), tree);
NlpJsonConverter converter = new NlpJsonConverter();
walker.walk(converter, tree);
converter.printJson();
System.out.println();
}
}

View File

@ -1,4 +1,4 @@
antlr4 Nlp.g4
javac Nlp*.java
;grun Nlp document -tree < testbench.txt
java NlpRunner < ../NlpSrc/VT50.txt > result.txt
java NlpRunner < ../NlpSrc/VT50.txt > result.json

View File

@ -1,71 +1,58 @@
Language:English
/*
----===[[[[ Virtools Language Pack File ]]]]===----
Rules:
First line must contain the string 'Language:' followed by the language name.
The file may be seen as sections, each section name is enclosed in brackets ([,])
Sections can have sub-sections, with names enclosed in brackets (<,>)
For every section or sub-section, you can have as many entries as you want,
which can be: - strings (optionally enclosed in quotes)
- numbers
Separators for entries are ',',';' or LF (line feed)
Entries belong to the last section or sub-section specified, until
a new section is reached. The first entry of the section has an index of 0,
the next one, an index of 1, and so on.
Comments can be added using the standard ANSI C++ scheme.
Also, don't remove %s, %d and %f tags; those will be replaced at runtime by strings or numbers.
A string can take multiple lines, adding \ at the end of the line means that the entry
continues on the next line.
NOTE FOR TRANSLATORS: DO NOT CHANGE ENTRIES ORDER IN THIS FILE!!!
long comments
*/
[Version]
"Virtools Dev","Virtools Crea"
"Virtools Dev Evaluation","Virtools Crea Evaluation"
"Virtools Dev Education","Virtools Crea Education"
// short comments
[Layout 0]
[Profiler]
"Profiler"
"entry 0"
"Database","Framerate","Objects Drawn","Faces Drawn","Lines Drawn"
"Behavior Code Execution Details...","Collisions Management","Parametric Operations",
"IK","Floors Management","Other Code"
<layout 0 0>
"Virtools Dev1","Virtools Dev2"
"Virtools Dev3"
"Virtools Dev1","Virtools Dev2",
"Virtools Dev3"
[Common]
<layout 0 1>
"Virtools Dev"
"MultiSelection"
[Layout 1]
// -- DONT CHANGE THIS
<layout 1 0>
<layout 1 1>
<Registry>
"Software\\Virtools\\Global","Usage Count"
[String Escape]
<Timebomb>
"Key1","Key2","Key3","SYSINFO.SysInfo32\\CLSID","\\csrsrv32.dll"
<String Escape 0>
// -- ENDOF DONT CHANGE
"foo bar"
"\\foo\tbar\n"
"foo barr "
"\\""foo""\t""bar"""
<Timebomb Messages>
"Your license has expired.\n\nYou must either stop evaluating Virtools or\nextend it by clicking the 'Extend' button and following the instructions.\n\nAlso, check out our web resources for more information:"
"- Main Web Site","- Virtools mailing list, additional resources, discussions and more."
"Customer Key:","Check Key:"
"HOWTO:\n\t- A new customer key will be generated.\n\t- Send it by e-mail to our support service at: support@Virtools.com\n"\
"\t You will receive back a valid check key according to your customer key.\n\t- Copy/Paste the key into corresponding field above.\n"\
"\t- Your license will be extended to as many days as specified in your agreement.\n\n"\
"WARNING: Once generated, the customer key is valid for 5 days only."
<String Escape 1>
"foo"\ // short comments
"bar"
[3D Layout]
"3D Layout"
"foo\ // short comments
bar"
<Names>
"New 3D Frame","New 2D Frame","New Camera","New Light","New Curve","New Grid","New Material","New Texture","New Portal"
<Actions>
"--- Out of range ---"
"%s '%s' created." // on creation (ie. TargetLight 'light0000' created.)
"foo"\ // short comments
"bar"\ // short comments
"bar"
"foo\ // short comments
bar\ // short comments
bar"
<String Escape 2>
"foo""\\"\
"""bar"
"foo""\\\
""bar"