From 2a234dbf7de85c8fd3402b731e000f253b40821b Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Fri, 30 Jun 2023 22:49:36 +0800 Subject: [PATCH] fix backslash regex match error --- NlpParser/NlpRunner.java | 12 ++++++++---- NlpParser/testbench.txt | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/NlpParser/NlpRunner.java b/NlpParser/NlpRunner.java index fc76cd1..d53998b 100644 --- a/NlpParser/NlpRunner.java +++ b/NlpParser/NlpRunner.java @@ -37,8 +37,11 @@ public class NlpRunner { /* String related stuff */ - private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\r\\n]*[\\r\\n]+"); + // \\\\[^\\\\rn] matches the concatenator: a backslash that is not followed by n, r or another backslash. + // [^\\r\\n]*[\\r\\n]+ matches the rest of the line and the line break(s). + private static final Pattern mRegStrCctor = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+"); private static final Pattern mRegDoubleQuote = Pattern.compile("\\\"\\\""); + private static final Pattern mRegEscSlash = Pattern.compile("\\\\\\\\"); private static final Pattern mRegEscTab = Pattern.compile("\\t"); private static final Pattern mRegEscEol = Pattern.compile("\\r?\\n"); private String cutLangHead(String strl) { @@ -51,10 +54,11 @@ public class NlpRunner { return strl.substring(1, strl.length() - 1); } private String regulateString(String strl) { - strl = mRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement("")); // remove string concator \\[^\r\n]*[\r\n]+ + strl = mRegStrCctor.matcher(strl).replaceAll(Matcher.quoteReplacement("")); // remove string concatenator strl = mRegDoubleQuote.matcher(strl).replaceAll(Matcher.quoteReplacement("\""));// replace "" with " - strl = mRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t")); // replace real \t to escape char - strl = mRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n")); // replace all real \n to escape char + strl = mRegEscSlash.matcher(strl).replaceAll(Matcher.quoteReplacement("\\")); // replace an escaped backslash (two backslashes) with a single backslash + 
strl = mRegEscTab.matcher(strl).replaceAll(Matcher.quoteReplacement("\\t")); + strl = mRegEscEol.matcher(strl).replaceAll(Matcher.quoteReplacement("\\n")); return strl; } diff --git a/NlpParser/testbench.txt b/NlpParser/testbench.txt index 4ef8688..acfc92c 100644 --- a/NlpParser/testbench.txt +++ b/NlpParser/testbench.txt @@ -56,3 +56,6 @@ bar" "foo""\\\ ""bar" + +"foo\n\ +bar"