VirtoolsTranslation/NlpParser/StringHelper.java

73 lines
2.0 KiB
Java

import java.util.List;
import java.lang.StringBuilder;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Static helpers that trim the delimiters off raw string tokens and normalise
 * their contents.
 */
public class StringHelper {

    /** Prefix removed by {@link #cutLanguageHead(String)}. */
    private static final String LANGUAGE_HEAD = "Language:";

    /**
     * Matches a string concatenator together with the rest of its line: a
     * backslash, then one character that is not a backslash, 'r' or 'n' (so a
     * backslash followed by one of those three is never taken as a
     * concatenator), then any characters up to and including the following run
     * of CR/LF characters.
     */
    private static final Pattern CONCATENATOR = Pattern.compile("\\\\[^\\\\rn][^\\r\\n]*[\\r\\n]+");

    /** Matches two consecutive double quotes. */
    private static final Pattern DOUBLED_QUOTE = Pattern.compile("\\\"\\\"");

    /** Matches a real tab character. */
    private static final Pattern TAB_CHAR = Pattern.compile("\\t");

    /** Matches a real line break: LF, optionally preceded by CR. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r?\\n");

    /**
     * Drops the leading {@code "Language:"} prefix.
     *
     * @param strl text whose first {@code "Language:".length()} characters are discarded
     * @return everything after those characters
     */
    public static String cutLanguageHead(String strl) {
        final int headLength = LANGUAGE_HEAD.length();
        return strl.substring(headLength);
    }

    /**
     * Strips the first and the last character of a section header.
     *
     * @param strl the header including its two delimiter characters
     * @return the text between the delimiters
     */
    public static String cutSectionHead(String strl) {
        return dropFirstAndLast(strl);
    }

    /**
     * Strips the first and the last character of a string token.
     *
     * @param strl the token including its two delimiter characters
     * @return the text between the delimiters
     */
    public static String cutString(String strl) {
        return dropFirstAndLast(strl);
    }

    /**
     * Normalises the body of a string token. In order: concatenators (with the
     * rest of their line) are removed, doubled double quotes collapse into a
     * single one, real tabs become the two characters backslash + 't', and real
     * line breaks become the two characters backslash + 'n'.
     *
     * @param strl the token body, delimiters already removed
     * @return the normalised text
     */
    public static String regulateString(String strl) {
        // Step 1: splice out every concatenator and the line remainder behind it.
        final String joined = CONCATENATOR.matcher(strl).replaceAll(Matcher.quoteReplacement(""));

        // Step 2: "" -> "
        final String unquoted = DOUBLED_QUOTE.matcher(joined).replaceAll(Matcher.quoteReplacement("\""));

        // Doubled backslashes are deliberately left untouched; they are still
        // needed further down the line.

        // Step 3: turn real control characters into their escaped spelling.
        final String tabsEscaped = TAB_CHAR.matcher(unquoted).replaceAll(Matcher.quoteReplacement("\\t"));
        return LINE_BREAK.matcher(tabsEscaped).replaceAll(Matcher.quoteReplacement("\\n"));
    }

    /**
     * Strips the delimiters of a string token and normalises what is left.
     *
     * @param strl the token including its two delimiter characters
     * @return the normalised token body
     */
    public static String processString(String strl) {
        final String body = cutString(strl);
        return regulateString(body);
    }

    /**
     * Processes every token like {@link #processString(String)} and joins the
     * results in list order without any separator.
     *
     * @param ls the tokens, each including its two delimiter characters
     * @return the concatenation of all normalised token bodies
     */
    public static String processConcatedString(List<String> ls) {
        final StringBuilder joined = new StringBuilder();
        for (String token : ls) {
            joined.append(processString(token));
        }
        return joined.toString();
    }

    /** Returns {@code text} without its first and last character. */
    private static String dropFirstAndLast(String text) {
        final int lastIndex = text.length() - 1;
        return text.substring(1, lastIndex);
    }
}