refact: introduce hasNonAsciiAndControlCharacters

Make regex thread_local.
This commit is contained in:
black-desk 2023-08-25 12:05:10 +08:00 committed by Comix
parent 17536bca3b
commit a16754647a

View File

@ -15,6 +15,29 @@
#include <chrono> #include <chrono>
#include <cstdio> #include <cstdio>
namespace {
bool hasNonAsciiAndControlCharacters(const QString &str) noexcept
{
static const QRegularExpression _matchControlChars = []() {
QRegularExpression tmp{R"(\p{Cc})"};
tmp.optimize();
return tmp;
}();
thread_local const auto matchControlChars = _matchControlChars;
static const QRegularExpression _matchNonAsciiChars = []() {
QRegularExpression tmp{R"([^\x00-\x7f])"};
tmp.optimize();
return tmp;
}();
thread_local const auto matchNonAsciiChars = _matchNonAsciiChars;
if (str.contains(matchControlChars) and str.contains(matchNonAsciiChars)) {
return true;
}
return false;
}
} // namespace
auto DesktopEntry::parseGroupHeader(const QString &str) noexcept auto DesktopEntry::parseGroupHeader(const QString &str) noexcept
{ {
// https://specifications.freedesktop.org/desktop-entry-spec/desktop-entry-spec-latest.html#group-header // https://specifications.freedesktop.org/desktop-entry-spec/desktop-entry-spec-latest.html#group-header
@ -22,14 +45,7 @@ auto DesktopEntry::parseGroupHeader(const QString &str) noexcept
auto groupHeader = str.sliced(1, str.size() - 2).trimmed(); auto groupHeader = str.sliced(1, str.size() - 2).trimmed();
decltype(m_entryMap)::iterator it{m_entryMap.end()}; decltype(m_entryMap)::iterator it{m_entryMap.end()};
// NOTE: if (groupHeader.contains('[') || groupHeader.contains(']') || hasNonAsciiAndControlCharacters(groupHeader)) {
// This regex match '[', ']', control characters
// and all non-ascii characters.
// They are invalid in group header.
// https://regex101.com/r/bZhHZo/1
QRegularExpression re{R"([^\x20-\x5a\x5e-\x7e\x5c])"};
auto matcher = re.match(groupHeader);
if (matcher.hasMatch()) {
qWarning() << "group header invalid:" << str; qWarning() << "group header invalid:" << str;
return it; return it;
} }
@ -61,11 +77,12 @@ bool DesktopEntry::isInvalidLocaleString(const QString &str) noexcept
constexpr auto Modifier = R"((?:@[a-z=;]+))"; // modifier of locale postfix. eg.(euro;collation=traditional) constexpr auto Modifier = R"((?:@[a-z=;]+))"; // modifier of locale postfix. eg.(euro;collation=traditional)
const static auto validKey = QString(R"(^%1%2?%3?%4?$)").arg(Language, Country, Encoding, Modifier); const static auto validKey = QString(R"(^%1%2?%3?%4?$)").arg(Language, Country, Encoding, Modifier);
// example: https://regex101.com/r/hylOay/2 // example: https://regex101.com/r/hylOay/2
static QRegularExpression re = []() -> QRegularExpression { static const QRegularExpression _re = []() -> QRegularExpression {
QRegularExpression tmp{validKey}; QRegularExpression tmp{validKey};
tmp.optimize(); tmp.optimize();
return tmp; return tmp;
}(); }();
thread_local const auto re = _re;
return re.match(str).hasMatch(); return re.match(str).hasMatch();
} }
@ -104,7 +121,13 @@ std::optional<QPair<QString, QString>> DesktopEntry::processEntryKey(const QStri
key = keyStr; key = keyStr;
} }
QRegularExpression re{"R([^A-Za-z0-9-])"}; static const QRegularExpression _re = []() {
QRegularExpression tmp{"R([^A-Za-z0-9-])"};
tmp.optimize();
return tmp;
}();
// NOTE: https://stackoverflow.com/a/25583104
thread_local const QRegularExpression re = _re;
if (re.match(key).hasMatch()) { if (re.match(key).hasMatch()) {
qWarning() << "keyName's format is invalid."; qWarning() << "keyName's format is invalid.";
return std::nullopt; return std::nullopt;
@ -477,10 +500,9 @@ QString DesktopEntry::Value::toString(bool &ok) const noexcept
if (str == this->end()) { if (str == this->end()) {
return {}; return {};
} }
auto unescapedStr = unescape(*str); auto unescapedStr = unescape(*str);
constexpr auto controlChars = "\\p{Cc}"; if (hasNonAsciiAndControlCharacters(unescapedStr)) {
constexpr auto asciiChars = "[^\x00-\x7f]";
if (unescapedStr.contains(QRegularExpression{controlChars}) and unescapedStr.contains(QRegularExpression{asciiChars})) {
return {}; return {};
} }