diff --git a/benchmark/yycc/string/op.cpp b/benchmark/yycc/string/op.cpp
index d62e817..2f51f3c 100644
--- a/benchmark/yycc/string/op.cpp
+++ b/benchmark/yycc/string/op.cpp
@@ -15,4 +15,12 @@ namespace yyccbench::string::op {
     }
     BENCHMARK(BM_StringStrip)->Name("StringStrip");
 
+    static void BM_StringTrim(benchmark::State& state) {
+        std::u8string_view strl = u8" \thello\r\n"sv, words = u8" \t\r\n"sv;
+        for (auto _ : state) {
+            benchmark::DoNotOptimize(OP::trim(strl, words)); // keep the measured call from being optimized away
+        }
+    }
+    BENCHMARK(BM_StringTrim)->Name("StringTrim");
+
 }
diff --git a/src/yycc/carton/clap/manual.cpp b/src/yycc/carton/clap/manual.cpp
index 1849907..1bd1fdf 100644
--- a/src/yycc/carton/clap/manual.cpp
+++ b/src/yycc/carton/clap/manual.cpp
@@ -2,10 +2,13 @@
 #include "../termcolor.hpp"
 #include "../../patch/stream.hpp"
 #include "../../patch/format.hpp"
+#include "../../string/op.hpp"
+#include <ranges>
 
 #define CLAP ::yycc::carton::clap
 #define TABULATE ::yycc::carton::tabulate
 #define TERMCOLOR ::yycc::carton::termcolor
+#define OP ::yycc::string::op
 #define FORMAT ::yycc::patch::format
 
 using namespace ::yycc::patch::stream;
@@ -51,6 +54,10 @@ namespace yycc::carton::clap::manual {
         const auto &options = app.get_options();
         for (const auto &reg_opt : options.all_options()) {
             const auto &opt = reg_opt.get_option();
+
+            //for (const auto [index, item] : std::views::enumerate(header)) {
+            //
+            //}
         }
     }
 
diff --git a/src/yycc/string/op.cpp b/src/yycc/string/op.cpp
index 9b2525d..3f2b6dc 100644
--- a/src/yycc/string/op.cpp
+++ b/src/yycc/string/op.cpp
@@ -379,7 +379,7 @@ namespace yycc::string::op {
 #pragma endregion
 
     template<bool bDoLeft, bool bDoRight>
-    std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
+    static std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
         std::optional<TrieTree> prefix, suffix;
         if constexpr (bDoLeft) prefix = TrieTree();
         if constexpr (bDoRight) suffix = TrieTree();
@@ -419,6 +419,62 @@ namespace yycc::string::op {
 
 #pragma endregion
 
+#pragma region Trim
+
+    template<bool bDoLeft, bool bDoRight>
+    static std::u8string_view internal_trim(const std::u8string_view& strl, const std::u8string_view& words) {
+        // check words: matching below is done per code unit, so every code unit in words must be ASCII
+        if (std::ranges::any_of(words, [](auto c) { return (static_cast<unsigned char>(c) & 0x80) != 0; })) {
+            throw std::invalid_argument("given words are not all ASCII (<= 0x7F) only");
+        }
+
+        // prepare return value
+        std::u8string_view rv = strl;
+
+        // remove left first
+        if constexpr (bDoLeft) {
+            auto finder = rv.find_first_not_of(words);
+            if (finder == std::u8string_view::npos) {
+                // every character of the string is in the given words
+                rv = std::u8string_view();
+            } else {
+                // remove by offset
+                rv = rv.substr(finder);
+            }
+        }
+
+        // remove right
+        if constexpr (bDoRight) {
+            auto finder = rv.find_last_not_of(words);
+            if (finder == std::u8string_view::npos) {
+                // every character of the string is in the given words
+                rv = std::u8string_view();
+            } else {
+                // keep everything up to and including the last character not in words
+                rv = rv.substr(0, finder + 1);
+            }
+        }
+
+        // return value
+        return rv;
+    }
+
+    std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words) {
+        return internal_trim<true, true>(strl, words);
+    }
+
+    std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words) {
+        return internal_trim<true, false>(strl, words);
+    }
+
+    std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words) {
+        return internal_trim<false, true>(strl, words);
+    }
+
+
+#pragma endregion
+
+
+
 #pragma region Split
 
     // Reference:
diff --git a/src/yycc/string/op.hpp b/src/yycc/string/op.hpp
index bba8fd5..e2e5742 100644
--- a/src/yycc/string/op.hpp
+++ b/src/yycc/string/op.hpp
@@ -144,32 +144,75 @@ namespace yycc::string::op {
 
 #pragma endregion
 
-#pragma region Strip
+#pragma region Strip and Trim
 
     /**
      * @brief Remove leading and trailing whitespace from the string.
-     * @param[in,out] strl The string to be stripped.
+     * @details
+     * This "strip" function is fully Unicode-aware.
+     * This means that it differs from most ordinary implementations,
+     * which treat each UTF-8 code unit as an individual character when stripping.
+     * This function first splits the given words into UTF-8 code points,
+     * and then tries to strip these code points from the given string.
+     * So it can strip Unicode whitespace or any other characters correctly.
+     * However, it is obviously slower than the ASCII-only version trim().
+     * If you only need to strip ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F,
+     * please consider using trim() for better performance.
+     * @param[in] strl The string to be stripped.
      * @param[in] words The characters to be stripped.
      * @return The string view with leading and trailing whitespace removed.
+     * @see See trim() for the ASCII-only version of "strip".
      */
     std::u8string_view strip(const std::u8string_view& strl, const std::u8string_view& words);
-
     /**
      * @brief Remove leading whitespace from the string.
-     * @param[in,out] strl The string to be stripped.
+     * @param[in] strl The string to be stripped.
      * @param[in] words The characters to be stripped.
      * @return The string view with leading whitespace removed.
+     * @see See strip() for more info.
      */
     std::u8string_view lstrip(const std::u8string_view& strl, const std::u8string_view& words);
-
     /**
      * @brief Remove trailing whitespace from the string.
-     * @param[in,out] strl The string to be stripped.
+     * @param[in] strl The string to be stripped.
      * @param[in] words The characters to be stripped.
      * @return The string view with trailing whitespace removed.
+     * @see See strip() for more info.
      */
     std::u8string_view rstrip(const std::u8string_view& strl, const std::u8string_view& words);
 
+    /**
+     * @brief Remove leading and trailing whitespace from the string.
+     * @details
+     * This is the limited, ASCII-only "trim" function.
+     * It simply treats each code unit in the given words as an individual character when trimming.
+     * So it can only trim ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F.
+     * If you need to trim Unicode whitespace or any other characters,
+     * please consider using strip() for correct behavior.
+     * @param[in] strl The view of string to be trimmed.
+     * @param[in] words The characters to be trimmed. They must all be ASCII; otherwise std::invalid_argument is thrown.
+     * @return The string view with leading and trailing whitespace removed.
+     * @see See strip() for the fully Unicode-aware version of "trim".
+     */
+    std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words);
+    /**
+     * @brief Remove leading whitespace from the string.
+     * @param[in] strl The view of string to be trimmed.
+     * @param[in] words The characters to be trimmed.
+     * @return The string view with leading whitespace removed.
+     * @see See trim() for more info.
+     */
+    std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words);
+    /**
+     * @brief Remove trailing whitespace from the string.
+     * @param[in] strl The view of string to be trimmed.
+     * @param[in] words The characters to be trimmed.
+     * @return The string view with trailing whitespace removed.
+     * @see See trim() for more info.
+     */
+    std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words);
+
+
 #pragma endregion
 
 #pragma region Split
diff --git a/test/yycc/string/op.cpp b/test/yycc/string/op.cpp
index ff290ad..5ab270f 100644
--- a/test/yycc/string/op.cpp
+++ b/test/yycc/string/op.cpp
@@ -86,6 +86,12 @@ namespace yycctest::string::op {
             EXPECT_EQ(rv, u8" \taaa");
         }
 
+        // Full strip
+        {
+            auto rv = OP::strip(u8" ", u8" ");
+            EXPECT_TRUE(rv.empty());
+        }
+
         // Special strip
         {
             auto rv = OP::strip(u8"啊啊啊aaaあああ", u8"啊あ");
@@ -110,6 +116,32 @@ namespace yycctest::string::op {
         }
     }
 
+    TEST(StringOp, Trim) {
+        // Normal trim
+        {
+            auto rv = OP::trim(u8" \taaa\n", u8" \t\r\n");
+            EXPECT_EQ(rv, u8"aaa");
+        }
+        {
+            auto rv = OP::ltrim(u8" \taaa\n", u8" \t\r\n");
+            EXPECT_EQ(rv, u8"aaa\n");
+        }
+        {
+            auto rv = OP::rtrim(u8" \taaa\n", u8" \t\r\n");
+            EXPECT_EQ(rv, u8" \taaa");
+        }
+
+        // Bad words
+        {
+            EXPECT_ANY_THROW(OP::trim(u8"q啊啊啊aaaあああp", u8"p啊q"));
+        }
+
+        // Full trim
+        {
+            auto rv = OP::trim(u8" ", u8" ");
+            EXPECT_TRUE(rv.empty());
+        }
+    }
     TEST(StringOp, Split) {
         // Normal
         {