feat: add trim to string op as the ASCII-only counterpart of strip

2025-10-01 20:53:43 +08:00
parent 05a80268ab
commit 446f880df4
5 changed files with 153 additions and 7 deletions
--- a/benchmark/yycc/string/op.cpp
+++ b/benchmark/yycc/string/op.cpp
@@ -15,4 +15,12 @@ namespace yyccbench::string::op {
    }
    BENCHMARK(BM_StringStrip)->Name("StringStrip");

+    static void BM_StringTrim(benchmark::State& state) { // Times OP::trim on a short ASCII-padded sample.
+        const std::u8string_view strl = u8" \thello\r\n"sv, words = u8" \t\r\n"sv;
+        for (auto _ : state) {
+            benchmark::DoNotOptimize(OP::trim(strl, words)); // keep the otherwise-unused result observable
+        }
+    }
+    BENCHMARK(BM_StringTrim)->Name("StringTrim");
+
 }
--- a/src/yycc/carton/clap/manual.cpp
+++ b/src/yycc/carton/clap/manual.cpp
@@ -2,10 +2,13 @@
 #include "../termcolor.hpp"
 #include "../../patch/stream.hpp"
 #include "../../patch/format.hpp"
+#include "../../string/op.hpp"
+#include <ranges>

 #define CLAP ::yycc::carton::clap
 #define TABULATE ::yycc::carton::tabulate
 #define TERMCOLOR ::yycc::carton::termcolor
+#define OP ::yycc::string::op
 #define FORMAT ::yycc::patch::format

 using namespace ::yycc::patch::stream;
@@ -51,6 +54,10 @@ namespace yycc::carton::clap::manual {
        const auto &options = app.get_options();
        for (const auto &reg_opt : options.all_options()) {
            const auto &opt = reg_opt.get_option();
+
+            //for (const auto [index, item] : std::views::enumerate(header)) {
+            //
+            //}
        }
    }

--- a/src/yycc/string/op.cpp
+++ b/src/yycc/string/op.cpp
@@ -379,7 +379,7 @@ namespace yycc::string::op {
 #pragma endregion

    template<bool bDoLeft, bool bDoRight>
-    std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
+    static std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
        std::optional<TrieTree> prefix, suffix;
        if constexpr (bDoLeft) prefix = TrieTree();
        if constexpr (bDoRight) suffix = TrieTree();
@@ -419,6 +419,62 @@ namespace yycc::string::op {

 #pragma endregion

+#pragma region Trim
+
+    /**
+     * @brief ASCII-only trim core; bDoLeft / bDoRight select which ends of strl lose characters found in words.
+     * @throws std::invalid_argument if any code unit in words has its high bit (0x80) set.
+     */
+    template<bool bDoLeft, bool bDoRight>
+    static std::u8string_view internal_trim(const std::u8string_view& strl, const std::u8string_view& words) {
+        // Words are matched one code unit at a time, so only ASCII code units (high bit clear) are accepted.
+        if (std::ranges::any_of(words, [](auto c) { return (static_cast<uint8_t>(c) & 0x80) != 0; })) {
+            throw std::invalid_argument("given words are not all ASCII (<= 0x7F) only");
+        }
+
+        std::u8string_view rv = strl;
+
+        // Trim the left end first.
+        if constexpr (bDoLeft) {
+            const auto first_kept = rv.find_first_not_of(words);
+            if (first_kept == std::u8string_view::npos) {
+                // Every code unit is one of the given words, so nothing survives.
+                rv = std::u8string_view();
+            } else {
+                rv = rv.substr(first_kept);
+            }
+        }
+
+        // Then trim the right end of whatever is left.
+        if constexpr (bDoRight) {
+            const auto last_kept = rv.find_last_not_of(words);
+            if (last_kept == std::u8string_view::npos) {
+                // Nothing to keep: rv is empty or consists only of the given words.
+                rv = std::u8string_view();
+            } else {
+                rv = rv.substr(0, last_kept + 1);
+            }
+        }
+
+        return rv;
+    }
+
+    std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words) {
+        return internal_trim<true, true>(strl, words); // bDoLeft and bDoRight both on: trim both ends
+    }
+
+    std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words) {
+        return internal_trim<true, false>(strl, words); // bDoLeft only: the right end is left untouched
+    }
+
+    std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words) {
+        return internal_trim<false, true>(strl, words); // bDoRight only: the left end is left untouched
+    }
+
+
+#pragma endregion
+
+
 #pragma region Split

    // Reference:
--- a/src/yycc/string/op.hpp
+++ b/src/yycc/string/op.hpp
@@ -144,32 +144,75 @@ namespace yycc::string::op {

 #pragma endregion

-#pragma region Strip
+#pragma region Strip and Trim

    /**
     * @brief Remove leading and trailing whitespace from the string.
-     * @param[in,out] strl The string to be stripped.
+     * @details
+     * This "strip" function has full Unicode support.
+     * Unlike ordinary implementations, which treat each UTF-8 code unit
+     * as an individual character when stripping,
+     * this function first breaks the given words into UTF-8 code points
+     * and then strips those code points from the given string.
+     * So it can strip Unicode whitespace or any other characters correctly.
+     * However, it is slower than the ASCII-only version, trim().
+     * If you only need to strip ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F,
+     * please consider using trim() for better performance.
+     * @param[in] strl The string to be stripped.
     * @param[in] words The characters to be stripped.
     * @return The string view with leading and trailing whitespace removed.
+     * @see See trim() for ASCII-only version "strip".
     */
    std::u8string_view strip(const std::u8string_view& strl, const std::u8string_view& words);
-
    /**
     * @brief Remove leading whitespace from the string.
-     * @param[in,out] strl The string to be stripped.
+     * @param[in] strl The string to be stripped.
     * @param[in] words The characters to be stripped.
     * @return The string view with leading whitespace removed.
+     * @see See strip() for more info.
     */
    std::u8string_view lstrip(const std::u8string_view& strl, const std::u8string_view& words);
-
    /**
     * @brief Remove trailing whitespace from the string.
-     * @param[in,out] strl The string to be stripped.
+     * @param[in] strl The string to be stripped.
     * @param[in] words The characters to be stripped.
     * @return The string view with trailing whitespace removed.
+     * @see See strip() for more info.
     */
    std::u8string_view rstrip(const std::u8string_view& strl, const std::u8string_view& words);

+    /**
+     * @brief Remove leading and trailing whitespace from the string.
+     * @details
+     * This is a limited, ASCII-only "trim" function.
+     * It simply treats each code unit in the given words as an individual character when trimming.
+     * So it can only trim ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F;
+     * words containing any code unit above \c 0x7F cause std::invalid_argument to be thrown.
+     * If you need to trim Unicode whitespace or any other characters, please consider using strip() for correct behavior.
+     * @param[in] strl The view of string to be trimmed.
+     * @param[in] words The characters to be trimmed.
+     * @return The string view with leading and trailing whitespace removed.
+     * @see See strip() for full Unicode supported version "trim".
+     */
+    std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words);
+    /**
+     * @brief Remove leading whitespace from the string.
+     * @param[in] strl The view of string to be trimmed.
+     * @param[in] words The characters to be trimmed.
+     * @return The string view with leading whitespace removed.
+     * @see See trim() for more info.
+     */
+    std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words);
+    /**
+     * @brief Remove trailing whitespace from the string.
+     * @param[in] strl The view of string to be trimmed.
+     * @param[in] words The characters to be trimmed.
+     * @return The string view with trailing whitespace removed.
+     * @see See trim() for more info.
+     */
+    std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words);
+
+
 #pragma endregion

 #pragma region Split
--- a/test/yycc/string/op.cpp
+++ b/test/yycc/string/op.cpp
@@ -86,6 +86,12 @@ namespace yycctest::string::op {
            EXPECT_EQ(rv, u8" \taaa");
        }

+        // Full strip
+        {
+            auto rv = OP::strip(u8"   ", u8" ");
+            EXPECT_TRUE(rv.empty());
+        }
+
        // Special strip
        {
            auto rv = OP::strip(u8"啊啊啊aaaあああ", u8"啊あ");
@@ -110,6 +116,32 @@ namespace yycctest::string::op {
        }
    }

+    TEST(StringOp, Trim) {
+        // Trimming both ends, only the left end, and only the right end.
+        {
+            const auto both = OP::trim(u8" \taaa\n", u8" \t\r\n");
+            EXPECT_EQ(both, u8"aaa");
+        }
+        {
+            const auto left_only = OP::ltrim(u8" \taaa\n", u8" \t\r\n");
+            EXPECT_EQ(left_only, u8"aaa\n");
+        }
+        {
+            const auto right_only = OP::rtrim(u8" \taaa\n", u8" \t\r\n");
+            EXPECT_EQ(right_only, u8" \taaa");
+        }
+
+        // Words holding non-ASCII code units must be rejected with an exception.
+        {
+            EXPECT_ANY_THROW(OP::trim(u8"q啊啊啊aaaあああp", u8"p啊q"));
+        }
+
+        // A string made up solely of trimmed characters collapses to an empty view.
+        {
+            const auto nothing_left = OP::trim(u8"   ", u8" ");
+            EXPECT_TRUE(nothing_left.empty());
+        }
+    }
    TEST(StringOp, Split) {
        // Normal
        {