1
0

feat: add trim to string op as the ASCII-only counterpart of strip

This commit is contained in:
2025-10-01 20:53:43 +08:00
parent 05a80268ab
commit 446f880df4
5 changed files with 153 additions and 7 deletions

View File

@ -15,4 +15,12 @@ namespace yyccbench::string::op {
}
BENCHMARK(BM_StringStrip)->Name("StringStrip");
// Benchmark for OP::trim: trims a short string that has ASCII blanks on both sides.
static void BM_StringTrim(benchmark::State& state) {
    std::u8string_view strl = u8" \thello\r\n"sv, words = u8" \t\r\n"sv;
    for (auto _ : state) {
        auto rv = OP::trim(strl, words);
        // Keep the result observable so the optimizer can not drop the measured call.
        benchmark::DoNotOptimize(rv);
    }
}
BENCHMARK(BM_StringTrim)->Name("StringTrim");
}

View File

@ -2,10 +2,13 @@
#include "../termcolor.hpp"
#include "../../patch/stream.hpp"
#include "../../patch/format.hpp"
#include "../../string/op.hpp"
#include <ranges>
#define CLAP ::yycc::carton::clap
#define TABULATE ::yycc::carton::tabulate
#define TERMCOLOR ::yycc::carton::termcolor
#define OP ::yycc::string::op
#define FORMAT ::yycc::patch::format
using namespace ::yycc::patch::stream;
@ -51,6 +54,10 @@ namespace yycc::carton::clap::manual {
const auto &options = app.get_options();
for (const auto &reg_opt : options.all_options()) {
const auto &opt = reg_opt.get_option();
//for (const auto [index, item] : std::views::enumerate(header)) {
//
//}
}
}

View File

@ -379,7 +379,7 @@ namespace yycc::string::op {
#pragma endregion
template<bool bDoLeft, bool bDoRight>
std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
static std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
std::optional<TrieTree> prefix, suffix;
if constexpr (bDoLeft) prefix = TrieTree();
if constexpr (bDoRight) suffix = TrieTree();
@ -419,6 +419,62 @@ namespace yycc::string::op {
#pragma endregion
#pragma region Trim
/**
 * @brief Shared implementation behind trim(), ltrim() and rtrim().
 * @details
 * Every code unit in \c words is treated as one individual character,
 * so \c words is required to contain ASCII characters only.
 * @tparam bDoLeft True to remove matching characters from the head of the string.
 * @tparam bDoRight True to remove matching characters from the tail of the string.
 * @param[in] strl The view of string to be trimmed.
 * @param[in] words The ASCII characters to be removed.
 * @return The trimmed view. When a processed side finds no character to keep,
 * a default-constructed (empty) view is returned.
 * @throws std::invalid_argument If any code unit in \c words has its highest bit set.
 */
template<bool bDoLeft, bool bDoRight>
static std::u8string_view internal_trim(const std::u8string_view& strl, const std::u8string_view& words) {
    // Reject words holding any non-ASCII code unit (highest bit set).
    const bool has_non_ascii = std::ranges::any_of(words, [](auto c) { return (static_cast<uint8_t>(c) & 0x80) != 0; });
    if (has_non_ascii) {
        throw std::invalid_argument("given words are not all ASCII (<= 0x7F) only");
    }

    std::u8string_view rv = strl;

    // Cut the head first.
    if constexpr (bDoLeft) {
        const auto first_kept = rv.find_first_not_of(words);
        // npos means every character is in the given words, so nothing is left.
        rv = (first_kept == std::u8string_view::npos) ? std::u8string_view() : rv.substr(first_kept);
    }

    // Then cut the tail.
    if constexpr (bDoRight) {
        const auto last_kept = rv.find_last_not_of(words);
        // npos means every character is in the given words (or the view is already empty).
        rv = (last_kept == std::u8string_view::npos) ? std::u8string_view() : rv.substr(0, last_kept + 1);
    }

    return rv;
}
// Trim both the head and the tail of the string; the work is done by internal_trim.
std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words) {
    constexpr bool kDoLeft = true;
    constexpr bool kDoRight = true;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}
// Trim only the head of the string; the work is done by internal_trim.
std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words) {
    constexpr bool kDoLeft = true;
    constexpr bool kDoRight = false;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}
// Trim only the tail of the string; the work is done by internal_trim.
std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words) {
    constexpr bool kDoLeft = false;
    constexpr bool kDoRight = true;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}
#pragma endregion
#pragma region Split
// Reference:

View File

@ -144,32 +144,75 @@ namespace yycc::string::op {
#pragma endregion
#pragma region Strip
#pragma region Strip and Trim
/**
* @brief Remove leading and trailing whitespace from the string.
* @param[in,out] strl The string to be stripped.
* @details
* This "strip" function fully supports Unicode.
* It means that it differs from ordinary implementations,
* which treat each UTF8 code unit as an individual char when stripping.
* This function breaks the given words into UTF8 code points first,
* and then tries to strip these code points from the given string.
* So it can strip Unicode whitespace or any other characters correctly.
* However, it is obviously slower than the ASCII-only version "trim".
* If you only need to strip ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F,
* please consider using trim() for better performance.
* @param[in] strl The string to be stripped.
* @param[in] words The characters to be stripped.
* @return The string view with leading and trailing whitespace removed.
* @see See trim() for ASCII-only version "strip".
*/
std::u8string_view strip(const std::u8string_view& strl, const std::u8string_view& words);
/**
* @brief Remove leading whitespace from the string.
* @param[in,out] strl The string to be stripped.
* @param[in] strl The string to be stripped.
* @param[in] words The characters to be stripped.
* @return The string view with leading whitespace removed.
* @see See strip() for more info.
*/
std::u8string_view lstrip(const std::u8string_view& strl, const std::u8string_view& words);
/**
* @brief Remove trailing whitespace from the string.
* @param[in,out] strl The string to be stripped.
* @param[in] strl The string to be stripped.
* @param[in] words The characters to be stripped.
* @return The string view with trailing whitespace removed.
* @see See strip() for more info.
*/
std::u8string_view rstrip(const std::u8string_view& strl, const std::u8string_view& words);
/**
 * @brief Remove the leading and trailing characters listed in \c words from the string.
 * @details
 * This is the limited, ASCII-only "trim" function.
 * It simply treats each code unit in the given words as an individual char during trimming.
 * So it can only trim ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F.
 * If you need to trim Unicode whitespace or any other characters,
 * please consider using strip() for correct behavior.
 * @param[in] strl The view of string to be trimmed.
 * @param[in] words The characters to be trimmed. Every code unit must be ASCII.
 * @return The string view with the leading and trailing characters in \c words removed.
 * @throws std::invalid_argument If \c words contains any code unit greater than \c 0x7F.
 * @see See strip() for the version of "trim" with full Unicode support.
 */
std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words);
/**
 * @brief Remove the leading characters listed in \c words from the string.
 * @param[in] strl The view of string to be trimmed.
 * @param[in] words The characters to be trimmed. Every code unit must be ASCII.
 * @return The string view with the leading characters in \c words removed.
 * @throws std::invalid_argument If \c words contains any code unit greater than \c 0x7F.
 * @see See trim() for more info.
 */
std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words);
/**
 * @brief Remove the trailing characters listed in \c words from the string.
 * @param[in] strl The view of string to be trimmed.
 * @param[in] words The characters to be trimmed. Every code unit must be ASCII.
 * @return The string view with the trailing characters in \c words removed.
 * @throws std::invalid_argument If \c words contains any code unit greater than \c 0x7F.
 * @see See trim() for more info.
 */
std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words);
#pragma endregion
#pragma region Split

View File

@ -86,6 +86,12 @@ namespace yycctest::string::op {
EXPECT_EQ(rv, u8" \taaa");
}
// Full strip
{
auto rv = OP::strip(u8" ", u8" ");
EXPECT_TRUE(rv.empty());
}
// Special strip
{
auto rv = OP::strip(u8"啊啊啊aaaあああ", u8"啊あ");
@ -110,6 +116,32 @@ namespace yycctest::string::op {
}
}
// Covers the ASCII-only trim family: trim(), ltrim() and rtrim().
TEST(StringOp, Trim) {
    // Both sides are trimmed.
    {
        const auto both_sides = OP::trim(u8" \taaa\n", u8" \t\r\n");
        EXPECT_EQ(both_sides, u8"aaa");
    }
    // Only the head is trimmed; the trailing newline stays.
    {
        const auto head_only = OP::ltrim(u8" \taaa\n", u8" \t\r\n");
        EXPECT_EQ(head_only, u8"aaa\n");
    }
    // Only the tail is trimmed; the leading blanks stay.
    {
        const auto tail_only = OP::rtrim(u8" \taaa\n", u8" \t\r\n");
        EXPECT_EQ(tail_only, u8" \taaa");
    }

    // Words holding non-ASCII characters must be rejected.
    {
        EXPECT_ANY_THROW(OP::trim(u8"q啊啊啊aaaあああp", u8"p啊q"));
    }

    // A string made only of trimmed characters becomes empty.
    {
        const auto nothing_left = OP::trim(u8" ", u8" ");
        EXPECT_TRUE(nothing_left.empty());
    }
}
TEST(StringOp, Split) {
// Normal
{