feat: add trim to string op as the counterpart of strip
This commit is contained in:
@ -15,4 +15,12 @@ namespace yyccbench::string::op {
|
||||
}
|
||||
BENCHMARK(BM_StringStrip)->Name("StringStrip");
|
||||
|
||||
static void BM_StringTrim(benchmark::State& state) {
|
||||
std::u8string_view strl = u8" \thello\r\n"sv, words = u8" \t\r\n"sv;
|
||||
for (auto _ : state) {
|
||||
auto rv = OP::trim(strl, words);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_StringTrim)->Name("StringTrim");
|
||||
|
||||
}
|
||||
|
@ -2,10 +2,13 @@
|
||||
#include "../termcolor.hpp"
|
||||
#include "../../patch/stream.hpp"
|
||||
#include "../../patch/format.hpp"
|
||||
#include "../../string/op.hpp"
|
||||
#include <ranges>
|
||||
|
||||
#define CLAP ::yycc::carton::clap
|
||||
#define TABULATE ::yycc::carton::tabulate
|
||||
#define TERMCOLOR ::yycc::carton::termcolor
|
||||
#define OP ::yycc::string::op
|
||||
#define FORMAT ::yycc::patch::format
|
||||
|
||||
using namespace ::yycc::patch::stream;
|
||||
@ -51,6 +54,10 @@ namespace yycc::carton::clap::manual {
|
||||
const auto &options = app.get_options();
|
||||
for (const auto ®_opt : options.all_options()) {
|
||||
const auto &opt = reg_opt.get_option();
|
||||
|
||||
//for (const auto [index, item] : std::views::enumerate(header)) {
|
||||
//
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -379,7 +379,7 @@ namespace yycc::string::op {
|
||||
#pragma endregion
|
||||
|
||||
template<bool bDoLeft, bool bDoRight>
|
||||
std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
|
||||
static std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
|
||||
std::optional<TrieTree> prefix, suffix;
|
||||
if constexpr (bDoLeft) prefix = TrieTree();
|
||||
if constexpr (bDoRight) suffix = TrieTree();
|
||||
@ -419,6 +419,62 @@ namespace yycc::string::op {
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region Trim
|
||||
|
||||
template<bool bDoLeft, bool bDoRight>
|
||||
std::u8string_view internal_trim(const std::u8string_view& strl, const std::u8string_view& words) {
|
||||
// check words
|
||||
if (!std::ranges::none_of(words, [](auto c) { return static_cast<uint8_t>(c) & 0x80; })) {
|
||||
throw std::invalid_argument("given words are not all ASCII (<= 0x7F) only");
|
||||
}
|
||||
|
||||
// prepare return value
|
||||
std::u8string_view rv = strl;
|
||||
|
||||
// remove left first
|
||||
if constexpr (bDoLeft) {
|
||||
auto finder = rv.find_first_not_of(words);
|
||||
if (finder == std::u8string_view::npos) {
|
||||
// all string are in given words
|
||||
rv = std::u8string_view();
|
||||
} else {
|
||||
// remove by offset
|
||||
rv = rv.substr(finder);
|
||||
}
|
||||
}
|
||||
|
||||
// remove right
|
||||
if constexpr (bDoRight) {
|
||||
auto finder = rv.find_last_not_of(words);
|
||||
if (finder == std::u8string_view::npos) {
|
||||
// all string are in given words
|
||||
rv = std::u8string_view();
|
||||
} else {
|
||||
// remove by offset
|
||||
rv = rv.substr(0, finder + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// return value
|
||||
return rv;
|
||||
}
|
||||
|
||||
// Trim given ASCII words from both ends of the string.
std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words) {
    constexpr bool kDoLeft = true;
    constexpr bool kDoRight = true;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}

// Trim given ASCII words from the head of the string only.
std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words) {
    constexpr bool kDoLeft = true;
    constexpr bool kDoRight = false;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}

// Trim given ASCII words from the tail of the string only.
std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words) {
    constexpr bool kDoLeft = false;
    constexpr bool kDoRight = true;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}
|
||||
|
||||
|
||||
#pragma endregion
|
||||
|
||||
|
||||
#pragma region Split
|
||||
|
||||
// Reference:
|
||||
|
@ -144,32 +144,75 @@ namespace yycc::string::op {
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region Strip
|
||||
#pragma region Strip and Trim
|
||||
|
||||
/**
|
||||
* @brief Remove leading and trailing whitespace from the string.
|
||||
* @param[in,out] strl The string to be stripped.
|
||||
* @details
|
||||
* This "strip" function fully supports Unicode.
|
||||
* It means that it differs from all other ordinary implementations,
|
||||
* that treat each UTF8 code unit as an individual char when stripping.
|
||||
* This function will break given words by UTF8 code point first,
|
||||
* and try to strip these code points in given string.
|
||||
* So it can strip Unicode whitespace or any other characters correctly.
|
||||
* However, obviously, it is slower than ASCII-only version "trim".
|
||||
* If you only need to strip ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F,
|
||||
* please consider using trim() for better performance.
|
||||
* @param[in] strl The string to be stripped.
|
||||
* @param[in] words The characters to be stripped.
|
||||
* @return The string view with leading and trailing whitespace removed.
|
||||
* @see See trim() for ASCII-only version "strip".
|
||||
*/
|
||||
std::u8string_view strip(const std::u8string_view& strl, const std::u8string_view& words);
|
||||
|
||||
/**
|
||||
* @brief Remove leading whitespace from the string.
|
||||
* @param[in,out] strl The string to be stripped.
|
||||
* @param[in] strl The string to be stripped.
|
||||
* @param[in] words The characters to be stripped.
|
||||
* @return The string view with leading whitespace removed.
|
||||
* @see See strip() for more info.
|
||||
*/
|
||||
std::u8string_view lstrip(const std::u8string_view& strl, const std::u8string_view& words);
|
||||
|
||||
/**
|
||||
* @brief Remove trailing whitespace from the string.
|
||||
* @param[in,out] strl The string to be stripped.
|
||||
* @param[in] strl The string to be stripped.
|
||||
* @param[in] words The characters to be stripped.
|
||||
* @return The string view with trailing whitespace removed.
|
||||
* @see See strip() for more info.
|
||||
*/
|
||||
std::u8string_view rstrip(const std::u8string_view& strl, const std::u8string_view& words);
|
||||
|
||||
/**
|
||||
* @brief Remove leading and trailing whitespace from the string.
|
||||
* @details
|
||||
* This is a limited, ASCII-only "trim" function.
|
||||
* It simply treats each code unit in the given words as an individual char when trimming.
|
||||
* So it can only trim ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F.
|
||||
* If you need to trim Unicode whitespace or any other characters,
|
||||
* please consider using strip() for correct behavior.
|
||||
* @param[in] strl The view of string to be trimmed.
|
||||
* @param[in] words The characters to be trimmed.
|
||||
* @return The string view with leading and trailing whitespace removed.
* @exception std::invalid_argument The given words contain a code unit greater than \c 0x7F.
|
||||
* @see See strip() for full Unicode supported version "trim".
|
||||
*/
|
||||
std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words);
|
||||
/**
|
||||
* @brief Remove leading whitespace from the string.
|
||||
* @param[in] strl The view of string to be trimmed.
|
||||
* @param[in] words The characters to be trimmed.
|
||||
* @return The string view with leading whitespace removed.
|
||||
* @see See trim() for more info.
|
||||
*/
|
||||
std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words);
|
||||
/**
|
||||
* @brief Remove trailing whitespace from the string.
|
||||
* @param[in] strl The view of string to be trimmed.
|
||||
* @param[in] words The characters to be trimmed.
|
||||
* @return The string view with trailing whitespace removed.
|
||||
* @see See trim() for more info.
|
||||
*/
|
||||
std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words);
|
||||
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region Split
|
||||
|
@ -86,6 +86,12 @@ namespace yycctest::string::op {
|
||||
EXPECT_EQ(rv, u8" \taaa");
|
||||
}
|
||||
|
||||
// Full strip
|
||||
{
|
||||
auto rv = OP::strip(u8" ", u8" ");
|
||||
EXPECT_TRUE(rv.empty());
|
||||
}
|
||||
|
||||
// Special strip
|
||||
{
|
||||
auto rv = OP::strip(u8"啊啊啊aaaあああ", u8"啊あ");
|
||||
@ -110,6 +116,32 @@ namespace yycctest::string::op {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(StringOp, Trim) {
    // Shared fixtures: a string padded on both ends, and the words to remove.
    constexpr auto kPadded = u8" \taaa\n";
    constexpr auto kBlanks = u8" \t\r\n";

    // Normal trim: both ends, head only, tail only.
    EXPECT_EQ(OP::trim(kPadded, kBlanks), u8"aaa");
    EXPECT_EQ(OP::ltrim(kPadded, kBlanks), u8"aaa\n");
    EXPECT_EQ(OP::rtrim(kPadded, kBlanks), u8" \taaa");

    // Bad words: anything beyond ASCII in words must be rejected.
    EXPECT_ANY_THROW(OP::trim(u8"q啊啊啊aaaあああp", u8"p啊q"));

    // Full trim: nothing is left when the whole string consists of words.
    EXPECT_TRUE(OP::trim(u8" ", u8" ").empty());
}
|
||||
TEST(StringOp, Split) {
|
||||
// Normal
|
||||
{
|
||||
|
Reference in New Issue
Block a user