1
0

feat: add trim in string op opposited with strip

This commit is contained in:
2025-10-01 20:53:43 +08:00
parent 05a80268ab
commit 446f880df4
5 changed files with 153 additions and 7 deletions

View File

@ -379,7 +379,7 @@ namespace yycc::string::op {
#pragma endregion
template<bool bDoLeft, bool bDoRight>
std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
static std::u8string_view internal_strip(const std::u8string_view& strl, const std::u8string_view& words) {
std::optional<TrieTree> prefix, suffix;
if constexpr (bDoLeft) prefix = TrieTree();
if constexpr (bDoRight) suffix = TrieTree();
@ -419,6 +419,62 @@ namespace yycc::string::op {
#pragma endregion
#pragma region Trim
/**
 * @brief Shared implementation behind trim(), ltrim() and rtrim().
 * @details
 * Every code unit of \c words is treated as one individual character,
 * which is only meaningful for ASCII. Therefore \c words is validated first.
 * @tparam bDoLeft True to remove matching characters from the head of the string.
 * @tparam bDoRight True to remove matching characters from the tail of the string.
 * @param[in] strl The view of string to be trimmed.
 * @param[in] words The ASCII characters to be trimmed.
 * @return A view into \c strl with the requested sides trimmed,
 * or a default-constructed (empty) view if nothing is left on a processed side.
 * @exception std::invalid_argument Some code unit of \c words has its highest bit set (is greater than 0x7F).
 */
template<bool bDoLeft, bool bDoRight>
static std::u8string_view internal_trim(const std::u8string_view& strl, const std::u8string_view& words) {
    // A code unit with the high bit set belongs to a multi-byte UTF-8 sequence,
    // which can not be matched unit by unit, so refuse such words.
    if (std::ranges::any_of(words, [](auto c) { return (static_cast<uint8_t>(c) & 0x80) != 0; })) {
        throw std::invalid_argument("given words are not all ASCII (<= 0x7F) only");
    }

    std::u8string_view rv = strl;

    // Trim the head before the tail, so the tail search only sees what is left.
    if constexpr (bDoLeft) {
        const auto first_kept = rv.find_first_not_of(words);
        if (first_kept == std::u8string_view::npos) {
            // Every character is one of the given words.
            rv = std::u8string_view();
        } else {
            rv = rv.substr(first_kept);
        }
    }

    if constexpr (bDoRight) {
        const auto last_kept = rv.find_last_not_of(words);
        if (last_kept == std::u8string_view::npos) {
            // Every (remaining) character is one of the given words.
            rv = std::u8string_view();
        } else {
            // last_kept is an index, so the kept length is one more than it.
            rv = rv.substr(0, last_kept + 1);
        }
    }

    return rv;
}
// Trim the ASCII characters listed in words from both ends of strl.
std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words) {
    // Both sides are enabled for the shared implementation.
    constexpr bool kDoLeft = true;
    constexpr bool kDoRight = true;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}
// Trim the ASCII characters listed in words from the head of strl only.
std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words) {
    // Only the left side is enabled for the shared implementation.
    constexpr bool kDoLeft = true;
    constexpr bool kDoRight = false;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}
// Trim the ASCII characters listed in words from the tail of strl only.
std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words) {
    // Only the right side is enabled for the shared implementation.
    constexpr bool kDoLeft = false;
    constexpr bool kDoRight = true;
    return internal_trim<kDoLeft, kDoRight>(strl, words);
}
#pragma endregion
#pragma region Split
// Reference:

View File

@ -144,32 +144,75 @@ namespace yycc::string::op {
#pragma endregion
#pragma region Strip
#pragma region Strip and Trim
/**
* @brief Remove the leading and trailing characters listed in \c words from the string.
* @details
* This "strip" function fully supports Unicode.
* It means that it is different from most ordinary implementations,
* which treat each UTF8 code unit as an individual character when stripping.
* This function will break given words by UTF8 code point first,
* and try to strip these code points in given string.
* So it can strip Unicode whitespace or any other characters correctly.
* However, obviously, it is slower than ASCII-only version "trim".
* If you only need to strip ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F,
* please consider using trim() for better performance.
* @param[in] strl The string to be stripped.
* @param[in] words The characters to be stripped.
* @return The string view with the leading and trailing characters removed.
* @see See trim() for ASCII-only version "strip".
*/
std::u8string_view strip(const std::u8string_view& strl, const std::u8string_view& words);
/**
* @brief Remove the leading characters listed in \c words from the string.
* @param[in] strl The string to be stripped.
* @param[in] words The characters to be stripped.
* @return The string view with the leading characters removed.
* @see See strip() for more info.
*/
std::u8string_view lstrip(const std::u8string_view& strl, const std::u8string_view& words);
/**
* @brief Remove the trailing characters listed in \c words from the string.
* @param[in] strl The string to be stripped.
* @param[in] words The characters to be stripped.
* @return The string view with the trailing characters removed.
* @see See strip() for more info.
*/
std::u8string_view rstrip(const std::u8string_view& strl, const std::u8string_view& words);
/**
* @brief Remove the leading and trailing characters listed in \c words from the string.
* @details
* This is the limited, ASCII-only "trim" function.
* It naively treats each code unit in given words as an individual character during trimming.
* So it can only trim ASCII whitespace (space, tab, newline) or any other code point not greater than \c 0x7F.
* If you need to trim Unicode whitespace or any other characters,
* please consider using strip() for correct behavior.
* @param[in] strl The view of string to be trimmed.
* @param[in] words The characters to be trimmed. All of them must be ASCII.
* @return The string view with the leading and trailing characters removed.
* @exception std::invalid_argument Given words contain a code unit greater than \c 0x7F.
* @see See strip() for the "trim" version with full Unicode support.
*/
std::u8string_view trim(const std::u8string_view& strl, const std::u8string_view& words);
/**
* @brief Remove the leading characters listed in \c words from the string.
* @param[in] strl The view of string to be trimmed.
* @param[in] words The characters to be trimmed. All of them must be ASCII.
* @return The string view with the leading characters removed.
* @exception std::invalid_argument Given words contain a code unit greater than \c 0x7F.
* @see See trim() for more info.
*/
std::u8string_view ltrim(const std::u8string_view& strl, const std::u8string_view& words);
/**
* @brief Remove the trailing characters listed in \c words from the string.
* @param[in] strl The view of string to be trimmed.
* @param[in] words The characters to be trimmed. All of them must be ASCII.
* @return The string view with the trailing characters removed.
* @exception std::invalid_argument Given words contain a code unit greater than \c 0x7F.
* @see See trim() for more info.
*/
std::u8string_view rtrim(const std::u8string_view& strl, const std::u8string_view& words);
#pragma endregion
#pragma region Split