feat: add lazy split in string op namespace.
This commit is contained in:
@ -10,7 +10,8 @@
|
|||||||
*/
|
*/
|
||||||
namespace yycc::patch::stream {
|
namespace yycc::patch::stream {
|
||||||
|
|
||||||
// TODO: replace all old way of outputing utf8 string with this new way.
|
// TODO: fix all REINTERPRET::as_ordinary_view polyfill for std::ostream.
|
||||||
|
// TODO: replace all old way (C-style output) with this new way if possible.
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, const std::u8string_view& u8str);
|
std::ostream& operator<<(std::ostream& os, const std::u8string_view& u8str);
|
||||||
std::ostream& operator<<(std::ostream& os, const char8_t* u8str);
|
std::ostream& operator<<(std::ostream& os, const char8_t* u8str);
|
||||||
|
@ -168,45 +168,115 @@ namespace yycc::string::op {
|
|||||||
|
|
||||||
#pragma region Split
|
#pragma region Split
|
||||||
|
|
||||||
std::vector<std::u8string_view> split(const std::u8string_view& strl, const std::u8string_view& _delimiter) {
|
// Reference:
|
||||||
// Reference:
|
// https://stackoverflow.com/questions/14265581/parse-split-a-string-in-c-using-string-delimiter-standard-c
|
||||||
// https://stackoverflow.com/questions/14265581/parse-split-a-string-in-c-using-string-delimiter-standard-c
|
|
||||||
|
|
||||||
// prepare return value
|
#pragma region Lazy Split Iterator
|
||||||
|
|
||||||
|
LazySplitIterator::LazySplitIterator(std::optional<std::u8string_view> strl, const std::u8string_view& delimiter) :
|
||||||
|
m_current_str(std::nullopt), m_next_str(strl), m_delimiter(delimiter) {
|
||||||
|
// We can achieve the result by assigning the string to the next string
|
||||||
|
// and then calling the increment operator.
|
||||||
|
++(*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
LazySplitIterator::reference LazySplitIterator::operator*() const {
|
||||||
|
return m_current_str.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
LazySplitIterator::pointer LazySplitIterator::operator->() const {
|
||||||
|
return &m_current_str.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
LazySplitIterator& LazySplitIterator::operator++() {
|
||||||
|
// move next string to current string and clear next string
|
||||||
|
m_current_str = m_next_str;
|
||||||
|
m_next_str = std::nullopt;
|
||||||
|
|
||||||
|
// check whether there is a string to be split
|
||||||
|
if (m_current_str.has_value()) {
|
||||||
|
// extract string
|
||||||
|
const auto strl = m_current_str.value();
|
||||||
|
|
||||||
|
// if delimiter is empty, return original string.
|
||||||
|
// if the string to be split is empty, return the original string (an empty string).
|
||||||
|
if (strl.empty() || m_delimiter.empty()) {
|
||||||
|
m_current_str = strl;
|
||||||
|
m_next_str = std::nullopt;
|
||||||
|
} else {
|
||||||
|
// start splitting
|
||||||
|
size_t current = 0;
|
||||||
|
if ((current = strl.find(m_delimiter)) != std::u8string_view::npos) {
|
||||||
|
// A delimiter was found: the part before it becomes the current item and the part after it becomes the remainder.
|
||||||
|
m_current_str = strl.substr(0, current);
|
||||||
|
m_next_str = strl.substr(current + m_delimiter.size());
|
||||||
|
} else {
|
||||||
|
// No further delimiter was found, so this is the last item.
|
||||||
|
m_current_str = strl;
|
||||||
|
m_next_str = std::nullopt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No value. Initialize as an end iterator.
|
||||||
|
m_current_str = std::nullopt;
|
||||||
|
m_next_str = std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// return self
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
LazySplitIterator LazySplitIterator::operator++(int) {
|
||||||
|
LazySplitIterator temp = *this;
|
||||||
|
++(*this);
|
||||||
|
return temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool LazySplitIterator::operator==(const LazySplitIterator& other) const {
|
||||||
|
return (this->m_current_str == other.m_current_str) && (this->m_next_str == other.m_next_str)
|
||||||
|
&& (this->m_delimiter == other.m_delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool LazySplitIterator::operator!=(const LazySplitIterator& other) const {
|
||||||
|
return !(*this == other);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
#pragma region Lazy Split
|
||||||
|
|
||||||
|
LazySplit::LazySplit(const std::u8string_view& strl, const std::u8string_view& delimiter) : m_strl(strl), m_delimiter(delimiter) {}
|
||||||
|
|
||||||
|
LazySplitIterator LazySplit::begin() const {
|
||||||
|
return LazySplitIterator(m_strl, m_delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
LazySplitIterator LazySplit::end() const {
|
||||||
|
// Pass std::nullopt to indicate end.
|
||||||
|
return LazySplitIterator(std::nullopt, m_delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma endregion
|
||||||
|
|
||||||
|
LazySplit lazy_split(const std::u8string_view& strl, const std::u8string_view& delimiter) {
|
||||||
|
return LazySplit(strl, delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::u8string_view> split(const std::u8string_view& strl, const std::u8string_view& delimiter) {
|
||||||
|
// use lazy split iterator to do the splitting
|
||||||
std::vector<std::u8string_view> elems;
|
std::vector<std::u8string_view> elems;
|
||||||
|
for (const auto& strl_view : lazy_split(strl, delimiter)) {
|
||||||
// if string need to be splitted is empty, return original string (empty string).
|
elems.emplace_back(strl_view);
|
||||||
// if delimiter is empty, return original string.
|
|
||||||
std::u8string delimiter(_delimiter);
|
|
||||||
if (strl.empty() || delimiter.empty()) {
|
|
||||||
elems.emplace_back(strl);
|
|
||||||
return elems;
|
|
||||||
}
|
|
||||||
|
|
||||||
// start spliting
|
|
||||||
std::size_t previous = 0, current;
|
|
||||||
while ((current = strl.find(delimiter.c_str(), previous)) != std::u8string::npos) {
|
|
||||||
elems.emplace_back(strl.substr(previous, current - previous));
|
|
||||||
previous = current + delimiter.size();
|
|
||||||
}
|
|
||||||
// try insert last part but prevent possible out of range exception
|
|
||||||
if (previous <= strl.size()) {
|
|
||||||
elems.emplace_back(strl.substr(previous));
|
|
||||||
}
|
}
|
||||||
return elems;
|
return elems;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::u8string> split_owned(const std::u8string_view& strl, const std::u8string_view& _delimiter) {
|
std::vector<std::u8string> split_owned(const std::u8string_view& strl, const std::u8string_view& delimiter) {
|
||||||
// call split view
|
// use lazy split iterator to do the splitting
|
||||||
auto view_result = split(strl, _delimiter);
|
|
||||||
|
|
||||||
// copy string view result to string
|
|
||||||
std::vector<std::u8string> elems;
|
std::vector<std::u8string> elems;
|
||||||
elems.reserve(view_result.size());
|
for (const auto& strl_view : lazy_split(strl, delimiter)) {
|
||||||
for (const auto& strl_view : view_result) {
|
|
||||||
elems.emplace_back(std::u8string(strl_view));
|
elems.emplace_back(std::u8string(strl_view));
|
||||||
}
|
}
|
||||||
// return copied result
|
|
||||||
return elems;
|
return elems;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <cstdarg>
|
#include <cstdarg>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
namespace yycc::string::op {
|
namespace yycc::string::op {
|
||||||
|
|
||||||
@ -125,33 +126,93 @@ namespace yycc::string::op {
|
|||||||
// TODO:
|
// TODO:
|
||||||
// Add strip, lstrip and rstrip functions.
|
// Add strip, lstrip and rstrip functions.
|
||||||
|
|
||||||
|
#pragma region Split
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Iterator class for lazy splitting of strings.
|
||||||
|
*/
|
||||||
|
class LazySplitIterator {
|
||||||
|
public:
|
||||||
|
using iterator_category = std::forward_iterator_tag;
|
||||||
|
using value_type = std::u8string_view;
|
||||||
|
using difference_type = std::ptrdiff_t;
|
||||||
|
using pointer = const std::u8string_view*;
|
||||||
|
using reference = const std::u8string_view&;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* @brief Current splitted item.
|
||||||
|
* @details The most recently split item, ready for the caller to read.
|
||||||
|
* If this value is std::nullopt, the iterator has reached the end of the split sequence.
|
||||||
|
*/
|
||||||
|
std::optional<std::u8string_view> m_current_str;
|
||||||
|
/**
|
||||||
|
* @brief The string handed to the next iteration step.
|
||||||
|
* @details This is the remainder after the current item and its delimiter have been split off.
|
||||||
|
*/
|
||||||
|
std::optional<std::u8string_view> m_next_str;
|
||||||
|
std::u8string_view m_delimiter; ///< Delimiter
|
||||||
|
|
||||||
|
public:
|
||||||
|
LazySplitIterator(std::optional<std::u8string_view> strl, const std::u8string_view& delimiter);
|
||||||
|
|
||||||
|
reference operator*() const;
|
||||||
|
pointer operator->() const;
|
||||||
|
LazySplitIterator& operator++();
|
||||||
|
LazySplitIterator operator++(int);
|
||||||
|
bool operator==(const LazySplitIterator& other) const;
|
||||||
|
bool operator!=(const LazySplitIterator& other) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Class for lazy splitting of strings.
|
||||||
|
*/
|
||||||
|
class LazySplit {
|
||||||
|
private:
|
||||||
|
std::u8string_view m_strl; ///< Original string
|
||||||
|
std::u8string_view m_delimiter; ///< Delimiter
|
||||||
|
|
||||||
|
public:
|
||||||
|
LazySplit(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
||||||
|
LazySplitIterator begin() const;
|
||||||
|
LazySplitIterator end() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Lazily split given string with specified delimiter.
|
||||||
|
* @param[in] strl The string to be split.
|
||||||
|
* @param[in] delimiter The delimiter for splitting.
|
||||||
|
* @return
|
||||||
|
* LazySplit object that can be used in range-based for loops.
|
||||||
|
* \par
|
||||||
|
* Each item in the result is one split entry.
|
||||||
|
* If the given string or the delimiter is empty,
|
||||||
|
* the result contains exactly one entry, which is equal to the given string.
|
||||||
|
*/
|
||||||
|
LazySplit lazy_split(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
||||||
/**
|
/**
|
||||||
* @brief Split given string with specified delimiter as string view.
|
* @brief Split given string with specified delimiter as string view.
|
||||||
|
* @details
|
||||||
|
* If your split produces a large result, please consider using lazy_split(),
|
||||||
|
* because it yields entries one by one rather than producing the whole output at once.
|
||||||
* @param[in] strl The string need to be splitting.
|
* @param[in] strl The string need to be splitting.
|
||||||
* @param[in] _delimiter The delimiter for splitting.
|
* @param[in] delimiter The delimiter for splitting.
|
||||||
* @return
|
* @return Split result in string view format.
|
||||||
* The split result with string view format.
|
* @see lazy_split() for details on the split rules.
|
||||||
* This will not produce any copy of original string.
|
|
||||||
* \par
|
|
||||||
* If given string or delimiter are empty,
|
|
||||||
* the result container will only contain 1 entry which is equal to given string.
|
|
||||||
* @see Split(const std::u8string_view&, const char8_t*)
|
|
||||||
*/
|
*/
|
||||||
std::vector<std::u8string_view> split(const std::u8string_view& strl, const std::u8string_view& _delimiter);
|
std::vector<std::u8string_view> split(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
||||||
/**
|
/**
|
||||||
* @brief Split given string with specified delimiter.
|
* @brief Split given string with specified delimiter.
|
||||||
|
* @details
|
||||||
|
* If you do not need to store the result,
|
||||||
|
* please consider using split() because it uses less memory.
|
||||||
* @param[in] strl The string need to be splitting.
|
* @param[in] strl The string need to be splitting.
|
||||||
* @param[in] _delimiter The delimiter for splitting.
|
* @param[in] delimiter The delimiter for splitting.
|
||||||
* @return
|
* @return Split result in string format.
|
||||||
* The split result.
|
* @see lazy_split() for details on the split rules.
|
||||||
* \par
|
|
||||||
* If given string or delimiter are empty,
|
|
||||||
* the result container will only contain 1 entry which is equal to given string.
|
|
||||||
*/
|
*/
|
||||||
std::vector<std::u8string> split_owned(const std::u8string_view& strl, const std::u8string_view& _delimiter);
|
std::vector<std::u8string> split_owned(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
||||||
|
|
||||||
// TODO:
|
#pragma endregion
|
||||||
// Add lazy_split(const std::u8string_view& strl, const std::u8string_view& _delimiter);
|
|
||||||
// Once we add it, we need redirect all split function into it.
|
|
||||||
|
|
||||||
} // namespace yycc::string::op
|
} // namespace yycc::string::op
|
||||||
|
Reference in New Issue
Block a user