diff --git a/src/yycc/patch/stream.hpp b/src/yycc/patch/stream.hpp index 159ce14..33fbe38 100644 --- a/src/yycc/patch/stream.hpp +++ b/src/yycc/patch/stream.hpp @@ -10,7 +10,8 @@ */ namespace yycc::patch::stream { - // TODO: replace all old way of outputing utf8 string with this new way. + // TODO: fix all REINTERPRET::as_ordinary_view polyfill for std::ostream. + // TODO: replace all old ways (C-style output) with this new way if possible. std::ostream& operator<<(std::ostream& os, const std::u8string_view& u8str); std::ostream& operator<<(std::ostream& os, const char8_t* u8str); diff --git a/src/yycc/string/op.cpp b/src/yycc/string/op.cpp index 48efdac..6e1d9bd 100644 --- a/src/yycc/string/op.cpp +++ b/src/yycc/string/op.cpp @@ -168,45 +168,115 @@ namespace yycc::string::op { #pragma region Split - std::vector split(const std::u8string_view& strl, const std::u8string_view& _delimiter) { - // Reference: - // https://stackoverflow.com/questions/14265581/parse-split-a-string-in-c-using-string-delimiter-standard-c + // Reference: + // https://stackoverflow.com/questions/14265581/parse-split-a-string-in-c-using-string-delimiter-standard-c - // prepare return value +#pragma region Lazy Split Iterator + + LazySplitIterator::LazySplitIterator(std::optional strl, const std::u8string_view& delimiter) : + m_current_str(std::nullopt), m_next_str(strl), m_delimiter(delimiter) { + // We can achieve the result by assigning the string to the next string, + // and then calling the increment operator.
+ ++(*this); + } + + LazySplitIterator::reference LazySplitIterator::operator*() const { + return m_current_str.value(); + } + + LazySplitIterator::pointer LazySplitIterator::operator->() const { + return &m_current_str.value(); + } + + LazySplitIterator& LazySplitIterator::operator++() { + // move next string to current string and clear next string + m_current_str = m_next_str; + m_next_str = std::nullopt; + + // check whether there is a string to be split + if (m_current_str.has_value()) { + // extract string + const auto strl = m_current_str.value(); + + // if the delimiter is empty, return the original string. + // if the string to be split is empty, return the original string (empty string). + if (strl.empty() || m_delimiter.empty()) { + m_current_str = strl; + m_next_str = std::nullopt; + } else { + // start splitting + size_t current = 0; + if ((current = strl.find(m_delimiter)) != std::u8string_view::npos) { + // We can still find a delimiter, so split there and put the two parts into the 2 string views respectively. + m_current_str = strl.substr(0, current); + m_next_str = strl.substr(current + m_delimiter.size()); + } else { + // We cannot find any more delimiters, so this is the last item + m_current_str = strl; + m_next_str = std::nullopt; + } + } + } else { + // No value. Initialize as an end iterator.
+ m_current_str = std::nullopt; + m_next_str = std::nullopt; + } + + // return self + return *this; + } + + LazySplitIterator LazySplitIterator::operator++(int) { + LazySplitIterator temp = *this; + ++(*this); + return temp; + } + + bool LazySplitIterator::operator==(const LazySplitIterator& other) const { + return (this->m_current_str == other.m_current_str) && (this->m_next_str == other.m_next_str) + && (this->m_delimiter == other.m_delimiter); + } + + bool LazySplitIterator::operator!=(const LazySplitIterator& other) const { + return !(*this == other); + } + +#pragma endregion + +#pragma region Lazy Split + + LazySplit::LazySplit(const std::u8string_view& strl, const std::u8string_view& delimiter) : m_strl(strl), m_delimiter(delimiter) {} + + LazySplitIterator LazySplit::begin() const { + return LazySplitIterator(m_strl, m_delimiter); + } + + LazySplitIterator LazySplit::end() const { + // Pass std::nullopt to indicate end. + return LazySplitIterator(std::nullopt, m_delimiter); + } + +#pragma endregion + + LazySplit lazy_split(const std::u8string_view& strl, const std::u8string_view& delimiter) { + return LazySplit(strl, delimiter); + } + + std::vector split(const std::u8string_view& strl, const std::u8string_view& delimiter) { + // use lazy split iterator to do the splitting std::vector elems; - - // if string need to be splitted is empty, return original string (empty string). - // if delimiter is empty, return original string. 
- std::u8string delimiter(_delimiter); - if (strl.empty() || delimiter.empty()) { - elems.emplace_back(strl); - return elems; - } - - // start spliting - std::size_t previous = 0, current; - while ((current = strl.find(delimiter.c_str(), previous)) != std::u8string::npos) { - elems.emplace_back(strl.substr(previous, current - previous)); - previous = current + delimiter.size(); - } - // try insert last part but prevent possible out of range exception - if (previous <= strl.size()) { - elems.emplace_back(strl.substr(previous)); + for (const auto& strl_view : lazy_split(strl, delimiter)) { + elems.emplace_back(strl_view); } return elems; } - std::vector split_owned(const std::u8string_view& strl, const std::u8string_view& _delimiter) { - // call split view - auto view_result = split(strl, _delimiter); - - // copy string view result to string + std::vector split_owned(const std::u8string_view& strl, const std::u8string_view& delimiter) { + // use lazy split iterator to do the splitting std::vector elems; - elems.reserve(view_result.size()); - for (const auto& strl_view : view_result) { + for (const auto& strl_view : lazy_split(strl, delimiter)) { elems.emplace_back(std::u8string(strl_view)); } - // return copied result return elems; } diff --git a/src/yycc/string/op.hpp b/src/yycc/string/op.hpp index 54b6055..f455153 100644 --- a/src/yycc/string/op.hpp +++ b/src/yycc/string/op.hpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace yycc::string::op { @@ -125,33 +126,93 @@ namespace yycc::string::op { // TODO: // Add strip, lstrip and rstrip functions. +#pragma region Split + + /** + * @brief Iterator class for lazy splitting of strings. + */ + class LazySplitIterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = std::u8string_view; + using difference_type = std::ptrdiff_t; + using pointer = const std::u8string_view*; + using reference = const std::u8string_view&; + + private: + /** + * @brief Current split item.
+ * @details The item that has already been split off and is ready for the user to fetch. + * If this value is std::nullopt, it means that we have reached the end of the split process. + */ + std::optional m_current_str; + /** + * @brief The string passed to the next step of the iterator. + * @details It is actually the remainder after splitting, excluding the delimiter. + */ + std::optional m_next_str; + std::u8string_view m_delimiter; ///< Delimiter + + public: + LazySplitIterator(std::optional strl, const std::u8string_view& delimiter); + + reference operator*() const; + pointer operator->() const; + LazySplitIterator& operator++(); + LazySplitIterator operator++(int); + bool operator==(const LazySplitIterator& other) const; + bool operator!=(const LazySplitIterator& other) const; + }; + + /** + * @brief Class for lazy splitting of strings. + */ + class LazySplit { + private: + std::u8string_view m_strl; ///< Original string + std::u8string_view m_delimiter; ///< Delimiter + + public: + LazySplit(const std::u8string_view& strl, const std::u8string_view& delimiter); + LazySplitIterator begin() const; + LazySplitIterator end() const; + }; + + /** + * @brief Lazily split given string with specified delimiter. + * @param[in] strl The string to be split. + * @param[in] delimiter The delimiter for splitting. + * @return + * LazySplit object that can be used in range-based for loops. + * \par + * Every item in the result is a split entry. + * If the given string or delimiter is empty, + * the result container will only contain one entry which is equal to the given string. + */ + LazySplit lazy_split(const std::u8string_view& strl, const std::u8string_view& delimiter); /** * @brief Split given string with specified delimiter as string view. + * @details + * If your split involves large items, please consider using lazy_split(), + * because it splits entries one by one rather than outputting them all at once. * @param[in] strl The string need to be splitting. - * @param[in] _delimiter The delimiter for splitting.
- * @return - * The split result with string view format. - * This will not produce any copy of original string. - * \par - * If given string or delimiter are empty, - * the result container will only contain 1 entry which is equal to given string. - * @see Split(const std::u8string_view&, const char8_t*) + * @param[in] delimiter The delimiter for splitting. + * @return Split result in string view format. + * @see lazy_split() for more about split rules. */ - std::vector split(const std::u8string_view& strl, const std::u8string_view& _delimiter); + std::vector split(const std::u8string_view& strl, const std::u8string_view& delimiter); /** * @brief Split given string with specified delimiter. + * @details + * If there is no requirement about storing the result, + * please consider using split() because it uses less memory. * @param[in] strl The string need to be splitting. - * @param[in] _delimiter The delimiter for splitting. - * @return - * The split result. - * \par - * If given string or delimiter are empty, - * the result container will only contain 1 entry which is equal to given string. + * @param[in] delimiter The delimiter for splitting. + * @return Split result in string format. + * @see lazy_split() for more about split rules. */ - std::vector split_owned(const std::u8string_view& strl, const std::u8string_view& _delimiter); + std::vector split_owned(const std::u8string_view& strl, const std::u8string_view& delimiter); - // TODO: - // Add lazy_split(const std::u8string_view& strl, const std::u8string_view& _delimiter); - // Once we add it, we need redirect all split function into it. +#pragma endregion } // namespace yycc::string::op