2024-04-25 10:38:13 +08:00
|
|
|
#pragma once
|
2025-09-22 22:14:36 +08:00
|
|
|
#include "../macro/printf_checker.hpp"
|
2025-07-25 09:35:26 +08:00
|
|
|
#include <string>
|
|
|
|
#include <string_view>
|
2024-04-25 10:38:13 +08:00
|
|
|
#include <cstdarg>
|
2024-05-21 10:24:05 +08:00
|
|
|
#include <functional>
|
|
|
|
#include <vector>
|
2025-09-26 14:43:13 +08:00
|
|
|
#include <optional>
|
2024-04-25 10:38:13 +08:00
|
|
|
|
2025-06-20 23:38:34 +08:00
|
|
|
namespace yycc::string::op {
|
2024-06-10 17:55:23 +08:00
|
|
|
|
2025-09-26 21:43:12 +08:00
|
|
|
#pragma region Printf
|
|
|
|
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
|
|
|
* @brief Perform an UTF8 string formatting operation.
|
|
|
|
* @param[in] format The format string.
|
|
|
|
* @param[in] ... Argument list of format string.
|
2025-09-22 22:14:36 +08:00
|
|
|
* @return The formatted result.
|
2025-07-25 09:35:26 +08:00
|
|
|
*/
|
2025-09-22 22:14:36 +08:00
|
|
|
std::u8string printf(YYCC_PRINTF_CHECK_FMTSTR const char8_t* format, ...) YYCC_PRINTF_CHECK_ATTR(1, 2);
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
|
|
|
* @brief Perform an UTF8 string formatting operation.
|
|
|
|
* @param[in] format The format string.
|
|
|
|
* @param[in] argptr Argument list of format string.
|
2025-09-22 22:14:36 +08:00
|
|
|
* @return The formatted result.
|
2025-07-25 09:35:26 +08:00
|
|
|
*/
|
2025-09-22 22:14:36 +08:00
|
|
|
std::u8string vprintf(const char8_t* format, va_list argptr);
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
|
|
|
* @brief Perform an ordinary string formatting operation.
|
|
|
|
* @param[in] format The format string.
|
|
|
|
* @param[in] ... Argument list of format string.
|
2025-09-22 22:14:36 +08:00
|
|
|
* @return The formatted result.
|
2025-07-25 09:35:26 +08:00
|
|
|
*/
|
2025-09-22 22:14:36 +08:00
|
|
|
std::string printf(YYCC_PRINTF_CHECK_FMTSTR const char* format, ...) YYCC_PRINTF_CHECK_ATTR(1, 2);
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
|
|
|
* @brief Perform an ordinary string formatting operation.
|
|
|
|
* @param[in] format The format string.
|
|
|
|
* @param[in] argptr Argument list of format string.
|
2025-09-22 22:14:36 +08:00
|
|
|
* @return The formatted result.
|
2025-07-25 09:35:26 +08:00
|
|
|
*/
|
2025-09-22 22:14:36 +08:00
|
|
|
std::string vprintf(const char* format, va_list argptr);
|
2024-04-25 10:38:13 +08:00
|
|
|
|
2025-09-26 21:43:12 +08:00
|
|
|
#pragma endregion
|
|
|
|
|
|
|
|
#pragma region Replace
|
|
|
|
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
2024-07-24 15:03:31 +08:00
|
|
|
* @brief Modify given string with all occurrences of substring \e old replaced by \e new.
|
|
|
|
* @param[in,out] strl The string for replacing
|
|
|
|
* @param[in] _from_strl The \e old string.
|
|
|
|
* @param[in] _to_strl The \e new string.
|
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
void replace(std::u8string& strl, const std::u8string_view& _from_strl, const std::u8string_view& _to_strl);
|
|
|
|
/**
|
2024-07-24 15:03:31 +08:00
|
|
|
* @brief Return a copy with all occurrences of substring \e old replaced by \e new.
|
|
|
|
* @param[in] _strl The string for replacing
|
|
|
|
* @param[in] _from_strl The \e old string.
|
|
|
|
* @param[in] _to_strl The \e new string.
|
|
|
|
* @return The result of replacement.
|
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
std::u8string replace(const std::u8string_view& _strl, const std::u8string_view& _from_strl, const std::u8string_view& _to_strl);
|
2024-06-10 17:55:23 +08:00
|
|
|
|
2025-09-26 21:43:12 +08:00
|
|
|
#pragma endregion
|
|
|
|
|
|
|
|
#pragma region Join
|
|
|
|
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
2024-07-24 15:03:31 +08:00
|
|
|
* @brief The data provider of general join function.
|
|
|
|
* @details
|
|
|
|
* For programmer using lambda to implement this function pointer:
|
|
|
|
* \li During calling, implementation should assign the reference of string view passed in argument
|
|
|
|
* to the string which need to be joined.
|
|
|
|
* \li The function returns true to continue joining; otherwise it returns false to stop joining.
|
|
|
|
* Any content assigned to the argument during the call that returns false is not included in the join process.
|
2024-05-21 10:24:05 +08:00
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
using JoinDataProvider = std::function<bool(std::u8string_view&)>;
|
|
|
|
/**
|
2024-07-24 15:03:31 +08:00
|
|
|
* @brief Universal join function.
|
|
|
|
* @details
|
|
|
|
* This function uses a function pointer as a general data provider interface,
|
|
|
|
* so this function is suitable for all types of containers.
|
|
|
|
* You can use this universal join function for any custom container by
|
|
|
|
* using C++ lambda syntax to create a code block adapted to this function pointer.
|
|
|
|
* @param[in] fct_data The function pointer of JoinDataProvider type providing the data to be joined.
|
2025-06-22 17:14:49 +08:00
|
|
|
* @param[in] delimiter The delimiter used for joining.
|
2024-07-24 15:03:31 +08:00
|
|
|
* @return The result string of joining.
|
2024-05-21 10:24:05 +08:00
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
std::u8string join(JoinDataProvider fct_data, const std::u8string_view& delimiter);
|
|
|
|
/**
|
2024-08-26 11:58:20 +08:00
|
|
|
* @brief Specialized join function for standard library container.
|
|
|
|
* @tparam InputIt
|
|
|
|
* Must meet the requirements of LegacyInputIterator.
|
2025-07-25 09:35:26 +08:00
|
|
|
* It also can be dereferenced and then implicitly converted to std::u8string_view.
|
2024-08-26 11:58:20 +08:00
|
|
|
* @param[in] first The beginning of the range of elements to join.
|
|
|
|
* @param[in] last The terminal of the range of elements to join (exclusive).
|
2025-06-22 17:14:49 +08:00
|
|
|
* @param[in] delimiter The delimiter used for joining.
|
2024-07-24 15:03:31 +08:00
|
|
|
* @return The result string of joining.
|
2024-05-21 10:24:05 +08:00
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
template<class InputIt>
|
|
|
|
std::u8string join(InputIt first, InputIt last, const std::u8string_view& delimiter) {
|
|
|
|
return join(
|
|
|
|
[&first, &last](std::u8string_view& view) -> bool {
|
|
|
|
// if we reach tail, return false to stop join process
|
|
|
|
if (first == last) return false;
|
|
|
|
// otherwise fetch data, inc iterator and return.
|
|
|
|
view = *first;
|
|
|
|
++first;
|
|
|
|
return true;
|
|
|
|
},
|
|
|
|
delimiter);
|
|
|
|
}
|
2024-04-26 15:37:28 +08:00
|
|
|
|
2025-09-26 21:43:12 +08:00
|
|
|
#pragma endregion
|
|
|
|
|
|
|
|
#pragma region Lower Upper
|
|
|
|
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
2024-07-24 15:03:31 +08:00
|
|
|
* @brief Convert given string to lowercase.
|
|
|
|
* @param[in,out] strl The string to be lowercase.
|
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
void lower(std::u8string& strl);
|
|
|
|
/**
|
2024-08-26 11:58:20 +08:00
|
|
|
* @brief Return a copy of the string converted to lowercase.
|
|
|
|
* @param[in] strl The string to be lowercase.
|
|
|
|
* @return The copy of the string converted to lowercase.
|
2024-07-24 15:03:31 +08:00
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
std::u8string to_lower(const std::u8string_view& strl);
|
|
|
|
/**
|
2024-07-24 15:03:31 +08:00
|
|
|
* @brief Convert given string to uppercase.
|
|
|
|
* @param[in,out] strl The string to be uppercase.
|
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
void upper(std::u8string& strl);
|
|
|
|
/**
|
2024-08-26 11:58:20 +08:00
|
|
|
* @brief Return a copy of the string converted to uppercase.
|
|
|
|
* @param[in] strl The string to be uppercase.
|
|
|
|
* @return The copy of the string converted to uppercase.
|
|
|
|
*/
|
2025-07-25 09:35:26 +08:00
|
|
|
std::u8string to_upper(const std::u8string_view& strl);
|
|
|
|
|
2025-09-26 21:43:12 +08:00
|
|
|
#pragma endregion
|
|
|
|
|
|
|
|
#pragma region Strip
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Remove the leading and trailing characters listed in \e words from the string.
|
|
|
|
* @param[in,out] strl The string to be stripped.
|
|
|
|
* @param[in] words The characters to be stripped.
|
|
|
|
*/
|
|
|
|
void strip(std::u8string& strl, const std::u8string_view& words);
|
|
|
|
/**
|
|
|
|
* @brief Return a copy of the string with the leading and trailing characters listed in \e words removed.
|
|
|
|
* @param[in] strl The string to be stripped.
|
|
|
|
* @param[in] words The characters to be stripped.
|
|
|
|
* @return The copy of the string with the leading and trailing characters listed in \e words removed.
|
|
|
|
*/
|
|
|
|
std::u8string to_strip(const std::u8string_view& strl, const std::u8string_view& words);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Remove the leading characters listed in \e words from the string.
|
|
|
|
* @param[in,out] strl The string to be stripped.
|
|
|
|
* @param[in] words The characters to be stripped.
|
|
|
|
*/
|
|
|
|
void lstrip(std::u8string& strl, const std::u8string_view& words);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Return a copy of the string with the leading characters listed in \e words removed.
|
|
|
|
* @param[in] strl The string to be stripped.
|
|
|
|
* @param[in] words The characters to be stripped.
|
|
|
|
* @return The copy of the string with the leading characters listed in \e words removed.
|
|
|
|
*/
|
|
|
|
std::u8string to_lstrip(const std::u8string_view& strl, const std::u8string_view& words);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Remove the trailing characters listed in \e words from the string.
|
|
|
|
* @param[in,out] strl The string to be stripped.
|
|
|
|
* @param[in] words The characters to be stripped.
|
|
|
|
*/
|
|
|
|
void rstrip(std::u8string& strl, const std::u8string_view& words);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Return a copy of the string with the trailing characters listed in \e words removed.
|
|
|
|
* @param[in] strl The string to be stripped.
|
|
|
|
* @param[in] words The characters to be stripped.
|
|
|
|
* @return The copy of the string with the trailing characters listed in \e words removed.
|
|
|
|
*/
|
|
|
|
std::u8string to_rstrip(const std::u8string_view& strl, const std::u8string_view& words);
|
|
|
|
|
|
|
|
|
|
|
|
#pragma endregion
|
2024-05-21 10:24:05 +08:00
|
|
|
|
2025-09-26 14:43:13 +08:00
|
|
|
#pragma region Split
|
|
|
|
|
2025-06-22 17:14:49 +08:00
|
|
|
/**
|
2025-09-26 14:43:13 +08:00
|
|
|
* @brief Iterator class for lazy splitting of strings.
|
|
|
|
*/
|
|
|
|
class LazySplitIterator {
|
|
|
|
public:
|
|
|
|
using iterator_category = std::forward_iterator_tag;
|
|
|
|
using value_type = std::u8string_view;
|
|
|
|
using difference_type = std::ptrdiff_t;
|
|
|
|
using pointer = const std::u8string_view*;
|
|
|
|
using reference = const std::u8string_view&;
|
|
|
|
|
|
|
|
private:
|
|
|
|
/**
|
|
|
|
* @brief Current splitted item.
|
|
|
|
* @details Currently already splitted item for user fetching.
|
|
|
|
* If this value is std::nullopt, it means that we are reach the split process endpoint.
|
|
|
|
*/
|
|
|
|
std::optional<std::u8string_view> m_current_str;
|
|
|
|
/**
|
|
|
|
* @brief The string passed to the next of iterator.
|
|
|
|
* @details It actually the remains after split excluding delimiter.
|
|
|
|
*/
|
|
|
|
std::optional<std::u8string_view> m_next_str;
|
|
|
|
std::u8string_view m_delimiter; ///< Delimiter
|
|
|
|
|
|
|
|
public:
|
|
|
|
LazySplitIterator(std::optional<std::u8string_view> strl, const std::u8string_view& delimiter);
|
|
|
|
|
|
|
|
reference operator*() const;
|
|
|
|
pointer operator->() const;
|
|
|
|
LazySplitIterator& operator++();
|
|
|
|
LazySplitIterator operator++(int);
|
|
|
|
bool operator==(const LazySplitIterator& other) const;
|
|
|
|
bool operator!=(const LazySplitIterator& other) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Class for lazy splitting of strings.
|
|
|
|
*/
|
|
|
|
class LazySplit {
|
|
|
|
private:
|
|
|
|
std::u8string_view m_strl; ///< Original string
|
|
|
|
std::u8string_view m_delimiter; ///< Delimiter
|
|
|
|
|
|
|
|
public:
|
|
|
|
LazySplit(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
|
|
|
LazySplitIterator begin() const;
|
|
|
|
LazySplitIterator end() const;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Lazily split given string with specified delimiter.
|
2025-06-22 17:14:49 +08:00
|
|
|
* @param[in] strl The string to be split.
|
2025-09-26 14:43:13 +08:00
|
|
|
* @param[in] delimiter The delimiter for splitting.
|
2025-06-22 17:14:49 +08:00
|
|
|
* @return
|
2025-09-26 14:43:13 +08:00
|
|
|
* LazySplit object that can be used in range-based for loops.
|
2025-06-22 17:14:49 +08:00
|
|
|
* \par
|
2025-09-26 14:43:13 +08:00
|
|
|
* Every item in the result is a split entry.
|
2025-06-22 17:14:49 +08:00
|
|
|
* If given string or delimiter are empty,
|
2025-09-26 14:43:13 +08:00
|
|
|
* the result container will only contain one entry which is equal to given string.
|
|
|
|
*/
|
|
|
|
LazySplit lazy_split(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
|
|
|
/**
|
|
|
|
* @brief Split given string with specified delimiter as string view.
|
|
|
|
* @details
|
|
|
|
* If your split involve large items, please consider using lazy_split(),
|
|
|
|
* because it splits entries one by one rather than producing them all at once.
|
|
|
|
* @param[in] strl The string to be split.
|
|
|
|
* @param[in] delimiter The delimiter for splitting.
|
|
|
|
* @return Split result in string view format.
|
|
|
|
* @see lazy_split() for more about split rules.
|
2025-06-22 17:14:49 +08:00
|
|
|
*/
|
2025-09-26 14:43:13 +08:00
|
|
|
std::vector<std::u8string_view> split(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
2025-07-25 09:35:26 +08:00
|
|
|
/**
|
2025-06-22 17:14:49 +08:00
|
|
|
* @brief Split given string with specified delimiter.
|
2025-09-26 14:43:13 +08:00
|
|
|
* @details
|
|
|
|
* If there is no requirement about storing result,
|
|
|
|
* please consider using split() because it uses less memory.
|
2024-07-24 15:03:31 +08:00
|
|
|
* @param[in] strl The string to be split.
|
2025-09-26 14:43:13 +08:00
|
|
|
* @param[in] delimiter The delimiter for splitting.
|
|
|
|
* @return Split result in string format.
|
|
|
|
* @see lazy_split() for more about split rules.
|
2024-05-21 10:24:05 +08:00
|
|
|
*/
|
2025-09-26 14:43:13 +08:00
|
|
|
std::vector<std::u8string> split_owned(const std::u8string_view& strl, const std::u8string_view& delimiter);
|
2024-06-29 17:39:13 +08:00
|
|
|
|
2025-09-26 14:43:13 +08:00
|
|
|
#pragma endregion
|
2025-06-20 23:38:34 +08:00
|
|
|
|
2025-07-25 09:35:26 +08:00
|
|
|
} // namespace yycc::string::op
|