1
0

feat: add utf8 format

- move utf8 stream and format patch from string to patch namespace.
- add ordinary format alias and utf8 format in our format patch.
- add char8_t and char inter-cast in string reinterpret namespace.
- fix bug of utf8 formatter.
- add test for utf8 format.
This commit is contained in:
2025-09-25 15:29:55 +08:00
parent a61955bb09
commit c8d763bdcf
17 changed files with 257 additions and 36 deletions

View File

@ -1,78 +0,0 @@
/**
* @file
* @brief The patch for \c std::format to allow UTF8 strings as arguments.
* @details
* As we know, \c std::format only allows \c char and \c wchar_t as its char type as of C++ 23.
* So it is impossible to use UTF8 strings with \c std::format, either as the format string or as an argument.
* This file provides a patch for this shortcoming.
* By including this file directly, you gain the ability to use UTF8 strings as arguments of \c std::format with the \c char char type.
*/
#pragma once
#include "reinterpret.hpp"
#include <format>
#include <string>
#include <string_view>
#define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
#pragma region Utf8 Formatter
/**
 * @brief \c std::formatter specialization for mutable UTF-8 C-strings (<TT>char8_t*</TT>) with the \c char char type.
 * @details Both format spec parsing and output are delegated to the standard <TT>const char*</TT> formatter.
 */
template<>
struct std::formatter<char8_t*, char> {
    /// Parse the format spec by forwarding to the wrapped <TT>const char*</TT> formatter.
    constexpr auto parse(auto& ctx) {
        return m_Inner.parse(ctx);
    }

    /// Format \p str as an ordinary C-string through the wrapped formatter.
    auto format(char8_t* str, auto& ctx) const {
        const char* ordinary = NS_YYCC_STRING_REINTERPRET::as_ordinary(str);
        return m_Inner.format(ordinary, ctx);
    }

private:
    /// The standard formatter doing the real work.
    std::formatter<const char*, char> m_Inner{};
};
/**
 * @brief \c std::formatter specialization for constant UTF-8 C-strings (<TT>const char8_t*</TT>) with the \c char char type.
 * @details Both format spec parsing and output are delegated to the standard <TT>const char*</TT> formatter.
 */
template<>
struct std::formatter<const char8_t*, char> {
    /// Parse the format spec by forwarding to the wrapped <TT>const char*</TT> formatter.
    constexpr auto parse(auto& ctx) {
        return m_Inner.parse(ctx);
    }

    /// Format \p str as an ordinary C-string through the wrapped formatter.
    auto format(const char8_t* str, auto& ctx) const {
        const char* ordinary = NS_YYCC_STRING_REINTERPRET::as_ordinary(str);
        return m_Inner.format(ordinary, ctx);
    }

private:
    /// The standard formatter doing the real work.
    std::formatter<const char*, char> m_Inner{};
};
/**
 * @brief \c std::formatter specialization for UTF-8 char arrays (<TT>char8_t[N]</TT>) with the \c char char type.
 * @details
 * Format spec parsing and output are delegated to the standard \c std::string_view formatter.
 * The array is viewed as ordinary chars and the output stops at the first NUL found in it,
 * so that the terminator of an UTF-8 string literal is not written out.
 * If the array holds no NUL, all \c N chars are written.
 * @tparam N The extent of the array (for a string literal this counts the terminating NUL).
 */
template<std::size_t N>
struct std::formatter<char8_t[N], char> {
    /// Parse the format spec by forwarding to the underlying string view formatter.
    constexpr auto parse(auto& ctx) { return underlying_formatter.parse(ctx); }
    /// Format the chars of \p str located before its first NUL (or all of them if there is none).
    auto format(const char8_t (&str)[N], auto& ctx) const {
        // A view of "char" can not be built from a "char8_t" pointer, so reinterpret the pointer first.
        const char8_t* first = str;
        const std::basic_string_view<char> whole(NS_YYCC_STRING_REINTERPRET::as_ordinary(first), N);
        // find() yields npos when there is no NUL, and substr() then keeps the whole array.
        return underlying_formatter.format(whole.substr(0, whole.find('\0')), ctx);
    }
private:
    std::formatter<std::basic_string_view<char>, char> underlying_formatter{};
};
/**
 * @brief \c std::formatter specialization for UTF-8 strings (\c std::basic_string of \c char8_t) with the \c char char type.
 * @details
 * Format spec parsing and output are delegated to the standard \c std::string_view formatter.
 * The string is accepted with whatever traits and allocator it was declared with,
 * so the specialization is usable for every type it matches, not only for \c std::u8string.
 * @tparam Traits The char traits of the formatted string.
 * @tparam Alloc The allocator of the formatted string.
 */
template<class Traits, class Alloc>
struct std::formatter<std::basic_string<char8_t, Traits, Alloc>, char> {
    /// Parse the format spec by forwarding to the underlying string view formatter.
    constexpr auto parse(auto& ctx) { return underlying_formatter.parse(ctx); }
    /// Format the content of \p str as ordinary chars.
    auto format(const std::basic_string<char8_t, Traits, Alloc>& str, auto& ctx) const {
        // Build the view from data and size because a string with custom traits
        // does not convert to std::u8string_view implicitly.
        const std::u8string_view view(str.data(), str.size());
        return underlying_formatter.format(NS_YYCC_STRING_REINTERPRET::as_ordinary_view(view), ctx);
    }
private:
    std::formatter<std::basic_string_view<char>, char> underlying_formatter{};
};
/**
 * @brief \c std::formatter specialization for UTF-8 string views (\c std::basic_string_view of \c char8_t) with the \c char char type.
 * @details
 * Format spec parsing and output are delegated to the standard \c std::string_view formatter.
 * The view is accepted with whatever traits it was declared with,
 * so the specialization is usable for every type it matches, not only for \c std::u8string_view.
 * @tparam Traits The char traits of the formatted string view.
 */
template<class Traits>
struct std::formatter<std::basic_string_view<char8_t, Traits>, char> {
    /// Parse the format spec by forwarding to the underlying string view formatter.
    constexpr auto parse(auto& ctx) { return underlying_formatter.parse(ctx); }
    /// Format the content of \p sv as ordinary chars.
    auto format(std::basic_string_view<char8_t, Traits> sv, auto& ctx) const {
        // Build the view from data and size because a view with custom traits
        // does not convert to std::u8string_view implicitly.
        const std::u8string_view view(sv.data(), sv.size());
        return underlying_formatter.format(NS_YYCC_STRING_REINTERPRET::as_ordinary_view(view), ctx);
    }
private:
    std::formatter<std::basic_string_view<char>, char> underlying_formatter{};
};
#pragma endregion
#undef NS_YYCC_STRING_REINTERPRET

View File

@ -2,6 +2,9 @@
namespace yycc::string::reinterpret {
// Turn one ordinary char into the UTF-8 char holding the same value.
char8_t as_utf8(const char& src) {
    const auto utf8_char = static_cast<char8_t>(src);
    return utf8_char;
}
// View a constant ordinary C-string as a constant UTF-8 C-string; the pointee is not copied.
const char8_t* as_utf8(const char* src) {
    const auto* utf8_ptr = reinterpret_cast<const char8_t*>(src);
    return utf8_ptr;
}
@ -15,6 +18,9 @@ namespace yycc::string::reinterpret {
return std::u8string_view(reinterpret_cast<const char8_t*>(src.data()), src.size());
}
// Turn one UTF-8 char into the ordinary char holding the same value.
char as_ordinary(char8_t src) {
    const auto ordinary_char = static_cast<char>(src);
    return ordinary_char;
}
// View a constant UTF-8 C-string as a constant ordinary C-string; the pointee is not copied.
const char* as_ordinary(const char8_t* src) {
    const auto* ordinary_ptr = reinterpret_cast<const char*>(src);
    return ordinary_ptr;
}

View File

@ -11,52 +11,64 @@
*/
namespace yycc::string::reinterpret {
/**
* @brief Reinterpret an ordinary char as an UTF-8 char.
* @details The value is converted by a plain cast; no transcoding is performed.
* @param[in] src Source ordinary char value.
* @return The UTF-8 char value.
*/
char8_t as_utf8(const char& src);
/**
* @brief Reinterpret an ordinary C-string as an UTF-8 C-string (const version).
* @details Only the pointer type is changed; the string is neither copied nor transcoded.
* @param[in] src Source ordinary string
* @return Pointer to UTF-8 encoded string
*/
const char8_t* as_utf8(const char* src);
/**
* @brief Reinterpret an ordinary C-string as an UTF-8 C-string (non-const version).
* @param[in] src Source ordinary string
* @return Pointer to UTF-8 encoded string
*/
char8_t* as_utf8(char* src);
/**
* @brief Reinterpret an ordinary string view as a copied UTF-8 string.
* @param[in] src Source ordinary string view
* @return UTF-8 encoded string
*/
std::u8string as_utf8(const std::string_view& src);
/**
* @brief Reinterpret an ordinary string view as an UTF-8 string view.
* @param[in] src Source ordinary string view
* @return UTF-8 encoded string view
*/
std::u8string_view as_utf8_view(const std::string_view& src);
/**
* @brief Reinterpret an UTF-8 char as an ordinary char.
* @details The value is converted by a plain cast; no transcoding is performed.
* @param[in] src Source UTF-8 char value.
* @return The ordinary char value.
*/
char as_ordinary(char8_t src);
/**
* @brief Reinterpret an UTF-8 C-string as an ordinary C-string (const version).
* @details Only the pointer type is changed; the string is neither copied nor transcoded.
* @param[in] src Source UTF-8 string
* @return Pointer to ordinary string
*/
const char* as_ordinary(const char8_t* src);
/**
* @brief Reinterpret an UTF-8 C-string as an ordinary C-string (non-const version).
* @param[in] src Source UTF-8 string
* @return Pointer to ordinary string
*/
char* as_ordinary(char8_t* src);
/**
* @brief Reinterpret an UTF-8 string view as an ordinary string.
* @param[in] src Source UTF-8 string view
* @return Ordinary string
*/
std::string as_ordinary(const std::u8string_view& src);
/**
* @brief Reinterpret an UTF-8 string view as an ordinary string view.
* @param[in] src Source UTF-8 string view
* @return Ordinary string view
*/
std::string_view as_ordinary_view(const std::u8string_view& src);

View File

@ -1,23 +0,0 @@
#include "stream.hpp"
#include "reinterpret.hpp"
#define REINTERPRET ::yycc::string::reinterpret
namespace yycc::string::stream {

    // Write an UTF8 string view to the stream as an ordinary string view.
    std::ostream& operator<<(std::ostream& os, const std::u8string_view& u8str) {
        return os << REINTERPRET::as_ordinary_view(u8str);
    }

    // Write an UTF8 C-string to the stream as an ordinary C-string.
    std::ostream& operator<<(std::ostream& os, const char8_t* u8str) {
        return os << REINTERPRET::as_ordinary(u8str);
    }

    // Write a single UTF8 char to the stream as an ordinary char.
    std::ostream& operator<<(std::ostream& os, char8_t u8chr) {
        const char ordinary = static_cast<char>(u8chr);
        return os << ordinary;
    }

}

View File

@ -1,17 +0,0 @@
#pragma once
#include <ostream>
#include <string_view>
/**
* @brief This namespace adds UTF8 support for \c std::ostream.
* @details
* The operator overloads declared in this namespace give \c std::ostream the ability to write UTF8 strings and UTF8 chars.
* To use this feature, please directly write <TT>using namespace yycc::string::stream;</TT> to import this namespace.
*/
namespace yycc::string::stream {
/// Write the UTF8 string view \p u8str to \p os as ordinary chars and return \p os.
std::ostream& operator<<(std::ostream& os, const std::u8string_view& u8str);
/// Write the UTF8 C-string \p u8str to \p os as an ordinary C-string and return \p os.
std::ostream& operator<<(std::ostream& os, const char8_t* u8str);
/// Write the UTF8 char \p u8chr to \p os as an ordinary char and return \p os.
std::ostream& operator<<(std::ostream& os, char8_t u8chr);
}