From 09fea7e0a38ffc21141ad74ea27fc9d6e2e9e4d8 Mon Sep 17 00:00:00 2001
From: yyc12345
Date: Fri, 23 Jan 2026 11:02:54 +0800
Subject: [PATCH] feat: add charconv polyfill for clang and apple clang

---
Review notes (this section is ignored when the patch is applied):
- The header names on the context "#include" lines of parse.hpp and stringify.hpp,
  and the exact whitespace of all context lines, were lost in the copy of this patch
  that was reviewed. They are shown as found and must be restored before applying.
- parse.hpp, stringify.hpp and charconv.hpp were revised during review,
  so the post-image blob ids on their "index" lines are stale.

 src/CMakeLists.txt                 |   1 +
 src/yycc/num/parse.hpp             |   8 +-
 src/yycc/num/stringify.hpp         |   8 +-
 src/yycc/patch/libcxx/charconv.hpp | 316 +++++++++++++++++++++++++++++
 4 files changed, 331 insertions(+), 2 deletions(-)
 create mode 100644 src/yycc/patch/libcxx/charconv.hpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ea950b1..1406e95 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -69,6 +69,7 @@ FILES
     yycc/patch/format.hpp
     yycc/patch/libcxx/enumerate.hpp
     yycc/patch/libcxx/stacktrace.hpp
+    yycc/patch/libcxx/charconv.hpp
     yycc/num/parse.hpp
     yycc/num/stringify.hpp
     yycc/num/safe_cast.hpp
diff --git a/src/yycc/num/parse.hpp b/src/yycc/num/parse.hpp
index de71757..e0a6cb9 100644
--- a/src/yycc/num/parse.hpp
+++ b/src/yycc/num/parse.hpp
@@ -3,10 +3,16 @@
 #include "../string/reinterpret.hpp"
 #include
 #include
-#include <charconv>
 #include
 #include
 
+#include "../macro/stl_detector.hpp"
+#if defined(YYCC_STL_CLANGSTL)
+#include "../patch/libcxx/charconv.hpp"
+#else
+#include <charconv>
+#endif
+
 #define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
 #define NS_YYCC_STRING_OP ::yycc::string::op
 
diff --git a/src/yycc/num/stringify.hpp b/src/yycc/num/stringify.hpp
index f93d86b..f426fcf 100644
--- a/src/yycc/num/stringify.hpp
+++ b/src/yycc/num/stringify.hpp
@@ -3,9 +3,15 @@
 #include
 #include
 #include
-#include <charconv>
 #include
 
+#include "../macro/stl_detector.hpp"
+#if defined(YYCC_STL_CLANGSTL)
+#include "../patch/libcxx/charconv.hpp"
+#else
+#include <charconv>
+#endif
+
 #define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
 
 /**
diff --git a/src/yycc/patch/libcxx/charconv.hpp b/src/yycc/patch/libcxx/charconv.hpp
new file mode 100644
index 0000000..2a02dfa
--- /dev/null
+++ b/src/yycc/patch/libcxx/charconv.hpp
@@ -0,0 +1,316 @@
+#pragma once
+#include "../../macro/stl_detector.hpp"
+
+#if defined(YYCC_STL_CLANGSTL)
+
+/**
+ * @private
+ * @file Polyfill for the LLVM libc++ charconv header,
+ * providing the \c std::from_chars and \c std::to_chars utilities.
+ * These 2 functions are only \b fully implemented in the latest Clang (20+)
+ * and partially implemented in the latest Apple Clang (Xcode 16).
+ * This should be removed once both official Clang and Apple Clang libc++ \b fully provide them.
+ * This polyfill is generated by AI.
+ *
+ * Known deviations from the standard facilities:
+ * \li Floating point conversions go through \c snprintf, \c strtof and \c strtod,
+ *     so they follow the current C locale and use a temporary \c std::string buffer.
+ * \li Floating point \c to_chars has no shortest-representation overload; precision defaults to 0.
+ * \li Floating point \c from_chars ignores its \c fmt argument.
+ * \li Only \c float and \c double are supported as floating point types.
+ *
+ * NOTE(review): libc++ declares the same names inside its versioned inline namespace.
+ * Confirm that no translation unit sees both this header and the real charconv header.
+ */
+
+#include <cctype>
+#include <cerrno>
+#include <concepts>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <string>
+#include <system_error>
+#include <type_traits>
+
+namespace std {
+
+    enum class chars_format : unsigned int {
+        scientific = 1,
+        fixed = 2,
+        hex = 4,
+        general = fixed | scientific // Both bits set, i.e. 3.
+    };
+
+    struct from_chars_result {
+        const char* ptr;
+        std::errc ec;
+
+        friend bool operator==(const from_chars_result&, const from_chars_result&) = default;
+        constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
+    };
+
+    struct to_chars_result {
+        char* ptr;
+        std::errc ec;
+
+        friend bool operator==(const to_chars_result&, const to_chars_result&) = default;
+        constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
+    };
+
+    /// @private
+    /// @brief Map an ASCII digit or letter to its numeric value.
+    /// @return The value in [0, 35], or -1 if \c ch is not an ASCII digit or letter.
+    constexpr int __yycc_digit_value(char ch) noexcept {
+        if (ch >= '0' && ch <= '9') return ch - '0';
+        if (ch >= 'a' && ch <= 'z') return ch - 'a' + 10;
+        if (ch >= 'A' && ch <= 'Z') return ch - 'A' + 10;
+        return -1;
+    }
+
+    /// @private
+    /// @brief Get the printf conversion for a floating point format; precision is passed through ".*".
+    /// @return The format string, or \c nullptr if \c fmt is not exactly one of the known formats.
+    constexpr const char* __yycc_float_format(chars_format fmt) noexcept {
+        switch (fmt) {
+            case chars_format::general:
+                return "%.*g";
+            case chars_format::scientific:
+                return "%.*e";
+            case chars_format::fixed:
+                return "%.*f";
+            case chars_format::hex:
+                return "%.*a";
+            default:
+                return nullptr;
+        }
+    }
+
+    /**
+     * @brief Write the text form of an integer into [first, last) without a NUL terminator.
+     * @details Digits above 9 are written as lowercase letters and no radix prefix is written.
+     * A negative value is written as a minus sign followed by its magnitude, in every base.
+     * @param[in] first Start of the output range.
+     * @param[in] last One past the end of the output range.
+     * @param[in] value The integer to convert.
+     * @param[in] base The radix, from 2 to 36.
+     * @return On success \c ptr is one past the last written character and \c ec is value-initialized.
+     * If the range is too small, \c ptr is \c last and \c ec is \c std::errc::value_too_large.
+     * If \c base is out of range, \c ptr is \c first and \c ec is \c std::errc::invalid_argument.
+     */
+    template<typename T>
+        requires std::integral<T> && (!std::same_as<T, bool>)
+    to_chars_result to_chars(char* first, char* last, T value, int base = 10) {
+        static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported integral type");
+        if (first >= last) {
+            return {last, std::errc::value_too_large};
+        }
+        if (base < 2 || base > 36) {
+            return {first, std::errc::invalid_argument};
+        }
+
+        // Work on the magnitude as an unsigned value so the most negative input cannot overflow.
+        auto magnitude = static_cast<unsigned long long>(value);
+        bool negative = false;
+        if constexpr (std::is_signed_v<T>) {
+            if (value < 0) {
+                negative = true;
+                magnitude = 0ULL - magnitude;
+            }
+        }
+
+        // Digits come out least significant first, so fill the scratch buffer backwards.
+        // The worst case is base 2: one character per value bit, plus the sign.
+        constexpr std::size_t scratch_size = std::numeric_limits<unsigned long long>::digits + 1;
+        char scratch[scratch_size];
+        char* cursor = scratch + scratch_size;
+        const auto radix = static_cast<unsigned long long>(base);
+        do {
+            *--cursor = "0123456789abcdefghijklmnopqrstuvwxyz"[magnitude % radix];
+            magnitude /= radix;
+        } while (magnitude != 0);
+        if (negative) {
+            *--cursor = '-';
+        }
+
+        const auto length = static_cast<std::size_t>(scratch + scratch_size - cursor);
+        if (length > static_cast<std::size_t>(last - first)) {
+            return {last, std::errc::value_too_large};
+        }
+        std::memcpy(first, cursor, length);
+        return {first + length, std::errc{}};
+    }
+
+    /**
+     * @brief Write the text form of a floating point number into [first, last) without a NUL terminator.
+     * @details Hexadecimal output carries no "0x" prefix.
+     * @param[in] first Start of the output range.
+     * @param[in] last One past the end of the output range.
+     * @param[in] value The number to convert. Only \c float and \c double are supported.
+     * @param[in] fmt The notation to use. It must be exactly one of the \c chars_format values.
+     * @param[in] precision The precision handed to the underlying printf conversion.
+     * @return On success \c ptr is one past the last written character and \c ec is value-initialized.
+     * If the range is too small, \c ptr is \c last and \c ec is \c std::errc::value_too_large.
+     * If \c fmt is not recognized, \c ptr is \c first and \c ec is \c std::errc::invalid_argument.
+     */
+    template<typename T>
+        requires std::floating_point<T>
+    to_chars_result to_chars(char* first, char* last, T value, chars_format fmt, int precision = 0) {
+        static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>, "Unsupported floating point type");
+        if (first >= last) {
+            return {last, std::errc::value_too_large};
+        }
+        const char* const format_string = __yycc_float_format(fmt);
+        if (format_string == nullptr) {
+            return {first, std::errc::invalid_argument};
+        }
+
+        // A float is promoted to double when passed through "...", so one set of formats serves both types.
+        const auto promoted = static_cast<double>(value);
+        // Measure first: fixed notation of a large value can be hundreds of characters long.
+        const int expected = std::snprintf(nullptr, 0, format_string, precision, promoted);
+        if (expected < 0) {
+            return {last, std::errc::value_too_large};
+        }
+        // snprintf always appends a NUL terminator, so reserve one extra character for it.
+        std::string scratch(static_cast<std::size_t>(expected) + 1, '\0');
+        const int written = std::snprintf(scratch.data(), scratch.size(), format_string, precision, promoted);
+        if (written != expected) {
+            return {last, std::errc::value_too_large};
+        }
+        scratch.resize(static_cast<std::size_t>(written));
+
+        // printf writes a "0x" prefix in hexadecimal notation, which std::to_chars does not.
+        if (fmt == chars_format::hex) {
+            const auto prefix = scratch.find("0x");
+            if (prefix != std::string::npos) {
+                scratch.erase(prefix, 2);
+            }
+        }
+
+        if (scratch.size() > static_cast<std::size_t>(last - first)) {
+            return {last, std::errc::value_too_large};
+        }
+        std::memcpy(first, scratch.data(), scratch.size());
+        return {first + scratch.size(), std::errc{}};
+    }
+
+    /**
+     * @brief Parse an integer from the start of [first, last).
+     * @details The accepted pattern is an optional minus sign (signed types only) followed by digits.
+     * Leading whitespace, a plus sign and radix prefixes such as "0x" are not accepted.
+     * @param[in] first Start of the text.
+     * @param[in] last One past the end of the text.
+     * @param[out] value Receives the parsed number. It is left untouched on failure.
+     * @param[in] base The radix, from 2 to 36. Letters are accepted in either case.
+     * @return On success \c ptr is the first character not consumed and \c ec is value-initialized.
+     * If no number is found or \c base is out of range, \c ptr is \c first and \c ec is \c std::errc::invalid_argument.
+     * If the number does not fit in \c T, \c ptr is past all its digits and \c ec is \c std::errc::result_out_of_range.
+     */
+    template<typename T>
+        requires std::integral<T> && (!std::same_as<T, bool>)
+    from_chars_result from_chars(const char* first, const char* last, T& value, int base = 10) {
+        static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported integral type");
+        if (first >= last || base < 2 || base > 36) {
+            return {first, std::errc::invalid_argument};
+        }
+
+        const char* cursor = first;
+        bool negative = false;
+        if constexpr (std::is_signed_v<T>) {
+            if (*cursor == '-') {
+                negative = true;
+                ++cursor;
+            }
+        }
+
+        // Largest magnitude T can hold for this sign; the negative range reaches one further.
+        const auto limit = static_cast<unsigned long long>(std::numeric_limits<T>::max()) + (negative ? 1ULL : 0ULL);
+        const auto radix = static_cast<unsigned long long>(base);
+
+        const char* const digits_begin = cursor;
+        unsigned long long magnitude = 0;
+        bool overflowed = false;
+        for (; cursor != last; ++cursor) {
+            const int digit = __yycc_digit_value(*cursor);
+            if (digit < 0 || digit >= base) {
+                break;
+            }
+            // Keep consuming digits after an overflow so that ptr ends up past the whole number.
+            if (overflowed) {
+                continue;
+            }
+            const auto addend = static_cast<unsigned long long>(digit);
+            if (magnitude > (limit - addend) / radix) {
+                overflowed = true;
+            } else {
+                magnitude = magnitude * radix + addend;
+            }
+        }
+
+        if (cursor == digits_begin) {
+            return {first, std::errc::invalid_argument};
+        }
+        if (overflowed) {
+            return {cursor, std::errc::result_out_of_range};
+        }
+        value = static_cast<T>(negative ? 0ULL - magnitude : magnitude);
+        return {cursor, std::errc{}};
+    }
+
+    /**
+     * @brief Parse a floating point number from the start of [first, last).
+     * @details Leading whitespace and a leading plus sign are not accepted.
+     * @param[in] first Start of the text.
+     * @param[in] last One past the end of the text.
+     * @param[out] value Receives the parsed number. It is left untouched on failure.
+     * @param[in] fmt Ignored by design; the text is parsed by the rules of \c strtof and \c strtod.
+     * @return On success \c ptr is the first character not consumed and \c ec is value-initialized.
+     * If no number is found, \c ptr is \c first and \c ec is \c std::errc::invalid_argument.
+     * If the C library reports a range error, \c ptr is past the number and \c ec is \c std::errc::result_out_of_range.
+     */
+    template<typename T>
+        requires std::floating_point<T>
+    from_chars_result from_chars(const char* first, const char* last, T& value, chars_format fmt = chars_format::general) {
+        static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>, "Unsupported floating point type");
+        // We ignore "fmt" by design.
+        static_cast<void>(fmt);
+        if (first >= last) {
+            return {first, std::errc::invalid_argument};
+        }
+        // The strtof family skips leading whitespace and accepts '+'; std::from_chars accepts neither.
+        if (*first == '+' || std::isspace(static_cast<unsigned char>(*first)) != 0) {
+            return {first, std::errc::invalid_argument};
+        }
+
+        // The strtof family requires a NUL-terminated string, which [first, last) does not guarantee.
+        // So the text has to be copied.
+        const std::string buffer(first, static_cast<std::size_t>(last - first));
+        const char* const text = buffer.c_str();
+
+        char* end_ptr = nullptr;
+        T parsed{};
+        errno = 0;
+        if constexpr (std::is_same_v<T, float>) {
+            parsed = std::strtof(text, &end_ptr);
+        } else {
+            parsed = std::strtod(text, &end_ptr);
+        }
+        if (end_ptr == text) {
+            return {first, std::errc::invalid_argument};
+        }
+
+        // end_ptr points into the temporary copy, so translate it back into the caller's range.
+        const char* const stop = first + (end_ptr - text);
+        if (errno == ERANGE) {
+            return {stop, std::errc::result_out_of_range};
+        }
+        value = parsed;
+        return {stop, std::errc{}};
+    }
+
+} // namespace std
+
+#endif