feat: finish iconv module
This commit is contained in:
@ -40,7 +40,6 @@ static size_t that_iconv(iconv_t cd, const char** inbuf, size_t* inbytesleft, ch
|
||||
#undef iconv_t
|
||||
#undef iconv_open
|
||||
#undef iconv_close
|
||||
|
||||
#undef iconv
|
||||
|
||||
#pragma endregion
|
||||
@ -184,6 +183,217 @@ namespace yycc::encoding::iconv {
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region Conversion Class Helper
|
||||
|
||||
// YYC MARK:
|
||||
// If we use UTF16 or UTF32 code name directly, it will produce a BOM at data head.
|
||||
// That's not what we expected.
|
||||
// So we need manually check runtime endian and explicitly specify endian in code name.
|
||||
|
||||
// TODO: fix this encoding endian issue.
|
||||
|
||||
static const NS_YYCC_STRING::u8char* UTF8_CODENAME_LITERAL = YYCC_U8("UTF-8");
|
||||
static const NS_YYCC_STRING::u8char* WCHAR_CODENAME_LITERAL = YYCC_U8("WCHAR_T");
|
||||
static const NS_YYCC_STRING::u8char* fetch_utf16_codename() {
|
||||
return YYCC_U8("UTF16");
|
||||
}
|
||||
static const NS_YYCC_STRING::u8char* UTF16_CODENAME_LITERAL = fetch_utf16_codename();
|
||||
static const NS_YYCC_STRING::u8char* fetch_utf32_codename() {
|
||||
return YYCC_U8("UTF32");
|
||||
}
|
||||
static const NS_YYCC_STRING::u8char* UTF32_CODENAME_LITERAL = fetch_utf32_codename();
|
||||
|
||||
// TODO: There is a memory copy in this function. Consider removing it in future.
//
// Body of the `priv_to_xxx` member functions.
// It expects a parameter named `src` (a string view of `src_char_type`) and a member named `token`.
// The source is handed to iconv_kernel as raw bytes. On success the returned byte buffer is
// reinterpreted as a string of `dst_char_type`, on failure the error of the kernel is forwarded.
// ConvError::BadRv is returned if the byte buffer can not be divided into whole `dst_char_type` units.
//
// NOTE: the length passed to iconv_kernel is a byte count, so the character count of `src`
// has to be scaled by the size of the source character type. This assumes iconv_kernel measures
// its input in bytes, as its uint8_t pointer parameter suggests - confirm against its definition.
#define CONVFN_TYPE0(src_char_type, dst_char_type) \
    namespace expected = NS_YYCC_PATCH_EXPECTED; \
    auto rv = iconv_kernel(this->token, \
                           reinterpret_cast<const uint8_t*>(src.data()), \
                           src.size() * sizeof(src_char_type)); \
    if (expected::is_value(rv)) { \
        const auto& dst = expected::get_value(rv); \
        if constexpr (sizeof(dst_char_type) > 1u) { \
            if (dst.size() % sizeof(dst_char_type) != 0u) return ConvError::BadRv; \
        } \
        return std::basic_string<dst_char_type>(reinterpret_cast<const dst_char_type*>(dst.data()), \
                                                dst.size() / sizeof(dst_char_type)); \
    } else { \
        return expected::get_error(rv); \
    }
|
||||
|
||||
// Body of the `bool to_xxx(src, dst)` member functions.
// It calls the matching `priv_` function with the parameter named `src`.
// If that call yields a value, the value is moved into the parameter named `dst` and true is returned.
// Otherwise false is returned and `dst` is not touched.
#define CONVFN_TYPE1(fct_name, src_char_type, dst_char_type) \
    namespace expected = NS_YYCC_PATCH_EXPECTED; \
    auto outcome = this->priv_##fct_name(src); \
    if (!expected::is_value(outcome)) return false; \
    dst = std::move(expected::get_value(outcome)); \
    return true;
|
||||
|
||||
// Body of the throwing `to_xxx(src)` member functions.
// It calls the `bool fct_name(src, dst)` overload with the parameter named `src`.
// The converted string is returned on success,
// and std::runtime_error is thrown if that overload reports failure.
#define CONVFN_TYPE2(fct_name, src_char_type, dst_char_type) \
    std::basic_string<dst_char_type> converted; \
    if (this->fct_name(src, converted)) return converted; \
    else throw std::runtime_error("fail to convert string in iconv function");
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region Char -> UTF8
|
||||
|
||||
/// @brief Build the converter's token from @p code_name and the UTF-8 code name.
/// @param[in] code_name Code name of the char-based encoding (presumably the source side of the token - confirm against Token).
CharToUtf8::CharToUtf8(const CodeName& code_name) : token(code_name, UTF8_CODENAME_LITERAL) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
CharToUtf8::~CharToUtf8() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The char string to convert.
/// @return The converted UTF-8 string on success, otherwise the ConvError of the failure.
ConvResult<NS_YYCC_STRING::u8string> CharToUtf8::priv_to_utf8(const std::string_view& src) {
|
||||
CONVFN_TYPE0(char, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_utf8() (body supplied by CONVFN_TYPE1).
/// @param[in] src The char string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool CharToUtf8::to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst) {
|
||||
CONVFN_TYPE1(to_utf8, char, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_utf8() (body supplied by CONVFN_TYPE2).
/// @param[in] src The char string to convert.
/// @return The converted UTF-8 string.
/// @exception std::runtime_error The conversion failed.
NS_YYCC_STRING::u8string CharToUtf8::to_utf8(const std::string_view& src) {
|
||||
CONVFN_TYPE2(to_utf8, char, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF8 -> Char
|
||||
|
||||
/// @brief Build the converter's token from the UTF-8 code name and @p code_name.
/// @param[in] code_name Code name of the char-based encoding (presumably the target side of the token - confirm against Token).
Utf8ToChar::Utf8ToChar(const CodeName& code_name) : token(UTF8_CODENAME_LITERAL, code_name) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
Utf8ToChar::~Utf8ToChar() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted char string on success, otherwise the ConvError of the failure.
ConvResult<std::string> Utf8ToChar::priv_to_char(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE0(NS_YYCC_STRING::u8char, char);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_char() (body supplied by CONVFN_TYPE1).
/// @param[in] src The UTF-8 string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool Utf8ToChar::to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst) {
|
||||
CONVFN_TYPE1(to_char, NS_YYCC_STRING::u8char, char);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_char() (body supplied by CONVFN_TYPE2).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted char string.
/// @exception std::runtime_error The conversion failed.
std::string Utf8ToChar::to_char(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE2(to_char, NS_YYCC_STRING::u8char, char);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region WChar -> UTF8
|
||||
|
||||
/// @brief Build the converter's token from the WCHAR_T code name and the UTF-8 code name.
WcharToUtf8::WcharToUtf8() : token(WCHAR_CODENAME_LITERAL, UTF8_CODENAME_LITERAL) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
WcharToUtf8::~WcharToUtf8() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The wchar_t string to convert.
/// @return The converted UTF-8 string on success, otherwise the ConvError of the failure.
ConvResult<NS_YYCC_STRING::u8string> WcharToUtf8::priv_to_utf8(const std::wstring_view& src) {
|
||||
CONVFN_TYPE0(wchar_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_utf8() (body supplied by CONVFN_TYPE1).
/// @param[in] src The wchar_t string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool WcharToUtf8::to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst) {
|
||||
CONVFN_TYPE1(to_utf8, wchar_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_utf8() (body supplied by CONVFN_TYPE2).
/// @param[in] src The wchar_t string to convert.
/// @return The converted UTF-8 string.
/// @exception std::runtime_error The conversion failed.
NS_YYCC_STRING::u8string WcharToUtf8::to_utf8(const std::wstring_view& src) {
|
||||
CONVFN_TYPE2(to_utf8, wchar_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF8 -> WChar
|
||||
|
||||
/// @brief Build the converter's token from the UTF-8 code name and the WCHAR_T code name.
Utf8ToWchar::Utf8ToWchar() : token(UTF8_CODENAME_LITERAL, WCHAR_CODENAME_LITERAL) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
Utf8ToWchar::~Utf8ToWchar() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted wchar_t string on success, otherwise the ConvError of the failure.
ConvResult<std::wstring> Utf8ToWchar::priv_to_wchar(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE0(NS_YYCC_STRING::u8char, wchar_t);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_wchar() (body supplied by CONVFN_TYPE1).
/// @param[in] src The UTF-8 string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool Utf8ToWchar::to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst) {
|
||||
CONVFN_TYPE1(to_wchar, NS_YYCC_STRING::u8char, wchar_t);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_wchar() (body supplied by CONVFN_TYPE2).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted wchar_t string.
/// @exception std::runtime_error The conversion failed.
std::wstring Utf8ToWchar::to_wchar(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE2(to_wchar, NS_YYCC_STRING::u8char, wchar_t);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF8 -> UTF16
|
||||
|
||||
/// @brief Build the converter's token from UTF8_CODENAME_LITERAL and UTF16_CODENAME_LITERAL.
Utf8ToUtf16::Utf8ToUtf16() : token(UTF8_CODENAME_LITERAL, UTF16_CODENAME_LITERAL) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
Utf8ToUtf16::~Utf8ToUtf16() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted char16_t string on success, otherwise the ConvError of the failure.
ConvResult<std::u16string> Utf8ToUtf16::priv_to_utf16(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE0(NS_YYCC_STRING::u8char, char16_t);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_utf16() (body supplied by CONVFN_TYPE1).
/// @param[in] src The UTF-8 string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool Utf8ToUtf16::to_utf16(const NS_YYCC_STRING::u8string_view& src, std::u16string& dst) {
|
||||
CONVFN_TYPE1(to_utf16, NS_YYCC_STRING::u8char, char16_t);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_utf16() (body supplied by CONVFN_TYPE2).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted char16_t string.
/// @exception std::runtime_error The conversion failed.
std::u16string Utf8ToUtf16::to_utf16(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE2(to_utf16, NS_YYCC_STRING::u8char, char16_t);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF16 -> UTF8
|
||||
|
||||
/// @brief Build the converter's token from UTF16_CODENAME_LITERAL and UTF8_CODENAME_LITERAL.
Utf16ToUtf8::Utf16ToUtf8() : token(UTF16_CODENAME_LITERAL, UTF8_CODENAME_LITERAL) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
Utf16ToUtf8::~Utf16ToUtf8() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The char16_t string to convert.
/// @return The converted UTF-8 string on success, otherwise the ConvError of the failure.
ConvResult<NS_YYCC_STRING::u8string> Utf16ToUtf8::priv_to_utf8(const std::u16string_view& src) {
|
||||
CONVFN_TYPE0(char16_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_utf8() (body supplied by CONVFN_TYPE1).
/// @param[in] src The char16_t string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool Utf16ToUtf8::to_utf8(const std::u16string_view& src, NS_YYCC_STRING::u8string& dst) {
|
||||
CONVFN_TYPE1(to_utf8, char16_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_utf8() (body supplied by CONVFN_TYPE2).
/// @param[in] src The char16_t string to convert.
/// @return The converted UTF-8 string.
/// @exception std::runtime_error The conversion failed.
NS_YYCC_STRING::u8string Utf16ToUtf8::to_utf8(const std::u16string_view& src) {
|
||||
CONVFN_TYPE2(to_utf8, char16_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF8 -> UTF32
|
||||
|
||||
/// @brief Build the converter's token from UTF8_CODENAME_LITERAL and UTF32_CODENAME_LITERAL.
Utf8ToUtf32::Utf8ToUtf32() : token(UTF8_CODENAME_LITERAL, UTF32_CODENAME_LITERAL) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
Utf8ToUtf32::~Utf8ToUtf32() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted char32_t string on success, otherwise the ConvError of the failure.
ConvResult<std::u32string> Utf8ToUtf32::priv_to_utf32(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE0(NS_YYCC_STRING::u8char, char32_t);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_utf32() (body supplied by CONVFN_TYPE1).
/// @param[in] src The UTF-8 string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool Utf8ToUtf32::to_utf32(const NS_YYCC_STRING::u8string_view& src, std::u32string& dst) {
|
||||
CONVFN_TYPE1(to_utf32, NS_YYCC_STRING::u8char, char32_t);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_utf32() (body supplied by CONVFN_TYPE2).
/// @param[in] src The UTF-8 string to convert.
/// @return The converted char32_t string.
/// @exception std::runtime_error The conversion failed.
std::u32string Utf8ToUtf32::to_utf32(const NS_YYCC_STRING::u8string_view& src) {
|
||||
CONVFN_TYPE2(to_utf32, NS_YYCC_STRING::u8char, char32_t);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF32 -> UTF8
|
||||
|
||||
/// @brief Build the converter's token from UTF32_CODENAME_LITERAL and UTF8_CODENAME_LITERAL.
Utf32ToUtf8::Utf32ToUtf8() : token(UTF32_CODENAME_LITERAL, UTF8_CODENAME_LITERAL) {}
|
||||
|
||||
/// @brief Destructor with an empty body; nothing is released explicitly here.
Utf32ToUtf8::~Utf32ToUtf8() {}
|
||||
|
||||
/// @brief Convert @p src with this converter's token (body supplied by CONVFN_TYPE0).
/// @param[in] src The char32_t string to convert.
/// @return The converted UTF-8 string on success, otherwise the ConvError of the failure.
ConvResult<NS_YYCC_STRING::u8string> Utf32ToUtf8::priv_to_utf8(const std::u32string_view& src) {
|
||||
CONVFN_TYPE0(char32_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Non-throwing conversion built on priv_to_utf8() (body supplied by CONVFN_TYPE1).
/// @param[in] src The char32_t string to convert.
/// @param[out] dst Receives the converted string; only written on success.
/// @return True on success, false if the conversion failed.
bool Utf32ToUtf8::to_utf8(const std::u32string_view& src, NS_YYCC_STRING::u8string& dst) {
|
||||
CONVFN_TYPE1(to_utf8, char32_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
/// @brief Throwing conversion built on the bool overload of to_utf8() (body supplied by CONVFN_TYPE2).
/// @param[in] src The char32_t string to convert.
/// @return The converted UTF-8 string.
/// @exception std::runtime_error The conversion failed.
NS_YYCC_STRING::u8string Utf32ToUtf8::to_utf8(const std::u32string_view& src) {
|
||||
CONVFN_TYPE2(to_utf8, char32_t, NS_YYCC_STRING::u8char);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
} // namespace yycc::encoding::iconv
|
||||
|
||||
#endif
|
||||
|
@ -44,15 +44,13 @@ namespace yycc::encoding::iconv {
|
||||
NullPointer, ///< Some of essential pointer in argument is nullptr.
|
||||
InvalidMbSeq, ///< An invalid multibyte sequence has been encountered in the input.
|
||||
IncompleteMbSeq, ///< An incomplete multibyte sequence has been encountered in the input.
|
||||
BadRv, ///< The byte size of the conversion result is not a multiple of the size of the expected char type.
|
||||
};
|
||||
|
||||
/// @private
/// @brief Result type of the conversion functions: an Expected carrying either a value of type T or a ConvError.
/// @tparam T The type of the value carried on success.
|
||||
template<typename T>
|
||||
using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected<T, ConvError>;
|
||||
|
||||
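// TODO: When documenting the classes below, point out that the functions prefixed with "static" are for one-off use, suitable when converting only once or twice.
|
||||
// The instance classes are suitable for cases that need to convert continuously.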
|
||||
|
||||
// Char -> UTF8
|
||||
class CharToUtf8 {
|
||||
public:
|
||||
@ -87,35 +85,35 @@ namespace yycc::encoding::iconv {
|
||||
Token token;
|
||||
};
|
||||
|
||||
// WChar -> Char
|
||||
class WcharToChar {
|
||||
// WChar -> UTF8
|
||||
class WcharToUtf8 {
|
||||
public:
|
||||
WcharToChar(const CodeName& code_name);
|
||||
~WcharToChar();
|
||||
YYCC_DELETE_COPY(WcharToChar)
|
||||
YYCC_DEFAULT_MOVE(WcharToChar)
|
||||
WcharToUtf8();
|
||||
~WcharToUtf8();
|
||||
YYCC_DELETE_COPY(WcharToUtf8)
|
||||
YYCC_DEFAULT_MOVE(WcharToUtf8)
|
||||
|
||||
public:
|
||||
ConvResult<std::string> priv_to_char(const std::wstring_view& src);
|
||||
bool to_char(const std::wstring_view& src, std::string& dst);
|
||||
std::string to_char(const std::wstring_view& src);
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::wstring_view& src);
|
||||
bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst);
|
||||
NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
};
|
||||
|
||||
// Char -> WChar
|
||||
class CharToWchar {
|
||||
// UTF8 -> WChar
|
||||
class Utf8ToWchar {
|
||||
public:
|
||||
CharToWchar(const CodeName& code_name);
|
||||
~CharToWchar();
|
||||
YYCC_DELETE_COPY(CharToWchar)
|
||||
YYCC_DEFAULT_MOVE(CharToWchar)
|
||||
Utf8ToWchar();
|
||||
~Utf8ToWchar();
|
||||
YYCC_DELETE_COPY(Utf8ToWchar)
|
||||
YYCC_DEFAULT_MOVE(Utf8ToWchar)
|
||||
|
||||
public:
|
||||
ConvResult<std::wstring> priv_to_wchar(const std::string_view& src, const CodeName& code_name);
|
||||
bool to_wchar(const std::string_view& src, std::wstring& dst, const CodeName& code_name);
|
||||
std::wstring to_wchar(const std::string_view& src, const CodeName& code_name);
|
||||
ConvResult<std::wstring> priv_to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||
bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst);
|
||||
std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||
|
||||
private:
|
||||
Token token;
|
||||
|
Reference in New Issue
Block a user