refactor: write iconv.
- write iconv encoding (not finished). - rename united_codec to pycodec.
This commit is contained in:
@ -117,7 +117,7 @@ BreakFunctionDefinitionParameters: false
|
||||
BreakInheritanceList: BeforeColon
|
||||
BreakStringLiterals: false
|
||||
BreakTemplateDeclarations: Yes
|
||||
ColumnLimit: 100
|
||||
ColumnLimit: 140
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
@ -206,7 +206,7 @@ RequiresExpressionIndentation: OuterScope
|
||||
SeparateDefinitionBlocks: Leave
|
||||
ShortNamespaceLines: 1
|
||||
SkipMacroDefinitionBody: false
|
||||
SortIncludes: CaseSensitive
|
||||
SortIncludes: Never
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: Lexicographic
|
||||
SpaceAfterCStyleCast: true
|
||||
|
@ -7,6 +7,7 @@ project(YYCC
|
||||
# Provide options
|
||||
option(YYCC_BUILD_TESTBENCH "Build testbench of YYCCommonplace." OFF)
|
||||
option(YYCC_BUILD_DOC "Build document of YYCCommonplace." OFF)
|
||||
option(YYCC_ENFORCE_ICONV "Enforce iconv support for this library (e.g. in MSYS2 environment)." OFF)
|
||||
option(YYCC_DEBUG_UE_FILTER "YYCC developer used switch for testing Windows unhandled exception filter. Should not set to ON!!!" OFF)
|
||||
|
||||
# Setup install path from CMake provided install path for convenient use.
|
||||
@ -27,6 +28,10 @@ if (YYCC_BUILD_TESTBENCH)
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
find_package(GTest REQUIRED)
|
||||
endif ()
|
||||
# Iconv is required if we are not in Windows or user request it
|
||||
if (YYCC_ENFORCE_ICONV OR (NOT WIN32))
|
||||
find_package(Iconv REQUIRED)
|
||||
endif ()
|
||||
|
||||
# Import 3 build targets
|
||||
add_subdirectory(src)
|
||||
|
@ -18,7 +18,7 @@ PRIVATE
|
||||
yycc/encoding/utf.cpp
|
||||
yycc/encoding/windows.cpp
|
||||
yycc/encoding/iconv.cpp
|
||||
yycc/encoding/united_codec.cpp
|
||||
yycc/encoding/pycodec.cpp
|
||||
# YYCC/COMHelper.cpp
|
||||
# YYCC/ArgParser.cpp
|
||||
# YYCC/ConfigManager.cpp
|
||||
@ -69,7 +69,7 @@ FILES
|
||||
yycc/encoding/utf.hpp
|
||||
yycc/encoding/windows.hpp
|
||||
yycc/encoding/iconv.hpp
|
||||
yycc/encoding/united_codec.hpp
|
||||
yycc/encoding/pycodec.hpp
|
||||
|
||||
# # Headers
|
||||
# # Common headers
|
||||
@ -102,6 +102,13 @@ PUBLIC
|
||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>"
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
)
|
||||
# Link Iconv if we have import it
|
||||
if (Iconv_FOUND)
|
||||
target_link_libraries(YYCCommonplace
|
||||
PRIVATE
|
||||
Iconv::Iconv
|
||||
)
|
||||
endif ()
|
||||
# Link with DbgHelp.lib on Windows
|
||||
target_link_libraries(YYCCommonplace
|
||||
PRIVATE
|
||||
@ -112,11 +119,13 @@ target_compile_features(YYCCommonplace PUBLIC cxx_std_17)
|
||||
set_target_properties(YYCCommonplace PROPERTIES CXX_EXTENSION OFF)
|
||||
# Setup macros
|
||||
target_compile_definitions(YYCCommonplace
|
||||
# Debug macro should populate to child projects
|
||||
PUBLIC
|
||||
# Debug macro. And it should populate to child projects
|
||||
$<$<BOOL:${YYCC_DEBUG_UE_FILTER}>:YYCC_DEBUG_UE_FILTER>
|
||||
# Unicode charset for private using
|
||||
# Iconv environment macro
|
||||
$<$<BOOL:${YYCC_ENFORCE_ICONV}>:YYCC_FEAT_ICONV>
|
||||
PRIVATE
|
||||
# Unicode charset for private using
|
||||
$<$<CXX_COMPILER_ID:MSVC>:UNICODE>
|
||||
$<$<CXX_COMPILER_ID:MSVC>:_UNICODE>
|
||||
)
|
||||
|
@ -1,209 +0,0 @@
|
||||
#include "EncodingHelper.hpp"
|
||||
|
||||
#include <locale>
|
||||
|
||||
namespace YYCC::EncodingHelper {
|
||||
|
||||
/* Define some assistant macros for easy writing. */
|
||||
|
||||
#define CONVFCT_TYPE2(fct_name, src_char_type, dst_char_type, ...) if (src == nullptr) return false; \
|
||||
std::basic_string_view<src_char_type> cache(src); \
|
||||
return fct_name(cache, dst, ##__VA_ARGS__);
|
||||
|
||||
#define CONVFCT_TYPE3(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
|
||||
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
|
||||
return ret;
|
||||
|
||||
#define CONVFCT_TYPE4(fct_name, src_char_type, dst_char_type, ...) std::basic_string<dst_char_type> ret; \
|
||||
if (!fct_name(src, ret, ##__VA_ARGS__)) ret.clear(); \
|
||||
return ret;
|
||||
|
||||
|
||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||
|
||||
#pragma region WcharToChar
|
||||
|
||||
bool WcharToChar(const std::wstring_view& src, std::string& dst, UINT code_page) {
|
||||
// if src is empty, direct output
|
||||
if (src.empty()) {
|
||||
dst.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
// init WideCharToMultiByte used variables
|
||||
// setup src pointer
|
||||
LPCWCH lpWideCharStr = reinterpret_cast<LPCWCH>(src.data());
|
||||
// check whether source string is too large.
|
||||
size_t cSrcSize = src.size();
|
||||
if (cSrcSize > std::numeric_limits<int>::max()) return false;
|
||||
int cchWideChar = static_cast<int>(src.size());
|
||||
|
||||
// do convertion
|
||||
// do a dry-run first to fetch desired size.
|
||||
int desired_size = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, NULL, 0, NULL, NULL);
|
||||
if (desired_size <= 0) return false;
|
||||
// resize dest for receiving result
|
||||
dst.resize(static_cast<size_t>(desired_size));
|
||||
// do real convertion
|
||||
int write_result = WideCharToMultiByte(code_page, 0, lpWideCharStr, cchWideChar, reinterpret_cast<LPSTR>(dst.data()), desired_size, NULL, NULL);
|
||||
if (write_result <= 0) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page) {
|
||||
CONVFCT_TYPE2(WcharToChar, wchar_t, char, code_page);
|
||||
}
|
||||
std::string WcharToChar(const std::wstring_view& src, UINT code_page) {
|
||||
CONVFCT_TYPE3(WcharToChar, wchar_t, char, code_page);
|
||||
}
|
||||
std::string WcharToChar(const wchar_t* src, UINT code_page) {
|
||||
CONVFCT_TYPE4(WcharToChar, wchar_t, char, code_page);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region CharToWchar
|
||||
|
||||
bool CharToWchar(const std::string_view& src, std::wstring& dst, UINT code_page) {
|
||||
// if src is empty, direct output
|
||||
if (src.empty()) {
|
||||
dst.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
// init WideCharToMultiByte used variables
|
||||
// setup src pointer
|
||||
LPCCH lpMultiByteStr = reinterpret_cast<LPCCH>(src.data());
|
||||
// check whether source string is too large.
|
||||
size_t cSrcSize = src.size();
|
||||
if (cSrcSize > std::numeric_limits<int>::max()) return false;
|
||||
int cbMultiByte = static_cast<int>(src.size());
|
||||
|
||||
// do convertion
|
||||
// do a dry-run first to fetch desired size.
|
||||
int desired_size = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, NULL, 0);
|
||||
if (desired_size <= 0) return false;
|
||||
// resize dest for receiving result
|
||||
dst.resize(static_cast<size_t>(desired_size));
|
||||
// do real convertion
|
||||
int write_result = MultiByteToWideChar(code_page, 0, lpMultiByteStr, cbMultiByte, reinterpret_cast<LPWSTR>(dst.data()), desired_size);
|
||||
if (write_result <= 0) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
bool CharToWchar(const char* src, std::wstring& dst, UINT code_page) {
|
||||
CONVFCT_TYPE2(CharToWchar, char, wchar_t, code_page);
|
||||
}
|
||||
std::wstring CharToWchar(const std::string_view& src, UINT code_page) {
|
||||
CONVFCT_TYPE3(CharToWchar, char, wchar_t, code_page);
|
||||
}
|
||||
std::wstring CharToWchar(const char* src, UINT code_page) {
|
||||
CONVFCT_TYPE4(CharToWchar, char, wchar_t, code_page);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region CharToChar
|
||||
|
||||
bool CharToChar(const std::string_view& src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
|
||||
std::wstring intermediary;
|
||||
if (!CharToWchar(src, intermediary, src_code_page)) return false;
|
||||
if (!WcharToChar(intermediary, dst, dst_code_page)) return false;
|
||||
return true;
|
||||
}
|
||||
bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page) {
|
||||
CONVFCT_TYPE2(CharToChar, char, char, src_code_page, dst_code_page);
|
||||
}
|
||||
std::string CharToChar(const std::string_view& src, UINT src_code_page, UINT dst_code_page) {
|
||||
CONVFCT_TYPE3(CharToChar, char, char, src_code_page, dst_code_page);
|
||||
}
|
||||
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page) {
|
||||
CONVFCT_TYPE4(CharToChar, char, char, src_code_page, dst_code_page);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region WcharToUTF8
|
||||
|
||||
bool WcharToUTF8(const std::wstring_view& src, yycc_u8string& dst) {
|
||||
std::string adapted_dst;
|
||||
bool ret = WcharToChar(src, adapted_dst, CP_UTF8);
|
||||
if (ret) dst = ToUTF8(adapted_dst);
|
||||
return ret;
|
||||
}
|
||||
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst) {
|
||||
CONVFCT_TYPE2(WcharToUTF8, wchar_t, yycc_char8_t);
|
||||
}
|
||||
yycc_u8string WcharToUTF8(const std::wstring_view& src) {
|
||||
CONVFCT_TYPE3(WcharToUTF8, wchar_t, yycc_char8_t);
|
||||
}
|
||||
yycc_u8string WcharToUTF8(const wchar_t* src) {
|
||||
CONVFCT_TYPE4(WcharToUTF8, wchar_t, yycc_char8_t);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF8ToWchar
|
||||
|
||||
bool UTF8ToWchar(const yycc_u8string_view& src, std::wstring& dst) {
|
||||
std::string_view adapted_src(ToOrdinaryView(src));
|
||||
return CharToWchar(adapted_src, dst, CP_UTF8);
|
||||
}
|
||||
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst) {
|
||||
CONVFCT_TYPE2(UTF8ToWchar, yycc_char8_t, wchar_t);
|
||||
}
|
||||
std::wstring UTF8ToWchar(const yycc_u8string_view& src) {
|
||||
CONVFCT_TYPE3(UTF8ToWchar, yycc_char8_t, wchar_t);
|
||||
}
|
||||
std::wstring UTF8ToWchar(const yycc_char8_t* src) {
|
||||
CONVFCT_TYPE4(UTF8ToWchar, yycc_char8_t, wchar_t);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region CharToUTF8
|
||||
|
||||
bool CharToUTF8(const std::string_view& src, yycc_u8string& dst, UINT code_page) {
|
||||
std::string adapted_dst;
|
||||
bool ret = CharToChar(src, adapted_dst, code_page, CP_UTF8);
|
||||
if (ret) dst = ToUTF8(adapted_dst);
|
||||
return ret;
|
||||
}
|
||||
bool CharToUTF8(const char* src, yycc_u8string& dst, UINT code_page) {
|
||||
CONVFCT_TYPE2(CharToUTF8, char, yycc_char8_t, code_page);
|
||||
}
|
||||
yycc_u8string CharToUTF8(const std::string_view& src, UINT code_page) {
|
||||
CONVFCT_TYPE3(CharToUTF8, char, yycc_char8_t, code_page);
|
||||
}
|
||||
yycc_u8string CharToUTF8(const char* src, UINT code_page) {
|
||||
CONVFCT_TYPE4(CharToUTF8, char, yycc_char8_t, code_page);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF8ToChar
|
||||
|
||||
bool UTF8ToChar(const yycc_u8string_view& src, std::string& dst, UINT code_page) {
|
||||
std::string_view adapted_src(ToOrdinaryView(src));
|
||||
return CharToChar(adapted_src, dst, CP_UTF8, code_page);
|
||||
}
|
||||
bool UTF8ToChar(const yycc_char8_t* src, std::string& dst, UINT code_page) {
|
||||
CONVFCT_TYPE2(UTF8ToChar, yycc_char8_t, char, code_page);
|
||||
}
|
||||
std::string UTF8ToChar(const yycc_u8string_view& src, UINT code_page) {
|
||||
CONVFCT_TYPE3(UTF8ToChar, yycc_char8_t, char, code_page);
|
||||
}
|
||||
std::string UTF8ToChar(const yycc_char8_t* src, UINT code_page) {
|
||||
CONVFCT_TYPE4(UTF8ToChar, yycc_char8_t, char, code_page);
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#endif
|
||||
|
||||
#undef CONVFCT_TYPE2
|
||||
#undef CONVFCT_TYPE3
|
||||
#undef CONVFCT_TYPE4
|
||||
|
||||
}
|
||||
|
@ -1,60 +0,0 @@
|
||||
#pragma once
|
||||
#include "YYCCInternal.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||
#include "WinImportPrefix.hpp"
|
||||
#include <Windows.h>
|
||||
#include "WinImportSuffix.hpp"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief The helper for all encoding stuff.
|
||||
* @details
|
||||
* For more infomations about how to use the functions provided by this namespace,
|
||||
* please see \ref library_encoding and \ref encoding_helper.
|
||||
*/
|
||||
namespace YYCC::EncodingHelper {
|
||||
|
||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||
|
||||
bool WcharToChar(const std::wstring_view& src, std::string& dst, UINT code_page);
|
||||
bool WcharToChar(const wchar_t* src, std::string& dst, UINT code_page);
|
||||
std::string WcharToChar(const std::wstring_view& src, UINT code_page);
|
||||
std::string WcharToChar(const wchar_t* src, UINT code_page);
|
||||
|
||||
bool CharToWchar(const std::string_view& src, std::wstring& dst, UINT code_page);
|
||||
bool CharToWchar(const char* src, std::wstring& dst, UINT code_page);
|
||||
std::wstring CharToWchar(const std::string_view& src, UINT code_page);
|
||||
std::wstring CharToWchar(const char* src, UINT code_page);
|
||||
|
||||
bool CharToChar(const std::string_view& src, std::string& dst, UINT src_code_page, UINT dst_code_page);
|
||||
bool CharToChar(const char* src, std::string& dst, UINT src_code_page, UINT dst_code_page);
|
||||
std::string CharToChar(const std::string_view& src, UINT src_code_page, UINT dst_code_page);
|
||||
std::string CharToChar(const char* src, UINT src_code_page, UINT dst_code_page);
|
||||
|
||||
|
||||
bool WcharToUTF8(const std::wstring_view& src, yycc_u8string& dst);
|
||||
bool WcharToUTF8(const wchar_t* src, yycc_u8string& dst);
|
||||
yycc_u8string WcharToUTF8(const std::wstring_view& src);
|
||||
yycc_u8string WcharToUTF8(const wchar_t* src);
|
||||
|
||||
bool UTF8ToWchar(const yycc_u8string_view& src, std::wstring& dst);
|
||||
bool UTF8ToWchar(const yycc_char8_t* src, std::wstring& dst);
|
||||
std::wstring UTF8ToWchar(const yycc_u8string_view& src);
|
||||
std::wstring UTF8ToWchar(const yycc_char8_t* src);
|
||||
|
||||
bool CharToUTF8(const std::string_view& src, yycc_u8string& dst, UINT code_page);
|
||||
bool CharToUTF8(const char* src, yycc_u8string& dst, UINT code_page);
|
||||
yycc_u8string CharToUTF8(const std::string_view& src, UINT code_page);
|
||||
yycc_u8string CharToUTF8(const char* src, UINT code_page);
|
||||
|
||||
bool UTF8ToChar(const yycc_u8string_view& src, std::string& dst, UINT code_page);
|
||||
bool UTF8ToChar(const yycc_char8_t* src, std::string& dst, UINT code_page);
|
||||
std::string UTF8ToChar(const yycc_u8string_view& src, UINT code_page);
|
||||
std::string UTF8ToChar(const yycc_char8_t* src, UINT code_page);
|
||||
|
||||
#endif
|
||||
|
||||
}
|
@ -0,0 +1,194 @@
|
||||
#include "iconv.hpp"
|
||||
|
||||
#if YYCC_FEAT_ICONV || (YYCC_OS != YYCC_OS_WINDOWS)
|
||||
|
||||
#include "../string/reinterpret.hpp"
|
||||
#include <cerrno>
|
||||
#include <stdexcept>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iconv.h>
|
||||
|
||||
#define NS_YYCC_STRING ::yycc::string
|
||||
#define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
|
||||
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
|
||||
|
||||
#pragma region Iconv Shit Fix
|
||||
|
||||
// YYC MARK:
|
||||
// I don't know what Iconv is for, Iconv put an huge pieces of shit into its header file "iconv.h" (at least for me).
|
||||
// Especially a macro called iconv, which pollutes my namespace name while also can not be disabled because I need to rely on it to access essential functions.
|
||||
// I can't simply redefine it, because I can't make sure that this "iconv" is defined in that way on all platforms.
|
||||
// So I can only write some definitions of functions and types here, and extract the functions and types I need before I declare the namespace.
|
||||
// And at the same time remove those annoying macro definitions. Hopefully, the compiler will optimize these wrapper functions.
|
||||
|
||||
typedef iconv_t that_iconv_t;
|
||||
static iconv_t that_iconv_open(const char* tocode, const char* fromcode) {
|
||||
return iconv_open(tocode, fromcode);
|
||||
}
|
||||
static int that_iconv_close(iconv_t cd) {
|
||||
return iconv_close(cd);
|
||||
}
|
||||
static size_t that_iconv(iconv_t cd, const char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft) {
|
||||
// YYC MARK:
|
||||
// This is also bullshit. I don't know why the real signature of this function differ with its document written by GNU.
|
||||
// I have to make a "const" cast in there.
|
||||
return iconv(cd, const_cast<char**>(inbuf), inbytesleft, outbuf, outbytesleft);
|
||||
}
|
||||
|
||||
#undef iconv_t
|
||||
#undef iconv_open
|
||||
#undef iconv_close
|
||||
|
||||
#undef iconv
|
||||
|
||||
#pragma endregion
|
||||
|
||||
namespace yycc::encoding::iconv {
|
||||
|
||||
static const that_iconv_t INVALID_ICONV_TOKEN = reinterpret_cast<that_iconv_t>(-1);
|
||||
|
||||
#pragma region PrivToken
|
||||
|
||||
class PrivToken {
|
||||
public:
|
||||
PrivToken(const CodeName& from_code, const CodeName& to_code) : inner(INVALID_ICONV_TOKEN) {
|
||||
// We must cast them into string container, not string view,
|
||||
// because they may not have NULL terminator.
|
||||
std::string iconv_from_code = NS_YYCC_STRING_REINTERPRET::as_ordinary(from_code),
|
||||
iconv_to_code = NS_YYCC_STRING_REINTERPRET::as_ordinary(to_code);
|
||||
// Call iconv_t creator
|
||||
that_iconv_t descriptor = that_iconv_open(iconv_to_code.c_str(), iconv_from_code.c_str());
|
||||
if (descriptor == INVALID_ICONV_TOKEN) {
|
||||
if (errno == EINVAL) {
|
||||
return;
|
||||
} else {
|
||||
throw std::runtime_error("impossible errno when calling iconv_open()");
|
||||
}
|
||||
}
|
||||
// Setup value
|
||||
this->inner = descriptor;
|
||||
}
|
||||
~PrivToken() {
|
||||
if (this->inner != INVALID_ICONV_TOKEN) {
|
||||
that_iconv_close(this->inner);
|
||||
}
|
||||
}
|
||||
YYCC_DELETE_COPY(PrivToken)
|
||||
YYCC_DEFAULT_MOVE(PrivToken)
|
||||
|
||||
bool is_valid() const { return this->inner != INVALID_ICONV_TOKEN; }
|
||||
that_iconv_t get_inner() const { return this->inner; }
|
||||
|
||||
private:
|
||||
that_iconv_t inner;
|
||||
};
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region Token
|
||||
|
||||
Token::Token(const CodeName& from_code, const CodeName& to_code) : inner(std::make_unique<PrivToken>(from_code, to_code)) {}
|
||||
|
||||
Token::~Token() {}
|
||||
|
||||
bool Token::is_valid() const {
|
||||
return this->inner->is_valid();
|
||||
}
|
||||
|
||||
PrivToken* Token::get_inner() const {
|
||||
return this->inner.get();
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
#pragma region Kernel
|
||||
|
||||
// YYC MARK:
|
||||
// Use std::monostate to simulate std::expected<void>.
|
||||
|
||||
constexpr const size_t ICONV_INC_LEN = 16u;
|
||||
constexpr size_t ICONV_ERR_RV = static_cast<size_t>(-1);
|
||||
|
||||
// Reference: https://stackoverflow.com/questions/13297458/simple-utf8-utf16-string-conversion-with-iconv
|
||||
|
||||
static ConvResult<std::monostate> iconv_kernel(
|
||||
const Token& token, const uint8_t* arg_inbuf, size_t arg_inbytes, uint8_t** arg_outbuf, size_t* arg_outbytes) {
|
||||
#define SETUP_RV(buf, len) \
|
||||
*arg_outbuf = (buf); \
|
||||
*arg_outbytes = (len);
|
||||
|
||||
// ===== Check Requirements =====
|
||||
// Unwrap and check iconv_t
|
||||
that_iconv_t cd = token.get_inner()->get_inner();
|
||||
if (cd == INVALID_ICONV_TOKEN) return ConvError::InvalidCd;
|
||||
|
||||
// Check nullptr output variables
|
||||
if (arg_outbuf == nullptr || arg_outbytes == nullptr) return ConvError::NullPointer;
|
||||
// Check empty input
|
||||
if (arg_inbytes == 0u) {
|
||||
SETUP_RV(nullptr, 0u);
|
||||
return {};
|
||||
}
|
||||
// Check nullptr input variables
|
||||
if (arg_inbuf == nullptr) return ConvError::NullPointer;
|
||||
|
||||
// ===== Do Iconv =====
|
||||
// setup input variables
|
||||
size_t inbytesleft = arg_inbytes;
|
||||
const char* inbuf = reinterpret_cast<const char*>(arg_inbuf);
|
||||
// pre-allocation output variables
|
||||
size_t outbytesall = arg_inbytes + ICONV_INC_LEN;
|
||||
char* outbufbase = static_cast<char*>(std::malloc(outbytesall));
|
||||
if (outbufbase == nullptr) throw std::bad_alloc();
|
||||
size_t outbytesleft = outbytesall;
|
||||
char* outbuf = outbufbase;
|
||||
|
||||
// conv core
|
||||
size_t nchars = that_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
while (nchars == ICONV_ERR_RV && errno == E2BIG) {
|
||||
// record the length has been converted
|
||||
size_t len = outbuf - str_to.data();
|
||||
|
||||
// resize for variables
|
||||
outbytesall += ICONV_INC_LEN;
|
||||
outbytesleft += ICONV_INC_LEN;
|
||||
|
||||
// resize for container
|
||||
str_to.resize(outbytesall);
|
||||
|
||||
// assign new outbuf from failed position
|
||||
outbuf = str_to.data() + len;
|
||||
nchars = that_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
}
|
||||
|
||||
// restore descriptor initial state
|
||||
that_iconv(cd, nullptr, nullptr, nullptr, nullptr);
|
||||
|
||||
// check error
|
||||
if (nchars == ICONV_ERR_RV) {
|
||||
// Free allocated buffer
|
||||
std::free(outbufbase);
|
||||
|
||||
if (errno == EILSEQ) {
|
||||
return ConvError::InvalidMbSeq;
|
||||
} else if (errno == EINVAL) {
|
||||
return ConvError::IncompleteMbSeq;
|
||||
} else {
|
||||
throw std::runtime_error("impossible errno when calling iconv_open()");
|
||||
}
|
||||
} else {
|
||||
// success
|
||||
// compute result data
|
||||
SETUP_RV(reinterpret_cast<uint8_t*>(outbufbase), outbytesall - outbytesleft);
|
||||
return {};
|
||||
}
|
||||
|
||||
#undef SETUP_RV
|
||||
}
|
||||
|
||||
#pragma endregion
|
||||
|
||||
} // namespace yycc::encoding::iconv
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
#include "../macro/os_detector.hpp"
|
||||
|
||||
#if YYCC_FEAT_ICONV || (YYCC_OS != YYCC_OS_WINDOWS)
|
||||
|
||||
#include "../macro/class_copy_move.hpp"
|
||||
#include "../patch/expected.hpp"
|
||||
#include "../string.hpp"
|
||||
#include <memory>
|
||||
|
||||
#define NS_YYCC_STRING ::yycc::string
|
||||
#define NS_YYCC_PATCH_EXPECTED ::yycc::patch::expected
|
||||
|
||||
namespace yycc::encoding::iconv {
|
||||
|
||||
// YYC MARK:
|
||||
// I don't want to include "iconv.h" in there.
|
||||
// One of reasons is that I want to hide all implementation of Iconv.
|
||||
// Another reason is that "iconv.h" defines some annoying macros which intervene some names in this files.
|
||||
// So I introduce PIMPL design mode. Use a pointer to hide all details in class PrivToken.
|
||||
|
||||
using CodeName = NS_YYCC_STRING::u8string_view;
|
||||
|
||||
/// @private
|
||||
class PrivToken;
|
||||
|
||||
class Token {
|
||||
public:
|
||||
Token(const CodeName& from_code, const CodeName& to_code);
|
||||
~Token();
|
||||
YYCC_DELETE_COPY(Token)
|
||||
YYCC_DEFAULT_MOVE(Token)
|
||||
|
||||
bool is_valid() const;
|
||||
PrivToken* get_inner() const;
|
||||
|
||||
private:
|
||||
std::unique_ptr<PrivToken> inner;
|
||||
};
|
||||
|
||||
/// @private
|
||||
enum class ConvError {
|
||||
InvalidCd, ///< Given token is invalid.
|
||||
NullPointer, ///< Some of essential pointer in argument is nullptr.
|
||||
InvalidMbSeq, ///< An invalid multibyte sequence has been encountered in the input.
|
||||
IncompleteMbSeq, ///< An incomplete multibyte sequence has been encountered in the input.
|
||||
};
|
||||
|
||||
/// @private
|
||||
template<typename T>
|
||||
using ConvResult = NS_YYCC_PATCH_EXPECTED::Expected<T, ConvError>;
|
||||
|
||||
} // namespace yycc::encoding::iconv
|
||||
|
||||
#undef NS_YYCC_PATCH_EXPECTED
|
||||
#undef NS_YYCC_STRING
|
||||
|
||||
#endif
|
||||
|
@ -7,8 +7,8 @@
|
||||
#include <stdexcept>
|
||||
|
||||
#include "../windows/import_guard_head.hpp"
|
||||
#include "../windows/import_guard_tail.hpp"
|
||||
#include <Windows.h>
|
||||
#include "../windows/import_guard_tail.hpp"
|
||||
|
||||
#define NS_YYCC_STRING ::yycc::string
|
||||
#define NS_YYCC_STRING_REINTERPRET ::yycc::string::reinterpret
|
||||
@ -134,9 +134,7 @@ namespace yycc::encoding::windows {
|
||||
|
||||
#pragma region Char -> Char
|
||||
|
||||
ConvResult<std::string> priv_to_char(const std::string_view& src,
|
||||
CodePage src_code_page,
|
||||
CodePage dst_code_page) {
|
||||
ConvResult<std::string> priv_to_char(const std::string_view& src, CodePage src_code_page, CodePage dst_code_page) {
|
||||
namespace expected = NS_YYCC_PATCH_EXPECTED;
|
||||
|
||||
// Perform first convertion
|
||||
@ -205,8 +203,7 @@ namespace yycc::encoding::windows {
|
||||
|
||||
#pragma region Char -> UTF8
|
||||
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src,
|
||||
CodePage code_page) {
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src, CodePage code_page) {
|
||||
namespace expected = NS_YYCC_PATCH_EXPECTED;
|
||||
|
||||
auto rv = priv_to_char(src, code_page, CP_UTF8);
|
||||
@ -228,6 +225,7 @@ namespace yycc::encoding::windows {
|
||||
#pragma endregion
|
||||
|
||||
#pragma region UTF8 -> Char
|
||||
|
||||
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src,
|
||||
CodePage code_page) {
|
||||
return priv_to_char(NS_YYCC_STRING_REINTERPRET::as_ordinary_view(src), CP_UTF8, code_page);
|
||||
|
@ -1,7 +1,10 @@
|
||||
#pragma once
|
||||
#include "../macro/os_detector.hpp"
|
||||
#include "../string.hpp"
|
||||
|
||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||
|
||||
#include "../patch/expected.hpp"
|
||||
#include "../string.hpp"
|
||||
#include <cstdint>
|
||||
|
||||
#define NS_YYCC_STRING ::yycc::string
|
||||
@ -9,14 +12,12 @@
|
||||
|
||||
namespace yycc::encoding::windows {
|
||||
|
||||
#if YYCC_OS == YYCC_OS_WINDOWS
|
||||
|
||||
using CodePage = uint32_t;
|
||||
|
||||
/// @private
|
||||
enum class ConvError {
|
||||
TooLargeLength, ///< The length of given string is too large exceeding the maximum capacity of Win32 function.
|
||||
NoDesiredSize, ///< Can not compute the desired size of result string.
|
||||
NoDesiredSize, ///< Can not compute the desired size of result string.
|
||||
BadWrittenSize, ///< The size of written data is not matched with expected size.
|
||||
};
|
||||
|
||||
@ -34,47 +35,39 @@ namespace yycc::encoding::windows {
|
||||
bool to_wchar(const std::string_view& src, std::wstring& dst, CodePage code_page);
|
||||
std::wstring to_wchar(const std::string_view& src, CodePage code_page);
|
||||
|
||||
// YYC MARK:
|
||||
// Following functions are basically the alias of above functions.
|
||||
|
||||
// Char -> Char
|
||||
ConvResult<std::string> priv_to_char(const std::string_view& src,
|
||||
CodePage src_code_page,
|
||||
CodePage dst_code_page);
|
||||
bool to_char(const std::string_view& src,
|
||||
std::string& dst,
|
||||
CodePage src_code_page,
|
||||
CodePage dst_code_page);
|
||||
// This is the combination of "WChar -> Char" and "Char -> WChar"
|
||||
ConvResult<std::string> priv_to_char(const std::string_view& src, CodePage src_code_page, CodePage dst_code_page);
|
||||
bool to_char(const std::string_view& src, std::string& dst, CodePage src_code_page, CodePage dst_code_page);
|
||||
std::string to_char(const std::string_view& src, CodePage src_code_page, CodePage dst_code_page);
|
||||
|
||||
// YYC MARK:
|
||||
// Following functions are basically the specialized UTF8 functions.
|
||||
|
||||
// WChar -> UTF8
|
||||
// This is the specialization of "WChar -> Char"
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::wstring_view& src);
|
||||
bool to_utf8(const std::wstring_view& src, NS_YYCC_STRING::u8string& dst);
|
||||
NS_YYCC_STRING::u8string to_utf8(const std::wstring_view& src);
|
||||
|
||||
// UTF8 -> WChar
|
||||
// This is the specialization of "Char -> WChar"
|
||||
ConvResult<std::wstring> priv_to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||
bool to_wchar(const NS_YYCC_STRING::u8string_view& src, std::wstring& dst);
|
||||
std::wstring to_wchar(const NS_YYCC_STRING::u8string_view& src);
|
||||
|
||||
// Char -> UTF8
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src,
|
||||
CodePage code_page);
|
||||
// This is the specialization of "Char -> Char"
|
||||
ConvResult<NS_YYCC_STRING::u8string> priv_to_utf8(const std::string_view& src, CodePage code_page);
|
||||
bool to_utf8(const std::string_view& src, NS_YYCC_STRING::u8string& dst, CodePage code_page);
|
||||
NS_YYCC_STRING::u8string to_utf8(const std::string_view& src, CodePage code_page);
|
||||
|
||||
// UTF8 -> Char
|
||||
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src,
|
||||
CodePage code_page);
|
||||
// This is the specialization of "Char -> Char"
|
||||
ConvResult<std::string> priv_to_char(const NS_YYCC_STRING::u8string_view& src, CodePage code_page);
|
||||
bool to_char(const NS_YYCC_STRING::u8string_view& src, std::string& dst, CodePage code_page);
|
||||
std::string to_char(const NS_YYCC_STRING::u8string_view& src, CodePage code_page);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace yycc::encoding::windows
|
||||
|
||||
#undef NS_YYCC_PATCH_EXPECTED
|
||||
#undef NS_YYCC_STRING
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user