#include #include #include #include #include #include #include #include #include #include #include namespace NlpCodec { class NlpException : public std::exception { public: NlpException(const char* msg) : message(msg ? msg : "") {} NlpException(const NlpException& rhs) : message(rhs.message) {} virtual ~NlpException() {} [[nodiscard]] virtual const char* what() const noexcept override { return message.c_str(); } private: std::string message; }; /// @brief The safe version of static_cast which throw exception /// if given value can not be cast into given type (out of range). template static constexpr _TyTo SafeCast(_TyFrom value) { if (!std::in_range<_TyTo>(value)) throw NlpException( "Fail to cast integral number because given value is greater than container." "This is usually caused by your input or output file is too long."); return static_cast<_TyTo>(value); } /// @brief The magic DWORD for file length encrption. /// @details It is actually the DWORD consisted by the first 4 bytes of XOR_ARRAY. constexpr const uint32_t MAGIC_DWORD = 0xF956A82Cu; constexpr const uint32_t CHECKSUM_OFFSET = 1072u; /// @brief The size of extra part of NLP file which store the size of original plain text file. constexpr const size_t TAIL_SIZE = sizeof(uint32_t) * 2u; /// @brief The core array for data encryption. /// @details First byte will XOR with the first byte of this array, and so on. /// When reaching the tail of this array, next give byte will perform XOR with the first byte again and so on. constexpr const uint8_t XOR_ARRAY[] { 0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37, 0xFA, 0x6B, 0xB5, 0xA1, 0x2C, 0x96, 0x13, 0xA2, 0xAB, 0x4F, 0xC5, 0xA1, 0x3E, 0xA7, 0x91, 0x8D, 0x2C, 0xDF, 0x78, 0x6D, 0x3C, 0xFC, 0x92, 0x1F, 0x1A, 0x62, 0xA7, 0x9C, 0x92, 0x29, 0x44, 0x6D, 0x3D, 0xA9, 0x2B, 0xE1, 0x91, 0xAD, 0x49, 0x3C, 0xE2, 0x33, 0xD2, 0x1A, 0x55, 0x92, 0xE7, 0x95, 0x8C, 0xDA, 0xD2, 0xCD, 0xA2, 0xCF, 0x92, 0x9A, 0xE1, 0xF9, 0x3A, 0x26, 0xFA, 0xC4, 0xA9, 0x23, 0xA9, 0x4D, 0x1A, 0x2C, 0x3C, 0x2A, 0xAC, 0x62, 0xA3, 0x92, 0xAC, 0x1F, 0x3E, 0xA6, 0xC9, 0xC8, 0x63, 0xCA, 0x52, 0xF9, 0xFB, 0x3A, 0x9C, 0x2A, 0xB2, 0x1A, 0x8D, 0x9A, 0x8C, 0x2A, 0x9C, 0x32, 0xAA, 0xC3, 0xA2, 0x97, 0x34, 0x92, 0xFA, 0x71, 0xBE, 0x3F, 0xAC, 0x28, 0x22, 0x9F, 0xAC, 0xE8 }; /// @brief The size of above array. constexpr const size_t XOR_ARRAY_LEN = sizeof(XOR_ARRAY) / sizeof(uint8_t); /// @brief A convenient mask for above array when performing modulo. constexpr const size_t XOR_ARRAY_MASK = XOR_ARRAY_LEN - 1u; // Use a static_assert to confirm computed XOR_ARRAY_MASK is what we desired. // Because some stupid programmers (like me) may change above array and fill a series of wrong data, // then this mask was computed wrongly. static_assert(XOR_ARRAY_MASK == 0x7Fu); static void GeneralXorOperation(void* data, size_t data_len) { uint8_t* ptr = reinterpret_cast(data); for (size_t i = 0u; i < data_len; ++i) { ptr[i] ^= XOR_ARRAY[i & XOR_ARRAY_MASK]; } } /// @brief Get the length of given file stream. static uint32_t GetFileLength(std::ifstream& fin) { // Fetch the types this stream used for following convenience. using stream_pos_t = std::ifstream::pos_type; using stream_off_t = std::ifstream::off_type; // Backups current file cursor. stream_pos_t current_pos = fin.tellg(); // Seek to the tail and get corresponding offset to get the length of file. fin.seekg(0, std::ios_base::end); stream_pos_t tail_pos = fin.tellg(); if (std::numeric_limits::max() < tail_pos) throw NlpException("The size of given file is too large. It should not larger than the capacity of uint32_t."); // Restore to previous backup file cursor fin.seekg(static_cast(current_pos), std::ios_base::beg); // Safely reurn cast length. return SafeCast(tail_pos); } // HINTS: // In zlib, uLong and uLongf is 32-bit or more. // So when casting them to uint32_t, you need use SafeCast to perform boundary check. // However, you can directly cast uint32_t to them because there is no overflow issue. // Additionally, uInt is 16-bit or more. // So when processing with uInt, please more carefully. static void EncodeNlp(std::ifstream& fin, std::ofstream& fout) { // Get file length and fetch uint32_t raw_size = GetFileLength(fin); // Fetch corresponding zlib boundary for the convenience of zlib encode. // uLong is 32-bit or more, so we need check whether uint32_t can hold the result first. uint32_t computed_boundary = SafeCast(compressBound(static_cast(raw_size))); // Create buffer first std::unique_ptr inbuf(new(std::nothrow) char[raw_size]); std::unique_ptr outbuf(new(std::nothrow) char[computed_boundary]); if (inbuf == nullptr || outbuf == nullptr) throw NlpException("Fail to allocate memory."); // Read data from file to input buffer fin.read(inbuf.get(), raw_size); if (!fin.good() || fin.gcount() != raw_size) throw NlpException("Fail to read file data into buffer."); // Do XOR operation GeneralXorOperation(inbuf.get(), raw_size); // Do compress and get the size of compressed data. uLongf dest_len = static_cast(computed_boundary); int ret = compress2( reinterpret_cast(outbuf.get()), &dest_len, reinterpret_cast(inbuf.get()), static_cast(raw_size), Z_BEST_COMPRESSION ); // Check ZLib result. if (ret != Z_OK) throw NlpException("Zlib compress() failed."); // Fetch final compressed size. uint32_t compressed_size = SafeCast(dest_len); // Produce checksum uint32_t checksum = static_cast(adler32(0u, reinterpret_cast(outbuf.get()), SafeCast(compressed_size))); // Write compressed data into file fout.write(outbuf.get(), compressed_size); if (!fout.good()) throw NlpException("Fail to write data into file."); // Raw size and checksum need some extra encryption before writting raw_size = static_cast(-(static_cast(raw_size) + 1)) ^ MAGIC_DWORD; checksum = checksum + CHECKSUM_OFFSET; // Write raw size and checksum fout.write(reinterpret_cast(&raw_size), sizeof(uint32_t)); if (!fout.good()) throw NlpException("Fail to write raw size into file."); fout.write(reinterpret_cast(&checksum), sizeof(uint32_t)); if (!fout.good()) throw NlpException("Fail to write checksum into file."); } static void DecodeNlp(std::ifstream& fin, std::ofstream& fout) { // Seek to tail to get essential data uint32_t compressed_size = GetFileLength(fin); if (compressed_size < TAIL_SIZE) throw NlpException("Invalid file. File is too short."); // Get expected raw size and checksum compressed_size -= TAIL_SIZE; fin.seekg(compressed_size, std::ios_base::beg); uint32_t expected_raw_size = 0u, expected_checksum = 0u; fin.read(reinterpret_cast(&expected_raw_size), sizeof(uint32_t)); fin.read(reinterpret_cast(&expected_checksum), sizeof(uint32_t)); fin.seekg(0, std::ios_base::beg); // Raw size and checksum data need to do some extra decryption. expected_raw_size = static_cast(-1 - static_cast(MAGIC_DWORD ^ expected_raw_size)); expected_checksum = expected_checksum - CHECKSUM_OFFSET; // Allocate memory to store data std::unique_ptr inbuf(new(std::nothrow) char[compressed_size]); std::unique_ptr outbuf(new(std::nothrow) char[expected_raw_size]); if (inbuf == nullptr || outbuf == nullptr) throw NlpException("Fail to allocate memory."); // Read file into buffer fin.read(inbuf.get(), compressed_size); if (!fin.good() || fin.gcount() != compressed_size) throw NlpException("Fail to read data into buffer.\n"); // Test checksum uint32_t checksum = static_cast(adler32(0u, reinterpret_cast(inbuf.get()), SafeCast(compressed_size))); if (checksum != expected_checksum) { fprintf(stdout, "[ERR] Fail to match crc32. Expect 0x%" PRIx32 " got 0x%" PRIx32 ".\n", expected_checksum, checksum ); return false; } // Do decompress uLongf _destLen = static_cast(expected_raw_size); int ret = uncompress( reinterpret_cast(outbuf.get()), &_destLen, reinterpret_cast(inbuf.get()), static_cast(compressed_size) ); // Check zlib result if (ret != Z_OK) throw NlpException("Zlib uncompress() failed."); // do xor operation GeneralXorOperation(outbuf.get(), expected_raw_size); // Write result into file fout.write(outbuf.get(), expected_raw_size); if (!fout.good()) throw NlpException("Fail to write data into file."); } } namespace NlpCodec::Runtime { enum class UserOperation { Encode, Decode, Version, Help }; struct UserRequest { UserOperation mUserOperation; std::filesystem::path mInputFile; std::filesystem::path mOutputFile; }; static void PrintHelp() { std::cout << "NlpCodec Usage" << std::endl << "NlpCodec [encode | decode | version | help] " << std::endl << std::endl << "version - print version info about this program." << std::endl << "help - print this page." << std::endl << std::endl << "encode - encode text file into NLP file." << std::endl << "decode - decode NLP file into text file." << std::endl << " - the source file." << std::endl << " the path to text file in encode mode." << std::endl << " the path to NLP file in decode mode." << std::endl << " - the destination file." << std::endl << " the path to NLP file in encode mode." << std::endl << " the path to text file in decode mode." << std::endl << "" << std::endl; } static void PrintVersion() { std::cout << "NlpCodec built at " __DATE__ " " __TIME__ << std::endl << "MIT License. Copyright (c) 2022-2024 yyc12345" << std::endl; } static UserRequest ResolveArguments(int argc, char* argv[]) { // Prepare return value UserRequest ret { UserOperation::Version, "", "" }; switch (argc) { case 2: { // Get mode string std::string mode(argv[1]); // Check `help` and `version` if (mode == "version") { ret.mUserOperation = UserOperation::Version; } else if (mode == "help") { ret.mUserOperation = UserOperation::Help; } else { // Not matched. throw NlpException("Invalid argument! Must be one of `version` or `help`"); } // Return value return ret; } case 4: { // Get mode string std::string mode(argv[1]); // Check `encode` and `decode` if (mode == "encode") { ret.mUserOperation = UserOperation::Encode; } else if (mode == "decode") { ret.mUserOperation = UserOperation::Decode; } else { // Not matched. throw NlpException("Invalid argument! Must be one of `encode` or `decode`"); } // Setup input output file path ret.mInputFile = std::filesystem::path(argv[2]); ret.mOutputFile = std::filesystem::path(argv[3]); // Return value return ret; } default: throw NlpException("Invalid argument count!"); } } static void ExecuteWorker(const UserRequest& user_request) { // Take action according to different request first bool is_encode; switch (user_request.mUserOperation) { case UserOperation::Version: PrintVersion(); return; case NlpCodec::Runtime::UserOperation::Help: PrintHelp(); return; case NlpCodec::Runtime::UserOperation::Encode: is_encode = true; break; case NlpCodec::Runtime::UserOperation::Decode: is_encode = false; break; } // Do real codec related works. // Try to open files std::ifstream in_file; in_file.open(user_request.mInputFile, std::ios_base::in | std::ios_base::binary); std::ofstream out_file; out_file.open(user_request.mOutputFile, std::ios_base::out | std::ios_base::binary); // Check file status if (!in_file.is_open() || !out_file.is_open()) { throw NlpException("Fail to open input or output file."); } // Perform codec if (is_encode) { ::NlpCodec::EncodeNlp(in_file, out_file); } else { ::NlpCodec::DecodeNlp(in_file, out_file); } // Free resources in_file.close(); out_file.close(); } } int main(int argc, char* argv[]) { // Try parsing given arguments NlpCodec::Runtime::UserRequest user_request; try { user_request = NlpCodec::Runtime::ResolveArguments(argc, argv); } catch (const NlpCodec::NlpException& e) { std::cerr << "[Argument Error] " << e.what() << std::endl; return 1; } // Try executing real wroker try { NlpCodec::Runtime::ExecuteWorker(user_request); } catch (const NlpCodec::NlpException& e) { std::cerr << "[Codec Error] " << e.what() << std::endl; return 2; } return 0; }