yyc12345
52ea2745dd
- move gitignore in individual directories. - change some directory layout. - refactor NlpCodec but not finished.
367 lines
15 KiB
C++
367 lines
15 KiB
C++
#include <zlib.h>

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
|
|
|
|
namespace NlpCodec {
|
|
|
|
/// @brief The exception type thrown by every routine of NlpCodec.
/// @details It owns a copy of the message given at construction time,
/// so the pointer returned by what() stays valid as long as this object lives.
class NlpException : public std::exception {
public:
    /// @brief Build exception from a C-style message.
    /// @param[in] msg The message. nullptr is accepted and treated as empty string.
    NlpException(const char* msg) : mMessage(msg == nullptr ? "" : msg) {}
    /// @brief Copy the message held by another exception.
    NlpException(const NlpException& rhs) = default;
    virtual ~NlpException() = default;

    /// @brief Get the message given at construction time.
    [[nodiscard]] virtual const char* what() const noexcept override {
        return mMessage.c_str();
    }

private:
    /// @brief The owned copy of message.
    std::string mMessage;
};
|
|
|
|
/// @brief The safe version of static_cast which throw exception
|
|
/// if given value can not be cast into given type (out of range).
|
|
template<typename _TyTo, typename _TyFrom>
|
|
static constexpr _TyTo SafeCast(_TyFrom value) {
|
|
if (!std::in_range<_TyTo>(value))
|
|
throw NlpException(
|
|
"Fail to cast integral number because given value is greater than container."
|
|
"This is usually caused by your input or output file is too long.");
|
|
return static_cast<_TyTo>(value);
|
|
}
|
|
|
|
/// @brief The magic DWORD for file length encryption.
/// @details It is actually the DWORD consisted by the first 4 bytes of XOR_ARRAY.
constexpr uint32_t MAGIC_DWORD = 0xF956A82Cu;
/// @brief The offset added to the checksum before it is written into file,
/// and subtracted from the stored value when reading it back.
constexpr uint32_t CHECKSUM_OFFSET = 1072u;
/// @brief The size of extra part of NLP file which store the size of original plain text file.
/// @details The tail consists of two uint32_t: the encrypted raw size and the encrypted checksum.
constexpr size_t TAIL_SIZE = 2u * sizeof(uint32_t);
|
|
|
|
/// @brief The core array for data encryption.
/// @details The first byte of data is XORed with the first byte of this array, and so on.
/// When the tail of this array is reached, the next byte of data is XORed
/// with the first byte of this array again.
constexpr uint8_t XOR_ARRAY[] {
    0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37,
    0xFA, 0x6B, 0xB5, 0xA1, 0x2C, 0x96, 0x13, 0xA2, 0xAB, 0x4F, 0xC5, 0xA1, 0x3E, 0xA7, 0x91, 0x8D,
    0x2C, 0xDF, 0x78, 0x6D, 0x3C, 0xFC, 0x92, 0x1F, 0x1A, 0x62, 0xA7, 0x9C, 0x92, 0x29, 0x44, 0x6D,
    0x3D, 0xA9, 0x2B, 0xE1, 0x91, 0xAD, 0x49, 0x3C, 0xE2, 0x33, 0xD2, 0x1A, 0x55, 0x92, 0xE7, 0x95,
    0x8C, 0xDA, 0xD2, 0xCD, 0xA2, 0xCF, 0x92, 0x9A, 0xE1, 0xF9, 0x3A, 0x26, 0xFA, 0xC4, 0xA9, 0x23,
    0xA9, 0x4D, 0x1A, 0x2C, 0x3C, 0x2A, 0xAC, 0x62, 0xA3, 0x92, 0xAC, 0x1F, 0x3E, 0xA6, 0xC9, 0xC8,
    0x63, 0xCA, 0x52, 0xF9, 0xFB, 0x3A, 0x9C, 0x2A, 0xB2, 0x1A, 0x8D, 0x9A, 0x8C, 0x2A, 0x9C, 0x32,
    0xAA, 0xC3, 0xA2, 0x97, 0x34, 0x92, 0xFA, 0x71, 0xBE, 0x3F, 0xAC, 0x28, 0x22, 0x9F, 0xAC, 0xE8
};
/// @brief The count of bytes stored in XOR_ARRAY.
constexpr size_t XOR_ARRAY_LEN = sizeof(XOR_ARRAY) / sizeof(XOR_ARRAY[0]);
/// @brief A convenient mask replacing the modulo by XOR_ARRAY_LEN with a bitwise AND.
constexpr size_t XOR_ARRAY_MASK = XOR_ARRAY_LEN - 1u;
// Confirm at compile time that the computed mask is the expected one.
// If someone edits XOR_ARRAY and changes its length by accident,
// the mask would silently become wrong without this check.
static_assert(XOR_ARRAY_MASK == 0x7Fu, "XOR_ARRAY must hold exactly 128 bytes.");
|
|
|
|
static void GeneralXorOperation(void* data, size_t data_len) {
|
|
uint8_t* ptr = reinterpret_cast<uint8_t*>(data);
|
|
for (size_t i = 0u; i < data_len; ++i) {
|
|
ptr[i] ^= XOR_ARRAY[i & XOR_ARRAY_MASK];
|
|
}
|
|
}
|
|
|
|
/// @brief Get the length of given file stream.
|
|
static uint32_t GetFileLength(std::ifstream& fin) {
|
|
// Fetch the types this stream used for following convenience.
|
|
using stream_pos_t = std::ifstream::pos_type;
|
|
using stream_off_t = std::ifstream::off_type;
|
|
|
|
// Backups current file cursor.
|
|
stream_pos_t current_pos = fin.tellg();
|
|
// Seek to the tail and get corresponding offset to get the length of file.
|
|
fin.seekg(0, std::ios_base::end);
|
|
stream_pos_t tail_pos = fin.tellg();
|
|
if (std::numeric_limits<uint32_t>::max() < tail_pos)
|
|
throw NlpException("The size of given file is too large. It should not larger than the capacity of uint32_t.");
|
|
// Restore to previous backup file cursor
|
|
fin.seekg(static_cast<stream_off_t>(current_pos), std::ios_base::beg);
|
|
|
|
// Safely reurn cast length.
|
|
return SafeCast<uint32_t>(tail_pos);
|
|
}
|
|
|
|
// HINTS:
// In zlib, uLong and uLongf are 32-bit or more.
// So when casting them to uint32_t, you need to use SafeCast to perform a boundary check.
// However, you can directly cast uint32_t to them because there is no overflow issue.
// Additionally, uInt is 16-bit or more.
// So please be more careful when processing uInt.
|
|
|
|
static void EncodeNlp(std::ifstream& fin, std::ofstream& fout) {
|
|
// Get file length and fetch
|
|
uint32_t raw_size = GetFileLength(fin);
|
|
// Fetch corresponding zlib boundary for the convenience of zlib encode.
|
|
// uLong is 32-bit or more, so we need check whether uint32_t can hold the result first.
|
|
uint32_t computed_boundary = SafeCast<uint32_t>(compressBound(static_cast<uLong>(raw_size)));
|
|
|
|
// Create buffer first
|
|
std::unique_ptr<char[]> inbuf(new(std::nothrow) char[raw_size]);
|
|
std::unique_ptr<char[]> outbuf(new(std::nothrow) char[computed_boundary]);
|
|
if (inbuf == nullptr || outbuf == nullptr)
|
|
throw NlpException("Fail to allocate memory.");
|
|
|
|
// Read data from file to input buffer
|
|
fin.read(inbuf.get(), raw_size);
|
|
if (!fin.good() || fin.gcount() != raw_size)
|
|
throw NlpException("Fail to read file data into buffer.");
|
|
|
|
// Do XOR operation
|
|
GeneralXorOperation(inbuf.get(), raw_size);
|
|
|
|
// Do compress and get the size of compressed data.
|
|
uLongf dest_len = static_cast<uLongf>(computed_boundary);
|
|
int ret = compress2(
|
|
reinterpret_cast<Bytef*>(outbuf.get()), &dest_len,
|
|
reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uLong>(raw_size),
|
|
Z_BEST_COMPRESSION
|
|
);
|
|
// Check ZLib result.
|
|
if (ret != Z_OK)
|
|
throw NlpException("Zlib compress() failed.");
|
|
// Fetch final compressed size.
|
|
uint32_t compressed_size = SafeCast<uint32_t>(dest_len);
|
|
|
|
// Produce checksum
|
|
uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(outbuf.get()), SafeCast<uInt>(compressed_size)));
|
|
|
|
// Write compressed data into file
|
|
fout.write(outbuf.get(), compressed_size);
|
|
if (!fout.good())
|
|
throw NlpException("Fail to write data into file.");
|
|
|
|
// Raw size and checksum need some extra encryption before writting
|
|
raw_size = static_cast<uint32_t>(-(static_cast<int32_t>(raw_size) + 1)) ^ MAGIC_DWORD;
|
|
checksum = checksum + CHECKSUM_OFFSET;
|
|
|
|
// Write raw size and checksum
|
|
fout.write(reinterpret_cast<char*>(&raw_size), sizeof(uint32_t));
|
|
if (!fout.good())
|
|
throw NlpException("Fail to write raw size into file.");
|
|
fout.write(reinterpret_cast<char*>(&checksum), sizeof(uint32_t));
|
|
if (!fout.good())
|
|
throw NlpException("Fail to write checksum into file.");
|
|
|
|
}
|
|
|
|
static void DecodeNlp(std::ifstream& fin, std::ofstream& fout) {
|
|
// Seek to tail to get essential data
|
|
uint32_t compressed_size = GetFileLength(fin);
|
|
if (compressed_size < TAIL_SIZE)
|
|
throw NlpException("Invalid file. File is too short.");
|
|
|
|
// Get expected raw size and checksum
|
|
compressed_size -= TAIL_SIZE;
|
|
fin.seekg(compressed_size, std::ios_base::beg);
|
|
uint32_t expected_raw_size = 0u, expected_checksum = 0u;
|
|
fin.read(reinterpret_cast<char*>(&expected_raw_size), sizeof(uint32_t));
|
|
fin.read(reinterpret_cast<char*>(&expected_checksum), sizeof(uint32_t));
|
|
fin.seekg(0, std::ios_base::beg);
|
|
|
|
// Raw size and checksum data need to do some extra decryption.
|
|
expected_raw_size = static_cast<uint32_t>(-1 - static_cast<int32_t>(MAGIC_DWORD ^ expected_raw_size));
|
|
expected_checksum = expected_checksum - CHECKSUM_OFFSET;
|
|
|
|
// Allocate memory to store data
|
|
std::unique_ptr<char[]> inbuf(new(std::nothrow) char[compressed_size]);
|
|
std::unique_ptr<char[]> outbuf(new(std::nothrow) char[expected_raw_size]);
|
|
if (inbuf == nullptr || outbuf == nullptr)
|
|
throw NlpException("Fail to allocate memory.");
|
|
|
|
// Read file into buffer
|
|
fin.read(inbuf.get(), compressed_size);
|
|
if (!fin.good() || fin.gcount() != compressed_size)
|
|
throw NlpException("Fail to read data into buffer.\n");
|
|
|
|
// Test checksum
|
|
uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(inbuf.get()), SafeCast<uInt>(compressed_size)));
|
|
if (checksum != expected_checksum) {
|
|
fprintf(stdout, "[ERR] Fail to match crc32. Expect 0x%" PRIx32 " got 0x%" PRIx32 ".\n",
|
|
expected_checksum, checksum
|
|
);
|
|
return false;
|
|
}
|
|
|
|
// Do decompress
|
|
uLongf _destLen = static_cast<uLongf>(expected_raw_size);
|
|
int ret = uncompress(
|
|
reinterpret_cast<Bytef*>(outbuf.get()), &_destLen,
|
|
reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uLong>(compressed_size)
|
|
);
|
|
// Check zlib result
|
|
if (ret != Z_OK)
|
|
throw NlpException("Zlib uncompress() failed.");
|
|
|
|
// do xor operation
|
|
GeneralXorOperation(outbuf.get(), expected_raw_size);
|
|
|
|
// Write result into file
|
|
fout.write(outbuf.get(), expected_raw_size);
|
|
if (!fout.good())
|
|
throw NlpException("Fail to write data into file.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
namespace NlpCodec::Runtime {
|
|
|
|
/// @brief The operation which user order this program to do.
enum class UserOperation {
    /// @brief Encode text file into NLP file.
    Encode,
    /// @brief Decode NLP file into text file.
    Decode,
    /// @brief Print version info about this program.
    Version,
    /// @brief Print help page.
    Help,
};
|
|
|
|
struct UserRequest {
|
|
UserOperation mUserOperation;
|
|
std::filesystem::path mInputFile;
|
|
std::filesystem::path mOutputFile;
|
|
};
|
|
|
|
/// @brief Print the usage of this program into standard output.
static void PrintHelp() {
    // Every entry is one output line. Empty entry produces a blank line.
    static constexpr const char* HELP_LINES[] {
        "NlpCodec Usage",
        "NlpCodec [encode | decode | version | help] <src> <dest>",
        "",
        "version - print version info about this program.",
        "help - print this page.",
        "",
        "encode - encode text file into NLP file.",
        "decode - decode NLP file into text file.",
        "<src> - the source file.",
        " the path to text file in encode mode.",
        " the path to NLP file in decode mode.",
        "<dest> - the destination file.",
        " the path to NLP file in encode mode.",
        " the path to text file in decode mode.",
        ""
    };

    for (const char* line : HELP_LINES) {
        std::cout << line << std::endl;
    }
}
|
|
|
|
/// @brief Print the build time and license of this program into standard output.
static void PrintVersion() {
    // __DATE__ and __TIME__ are expanded when this file is compiled,
    // so this line reports when the binary was built.
    static constexpr const char* BUILD_LINE = "NlpCodec built at " __DATE__ " " __TIME__;
    static constexpr const char* LICENSE_LINE = "MIT License. Copyright (c) 2022-2024 yyc12345";

    std::cout << BUILD_LINE << std::endl;
    std::cout << LICENSE_LINE << std::endl;
}
|
|
|
|
/// @brief Resolve command line arguments into user request.
/// @details Two forms are accepted:
/// - 2 arguments: program name followed by `version` or `help`.
/// - 4 arguments: program name followed by `encode` or `decode`,
///   the source file and the destination file.
/// @param[in] argc The count of arguments, including program name.
/// @param[in] argv The arguments.
/// @return The resolved request. File paths are only filled in the 4 arguments form.
/// @throws NlpException if argument count or mode string is invalid.
static UserRequest ResolveArguments(int argc, char* argv[]) {
    // Form 1: `version` or `help` without any file.
    if (argc == 2) {
        const std::string mode(argv[1]);
        UserRequest request { UserOperation::Version, "", "" };
        if (mode == "help") {
            request.mUserOperation = UserOperation::Help;
        } else if (mode != "version") {
            throw NlpException("Invalid argument! Must be one of `version` or `help`");
        }
        return request;
    }

    // Form 2: `encode` or `decode` with source and destination file.
    if (argc == 4) {
        const std::string mode(argv[1]);
        UserRequest request { UserOperation::Encode, "", "" };
        if (mode == "decode") {
            request.mUserOperation = UserOperation::Decode;
        } else if (mode != "encode") {
            throw NlpException("Invalid argument! Must be one of `encode` or `decode`");
        }
        // File paths are only touched once the mode has been accepted.
        request.mInputFile = std::filesystem::path(argv[2]);
        request.mOutputFile = std::filesystem::path(argv[3]);
        return request;
    }

    // Any other count of arguments is rejected.
    throw NlpException("Invalid argument count!");
}
|
|
|
|
static void ExecuteWorker(const UserRequest& user_request) {
|
|
// Take action according to different request first
|
|
bool is_encode;
|
|
switch (user_request.mUserOperation) {
|
|
case UserOperation::Version:
|
|
PrintVersion();
|
|
return;
|
|
case NlpCodec::Runtime::UserOperation::Help:
|
|
PrintHelp();
|
|
return;
|
|
case NlpCodec::Runtime::UserOperation::Encode:
|
|
is_encode = true;
|
|
break;
|
|
case NlpCodec::Runtime::UserOperation::Decode:
|
|
is_encode = false;
|
|
break;
|
|
}
|
|
|
|
// Do real codec related works.
|
|
// Try to open files
|
|
std::ifstream in_file;
|
|
in_file.open(user_request.mInputFile, std::ios_base::in | std::ios_base::binary);
|
|
std::ofstream out_file;
|
|
out_file.open(user_request.mOutputFile, std::ios_base::out | std::ios_base::binary);
|
|
// Check file status
|
|
if (!in_file.is_open() || !out_file.is_open()) {
|
|
throw NlpException("Fail to open input or output file.");
|
|
}
|
|
|
|
// Perform codec
|
|
if (is_encode) {
|
|
::NlpCodec::EncodeNlp(in_file, out_file);
|
|
} else {
|
|
::NlpCodec::DecodeNlp(in_file, out_file);
|
|
}
|
|
|
|
// Free resources
|
|
in_file.close();
|
|
out_file.close();
|
|
}
|
|
|
|
}
|
|
|
|
int main(int argc, char* argv[]) {
|
|
|
|
// Try parsing given arguments
|
|
NlpCodec::Runtime::UserRequest user_request;
|
|
try {
|
|
user_request = NlpCodec::Runtime::ResolveArguments(argc, argv);
|
|
} catch (const NlpCodec::NlpException& e) {
|
|
std::cerr << "[Argument Error] " << e.what() << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
// Try executing real wroker
|
|
try {
|
|
NlpCodec::Runtime::ExecuteWorker(user_request);
|
|
} catch (const NlpCodec::NlpException& e) {
|
|
std::cerr << "[Codec Error] " << e.what() << std::endl;
|
|
return 2;
|
|
}
|
|
|
|
return 0;
|
|
}
|