[refactor] do some works
- move gitignore in individual directories. - change some directory layout. - refactor NlpCodec but not finished.
This commit is contained in:
378
NlpCodec/.gitignore
vendored
Normal file
378
NlpCodec/.gitignore
vendored
Normal file
@ -0,0 +1,378 @@
|
||||
# -------------------- Custom --------------------
|
||||
# Disable install and build directory
|
||||
out/
|
||||
build/
|
||||
install/
|
||||
|
||||
# -------------------- CMake --------------------
|
||||
CMakeLists.txt.user
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
CMakeScripts
|
||||
Testing
|
||||
Makefile
|
||||
cmake_install.cmake
|
||||
install_manifest.txt
|
||||
compile_commands.json
|
||||
CTestTestfile.cmake
|
||||
_deps
|
||||
|
||||
# -------------------- Visual Studio --------------------
|
||||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
##
|
||||
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
|
||||
|
||||
# User-specific files
|
||||
*.rsuser
|
||||
*.suo
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
# Mono auto generated files
|
||||
mono_crash.*
|
||||
|
||||
# Build results
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPublic/
|
||||
[Rr]elease/
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
[Ww][Ii][Nn]32/
|
||||
[Aa][Rr][Mm]/
|
||||
[Aa][Rr][Mm]64/
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
[Ll]ogs/
|
||||
Temp/
|
||||
|
||||
# Visual Studio 2015/2017 cache/options directory
|
||||
.vs/
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
|
||||
# Visual Studio 2017 auto generated files
|
||||
Generated\ Files/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
# NUnit
|
||||
*.VisualState.xml
|
||||
TestResult.xml
|
||||
nunit-*.xml
|
||||
|
||||
# Build Results of an ATL Project
|
||||
[Dd]ebugPS/
|
||||
[Rr]eleasePS/
|
||||
dlldata.c
|
||||
|
||||
# Benchmark Results
|
||||
BenchmarkDotNet.Artifacts/
|
||||
|
||||
# .NET Core
|
||||
project.lock.json
|
||||
project.fragment.lock.json
|
||||
artifacts/
|
||||
|
||||
# ASP.NET Scaffolding
|
||||
ScaffoldingReadMe.txt
|
||||
|
||||
# StyleCop
|
||||
StyleCopReport.xml
|
||||
|
||||
# Files built by Visual Studio
|
||||
*_i.c
|
||||
*_p.c
|
||||
*_h.h
|
||||
*.ilk
|
||||
*.meta
|
||||
*.obj
|
||||
*.iobj
|
||||
*.pch
|
||||
*.pdb
|
||||
*.ipdb
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.rsp
|
||||
*.sbr
|
||||
*.tlb
|
||||
*.tli
|
||||
*.tlh
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*_wpftmp.csproj
|
||||
*.log
|
||||
*.vspscc
|
||||
*.vssscc
|
||||
.builds
|
||||
*.pidb
|
||||
*.svclog
|
||||
*.scc
|
||||
|
||||
# Chutzpah Test files
|
||||
_Chutzpah*
|
||||
|
||||
# Visual C++ cache files
|
||||
ipch/
|
||||
*.aps
|
||||
*.ncb
|
||||
*.opendb
|
||||
*.opensdf
|
||||
*.sdf
|
||||
*.cachefile
|
||||
*.VC.db
|
||||
*.VC.VC.opendb
|
||||
|
||||
# Visual Studio profiler
|
||||
*.psess
|
||||
*.vsp
|
||||
*.vspx
|
||||
*.sap
|
||||
|
||||
# Visual Studio Trace Files
|
||||
*.e2e
|
||||
|
||||
# TFS 2012 Local Workspace
|
||||
$tf/
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
*.gpState
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
_ReSharper*/
|
||||
*.[Rr]e[Ss]harper
|
||||
*.DotSettings.user
|
||||
|
||||
# TeamCity is a build add-in
|
||||
_TeamCity*
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
*.dotCover
|
||||
|
||||
# AxoCover is a Code Coverage Tool
|
||||
.axoCover/*
|
||||
!.axoCover/settings.json
|
||||
|
||||
# Coverlet is a free, cross platform Code Coverage Tool
|
||||
# One pattern per report extension. A bracket expression is a single-character
# class in gitignore syntax, not a list of alternatives.
coverage*.json
coverage*.xml
coverage*.info
|
||||
|
||||
# Visual Studio code coverage results
|
||||
*.coverage
|
||||
*.coveragexml
|
||||
|
||||
# NCrunch
|
||||
_NCrunch_*
|
||||
.*crunch*.local.xml
|
||||
nCrunchTemp_*
|
||||
|
||||
# MightyMoose
|
||||
*.mm.*
|
||||
AutoTest.Net/
|
||||
|
||||
# Web workbench (sass)
|
||||
.sass-cache/
|
||||
|
||||
# Installshield output folder
|
||||
[Ee]xpress/
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/Html2
|
||||
DocProject/Help/html
|
||||
|
||||
# Click-Once directory
|
||||
publish/
|
||||
|
||||
# Publish Web Output
|
||||
*.[Pp]ublish.xml
|
||||
*.azurePubxml
|
||||
# Note: Comment the next line if you want to checkin your web deploy settings,
|
||||
# but database connection strings (with potential passwords) will be unencrypted
|
||||
*.pubxml
|
||||
*.publishproj
|
||||
|
||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||
# in these scripts will be unencrypted
|
||||
PublishScripts/
|
||||
|
||||
# NuGet Packages
|
||||
*.nupkg
|
||||
# NuGet Symbol Packages
|
||||
*.snupkg
|
||||
# The packages folder can be ignored because of Package Restore
|
||||
**/[Pp]ackages/*
|
||||
# except build/, which is used as an MSBuild target.
|
||||
!**/[Pp]ackages/build/
|
||||
# Uncomment if necessary however generally it will be regenerated when needed
|
||||
#!**/[Pp]ackages/repositories.config
|
||||
# NuGet v3's project.json files produces more ignorable files
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
|
||||
# Microsoft Azure Build Output
|
||||
csx/
|
||||
*.build.csdef
|
||||
|
||||
# Microsoft Azure Emulator
|
||||
ecf/
|
||||
rcf/
|
||||
|
||||
# Windows Store app package directories and files
|
||||
AppPackages/
|
||||
BundleArtifacts/
|
||||
Package.StoreAssociation.xml
|
||||
_pkginfo.txt
|
||||
*.appx
|
||||
*.appxbundle
|
||||
*.appxupload
|
||||
|
||||
# Visual Studio cache files
|
||||
# files ending in .cache can be ignored
|
||||
*.[Cc]ache
|
||||
# but keep track of directories ending in .cache
|
||||
!?*.[Cc]ache/
|
||||
|
||||
# Others
|
||||
ClientBin/
|
||||
~$*
|
||||
*~
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.jfm
|
||||
*.pfx
|
||||
*.publishsettings
|
||||
orleans.codegen.cs
|
||||
|
||||
# Including strong name files can present a security risk
|
||||
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
|
||||
#*.snk
|
||||
|
||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||
#bower_components/
|
||||
|
||||
# RIA/Silverlight projects
|
||||
Generated_Code/
|
||||
|
||||
# Backup & report files from converting an old project file
|
||||
# to a newer Visual Studio version. Backup files are not needed,
|
||||
# because we have git ;-)
|
||||
_UpgradeReport_Files/
|
||||
Backup*/
|
||||
UpgradeLog*.XML
|
||||
UpgradeLog*.htm
|
||||
ServiceFabricBackup/
|
||||
*.rptproj.bak
|
||||
|
||||
# SQL Server files
|
||||
*.mdf
|
||||
*.ldf
|
||||
*.ndf
|
||||
|
||||
# Business Intelligence projects
|
||||
*.rdl.data
|
||||
*.bim.layout
|
||||
*.bim_*.settings
|
||||
*.rptproj.rsuser
|
||||
*- [Bb]ackup.rdl
|
||||
*- [Bb]ackup ([0-9]).rdl
|
||||
*- [Bb]ackup ([0-9][0-9]).rdl
|
||||
|
||||
# Microsoft Fakes
|
||||
FakesAssemblies/
|
||||
|
||||
# GhostDoc plugin setting file
|
||||
*.GhostDoc.xml
|
||||
|
||||
# Node.js Tools for Visual Studio
|
||||
.ntvs_analysis.dat
|
||||
node_modules/
|
||||
|
||||
# Visual Studio 6 build log
|
||||
*.plg
|
||||
|
||||
# Visual Studio 6 workspace options file
|
||||
*.opt
|
||||
|
||||
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||
*.vbw
|
||||
|
||||
# Visual Studio LightSwitch build output
|
||||
**/*.HTMLClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/ModelManifest.xml
|
||||
**/*.Server/GeneratedArtifacts
|
||||
**/*.Server/ModelManifest.xml
|
||||
_Pvt_Extensions
|
||||
|
||||
# Paket dependency manager
|
||||
.paket/paket.exe
|
||||
paket-files/
|
||||
|
||||
# FAKE - F# Make
|
||||
.fake/
|
||||
|
||||
# CodeRush personal settings
|
||||
.cr/personal
|
||||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Cake - Uncomment if you are using it
|
||||
# tools/**
|
||||
# !tools/packages.config
|
||||
|
||||
# Tabs Studio
|
||||
*.tss
|
||||
|
||||
# Telerik's JustMock configuration file
|
||||
*.jmconfig
|
||||
|
||||
# BizTalk build output
|
||||
*.btp.cs
|
||||
*.btm.cs
|
||||
*.odx.cs
|
||||
*.xsd.cs
|
||||
|
||||
# OpenCover UI analysis results
|
||||
OpenCover/
|
||||
|
||||
# Azure Stream Analytics local run output
|
||||
ASALocalRun/
|
||||
|
||||
# MSBuild Binary and Structured Log
|
||||
*.binlog
|
||||
|
||||
# NVidia Nsight GPU debugger configuration file
|
||||
*.nvuser
|
||||
|
||||
# MFractors (Xamarin productivity tool) working folder
|
||||
.mfractor/
|
||||
|
||||
# Local History for Visual Studio
|
||||
.localhistory/
|
||||
|
||||
# BeatPulse healthcheck temp database
|
||||
healthchecksdb
|
||||
|
||||
# Backup folder for Package Reference Convert tool in Visual Studio 2017
|
||||
MigrationBackup/
|
||||
|
||||
# Ionide (cross platform F# VS Code tools) working folder
|
||||
.ionide/
|
@ -1,19 +1,45 @@
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
cmake_minimum_required(VERSION 3.23)
|
||||
project(NlpCodec LANGUAGES CXX)
|
||||
|
||||
# find packages
|
||||
# Find ZLib packages
|
||||
find_package(ZLIB REQUIRED)
|
||||
|
||||
# set standard
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# generate program
|
||||
add_executable(NlpCodec NlpCodec.cpp)
|
||||
# Build executable
|
||||
add_executable(NlpCodec "")
|
||||
# Setup sources file and no need to setup headers
|
||||
target_sources(NlpCodec
|
||||
PRIVATE
|
||||
NlpCodec.cpp
|
||||
)
|
||||
# Link with ZLib
|
||||
target_link_libraries(NlpCodec
PRIVATE
    # Link through the imported target created by find_package(ZLIB):
    # it propagates zlib's include directories together with the library,
    # so no separate target_include_directories() call is required.
    ZLIB::ZLIB
)
|
||||
target_include_directories(NlpCodec
|
||||
PRIVATE
|
||||
${ZLIB_INCLUDE_DIRS}
|
||||
# Setup standard
|
||||
set_target_properties(NlpCodec
PROPERTIES
    # The sources rely on C++20 library features (e.g. std::in_range).
    CXX_STANDARD 20
    # Boolean property: fail at configure time instead of silently decaying
    # to an older standard.
    CXX_STANDARD_REQUIRED ON
    # Note the plural spelling; the singular form is not a CMake property
    # and would be ignored.
    CXX_EXTENSIONS OFF
)
|
||||
|
||||
# Extra options for MSVC
|
||||
# Unicode charset
|
||||
target_compile_definitions(NlpCodec
|
||||
PRIVATE
|
||||
$<$<CXX_COMPILER_ID:MSVC>:UNICODE>
|
||||
$<$<CXX_COMPILER_ID:MSVC>:_UNICODE>
|
||||
)
|
||||
# Order UTF-8 in both runtime and source environment
|
||||
target_compile_options(NlpCodec
|
||||
PRIVATE
|
||||
$<$<CXX_COMPILER_ID:MSVC>:/utf-8>
|
||||
)
|
||||
|
||||
# Install built artifact
|
||||
include(GNUInstallDirs)
|
||||
install(TARGETS NlpCodec
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
)
|
||||
|
||||
|
@ -1,15 +1,49 @@
|
||||
#include <zlib.h>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <cstdint>
|
||||
#include <cinttypes>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
|
||||
namespace NlpCodec {
|
||||
|
||||
constexpr const uint8_t g_XorArray[] {
|
||||
/// @brief The exception type used to report every failure raised by this program.
/// @details The message is copied into an owned std::string, so the pointer
/// returned by what() stays valid for the lifetime of the exception object.
/// Copy, move and destruction are left to the compiler (Rule of Zero).
class NlpException : public std::exception {
public:
    /// @brief Build an exception from a C-style message.
    /// @param msg The message to report. A null pointer is treated as an empty message.
    NlpException(const char* msg) : message(msg ? msg : "") {}

    /// @brief Get the message given at construction.
    /// @return A null-terminated string owned by this exception object.
    [[nodiscard]] const char* what() const noexcept override { return message.c_str(); }

private:
    std::string message;
};
|
||||
|
||||
/// @brief The safe version of static_cast which throw exception
|
||||
/// if given value can not be cast into given type (out of range).
|
||||
template<typename _TyTo, typename _TyFrom>
|
||||
static constexpr _TyTo SafeCast(_TyFrom value) {
|
||||
if (!std::in_range<_TyTo>(value))
|
||||
throw NlpException(
|
||||
"Fail to cast integral number because given value is greater than container."
|
||||
"This is usually caused by your input or output file is too long.");
|
||||
return static_cast<_TyTo>(value);
|
||||
}
|
||||
|
||||
/// @brief The magic DWORD for file length encryption.
|
||||
/// @details It is the DWORD formed by the first 4 bytes of XOR_ARRAY, read in little-endian order.
|
||||
constexpr const uint32_t MAGIC_DWORD = 0xF956A82Cu;
|
||||
constexpr const uint32_t CHECKSUM_OFFSET = 1072u;
|
||||
/// @brief The size of the tail of an NLP file, which stores the encrypted size of the original plain text file followed by the checksum.
|
||||
constexpr const size_t TAIL_SIZE = sizeof(uint32_t) * 2u;
|
||||
|
||||
/// @brief The core array for data encryption.
|
||||
/// @details First byte will XOR with the first byte of this array, and so on.
|
||||
/// When the end of this array is reached, the next given byte is XORed with the first byte of the array again, and so on.
|
||||
constexpr const uint8_t XOR_ARRAY[] {
|
||||
0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37,
|
||||
0xFA, 0x6B, 0xB5, 0xA1, 0x2C, 0x96, 0x13, 0xA2, 0xAB, 0x4F, 0xC5, 0xA1, 0x3E, 0xA7, 0x91, 0x8D,
|
||||
0x2C, 0xDF, 0x78, 0x6D, 0x3C, 0xFC, 0x92, 0x1F, 0x1A, 0x62, 0xA7, 0x9C, 0x92, 0x29, 0x44, 0x6D,
|
||||
@ -19,129 +53,136 @@ namespace NlpCodec {
|
||||
0x63, 0xCA, 0x52, 0xF9, 0xFB, 0x3A, 0x9C, 0x2A, 0xB2, 0x1A, 0x8D, 0x9A, 0x8C, 0x2A, 0x9C, 0x32,
|
||||
0xAA, 0xC3, 0xA2, 0x97, 0x34, 0x92, 0xFA, 0x71, 0xBE, 0x3F, 0xAC, 0x28, 0x22, 0x9F, 0xAC, 0xE8
|
||||
};
|
||||
constexpr const size_t g_XorArrayLen = sizeof(g_XorArray) / sizeof(uint8_t);
|
||||
constexpr const uint32_t MAGIC_DWORD = 0xF956A82Cu;
|
||||
constexpr const size_t TAIL_SIZE = sizeof(uint32_t) * 2u;
|
||||
/// @brief The size of above array.
|
||||
constexpr const size_t XOR_ARRAY_LEN = sizeof(XOR_ARRAY) / sizeof(uint8_t);
|
||||
/// @brief A convenient mask for above array when performing modulo.
|
||||
constexpr const size_t XOR_ARRAY_MASK = XOR_ARRAY_LEN - 1u;
|
||||
// Use a static_assert to confirm computed XOR_ARRAY_MASK is what we desired.
|
||||
// Because some stupid programmers (like me) may change above array and fill a series of wrong data,
|
||||
// then this mask was computed wrongly.
|
||||
static_assert(XOR_ARRAY_MASK == 0x7Fu);
|
||||
|
||||
void GeneralXorOperation(void* data, size_t datalen) {
|
||||
static void GeneralXorOperation(void* data, size_t data_len) {
|
||||
uint8_t* ptr = reinterpret_cast<uint8_t*>(data);
|
||||
for (size_t i = 0u; i < datalen; ++i) {
|
||||
ptr[i] ^= g_XorArray[i & 0x7Fu];
|
||||
for (size_t i = 0u; i < data_len; ++i) {
|
||||
ptr[i] ^= XOR_ARRAY[i & XOR_ARRAY_MASK];
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t GetFileLength(std::ifstream& fin) {
|
||||
// backup
|
||||
uint64_t curpos = static_cast<uint64_t>(fin.tellg());
|
||||
// get tail
|
||||
/// @brief Get the length of given file stream.
|
||||
static uint32_t GetFileLength(std::ifstream& fin) {
|
||||
// Fetch the types this stream used for following convenience.
|
||||
using stream_pos_t = std::ifstream::pos_type;
|
||||
using stream_off_t = std::ifstream::off_type;
|
||||
|
||||
// Backups current file cursor.
|
||||
stream_pos_t current_pos = fin.tellg();
|
||||
// Seek to the tail and get corresponding offset to get the length of file.
|
||||
fin.seekg(0, std::ios_base::end);
|
||||
uint32_t tail = static_cast<uint32_t>(fin.tellg());
|
||||
// restore
|
||||
fin.seekg(static_cast<std::ifstream::off_type>(curpos), std::ios_base::beg);
|
||||
stream_pos_t tail_pos = fin.tellg();
|
||||
if (std::numeric_limits<uint32_t>::max() < tail_pos)
|
||||
throw NlpException("The size of given file is too large. It should not larger than the capacity of uint32_t.");
|
||||
// Restore to previous backup file cursor
|
||||
fin.seekg(static_cast<stream_off_t>(current_pos), std::ios_base::beg);
|
||||
|
||||
return tail;
|
||||
// Safely return the cast length.
|
||||
return SafeCast<uint32_t>(tail_pos);
|
||||
}
|
||||
|
||||
bool EncodeNlp(std::ifstream& fin, std::ofstream& fout) {
|
||||
// get file length and decide zlib boundary
|
||||
uint32_t rawsize = GetFileLength(fin);
|
||||
uint32_t compboundary = static_cast<uint32_t>(compressBound(static_cast<uLong>(rawsize)));
|
||||
// HINTS:
|
||||
// In zlib, uLong and uLongf is 32-bit or more.
|
||||
// So when casting them to uint32_t, you need use SafeCast to perform boundary check.
|
||||
// However, you can directly cast uint32_t to them because there is no overflow issue.
|
||||
// Additionally, uInt is 16-bit or more.
|
||||
// So when processing with uInt, please more carefully.
|
||||
|
||||
// create buffer first
|
||||
std::unique_ptr<char[]> inbuf(new(std::nothrow) char[rawsize]);
|
||||
std::unique_ptr<char[]> outbuf(new(std::nothrow) char[compboundary]);
|
||||
if (inbuf == nullptr || outbuf == nullptr) {
|
||||
fputs("[ERR] Fail to allocate memory.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
static void EncodeNlp(std::ifstream& fin, std::ofstream& fout) {
|
||||
// Get the length of the input file.
|
||||
uint32_t raw_size = GetFileLength(fin);
|
||||
// Fetch corresponding zlib boundary for the convenience of zlib encode.
|
||||
// uLong is 32-bit or more, so we need check whether uint32_t can hold the result first.
|
||||
uint32_t computed_boundary = SafeCast<uint32_t>(compressBound(static_cast<uLong>(raw_size)));
|
||||
|
||||
// read data from file
|
||||
fin.read(inbuf.get(), rawsize);
|
||||
if (!fin.good() || fin.gcount() != rawsize) {
|
||||
fputs("[ERR] Fail to read data into buffer.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
// Create buffer first
|
||||
std::unique_ptr<char[]> inbuf(new(std::nothrow) char[raw_size]);
|
||||
std::unique_ptr<char[]> outbuf(new(std::nothrow) char[computed_boundary]);
|
||||
if (inbuf == nullptr || outbuf == nullptr)
|
||||
throw NlpException("Fail to allocate memory.");
|
||||
|
||||
// do xor operation
|
||||
GeneralXorOperation(inbuf.get(), rawsize);
|
||||
// Read data from file to input buffer
|
||||
fin.read(inbuf.get(), raw_size);
|
||||
if (!fin.good() || fin.gcount() != raw_size)
|
||||
throw NlpException("Fail to read file data into buffer.");
|
||||
|
||||
// do compress and get the size of compressed data
|
||||
uLongf _destLen = static_cast<uLongf>(compboundary);
|
||||
// Do XOR operation
|
||||
GeneralXorOperation(inbuf.get(), raw_size);
|
||||
|
||||
// Do compress and get the size of compressed data.
|
||||
uLongf dest_len = static_cast<uLongf>(computed_boundary);
|
||||
int ret = compress2(
|
||||
reinterpret_cast<Bytef*>(outbuf.get()), &_destLen,
|
||||
reinterpret_cast<Bytef*>(inbuf.get()), rawsize,
|
||||
reinterpret_cast<Bytef*>(outbuf.get()), &dest_len,
|
||||
reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uLong>(raw_size),
|
||||
Z_BEST_COMPRESSION
|
||||
);
|
||||
if (ret != Z_OK) {
|
||||
fputs("[ERR] Zlib compress() failed.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
uint32_t compsize = static_cast<uint32_t>(_destLen);
|
||||
// Check ZLib result.
|
||||
if (ret != Z_OK)
|
||||
throw NlpException("Zlib compress() failed.");
|
||||
// Fetch final compressed size.
|
||||
uint32_t compressed_size = SafeCast<uint32_t>(dest_len);
|
||||
|
||||
// produce checksum
|
||||
uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(outbuf.get()), static_cast<uInt>(compsize)));
|
||||
// Produce checksum
|
||||
uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(outbuf.get()), SafeCast<uInt>(compressed_size)));
|
||||
|
||||
// write compressed data into file
|
||||
fout.write(outbuf.get(), compsize);
|
||||
if (!fout.good()) {
|
||||
fputs("[ERR] Fail to write data into file.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
// Write compressed data into file
|
||||
fout.write(outbuf.get(), compressed_size);
|
||||
if (!fout.good())
|
||||
throw NlpException("Fail to write data into file.");
|
||||
|
||||
// raw size and checksum need some extra operation before writting
|
||||
rawsize = static_cast<uint32_t>(-(static_cast<int32_t>(rawsize) + 1)) ^ MAGIC_DWORD;
|
||||
checksum = checksum + 1072u;
|
||||
// Raw size and checksum need some extra encryption before writing
|
||||
raw_size = static_cast<uint32_t>(-(static_cast<int32_t>(raw_size) + 1)) ^ MAGIC_DWORD;
|
||||
checksum = checksum + CHECKSUM_OFFSET;
|
||||
|
||||
// write raw size and checksum
|
||||
fout.write(reinterpret_cast<char*>(&rawsize), sizeof(uint32_t));
|
||||
if (!fout.good()) {
|
||||
fputs("[ERR] Fail to write raw size into file.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
// Write raw size and checksum
|
||||
fout.write(reinterpret_cast<char*>(&raw_size), sizeof(uint32_t));
|
||||
if (!fout.good())
|
||||
throw NlpException("Fail to write raw size into file.");
|
||||
fout.write(reinterpret_cast<char*>(&checksum), sizeof(uint32_t));
|
||||
if (!fout.good()) {
|
||||
fputs("[ERR] Fail to write checksum into file.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
if (!fout.good())
|
||||
throw NlpException("Fail to write checksum into file.");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DecodeNlp(std::ifstream& fin, std::ofstream& fout) {
|
||||
// seek to tail to get essential data
|
||||
uint32_t compsize = GetFileLength(fin);
|
||||
if (compsize < TAIL_SIZE) {
|
||||
fputs("[ERR] Invalid file.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
compsize -= TAIL_SIZE;
|
||||
fin.seekg(compsize, std::ios_base::beg);
|
||||
uint32_t expected_rawlen = 0u, expected_checksum = 0u;
|
||||
fin.read(reinterpret_cast<char*>(&expected_rawlen), sizeof(uint32_t));
|
||||
static void DecodeNlp(std::ifstream& fin, std::ofstream& fout) {
|
||||
// Seek to tail to get essential data
|
||||
uint32_t compressed_size = GetFileLength(fin);
|
||||
if (compressed_size < TAIL_SIZE)
|
||||
throw NlpException("Invalid file. File is too short.");
|
||||
|
||||
// Get expected raw size and checksum
|
||||
compressed_size -= TAIL_SIZE;
|
||||
fin.seekg(compressed_size, std::ios_base::beg);
|
||||
uint32_t expected_raw_size = 0u, expected_checksum = 0u;
|
||||
fin.read(reinterpret_cast<char*>(&expected_raw_size), sizeof(uint32_t));
|
||||
fin.read(reinterpret_cast<char*>(&expected_checksum), sizeof(uint32_t));
|
||||
fin.seekg(0, std::ios_base::beg);
|
||||
|
||||
// these tail data need to do some processes
|
||||
expected_rawlen = static_cast<uint32_t>(-1 - static_cast<int32_t>(MAGIC_DWORD ^ expected_rawlen));
|
||||
expected_checksum = expected_checksum - 1072u;
|
||||
// Raw size and checksum data need to do some extra decryption.
|
||||
expected_raw_size = static_cast<uint32_t>(-1 - static_cast<int32_t>(MAGIC_DWORD ^ expected_raw_size));
|
||||
expected_checksum = expected_checksum - CHECKSUM_OFFSET;
|
||||
|
||||
// allocate memory to store data
|
||||
std::unique_ptr<char[]> inbuf(new(std::nothrow) char[compsize]);
|
||||
std::unique_ptr<char[]> outbuf(new(std::nothrow) char[expected_rawlen]);
|
||||
if (inbuf == nullptr || outbuf == nullptr) {
|
||||
fputs("[ERR] Fail to allocate memory.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
// Allocate memory to store data
|
||||
std::unique_ptr<char[]> inbuf(new(std::nothrow) char[compressed_size]);
|
||||
std::unique_ptr<char[]> outbuf(new(std::nothrow) char[expected_raw_size]);
|
||||
if (inbuf == nullptr || outbuf == nullptr)
|
||||
throw NlpException("Fail to allocate memory.");
|
||||
|
||||
// read into buffer
|
||||
fin.read(inbuf.get(), compsize);
|
||||
if (!fin.good() || fin.gcount() != compsize) {
|
||||
fputs("[ERR] Fail to read data into buffer.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
// Read file into buffer
|
||||
fin.read(inbuf.get(), compressed_size);
|
||||
if (!fin.good() || fin.gcount() != compressed_size)
|
||||
throw NlpException("Fail to read data into buffer.\n");
|
||||
|
||||
// test checksum
|
||||
uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uInt>(compsize)));
|
||||
// Test checksum
|
||||
uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(inbuf.get()), SafeCast<uInt>(compressed_size)));
|
||||
if (checksum != expected_checksum) {
|
||||
fprintf(stdout, "[ERR] Fail to match crc32. Expect 0x%" PRIx32 " got 0x%" PRIx32 ".\n",
|
||||
expected_checksum, checksum
|
||||
@ -149,84 +190,176 @@ namespace NlpCodec {
|
||||
return false;
|
||||
}
|
||||
|
||||
// do uncompress
|
||||
uLongf _destLen = static_cast<uLongf>(expected_rawlen);
|
||||
// Do decompress
|
||||
uLongf _destLen = static_cast<uLongf>(expected_raw_size);
|
||||
int ret = uncompress(
|
||||
reinterpret_cast<Bytef*>(outbuf.get()), &_destLen,
|
||||
reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uLong>(compsize)
|
||||
reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uLong>(compressed_size)
|
||||
);
|
||||
if (ret != Z_OK) {
|
||||
fputs("[ERR] Zlib uncompress() failed.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
// Check zlib result
|
||||
if (ret != Z_OK)
|
||||
throw NlpException("Zlib uncompress() failed.");
|
||||
|
||||
// do xor operation
|
||||
GeneralXorOperation(outbuf.get(), expected_rawlen);
|
||||
GeneralXorOperation(outbuf.get(), expected_raw_size);
|
||||
|
||||
// write into file
|
||||
fout.write(outbuf.get(), expected_rawlen);
|
||||
if (!fout.good()) {
|
||||
fputs("[ERR] Fail to write data into file.\n", stdout);
|
||||
return false;
|
||||
}
|
||||
// Write result into file
|
||||
fout.write(outbuf.get(), expected_raw_size);
|
||||
if (!fout.good())
|
||||
throw NlpException("Fail to write data into file.");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void PrintHelp(void) {
|
||||
fputs("NlpCodec Usage\n", stdout);
|
||||
fputs("\n", stdout);
|
||||
fputs("NlpCodec [encode | decode] <src> <dest>\n", stdout);
|
||||
fputs("encode - encode text file into nlp file.\n", stdout);
|
||||
fputs("decode - decompress nlp file into text file.\n", stdout);
|
||||
fputs("<src> - the source file. text file in compress mode. nlp file in uncompress mode.\n", stdout);
|
||||
fputs("<dest> - the destination file. nlp file in compress mode. text file in uncompress mode.\n", stdout);
|
||||
namespace NlpCodec::Runtime {
|
||||
|
||||
/// @brief The action requested by the user through the first command line argument.
enum class UserOperation {
    /// Encode a text file into an NLP file.
    Encode,
    /// Decode an NLP file into a text file.
    Decode,
    /// Print version info about this program.
    Version,
    /// Print the usage page.
    Help
};
|
||||
|
||||
struct UserRequest {
|
||||
UserOperation mUserOperation;
|
||||
std::filesystem::path mInputFile;
|
||||
std::filesystem::path mOutputFile;
|
||||
};
|
||||
|
||||
/// @brief Print the usage page of this program to standard output.
/// @details The whole page is emitted as a single string literal and flushed
/// once, instead of flushing the stream after every line with std::endl.
static void PrintHelp() {
    std::cout <<
        "NlpCodec Usage\n"
        "NlpCodec [encode | decode | version | help] <src> <dest>\n"
        "\n"
        "version - print version info about this program.\n"
        "help - print this page.\n"
        "\n"
        "encode - encode text file into NLP file.\n"
        "decode - decode NLP file into text file.\n"
        "<src> - the source file.\n"
        " the path to text file in encode mode.\n"
        " the path to NLP file in decode mode.\n"
        "<dest> - the destination file.\n"
        " the path to NLP file in encode mode.\n"
        " the path to text file in decode mode.\n"
        "\n"
        << std::flush;
}
|
||||
|
||||
/// @brief Print the build timestamp and the license notice to standard output.
/// @details The text is written in one go and flushed once, instead of
/// flushing the stream after every line with std::endl.
static void PrintVersion() {
    std::cout <<
        "NlpCodec built at " __DATE__ " " __TIME__ "\n"
        "MIT License. Copyright (c) 2022-2024 yyc12345\n"
        << std::flush;
}
|
||||
|
||||
/// @brief Translate the raw command line into a UserRequest.
/// @param argc The argument count passed to main().
/// @param argv The argument vector passed to main().
/// @return The parsed request. The file paths are only filled for `encode` and `decode`.
/// @throws NlpException If the argument count is neither 2 nor 4,
/// or if the mode string is not valid for the given argument count.
static UserRequest ResolveArguments(int argc, char* argv[]) {
    // Only two shapes are accepted:
    //   <program> <version | help>
    //   <program> <encode | decode> <src> <dest>
    const bool is_info_form = (argc == 2);
    const bool is_codec_form = (argc == 4);
    if (!is_info_form && !is_codec_form)
        throw NlpException("Invalid argument count!");

    UserRequest request { UserOperation::Version, "", "" };
    const std::string mode(argv[1]);

    if (is_info_form) {
        if (mode == "version")
            request.mUserOperation = UserOperation::Version;
        else if (mode == "help")
            request.mUserOperation = UserOperation::Help;
        else
            throw NlpException("Invalid argument! Must be one of `version` or `help`");
        return request;
    }

    // Codec form: pick the direction, then record both file paths.
    if (mode == "encode")
        request.mUserOperation = UserOperation::Encode;
    else if (mode == "decode")
        request.mUserOperation = UserOperation::Decode;
    else
        throw NlpException("Invalid argument! Must be one of `encode` or `decode`");

    request.mInputFile = std::filesystem::path(argv[2]);
    request.mOutputFile = std::filesystem::path(argv[3]);
    return request;
}
|
||||
|
||||
static void ExecuteWorker(const UserRequest& user_request) {
|
||||
// Take action according to different request first
|
||||
bool is_encode;
|
||||
switch (user_request.mUserOperation) {
|
||||
case UserOperation::Version:
|
||||
PrintVersion();
|
||||
return;
|
||||
case NlpCodec::Runtime::UserOperation::Help:
|
||||
PrintHelp();
|
||||
return;
|
||||
case NlpCodec::Runtime::UserOperation::Encode:
|
||||
is_encode = true;
|
||||
break;
|
||||
case NlpCodec::Runtime::UserOperation::Decode:
|
||||
is_encode = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// Do real codec related works.
|
||||
// Try to open files
|
||||
std::ifstream in_file;
|
||||
in_file.open(user_request.mInputFile, std::ios_base::in | std::ios_base::binary);
|
||||
std::ofstream out_file;
|
||||
out_file.open(user_request.mOutputFile, std::ios_base::out | std::ios_base::binary);
|
||||
// Check file status
|
||||
if (!in_file.is_open() || !out_file.is_open()) {
|
||||
throw NlpException("Fail to open input or output file.");
|
||||
}
|
||||
|
||||
// Perform codec
|
||||
if (is_encode) {
|
||||
::NlpCodec::EncodeNlp(in_file, out_file);
|
||||
} else {
|
||||
::NlpCodec::DecodeNlp(in_file, out_file);
|
||||
}
|
||||
|
||||
// Free resources
|
||||
in_file.close();
|
||||
out_file.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
||||
// check arguments
|
||||
if (argc != 4) {
|
||||
fputs("[ERR] Invalid arguments!\n", stdout);
|
||||
PrintHelp();
|
||||
// Try parsing given arguments
|
||||
NlpCodec::Runtime::UserRequest user_request;
|
||||
try {
|
||||
user_request = NlpCodec::Runtime::ResolveArguments(argc, argv);
|
||||
} catch (const NlpCodec::NlpException& e) {
|
||||
std::cerr << "[Argument Error] " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string mode(argv[1]);
|
||||
if (mode != "encode" && mode != "decode") {
|
||||
fputs("[ERR] Unknown operation!\n", stdout);
|
||||
PrintHelp();
|
||||
return 1;
|
||||
}
|
||||
|
||||
// try initializing files
|
||||
std::ifstream infile;
|
||||
infile.open(std::filesystem::path(argv[2]), std::ios_base::in | std::ios_base::binary);
|
||||
std::ofstream outfile;
|
||||
outfile.open(std::filesystem::path(argv[3]), std::ios_base::out | std::ios_base::binary);
|
||||
|
||||
if (!infile.is_open() || !outfile.is_open()) {
|
||||
fputs("[ERR] Fail to open file!\n", stdout);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// do real work
|
||||
bool result = true;
|
||||
if (mode == "encode") {
|
||||
result = NlpCodec::EncodeNlp(infile, outfile);
|
||||
} else {
|
||||
result = NlpCodec::DecodeNlp(infile, outfile);
|
||||
}
|
||||
|
||||
// free resources and report
|
||||
infile.close();
|
||||
outfile.close();
|
||||
|
||||
if (!result) {
|
||||
fputs("[ERR] Encoder failed!\n", stdout);
|
||||
return 1;
|
||||
// Try executing the real worker
|
||||
try {
|
||||
NlpCodec::Runtime::ExecuteWorker(user_request);
|
||||
} catch (const NlpCodec::NlpException& e) {
|
||||
std::cerr << "[Codec Error] " << e.what() << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Reference in New Issue
Block a user