first commit

This commit is contained in:
yyc12345 2023-06-26 22:00:10 +08:00
commit 66f78e9fa9
6 changed files with 642 additions and 0 deletions

2
.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
*.fods eol=lf

364
.gitignore vendored Normal file
View File

@ -0,0 +1,364 @@
## my ban
out/
temp/
.vscode/
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
Temp/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*[.json, .xml, .info]
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2022-2023 yyc12345
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

19
NlpEncoder/CMakeLists.txt Normal file
View File

@ -0,0 +1,19 @@
cmake_minimum_required(VERSION 3.12)
project(NlpEncoder LANGUAGES CXX)

# find packages
find_package(ZLIB REQUIRED)

# set standard
# NlpEncoder.cpp includes <filesystem>, so C++17 is mandatory. Without
# CMAKE_CXX_STANDARD_REQUIRED, CMake is allowed to fall back to an older
# standard when the compiler lacks C++17, which only fails later with
# confusing compile errors.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# generate program
add_executable(NlpEncoder NlpEncoder.cpp)

# The ZLIB::ZLIB imported target provides both the library to link and its
# include directories, so no separate target_include_directories() is needed.
target_link_libraries(NlpEncoder
    PRIVATE
        ZLIB::ZLIB
)

233
NlpEncoder/NlpEncoder.cpp Normal file
View File

@ -0,0 +1,233 @@
#include <zlib.h>
#include <cstdio>
#include <cstdint>
#include <cinttypes>
#include <filesystem>
#include <string>
#include <fstream>
#include <memory>
namespace NlpEncoder {
// Repeating key used to obfuscate the plain text. GeneralXorOperation() XORs
// byte `i` of the data with entry `i mod g_XorArrayLen` of this table.
constexpr const uint8_t g_XorArray[] {
0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37,
0xFA, 0x6B, 0xB5, 0xA1, 0x2C, 0x96, 0x13, 0xA2, 0xAB, 0x4F, 0xC5, 0xA1, 0x3E, 0xA7, 0x91, 0x8D,
0x2C, 0xDF, 0x78, 0x6D, 0x3C, 0xFC, 0x92, 0x1F, 0x1A, 0x62, 0xA7, 0x9C, 0x92, 0x29, 0x44, 0x6D,
0x3D, 0xA9, 0x2B, 0xE1, 0x91, 0xAD, 0x49, 0x3C, 0xE2, 0x33, 0xD2, 0x1A, 0x55, 0x92, 0xE7, 0x95,
0x8C, 0xDA, 0xD2, 0xCD, 0xA2, 0xCF, 0x92, 0x9A, 0xE1, 0xF9, 0x3A, 0x26, 0xFA, 0xC4, 0xA9, 0x23,
0xA9, 0x4D, 0x1A, 0x2C, 0x3C, 0x2A, 0xAC, 0x62, 0xA3, 0x92, 0xAC, 0x1F, 0x3E, 0xA6, 0xC9, 0xC8,
0x63, 0xCA, 0x52, 0xF9, 0xFB, 0x3A, 0x9C, 0x2A, 0xB2, 0x1A, 0x8D, 0x9A, 0x8C, 0x2A, 0x9C, 0x32,
0xAA, 0xC3, 0xA2, 0x97, 0x34, 0x92, 0xFA, 0x71, 0xBE, 0x3F, 0xAC, 0x28, 0x22, 0x9F, 0xAC, 0xE8
};

// Number of bytes in the key table (128).
constexpr const size_t g_XorArrayLen = sizeof(g_XorArray) / sizeof(g_XorArray[0]);

// Mask XOR-ed into the stored raw size. Its value equals the first four key
// bytes (0x2C, 0xA8, 0x56, 0xF9) read as a little-endian 32-bit integer.
constexpr const uint32_t MAGIC_DWORD = 0xF956A82Cu;

// Size in bytes of the trailer appended after the compressed payload:
// two 32-bit fields (obfuscated raw size, then adjusted checksum).
constexpr const size_t TAIL_SIZE = 2u * sizeof(uint32_t);

/**
 * XOR a buffer in place with the repeating key table.
 *
 * The operation is its own inverse: applying it twice to the same buffer
 * restores the original bytes, so it serves both encoding and decoding.
 *
 * @param data    start of the buffer to transform (not dereferenced when
 *                datalen is 0)
 * @param datalen number of bytes to transform
 */
void GeneralXorOperation(void* data, size_t datalen) {
    uint8_t* const bytes = static_cast<uint8_t*>(data);
    for (size_t offset = 0u; offset < datalen; ++offset) {
        // wrap around the key table once its end is reached
        bytes[offset] ^= g_XorArray[offset % g_XorArrayLen];
    }
}
/**
 * Measure the size of the file behind an input stream.
 *
 * The stream's read position is saved before seeking to the end and put
 * back afterwards, so the caller's position is preserved.
 *
 * @param fin an opened input file stream
 * @return the end-of-file offset truncated to 32 bits. No error is reported:
 *         a failed tellg() or a file larger than 4 GiB yields a meaningless
 *         value.
 */
uint32_t GetFileLength(std::ifstream& fin) {
    // remember where the caller currently is
    const std::ifstream::off_type saved_offset =
        static_cast<std::ifstream::off_type>(fin.tellg());

    // the offset of the end of the file is its length
    fin.seekg(0, std::ios_base::end);
    const std::ifstream::pos_type end_pos = fin.tellg();

    // go back to the caller's position
    fin.seekg(saved_offset, std::ios_base::beg);

    return static_cast<uint32_t>(end_pos);
}
/**
 * Encode a plain text file into the NLP format.
 *
 * Steps: read the whole input, XOR it with the key table, compress it with
 * zlib at Z_BEST_COMPRESSION, then write:
 *   1. the compressed payload,
 *   2. a 32-bit field holding (~rawsize) ^ MAGIC_DWORD,
 *   3. a 32-bit field holding the payload checksum + 1072.
 * Both 32-bit fields are written as raw memory, i.e. in host byte order.
 *
 * @param fin  input stream opened in binary mode; its whole content is read
 * @param fout output stream opened in binary mode
 * @return true on success; false (after printing a message to stdout) when
 *         allocation, reading, compression or writing fails. `fout` may
 *         already contain partial data when false is returned.
 */
bool EncodeNlp(std::ifstream& fin, std::ofstream& fout) {
    // get file length and decide zlib boundary
    const uint32_t rawsize = GetFileLength(fin);
    const uint32_t compboundary = static_cast<uint32_t>(compressBound(static_cast<uLong>(rawsize)));

    // create buffer first
    std::unique_ptr<char[]> inbuf(new(std::nothrow) char[rawsize]);
    std::unique_ptr<char[]> outbuf(new(std::nothrow) char[compboundary]);
    if (inbuf == nullptr || outbuf == nullptr) {
        fputs("[ERR] Fail to allocate memory.\n", stdout);
        return false;
    }

    // read data from file
    fin.read(inbuf.get(), rawsize);
    if (!fin.good() || fin.gcount() != static_cast<std::streamsize>(rawsize)) {
        fputs("[ERR] Fail to read data into buffer.\n", stdout);
        return false;
    }

    // do xor operation
    GeneralXorOperation(inbuf.get(), rawsize);

    // do compress and get the size of compressed data
    uLongf _destLen = static_cast<uLongf>(compboundary);
    const int ret = compress2(
        reinterpret_cast<Bytef*>(outbuf.get()), &_destLen,
        reinterpret_cast<Bytef*>(inbuf.get()), rawsize,
        Z_BEST_COMPRESSION
    );
    if (ret != Z_OK) {
        fputs("[ERR] Zlib compress() failed.\n", stdout);
        return false;
    }
    const uint32_t compsize = static_cast<uint32_t>(_destLen);

    // produce checksum
    // NOTE(review): the running value is seeded with 0 here; DecodeNlp uses the
    // same seed, so the two stay consistent. Presumably required by the file
    // format -- do not change one side without the other.
    const uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(outbuf.get()), static_cast<uInt>(compsize)));

    // write compressed data into file
    fout.write(outbuf.get(), compsize);
    if (!fout.good()) {
        fputs("[ERR] Fail to write data into file.\n", stdout);
        return false;
    }

    // raw size and checksum need some extra operation before writing.
    // The stored size is -(rawsize + 1) ^ MAGIC_DWORD. In two's complement
    // -(x + 1) is exactly ~x, and computing it on the unsigned value avoids the
    // signed overflow the arithmetic form hits when rawsize == 0x7FFFFFFF.
    const uint32_t stored_rawsize = static_cast<uint32_t>(~rawsize) ^ MAGIC_DWORD;
    const uint32_t stored_checksum = checksum + 1072u;

    // write raw size and checksum
    fout.write(reinterpret_cast<const char*>(&stored_rawsize), sizeof(uint32_t));
    if (!fout.good()) {
        fputs("[ERR] Fail to write raw size into file.\n", stdout);
        return false;
    }
    fout.write(reinterpret_cast<const char*>(&stored_checksum), sizeof(uint32_t));
    if (!fout.good()) {
        fputs("[ERR] Fail to write checksum into file.\n", stdout);
        return false;
    }

    return true;
}
bool DecodeNlp(std::ifstream& fin, std::ofstream& fout) {
// seek to tail to get essential data
uint32_t compsize = GetFileLength(fin);
if (compsize < TAIL_SIZE) {
fputs("[ERR] Invalid file.\n", stdout);
return false;
}
compsize -= TAIL_SIZE;
fin.seekg(compsize, std::ios_base::beg);
uint32_t expected_rawlen = 0u, expected_checksum = 0u;
fin.read(reinterpret_cast<char*>(&expected_rawlen), sizeof(uint32_t));
fin.read(reinterpret_cast<char*>(&expected_checksum), sizeof(uint32_t));
fin.seekg(0, std::ios_base::beg);
// these tail data need to do some processes
expected_rawlen = static_cast<uint32_t>(-1 - static_cast<int32_t>(MAGIC_DWORD ^ expected_rawlen));
expected_checksum = expected_checksum - 1072u;
// allocate memory to store data
std::unique_ptr<char[]> inbuf(new(std::nothrow) char[compsize]);
std::unique_ptr<char[]> outbuf(new(std::nothrow) char[expected_rawlen]);
if (inbuf == nullptr || outbuf == nullptr) {
fputs("[ERR] Fail to allocate memory.\n", stdout);
return false;
}
// read into buffer
fin.read(inbuf.get(), compsize);
if (!fin.good() || fin.gcount() != compsize) {
fputs("[ERR] Fail to read data into buffer.\n", stdout);
return false;
}
// test checksum
uint32_t checksum = static_cast<uint32_t>(adler32(0u, reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uInt>(compsize)));
if (checksum != expected_checksum) {
fprintf(stdout, "[ERR] Fail to match crc32. Expect 0x%" PRIx32 " got 0x%" PRIx32 ".\n",
expected_checksum, checksum
);
return false;
}
// do uncompress
uLongf _destLen = static_cast<uLongf>(expected_rawlen);
int ret = uncompress(
reinterpret_cast<Bytef*>(outbuf.get()), &_destLen,
reinterpret_cast<Bytef*>(inbuf.get()), static_cast<uLong>(compsize)
);
if (ret != Z_OK) {
fputs("[ERR] Zlib uncompress() failed.\n", stdout);
return false;
}
// do xor operation
GeneralXorOperation(outbuf.get(), expected_rawlen);
// write into file
fout.write(outbuf.get(), expected_rawlen);
if (!fout.good()) {
fputs("[ERR] Fail to write data into file.\n", stdout);
return false;
}
return true;
}
}
// Print the command line usage of this program to stdout.
static void PrintHelp(void) {
    static const char* const kUsageLines[] = {
        "NlpEncoder Usage\n",
        "\n",
        "NlpEncoder [compress | uncompress] <src> <dest>\n",
        "compress - compress text file into nlp file.\n",
        "uncompress - decompress nlp file into text file.\n",
        "<src> - the source file. text file in compress mode. nlp file in uncompress mode.\n",
        "<dest> - the destination file. nlp file in compress mode. text file in uncompress mode.\n",
    };

    // emit the lines one by one, in order
    for (const char* line : kUsageLines) {
        fputs(line, stdout);
    }
}
/**
 * Program entry point.
 *
 * Usage: NlpEncoder [compress | uncompress] <src> <dest>
 *
 * @return 0 on success; 1 on bad arguments, on failure to open either file,
 *         or when encoding/decoding (including the final flush of the
 *         destination file) fails.
 */
int main(int argc, char* argv[]) {
    // check arguments
    if (argc != 4) {
        fputs("[ERR] Invalid arguments!\n", stdout);
        PrintHelp();
        return 1;
    }
    const std::string mode(argv[1]);
    const bool is_compress = (mode == "compress");
    if (!is_compress && mode != "uncompress") {
        fputs("[ERR] Unknow operation!\n", stdout);
        PrintHelp();
        return 1;
    }

    // try initializing files.
    // The source is opened and checked first: opening the destination for
    // output truncates it, which must not happen when the source is unusable.
    std::ifstream infile;
    infile.open(std::filesystem::path(argv[2]), std::ios_base::in | std::ios_base::binary);
    if (!infile.is_open()) {
        fputs("[ERR] Fail to open file!\n", stdout);
        return 1;
    }
    std::ofstream outfile;
    outfile.open(std::filesystem::path(argv[3]), std::ios_base::out | std::ios_base::binary);
    if (!outfile.is_open()) {
        fputs("[ERR] Fail to open file!\n", stdout);
        return 1;
    }

    // do real work
    bool result = is_compress
        ? NlpEncoder::EncodeNlp(infile, outfile)
        : NlpEncoder::DecodeNlp(infile, outfile);

    // free resources and report
    infile.close();
    outfile.close();
    // Output is buffered, so a write error (e.g. disk full) may only surface
    // when close() flushes; treat that as a failure too.
    if (outfile.fail()) {
        result = false;
    }
    if (!result) {
        fputs("[ERR] Encoder failed!\n", stdout);
        return 1;
    }

    return 0;
}

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# Virtools Translation
This is a part of plan...