commit 66f78e9fa95c1ef09e398833bc93ff603d2db7d2 Author: yyc12345 Date: Mon Jun 26 22:00:10 2023 +0800 first commit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..096dd73 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.fods eol=lf + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c228184 --- /dev/null +++ b/.gitignore @@ -0,0 +1,364 @@ +## my ban +out/ +temp/ + +.vscode/ + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ +Temp/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*[.json, .xml, .info] + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..560a19f --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2022-2023 yyc12345 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NlpEncoder/CMakeLists.txt b/NlpEncoder/CMakeLists.txt new file mode 100644 index 0000000..df0b344 --- /dev/null +++ b/NlpEncoder/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.12) +project(NlpEncoder LANGUAGES CXX) + +# find packages +find_package(ZLIB REQUIRED) + +# set standard +set(CMAKE_CXX_STANDARD 17) + +# generate program +add_executable(NlpEncoder NlpEncoder.cpp) +target_link_libraries(NlpEncoder +PRIVATE + ${ZLIB_LIBRARIES} +) +target_include_directories(NlpEncoder +PRIVATE + ${ZLIB_INCLUDE_DIRS} +) diff --git a/NlpEncoder/NlpEncoder.cpp b/NlpEncoder/NlpEncoder.cpp new file mode 100644 index 0000000..3b2de3f --- /dev/null +++ b/NlpEncoder/NlpEncoder.cpp @@ -0,0 +1,233 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NlpEncoder { + + constexpr const uint8_t g_XorArray[] { + 0x2C, 0xA8, 0x56, 0xF9, 0xBD, 0xA6, 0x8D, 0x15, 0x25, 0x38, 0x1A, 0xD4, 0x65, 0x58, 0x28, 0x37, + 0xFA, 0x6B, 0xB5, 0xA1, 0x2C, 0x96, 0x13, 0xA2, 0xAB, 0x4F, 0xC5, 0xA1, 0x3E, 0xA7, 0x91, 0x8D, + 0x2C, 0xDF, 0x78, 0x6D, 0x3C, 0xFC, 0x92, 0x1F, 0x1A, 0x62, 0xA7, 0x9C, 0x92, 0x29, 0x44, 0x6D, + 0x3D, 0xA9, 0x2B, 0xE1, 0x91, 0xAD, 0x49, 0x3C, 0xE2, 0x33, 0xD2, 0x1A, 0x55, 0x92, 0xE7, 0x95, + 0x8C, 0xDA, 0xD2, 0xCD, 0xA2, 0xCF, 0x92, 0x9A, 0xE1, 0xF9, 0x3A, 0x26, 0xFA, 0xC4, 0xA9, 0x23, + 0xA9, 0x4D, 0x1A, 0x2C, 0x3C, 0x2A, 0xAC, 0x62, 0xA3, 0x92, 0xAC, 0x1F, 0x3E, 0xA6, 0xC9, 0xC8, + 0x63, 0xCA, 0x52, 0xF9, 0xFB, 0x3A, 0x9C, 0x2A, 0xB2, 0x1A, 0x8D, 0x9A, 0x8C, 0x2A, 0x9C, 0x32, + 0xAA, 0xC3, 0xA2, 0x97, 0x34, 0x92, 0xFA, 0x71, 0xBE, 0x3F, 0xAC, 0x28, 0x22, 0x9F, 0xAC, 0xE8 + }; + constexpr const size_t g_XorArrayLen = sizeof(g_XorArray) / sizeof(uint8_t); + constexpr const uint32_t MAGIC_DWORD = 0xF956A82Cu; + constexpr const size_t TAIL_SIZE = sizeof(uint32_t) * 2u; + + void GeneralXorOperation(void* data, size_t datalen) { + uint8_t* ptr = reinterpret_cast(data); + for (size_t i = 0u; i < datalen; ++i) { + ptr[i] ^= g_XorArray[i & 0x7Fu]; + } + } + + uint32_t GetFileLength(std::ifstream& fin) { + // backup + uint64_t curpos = static_cast(fin.tellg()); + // get tail + fin.seekg(0, std::ios_base::end); + uint32_t tail = static_cast(fin.tellg()); + // restore + fin.seekg(static_cast(curpos), std::ios_base::beg); + + return tail; + } + + bool EncodeNlp(std::ifstream& fin, std::ofstream& fout) { + // get file length and decide zlib boundary + uint32_t rawsize = GetFileLength(fin); + uint32_t compboundary = static_cast(compressBound(static_cast(rawsize))); + + // create buffer first + std::unique_ptr inbuf(new(std::nothrow) char[rawsize]); + std::unique_ptr outbuf(new(std::nothrow) char[compboundary]); + if (inbuf == nullptr || outbuf == nullptr) { + fputs("[ERR] Fail to allocate memory.\n", stdout); + return false; + } + + // read data from file + fin.read(inbuf.get(), rawsize); + if (!fin.good() || fin.gcount() != rawsize) { + fputs("[ERR] Fail to read data into buffer.\n", stdout); + return false; + } + + // do xor operation + GeneralXorOperation(inbuf.get(), rawsize); + + // do compress and get the size of compressed data + uLongf _destLen = static_cast(compboundary); + int ret = compress2( + reinterpret_cast(outbuf.get()), &_destLen, + reinterpret_cast(inbuf.get()), rawsize, + Z_BEST_COMPRESSION + ); + if (ret != Z_OK) { + fputs("[ERR] Zlib compress() failed.\n", stdout); + return false; + } + uint32_t compsize = static_cast(_destLen); + + // produce checksum + uint32_t checksum = static_cast(adler32(0u, reinterpret_cast(outbuf.get()), static_cast(compsize))); + + // write compressed data into file + fout.write(outbuf.get(), compsize); + if (!fout.good()) { + fputs("[ERR] Fail to write data into file.\n", stdout); + return false; + } + + // raw size and checksum need some extra operation before writting + rawsize = static_cast(-(static_cast(rawsize) + 1)) ^ MAGIC_DWORD; + checksum = checksum + 1072u; + + // write raw size and checksum + fout.write(reinterpret_cast(&rawsize), sizeof(uint32_t)); + if (!fout.good()) { + fputs("[ERR] Fail to write raw size into file.\n", stdout); + return false; + } + fout.write(reinterpret_cast(&checksum), sizeof(uint32_t)); + if (!fout.good()) { + fputs("[ERR] Fail to write checksum into file.\n", stdout); + return false; + } + + return true; + } + + bool DecodeNlp(std::ifstream& fin, std::ofstream& fout) { + // seek to tail to get essential data + uint32_t compsize = GetFileLength(fin); + if (compsize < TAIL_SIZE) { + fputs("[ERR] Invalid file.\n", stdout); + return false; + } + compsize -= TAIL_SIZE; + fin.seekg(compsize, std::ios_base::beg); + uint32_t expected_rawlen = 0u, expected_checksum = 0u; + fin.read(reinterpret_cast(&expected_rawlen), sizeof(uint32_t)); + fin.read(reinterpret_cast(&expected_checksum), sizeof(uint32_t)); + fin.seekg(0, std::ios_base::beg); + + // these tail data need to do some processes + expected_rawlen = static_cast(-1 - static_cast(MAGIC_DWORD ^ expected_rawlen)); + expected_checksum = expected_checksum - 1072u; + + // allocate memory to store data + std::unique_ptr inbuf(new(std::nothrow) char[compsize]); + std::unique_ptr outbuf(new(std::nothrow) char[expected_rawlen]); + if (inbuf == nullptr || outbuf == nullptr) { + fputs("[ERR] Fail to allocate memory.\n", stdout); + return false; + } + + // read into buffer + fin.read(inbuf.get(), compsize); + if (!fin.good() || fin.gcount() != compsize) { + fputs("[ERR] Fail to read data into buffer.\n", stdout); + return false; + } + + // test checksum + uint32_t checksum = static_cast(adler32(0u, reinterpret_cast(inbuf.get()), static_cast(compsize))); + if (checksum != expected_checksum) { + fprintf(stdout, "[ERR] Fail to match crc32. Expect 0x%" PRIx32 " got 0x%" PRIx32 ".\n", + expected_checksum, checksum + ); + return false; + } + + // do uncompress + uLongf _destLen = static_cast(expected_rawlen); + int ret = uncompress( + reinterpret_cast(outbuf.get()), &_destLen, + reinterpret_cast(inbuf.get()), static_cast(compsize) + ); + if (ret != Z_OK) { + fputs("[ERR] Zlib uncompress() failed.\n", stdout); + return false; + } + + // do xor operation + GeneralXorOperation(outbuf.get(), expected_rawlen); + + // write into file + fout.write(outbuf.get(), expected_rawlen); + if (!fout.good()) { + fputs("[ERR] Fail to write data into file.\n", stdout); + return false; + } + + return true; + } + +} + +static void PrintHelp(void) { + fputs("NlpEncoder Usage\n", stdout); + fputs("\n", stdout); + fputs("NlpEncoder [compress | uncompress] \n", stdout); + fputs("compress - compress text file into nlp file.\n", stdout); + fputs("uncompress - decompress nlp file into text file.\n", stdout); + fputs(" - the source file. text file in compress mode. nlp file in uncompress mode.\n", stdout); + fputs(" - the destination file. nlp file in compress mode. text file in uncompress mode.\n", stdout); +} + +int main(int argc, char* argv[]) { + + // check arguments + if (argc != 4) { + fputs("[ERR] Invalid arguments!\n", stdout); + PrintHelp(); + return 1; + } + + std::string mode(argv[1]); + if (mode != "compress" && mode != "uncompress") { + fputs("[ERR] Unknow operation!\n", stdout); + PrintHelp(); + return 1; + } + + // try initializing files + std::ifstream infile; + infile.open(std::filesystem::path(argv[2]), std::ios_base::in | std::ios_base::binary); + std::ofstream outfile; + outfile.open(std::filesystem::path(argv[3]), std::ios_base::out | std::ios_base::binary); + + if (!infile.is_open() || !outfile.is_open()) { + fputs("[ERR] Fail to open file!\n", stdout); + return 1; + } + + // do real work + bool result = true; + if (mode == "compress") { + result = NlpEncoder::EncodeNlp(infile, outfile); + } else { + result = NlpEncoder::DecodeNlp(infile, outfile); + } + + // free resources and report + infile.close(); + outfile.close(); + + if (!result) { + fputs("[ERR] Encoder failed!\n", stdout); + return 1; + } + + return 0; +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9c2b8d --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# Virtools Translation + +This is a part of plan...