diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 097d879..05143c7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -41,7 +41,8 @@ PRIVATE
     yycc/carton/binstore/types.cpp
     yycc/carton/binstore/setting.cpp
     yycc/carton/binstore/configuration.cpp
-    yycc/carton/binstore/storage.cpp
+    #yycc/carton/binstore/storage.cpp
+    yycc/carton/lexer61.cpp
 )
 target_sources(YYCCommonplace
 PUBLIC
@@ -110,7 +111,8 @@ FILES
     yycc/carton/binstore/serializer.hpp
     yycc/carton/binstore/setting.hpp
     yycc/carton/binstore/configuration.hpp
-    yycc/carton/binstore/storage.hpp
+    #yycc/carton/binstore/storage.hpp
+    yycc/carton/lexer61.hpp
     yycc/carton/fft.hpp
 )
 # Setup header infomations
diff --git a/src/yycc/carton/lexer61.cpp b/src/yycc/carton/lexer61.cpp
new file mode 100644
index 0000000..dff829a
--- /dev/null
+++ b/src/yycc/carton/lexer61.cpp
@@ -0,0 +1,220 @@
+#include "lexer61.hpp"
+
+namespace yycc::carton::lexer61 {
+
+    // NOTE(review): template argument lists (e.g. after LexerResult and std::vector) are
+    // missing from several declarations in this text, apparently lost during extraction.
+    // They are kept exactly as found here; restore them from the original source.
+
+    /// @brief Build a lexer with empty buffers that starts in the Space state.
+    Lexer61::Lexer61() : m_ArgsCollection(), m_CurrentArg(), m_CurrentChar(u8'\0'), m_State(LexerState::Space), m_PrevState(LexerState::Space) {}
+
+    Lexer61::~Lexer61() {}
+
+    /**
+     * @brief Split a command line into arguments.
+     * @details Resets the lexer, hands every code unit of the input to the handler of the
+     * current state, and finally checks the state reached at the end of the input.
+     * @param[in] cmd The command line to split.
+     * @return The collected arguments, or LexerError::UnexpectedEnd when the input ends
+     * in the Single, Double or Escape state.
+     */
+    LexerResult> Lexer61::lex(const std::u8string_view &cmd) {
+        // Clear variables when we start a new lex.
+        this->reset();
+
+        // Iterate over all chars of the command line one by one.
+        for (char8_t c : cmd) {
+            m_CurrentChar = c;
+
+            // YYC MARK:
+            // All invalid characters are passed directly to the handlers,
+            // so every handler has to take care of this case.
+            // Thanks to this, UTF8 code units can be accepted directly.
+            switch (m_State) {
+                case LexerState::Space:
+                    proc_space();
+                    break;
+                case LexerState::Single:
+                    proc_single();
+                    break;
+                case LexerState::Double:
+                    proc_double();
+                    break;
+                case LexerState::Escape:
+                    proc_escape();
+                    break;
+                case LexerState::Normal:
+                    proc_normal();
+                    break;
+            }
+        }
+
+        // All chars have been processed.
+        // Check the final state.
+        bool okey = false;
+        switch (m_State) {
+            case LexerState::Space:
+                // Ending in the Space state is fine.
+                okey = true;
+                break;
+            case LexerState::Normal:
+                // In the Normal state the argument being built is still pending,
+                // so push it into the collection before finishing.
+                m_ArgsCollection.emplace_back(m_CurrentArg);
+                okey = true;
+                break;
+            case LexerState::Single:
+            case LexerState::Double:
+            case LexerState::Escape:
+                // Ending in any other state is not expected.
+                okey = false;
+                break;
+        }
+
+        // Check success flag
+        if (okey) {
+            return this->m_ArgsCollection;
+        } else {
+            return std::unexpected(LexerError::UnexpectedEnd);
+        }
+    }
+
+    /**
+     * @brief Run lex() and rebuild the result from std::u8string copies of each argument.
+     * @param[in] cmd The command line to split.
+     * @return The rebuilt argument list, or the error reported by lex().
+     */
+    LexerResult> Lexer61::owend_lex(const std::u8string_view &cmd) {
+        auto rv = this->lex(cmd);
+        if (rv.has_value()) {
+            auto source = std::move(rv.value());
+            std::vector elems;
+            for (const auto &strl_view : source) {
+                elems.emplace_back(std::u8string(strl_view));
+            }
+            return elems;
+        } else {
+            // Forward the error to the caller. The `return` is required: without it the
+            // std::unexpected is discarded and control falls off the end of this function.
+            return std::unexpected(rv.error());
+        }
+    }
+
+    /// @brief Put the lexer back into its initial state before a new lex.
+    void Lexer61::reset() {
+        // These values may have been moved from, so assign fresh objects
+        // rather than clearing them.
+        m_ArgsCollection = decltype(Lexer61::m_ArgsCollection)();
+        m_CurrentArg = decltype(Lexer61::m_CurrentArg)();
+        // Set other values.
+        m_CurrentChar = u8'\0';
+        m_State = m_PrevState = LexerState::Space;
+    }
+
+    /// @brief Handle the current char while in the Space state.
+    void Lexer61::proc_space() {
+        switch (m_CurrentChar) {
+            case u8'\'':
+                m_State = LexerState::Single;
+                break;
+            case u8'"':
+                m_State = LexerState::Double;
+                break;
+            case u8'\\':
+                // The escaped char is appended next, so continue in Normal afterwards.
+                m_State = LexerState::Escape;
+                m_PrevState = LexerState::Normal;
+                break;
+            case u8' ':
+                // Skip blank
+                break;
+            default:
+                m_CurrentArg.push_back(m_CurrentChar);
+                m_State = LexerState::Normal;
+                break;
+        }
+    }
+
+    /// @brief Handle the current char while in the Single (single-quoted) state.
+    void Lexer61::proc_single() {
+        switch (m_CurrentChar) {
+            case u8'\'':
+                // Closing quote: continue in the Normal state.
+                m_State = LexerState::Normal;
+                break;
+            case u8'"':
+                m_CurrentArg.push_back(u8'"');
+                break;
+            case u8'\\':
+                m_State = LexerState::Escape;
+                m_PrevState = LexerState::Single;
+                break;
+            case u8' ':
+                m_CurrentArg.push_back(u8' ');
+                break;
+            default:
+                m_CurrentArg.push_back(m_CurrentChar);
+                break;
+        }
+    }
+
+    /// @brief Handle the current char while in the Double (double-quoted) state.
+    void Lexer61::proc_double() {
+        switch (m_CurrentChar) {
+            case u8'\'':
+                m_CurrentArg.push_back(u8'\'');
+                break;
+            case u8'"':
+                // Closing quote: continue in the Normal state.
+                m_State = LexerState::Normal;
+                break;
+            case u8'\\':
+                m_State = LexerState::Escape;
+                m_PrevState = LexerState::Double;
+                break;
+            case u8' ':
+                m_CurrentArg.push_back(u8' ');
+                break;
+            default:
+                m_CurrentArg.push_back(m_CurrentChar);
+                break;
+        }
+    }
+
+    /// @brief Handle the char following a backslash: it is always taken literally.
+    void Lexer61::proc_escape() {
+        // Add itself
+        m_CurrentArg.push_back(m_CurrentChar);
+        // And restore state
+        m_State = m_PrevState;
+    }
+
+    /// @brief Handle the current char while in the Normal state.
+    void Lexer61::proc_normal() {
+        switch (m_CurrentChar) {
+            case u8'\'':
+                m_CurrentArg.push_back(u8'\'');
+                break;
+            case u8'"':
+                m_CurrentArg.push_back(u8'"');
+                break;
+            case u8'\\':
+                m_State = LexerState::Escape;
+                m_PrevState = LexerState::Normal;
+                break;
+            case u8' ':
+                // A blank ends the current argument.
+                // NOTE(review): if m_ArgsCollection stores non-owning views (its element type
+                // is not visible in this text), clear() below would invalidate them - confirm.
+                m_ArgsCollection.emplace_back(m_CurrentArg);
+                m_CurrentArg.clear();
+                m_State = LexerState::Space;
+                break;
+            default:
+                m_CurrentArg.push_back(m_CurrentChar);
+                break;
+        }
+    }
+
+} // namespace yycc::carton::lexer61
diff --git a/src/yycc/carton/lexer61.hpp b/src/yycc/carton/lexer61.hpp
new file mode 100644
index
0000000..f64f43e
--- /dev/null
+++ b/src/yycc/carton/lexer61.hpp
@@ -0,0 +1,46 @@
+#pragma once
+#include "../macro/class_copy_move.hpp"
+#include // NOTE(review): the header names of these four includes are missing in this text (apparently lost during extraction) - restore them from the original file.
+#include
+#include
+#include
+
+namespace yycc::carton::lexer61 {
+
+    enum class LexerState { Space, Single, Double, Escape, Normal }; ///< The states the lexer switches between while scanning.
+
+    // @brief Any error that can occur while the lexer is working.
+    enum class LexerError {
+        UnexpectedEnd, ///< The command line ended in a state where more input was required.
+    };
+
+    template // NOTE(review): the template parameter list and the arguments of std::expected below are missing in this text - restore them from the original file.
+    using LexerResult = std::expected;
+
+    class Lexer61 { // State-machine lexer that splits a command line into arguments.
+    public:
+        Lexer61();
+        ~Lexer61();
+        YYCC_DEFAULT_COPY_MOVE(Lexer61) // presumably declares defaulted copy and move members - confirm in class_copy_move.hpp
+
+    public:
+        LexerResult> lex(const std::u8string_view& cmd); ///< Split cmd into arguments; fails with LexerError::UnexpectedEnd on an unfinished quote or escape.
+        LexerResult> owend_lex(const std::u8string_view& cmd); ///< Run lex() and rebuild each argument through a std::u8string copy.
+
+    private:
+        void reset(); ///< Bring all members back to their initial values before a new lex.
+
+        void proc_space(); ///< Handle the current char in the Space state.
+        void proc_single(); ///< Handle the current char in the Single (single-quoted) state.
+        void proc_double(); ///< Handle the current char in the Double (double-quoted) state.
+        void proc_escape(); ///< Handle the char that follows a backslash.
+        void proc_normal(); ///< Handle the current char in the Normal state.
+
+        std::vector m_ArgsCollection; ///< Internal result holder.
+        std::u8string m_CurrentArg; ///< The command line argument currently being built.
+        char8_t m_CurrentChar; ///< The char currently being analysed.
+        LexerState m_State; ///< The current state.
+        LexerState m_PrevState; ///< The state restored once an escaped char has been consumed.
+    };
+
+} // namespace yycc::carton::lexer61