diff --git a/src/.vscode/settings.json b/.vscode/settings.json similarity index 100% rename from src/.vscode/settings.json rename to .vscode/settings.json diff --git a/src/baguthesis.py b/src/baguthesis.py index c249bab..1573192 100644 --- a/src/baguthesis.py +++ b/src/baguthesis.py @@ -1,9 +1,20 @@ -import cli.baguthesis +import tempfile +from pathlib import Path +import latex2xthesis +import xthesis2docx +from cli.baguthesis import LaTeX2DocxCli, parse_cli -def main(): - args = cli.baguthesis.parse_cli() +def main(opts: LaTeX2DocxCli): + with tempfile.TemporaryDirectory() as temp_dir: + # Create a temporary directory and XThesis intermediate file in it + temp_dir_path = Path(temp_dir).resolve() + temp_xthesis_path = temp_dir_path / "temp.xthesis" + # Break the options into frontend and backend options and run them respectively + frontend_opts, backend_opts = opts.break_into(temp_xthesis_path) + latex2xthesis.main(frontend_opts) + xthesis2docx.main(backend_opts) if __name__ == "__main__": - main() + main(parse_cli()) diff --git a/src/common.py b/src/common.py new file mode 100644 index 0000000..a7f3b86 --- /dev/null +++ b/src/common.py @@ -0,0 +1,54 @@ +from pathlib import Path +from logger import LOGGER + + +class BaGuException(Exception): + """The exception raised by this project.""" + pass + + + +def resolve_resource(tobe_resolved: str, resource_dir: Path) -> Path | None: + """ + Resolve given path in resource directory or current work directory. + + This function will use absolute path directly if it is and it is exist. + If not, this function will try to resolve it in resource directory first, + and then in current work directory. + + :param tobe_resolved: The path to be resolved. + :param resource_dir: The resource directory. + :return: The resolved path if resolved, otherwise None. + """ + tobe_resolved_path = Path(tobe_resolved) + LOGGER.debug(f'Resolving {tobe_resolved_path} ...') + + # Return absolute path directly + if tobe_resolved_path.is_absolute(): + if tobe_resolved_path.is_file(): + LOGGER.debug(f'Resolved {tobe_resolved_path}') + return tobe_resolved_path + else: + LOGGER.debug(f'{tobe_resolved_path} is absolute path but not a file.') + return None + + # Resolve it in resource directory first + resource_dir_path = Path(resource_dir).resolve() + resolved = resource_dir_path / tobe_resolved_path + if resolved.is_file(): + LOGGER.debug(f'Resolved {resolved}') + return resolved + else: + LOGGER.debug(f'Resolved failed in resource path because {resolved} is not a file.') + + # Resolve it in work directory + cwd_path = Path.cwd().resolve() + resolved = cwd_path / tobe_resolved_path + if resolved.is_file(): + LOGGER.debug(f'Resolved {resolved}') + return resolved + else: + LOGGER.debug(f'Resolved failed in current work directory because {resolved} is not a file.') + + # Not resolved + return None diff --git a/src/latex2xthesis.py b/src/latex2xthesis.py deleted file mode 100644 index d9e312c..0000000 --- a/src/latex2xthesis.py +++ /dev/null @@ -1,9 +0,0 @@ -import cli.latex2xthesis - - -def main(): - args = cli.latex2xthesis.parse_cli() - - -if __name__ == "__main__": - main() diff --git a/src/latex2xthesis/__init__.py b/src/latex2xthesis/__init__.py new file mode 100644 index 0000000..fd58be5 --- /dev/null +++ b/src/latex2xthesis/__init__.py @@ -0,0 +1,74 @@ +from pathlib import Path +from typing import Iterator +from pylatexenc import latexwalker +from cli.latex2xthesis import LaTeX2XThesisCli, parse_cli +from common import BaGuException +from logger import LOGGER + + +class LatexWalkerEnvironment: + + __loaded_texs: set[Path] + """The set storing all loaded tex files to avoid circular including""" + + def __init__(self) -> None: + self.__loaded_texs = set() + + def register_tex_filename(self, filename: Path) -> bool: + """ + Register tex file name as loaded. + + :return: False if this file name is already registered, otherwise true. + """ + if filename in self.__loaded_texs: + return False + else: + self.__loaded_texs.add(filename) + return True + + +class LatexWalker: + + environment: LatexWalkerEnvironment + """The environment of this walker""" + walker: latexwalker.LatexWalker | None + """The underlying walker""" + + def __init__(self, environment: LatexWalkerEnvironment, filename: Path) -> None: + # Check environment + self.environment = environment + + # Try to load file. + try: + with open(filename, 'r') as f: + self.walker = latexwalker.LatexWalker(f.read()) + except Exception as e: + LOGGER.warning(f'Fail to read LaTeX file: {filename}. Reason: {e}.') + self.walker = None + + def iter(self) -> Iterator[latexwalker.LatexNode]: + if self.walker is not None: + # get node list + nodelists: list[latexwalker.LatexNode] + (nodelists, _, _) = self.walker.get_latex_nodes() + + # special treat for command node for inserted document + for node in nodelists: + if isinstance(node, latexwalker.LatexMacroNode): + node.macroname + pass + else: + yield node + + + + + + + +def main(opts: LaTeX2XThesisCli): + pass + + +if __name__ == "__main__": + main(parse_cli()) diff --git a/src/latex2xthesis/extractor.py b/src/latex2xthesis/extractor.py new file mode 100644 index 0000000..e69de29 diff --git a/src/latex2xthesis/latexwalker.py b/src/latex2xthesis/latexwalker.py new file mode 100644 index 0000000..688ff67 --- /dev/null +++ b/src/latex2xthesis/latexwalker.py @@ -0,0 +1,236 @@ +from pathlib import Path +from abc import ABC, abstractmethod +from collections.abc import Iterator, Iterable +from typing import cast +from pylatexenc.latexwalker import ( + LatexWalker as PyLatexWalker, + LatexNode as PyLatexNode, + LatexCommentNode as PyLatexCommentNode, + LatexMacroNode as PyLatexMacroNode, +) +from ..logger import LOGGER + + +class LatexWalker(Iterator[PyLatexNode]): + """ + The interface of all LaTeX walkers. + + A LaTeX walker is an iterator that iterates over LaTeX nodes. + And for the convenience, we also provide a method `peek` to peek the next node without advancing the walker. + and an Rust-like iterator interface called `next` to fetch the next node and advance the walker. + """ + + @abstractmethod + def peek(self) -> PyLatexNode | None: + """ + Peek the next node without advancing the walker. + + :return: The next node or None if there is no more node. + """ + pass + + @abstractmethod + def next(self) -> PyLatexNode | None: + """ + Fetch the next node and advance the walker. + + :return: The next node or None if there is no more node. + """ + pass + + def __iter__(self) -> Iterator[PyLatexNode]: + return self + + def __next__(self) -> PyLatexNode: + node = self.next() + if node is None: + raise StopIteration + else: + return node + + +class FileLatexWalker(LatexWalker): + """ + A trivial implementation of LaTeX walker which only output LaTeX nodes one by one. + """ + + walker: PyLatexWalker | None + """The underlying walker""" + nodelist: list[PyLatexNode] + """The list of nodes provided by underlying walker""" + cnt: int + """The count of all nodes""" + i: int + """The current index""" + + def __init__( + self, walker: PyLatexWalker | None, nodelist: list[PyLatexNode] + ) -> None: + self.walker = walker + self.nodelist = nodelist + self.cnt = len(nodelist) + self.i = 0 + + @staticmethod + def from_file(filename: Path) -> "FileLatexWalker": + # Try to load file. + walker: PyLatexWalker | None + try: + with open(filename, "r") as f: + walker = PyLatexWalker(f.read()) + (nodelist, _, _) = walker.get_latex_nodes() + return FileLatexWalker(walker, nodelist) + except Exception as e: + LOGGER.warning(f"Fail to read LaTeX file: {filename}. Reason: {e}.") + return FileLatexWalker(None, list()) + + def peek(self) -> PyLatexNode | None: + if self.walker is None: + return None + + if self.i >= self.cnt: + return None + else: + return self.nodelist[self.i] + + def next(self) -> PyLatexNode | None: + node = self.peek() + if node is not None: + self.i += 1 + return node + + +class ResolvingIncludeContext: + __loaded_texs: set[Path] + """The set storing all loaded tex files to avoid circular including""" + + def __init__(self) -> None: + self.__loaded_texs = set() + + def test(self, filename: Path) -> bool: + """ + Register tex file name as loaded. + + :return: False if this file name is already registered, otherwise true. + """ + if filename in self.__loaded_texs: + return False + else: + self.__loaded_texs.add(filename) + return True + + +class ResolvingIncludeLatexWalker(LatexWalker): + """ + A LaTeX walker wrapper that can resolve include command. + """ + + underlying: LatexWalker + """The underlying walker""" + loaded: LatexWalker | None + """The walker loaded by LaTeX include command""" + context: ResolvingIncludeContext + """The context for resolving include command""" + + def __init__( + self, underlying: LatexWalker, context: ResolvingIncludeContext + ) -> None: + self.underlying = underlying + self.loaded = None + self.context = context + + # def from_filename(filename: Path) -> 'ResolvingIncludeLatexWalker': + # pass + + # def from_base_tex(walker: LatexWalker) -> 'ResolvingIncludeLatexWalker': + # pass + + # def from_included_tex(walker: LatexWalker, context: ResolvingIncludeContext) -> 'ResolvingIncludeLatexWalker': + # pass + + def peek(self) -> PyLatexNode | None: + pass + + def next(self) -> PyLatexNode | None: + pass + + +class Condition(ABC): + """ + A condition that can break a LaTeX walker. + """ + + @abstractmethod + def can_break(self, node: PyLatexNode) -> bool: + """ + Check if the given node can trigger the walker to break. + + :param node: The node to check. + :return: True if the node can trigger the walker to break, otherwise false. + """ + pass + + +class MultiCommandCondition(Condition): + """ + A condition that can break a LaTeX walker when one of the certain commands is encountered. + """ + + __commands: set[str] + """The set of commands that can trigger the walker to break""" + + def __init__(self, commands: Iterable[str]) -> None: + """ + Initialize the condition with a set of commands. + + :param commands: The commands that can trigger the walker to break. + """ + self.__commands = set(commands) + + def can_break(self, node: PyLatexNode) -> bool: + if isinstance(node, PyLatexMacroNode): + macro_name = cast(str, node.macroname) + return macro_name in self.__commands + else: + return False + + +class CommentCondition(Condition): + """ + A condition that can break a LaTeX walker when a certain comment is encountered. + """ + + __comment: str + """The comment that can trigger the walker to break""" + + def __init__(self, comment: str) -> None: + """ + Initialize the condition with a comment. + + :param comment: The comment that can trigger the walker to break. + """ + self.__comment = comment + + def can_break(self, node: PyLatexNode) -> bool: + if isinstance(node, PyLatexCommentNode): + node_comment = cast(str, node.comment) + return node_comment == self.__comment + else: + return False + + +class ConditionalLatexWalker(LatexWalker): + """ + A LaTeX walker wrapper that can break when a certain condition is met. + + The node triggering the break is not consumed by this walker, + and can be accessed by the underlying walker. + """ + + underlying: LatexWalker + """The underlying walker""" + condition: Condition + + def __init__(self, underlying: LatexWalker, condition: Condition) -> None: + self.underlying = underlying + self.condition = condition diff --git a/src/logger.py b/src/logger.py new file mode 100644 index 0000000..a2ecd69 --- /dev/null +++ b/src/logger.py @@ -0,0 +1,42 @@ +import logging +import enum + + +def _build_logger() -> tuple[logging.Logger, logging.Handler]: + # Create a new logger which is independent with Flask + logger = logging.getLogger("my_console_logger") + # Avoid message was propagated to root logger or captured by Flask logger. + logger.propagate = False + # Set initial level. + logger.setLevel(logging.INFO) + + # Create StreamHandler to output into stderr. + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + # Set format for it. + formatter = logging.Formatter("[%(levelname)s] %(message)s") + console_handler.setFormatter(formatter) + # Add handler + logger.addHandler(console_handler) + + return (logger, console_handler) + + +(LOGGER, CONSOLE_HANDLER) = _build_logger() + + +class LoggerLevel(enum.IntEnum): + DEBUG = enum.auto() + INFO = enum.auto() + + +def set_level(level: LoggerLevel) -> None: + logging_level: int = logging.INFO + match level: + case LoggerLevel.DEBUG: + logging_level = logging.DEBUG + case LoggerLevel.INFO: + logging_level = logging.INFO + + LOGGER.setLevel(logging_level) + CONSOLE_HANDLER.setLevel(logging_level) diff --git a/src/xthesis2docx.py b/src/xthesis2docx.py deleted file mode 100644 index a8de4b0..0000000 --- a/src/xthesis2docx.py +++ /dev/null @@ -1,9 +0,0 @@ -import cli.xthesis2docx - - -def main(): - args = cli.xthesis2docx.parse_cli() - - -if __name__ == "__main__": - main() diff --git a/src/xthesis2docx/__init__.py b/src/xthesis2docx/__init__.py new file mode 100644 index 0000000..953322d --- /dev/null +++ b/src/xthesis2docx/__init__.py @@ -0,0 +1,9 @@ +from cli.xthesis2docx import XThesis2DocxCli, parse_cli + + +def main(opts: XThesis2DocxCli): + pass + + +if __name__ == "__main__": + main(parse_cli()) diff --git a/src/xthesis2docx/__main__.py b/src/xthesis2docx/__main__.py new file mode 100644 index 0000000..953322d --- /dev/null +++ b/src/xthesis2docx/__main__.py @@ -0,0 +1,9 @@ +from cli.xthesis2docx import XThesis2DocxCli, parse_cli + + +def main(opts: XThesis2DocxCli): + pass + + +if __name__ == "__main__": + main(parse_cli())