from pathlib import Path
from abc import ABC, abstractmethod
from collections.abc import Iterator, Iterable
from typing import cast

from pylatexenc.latexwalker import (
    LatexWalker as PyLatexWalker,
    LatexNode as PyLatexNode,
    LatexCommentNode as PyLatexCommentNode,
    LatexMacroNode as PyLatexMacroNode,
)

from ..logger import LOGGER


class LatexWalker(Iterator[PyLatexNode]):
    """
    The interface of all LaTeX walkers.

    A LaTeX walker is an iterator over LaTeX nodes. For convenience it also
    provides `peek`, which returns the next node without advancing the
    walker, and a Rust-like `next`, which returns the next node (or None when
    exhausted) and advances the walker.
    """

    @abstractmethod
    def peek(self) -> PyLatexNode | None:
        """
        Peek the next node without advancing the walker.

        :return: The next node or None if there is no more node.
        """
        pass

    @abstractmethod
    def next(self) -> PyLatexNode | None:
        """
        Fetch the next node and advance the walker.

        :return: The next node or None if there is no more node.
        """
        pass

    def __iter__(self) -> Iterator[PyLatexNode]:
        """Return the walker itself, as required by the iterator protocol."""
        return self

    def __next__(self) -> PyLatexNode:
        """
        Adapt `next` to the Python iterator protocol.

        :return: The next node.
        :raises StopIteration: If `next` reports that no node is left.
        """
        node = self.next()
        if node is None:
            raise StopIteration
        return node


class FileLatexWalker(LatexWalker):
    """
    A trivial implementation of LaTeX walker which only outputs LaTeX nodes
    one by one.
    """

    walker: PyLatexWalker | None
    """The underlying walker"""
    nodelist: list[PyLatexNode]
    """The list of nodes provided by underlying walker"""
    cnt: int
    """The count of all nodes"""
    i: int
    """The current index"""

    def __init__(
        self, walker: PyLatexWalker | None, nodelist: list[PyLatexNode]
    ) -> None:
        """
        Initialize the walker positioned at the first node.

        :param walker: The underlying walker, or None if loading failed.
        :param nodelist: The nodes to iterate over.
        """
        self.walker = walker
        self.nodelist = nodelist
        self.cnt = len(nodelist)
        self.i = 0

    @staticmethod
    def from_file(filename: Path) -> "FileLatexWalker":
        """
        Build a walker from the content of a LaTeX file.

        Loading is best-effort: any failure while reading or parsing the file
        is logged as a warning and an empty walker is returned instead.

        :param filename: The path of the LaTeX file to load.
        :return: A walker over the nodes of the file, or an empty walker if
            the file could not be loaded.
        """
        try:
            # Decode explicitly as UTF-8 so that the result does not depend
            # on the locale of the machine running the program.
            with open(filename, "r", encoding="utf-8") as f:
                walker = PyLatexWalker(f.read())
            # Only the first element of the returned tuple (the node list)
            # is needed here.
            (nodelist, _, _) = walker.get_latex_nodes()
            return FileLatexWalker(walker, nodelist)
        except Exception as e:
            LOGGER.warning(f"Fail to read LaTeX file: {filename}. Reason: {e}.")
            return FileLatexWalker(None, [])

    def peek(self) -> PyLatexNode | None:
        """
        Peek the next node without advancing the walker.

        :return: The node at the current index, or None if there is no
            underlying walker or every node has been consumed.
        """
        if self.walker is None or self.i >= self.cnt:
            return None
        return self.nodelist[self.i]

    def next(self) -> PyLatexNode | None:
        """
        Fetch the next node and advance the walker.

        :return: The next node or None if there is no more node.
        """
        node = self.peek()
        if node is not None:
            self.i += 1
        return node


class ResolvingIncludeContext:
    """
    The bookkeeping shared while resolving include commands.
    """

    __loaded_texs: set[Path]
    """The set storing all loaded tex files to avoid circular including"""

    def __init__(self) -> None:
        """Initialize the context with no file registered."""
        self.__loaded_texs = set()

    def test(self, filename: Path) -> bool:
        """
        Register tex file name as loaded.

        :param filename: The file name to register.
        :return: False if this file name is already registered, otherwise true.
        """
        if filename in self.__loaded_texs:
            return False
        self.__loaded_texs.add(filename)
        return True


class ResolvingIncludeLatexWalker(LatexWalker):
    """
    A LaTeX walker wrapper that can resolve include command.
    """

    underlying: LatexWalker
    """The underlying walker"""
    loaded: LatexWalker | None
    """The walker loaded by LaTeX include command"""
    context: ResolvingIncludeContext
    """The context for resolving include command"""

    def __init__(
        self, underlying: LatexWalker, context: ResolvingIncludeContext
    ) -> None:
        """
        Initialize the wrapper with no included file loaded yet.

        :param underlying: The walker to wrap.
        :param context: The context for resolving include command.
        """
        self.underlying = underlying
        self.loaded = None
        self.context = context

    # TODO: planned alternate constructors, not implemented yet.
    # def from_filename(filename: Path) -> 'ResolvingIncludeLatexWalker':
    #     pass

    # def from_base_tex(walker: LatexWalker) -> 'ResolvingIncludeLatexWalker':
    #     pass

    # def from_included_tex(walker: LatexWalker, context: ResolvingIncludeContext) -> 'ResolvingIncludeLatexWalker':
    #     pass

    def peek(self) -> PyLatexNode | None:
        """
        Peek the next node without advancing the walker.

        TODO: not implemented yet; currently always reports no node.

        :return: Always None for now.
        """
        return None

    def next(self) -> PyLatexNode | None:
        """
        Fetch the next node and advance the walker.

        TODO: not implemented yet; currently always reports no node, so
        iterating over this walker ends immediately.

        :return: Always None for now.
        """
        return None


class Condition(ABC):
    """
    A condition that can break a LaTeX walker.
    """

    @abstractmethod
    def can_break(self, node: PyLatexNode) -> bool:
        """
        Check if the given node can trigger the walker to break.

        :param node: The node to check.
        :return: True if the node can trigger the walker to break, otherwise false.
        """
        pass


class MultiCommandCondition(Condition):
    """
    A condition that can break a LaTeX walker when one of the certain commands is encountered.
    """

    __commands: set[str]
    """The set of commands that can trigger the walker to break"""

    def __init__(self, commands: Iterable[str]) -> None:
        """
        Initialize the condition with a set of commands.

        :param commands: The commands that can trigger the walker to break.
        """
        self.__commands = set(commands)

    def can_break(self, node: PyLatexNode) -> bool:
        """
        Check if the given node is a macro whose name is one of the commands.

        :param node: The node to check.
        :return: True if the node is a macro node named after one of the
            registered commands, otherwise false.
        """
        if not isinstance(node, PyLatexMacroNode):
            return False
        macro_name = cast(str, node.macroname)
        return macro_name in self.__commands


class CommentCondition(Condition):
    """
    A condition that can break a LaTeX walker when a certain comment is encountered.
    """

    __comment: str
    """The comment that can trigger the walker to break"""

    def __init__(self, comment: str) -> None:
        """
        Initialize the condition with a comment.

        :param comment: The comment that can trigger the walker to break.
        """
        self.__comment = comment

    def can_break(self, node: PyLatexNode) -> bool:
        """
        Check if the given node is a comment exactly equal to the comment.

        :param node: The node to check.
        :return: True if the node is a comment node whose text equals the
            registered comment, otherwise false.
        """
        if not isinstance(node, PyLatexCommentNode):
            return False
        node_comment = cast(str, node.comment)
        return node_comment == self.__comment


class ConditionalLatexWalker(LatexWalker):
    """
    A LaTeX walker wrapper that can break when a certain condition is met.

    The node triggering the break is not consumed by this walker,
    and can be accessed by the underlying walker.
    """

    underlying: LatexWalker
    """The underlying walker"""
    condition: Condition

    def __init__(self, underlying: LatexWalker, condition: Condition) -> None:
        self.underlying = underlying
        self.condition = condition