1
0
Files
bagu-thesis/src/frontend/latexwalker.py

237 lines
6.6 KiB
Python
Raw Normal View History

2026-06-07 14:00:00 +08:00
from pathlib import Path
from abc import ABC, abstractmethod
from collections.abc import Iterator, Iterable
from typing import cast
from pylatexenc.latexwalker import (
LatexWalker as PyLatexWalker,
LatexNode as PyLatexNode,
LatexCommentNode as PyLatexCommentNode,
LatexMacroNode as PyLatexMacroNode,
)
from ..logger import LOGGER
class LatexWalker(Iterator[PyLatexNode]):
    """
    The common interface of all LaTeX walkers.

    A LaTeX walker is an iterator over LaTeX nodes. On top of the standard
    iterator protocol it provides `peek`, which looks at the upcoming node
    without advancing the walker, and a Rust-like `next`, which fetches the
    upcoming node (or None when there is none left) and advances the walker.
    """

    @abstractmethod
    def peek(self) -> PyLatexNode | None:
        """
        Look at the upcoming node while leaving the walker where it is.
        :return: The upcoming node, or None if no node is left.
        """
        ...

    @abstractmethod
    def next(self) -> PyLatexNode | None:
        """
        Return the upcoming node and move the walker past it.
        :return: The upcoming node, or None if no node is left.
        """
        ...

    def __iter__(self) -> Iterator[PyLatexNode]:
        # A walker acts as its own iterator.
        return self

    def __next__(self) -> PyLatexNode:
        # Translate the None-terminated `next` into the StopIteration protocol.
        upcoming = self.next()
        if upcoming is not None:
            return upcoming
        raise StopIteration
class FileLatexWalker(LatexWalker):
    """
    A trivial implementation of LaTeX walker which only outputs LaTeX nodes one by one.

    The nodes are taken from a list that is fixed at construction time and
    are handed out in order by index.
    """

    walker: PyLatexWalker | None
    """The underlying walker, or None if no source could be loaded"""
    nodelist: list[PyLatexNode]
    """The list of nodes provided by underlying walker"""
    cnt: int
    """The count of all nodes"""
    i: int
    """The current index"""

    def __init__(
        self, walker: PyLatexWalker | None, nodelist: list[PyLatexNode]
    ) -> None:
        """
        Build a walker over an already parsed node list.
        :param walker: The parser that produced the nodes, or None.
        :param nodelist: The nodes to hand out, in order.
        """
        self.walker = walker
        self.nodelist = nodelist
        self.cnt = len(nodelist)
        self.i = 0

    @staticmethod
    def from_file(filename: Path) -> "FileLatexWalker":
        """
        Parse a LaTeX file and build a walker over its top-level nodes.

        The file is decoded as UTF-8 explicitly, so the result does not depend
        on the platform's default encoding.
        :param filename: The LaTeX file to load.
        :return: A walker over the parsed nodes. If reading or parsing fails,
            a warning is logged and an empty walker is returned instead.
        """
        try:
            with open(filename, "r", encoding="utf-8") as f:
                source = f.read()
            walker = PyLatexWalker(source)
            (nodelist, _, _) = walker.get_latex_nodes()
        except Exception as e:
            # Deliberately best-effort: an unreadable file yields no nodes.
            LOGGER.warning(f"Fail to read LaTeX file: {filename}. Reason: {e}.")
            return FileLatexWalker(None, list())
        return FileLatexWalker(walker, nodelist)

    def peek(self) -> PyLatexNode | None:
        """
        Peek the next node without advancing the walker.
        :return: The next node, or None if there is no underlying walker or
            every node has already been consumed.
        """
        if self.walker is None:
            return None
        if self.i >= self.cnt:
            return None
        return self.nodelist[self.i]

    def next(self) -> PyLatexNode | None:
        """
        Fetch the next node and advance the walker.
        :return: The next node or None if there is no more node.
        """
        node = self.peek()
        if node is not None:
            # Only move forward when a node was actually handed out.
            self.i += 1
        return node
class ResolvingIncludeContext:
    """
    Bookkeeping used while resolving include commands.

    It remembers every tex file that has been registered so the same file is
    never accepted twice, which avoids circular including.
    """

    __loaded_texs: set[Path]
    """The set storing all loaded tex files (in canonical form) to avoid circular including"""

    def __init__(self) -> None:
        self.__loaded_texs = set()

    def test(self, filename: Path) -> bool:
        """
        Register tex file name as loaded.

        The path is canonicalised before it is compared, so different
        spellings of the same file (relative vs. absolute, redundant `..`
        segments, symlinks) are recognised as one file.
        :param filename: The tex file about to be loaded.
        :return: False if this file is already registered, otherwise True.
        """
        try:
            key = filename.resolve()
        except (OSError, RuntimeError):
            # Canonicalisation failed (e.g. a symlink loop); fall back to the
            # path exactly as given rather than refusing to track it.
            key = filename
        if key in self.__loaded_texs:
            return False
        self.__loaded_texs.add(key)
        return True
class ResolvingIncludeLatexWalker(LatexWalker):
    """
    A LaTeX walker wrapper intended to resolve include commands.

    NOTE: `peek` and `next` are still stubs; both currently return None
    unconditionally.
    """

    underlying: LatexWalker
    """The wrapped walker"""
    loaded: LatexWalker | None
    """The walker loaded by a LaTeX include command, if any"""
    context: ResolvingIncludeContext
    """The context used for resolving include commands"""

    def __init__(
        self, underlying: LatexWalker, context: ResolvingIncludeContext
    ) -> None:
        """
        Wrap a walker together with the include-resolving context.
        :param underlying: The walker to wrap.
        :param context: The context for resolving include commands.
        """
        self.context = context
        self.underlying = underlying
        # No included file is loaded at construction time.
        self.loaded = None

    # Planned alternate constructors (not implemented yet):
    # def from_filename(filename: Path) -> 'ResolvingIncludeLatexWalker':
    #     pass
    # def from_base_tex(walker: LatexWalker) -> 'ResolvingIncludeLatexWalker':
    #     pass
    # def from_included_tex(walker: LatexWalker, context: ResolvingIncludeContext) -> 'ResolvingIncludeLatexWalker':
    #     pass

    def peek(self) -> PyLatexNode | None:
        # Stub: not implemented yet, so there is never a node to report.
        return None

    def next(self) -> PyLatexNode | None:
        # Stub: not implemented yet, so there is never a node to report.
        return None
class Condition(ABC):
    """
    A predicate over LaTeX nodes that tells a walker when to break.
    """

    @abstractmethod
    def can_break(self, node: PyLatexNode) -> bool:
        """
        Decide whether the given node triggers the walker to break.
        :param node: The node to check.
        :return: True if the node triggers the walker to break, otherwise False.
        """
        ...
class MultiCommandCondition(Condition):
    """
    A condition that breaks a LaTeX walker as soon as any one of a given set
    of commands is encountered.
    """

    __commands: set[str]
    """The names of the commands that trigger the walker to break"""

    def __init__(self, commands: Iterable[str]) -> None:
        """
        Initialize the condition with a collection of commands.
        :param commands: The commands that trigger the walker to break.
        """
        self.__commands = set(commands)

    def can_break(self, node: PyLatexNode) -> bool:
        """
        Check whether the node is a macro whose name is one of the commands.
        :param node: The node to check.
        :return: True for a macro node with a registered name, otherwise False.
        """
        # Only macro nodes carry a command name.
        if not isinstance(node, PyLatexMacroNode):
            return False
        name = cast(str, node.macroname)
        return name in self.__commands
class CommentCondition(Condition):
    """
    A condition that breaks a LaTeX walker when one specific comment is
    encountered.
    """

    __comment: str
    """The comment text that triggers the walker to break"""

    def __init__(self, comment: str) -> None:
        """
        Initialize the condition with a comment.
        :param comment: The comment that triggers the walker to break.
        """
        self.__comment = comment

    def can_break(self, node: PyLatexNode) -> bool:
        """
        Check whether the node is a comment equal to the configured one.
        :param node: The node to check.
        :return: True for a comment node whose text equals the configured
            comment exactly, otherwise False.
        """
        # Anything that is not a comment node can never match.
        if not isinstance(node, PyLatexCommentNode):
            return False
        text = cast(str, node.comment)
        return text == self.__comment
class ConditionalLatexWalker(LatexWalker):
"""
A LaTeX walker wrapper that can break when a certain condition is met.
The node triggering the break is not consumed by this walker,
and can be accessed by the underlying walker.
"""
underlying: LatexWalker
"""The underlying walker"""
condition: Condition
def __init__(self, underlying: LatexWalker, condition: Condition) -> None:
self.underlying = underlying
self.condition = condition