From c471187d11cf83b79d2ffbcce898786499485c44 Mon Sep 17 00:00:00 2001
From: Vik Paruchuri
Date: Fri, 3 Jan 2025 15:59:57 -0500
Subject: [PATCH] Fix error with delimiters

---
 marker/schema/blocks/equation.py | 15 ++++++++++++++-
 marker/settings.py               |  1 -
 pyproject.toml                   |  2 +-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/marker/schema/blocks/equation.py b/marker/schema/blocks/equation.py
index 49affcfe..3881814b 100644
--- a/marker/schema/blocks/equation.py
+++ b/marker/schema/blocks/equation.py
@@ -11,7 +11,17 @@ class Equation(Block):
     def assemble_html(self, child_blocks, parent_structure=None):
         if self.latex:
             html_out = f"

"
-            for el in self.parse_latex(html.escape(self.latex)):
+
+            try:
+                latex = self.parse_latex(html.escape(self.latex))
+            except ValueError as e:
+                # If we have mismatched delimiters, we'll treat it as a single block
+                # Strip the $'s from the latex
+                latex = [
+                    {"class": "block", "content": self.latex.replace("$", "")}
+                ]
+
+            for el in latex:
                 if el["class"] == "block":
                     html_out += f'{el["content"]}'
                 elif el["class"] == "inline":
@@ -26,6 +36,9 @@ def assemble_html(self, child_blocks, parent_structure=None):
 
     @staticmethod
     def parse_latex(text: str):
+        if text.count("$") % 2 != 0:
+            raise ValueError("Mismatched delimiters in LaTeX")
+
         DELIMITERS = [
             ("$$", "block"),
             ("$", "inline")
diff --git a/marker/settings.py b/marker/settings.py
index 41d54536..0a3a0ef7 100644
--- a/marker/settings.py
+++ b/marker/settings.py
@@ -23,7 +23,6 @@ class Settings(BaseSettings):
 
     # General models
     TORCH_DEVICE: Optional[str] = None # Note: MPS device does not work for text detection, and will default to CPU
-    GOOGLE_API_KEY: Optional[str] = None
 
     @computed_field
     @property
diff --git a/pyproject.toml b/pyproject.toml
index 20b622e3..07f1682f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "marker-pdf"
-version = "1.2.2"
+version = "1.2.3"
 description = "Convert PDF to markdown with high speed and accuracy."
 authors = ["Vik Paruchuri "]
 readme = "README.md"