Skip to content

Commit

Permalink
Fix span id issue
Browse files: browse the repository at this point in the history
  • Loading branch information
VikParuchuri committed Jan 25, 2025
1 parent d3c43d6 commit e147ae6
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 6 deletions.
8 changes: 5 additions & 3 deletions marker/renderers/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,13 @@ def convert_table(self, el, text, convert_as_inline):
     def convert_a(self, el, text, convert_as_inline):
         text = self.escape(text)
         text = re.sub(r"([\[\]])", r"\\\1", text)
-        return super().convert_a(el, self.escape(text), convert_as_inline)
+        return super().convert_a(el, text, convert_as_inline)
 
     def convert_span(self, el, text, convert_as_inline):
-        return f'<span id="{el["id"]}"/>'
-
+        if el.get("id"):
+            return f'<span id="{el["id"]}">{text}</span>'
+        else:
+            return text
 
 class MarkdownOutput(BaseModel):
     markdown: str
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "marker-pdf"
-version = "1.3.1"
+version = "1.3.2"
 description = "Convert PDF to markdown with high speed and accuracy."
 authors = ["Vik Paruchuri <github@vikas.sh>"]
 readme = "README.md"
Expand Down
4 changes: 2 additions & 2 deletions tests/builders/test_pdf_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_pdf_links(pdf_document: Document, pdf_converter: PdfConverter, temp_pdf
     markdown = markdown_output.markdown
 
     assert '[II.](#page-1-0)' in markdown
-    assert '<span id="page-1-0"/>II. THEORETICAL FRAMEWORK' in markdown
+    assert '<span id="page-1-0"></span>II. THEORETICAL FRAMEWORK' in markdown
 
-    for ref in set([f'<span id="page-{m[0]}-{m[1]}"/>' for m in re.findall(r'\]\(#page-(\d+)-(\d+)\)', markdown)]):
+    for ref in set([f'<span id="page-{m[0]}-{m[1]}">' for m in re.findall(r'\]\(#page-(\d+)-(\d+)\)', markdown)]):
         assert ref in markdown, f"Reference {ref} not found in markdown"

0 comments on commit e147ae6

Please sign in to comment.