From 4591f317e8c2bc9c6c9eb5c3422b3828bca4b193 Mon Sep 17 00:00:00 2001
From: Vik Paruchuri
Date: Tue, 19 Nov 2024 10:57:28 -0500
Subject: [PATCH] Add json renderer tests

---
Review notes (this section is not part of the commit message):
 * Line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch. Context-line indentation is inferred from Python
   syntax; confirm it against the target tree before applying.
 * compute_toc now stores element [0] of the extract_block_html() result
   as the table-of-contents title.
 * The new JSON renderer test is named test_json_renderer_pagination
   rather than the copy-pasted test_markdown_renderer_pagination, so the
   test id reflects the renderer under test.
 * Both test modules now end with a trailing newline.
 * The post-image blob ids on the "index" lines come from the original
   commit and were not recomputed after the two edits above.

 marker/v2/renderers/__init__.py           |  2 +-
 tests/renderers/test_json_renderer.py     | 13 +++++++++++++
 tests/renderers/test_markdown_renderer.py | 11 ++++++++++-
 3 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 tests/renderers/test_json_renderer.py

diff --git a/marker/v2/renderers/__init__.py b/marker/v2/renderers/__init__.py
index 8837ca3c..e87ee655 100644
--- a/marker/v2/renderers/__init__.py
+++ b/marker/v2/renderers/__init__.py
@@ -57,7 +57,7 @@ def compute_toc(self, document, block_output: BlockOutput):
         toc = []
         if hasattr(block_output, "id") and block_output.id.block_type == BlockTypes.SectionHeader:
             toc.append({
-                "title": self.extract_block_html(document, block_output),
+                "title": self.extract_block_html(document, block_output)[0],
                 "level": document.get_block(block_output.id).heading_level,
                 "page": block_output.id.page_id
             })
diff --git a/tests/renderers/test_json_renderer.py b/tests/renderers/test_json_renderer.py
new file mode 100644
index 00000000..d42ab79d
--- /dev/null
+++ b/tests/renderers/test_json_renderer.py
@@ -0,0 +1,13 @@
+import pytest
+
+from marker.v2.renderers.json import JSONRenderer
+
+
+@pytest.mark.config({"page_range": [0]})
+def test_json_renderer_pagination(pdf_document):
+    renderer = JSONRenderer()
+    pages = renderer(pdf_document).children
+
+    assert len(pages) == 1
+    assert pages[0].block_type == "Page"
+    assert pages[0].children[0].block_type == "SectionHeader"
diff --git a/tests/renderers/test_markdown_renderer.py b/tests/renderers/test_markdown_renderer.py
index ba28402d..e44e0b54 100644
--- a/tests/renderers/test_markdown_renderer.py
+++ b/tests/renderers/test_markdown_renderer.py
@@ -18,4 +18,13 @@ def test_markdown_renderer_pagination(pdf_document):
     md = renderer(pdf_document).markdown
 
     assert "{0}-" in md
-    assert "{1}-" in md
\ No newline at end of file
+    assert "{1}-" in md
+
+
+@pytest.mark.config({"page_range": [0, 1]})
+def test_markdown_renderer_metadata(pdf_document):
+    renderer = MarkdownRenderer({"paginate_output": True})
+    metadata = renderer(pdf_document).metadata
+    assert "table_of_contents" in metadata
+
+    assert "Subspace Adversarial Training" in metadata["table_of_contents"][0]["title"]