Skip to content

Commit

Permalink
Merge pull request #369 from VikParuchuri/vik_v2
Browse files Browse the repository at this point in the history
Reorganize tests
  • Loading branch information
VikParuchuri authored Nov 19, 2024
2 parents 10f6ed0 + b191e17 commit f89089c
Show file tree
Hide file tree
Showing 12 changed files with 46 additions and 17 deletions.
5 changes: 5 additions & 0 deletions marker/v2/schema/blocks/handwriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,8 @@

class Handwriting(Block):
    """Block whose ``block_type`` is ``BlockTypes.Handwriting``.

    HTML assembly is delegated to the parent class; the result is then
    flattened onto one line and wrapped in a paragraph element.
    """

    block_type: BlockTypes = BlockTypes.Handwriting

    def assemble_html(self, child_blocks, parent_structure):
        """Return the parent-assembled HTML as a single-line ``<p>`` element.

        Both arguments are forwarded unchanged to the parent implementation.
        Every newline in the returned markup is replaced by a space before
        the markup is wrapped in ``<p>...</p>``.
        """
        single_line = super().assemble_html(
            child_blocks, parent_structure
        ).replace("\n", " ")
        return f"<p>{single_line}</p>"
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


@pytest.mark.filename("water_damage.pdf")
def test_ocr_pipeline(pdf_document):
def test_garbled_pdf(pdf_document):
assert pdf_document.pages[0].structure[0] == '/page/0/Table/0'

table_block = pdf_document.pages[0].get_block(pdf_document.pages[0].structure[0])
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
21 changes: 21 additions & 0 deletions tests/renderers/test_markdown_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest

from marker.v2.renderers.markdown import MarkdownRenderer


@pytest.mark.config({"page_range": [0]})
def test_markdown_renderer(pdf_document):
    """Render the document to markdown and check for the expected heading.

    The test is marked with a ``page_range`` of ``[0]``.
    """
    rendered = MarkdownRenderer()(pdf_document)

    # The expected heading text must appear as a markdown `#` heading.
    assert '# Subspace Adversarial Training' in rendered.markdown


@pytest.mark.config({"page_range": [0, 1], "paginate_output": True})
def test_markdown_renderer_pagination(pdf_document):
    """Render with ``paginate_output`` enabled and check for page markers.

    The renderer is constructed with ``{"paginate_output": True}``; the
    resulting markdown must contain a ``{0}-`` and a ``{1}-`` marker.
    """
    paginating_renderer = MarkdownRenderer({"paginate_output": True})
    markdown_text = paginating_renderer(pdf_document).markdown

    # One marker is expected per entry in the configured page range.
    for page_marker in ("{0}-", "{1}-"):
        assert page_marker in markdown_text
19 changes: 19 additions & 0 deletions tests/schema/groups/test_list_grouping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pytest

from marker.v2.builders.structure import StructureBuilder
from marker.v2.schema import BlockTypes


@pytest.mark.config({"page_range": [4]})
def test_list_grouping(pdf_document):
    """Check that the structure builder yields exactly one list group.

    ``StructureBuilder`` is run over the document, then the children of the
    first page in ``pdf_document.pages`` are scanned for blocks whose
    ``block_type`` is ``BlockTypes.ListGroup``.
    """
    structure = StructureBuilder()
    structure(pdf_document)

    # The attribute is `pages`; the previous spelling `pags` raised
    # AttributeError before any assertion could run.
    page = pdf_document.pages[0]
    list_groups = [
        block
        for block in page.children
        if block.block_type == BlockTypes.ListGroup
    ]

    assert len(list_groups) == 1

16 changes: 0 additions & 16 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,3 @@ def setup_pdf_provider(

provider = PdfProvider(temp_pdf.name, config)
return provider


def setup_pdf_document(
    filename='adversarial.pdf',
    config=None,
) -> Document:
    """Build a ``Document`` for the given test PDF.

    Loads the layout, recognition and detection models, creates a PDF
    provider for ``filename``, and runs ``DocumentBuilder`` with a layout
    builder and an OCR builder. ``config`` is passed unchanged to the
    provider and to every builder.
    """
    # Models are loaded in the same order as before: layout, recognition,
    # detection.
    layout = setup_layout_model()
    recognition = setup_recognition_model()
    detection = setup_detection_model()

    pdf_provider = setup_pdf_provider(filename, config)
    layout_stage = LayoutBuilder(layout, config)
    # OcrBuilder takes the detection model first, then the recognition model.
    ocr_stage = OcrBuilder(detection, recognition, config)
    return DocumentBuilder(config)(pdf_provider, layout_stage, ocr_stage)

0 comments on commit f89089c

Please sign in to comment.