Skip to content

Commit

Permalink
Misc fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
VikParuchuri committed Nov 19, 2024
1 parent 7e449a1 commit a72a508
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions marker/v2/converters/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def main(fpath: str, output_dir: str, debug: bool, output_format: str, pages: st
config["debug_pdf_images"] = True
config["debug_layout_images"] = True
config["debug_json"] = True
config["debug_data_folder"] = output_dir
if force_ocr:
config["force_ocr"] = True

Expand Down
2 changes: 1 addition & 1 deletion marker/v2/schema/blocks/listitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def replace_bullets(child_blocks):
first_block = child_blocks[0]
child_blocks = first_block.children

- if first_block.id.block_type == BlockTypes.Line:
+ if first_block is not None and first_block.id.block_type == BlockTypes.Line:
bullet_pattern = r"(^|[\n ]|<[^>]*>)[•●○■▪▫–—-]( )"
first_block.html = re.sub(bullet_pattern, r"\1\2", first_block.html)

Expand Down
2 changes: 1 addition & 1 deletion tests/processors/test_document_toc_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ def test_table_processor(pdf_document, detection_model, recognition_model, table
processor(pdf_document)

assert len(pdf_document.table_of_contents) == 3
- assert pdf_document.table_of_contents[0].text == "Subspace Adversarial Training"
+ assert pdf_document.table_of_contents[0]["text"] == "Subspace Adversarial Training"

0 comments on commit a72a508

Please sign in to comment.