Skip to content

Commit

Permalink
Refactor fully
Browse files Browse the repository at this point in the history
All line detection and splitting is now done in a separate LineBuilder
class. Lines which require OCR are later picked up by OcrBuilder.
  • Loading branch information
tarun-menta committed Jan 31, 2025
1 parent 6eb7e83 commit 834c097
Show file tree
Hide file tree
Showing 7 changed files with 500 additions and 442 deletions.
4 changes: 3 additions & 1 deletion marker/builders/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from marker.builders import BaseBuilder
from marker.builders.layout import LayoutBuilder
from marker.builders.line import LineBuilder
from marker.builders.ocr import OcrBuilder
from marker.providers.pdf import PdfProvider
from marker.schema import BlockTypes
Expand All @@ -27,9 +28,10 @@ class DocumentBuilder(BaseBuilder):
"Disable OCR processing.",
] = False

def __call__(self, provider: PdfProvider, layout_builder: LayoutBuilder, ocr_builder: OcrBuilder):
def __call__(self, provider: PdfProvider, layout_builder: LayoutBuilder, line_builder: LineBuilder, ocr_builder: OcrBuilder):
document = self.build_document(provider)
layout_builder(document, provider)
line_builder(document, provider)
if not self.disable_ocr:
ocr_builder(document, provider)
return document
Expand Down
436 changes: 436 additions & 0 deletions marker/builders/line.py

Large diffs are not rendered by default.

482 changes: 54 additions & 428 deletions marker/builders/ocr.py

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion marker/converters/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from marker.builders.document import DocumentBuilder
from marker.builders.layout import LayoutBuilder
from marker.builders.llm_layout import LLMLayoutBuilder
from marker.builders.line import LineBuilder
from marker.builders.ocr import OcrBuilder
from marker.builders.structure import StructureBuilder
from marker.converters import BaseConverter
Expand Down Expand Up @@ -130,9 +131,10 @@ def resolve_dependencies(self, cls):
def build_document(self, filepath: str):
provider_cls = provider_from_filepath(filepath)
layout_builder = self.resolve_dependencies(self.layout_builder_class)
line_builder = self.resolve_dependencies(LineBuilder)
ocr_builder = self.resolve_dependencies(OcrBuilder)
with provider_cls(filepath, self.config) as provider:
document = DocumentBuilder(self.config)(provider, layout_builder, ocr_builder)
document = DocumentBuilder(self.config)(provider, layout_builder, line_builder, ocr_builder)
StructureBuilder(self.config)(document)

for processor_cls in self.processor_list:
Expand Down
3 changes: 2 additions & 1 deletion marker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import List
from PIL import Image

from surya.detection import DetectionPredictor
from surya.detection import DetectionPredictor, InlineDetectionPredictor
from surya.layout import LayoutPredictor
from surya.ocr_error import OCRErrorPredictor
from surya.recognition import RecognitionPredictor
Expand Down Expand Up @@ -45,5 +45,6 @@ def create_model_dict(device=None, dtype=None) -> dict:
"recognition_model": RecognitionPredictor(device=device, dtype=dtype),
"table_rec_model": TableRecPredictor(device=device, dtype=dtype),
"detection_model": DetectionPredictor(device=device, dtype=dtype),
"inline_detection_model": InlineDetectionPredictor(device=device, dtype=dtype),
"ocr_error_model": OCRErrorPredictor(device=device, dtype=dtype)
}
2 changes: 1 addition & 1 deletion marker/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def raw_text(self):
return "".join(span.text for span in self.spans)

def __hash__(self):
return hash(tuple(self.line.polygon.bbox)+(self.raw_text))
return hash(tuple(self.line.polygon.bbox)+(self.raw_text,))

ProviderPageLines = Dict[int, List[ProviderOutput]]

Expand Down
11 changes: 1 addition & 10 deletions marker/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,4 @@ def matrix_intersection_area(boxes1: List[List[float]], boxes2: List[List[float]
width = np.maximum(0, max_x - min_x)
height = np.maximum(0, max_y - min_y)

return width * height # Shape: (N, M)

def rescale_bbox(bbox: List[float], old_size=tuple[float], new_size=tuple[float]):
width_scaler, height_scaler = new_size[0]/old_size[0], new_size[1]/old_size[1]
return [
bbox[0]*width_scaler,
bbox[1]*height_scaler,
bbox[2]*width_scaler,
bbox[3]*height_scaler
]
return width * height # Shape: (N, M)

0 comments on commit 834c097

Please sign in to comment.