diff --git a/marker/builders/document.py b/marker/builders/document.py
index bbc688a6..e87ba001 100644
--- a/marker/builders/document.py
+++ b/marker/builders/document.py
@@ -2,6 +2,7 @@
 from marker.builders import BaseBuilder
 from marker.builders.layout import LayoutBuilder
+from marker.builders.line import LineBuilder
 from marker.builders.ocr import OcrBuilder
 from marker.providers.pdf import PdfProvider
 from marker.schema import BlockTypes
@@ -27,9 +28,10 @@ class DocumentBuilder(BaseBuilder):
         "Disable OCR processing.",
     ] = False
 
-    def __call__(self, provider: PdfProvider, layout_builder: LayoutBuilder, ocr_builder: OcrBuilder):
+    def __call__(self, provider: PdfProvider, layout_builder: LayoutBuilder, line_builder: LineBuilder, ocr_builder: OcrBuilder):
         document = self.build_document(provider)
         layout_builder(document, provider)
+        line_builder(document, provider)
         if not self.disable_ocr:
             ocr_builder(document, provider)
         return document
diff --git a/marker/builders/line.py b/marker/builders/line.py
new file mode 100644
index 00000000..d28477eb
--- /dev/null
+++ b/marker/builders/line.py
@@ -0,0 +1,436 @@
+from typing import Annotated, List, Optional, Tuple
+
+import numpy as np
+
+from surya.detection import DetectionPredictor, InlineDetectionPredictor
+from surya.ocr_error import OCRErrorPredictor
+
+from marker.builders import BaseBuilder
+from marker.providers import ProviderOutput, ProviderPageLines
+from marker.providers.pdf import PdfProvider
+from marker.schema import BlockTypes
+from marker.schema.document import Document
+from marker.schema.groups.page import PageGroup
+from marker.schema.polygon import PolygonBox
+from marker.schema.registry import get_block_class
+from marker.schema.text.line import Line
+from marker.schema.text.span import Span
+from marker.settings import settings
+from marker.util import matrix_intersection_area
+
+class TextBox(PolygonBox):
+    math: bool = False
+
+class LineBuilder(BaseBuilder):
+    """
+    A builder for detecting text lines and inline math. Merges the detected lines with the lines from the provider.
+    """
+    detection_batch_size: Annotated[
+        Optional[int],
+        "The batch size to use for the detection model.",
+        "Default is None, which will use the default batch size for the model."
+    ] = None
+    ocr_error_batch_size: Annotated[
+        Optional[int],
+        "The batch size to use for the OCR error detection model.",
+        "Default is None, which will use the default batch size for the model."
+    ] = None
+    enable_table_ocr: Annotated[
+        bool,
+        "Whether to skip OCR on tables. The TableProcessor will re-OCR them. Only enable if the TableProcessor is not running.",
+    ] = False
+    layout_coverage_min_lines: Annotated[
+        int,
+        "The minimum number of PdfProvider lines that must be covered by the layout model",
+        "to consider the lines from the PdfProvider valid.",
+    ] = 1
+    layout_coverage_threshold: Annotated[
+        float,
+        "The minimum coverage ratio required for the layout model to consider",
+        "the lines from the PdfProvider valid.",
+    ] = .1
+    document_ocr_threshold: Annotated[
+        float,
+        "The minimum ratio of pages that must pass the layout coverage check",
+        "to avoid OCR.",
+    ] = .8
+    excluded_for_coverage: Annotated[
+        Tuple[BlockTypes],
+        "A list of block types to exclude from the layout coverage check.",
+    ] = (BlockTypes.Figure, BlockTypes.Picture, BlockTypes.Table, BlockTypes.FigureGroup, BlockTypes.TableGroup, BlockTypes.PictureGroup)
+
+    def __init__(self, detection_model: DetectionPredictor, inline_detection_model: InlineDetectionPredictor, ocr_error_model: OCRErrorPredictor, config=None):
+        super().__init__(config)
+
+        self.detection_model = detection_model
+        self.inline_detection_model = inline_detection_model
+        self.ocr_error_model = ocr_error_model
+
+    def __call__(self, document: Document, provider: PdfProvider):
+        provider_lines, ocr_lines = self.get_all_lines(document, provider)
+        self.merge_blocks(document, provider_lines, ocr_lines)
+
+    def get_detection_batch_size(self):
+        if self.detection_batch_size is not None:
+            return self.detection_batch_size
+        elif settings.TORCH_DEVICE_MODEL == "cuda":
+            return 4
+        return 4
+
+    def get_ocr_error_batch_size(self):
+        if self.ocr_error_batch_size is not None:
+            return self.ocr_error_batch_size
+        elif settings.TORCH_DEVICE_MODEL == "cuda":
+            return 4
+        return 4
+
+    def get_all_lines(self, document: Document, provider: PdfProvider):
+        page_images = [page.get_image(highres=False, remove_tables=not self.enable_table_ocr) for page in document.pages]
+        detection_results = self.detection_model(
+            images=page_images,
+        )
+        inline_detection_results = self.inline_detection_model(
+            images=page_images,
+        )
+        ocr_error_detection_results = self.ocr_error_detection(document.pages, provider.page_lines)
+
+        # For each page, decide which provider lines to keep and which detected boxes still need OCR
+        boxes_to_ocr = {page.page_id: [] for page in document.pages}
+        page_lines = {page.page_id: [] for page in document.pages}
+
+        SpanClass: Span = get_block_class(BlockTypes.Span)
+        LineClass: Line = get_block_class(BlockTypes.Line)
+
+        for document_page, detection_result, inline_detection_result, ocr_error_detection_label in zip(document.pages, detection_results, inline_detection_results, ocr_error_detection_results.labels):
+            provider_lines = provider.page_lines.get(document_page.page_id, [])
+            detection_result_split = self.split_detected_text_and_inline_boxes(text_boxes=[box for box in detection_result.bboxes], inline_boxes=[box for box in inline_detection_result.bboxes])
+            detected_text_lines = [box for box in detection_result_split if not box.math]
+            detected_inline_math_lines = [box for box in detection_result_split if box.math]
+            image_size = PolygonBox.from_bbox(detection_result.image_bbox).size
+            page_size = provider.get_page_bbox(document_page.page_id).size
+
+            provider_lines_good = bool(provider) and ocr_error_detection_label != 'bad' and self.check_layout_coverage(document_page, provider_lines)
+
+            if provider_lines_good:
+                # Merge inline math blocks into the provider lines; only persist newly detected text lines which do not overlap with existing provider lines
+                page_lines[document_page.page_id].extend(self.merge_provider_lines_inline_math(document_page.page_id, provider_lines, detected_inline_math_lines, image_size, page_size))
+                boxes_to_ocr[document_page.page_id].extend(self.filter_detected_text_lines(provider_lines, detected_text_lines, image_size, page_size))
+                continue
+
+            # Skip inline math merging if no provider lines are good; OCR all text lines and all inline math lines
+            boxes_to_ocr[document_page.page_id].extend(detected_text_lines)
+            for line in detected_inline_math_lines:
+                polygon = PolygonBox.from_bbox(line.bbox).rescale(image_size, page_size)
+                line = LineClass(
+                    polygon=polygon,
+                    page_id=document_page.page_id,
+                )
+                spans = [
+                    SpanClass(
+                        text="",
+                        formats=['math'],
+                        page_id=document_page.page_id,
+                        polygon=polygon,
+                        minimum_position=0,
+                        maximum_position=0,
+                        font='Unknown',
+                        font_weight=0,
+                        font_size=0,
+                    )
+                ]
+
+                page_lines[document_page.page_id].append(ProviderOutput(line=line, spans=spans))
+
+        # Dummy lines to merge into the document - contain no spans; they will be filled in later by the OcrBuilder
+        ocr_lines = {document_page.page_id: [] for document_page in document.pages}
+        for page_id, page_ocr_boxes in boxes_to_ocr.items():
+            page_size = provider.get_page_bbox(page_id).size
+            image_size = document.get_page(page_id).get_image(highres=False, remove_tables=not self.enable_table_ocr).size
+            for box_to_ocr in page_ocr_boxes:
+                line_polygon = PolygonBox(polygon=box_to_ocr.polygon).rescale(image_size, page_size)
+                ocr_lines[page_id].append(
+                    ProviderOutput(
+                        line=LineClass(
+                            polygon=line_polygon,
+                            page_id=page_id,
+                            text_extraction_method='surya'
+                        ),
+                        spans=[]
+                    )
+                )
+
+        return page_lines, ocr_lines
+
+    def ocr_error_detection(self, pages: List[PageGroup], provider_page_lines: ProviderPageLines):
+        page_texts = []
+        for document_page in pages:
+            page_text = ''
+            provider_lines = provider_page_lines.get(document_page.page_id, [])
+            page_text = '\n'.join(' '.join(s.text for s in line.spans) for line in provider_lines)
+            page_texts.append(page_text)
+
+        ocr_error_detection_results = self.ocr_error_model(
+            page_texts,
+            batch_size=int(self.get_ocr_error_batch_size())
+        )
+        return ocr_error_detection_results
+
+    def filter_detected_text_lines(self, provider_lines, detected_text_lines, image_size, page_size, threshold=0.7):
+        filtered_lines = []
+        for detected_line in detected_text_lines:
+            keep_line = True
+            detected_line_polygon = PolygonBox(polygon=detected_line.polygon).rescale(image_size, page_size)
+            detected_line_area = detected_line_polygon.area
+            for provider_line in provider_lines:
+                intersection_area = provider_line.line.polygon.intersection_area(detected_line_polygon)
+                if detected_line_area > 0 and (intersection_area / detected_line_area) > threshold:
+                    keep_line = False
+                    break
+
+            if keep_line:
+                filtered_lines.append(detected_line)
+
+        return filtered_lines
+
+
+    def merge_provider_lines_inline_math(self, document_page_id, provider_lines, inline_math_lines, image_size, page_size, min_inline_overlap=0.1, span_overlap_threshold=0.4):
+        updated_provider_lines = []
+        provider_to_math = {provider_line: [] for provider_line in provider_lines}
+
+        SpanClass: Span = get_block_class(BlockTypes.Span)
+
+        for math_line in inline_math_lines:
+            math_line_polygon = PolygonBox(polygon=math_line.polygon).rescale(image_size, page_size)
+            math_line_area = math_line_polygon.area
+            best_match = None
+            best_overlap = min_inline_overlap if min_inline_overlap else 0  # Start with this threshold at least; skip all boxes if it is not reached
+
+            for provider_line in provider_lines:
+                intersection_area = provider_line.line.polygon.intersection_area(math_line_polygon)
+
+                if math_line_area > 0:
+                    overlap = intersection_area / math_line_area
+                    if overlap > best_overlap:
+                        best_overlap = overlap
+                        best_match = provider_line
+
+            if best_match:
+                provider_to_math[best_match].append(math_line)
+
+        for provider_line, math_lines in provider_to_math.items():
+            # No intersection with math, or vertical text line - skip
+            if not math_lines or provider_line.line.polygon.height > provider_line.line.polygon.width:
+                updated_provider_lines.append(provider_line)
+                continue
+
+            # Remove all spans in the line that intersect with the math line
+            spans_to_keep = []
+            for span in provider_line.spans:
+                flag = False
+                span_area = span.polygon.area
+                for math_line in math_lines:
+                    math_line_polygon = PolygonBox(polygon=math_line.polygon).rescale(image_size, page_size)
+                    overlap = span.polygon.intersection_area(math_line_polygon) / span_area
+                    if overlap > span_overlap_threshold:
+                        flag = True
+                        break
+                if not flag:
+                    spans_to_keep.append(span)
+
+            # Add math lines in as new spans - empty text to be replaced with LaTeX by the EquationProcessor later
+            for math_line in math_lines:
+                spans_to_keep.append(
+                    SpanClass(
+                        text="",
+                        formats=['math'],
+                        page_id=document_page_id,
+                        polygon=PolygonBox(polygon=math_line.polygon).rescale(image_size, page_size),
+                        minimum_position=0,
+                        maximum_position=0,
+                        font='Unknown',
+                        font_weight=0,
+                        font_size=0,
+                    )
+                )
+            provider_line.spans = sorted(spans_to_keep, key=lambda s: s.polygon.x_start)
+            updated_provider_lines.append(provider_line)
+
+        return updated_provider_lines
+
+    def check_layout_coverage(
+        self,
+        document_page: PageGroup,
+        provider_lines: List[ProviderOutput],
+    ):
+        covered_blocks = 0
+        total_blocks = 0
+        large_text_blocks = 0
+
+        layout_blocks = [document_page.get_block(block) for block in document_page.structure]
+        layout_blocks = [b for b in layout_blocks if b.block_type not in self.excluded_for_coverage]
+
+        layout_bboxes = [block.polygon.bbox for block in layout_blocks]
+        provider_bboxes = [line.line.polygon.bbox for line in provider_lines]
+
+        intersection_matrix = matrix_intersection_area(layout_bboxes, provider_bboxes)
+
+        for idx, layout_block in enumerate(layout_blocks):
+            total_blocks += 1
+            intersecting_lines = np.count_nonzero(intersection_matrix[idx] > 0)
+
+            if intersecting_lines >= self.layout_coverage_min_lines:
+                covered_blocks += 1
+
+            if layout_block.polygon.intersection_pct(document_page.polygon) > 0.8 and layout_block.block_type == BlockTypes.Text:
+                large_text_blocks += 1
+
+        coverage_ratio = covered_blocks / total_blocks if total_blocks > 0 else 1
+        text_okay = coverage_ratio >= self.layout_coverage_threshold
+
+        # Model will sometimes say there is a single block of text on the page when it is blank
+        if not text_okay and (total_blocks == 1 and large_text_blocks == 1):
+            text_okay = True
+        return text_okay
+
+    def merge_blocks(self, document: Document, page_provider_lines: ProviderPageLines, page_ocr_lines: ProviderPageLines):
+        for document_page in document.pages:
+            document_page.merge_blocks(page_provider_lines[document_page.page_id], text_extraction_method="pdftext")
+            document_page.merge_blocks(page_ocr_lines[document_page.page_id], text_extraction_method="surya")
+
+
+    def split_detected_text_and_inline_boxes(
+        self,
+        text_boxes: List[PolygonBox],
+        inline_boxes: List[PolygonBox],
+    ) -> List[TextBox]:
+        """
+        Splits horizontal text boxes around inline boxes, skips vertical text boxes,
+        and retains unrelated text boxes.
+
+        Args:
+            text_boxes: List of TextBox objects representing text boxes.
+            inline_boxes: List of TextBox objects representing inline boxes.
+
+        Returns:
+            A new list of TextBox objects with split text boxes, inline boxes,
+            and unmodified vertical/unrelated text boxes.
+        """
+        result_boxes = []  # Final result to store the split boxes and retained boxes
+        horizontal_text_boxes = []  # Only horizontal text boxes to process
+
+        # Step 1: Separate vertical and horizontal text boxes
+        for text_box in text_boxes:
+            if text_box.height > text_box.width:
+                # Retain vertical text boxes
+                result_boxes.append(TextBox(
+                    polygon=text_box.polygon,
+                    confidence=text_box.confidence
+                ))
+            else:
+                horizontal_text_boxes.append(text_box)
+
+        # Step 2: Assign inline boxes to horizontal text boxes
+        inline_assignments = {inline_box: None for inline_box in inline_boxes}
+
+        for inline_box in inline_boxes:
+            max_overlap_ratio = 0.3  # Need at least this much overlap to even consider assignment at all
+            assigned_text_box = None
+
+            for text_box in horizontal_text_boxes:
+                # Calculate intersection area
+                intersection_area = text_box.intersection_area(inline_box)
+
+                # Calculate overlap ratios
+                inline_overlap_ratio = intersection_area / inline_box.area if inline_box.area > 0 else 0
+                text_overlap_ratio = intersection_area / text_box.area if text_box.area > 0 else 0
+
+                # Check if the inline box fully covers the text box
+                if text_overlap_ratio == 1:
+                    # Fully covered text box: Remove it and retain only the inline box
+                    if text_box in horizontal_text_boxes:
+                        horizontal_text_boxes.remove(text_box)
+                    inline_assignments[inline_box] = None
+                elif inline_overlap_ratio > max_overlap_ratio:
+                    # Assign inline box to the text box with the highest overlap ratio
+                    max_overlap_ratio = inline_overlap_ratio
+                    assigned_text_box = text_box
+
+            # Assign inline box to the selected text box (if not fully covering)
+            if assigned_text_box:
+                inline_assignments[inline_box] = assigned_text_box
+
+
+        for text_box in horizontal_text_boxes:
+            # Get all inline boxes assigned to this text box
+            assigned_inline_boxes = [
+                inline_box for inline_box, assigned_text in inline_assignments.items() if assigned_text == text_box
+            ]
+
+            if not assigned_inline_boxes:
+                # Retain the text box if it is not intersected by any inline boxes
+                result_boxes.append(TextBox(
+                    polygon=text_box.polygon,
+                    confidence=text_box.confidence
+                ))
+                continue
+            # Sort assigned inline boxes from left to right
+            assigned_inline_boxes.sort(key=lambda box: box.bbox[0])
+
+            current_x1 = text_box.bbox[0]  # Start with the leftmost x-coordinate of the text box
+            y1_t, y2_t = min(box.bbox[1] for box in [text_box] + assigned_inline_boxes), max(box.bbox[3] for box in [text_box] + assigned_inline_boxes)
+            text_segments = []
+
+            for inline_box in assigned_inline_boxes:
+                x1_i, x2_i = inline_box.bbox[0], inline_box.bbox[2]
+
+                # Add the text segment before the inline box, if any
+                if current_x1 < x1_i:
+                    text_segments.append(TextBox(
+                        polygon=[
+                            [current_x1, y1_t],
+                            [x1_i, y1_t],
+                            [x1_i, y2_t],
+                            [current_x1, y2_t],
+                        ],
+                        confidence=text_box.confidence
+                    ))
+
+                # Add the inline box itself
+                text_segments.append(TextBox(
+                    polygon=[
+                        [x1_i, y1_t],
+                        [x2_i, y1_t],
+                        [x2_i, y2_t],
+                        [x1_i, y2_t],
+                    ],
+                    confidence=inline_box.confidence,
+                    math=True
+                ))
+                current_x1 = x2_i  # Move the start point to after the current inline box
+
+            # Add any remaining text after the last inline box, if any
+            if current_x1 < text_box.bbox[2]:
+                text_segments.append(TextBox(
+                    polygon=[
+                        [current_x1, y1_t],
+                        [text_box.bbox[2], y1_t],
+                        [text_box.bbox[2], y2_t],
+                        [current_x1, y2_t],
+                    ],
+                    confidence=text_box.confidence
+                ))
+
+            # Append all split parts to the result
+            result_boxes.extend(text_segments)
+
+        # Step 4: Add inline boxes that replaced fully covered text boxes
+        for inline_box, assigned_text in inline_assignments.items():
+            if assigned_text is None:  # Covers a text box
+                result_boxes.append(TextBox(
+                    polygon=inline_box.polygon,
+                    confidence=inline_box.confidence,
+                    math=True
+                ))
+
+
+        return result_boxes
\ No newline at end of file
diff --git a/marker/builders/ocr.py b/marker/builders/ocr.py
index 124db7cf..a5445586 100644
--- a/marker/builders/ocr.py
+++ b/marker/builders/ocr.py
@@ -1,24 +1,20 @@
+import copy
 from typing import Annotated, List, Optional, Tuple
 
 from ftfy import fix_text
-import numpy as np
-from surya.detection import DetectionPredictor, TextBox
 from surya.recognition import RecognitionPredictor
-from surya.ocr_error import OCRErrorPredictor
 
 from marker.builders import BaseBuilder
 from marker.providers import ProviderOutput, ProviderPageLines
 from marker.providers.pdf import PdfProvider
 from marker.schema import BlockTypes
+from marker.schema.blocks import BlockId
 from marker.schema.document import Document
-from marker.schema.groups.page import PageGroup
 from marker.schema.polygon import PolygonBox
 from marker.schema.registry import get_block_class
 from marker.schema.text.line import Line
 from marker.schema.text.span import Span
 from marker.settings import settings
-from marker.util import matrix_intersection_area, rescale_bbox
-
 
 class OcrBuilder(BaseBuilder):
     """
@@ -29,55 +25,28 @@ class OcrBuilder(BaseBuilder):
         "The batch size to use for the recognition model.",
         "Default is None, which will use the default batch size for the model."
     ] = None
-    detection_batch_size: Annotated[
-        Optional[int],
-        "The batch size to use for the detection model.",
-        "Default is None, which will use the default batch size for the model."
-    ] = None
-    ocr_error_batch_size: Annotated[
-        Optional[int],
-        "The batch size to use for the ocr error detection model.",
-        "Default is None, which will use the default batch size for the model."
-    ] = None
-    languages: Annotated[
-        Optional[List[str]],
-        "A list of languages to use for OCR.",
-        "Default is None."
-    ] = None
     enable_table_ocr: Annotated[
         bool,
         "Whether to skip OCR on tables. The TableProcessor will re-OCR them. Only enable if the TableProcessor is not running.",
     ] = False
-    layout_coverage_min_lines: Annotated[
-        int,
-        "The minimum number of PdfProvider lines that must be covered by the layout model",
-        "to consider the lines from the PdfProvider valid.",
-    ] = 1
-    layout_coverage_threshold: Annotated[
-        float,
-        "The minimum coverage ratio required for the layout model to consider",
-        "the lines from the PdfProvider valid.",
-    ] = .1
-    document_ocr_threshold: Annotated[
+    block_ocr_threshold: Annotated[
         float,
-        "The minimum ratio of pages that must pass the layout coverage check",
-        "to avoid OCR.",
-    ] = .8
-    excluded_for_coverage: Annotated[
-        Tuple[BlockTypes],
-        "A list of block types to exclude from the layout coverage check.",
-    ] = (BlockTypes.Figure, BlockTypes.Picture, BlockTypes.Table, BlockTypes.FigureGroup, BlockTypes.TableGroup, BlockTypes.PictureGroup)
+        "The minimum fraction of detected lines in a block to OCR the block"
+    ] = 0.
+    languages: Annotated[
+        Optional[List[str]],
+        "A list of languages to use for OCR.",
+        "Default is None."
+ ] = None - def __init__(self, detection_model: DetectionPredictor, recognition_model: RecognitionPredictor, ocr_error_model: OCRErrorPredictor, config=None): + def __init__(self, recognition_model: RecognitionPredictor, config=None): super().__init__(config) - self.detection_model = detection_model self.recognition_model = recognition_model - self.ocr_error_model = ocr_error_model def __call__(self, document: Document, provider: PdfProvider): - provider_lines, ocr_lines= self.get_all_lines(document, provider) - self.merge_blocks(document, provider_lines, ocr_lines) + images, line_boxes, line_ids = self.get_ocr_images_boxes_ids(document, provider) + self.ocr_extraction(document, provider, images, line_boxes, line_ids) def get_recognition_batch_size(self): if self.recognition_batch_size is not None: @@ -88,402 +57,59 @@ def get_recognition_batch_size(self): return 32 return 32 - def get_detection_batch_size(self): - if self.detection_batch_size is not None: - return self.detection_batch_size - elif settings.TORCH_DEVICE_MODEL == "cuda": - return 4 - return 4 - - def get_ocr_error_batch_size(self): - if self.ocr_error_batch_size is not None: - return self.ocr_error_batch_size - elif settings.TORCH_DEVICE_MODEL == "cuda": - return 4 - return 4 - - def get_all_lines(self, document: Document, provider: PdfProvider): - detection_results = self.detection_model( - images=[page.get_image(highres=False, remove_tables=not self.enable_table_ocr) for page in document.pages], - detect_inline_math=True - ) - ocr_error_detection_results = self.ocr_error_detection(document.pages, provider.page_lines) - - #For each page, need to carry out the following steps: - lines_to_ocr = {page.page_id: [] for page in document.pages} - page_lines = {page.page_id: [] for page in document.pages} - - SpanClass: Span = get_block_class(BlockTypes.Span) - LineClass: Line = get_block_class(BlockTypes.Line) + def get_ocr_images_boxes_ids(self, document: Document, provider: PdfProvider): + highres_images, highres_boxes, line_ids = [], [], [] + for document_page in document.pages: + page_highres_image = document_page.get_image(highres=True, remove_tables=not self.enable_table_ocr) + page_highres_boxes = [] + page_line_ids = [] - for document_page, detection_result, ocr_error_detection_label in zip(document.pages, detection_results, ocr_error_detection_results.labels): - provider_lines = provider.page_lines.get(document_page.page_id, []) - detection_result_split = self.split_detected_text_and_inline_boxes(text_boxes=[box for box in detection_result.bboxes if not box.math], inline_boxes=[box for box in detection_result.bboxes if box.math]) - detected_text_lines = [box for box in detection_result_split if not box.math] - detected_inline_math_lines = [box for box in detection_result_split if box.math] - image_size = PolygonBox.from_bbox(detection_result.image_bbox).size page_size = provider.get_page_bbox(document_page.page_id).size + image_size = page_highres_image.size + for block in document_page.contained_blocks(document): + block_lines = block.contained_blocks(document, [BlockTypes.Line]) + block_detected_lines = [block_line for block_line in block_lines if block_line.text_extraction_method=='surya'] + if len(block_lines)==0 or len(block_detected_lines)/len(block_lines) 0 and (intersection_area / detected_line_area) > threshold: - keep_line = False - break - - if keep_line: - filtered_lines.append(detected_line) - - return filtered_lines - - - def merge_provider_lines_inline_math(self, document_page_id, provider_lines, 
inline_math_lines, image_size, page_size, min_inline_overlap=0.1, span_overlap_threshold=0.4): - updated_provider_lines = [] - provider_to_math = {provider_line: [] for provider_line in provider_lines} - - SpanClass: Span = get_block_class(BlockTypes.Span) - - for math_line in inline_math_lines: - math_line_polygon = PolygonBox(polygon=math_line.polygon).rescale(image_size, page_size) - math_line_area = math_line_polygon.area - best_match = None - best_overlap = min_inline_overlap if min_inline_overlap else 0 #Start with this threshold atleast, skip all boxes if not reached - - for provider_line in provider_lines: - intersection_area = provider_line.line.polygon.intersection_area(math_line_polygon) - - if math_line_area > 0: - overlap = intersection_area / math_line_area - if overlap > best_overlap: - best_overlap = overlap - best_match = provider_line - - if best_match: - provider_to_math[best_match].append(math_line) - - for provider_line, math_lines in provider_to_math.items(): - #No intersection with math, or vertical text line - Skip - if not math_lines or provider_line.line.polygon.height>provider_line.line.polygon.width: - updated_provider_lines.append(provider_line) - continue - - #Remove all spans in the line that intersect with the math line - spans_to_keep = [] - for span in provider_line.spans: - flag = False - span_area = span.polygon.area - for math_line in math_lines: - math_line_polygon = PolygonBox(polygon=math_line.polygon).rescale(image_size, page_size) - overlap = span.polygon.intersection_area(math_line_polygon)/span_area - if overlap>span_overlap_threshold: - flag = True - break - if not flag: - spans_to_keep.append(span) - - - - #Add math lines in as new spans. - for math_line in math_lines: - spans_to_keep.append( - SpanClass( - text="INLINE", - formats=['math'], - page_id=document_page_id, - polygon=PolygonBox(polygon=math_line.polygon).rescale(image_size, page_size), - minimum_position=0, - maximum_position=0, - font='Unknown', - font_weight=0, - font_size=0, - ) - ) - provider_line.spans = sorted(spans_to_keep, key=lambda s:s.polygon.x_start) - updated_provider_lines.append(provider_line) - - return updated_provider_lines + for line in block_detected_lines: + line_polygon = copy.deepcopy(line.polygon) + page_highres_boxes.append(line_polygon.rescale(page_size, image_size).bbox) + page_line_ids.append(line.id) - def ocr_error_detection(self, pages:List[PageGroup], provider_page_lines: ProviderPageLines): - page_texts = [] - for document_page in pages: - page_text = '' - provider_lines = provider_page_lines.get(document_page.page_id, []) - page_text = '\n'.join(' '.join(s.text for s in line.spans) for line in provider_lines) - page_texts.append(page_text) + highres_images.append(page_highres_image) + highres_boxes.append(page_highres_boxes) + line_ids.append(page_line_ids) - ocr_error_detection_results = self.ocr_error_model( - page_texts, - batch_size=int(self.get_ocr_error_batch_size()) - ) - return ocr_error_detection_results - - - def ocr_extraction(self, document: Document, provider: PdfProvider, lowres_detected_text_lines:any) -> ProviderPageLines: - highres_images, scaled_bboxes = [], [] - for document_page in document.pages: - highres_image = document_page.get_image(highres=True, remove_tables=not self.enable_table_ocr) - highres_size = highres_image.size - - lowres_size = document_page.get_image(highres=False).size - - lowres_boxes = [line.bbox for line in lowres_detected_text_lines[document_page.page_id]] - highres_boxes = list(map(rescale_bbox, 
lowres_boxes, [lowres_size]*len(lowres_boxes), [highres_size]*len(lowres_boxes))) - - highres_images.append(highres_image) - scaled_bboxes.append(highres_boxes) + return highres_images, highres_boxes, line_ids - if sum(len(b) for b in scaled_bboxes)==0: - return {page.page_id: [] for page in document.pages} + def ocr_extraction(self, document: Document, provider: PdfProvider, images: List[any], line_boxes: List[List[float]], line_ids: List[List[BlockId]]) -> ProviderPageLines: + if sum(len(b) for b in line_boxes)==0: + return # Remove tables because we re-OCR them later with the table processor recognition_results = self.recognition_model( - images=highres_images, - bboxes=scaled_bboxes, + images=images, + bboxes=line_boxes, langs=[self.languages] * len(document.pages), recognition_batch_size=int(self.get_recognition_batch_size()), ) - page_lines = {} - SpanClass: Span = get_block_class(BlockTypes.Span) - LineClass: Line = get_block_class(BlockTypes.Line) - - for page_id, recognition_result in zip((page.page_id for page in document.pages), recognition_results): - page_lines.setdefault(page_id, []) - - page_size = provider.get_page_bbox(page_id).size - - for ocr_line_idx, ocr_line in enumerate(recognition_result.text_lines): - image_polygon = PolygonBox.from_bbox(recognition_result.image_bbox) - polygon = PolygonBox.from_bbox(ocr_line.bbox).rescale(image_polygon.size, page_size) - - line = LineClass( - polygon=polygon, - page_id=page_id, + for document_page, page_recognition_result, page_line_ids in zip(document.pages, recognition_results, line_ids): + for line_id, ocr_line in zip(page_line_ids, page_recognition_result.text_lines): + line = document_page.get_block(line_id) + assert line.structure is None + new_span = SpanClass( + text=fix_text(ocr_line.text) + '\n', + formats=['math'], + page_id=document_page.page_id, + polygon=copy.deepcopy(line.polygon), + minimum_position=0, + maximum_position=0, + font='Unknown', + font_weight=0, + font_size=0, ) - spans = [ - SpanClass( - text=fix_text(ocr_line.text) + "\n", - formats=['plain'], - page_id=page_id, - polygon=polygon, - minimum_position=0, - maximum_position=0, - font='Unknown', - font_weight=0, - font_size=0, - ) - ] - - page_lines[page_id].append(ProviderOutput(line=line, spans=spans)) - - return page_lines - - def check_layout_coverage( - self, - document_page: PageGroup, - provider_lines: List[ProviderOutput], - ): - covered_blocks = 0 - total_blocks = 0 - large_text_blocks = 0 - - layout_blocks = [document_page.get_block(block) for block in document_page.structure] - layout_blocks = [b for b in layout_blocks if b.block_type not in self.excluded_for_coverage] - - layout_bboxes = [block.polygon.bbox for block in layout_blocks] - provider_bboxes = [line.line.polygon.bbox for line in provider_lines] - - intersection_matrix = matrix_intersection_area(layout_bboxes, provider_bboxes) - - for idx, layout_block in enumerate(layout_blocks): - total_blocks += 1 - intersecting_lines = np.count_nonzero(intersection_matrix[idx] > 0) - - if intersecting_lines >= self.layout_coverage_min_lines: - covered_blocks += 1 - - if layout_block.polygon.intersection_pct(document_page.polygon) > 0.8 and layout_block.block_type == BlockTypes.Text: - large_text_blocks += 1 - - coverage_ratio = covered_blocks / total_blocks if total_blocks > 0 else 1 - text_okay = coverage_ratio >= self.layout_coverage_threshold - - # Model will sometimes say there is a single block of text on the page when it is blank - if not text_okay and (total_blocks == 1 and 
large_text_blocks == 1): - text_okay = True - return text_okay - - def merge_blocks(self, document: Document, page_provider_lines: ProviderPageLines, page_ocr_lines: ProviderPageLines): - for document_page in document.pages: - document_page.merge_blocks(page_provider_lines[document_page.page_id], text_extraction_method="pdftext") - document_page.merge_blocks(page_ocr_lines[document_page.page_id], text_extraction_method="surya") - - - def split_detected_text_and_inline_boxes( - self, - text_boxes: List[TextBox], - inline_boxes: List[TextBox], - ) -> List[TextBox]: - """ - Splits horizontal text boxes around inline boxes, skips vertical text boxes, - and retains unrelated text boxes. - - Args: - text_boxes: List of TextBox objects representing text boxes. - inline_boxes: List of TextBox objects representing inline boxes. - - Returns: - A new list of TextBox objects with split text boxes, inline boxes, - and unmodified vertical/unrelated text boxes. - """ - result_boxes = [] # Final result to store the split boxes and retained boxes - horizontal_text_boxes = [] # Only horizontal text boxes to process - - # Step 1: Separate vertical and horizontal text boxes - for text_box in text_boxes: - if text_box.height > text_box.width: - # Retain vertical text boxes - result_boxes.append(text_box) - else: - horizontal_text_boxes.append(text_box) - - # Step 2: Assign inline boxes to horizontal text boxes - inline_assignments = {inline_box: None for inline_box in inline_boxes} - - for inline_box in inline_boxes: - max_overlap_ratio = 0.3 #Need atleast this much overlap to even consider assignment at all - assigned_text_box = None - - for text_box in horizontal_text_boxes: - # Calculate intersection area - intersection_area = text_box.intersection_area(inline_box) - - # Calculate overlap ratios - inline_overlap_ratio = intersection_area / inline_box.area if inline_box.area > 0 else 0 - text_overlap_ratio = intersection_area / text_box.area if text_box.area > 0 else 0 - - # Check if the inline box fully covers the text box - if text_overlap_ratio == 1: - # Fully covered text box: Remove it and retain only the inline box - if text_box in horizontal_text_boxes: - horizontal_text_boxes.remove(text_box) - inline_assignments[inline_box] = None - elif inline_overlap_ratio > max_overlap_ratio: - # Assign inline box to the text box with the highest overlap ratio - max_overlap_ratio = inline_overlap_ratio - assigned_text_box = text_box - - # Assign inline box to the selected text box (if not fully covering) - if assigned_text_box: - inline_assignments[inline_box] = assigned_text_box - - - for text_box in horizontal_text_boxes: - # Get all inline boxes assigned to this text box - assigned_inline_boxes = [ - inline_box for inline_box, assigned_text in inline_assignments.items() if assigned_text == text_box - ] - - if not assigned_inline_boxes: - # Retain the text box if it is not intersected by any inline boxes - result_boxes.append(text_box) - continue - # Sort assigned inline boxes from left to right - assigned_inline_boxes.sort(key=lambda box: box.bbox[0]) - - current_x1 = text_box.bbox[0] # Start with the leftmost x-coordinate of the text box - y1_t, y2_t = min(box.bbox[1] for box in [text_box]+assigned_inline_boxes), max(box.bbox[3] for box in [text_box]+assigned_inline_boxes) - text_segments = [] - - for inline_box in assigned_inline_boxes: - x1_i, x2_i = inline_box.bbox[0], inline_box.bbox[2] - - # Add the text segment before the inline box, if any - if current_x1 < x1_i: - text_segments.append(TextBox( - 
polygon=[ - [current_x1, y1_t], - [x1_i, y1_t], - [x1_i, y2_t], - [current_x1, y2_t], - ] - )) - - # Add the inline box itself - text_segments.append(TextBox( - polygon=[ - [x1_i, y1_t], - [x2_i, y1_t], - [x2_i, y2_t], - [x1_i, y2_t], - ], - confidence=inline_box.confidence, - math=True - )) - current_x1 = x2_i # Move the start point to after the current inline box - - # Add any remaining text after the last inline box, if any - if current_x1 < text_box.bbox[2]: - text_segments.append(TextBox( - polygon=[ - [current_x1, y1_t], - [text_box.bbox[2], y1_t], - [text_box.bbox[2], y2_t], - [current_x1, y2_t], - ] - )) - - # Append all split parts to the result - result_boxes.extend(text_segments) - - # Step 4: Add inline boxes that replaced fully covered text boxes - for inline_box, assigned_text in inline_assignments.items(): - if assigned_text is None: # Covers a text box - inline_box.math = True - result_boxes.append(inline_box) - - - return result_boxes + document_page.add_full_block(new_span) + line.add_structure(new_span) \ No newline at end of file diff --git a/marker/converters/pdf.py b/marker/converters/pdf.py index 3741b760..de7c9afd 100644 --- a/marker/converters/pdf.py +++ b/marker/converters/pdf.py @@ -12,6 +12,7 @@ from marker.builders.document import DocumentBuilder from marker.builders.layout import LayoutBuilder from marker.builders.llm_layout import LLMLayoutBuilder +from marker.builders.line import LineBuilder from marker.builders.ocr import OcrBuilder from marker.builders.structure import StructureBuilder from marker.converters import BaseConverter @@ -130,9 +131,10 @@ def resolve_dependencies(self, cls): def build_document(self, filepath: str): provider_cls = provider_from_filepath(filepath) layout_builder = self.resolve_dependencies(self.layout_builder_class) + line_builder = self.resolve_dependencies(LineBuilder) ocr_builder = self.resolve_dependencies(OcrBuilder) with provider_cls(filepath, self.config) as provider: - document = DocumentBuilder(self.config)(provider, layout_builder, ocr_builder) + document = DocumentBuilder(self.config)(provider, layout_builder, line_builder, ocr_builder) StructureBuilder(self.config)(document) for processor_cls in self.processor_list: diff --git a/marker/models.py b/marker/models.py index 908fb863..41243ab4 100644 --- a/marker/models.py +++ b/marker/models.py @@ -7,7 +7,7 @@ from typing import List from PIL import Image -from surya.detection import DetectionPredictor +from surya.detection import DetectionPredictor, InlineDetectionPredictor from surya.layout import LayoutPredictor from surya.ocr_error import OCRErrorPredictor from surya.recognition import RecognitionPredictor @@ -45,5 +45,6 @@ def create_model_dict(device=None, dtype=None) -> dict: "recognition_model": RecognitionPredictor(device=device, dtype=dtype), "table_rec_model": TableRecPredictor(device=device, dtype=dtype), "detection_model": DetectionPredictor(device=device, dtype=dtype), + "inline_detection_model": InlineDetectionPredictor(device=device, dtype=dtype), "ocr_error_model": OCRErrorPredictor(device=device, dtype=dtype) } \ No newline at end of file diff --git a/marker/providers/__init__.py b/marker/providers/__init__.py index 435cd413..7d9ae97e 100644 --- a/marker/providers/__init__.py +++ b/marker/providers/__init__.py @@ -20,7 +20,7 @@ def raw_text(self): return "".join(span.text for span in self.spans) def __hash__(self): - return hash(tuple(self.line.polygon.bbox)+(self.raw_text)) + return hash(tuple(self.line.polygon.bbox)+(self.raw_text,)) ProviderPageLines = 
Dict[int, List[ProviderOutput]]
diff --git a/marker/util.py b/marker/util.py
index 5d0080df..bb94d8c9 100644
--- a/marker/util.py
+++ b/marker/util.py
@@ -79,13 +79,4 @@ def matrix_intersection_area(boxes1: List[List[float]], boxes2: List[List[float]]
     width = np.maximum(0, max_x - min_x)
     height = np.maximum(0, max_y - min_y)
 
-    return width * height # Shape: (N, M)
-
-def rescale_bbox(bbox: List[float], old_size=tuple[float], new_size=tuple[float]):
-    width_scaler, height_scaler = new_size[0]/old_size[0], new_size[1]/old_size[1]
-    return [
-        bbox[0]*width_scaler,
-        bbox[1]*height_scaler,
-        bbox[2]*width_scaler,
-        bbox[3]*height_scaler
-    ]
+    return width * height # Shape: (N, M)
\ No newline at end of file
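
For reviewers, here is a minimal wiring sketch (not part of the diff) showing how the builder chain fits together after this change: LineBuilder now sits between LayoutBuilder and OcrBuilder and takes the detection, inline-math detection, and OCR-error predictors that were previously passed to OcrBuilder. The "layout_model" key, the LayoutBuilder constructor signature, and the file path are assumptions not shown in this diff.

# Hypothetical usage sketch; mirrors PdfConverter.build_document() in marker/converters/pdf.py above.
from marker.builders.document import DocumentBuilder
from marker.builders.layout import LayoutBuilder
from marker.builders.line import LineBuilder
from marker.builders.ocr import OcrBuilder
from marker.models import create_model_dict
from marker.providers.pdf import PdfProvider

models = create_model_dict()  # now also contains "inline_detection_model"
config = {}  # empty config uses the defaults declared on the builders

with PdfProvider("example.pdf", config) as provider:  # "example.pdf" is a placeholder path
    layout_builder = LayoutBuilder(models["layout_model"], config)  # assumed signature and model key
    # LineBuilder receives the three predictors that this PR moves out of OcrBuilder
    line_builder = LineBuilder(
        models["detection_model"],
        models["inline_detection_model"],
        models["ocr_error_model"],
        config,
    )
    ocr_builder = OcrBuilder(models["recognition_model"], config)  # recognition model only now
    # DocumentBuilder.__call__ gained the line_builder argument in this change
    document = DocumentBuilder(config)(provider, layout_builder, line_builder, ocr_builder)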