diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b7cb985..c9239f8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,6 +20,6 @@ jobs: poetry install - name: Run detection benchmark test run: | - poetry run python benchmark.py --max 5 --result_path results + poetry run python benchmark.py --max 5 --result_path results --pdftext_only poetry run python scripts/verify_benchmark_scores.py results/results.json diff --git a/README.md b/README.md index 6eab318..7a764d6 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ Here are the scores: | Library | Time (s per page) | Alignment Score (% accuracy vs pymupdf) | |------------|-------------------|-----------------------------------------| | pymupdf | 0.32 | -- | -| pdftext | 1.79 | 96.22 | +| pdftext | 1.57 | 96.22 | | pdfplumber | 3.0 | 89.88 | pdftext is approximately 2x slower than using pypdfium2 alone (if you were to extract all the same information). diff --git a/benchmark.py b/benchmark.py index a2ba903..d6332f8 100644 --- a/benchmark.py +++ b/benchmark.py @@ -55,6 +55,7 @@ def main(): parser = argparse.ArgumentParser(description="Benchmark pdf extraction.") parser.add_argument("--result_path", type=str, help="Path to the output text file, defaults to stdout", default=None) parser.add_argument("--max", type=int, help="Maximum number of pages to process.", default=None) + parser.add_argument("--pdftext_only", action="store_true", help="Only run pdftext inference", default=False) args = parser.parse_args() split = "train" @@ -66,6 +67,9 @@ def main(): alignments = defaultdict(list) times_tools = ["pymupdf", "pdftext", "pdfplumber"] alignment_tools = ["pdftext", "pdfplumber"] + if args.pdftext_only: + times_tools = ["pdftext", "pymupdf"] + alignment_tools = ["pdftext"] model = get_model() for i in tqdm(range(len(dataset)), desc="Benchmarking"): row = dataset[i] diff --git a/pdftext/inference.py b/pdftext/inference.py index e4352be..14756ab 100644 --- 
a/pdftext/inference.py +++ b/pdftext/inference.py @@ -1,5 +1,7 @@ from itertools import chain +import sklearn + from pdftext.pdf.utils import LINE_BREAKS, TABS, SPACES @@ -152,7 +154,9 @@ def inference(text_chars, model): training_rows = [tl[1] for tl in training_list] training_idxs = [tl[0] for tl in training_list] - predictions = model.predict(training_rows) + # Disable nan, etc, validation for a small speedup + with sklearn.config_context(assume_finite=True): + predictions = model.predict(training_rows) for pred, page_idx in zip(predictions, training_idxs): next_prediction[page_idx] = pred page_blocks = sorted(page_blocks.items()) diff --git a/pyproject.toml b/pyproject.toml index 8fd2c7a..dbd5d28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pdftext" -version = "0.1.1" +version = "0.1.2" description = "Extract structured text from pdfs quickly" authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"] license = "Apache-2.0"