From 95c06c82ea45ae8685b55a1e3904af45e76ee853 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Wed, 29 Jan 2025 14:17:27 -0500 Subject: [PATCH 01/27] Update overall benchmark --- benchmarks/overall.py | 132 ----------------------- benchmarks/overall/inference.py | 47 ++++++++ benchmarks/overall/overall.py | 88 +++++++++++++++ benchmarks/overall/scoring.py | 30 ++++++ benchmarks/scoring.py | 36 ------- benchmarks/table/table.py | 22 ++-- marker/builders/llm_layout.py | 6 +- marker/processors/equation.py | 9 +- marker/processors/llm/__init__.py | 6 +- marker/processors/llm/llm_table_merge.py | 6 +- poetry.lock | 115 +++++++++++--------- pyproject.toml | 2 +- 12 files changed, 260 insertions(+), 239 deletions(-) delete mode 100644 benchmarks/overall.py create mode 100644 benchmarks/overall/inference.py create mode 100644 benchmarks/overall/overall.py create mode 100644 benchmarks/overall/scoring.py delete mode 100644 benchmarks/scoring.py diff --git a/benchmarks/overall.py b/benchmarks/overall.py deleted file mode 100644 index f6fb9591..00000000 --- a/benchmarks/overall.py +++ /dev/null @@ -1,132 +0,0 @@ -import tempfile -import time -from collections import defaultdict - -import click -from tqdm import tqdm -import pypdfium2 as pdfium - -from marker.config.parser import ConfigParser -from marker.converters.pdf import PdfConverter -from marker.logger import configure_logging -from marker.models import create_model_dict -from pdftext.extraction import plain_text_output -import json -import os -import subprocess -import shutil -from tabulate import tabulate - -from marker.settings import settings -from scoring import score_text - -configure_logging() - - -def nougat_prediction(pdf_filename, batch_size=1): - out_dir = tempfile.mkdtemp() - subprocess.run(["nougat", pdf_filename, "-o", out_dir, "--no-skipping", "--recompute", "--batchsize", str(batch_size)], check=True) - md_file = os.listdir(out_dir)[0] - with open(os.path.join(out_dir, md_file), "r") as f: - data = 
f.read() - shutil.rmtree(out_dir) - return data - -@click.command(help="Benchmark PDF to MD conversion.") -@click.argument("in_folder", type=str) -@click.argument("reference_folder", type=str) -@click.argument("out_file", type=str) -@click.option("--nougat", is_flag=True, help="Run nougat and compare") -@click.option("--md_out_path", type=str, default=None, help="Output path for generated markdown files") -def main(in_folder: str, reference_folder: str, out_file: str, nougat: bool, md_out_path: str): - methods = ["marker"] - if nougat: - methods.append("nougat") - - model_dict = create_model_dict() - - scores = defaultdict(dict) - benchmark_files = os.listdir(in_folder) - benchmark_files = [b for b in benchmark_files if b.endswith(".pdf")] - times = defaultdict(dict) - pages = defaultdict(int) - - for idx, fname in tqdm(enumerate(benchmark_files)): - md_filename = fname.rsplit(".", 1)[0] + ".md" - - reference_filename = os.path.join(reference_folder, md_filename) - with open(reference_filename, "r") as f: - reference = f.read() - - pdf_filename = os.path.join(in_folder, fname) - doc = pdfium.PdfDocument(pdf_filename) - pages[fname] = len(doc) - - config_parser = ConfigParser({"output_format": "markdown"}) - for method in methods: - start = time.time() - if method == "marker": - converter = PdfConverter( - config=config_parser.generate_config_dict(), - artifact_dict=model_dict, - processor_list=None, - renderer=config_parser.get_renderer() - ) - full_text = converter(pdf_filename).markdown - elif method == "nougat": - full_text = nougat_prediction(pdf_filename, batch_size=1) - elif method == "naive": - full_text = plain_text_output(doc, workers=1) - else: - raise ValueError(f"Unknown method {method}") - - times[method][fname] = time.time() - start - - score = score_text(full_text, reference) - scores[method][fname] = score - - if md_out_path: - md_out_filename = f"{method}_{md_filename}" - with open(os.path.join(md_out_path, md_out_filename), "w+") as f: - 
f.write(full_text) - - total_pages = sum(pages.values()) - with open(out_file, "w+") as f: - write_data = defaultdict(dict) - for method in methods: - total_time = sum(times[method].values()) - file_stats = { - fname: - { - "time": times[method][fname], - "score": scores[method][fname], - "pages": pages[fname] - } - - for fname in benchmark_files - } - write_data[method] = { - "files": file_stats, - "avg_score": sum(scores[method].values()) / len(scores[method]), - "time_per_page": total_time / total_pages, - "time_per_doc": total_time / len(scores[method]) - } - - json.dump(write_data, f, indent=4) - - summary_table = [] - score_table = [] - score_headers = benchmark_files - for method in methods: - summary_table.append([method, write_data[method]["avg_score"], write_data[method]["time_per_page"], write_data[method]["time_per_doc"]]) - score_table.append([method, *[write_data[method]["files"][h]["score"] for h in score_headers]]) - - print(tabulate(summary_table, headers=["Method", "Average Score", "Time per page", "Time per document"])) - print("") - print("Scores by file") - print(tabulate(score_table, headers=["Method", *score_headers])) - - -if __name__ == "__main__": - main() - diff --git a/benchmarks/overall/inference.py b/benchmarks/overall/inference.py new file mode 100644 index 00000000..040a53af --- /dev/null +++ b/benchmarks/overall/inference.py @@ -0,0 +1,47 @@ +import io + +import fitz as pymupdf +import tempfile +from bs4 import BeautifulSoup + +from marker.converters.pdf import PdfConverter + +def open_pymupdf(pdf_bytes): + stream = io.BytesIO(pdf_bytes) + return pymupdf.open(stream=stream) + +def clip_pdf_to_bbox(doc, bbox, padding=1): + page = doc[0] + height, width = page.bound().height, page.bound().width + remove_left = [0, 0, bbox[0] - padding, height] + remove_top = [0, 0, width, bbox[1] - padding] + remove_right = [bbox[2] + padding, 0, width, height] + remove_bottom = [0, bbox[3] + padding, width, height] + for remove in [remove_left, 
remove_top, remove_right, remove_bottom]: + clip_rect = pymupdf.Rect(*remove) + page.add_redact_annot(clip_rect) + page.apply_redactions() + + clip_rect = pymupdf.Rect(*bbox) + page.set_cropbox(clip_rect) + return doc + +def get_marker_block_html(marker_models: dict, gt_blocks: list, pdf_bytes: bytes): + block_html = [] + for block in gt_blocks: + bbox = block["bbox"] + doc2 = open_pymupdf(pdf_bytes) + clip_pdf_to_bbox(doc2, bbox) + block_converter = PdfConverter( + artifact_dict=marker_models, + config={"page_range": [0], "force_layout_block": block["block_type"], "disable_tqdm": True}, + renderer="marker.renderers.html.HTMLRenderer" + ) + with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as f: + doc2.save(f) + rendered = block_converter(f.name) + html = rendered.html + soup = BeautifulSoup(html, "html.parser") + inner_html = str(soup.find("body").decode_contents()) + block_html.append(inner_html) + return block_html \ No newline at end of file diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py new file mode 100644 index 00000000..57e80286 --- /dev/null +++ b/benchmarks/overall/overall.py @@ -0,0 +1,88 @@ +import json +import os +from collections import defaultdict +from pathlib import Path + +import click +import datasets +import tabulate +from tqdm import tqdm + +from marker.logger import configure_logging +from marker.models import create_model_dict +from inference import get_marker_block_html +from marker.settings import settings +from scoring import score_blocks + +configure_logging() + +@click.command(help="Benchmark PDF to MD conversion.") +@click.option("--dataset", type=str, help="Path to the benchmark dataset", default="datalab-to/marker_benchmark") +@click.option("--other_methods", type=str, help="Comma separated list of other methods to compare against. 
Possible values:", default="") +@click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "overall"), help="Output path for results.") +@click.option("--max_rows", type=int, default=None, help="Maximum number of rows to process.") +def main( + dataset: str, + other_methods: str, + result_path: str, + max_rows: int +): + allowed_methods = [""] + methods = other_methods.split(",") + for method in methods: + if method not in allowed_methods: + raise ValueError(f"Method {method} not allowed. Allowed methods are {allowed_methods}") + + model_dict = create_model_dict() + ds = datasets.load_dataset(dataset, split="train") + + bench_scores = {} + averages_by_type = defaultdict(list) + averages_by_block_type = defaultdict(list) + for idx, sample in tqdm(enumerate(ds), desc="Running benchmark"): + gt_blocks = json.loads(sample["gt_blocks"]) + doc_type = sample["classification"] + pdf_bytes = sample["pdf"] # This is a single page PDF + marker_html = get_marker_block_html(model_dict, gt_blocks, pdf_bytes) + gt_html = [block["html"] for block in gt_blocks] + scores = score_blocks(gt_html, marker_html) + gt_weights = [len(ht) for ht in gt_html] + overall_score = sum([s * w for s, w in zip(scores, gt_weights)]) / sum(gt_weights) + bench_scores[idx] = { + "scores": scores, + "weights": gt_weights, + "overall_score": overall_score # Weighted score, weighted by length of GT block + } + + averages_by_type[doc_type].append(overall_score) + + for score, gt_block in zip(scores, gt_blocks): + averages_by_block_type[gt_block["block_type"]].append(score) + + if max_rows is not None and idx >= max_rows: + break + + for k in averages_by_type: + averages_by_type[k] = sum(averages_by_type[k]) / len(averages_by_type[k]) + averages_by_type = sorted(averages_by_type.items()) + + print(tabulate.tabulate(averages_by_type, headers=["Document Type", "Average Score"], tablefmt="github")) + + for k in averages_by_block_type: + averages_by_block_type[k] = 
sum(averages_by_block_type[k]) / len(averages_by_block_type[k]) + averages_by_block_type = sorted(averages_by_block_type.items()) + + print(tabulate.tabulate(averages_by_block_type, headers=["Block Type", "Average Score"], tablefmt="github")) + + overall_average = sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores) + print(tabulate.tabulate([["Overall Average", overall_average]], tablefmt="github")) + + out_path = Path(result_path) / "overall.json" + with open(out_path, "w") as f: + json.dump(bench_scores, f, indent=2) + + print(f"Results saved to {out_path}.") + +if __name__ == "__main__": + main() + diff --git a/benchmarks/overall/scoring.py b/benchmarks/overall/scoring.py new file mode 100644 index 00000000..3ae19a98 --- /dev/null +++ b/benchmarks/overall/scoring.py @@ -0,0 +1,30 @@ +import re +from bs4 import BeautifulSoup + +from markdownify import markdownify as md +from rapidfuzz import fuzz + +def standardize_html(html): + soup = BeautifulSoup(html, "html.parser") + + # Convert all headers to h1 so we don't penalize small differences in header levels + for tag in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): + tag.name = "h1" + + html = str(soup) + markdown = md(html) + markdown = markdown.replace("
", "\n") + markdown = re.sub(r"\s+", " ", markdown) + markdown = re.sub(r"\n+", "\n", markdown) + markdown = re.sub("\\.+", ".", markdown) # Replace repeated periods with a single period, like in table of contents + return markdown.strip() + + +def score_blocks(gt_html, method_html): + scores = [] + for gt, method in zip(gt_html, method_html): + gt= standardize_html(gt) + method = standardize_html(method) + score = fuzz.ratio(gt, method) + scores.append(score) + return scores \ No newline at end of file diff --git a/benchmarks/scoring.py b/benchmarks/scoring.py deleted file mode 100644 index 5aa9faff..00000000 --- a/benchmarks/scoring.py +++ /dev/null @@ -1,36 +0,0 @@ -from rapidfuzz import fuzz -from statistics import mean - -CHUNK_MIN_CHARS = 25 - -def chunk_text(text, chunk_len=500): - chunks = [text[i:i+chunk_len] for i in range(0, len(text), chunk_len)] - chunks = [c for c in chunks if c.strip() and len(c) > CHUNK_MIN_CHARS] - return chunks - - -def overlap_score(hypothesis_chunks, reference_chunks): - length_modifier = len(hypothesis_chunks) / len(reference_chunks) - search_distance = max(len(reference_chunks) // 5, 10) - chunk_scores = [] - for i, hyp_chunk in enumerate(hypothesis_chunks): - max_score = 0 - total_len = 0 - i_offset = int(i * length_modifier) - chunk_range = range(max(0, i_offset-search_distance), min(len(reference_chunks), i_offset+search_distance)) - for j in chunk_range: - ref_chunk = reference_chunks[j] - score = fuzz.ratio(hyp_chunk, ref_chunk, score_cutoff=30) / 100 - if score > max_score: - max_score = score - total_len = len(ref_chunk) - chunk_scores.append(max_score) - return chunk_scores - - -def score_text(hypothesis, reference): - # Returns a 0-1 alignment score - hypothesis_chunks = chunk_text(hypothesis) - reference_chunks = chunk_text(reference) - chunk_scores = overlap_score(hypothesis_chunks, reference_chunks) - return mean(chunk_scores) diff --git a/benchmarks/table/table.py b/benchmarks/table/table.py index 
3116274d..dfeb5eb0 100644 --- a/benchmarks/table/table.py +++ b/benchmarks/table/table.py @@ -1,10 +1,10 @@ import os -from itertools import repeat -from tkinter import Image - -os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for a simple op, which is not supported on MPS +os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for an op, which is not supported on MPS +from pathlib import Path +from itertools import repeat from typing import List + import numpy as np import base64 import time @@ -20,6 +20,7 @@ import pypdfium2 as pdfium from marker.util import matrix_intersection_area from marker.renderers.json import JSONOutput, JSONBlockOutput +from marker.settings import settings from marker.config.parser import ConfigParser from marker.converters.table import TableConverter @@ -47,7 +48,7 @@ def extract_tables(children: List[JSONBlockOutput]): @click.command(help="Benchmark Table to HTML Conversion") -@click.argument("out_file", type=str) +@click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "table"), help="Output path for results.") @click.option("--dataset", type=str, default="datalab-to/fintabnet-test", help="Dataset to use") @click.option("--max_rows", type=int, default=None, help="Maximum number of PDFs to process") @click.option("--max_workers", type=int, default=16, help="Maximum number of workers to use") @@ -55,7 +56,7 @@ def extract_tables(children: List[JSONBlockOutput]): @click.option("--table_rec_batch_size", type=int, default=None, help="Batch size for table recognition.") @click.option("--use_gemini", is_flag=True, help="Evaluate Gemini for table recognition.") def main( - out_file: str, + result_path: str, dataset: str, max_rows: int, max_workers: int, @@ -64,7 +65,7 @@ def main( use_gemini: bool = False ): models = create_model_dict() - config_parser = ConfigParser({'output_format': 'json', "use_llm": use_llm, "table_rec_batch_size": table_rec_batch_size}) + 
config_parser = ConfigParser({'output_format': 'json', "use_llm": use_llm, "table_rec_batch_size": table_rec_batch_size, "disable_tqdm": True}) start = time.time() @@ -93,9 +94,7 @@ def main( with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as temp_pdf_file: temp_pdf_file.write(pdf_binary) temp_pdf_file.seek(0) - tqdm.disable = True marker_json = converter(temp_pdf_file.name).children - tqdm.disable = False doc = pdfium.PdfDocument(temp_pdf_file.name) page_image = doc[0].render(scale=92/72).to_pil() @@ -223,8 +222,11 @@ def main( "gemini": gemini_results } - with open(out_file, "w+") as f: + out_path = Path(result_path) / "table.json" + with open(out_path, "w+") as f: json.dump(results, f, indent=2) + print(f"Results saved to {out_path}.") + if __name__ == '__main__': main() \ No newline at end of file diff --git a/marker/builders/llm_layout.py b/marker/builders/llm_layout.py index b061ea48..c9aae671 100644 --- a/marker/builders/llm_layout.py +++ b/marker/builders/llm_layout.py @@ -50,6 +50,10 @@ class LLMLayoutBuilder(LayoutBuilder): int, "The timeout for requests to the Gemini model.", ] = 60 + disable_tqdm: Annotated[ + bool, + "Whether to disable the tqdm progress bar.", + ] = False topk_relabelling_prompt: Annotated[ str, "The prompt to use for relabelling blocks.", @@ -107,7 +111,7 @@ def __call__(self, document: Document, provider: PdfProvider): print(f"Error relabelling blocks: {e}") def relabel_blocks(self, document: Document): - pbar = tqdm(desc="LLM layout relabelling") + pbar = tqdm(desc="LLM layout relabelling", disable=self.disable_tqdm) with ThreadPoolExecutor(max_workers=self.max_concurrency) as executor: futures = [] for page in document.pages: diff --git a/marker/processors/equation.py b/marker/processors/equation.py index 5f5be17c..6bd79fa7 100644 --- a/marker/processors/equation.py +++ b/marker/processors/equation.py @@ -1,7 +1,4 @@ from typing import Annotated, List, Optional, Tuple - -from texify.inference import batch_inference 
-from texify.model.model import GenerateVisionEncoderDecoderModel from tqdm import tqdm from marker.models import TexifyPredictor @@ -32,6 +29,10 @@ class EquationProcessor(BaseProcessor): int, "The number of tokens to buffer above max for the Texify model.", ] = 256 + disable_tqdm: Annotated[ + bool, + "Whether to disable the tqdm progress bar.", + ] = False def __init__(self, texify_model: TexifyPredictor, config=None): super().__init__(config) @@ -80,7 +81,7 @@ def get_latex_batched(self, equation_data: List[dict]): predictions = [""] * len(equation_data) batch_size = self.get_batch_size() - for i in tqdm(range(0, len(equation_data), batch_size), desc="Recognizing equations"): + for i in tqdm(range(0, len(equation_data), batch_size), desc="Recognizing equations", disable=self.disable_tqdm): # Dynamically set max length to save inference time min_idx = i max_idx = min(min_idx + batch_size, len(equation_data)) diff --git a/marker/processors/llm/__init__.py b/marker/processors/llm/__init__.py index c41853ac..5f36139e 100644 --- a/marker/processors/llm/__init__.py +++ b/marker/processors/llm/__init__.py @@ -44,6 +44,10 @@ class BaseLLMProcessor(BaseProcessor): bool, "Whether to use the LLM model.", ] = False + disable_tqdm: Annotated[ + bool, + "Whether to disable the tqdm progress bar.", + ] = False block_types = None def __init__(self, config=None): @@ -73,7 +77,7 @@ def rewrite_blocks(self, document: Document): if total_blocks == 0: return - pbar = tqdm(desc=f"{self.__class__.__name__} running") + pbar = tqdm(desc=f"{self.__class__.__name__} running", disable=self.disable_tqdm) with ThreadPoolExecutor(max_workers=self.max_concurrency) as executor: for future in as_completed([ executor.submit(self.process_rewriting, document, page, block) diff --git a/marker/processors/llm/llm_table_merge.py b/marker/processors/llm/llm_table_merge.py index e2012998..a3f74396 100644 --- a/marker/processors/llm/llm_table_merge.py +++ b/marker/processors/llm/llm_table_merge.py @@ 
-44,6 +44,10 @@ class LLMTableMergeProcessor(BaseLLMProcessor): int, "The maximum gap between columns to merge tables" ] = 50 + disable_tqdm: Annotated[ + bool, + "Whether to disable the tqdm progress bar.", + ] = False table_merge_prompt: Annotated[ str, "The prompt to use for rewriting text.", @@ -137,7 +141,7 @@ def get_column_count(cells: List[TableCell]): return max_cols def rewrite_blocks(self, document: Document): - pbar = tqdm(desc=f"{self.__class__.__name__} running") + pbar = tqdm(desc=f"{self.__class__.__name__} running", disable=self.disable_tqdm) table_runs = [] table_run = [] prev_block = None diff --git a/poetry.lock b/poetry.lock index 652f9c68..c053780c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2729,6 +2729,18 @@ files = [ [package.dependencies] nvidia-nvjitlink-cu12 = "*" +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.6.2" +description = "NVIDIA cuSPARSELt" +optional = false +python-versions = "*" +files = [ + {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:067a7f6d03ea0d4841c85f0c6f1991c5dda98211f6302cb83a4ab234ee95bef8"}, + {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:df2c24502fd76ebafe7457dbc4716b2fec071aabaed4fb7691a201cde03704d9"}, + {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-win_amd64.whl", hash = "sha256:0057c91d230703924c0422feabe4ce768841f9b4b44d28586b6f6d2eb86fbe70"}, +] + [[package]] name = "nvidia-nccl-cu12" version = "2.21.5" @@ -3566,6 +3578,23 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pymupdf" +version = "1.25.2" +description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pymupdf-1.25.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:59dea22b633cc4fc13670b4c5db50d71f8cd4f420814420f33ce47ddcb61e1f6"}, + {file = "pymupdf-1.25.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:e8b8a874497cd0deee89a6a4fb76a3a08173c8d39e88fc7cf715764ec5a243e9"}, + {file = "pymupdf-1.25.2-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f61e5cdb25b86eb28d34aa3557b49ecf9e361d5f5cd3b1660406f8f0bf813af7"}, + {file = "pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae8cfa7a97d78f813d286ecba32369059d88073edd1e5cf105f4cd0811f71925"}, + {file = "pymupdf-1.25.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:295505fe1ecb7c7b57d4124d373e207ea311d8e40bc7ac3016d8ec2d60b091e9"}, + {file = "pymupdf-1.25.2-cp39-abi3-win32.whl", hash = "sha256:b9488c8b82bb9be36fb13ee0c8d43b0ddcc50af83b61da01e6040413d9e67da6"}, + {file = "pymupdf-1.25.2-cp39-abi3-win_amd64.whl", hash = "sha256:1b4ca6f5780d319a08dff885a5a0e3585c5d7af04dcfa063c535b88371fd91c1"}, + {file = "pymupdf-1.25.2.tar.gz", hash = "sha256:9ea88ff1b3ccb359620f106a6fd5ba6877d959d21d78272052c3496ceede6eec"}, +] + [[package]] name = "pyparsing" version = "3.2.1" @@ -4729,27 +4758,6 @@ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] typing = ["mypy (>=1.6,<2.0)", "traitlets (>=5.11.1)"] -[[package]] -name = "texify" -version = "0.2.1" -description = "OCR for latex images" -optional = false -python-versions = "<4.0,>=3.10" -files = [ - {file = "texify-0.2.1-py3-none-any.whl", hash = "sha256:861c90ea6167fb6c2b334d5fcf0116dd9e1585af359463dec83115891c09dcfa"}, - {file = "texify-0.2.1.tar.gz", hash = "sha256:bab30f8445aa60e36de122fb86deb77b3f25348a885d4d5f3c67d6b6f5bb2e81"}, -] - -[package.dependencies] -ftfy = ">=6.1.3,<7.0.0" -Pillow = ">=10.1.0,<11.0.0" -pydantic = ">=2.5.2,<3.0.0" -pydantic-settings = ">=2.1.0,<3.0.0" 
-pypdfium2 = ">=4.25.0,<5.0.0" -python-dotenv = ">=1.0.0,<2.0.0" -torch = ">=2.1.2,<3.0.0" -transformers = ">=4.36.2,<5.0.0" - [[package]] name = "threadpoolctl" version = "3.5.0" @@ -4865,28 +4873,31 @@ files = [ [[package]] name = "torch" -version = "2.5.1" +version = "2.6.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false -python-versions = ">=3.8.0" +python-versions = ">=3.9.0" files = [ - {file = "torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:71328e1bbe39d213b8721678f9dcac30dfc452a46d586f1d514a6aa0a99d4744"}, - {file = "torch-2.5.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:34bfa1a852e5714cbfa17f27c49d8ce35e1b7af5608c4bc6e81392c352dbc601"}, - {file = "torch-2.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:32a037bd98a241df6c93e4c789b683335da76a2ac142c0973675b715102dc5fa"}, - {file = "torch-2.5.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:23d062bf70776a3d04dbe74db950db2a5245e1ba4f27208a87f0d743b0d06e86"}, - {file = "torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:de5b7d6740c4b636ef4db92be922f0edc425b65ed78c5076c43c42d362a45457"}, - {file = "torch-2.5.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:340ce0432cad0d37f5a31be666896e16788f1adf8ad7be481196b503dad675b9"}, - {file = "torch-2.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:603c52d2fe06433c18b747d25f5c333f9c1d58615620578c326d66f258686f9a"}, - {file = "torch-2.5.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:31f8c39660962f9ae4eeec995e3049b5492eb7360dd4f07377658ef4d728fa4c"}, - {file = "torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:ed231a4b3a5952177fafb661213d690a72caaad97d5824dd4fc17ab9e15cec03"}, - {file = "torch-2.5.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:3f4b7f10a247e0dcd7ea97dc2d3bfbfc90302ed36d7f3952b0008d0df264e697"}, - {file = "torch-2.5.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:73e58e78f7d220917c5dbfad1a40e09df9929d3b95d25e57d9f8558f84c9a11c"}, - {file = "torch-2.5.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:8c712df61101964eb11910a846514011f0b6f5920c55dbf567bff8a34163d5b1"}, - {file = "torch-2.5.1-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:9b61edf3b4f6e3b0e0adda8b3960266b9009d02b37555971f4d1c8f7a05afed7"}, - {file = "torch-2.5.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1f3b7fb3cf7ab97fae52161423f81be8c6b8afac8d9760823fd623994581e1a3"}, - {file = "torch-2.5.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:7974e3dce28b5a21fb554b73e1bc9072c25dde873fa00d54280861e7a009d7dc"}, - {file = "torch-2.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:46c817d3ea33696ad3b9df5e774dba2257e9a4cd3c4a3afbf92f6bb13ac5ce2d"}, - {file = "torch-2.5.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8046768b7f6d35b85d101b4b38cba8aa2f3cd51952bc4c06a49580f2ce682291"}, + {file = "torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:6860df13d9911ac158f4c44031609700e1eba07916fff62e21e6ffa0a9e01961"}, + {file = "torch-2.6.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c4f103a49830ce4c7561ef4434cc7926e5a5fe4e5eb100c19ab36ea1e2b634ab"}, + {file = "torch-2.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:56eeaf2ecac90da5d9e35f7f35eb286da82673ec3c582e310a8d1631a1c02341"}, + {file = "torch-2.6.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:09e06f9949e1a0518c5b09fe95295bc9661f219d9ecb6f9893e5123e10696628"}, + {file = "torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:7979834102cd5b7a43cc64e87f2f3b14bd0e1458f06e9f88ffa386d07c7446e1"}, + {file = "torch-2.6.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:ccbd0320411fe1a3b3fec7b4d3185aa7d0c52adac94480ab024b5c8f74a0bf1d"}, + {file = "torch-2.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:46763dcb051180ce1ed23d1891d9b1598e07d051ce4c9d14307029809c4d64f7"}, + {file = "torch-2.6.0-cp311-none-macosx_11_0_arm64.whl", hash = 
"sha256:94fc63b3b4bedd327af588696559f68c264440e2503cc9e6954019473d74ae21"}, + {file = "torch-2.6.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:2bb8987f3bb1ef2675897034402373ddfc8f5ef0e156e2d8cfc47cacafdda4a9"}, + {file = "torch-2.6.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b789069020c5588c70d5c2158ac0aa23fd24a028f34a8b4fcb8fcb4d7efcf5fb"}, + {file = "torch-2.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:7e1448426d0ba3620408218b50aa6ada88aeae34f7a239ba5431f6c8774b1239"}, + {file = "torch-2.6.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:9a610afe216a85a8b9bc9f8365ed561535c93e804c2a317ef7fabcc5deda0989"}, + {file = "torch-2.6.0-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:4874a73507a300a5d089ceaff616a569e7bb7c613c56f37f63ec3ffac65259cf"}, + {file = "torch-2.6.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a0d5e1b9874c1a6c25556840ab8920569a7a4137afa8a63a32cee0bc7d89bd4b"}, + {file = "torch-2.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:510c73251bee9ba02ae1cb6c9d4ee0907b3ce6020e62784e2d7598e0cfa4d6cc"}, + {file = "torch-2.6.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:ff96f4038f8af9f7ec4231710ed4549da1bdebad95923953a25045dcf6fd87e2"}, + {file = "torch-2.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9ea955317cfcd3852b1402b62af258ce735c2edeee42ca9419b6bc889e5ae053"}, + {file = "torch-2.6.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:bb2c6c3e65049f081940f5ab15c9136c7de40d3f01192541c920a07c7c585b7e"}, + {file = "torch-2.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:683410f97984103148e31b38a8631acf31c3034c020c0f4d26171e7626d8317a"}, + {file = "torch-2.6.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:265f70de5fd45b864d924b64be1797f86e76c8e48a02c2a3a6fc7ec247d2226c"}, ] [package.dependencies] @@ -4903,17 +4914,18 @@ nvidia-cufft-cu12 = {version = "11.2.1.3", markers = "platform_system == \"Linux nvidia-curand-cu12 = {version = "10.3.5.147", markers = "platform_system == \"Linux\" and platform_machine 
== \"x86_64\""} nvidia-cusolver-cu12 = {version = "11.6.1.9", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cusparse-cu12 = {version = "12.3.1.170", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparselt-cu12 = {version = "0.6.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nccl-cu12 = {version = "2.21.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvjitlink-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} setuptools = {version = "*", markers = "python_version >= \"3.12\""} sympy = {version = "1.13.1", markers = "python_version >= \"3.9\""} -triton = {version = "3.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} -typing-extensions = ">=4.8.0" +triton = {version = "3.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +typing-extensions = ">=4.10.0" [package.extras] opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.12.0)"] +optree = ["optree (>=0.13.0)"] [[package]] name = "tornado" @@ -5042,21 +5054,18 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] [[package]] name = "triton" -version = "3.1.0" +version = "3.2.0" description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" files = [ - {file = "triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b0dd10a925263abbe9fa37dcde67a5e9b2383fc269fdf59f5657cac38c5d1d8"}, - {file = "triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f34f6e7885d1bf0eaaf7ba875a5f0ce6f3c13ba98f9503651c1e6dc6757ed5c"}, - {file = 
"triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8182f42fd8080a7d39d666814fa36c5e30cc00ea7eeeb1a2983dbb4c99a0fdc"}, - {file = "triton-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dadaca7fc24de34e180271b5cf864c16755702e9f63a16f62df714a8099126a"}, - {file = "triton-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aafa9a20cd0d9fee523cd4504aa7131807a864cd77dcf6efe7e981f18b8c6c11"}, + {file = "triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3e54983cd51875855da7c68ec05c05cf8bb08df361b1d5b69e05e40b0c9bd62"}, + {file = "triton-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8009a1fb093ee8546495e96731336a33fb8856a38e45bb4ab6affd6dbc3ba220"}, + {file = "triton-3.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d9b215efc1c26fa7eefb9a157915c92d52e000d2bf83e5f69704047e63f125c"}, + {file = "triton-3.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5dfa23ba84541d7c0a531dfce76d8bcd19159d50a4a8b14ad01e91734a5c1b0"}, + {file = "triton-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30ceed0eff2c4a73b14eb63e052992f44bbdf175f3fad21e1ac8097a772de7ee"}, ] -[package.dependencies] -filelock = "*" - [package.extras] build = ["cmake (>=3.20)", "lit"] tests = ["autopep8", "flake8", "isort", "llnl-hatchet", "numpy", "pytest", "scipy (>=1.7.1)"] @@ -5489,4 +5498,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d43373ff00de4feb00b0aed4fe98d2a84ecb5742d1a916cabbace5104f888d54" +content-hash = "9d330f12a8bad0352ec550e1d6a77348b10f6bca7ecc41769813bec85d3f9e08" diff --git a/pyproject.toml b/pyproject.toml index 08d8c72e..0e787c60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,6 @@ python-dotenv = "^1.0.0" torch = "^2.5.1" tqdm = "^4.66.1" ftfy = "^6.1.1" 
-texify = "^0.2.1" rapidfuzz = "^3.8.1" surya-ocr = "~0.10.0" regex = "^2024.4.28" @@ -50,6 +49,7 @@ apted = "1.0.3" distance = "0.1.3" lxml = "5.3.0" tabulate = "^0.9.0" +pymupdf = "^1.25.2" [tool.poetry.scripts] marker = "marker.scripts.convert:convert_cli" From e6e2d7da22d6cbf6c666d29dde9b6cdc4854cea5 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 30 Jan 2025 10:48:39 -0500 Subject: [PATCH 02/27] Clean up benchmarks --- README.md | 4 +- benchmarks/overall/inference.py | 78 +++--- benchmarks/overall/overall.py | 107 +++++--- benchmarks/overall/schema.py | 15 + benchmarks/overall/scoring.py | 143 ++++++++-- benchmarks/table/table.py | 1 + marker/processors/llm/llm_complex.py | 2 +- marker/processors/llm/llm_handwriting.py | 2 +- marker/renderers/markdown.py | 14 +- poetry.lock | 332 +++++++++++------------ pyproject.toml | 3 +- 11 files changed, 410 insertions(+), 291 deletions(-) create mode 100644 benchmarks/overall/schema.py diff --git a/README.md b/README.md index 0a3382a8..be37250f 100644 --- a/README.md +++ b/README.md @@ -219,11 +219,11 @@ rendered = converter("FILEPATH") text, _, images = text_from_rendered(rendered) ``` -This takes all the same configuration as the PdfConverter. You can specify the configuration `--force_layout_block=Table` to avoid layout detection and instead assume every page is a table. +This takes all the same configuration as the PdfConverter. You can specify the configuration `force_layout_block=Table` to avoid layout detection and instead assume every page is a table. Set `output_format=json` to also get cell bounding boxes. 
You can also run this via the CLI with ```shell -python convert_single.py FILENAME --use_llm --force_layout_block Table --converter_cls marker.converters.table.TableConverter +marker_single FILENAME --use_llm --force_layout_block Table --converter_cls marker.converters.table.TableConverter --output_format json ``` # Output Formats diff --git a/benchmarks/overall/inference.py b/benchmarks/overall/inference.py index 040a53af..f312429b 100644 --- a/benchmarks/overall/inference.py +++ b/benchmarks/overall/inference.py @@ -1,47 +1,47 @@ -import io - -import fitz as pymupdf +import json import tempfile from bs4 import BeautifulSoup +from benchmarks.overall.scoring import score_blocks +from benchmarks.overall.schema import BlockScores from marker.converters.pdf import PdfConverter -def open_pymupdf(pdf_bytes): - stream = io.BytesIO(pdf_bytes) - return pymupdf.open(stream=stream) +def get_marker_html(marker_models: dict, pdf_bytes: bytes): + block_converter = PdfConverter( + artifact_dict=marker_models, + config={"page_range": [0], "disable_tqdm": True}, + renderer="marker.renderers.html.HTMLRenderer" + ) + with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as f: + f.write(pdf_bytes) + rendered = block_converter(f.name) + html = rendered.html + soup = BeautifulSoup(html, "html.parser") + inner_html = str(soup.find("body").decode_contents()) + return inner_html + + +def marker_html_func(model_dict, sample, **kwargs) -> BlockScores: + gt_blocks = json.loads(sample["gt_blocks"]) + pdf_bytes = sample["pdf"] # This is a single page PDF + marker_html = get_marker_html(model_dict, pdf_bytes) + gt_html = [block["html"] for block in gt_blocks] + scores = score_blocks(gt_html, marker_html) + return scores -def clip_pdf_to_bbox(doc, bbox, padding=1): - page = doc[0] - height, width = page.bound().height, page.bound().width - remove_left = [0, 0, bbox[0] - padding, height] - remove_top = [0, 0, width, bbox[1] - padding] - remove_right = [bbox[2] + padding, 0, width, height] - 
remove_bottom = [0, bbox[3] + padding, width, height] - for remove in [remove_left, remove_top, remove_right, remove_bottom]: - clip_rect = pymupdf.Rect(*remove) - page.add_redact_annot(clip_rect) - page.apply_redactions() - clip_rect = pymupdf.Rect(*bbox) - page.set_cropbox(clip_rect) - return doc +def mathpix_html_func(model_dict, sample, mathpix_ds, **kwargs) -> BlockScores: + uuid = sample["uuid"] + data = None + for row in mathpix_ds: + if str(row["uuid"]) == str(uuid): + data = row + break + if not data: + raise ValueError(f"Could not find data for uuid {uuid}") -def get_marker_block_html(marker_models: dict, gt_blocks: list, pdf_bytes: bytes): - block_html = [] - for block in gt_blocks: - bbox = block["bbox"] - doc2 = open_pymupdf(pdf_bytes) - clip_pdf_to_bbox(doc2, bbox) - block_converter = PdfConverter( - artifact_dict=marker_models, - config={"page_range": [0], "force_layout_block": block["block_type"], "disable_tqdm": True}, - renderer="marker.renderers.html.HTMLRenderer" - ) - with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as f: - doc2.save(f) - rendered = block_converter(f.name) - html = rendered.html - soup = BeautifulSoup(html, "html.parser") - inner_html = str(soup.find("body").decode_contents()) - block_html.append(inner_html) - return block_html \ No newline at end of file + mathpix_md = data["md"] + gt_blocks = json.loads(sample["gt_blocks"]) + gt_html = [block["html"] for block in gt_blocks] + scores = score_blocks(gt_html, mathpix_md, convert=False) + return scores diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index 57e80286..bdb1fc7c 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -1,5 +1,6 @@ import json import os +import traceback from collections import defaultdict from pathlib import Path @@ -8,64 +9,53 @@ import tabulate from tqdm import tqdm +from benchmarks.overall.inference import marker_html_func, mathpix_html_func +from benchmarks.overall.schema import 
FullResult from marker.logger import configure_logging from marker.models import create_model_dict -from inference import get_marker_block_html from marker.settings import settings -from scoring import score_blocks configure_logging() -@click.command(help="Benchmark PDF to MD conversion.") -@click.option("--dataset", type=str, help="Path to the benchmark dataset", default="datalab-to/marker_benchmark") -@click.option("--other_methods", type=str, help="Comma separated list of other methods to compare against. Possible values:", default="") -@click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "overall"), help="Output path for results.") -@click.option("--max_rows", type=int, default=None, help="Maximum number of rows to process.") -def main( - dataset: str, - other_methods: str, - result_path: str, - max_rows: int -): - allowed_methods = [""] - methods = other_methods.split(",") - for method in methods: - if method not in allowed_methods: - raise ValueError(f"Method {method} not allowed. 
Allowed methods are {allowed_methods}") - - model_dict = create_model_dict() - ds = datasets.load_dataset(dataset, split="train") +def get_method_scores(ds, model_dict, max_rows=None, html_func=marker_html_func, **kwargs) -> FullResult: bench_scores = {} averages_by_type = defaultdict(list) averages_by_block_type = defaultdict(list) for idx, sample in tqdm(enumerate(ds), desc="Running benchmark"): + if max_rows is not None and idx >= max_rows: + break + gt_blocks = json.loads(sample["gt_blocks"]) doc_type = sample["classification"] - pdf_bytes = sample["pdf"] # This is a single page PDF - marker_html = get_marker_block_html(model_dict, gt_blocks, pdf_bytes) - gt_html = [block["html"] for block in gt_blocks] - scores = score_blocks(gt_html, marker_html) - gt_weights = [len(ht) for ht in gt_html] - overall_score = sum([s * w for s, w in zip(scores, gt_weights)]) / sum(gt_weights) - bench_scores[idx] = { - "scores": scores, - "weights": gt_weights, - "overall_score": overall_score # Weighted score, weighted by length of GT block - } - - averages_by_type[doc_type].append(overall_score) - - for score, gt_block in zip(scores, gt_blocks): + try: + scores = html_func(model_dict, sample, **kwargs) + except ValueError as e: + print(f"Error with sample {idx}: {e}") + continue + averages_by_type[doc_type].append(scores["overall_score"]) + + for score, gt_block in zip(scores["scores"], gt_blocks): averages_by_block_type[gt_block["block_type"]].append(score) - if max_rows is not None and idx >= max_rows: - break + bench_scores[idx] = scores + + return { + "raw_scores": bench_scores, + "averages_by_type": averages_by_type, + "averages_by_block_type": averages_by_block_type + } + +def print_scores(scores: FullResult, method: str): + averages_by_type = scores["averages_by_type"] + averages_by_block_type = scores["averages_by_block_type"] + bench_scores = scores["raw_scores"] for k in averages_by_type: averages_by_type[k] = sum(averages_by_type[k]) / len(averages_by_type[k]) 
averages_by_type = sorted(averages_by_type.items()) + print(f"Scores for method {method}:") print(tabulate.tabulate(averages_by_type, headers=["Document Type", "Average Score"], tablefmt="github")) for k in averages_by_block_type: @@ -76,10 +66,45 @@ def main( overall_average = sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores) print(tabulate.tabulate([["Overall Average", overall_average]], tablefmt="github")) + print() + +@click.command(help="Benchmark PDF to MD conversion.") +@click.option("--dataset", type=str, help="Path to the benchmark dataset", default="datalab-to/marker_benchmark") +@click.option("--other_methods", type=str, help="Comma separated list of other methods to compare against. Possible values: mathpix", default="") +@click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "overall"), help="Output path for results.") +@click.option("--max_rows", type=int, default=None, help="Maximum number of rows to process.") +def main( + dataset: str, + other_methods: str, + result_path: str, + max_rows: int +): + allowed_methods = ["mathpix", ""] + methods = other_methods.split(",") + for method in methods: + if method not in allowed_methods: + raise ValueError(f"Method {method} not allowed. 
Allowed methods are {allowed_methods}") + + model_dict = create_model_dict() + ds = datasets.load_dataset(dataset, split="train") + + marker_scores = get_method_scores(ds, model_dict, max_rows=max_rows) + all_scores = { + "marker": marker_scores + } + + if "mathpix" in methods: + mathpix_ds = datasets.load_dataset("datalab-to/marker_benchmark_mathpix", split="train") + mathpix_scores = get_method_scores(ds, model_dict, max_rows=max_rows, html_func=mathpix_html_func, mathpix_ds=mathpix_ds) + all_scores["mathpix"] = mathpix_scores + + for k,v in all_scores.items(): + print_scores(v, k) - out_path = Path(result_path) / "overall.json" - with open(out_path, "w") as f: - json.dump(bench_scores, f, indent=2) + out_path = Path(result_path) + out_path.mkdir(parents=True, exist_ok=True) + with open(out_path / "overall.json", "w") as f: + json.dump(all_scores, f, indent=2) print(f"Results saved to {out_path}.") diff --git a/benchmarks/overall/schema.py b/benchmarks/overall/schema.py new file mode 100644 index 00000000..98ffc1b8 --- /dev/null +++ b/benchmarks/overall/schema.py @@ -0,0 +1,15 @@ +from typing import TypedDict, List, Dict + + +class BlockScores(TypedDict): + scores: List[float] + order_score: float + gt: List[str] + method: str + overall_score: float + + +class FullResult(TypedDict): + raw_scores: Dict[int, BlockScores] + averages_by_type: Dict[str, List[float]] + averages_by_block_type: Dict[str, List[float]] diff --git a/benchmarks/overall/scoring.py b/benchmarks/overall/scoring.py index 3ae19a98..1ba78bc9 100644 --- a/benchmarks/overall/scoring.py +++ b/benchmarks/overall/scoring.py @@ -1,30 +1,135 @@ -import re -from bs4 import BeautifulSoup +from typing import List -from markdownify import markdownify as md from rapidfuzz import fuzz -def standardize_html(html): - soup = BeautifulSoup(html, "html.parser") +from benchmarks.overall.schema import BlockScores +from marker.renderers.markdown import MarkdownRenderer +import re + + +def kendall_tau(correct_order: 
List[int], actual_order: List[int]) -> float: + n = len(correct_order) + concordant = 0 + discordant = 0 + + for i in range(n): + for j in range(i + 1, n): + correct_sign = correct_order[i] - correct_order[j] + actual_sign = actual_order[i] - actual_order[j] + + if (correct_sign > 0 and actual_sign > 0) or (correct_sign < 0 and actual_sign < 0): + concordant += 1 + elif (correct_sign < 0 and actual_sign > 0) or (correct_sign > 0 and actual_sign < 0): + discordant += 1 + + total_pairs = (n * (n - 1)) // 2 + tau = (concordant - discordant) / total_pairs + tau = (tau + 1) / 2 # 0-1 scale + return tau * 100 # 0-100 scale + + +def find_fuzzy_alignments( + main_string: str, + substrings: List[str], + threshold: int = 70 +) -> List[dict]: + alignments = [] + + for idx, substr in enumerate(substrings): + result = fuzz.partial_ratio_alignment(substr, main_string, score_cutoff=threshold) + + score = 0 + dest_start = 0 + dest_end = 0 + if result: + score = result.score + dest_start = result.dest_start + dest_end = result.dest_end + + alignments.append({ + "string": substr, + "start": dest_start, + "end": dest_end, + "score": score, + "idx": idx + }) + return alignments - # Convert all headers to h1 so we don't penalize small differences in header levels - for tag in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): - tag.name = "h1" +def convert_to_md(html): + md = MarkdownRenderer() + markdown = md.md_cls.convert(html) + return markdown + +def standardize_markdown(markdown): + pattern = r'(?", "\n") + markdown = re.sub(r"(.*?)", r"\1", markdown) + markdown = re.sub(r"(.*?)", r"\1", markdown) + markdown = re.sub(r"\s+", " ", markdown) markdown = re.sub(r"\n+", "\n", markdown) markdown = re.sub("\\.+", ".", markdown) # Replace repeated periods with a single period, like in table of contents - return markdown.strip() + markdown = re.sub("#+", "#", markdown) # Replace repeated headers with a single header + markdown = re.sub(r"\$", "", markdown) # Remove equation delimiters + 
return markdown.strip().lower() + + +def standardize_math(match): + try: + delim = "$$" if match.group(0).startswith('$$') else "$" + math_content = match.group(1) or match.group(2) + result = clean_latex(math_content) + return f'{delim}{result}{delim}' + except Exception as e: + print(f"Failed to standardize math expression: {match.group(0)} with error: {e}") + return match.group(0) + + +def clean_latex(latex_str): + latex_str = re.sub(r'\s+', ' ', latex_str.strip()) + for tag in [r'\\text', r'\\mathrm', r'\\mathbf', r'\\textbf']: + latex_str = re.sub(tag + r'\{([^}]+)\}', r'\1', latex_str) + + + replacements = { + '\\times': '*', + '\\cdot': '*', + '\\div': '/', + '\\le': '<=', + '\\ge': '>=', + '\\neq': '!=', + '\\to': '\\rightarrow', + } + + for old, new in replacements.items(): + latex_str = latex_str.replace(old, new) + + return latex_str + +def score_blocks(gt_html, method_html, convert=True) -> BlockScores: + if convert: + method_html = convert_to_md(method_html) + method_html = standardize_markdown(method_html) + gt = [standardize_markdown(convert_to_md(gt)) for gt in gt_html] + alignments = find_fuzzy_alignments(method_html, gt) + scores = [alignment["score"] for alignment in alignments] + orders = [alignment["start"] for alignment in alignments] + correct_order = range(len(gt)) + actual_order = sorted(range(len(gt)), key=lambda x: orders[x]) + order_score = kendall_tau(correct_order, actual_order) + gt_weights = [len(g) for g in gt] + weighted_scores = [score * weight for score, weight in zip(scores, gt_weights)] -def score_blocks(gt_html, method_html): - scores = [] - for gt, method in zip(gt_html, method_html): - gt= standardize_html(gt) - method = standardize_html(method) - score = fuzz.ratio(gt, method) - scores.append(score) - return scores \ No newline at end of file + # Weight the score by sequence length + overall_score = sum(weighted_scores) / max(1, sum(gt_weights)) + overall_score = overall_score * 0.8 + order_score * 0.2 + return { + 
"scores": scores, + "order_score": order_score, + "gt": gt, + "method": method_html, + "overall_score": overall_score + } \ No newline at end of file diff --git a/benchmarks/table/table.py b/benchmarks/table/table.py index dfeb5eb0..448e32fe 100644 --- a/benchmarks/table/table.py +++ b/benchmarks/table/table.py @@ -223,6 +223,7 @@ def main( } out_path = Path(result_path) / "table.json" + out_path.mkdir(parents=True, exist_ok=True) with open(out_path, "w+") as f: json.dump(results, f, indent=2) diff --git a/marker/processors/llm/llm_complex.py b/marker/processors/llm/llm_complex.py index 52c46364..6d58a077 100644 --- a/marker/processors/llm/llm_complex.py +++ b/marker/processors/llm/llm_complex.py @@ -85,4 +85,4 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): # Convert LLM markdown to html corrected_markdown = corrected_markdown.strip().lstrip("```markdown").rstrip("```").strip() - block.html = markdown2.markdown(corrected_markdown) \ No newline at end of file + block.html = markdown2.markdown(corrected_markdown, extras=["tables"]) \ No newline at end of file diff --git a/marker/processors/llm/llm_handwriting.py b/marker/processors/llm/llm_handwriting.py index d3e9b9f3..10a0c25b 100644 --- a/marker/processors/llm/llm_handwriting.py +++ b/marker/processors/llm/llm_handwriting.py @@ -72,4 +72,4 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Handwrit return markdown = markdown.strip().lstrip("```markdown").rstrip("```").strip() - block.html = markdown2.markdown(markdown) + block.html = markdown2.markdown(markdown, extras=["tables"]) diff --git a/marker/renderers/markdown.py b/marker/renderers/markdown.py index 9a48fa40..722470c1 100644 --- a/marker/renderers/markdown.py +++ b/marker/renderers/markdown.py @@ -198,10 +198,9 @@ class MarkdownRenderer(HTMLRenderer): inline_math_delimiters: Annotated[Tuple[str], "The delimiters to use for inline math."] = ("$", "$") block_math_delimiters: Annotated[Tuple[str], 
"The delimiters to use for block math."] = ("$$", "$$") - def __call__(self, document: Document) -> MarkdownOutput: - document_output = document.render() - full_html, images = self.extract_html(document, document_output) - md_cls = Markdownify( + @property + def md_cls(self): + return Markdownify( self.paginate_output, self.page_separator, heading_style="ATX", @@ -215,7 +214,12 @@ def __call__(self, document: Document) -> MarkdownOutput: inline_math_delimiters=self.inline_math_delimiters, block_math_delimiters=self.block_math_delimiters ) - markdown = md_cls.convert(full_html) + + + def __call__(self, document: Document) -> MarkdownOutput: + document_output = document.render() + full_html, images = self.extract_html(document, document_output) + markdown = self.md_cls.convert(full_html) markdown = cleanup_text(markdown) return MarkdownOutput( markdown=markdown, diff --git a/poetry.lock b/poetry.lock index c053780c..471d43c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -801,13 +801,13 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth [[package]] name = "fastapi" -version = "0.115.7" +version = "0.115.8" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.115.7-py3-none-any.whl", hash = "sha256:eb6a8c8bf7f26009e8147111ff15b5177a0e19bb4a45bc3486ab14804539d21e"}, - {file = "fastapi-0.115.7.tar.gz", hash = "sha256:0f106da6c01d88a6786b3248fb4d7a940d071f6f488488898ad5d354b25ed015"}, + {file = "fastapi-0.115.8-py3-none-any.whl", hash = "sha256:753a96dd7e036b34eeef8babdfcfe3f28ff79648f86551eb36bfc1b0bf4a8cbf"}, + {file = "fastapi-0.115.8.tar.gz", hash = "sha256:0ce9111231720190473e222cdf0f07f7206ad7e53ea02beb1d2dc36e2f0741e9"}, ] [package.dependencies] @@ -1367,13 +1367,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.28.0" +version = "0.28.1" description = "Client library to 
download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.28.0-py3-none-any.whl", hash = "sha256:71cff4e500efe68061d94b7f6d3114e183715088be7a90bf4dd84af83b5f5cdb"}, - {file = "huggingface_hub-0.28.0.tar.gz", hash = "sha256:c2b18c02a47d4384763caddb4d0ab2a8fc6c16e0800d6de4d55d0a896244aba3"}, + {file = "huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7"}, + {file = "huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae"}, ] [package.dependencies] @@ -1826,13 +1826,13 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (> [[package]] name = "jupyterlab" -version = "4.3.4" +version = "4.3.5" description = "JupyterLab computational environment" optional = false python-versions = ">=3.8" files = [ - {file = "jupyterlab-4.3.4-py3-none-any.whl", hash = "sha256:b754c2601c5be6adf87cb5a1d8495d653ffb945f021939f77776acaa94dae952"}, - {file = "jupyterlab-4.3.4.tar.gz", hash = "sha256:f0bb9b09a04766e3423cccc2fc23169aa2ffedcdf8713e9e0fb33cac0b6859d0"}, + {file = "jupyterlab-4.3.5-py3-none-any.whl", hash = "sha256:571bbdee20e4c5321ab5195bc41cf92a75a5cff886be5e57ce78dfa37a5e9fdb"}, + {file = "jupyterlab-4.3.5.tar.gz", hash = "sha256:c779bf72ced007d7d29d5bcef128e7fdda96ea69299e19b04a43635a7d641f9d"}, ] [package.dependencies] @@ -2729,18 +2729,6 @@ files = [ [package.dependencies] nvidia-nvjitlink-cu12 = "*" -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.6.2" -description = "NVIDIA cuSPARSELt" -optional = false -python-versions = "*" -files = [ - {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:067a7f6d03ea0d4841c85f0c6f1991c5dda98211f6302cb83a4ab234ee95bef8"}, - {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl", hash = 
"sha256:df2c24502fd76ebafe7457dbc4716b2fec071aabaed4fb7691a201cde03704d9"}, - {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-win_amd64.whl", hash = "sha256:0057c91d230703924c0422feabe4ce768841f9b4b44d28586b6f6d2eb86fbe70"}, -] - [[package]] name = "nvidia-nccl-cu12" version = "2.21.5" @@ -3578,23 +3566,6 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] -[[package]] -name = "pymupdf" -version = "1.25.2" -description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." -optional = false -python-versions = ">=3.9" -files = [ - {file = "pymupdf-1.25.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:59dea22b633cc4fc13670b4c5db50d71f8cd4f420814420f33ce47ddcb61e1f6"}, - {file = "pymupdf-1.25.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:e8b8a874497cd0deee89a6a4fb76a3a08173c8d39e88fc7cf715764ec5a243e9"}, - {file = "pymupdf-1.25.2-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f61e5cdb25b86eb28d34aa3557b49ecf9e361d5f5cd3b1660406f8f0bf813af7"}, - {file = "pymupdf-1.25.2-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae8cfa7a97d78f813d286ecba32369059d88073edd1e5cf105f4cd0811f71925"}, - {file = "pymupdf-1.25.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:295505fe1ecb7c7b57d4124d373e207ea311d8e40bc7ac3016d8ec2d60b091e9"}, - {file = "pymupdf-1.25.2-cp39-abi3-win32.whl", hash = "sha256:b9488c8b82bb9be36fb13ee0c8d43b0ddcc50af83b61da01e6040413d9e67da6"}, - {file = "pymupdf-1.25.2-cp39-abi3-win_amd64.whl", hash = "sha256:1b4ca6f5780d319a08dff885a5a0e3585c5d7af04dcfa063c535b88371fd91c1"}, - {file = "pymupdf-1.25.2.tar.gz", hash = "sha256:9ea88ff1b3ccb359620f106a6fd5ba6877d959d21d78272052c3496ceede6eec"}, -] - [[package]] name = "pyparsing" version = "3.2.1" @@ -3840,120 +3811,120 @@ files = [ [[package]] name = "pyzmq" -version = "26.2.0" +version = "26.2.1" description = "Python bindings for 0MQ" optional = false 
python-versions = ">=3.7" files = [ - {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629"}, - {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dacd995031a01d16eec825bf30802fceb2c3791ef24bcce48fa98ce40918c27b"}, - {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89289a5ee32ef6c439086184529ae060c741334b8970a6855ec0b6ad3ff28764"}, - {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5506f06d7dc6ecf1efacb4a013b1f05071bb24b76350832c96449f4a2d95091c"}, - {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ea039387c10202ce304af74def5021e9adc6297067f3441d348d2b633e8166a"}, - {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a2224fa4a4c2ee872886ed00a571f5e967c85e078e8e8c2530a2fb01b3309b88"}, - {file = "pyzmq-26.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:28ad5233e9c3b52d76196c696e362508959741e1a005fb8fa03b51aea156088f"}, - {file = "pyzmq-26.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:1c17211bc037c7d88e85ed8b7d8f7e52db6dc8eca5590d162717c654550f7282"}, - {file = "pyzmq-26.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b8f86dd868d41bea9a5f873ee13bf5551c94cf6bc51baebc6f85075971fe6eea"}, - {file = "pyzmq-26.2.0-cp310-cp310-win32.whl", hash = "sha256:46a446c212e58456b23af260f3d9fb785054f3e3653dbf7279d8f2b5546b21c2"}, - {file = "pyzmq-26.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:49d34ab71db5a9c292a7644ce74190b1dd5a3475612eefb1f8be1d6961441971"}, - {file = "pyzmq-26.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:bfa832bfa540e5b5c27dcf5de5d82ebc431b82c453a43d141afb1e5d2de025fa"}, - {file = "pyzmq-26.2.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:8f7e66c7113c684c2b3f1c83cdd3376103ee0ce4c49ff80a648643e57fb22218"}, - {file = 
"pyzmq-26.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3a495b30fc91db2db25120df5847d9833af237546fd59170701acd816ccc01c4"}, - {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77eb0968da535cba0470a5165468b2cac7772cfb569977cff92e240f57e31bef"}, - {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ace4f71f1900a548f48407fc9be59c6ba9d9aaf658c2eea6cf2779e72f9f317"}, - {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92a78853d7280bffb93df0a4a6a2498cba10ee793cc8076ef797ef2f74d107cf"}, - {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:689c5d781014956a4a6de61d74ba97b23547e431e9e7d64f27d4922ba96e9d6e"}, - {file = "pyzmq-26.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aca98bc423eb7d153214b2df397c6421ba6373d3397b26c057af3c904452e37"}, - {file = "pyzmq-26.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1f3496d76b89d9429a656293744ceca4d2ac2a10ae59b84c1da9b5165f429ad3"}, - {file = "pyzmq-26.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5c2b3bfd4b9689919db068ac6c9911f3fcb231c39f7dd30e3138be94896d18e6"}, - {file = "pyzmq-26.2.0-cp311-cp311-win32.whl", hash = "sha256:eac5174677da084abf378739dbf4ad245661635f1600edd1221f150b165343f4"}, - {file = "pyzmq-26.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:5a509df7d0a83a4b178d0f937ef14286659225ef4e8812e05580776c70e155d5"}, - {file = "pyzmq-26.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:c0e6091b157d48cbe37bd67233318dbb53e1e6327d6fc3bb284afd585d141003"}, - {file = "pyzmq-26.2.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:ded0fc7d90fe93ae0b18059930086c51e640cdd3baebdc783a695c77f123dcd9"}, - {file = "pyzmq-26.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:17bf5a931c7f6618023cdacc7081f3f266aecb68ca692adac015c383a134ca52"}, - {file = 
"pyzmq-26.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55cf66647e49d4621a7e20c8d13511ef1fe1efbbccf670811864452487007e08"}, - {file = "pyzmq-26.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4661c88db4a9e0f958c8abc2b97472e23061f0bc737f6f6179d7a27024e1faa5"}, - {file = "pyzmq-26.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea7f69de383cb47522c9c208aec6dd17697db7875a4674c4af3f8cfdac0bdeae"}, - {file = "pyzmq-26.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7f98f6dfa8b8ccaf39163ce872bddacca38f6a67289116c8937a02e30bbe9711"}, - {file = "pyzmq-26.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e3e0210287329272539eea617830a6a28161fbbd8a3271bf4150ae3e58c5d0e6"}, - {file = "pyzmq-26.2.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6b274e0762c33c7471f1a7471d1a2085b1a35eba5cdc48d2ae319f28b6fc4de3"}, - {file = "pyzmq-26.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:29c6a4635eef69d68a00321e12a7d2559fe2dfccfa8efae3ffb8e91cd0b36a8b"}, - {file = "pyzmq-26.2.0-cp312-cp312-win32.whl", hash = "sha256:989d842dc06dc59feea09e58c74ca3e1678c812a4a8a2a419046d711031f69c7"}, - {file = "pyzmq-26.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:2a50625acdc7801bc6f74698c5c583a491c61d73c6b7ea4dee3901bb99adb27a"}, - {file = "pyzmq-26.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:4d29ab8592b6ad12ebbf92ac2ed2bedcfd1cec192d8e559e2e099f648570e19b"}, - {file = "pyzmq-26.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9dd8cd1aeb00775f527ec60022004d030ddc51d783d056e3e23e74e623e33726"}, - {file = "pyzmq-26.2.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:28c812d9757fe8acecc910c9ac9dafd2ce968c00f9e619db09e9f8f54c3a68a3"}, - {file = "pyzmq-26.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d80b1dd99c1942f74ed608ddb38b181b87476c6a966a88a950c7dee118fdf50"}, - {file = 
"pyzmq-26.2.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c997098cc65e3208eca09303630e84d42718620e83b733d0fd69543a9cab9cb"}, - {file = "pyzmq-26.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ad1bc8d1b7a18497dda9600b12dc193c577beb391beae5cd2349184db40f187"}, - {file = "pyzmq-26.2.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:bea2acdd8ea4275e1278350ced63da0b166421928276c7c8e3f9729d7402a57b"}, - {file = "pyzmq-26.2.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:23f4aad749d13698f3f7b64aad34f5fc02d6f20f05999eebc96b89b01262fb18"}, - {file = "pyzmq-26.2.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a4f96f0d88accc3dbe4a9025f785ba830f968e21e3e2c6321ccdfc9aef755115"}, - {file = "pyzmq-26.2.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ced65e5a985398827cc9276b93ef6dfabe0273c23de8c7931339d7e141c2818e"}, - {file = "pyzmq-26.2.0-cp313-cp313-win32.whl", hash = "sha256:31507f7b47cc1ead1f6e86927f8ebb196a0bab043f6345ce070f412a59bf87b5"}, - {file = "pyzmq-26.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:70fc7fcf0410d16ebdda9b26cbd8bf8d803d220a7f3522e060a69a9c87bf7bad"}, - {file = "pyzmq-26.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:c3789bd5768ab5618ebf09cef6ec2b35fed88709b104351748a63045f0ff9797"}, - {file = "pyzmq-26.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:034da5fc55d9f8da09015d368f519478a52675e558c989bfcb5cf6d4e16a7d2a"}, - {file = "pyzmq-26.2.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:c92d73464b886931308ccc45b2744e5968cbaade0b1d6aeb40d8ab537765f5bc"}, - {file = "pyzmq-26.2.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:794a4562dcb374f7dbbfb3f51d28fb40123b5a2abadee7b4091f93054909add5"}, - {file = "pyzmq-26.2.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aee22939bb6075e7afededabad1a56a905da0b3c4e3e0c45e75810ebe3a52672"}, - {file = 
"pyzmq-26.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae90ff9dad33a1cfe947d2c40cb9cb5e600d759ac4f0fd22616ce6540f72797"}, - {file = "pyzmq-26.2.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:43a47408ac52647dfabbc66a25b05b6a61700b5165807e3fbd40063fcaf46386"}, - {file = "pyzmq-26.2.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:25bf2374a2a8433633c65ccb9553350d5e17e60c8eb4de4d92cc6bd60f01d306"}, - {file = "pyzmq-26.2.0-cp313-cp313t-musllinux_1_1_i686.whl", hash = "sha256:007137c9ac9ad5ea21e6ad97d3489af654381324d5d3ba614c323f60dab8fae6"}, - {file = "pyzmq-26.2.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:470d4a4f6d48fb34e92d768b4e8a5cc3780db0d69107abf1cd7ff734b9766eb0"}, - {file = "pyzmq-26.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3b55a4229ce5da9497dd0452b914556ae58e96a4381bb6f59f1305dfd7e53fc8"}, - {file = "pyzmq-26.2.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9cb3a6460cdea8fe8194a76de8895707e61ded10ad0be97188cc8463ffa7e3a8"}, - {file = "pyzmq-26.2.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8ab5cad923cc95c87bffee098a27856c859bd5d0af31bd346035aa816b081fe1"}, - {file = "pyzmq-26.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ed69074a610fad1c2fda66180e7b2edd4d31c53f2d1872bc2d1211563904cd9"}, - {file = "pyzmq-26.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:cccba051221b916a4f5e538997c45d7d136a5646442b1231b916d0164067ea27"}, - {file = "pyzmq-26.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:0eaa83fc4c1e271c24eaf8fb083cbccef8fde77ec8cd45f3c35a9a123e6da097"}, - {file = "pyzmq-26.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9edda2df81daa129b25a39b86cb57dfdfe16f7ec15b42b19bfac503360d27a93"}, - {file = "pyzmq-26.2.0-cp37-cp37m-win32.whl", hash = "sha256:ea0eb6af8a17fa272f7b98d7bebfab7836a0d62738e16ba380f440fceca2d951"}, - {file = 
"pyzmq-26.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4ff9dc6bc1664bb9eec25cd17506ef6672d506115095411e237d571e92a58231"}, - {file = "pyzmq-26.2.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:2eb7735ee73ca1b0d71e0e67c3739c689067f055c764f73aac4cc8ecf958ee3f"}, - {file = "pyzmq-26.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a534f43bc738181aa7cbbaf48e3eca62c76453a40a746ab95d4b27b1111a7d2"}, - {file = "pyzmq-26.2.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:aedd5dd8692635813368e558a05266b995d3d020b23e49581ddd5bbe197a8ab6"}, - {file = "pyzmq-26.2.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8be4700cd8bb02cc454f630dcdf7cfa99de96788b80c51b60fe2fe1dac480289"}, - {file = "pyzmq-26.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fcc03fa4997c447dce58264e93b5aa2d57714fbe0f06c07b7785ae131512732"}, - {file = "pyzmq-26.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:402b190912935d3db15b03e8f7485812db350d271b284ded2b80d2e5704be780"}, - {file = "pyzmq-26.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8685fa9c25ff00f550c1fec650430c4b71e4e48e8d852f7ddcf2e48308038640"}, - {file = "pyzmq-26.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:76589c020680778f06b7e0b193f4b6dd66d470234a16e1df90329f5e14a171cd"}, - {file = "pyzmq-26.2.0-cp38-cp38-win32.whl", hash = "sha256:8423c1877d72c041f2c263b1ec6e34360448decfb323fa8b94e85883043ef988"}, - {file = "pyzmq-26.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:76589f2cd6b77b5bdea4fca5992dc1c23389d68b18ccc26a53680ba2dc80ff2f"}, - {file = "pyzmq-26.2.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:b1d464cb8d72bfc1a3adc53305a63a8e0cac6bc8c5a07e8ca190ab8d3faa43c2"}, - {file = "pyzmq-26.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4da04c48873a6abdd71811c5e163bd656ee1b957971db7f35140a2d573f6949c"}, - {file = "pyzmq-26.2.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:d049df610ac811dcffdc147153b414147428567fbbc8be43bb8885f04db39d98"}, - {file = "pyzmq-26.2.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05590cdbc6b902101d0e65d6a4780af14dc22914cc6ab995d99b85af45362cc9"}, - {file = "pyzmq-26.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c811cfcd6a9bf680236c40c6f617187515269ab2912f3d7e8c0174898e2519db"}, - {file = "pyzmq-26.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6835dd60355593de10350394242b5757fbbd88b25287314316f266e24c61d073"}, - {file = "pyzmq-26.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc6bee759a6bddea5db78d7dcd609397449cb2d2d6587f48f3ca613b19410cfc"}, - {file = "pyzmq-26.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c530e1eecd036ecc83c3407f77bb86feb79916d4a33d11394b8234f3bd35b940"}, - {file = "pyzmq-26.2.0-cp39-cp39-win32.whl", hash = "sha256:367b4f689786fca726ef7a6c5ba606958b145b9340a5e4808132cc65759abd44"}, - {file = "pyzmq-26.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:e6fa2e3e683f34aea77de8112f6483803c96a44fd726d7358b9888ae5bb394ec"}, - {file = "pyzmq-26.2.0-cp39-cp39-win_arm64.whl", hash = "sha256:7445be39143a8aa4faec43b076e06944b8f9d0701b669df4af200531b21e40bb"}, - {file = "pyzmq-26.2.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:706e794564bec25819d21a41c31d4df2d48e1cc4b061e8d345d7fb4dd3e94072"}, - {file = "pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b435f2753621cd36e7c1762156815e21c985c72b19135dac43a7f4f31d28dd1"}, - {file = "pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:160c7e0a5eb178011e72892f99f918c04a131f36056d10d9c1afb223fc952c2d"}, - {file = "pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c4a71d5d6e7b28a47a394c0471b7e77a0661e2d651e7ae91e0cab0a587859ca"}, - {file = "pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c"}, - {file = "pyzmq-26.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2ea4ad4e6a12e454de05f2949d4beddb52460f3de7c8b9d5c46fbb7d7222e02c"}, - {file = "pyzmq-26.2.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fc4f7a173a5609631bb0c42c23d12c49df3966f89f496a51d3eb0ec81f4519d6"}, - {file = "pyzmq-26.2.0-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:878206a45202247781472a2d99df12a176fef806ca175799e1c6ad263510d57c"}, - {file = "pyzmq-26.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17c412bad2eb9468e876f556eb4ee910e62d721d2c7a53c7fa31e643d35352e6"}, - {file = "pyzmq-26.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:0d987a3ae5a71c6226b203cfd298720e0086c7fe7c74f35fa8edddfbd6597eed"}, - {file = "pyzmq-26.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:39887ac397ff35b7b775db7201095fc6310a35fdbae85bac4523f7eb3b840e20"}, - {file = "pyzmq-26.2.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fdb5b3e311d4d4b0eb8b3e8b4d1b0a512713ad7e6a68791d0923d1aec433d919"}, - {file = "pyzmq-26.2.0-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:226af7dcb51fdb0109f0016449b357e182ea0ceb6b47dfb5999d569e5db161d5"}, - {file = "pyzmq-26.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bed0e799e6120b9c32756203fb9dfe8ca2fb8467fed830c34c877e25638c3fc"}, - {file = "pyzmq-26.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:29c7947c594e105cb9e6c466bace8532dc1ca02d498684128b339799f5248277"}, - {file = "pyzmq-26.2.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cdeabcff45d1c219636ee2e54d852262e5c2e085d6cb476d938aee8d921356b3"}, - {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:35cffef589bcdc587d06f9149f8d5e9e8859920a071df5a2671de2213bef592a"}, - {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18c8dc3b7468d8b4bdf60ce9d7141897da103c7a4690157b32b60acb45e333e6"}, - {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7133d0a1677aec369d67dd78520d3fa96dd7f3dcec99d66c1762870e5ea1a50a"}, - {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6a96179a24b14fa6428cbfc08641c779a53f8fcec43644030328f44034c7f1f4"}, - {file = "pyzmq-26.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4f78c88905461a9203eac9faac157a2a0dbba84a0fd09fd29315db27be40af9f"}, - {file = "pyzmq-26.2.0.tar.gz", hash = "sha256:070672c258581c8e4f640b5159297580a9974b026043bd4ab0470be9ed324f1f"}, + {file = "pyzmq-26.2.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:f39d1227e8256d19899d953e6e19ed2ccb689102e6d85e024da5acf410f301eb"}, + {file = "pyzmq-26.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a23948554c692df95daed595fdd3b76b420a4939d7a8a28d6d7dea9711878641"}, + {file = "pyzmq-26.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95f5728b367a042df146cec4340d75359ec6237beebf4a8f5cf74657c65b9257"}, + {file = "pyzmq-26.2.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95f7b01b3f275504011cf4cf21c6b885c8d627ce0867a7e83af1382ebab7b3ff"}, + {file = "pyzmq-26.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80a00370a2ef2159c310e662c7c0f2d030f437f35f478bb8b2f70abd07e26b24"}, + {file = "pyzmq-26.2.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:8531ed35dfd1dd2af95f5d02afd6545e8650eedbf8c3d244a554cf47d8924459"}, + {file = "pyzmq-26.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cdb69710e462a38e6039cf17259d328f86383a06c20482cc154327968712273c"}, + {file = "pyzmq-26.2.1-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:e7eeaef81530d0b74ad0d29eec9997f1c9230c2f27242b8d17e0ee67662c8f6e"}, + {file = "pyzmq-26.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:361edfa350e3be1f987e592e834594422338d7174364763b7d3de5b0995b16f3"}, + {file = "pyzmq-26.2.1-cp310-cp310-win32.whl", hash = "sha256:637536c07d2fb6a354988b2dd1d00d02eb5dd443f4bbee021ba30881af1c28aa"}, + {file = "pyzmq-26.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:45fad32448fd214fbe60030aa92f97e64a7140b624290834cc9b27b3a11f9473"}, + {file = "pyzmq-26.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:d9da0289d8201c8a29fd158aaa0dfe2f2e14a181fd45e2dc1fbf969a62c1d594"}, + {file = "pyzmq-26.2.1-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:c059883840e634a21c5b31d9b9a0e2b48f991b94d60a811092bc37992715146a"}, + {file = "pyzmq-26.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ed038a921df836d2f538e509a59cb638df3e70ca0fcd70d0bf389dfcdf784d2a"}, + {file = "pyzmq-26.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9027a7fcf690f1a3635dc9e55e38a0d6602dbbc0548935d08d46d2e7ec91f454"}, + {file = "pyzmq-26.2.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d75fcb00a1537f8b0c0bb05322bc7e35966148ffc3e0362f0369e44a4a1de99"}, + {file = "pyzmq-26.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0019cc804ac667fb8c8eaecdb66e6d4a68acf2e155d5c7d6381a5645bd93ae4"}, + {file = "pyzmq-26.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f19dae58b616ac56b96f2e2290f2d18730a898a171f447f491cc059b073ca1fa"}, + {file = "pyzmq-26.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f5eeeb82feec1fc5cbafa5ee9022e87ffdb3a8c48afa035b356fcd20fc7f533f"}, + {file = "pyzmq-26.2.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:000760e374d6f9d1a3478a42ed0c98604de68c9e94507e5452951e598ebecfba"}, + {file = "pyzmq-26.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:817fcd3344d2a0b28622722b98500ae9c8bfee0f825b8450932ff19c0b15bebd"}, + {file = "pyzmq-26.2.1-cp311-cp311-win32.whl", hash = "sha256:88812b3b257f80444a986b3596e5ea5c4d4ed4276d2b85c153a6fbc5ca457ae7"}, + {file = "pyzmq-26.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:ef29630fde6022471d287c15c0a2484aba188adbfb978702624ba7a54ddfa6c1"}, + {file = "pyzmq-26.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:f32718ee37c07932cc336096dc7403525301fd626349b6eff8470fe0f996d8d7"}, + {file = "pyzmq-26.2.1-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:a6549ecb0041dafa55b5932dcbb6c68293e0bd5980b5b99f5ebb05f9a3b8a8f3"}, + {file = "pyzmq-26.2.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0250c94561f388db51fd0213cdccbd0b9ef50fd3c57ce1ac937bf3034d92d72e"}, + {file = "pyzmq-26.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36ee4297d9e4b34b5dc1dd7ab5d5ea2cbba8511517ef44104d2915a917a56dc8"}, + {file = "pyzmq-26.2.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2a9cb17fd83b7a3a3009901aca828feaf20aa2451a8a487b035455a86549c09"}, + {file = "pyzmq-26.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:786dd8a81b969c2081b31b17b326d3a499ddd1856e06d6d79ad41011a25148da"}, + {file = "pyzmq-26.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2d88ba221a07fc2c5581565f1d0fe8038c15711ae79b80d9462e080a1ac30435"}, + {file = "pyzmq-26.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1c84c1297ff9f1cd2440da4d57237cb74be21fdfe7d01a10810acba04e79371a"}, + {file = "pyzmq-26.2.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:46d4ebafc27081a7f73a0f151d0c38d4291656aa134344ec1f3d0199ebfbb6d4"}, + {file = "pyzmq-26.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:91e2bfb8e9a29f709d51b208dd5f441dc98eb412c8fe75c24ea464734ccdb48e"}, + {file = "pyzmq-26.2.1-cp312-cp312-win32.whl", hash = "sha256:4a98898fdce380c51cc3e38ebc9aa33ae1e078193f4dc641c047f88b8c690c9a"}, + {file = 
"pyzmq-26.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:a0741edbd0adfe5f30bba6c5223b78c131b5aa4a00a223d631e5ef36e26e6d13"}, + {file = "pyzmq-26.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:e5e33b1491555843ba98d5209439500556ef55b6ab635f3a01148545498355e5"}, + {file = "pyzmq-26.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:099b56ef464bc355b14381f13355542e452619abb4c1e57a534b15a106bf8e23"}, + {file = "pyzmq-26.2.1-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:651726f37fcbce9f8dd2a6dab0f024807929780621890a4dc0c75432636871be"}, + {file = "pyzmq-26.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57dd4d91b38fa4348e237a9388b4423b24ce9c1695bbd4ba5a3eada491e09399"}, + {file = "pyzmq-26.2.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d51a7bfe01a48e1064131f3416a5439872c533d756396be2b39e3977b41430f9"}, + {file = "pyzmq-26.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7154d228502e18f30f150b7ce94f0789d6b689f75261b623f0fdc1eec642aab"}, + {file = "pyzmq-26.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:f1f31661a80cc46aba381bed475a9135b213ba23ca7ff6797251af31510920ce"}, + {file = "pyzmq-26.2.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:290c96f479504439b6129a94cefd67a174b68ace8a8e3f551b2239a64cfa131a"}, + {file = "pyzmq-26.2.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f2c307fbe86e18ab3c885b7e01de942145f539165c3360e2af0f094dd440acd9"}, + {file = "pyzmq-26.2.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:b314268e716487bfb86fcd6f84ebbe3e5bec5fac75fdf42bc7d90fdb33f618ad"}, + {file = "pyzmq-26.2.1-cp313-cp313-win32.whl", hash = "sha256:edb550616f567cd5603b53bb52a5f842c0171b78852e6fc7e392b02c2a1504bb"}, + {file = "pyzmq-26.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:100a826a029c8ef3d77a1d4c97cbd6e867057b5806a7276f2bac1179f893d3bf"}, + {file = "pyzmq-26.2.1-cp313-cp313-win_arm64.whl", hash = 
"sha256:6991ee6c43e0480deb1b45d0c7c2bac124a6540cba7db4c36345e8e092da47ce"}, + {file = "pyzmq-26.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:25e720dba5b3a3bb2ad0ad5d33440babd1b03438a7a5220511d0c8fa677e102e"}, + {file = "pyzmq-26.2.1-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:9ec6abfb701437142ce9544bd6a236addaf803a32628d2260eb3dbd9a60e2891"}, + {file = "pyzmq-26.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e1eb9d2bfdf5b4e21165b553a81b2c3bd5be06eeddcc4e08e9692156d21f1f6"}, + {file = "pyzmq-26.2.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:90dc731d8e3e91bcd456aa7407d2eba7ac6f7860e89f3766baabb521f2c1de4a"}, + {file = "pyzmq-26.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b6a93d684278ad865fc0b9e89fe33f6ea72d36da0e842143891278ff7fd89c3"}, + {file = "pyzmq-26.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c1bb37849e2294d519117dd99b613c5177934e5c04a5bb05dd573fa42026567e"}, + {file = "pyzmq-26.2.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:632a09c6d8af17b678d84df442e9c3ad8e4949c109e48a72f805b22506c4afa7"}, + {file = "pyzmq-26.2.1-cp313-cp313t-musllinux_1_1_i686.whl", hash = "sha256:fc409c18884eaf9ddde516d53af4f2db64a8bc7d81b1a0c274b8aa4e929958e8"}, + {file = "pyzmq-26.2.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:17f88622b848805d3f6427ce1ad5a2aa3cf61f12a97e684dab2979802024d460"}, + {file = "pyzmq-26.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3ef584f13820d2629326fe20cc04069c21c5557d84c26e277cfa6235e523b10f"}, + {file = "pyzmq-26.2.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:160194d1034902937359c26ccfa4e276abffc94937e73add99d9471e9f555dd6"}, + {file = "pyzmq-26.2.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:574b285150afdbf0a0424dddf7ef9a0d183988eb8d22feacb7160f7515e032cb"}, + {file = 
"pyzmq-26.2.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44dba28c34ce527cf687156c81f82bf1e51f047838d5964f6840fd87dfecf9fe"}, + {file = "pyzmq-26.2.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9fbdb90b85c7624c304f72ec7854659a3bd901e1c0ffb2363163779181edeb68"}, + {file = "pyzmq-26.2.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a7ad34a2921e8f76716dc7205c9bf46a53817e22b9eec2e8a3e08ee4f4a72468"}, + {file = "pyzmq-26.2.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:866c12b7c90dd3a86983df7855c6f12f9407c8684db6aa3890fc8027462bda82"}, + {file = "pyzmq-26.2.1-cp37-cp37m-win32.whl", hash = "sha256:eeb37f65350d5c5870517f02f8bbb2ac0fbec7b416c0f4875219fef305a89a45"}, + {file = "pyzmq-26.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4eb3197f694dfb0ee6af29ef14a35f30ae94ff67c02076eef8125e2d98963cd0"}, + {file = "pyzmq-26.2.1-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:36d4e7307db7c847fe37413f333027d31c11d5e6b3bacbb5022661ac635942ba"}, + {file = "pyzmq-26.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1c6ae0e95d0a4b0cfe30f648a18e764352d5415279bdf34424decb33e79935b8"}, + {file = "pyzmq-26.2.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5b4fc44f5360784cc02392f14235049665caaf7c0fe0b04d313e763d3338e463"}, + {file = "pyzmq-26.2.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:51431f6b2750eb9b9d2b2952d3cc9b15d0215e1b8f37b7a3239744d9b487325d"}, + {file = "pyzmq-26.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdbc78ae2065042de48a65f1421b8af6b76a0386bb487b41955818c3c1ce7bed"}, + {file = "pyzmq-26.2.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d14f50d61a89b0925e4d97a0beba6053eb98c426c5815d949a43544f05a0c7ec"}, + {file = "pyzmq-26.2.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:004837cb958988c75d8042f5dac19a881f3d9b3b75b2f574055e22573745f841"}, + {file = "pyzmq-26.2.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:0b2007f28ce1b8acebdf4812c1aab997a22e57d6a73b5f318b708ef9bcabbe95"}, + {file = "pyzmq-26.2.1-cp38-cp38-win32.whl", hash = "sha256:269c14904da971cb5f013100d1aaedb27c0a246728c341d5d61ddd03f463f2f3"}, + {file = "pyzmq-26.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:31fff709fef3b991cfe7189d2cfe0c413a1d0e82800a182cfa0c2e3668cd450f"}, + {file = "pyzmq-26.2.1-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:a4bffcadfd40660f26d1b3315a6029fd4f8f5bf31a74160b151f5c577b2dc81b"}, + {file = "pyzmq-26.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e76ad4729c2f1cf74b6eb1bdd05f6aba6175999340bd51e6caee49a435a13bf5"}, + {file = "pyzmq-26.2.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8b0f5bab40a16e708e78a0c6ee2425d27e1a5d8135c7a203b4e977cee37eb4aa"}, + {file = "pyzmq-26.2.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e8e47050412f0ad3a9b2287779758073cbf10e460d9f345002d4779e43bb0136"}, + {file = "pyzmq-26.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f18ce33f422d119b13c1363ed4cce245b342b2c5cbbb76753eabf6aa6f69c7d"}, + {file = "pyzmq-26.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ceb0d78b7ef106708a7e2c2914afe68efffc0051dc6a731b0dbacd8b4aee6d68"}, + {file = "pyzmq-26.2.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ebdd96bd637fd426d60e86a29ec14b8c1ab64b8d972f6a020baf08a30d1cf46"}, + {file = "pyzmq-26.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:03719e424150c6395b9513f53a5faadcc1ce4b92abdf68987f55900462ac7eec"}, + {file = "pyzmq-26.2.1-cp39-cp39-win32.whl", hash = "sha256:ef5479fac31df4b304e96400fc67ff08231873ee3537544aa08c30f9d22fce38"}, + {file = "pyzmq-26.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:f92a002462154c176dac63a8f1f6582ab56eb394ef4914d65a9417f5d9fde218"}, + {file = "pyzmq-26.2.1-cp39-cp39-win_arm64.whl", hash = "sha256:1fd4b3efc6f62199886440d5e27dd3ccbcb98dfddf330e7396f1ff421bfbb3c2"}, + {file = 
"pyzmq-26.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:380816d298aed32b1a97b4973a4865ef3be402a2e760204509b52b6de79d755d"}, + {file = "pyzmq-26.2.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97cbb368fd0debdbeb6ba5966aa28e9a1ae3396c7386d15569a6ca4be4572b99"}, + {file = "pyzmq-26.2.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf7b5942c6b0dafcc2823ddd9154f419147e24f8df5b41ca8ea40a6db90615c"}, + {file = "pyzmq-26.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fe6e28a8856aea808715f7a4fc11f682b9d29cac5d6262dd8fe4f98edc12d53"}, + {file = "pyzmq-26.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bd8fdee945b877aa3bffc6a5a8816deb048dab0544f9df3731ecd0e54d8c84c9"}, + {file = "pyzmq-26.2.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ee7152f32c88e0e1b5b17beb9f0e2b14454235795ef68c0c120b6d3d23d12833"}, + {file = "pyzmq-26.2.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:baa1da72aecf6a490b51fba7a51f1ce298a1e0e86d0daef8265c8f8f9848eb77"}, + {file = "pyzmq-26.2.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:49135bb327fca159262d8fd14aa1f4a919fe071b04ed08db4c7c37d2f0647162"}, + {file = "pyzmq-26.2.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bacc1a10c150d58e8a9ee2b2037a70f8d903107e0f0b6e079bf494f2d09c091"}, + {file = "pyzmq-26.2.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:09dac387ce62d69bec3f06d51610ca1d660e7849eb45f68e38e7f5cf1f49cbcb"}, + {file = "pyzmq-26.2.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:70b3a46ecd9296e725ccafc17d732bfc3cdab850b54bd913f843a0a54dfb2c04"}, + {file = "pyzmq-26.2.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:59660e15c797a3b7a571c39f8e0b62a1f385f98ae277dfe95ca7eaf05b5a0f12"}, + {file = 
"pyzmq-26.2.1-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0f50db737d688e96ad2a083ad2b453e22865e7e19c7f17d17df416e91ddf67eb"}, + {file = "pyzmq-26.2.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a003200b6cd64e89b5725ff7e284a93ab24fd54bbac8b4fa46b1ed57be693c27"}, + {file = "pyzmq-26.2.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f9ba5def063243793dec6603ad1392f735255cbc7202a3a484c14f99ec290705"}, + {file = "pyzmq-26.2.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1238c2448c58b9c8d6565579393148414a42488a5f916b3f322742e561f6ae0d"}, + {file = "pyzmq-26.2.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8eddb3784aed95d07065bcf94d07e8c04024fdb6b2386f08c197dfe6b3528fda"}, + {file = "pyzmq-26.2.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0f19c2097fffb1d5b07893d75c9ee693e9cbc809235cf3f2267f0ef6b015f24"}, + {file = "pyzmq-26.2.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0995fd3530f2e89d6b69a2202e340bbada3191014352af978fa795cb7a446331"}, + {file = "pyzmq-26.2.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7c6160fe513654e65665332740f63de29ce0d165e053c0c14a161fa60dd0da01"}, + {file = "pyzmq-26.2.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8ec8e3aea6146b761d6c57fcf8f81fcb19f187afecc19bf1701a48db9617a217"}, + {file = "pyzmq-26.2.1.tar.gz", hash = "sha256:17d72a74e5e9ff3829deb72897a175333d3ef5b5413948cae3cf7ebf0b02ecca"}, ] [package.dependencies] @@ -4873,31 +4844,28 @@ files = [ [[package]] name = "torch" -version = "2.6.0" +version = "2.5.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false -python-versions = ">=3.9.0" +python-versions = ">=3.8.0" files = [ - {file = "torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:6860df13d9911ac158f4c44031609700e1eba07916fff62e21e6ffa0a9e01961"}, - 
{file = "torch-2.6.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c4f103a49830ce4c7561ef4434cc7926e5a5fe4e5eb100c19ab36ea1e2b634ab"}, - {file = "torch-2.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:56eeaf2ecac90da5d9e35f7f35eb286da82673ec3c582e310a8d1631a1c02341"}, - {file = "torch-2.6.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:09e06f9949e1a0518c5b09fe95295bc9661f219d9ecb6f9893e5123e10696628"}, - {file = "torch-2.6.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:7979834102cd5b7a43cc64e87f2f3b14bd0e1458f06e9f88ffa386d07c7446e1"}, - {file = "torch-2.6.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:ccbd0320411fe1a3b3fec7b4d3185aa7d0c52adac94480ab024b5c8f74a0bf1d"}, - {file = "torch-2.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:46763dcb051180ce1ed23d1891d9b1598e07d051ce4c9d14307029809c4d64f7"}, - {file = "torch-2.6.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:94fc63b3b4bedd327af588696559f68c264440e2503cc9e6954019473d74ae21"}, - {file = "torch-2.6.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:2bb8987f3bb1ef2675897034402373ddfc8f5ef0e156e2d8cfc47cacafdda4a9"}, - {file = "torch-2.6.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b789069020c5588c70d5c2158ac0aa23fd24a028f34a8b4fcb8fcb4d7efcf5fb"}, - {file = "torch-2.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:7e1448426d0ba3620408218b50aa6ada88aeae34f7a239ba5431f6c8774b1239"}, - {file = "torch-2.6.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:9a610afe216a85a8b9bc9f8365ed561535c93e804c2a317ef7fabcc5deda0989"}, - {file = "torch-2.6.0-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:4874a73507a300a5d089ceaff616a569e7bb7c613c56f37f63ec3ffac65259cf"}, - {file = "torch-2.6.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a0d5e1b9874c1a6c25556840ab8920569a7a4137afa8a63a32cee0bc7d89bd4b"}, - {file = "torch-2.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:510c73251bee9ba02ae1cb6c9d4ee0907b3ce6020e62784e2d7598e0cfa4d6cc"}, - {file = 
"torch-2.6.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:ff96f4038f8af9f7ec4231710ed4549da1bdebad95923953a25045dcf6fd87e2"}, - {file = "torch-2.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9ea955317cfcd3852b1402b62af258ce735c2edeee42ca9419b6bc889e5ae053"}, - {file = "torch-2.6.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:bb2c6c3e65049f081940f5ab15c9136c7de40d3f01192541c920a07c7c585b7e"}, - {file = "torch-2.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:683410f97984103148e31b38a8631acf31c3034c020c0f4d26171e7626d8317a"}, - {file = "torch-2.6.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:265f70de5fd45b864d924b64be1797f86e76c8e48a02c2a3a6fc7ec247d2226c"}, + {file = "torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:71328e1bbe39d213b8721678f9dcac30dfc452a46d586f1d514a6aa0a99d4744"}, + {file = "torch-2.5.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:34bfa1a852e5714cbfa17f27c49d8ce35e1b7af5608c4bc6e81392c352dbc601"}, + {file = "torch-2.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:32a037bd98a241df6c93e4c789b683335da76a2ac142c0973675b715102dc5fa"}, + {file = "torch-2.5.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:23d062bf70776a3d04dbe74db950db2a5245e1ba4f27208a87f0d743b0d06e86"}, + {file = "torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:de5b7d6740c4b636ef4db92be922f0edc425b65ed78c5076c43c42d362a45457"}, + {file = "torch-2.5.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:340ce0432cad0d37f5a31be666896e16788f1adf8ad7be481196b503dad675b9"}, + {file = "torch-2.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:603c52d2fe06433c18b747d25f5c333f9c1d58615620578c326d66f258686f9a"}, + {file = "torch-2.5.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:31f8c39660962f9ae4eeec995e3049b5492eb7360dd4f07377658ef4d728fa4c"}, + {file = "torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:ed231a4b3a5952177fafb661213d690a72caaad97d5824dd4fc17ab9e15cec03"}, + {file = 
"torch-2.5.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:3f4b7f10a247e0dcd7ea97dc2d3bfbfc90302ed36d7f3952b0008d0df264e697"}, + {file = "torch-2.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:73e58e78f7d220917c5dbfad1a40e09df9929d3b95d25e57d9f8558f84c9a11c"}, + {file = "torch-2.5.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:8c712df61101964eb11910a846514011f0b6f5920c55dbf567bff8a34163d5b1"}, + {file = "torch-2.5.1-cp313-cp313-manylinux1_x86_64.whl", hash = "sha256:9b61edf3b4f6e3b0e0adda8b3960266b9009d02b37555971f4d1c8f7a05afed7"}, + {file = "torch-2.5.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1f3b7fb3cf7ab97fae52161423f81be8c6b8afac8d9760823fd623994581e1a3"}, + {file = "torch-2.5.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:7974e3dce28b5a21fb554b73e1bc9072c25dde873fa00d54280861e7a009d7dc"}, + {file = "torch-2.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:46c817d3ea33696ad3b9df5e774dba2257e9a4cd3c4a3afbf92f6bb13ac5ce2d"}, + {file = "torch-2.5.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8046768b7f6d35b85d101b4b38cba8aa2f3cd51952bc4c06a49580f2ce682291"}, ] [package.dependencies] @@ -4914,18 +4882,17 @@ nvidia-cufft-cu12 = {version = "11.2.1.3", markers = "platform_system == \"Linux nvidia-curand-cu12 = {version = "10.3.5.147", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cusolver-cu12 = {version = "11.6.1.9", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cusparse-cu12 = {version = "12.3.1.170", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusparselt-cu12 = {version = "0.6.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nccl-cu12 = {version = "2.21.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvjitlink-cu12 = {version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = 
{version = "12.4.127", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} setuptools = {version = "*", markers = "python_version >= \"3.12\""} sympy = {version = "1.13.1", markers = "python_version >= \"3.9\""} -triton = {version = "3.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -typing-extensions = ">=4.10.0" +triton = {version = "3.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} +typing-extensions = ">=4.8.0" [package.extras] opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.13.0)"] +optree = ["optree (>=0.12.0)"] [[package]] name = "tornado" @@ -5054,18 +5021,21 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] [[package]] name = "triton" -version = "3.2.0" +version = "3.1.0" description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" files = [ - {file = "triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3e54983cd51875855da7c68ec05c05cf8bb08df361b1d5b69e05e40b0c9bd62"}, - {file = "triton-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8009a1fb093ee8546495e96731336a33fb8856a38e45bb4ab6affd6dbc3ba220"}, - {file = "triton-3.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d9b215efc1c26fa7eefb9a157915c92d52e000d2bf83e5f69704047e63f125c"}, - {file = "triton-3.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5dfa23ba84541d7c0a531dfce76d8bcd19159d50a4a8b14ad01e91734a5c1b0"}, - {file = "triton-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30ceed0eff2c4a73b14eb63e052992f44bbdf175f3fad21e1ac8097a772de7ee"}, + {file = "triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b0dd10a925263abbe9fa37dcde67a5e9b2383fc269fdf59f5657cac38c5d1d8"}, + {file = 
"triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f34f6e7885d1bf0eaaf7ba875a5f0ce6f3c13ba98f9503651c1e6dc6757ed5c"}, + {file = "triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8182f42fd8080a7d39d666814fa36c5e30cc00ea7eeeb1a2983dbb4c99a0fdc"}, + {file = "triton-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dadaca7fc24de34e180271b5cf864c16755702e9f63a16f62df714a8099126a"}, + {file = "triton-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aafa9a20cd0d9fee523cd4504aa7131807a864cd77dcf6efe7e981f18b8c6c11"}, ] +[package.dependencies] +filelock = "*" + [package.extras] build = ["cmake (>=3.20)", "lit"] tests = ["autopep8", "flake8", "isort", "llnl-hatchet", "numpy", "pytest", "scipy (>=1.7.1)"] @@ -5498,4 +5468,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "9d330f12a8bad0352ec550e1d6a77348b10f6bca7ecc41769813bec85d3f9e08" +content-hash = "9060b047f34d36d3ee1850cbbbaf2078fe14471661117b786c4e9a7661dc659a" diff --git a/pyproject.toml b/pyproject.toml index 0e787c60..badfc9aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ pydantic = "^2.4.2" pydantic-settings = "^2.0.3" transformers = "^4.45.2" python-dotenv = "^1.0.0" -torch = "^2.5.1" +torch = "~2.5.1" # 2.6.0 appears to fail with mps tqdm = "^4.66.1" ftfy = "^6.1.1" rapidfuzz = "^3.8.1" @@ -49,7 +49,6 @@ apted = "1.0.3" distance = "0.1.3" lxml = "5.3.0" tabulate = "^0.9.0" -pymupdf = "^1.25.2" [tool.poetry.scripts] marker = "marker.scripts.convert:convert_cli" From 70c0b0e0c1881f5bc9c3309228120417ca7088e2 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 30 Jan 2025 12:23:58 -0500 Subject: [PATCH 03/27] Additional benchmark cleanup --- .github/workflows/benchmark.yml | 33 -------------------------------- .github/workflows/benchmarks.yml | 28 +++++++++++++++++++++++++++ .github/workflows/ci.yml | 
4 ---- .github/workflows/scripts.yml | 4 ---- benchmarks/overall/inference.py | 21 ++++++++++---------- benchmarks/overall/overall.py | 24 ++++++++++++++--------- benchmarks/overall/schema.py | 5 ++++- benchmarks/overall/scoring.py | 21 ++++++++++++++++++-- benchmarks/table/table.py | 6 +++--- benchmarks/verify_scores.py | 8 +++----- 10 files changed, 83 insertions(+), 71 deletions(-) delete mode 100644 .github/workflows/benchmark.yml create mode 100644 .github/workflows/benchmarks.yml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index 5d49aa1c..00000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Integration test with benchmark - -on: [push] - -env: - TORCH_DEVICE: "cpu" - -jobs: - benchmark: - runs-on: [ubuntu-latest, windows-latest] - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v4 - with: - python-version: 3.11 - - name: Install python dependencies - run: | - pip install poetry - poetry install - poetry remove torch - poetry run pip install torch --index-url https://download.pytorch.org/whl/cpu - - name: Download benchmark data - run: | - wget -O benchmark_data.zip "https://drive.google.com/uc?export=download&id=1NHrdYatR1rtqs2gPVfdvO0BAvocH8CJi" - unzip -o benchmark_data.zip - - name: Run benchmark test - run: | - poetry run python benchmarks/overall.py benchmark_data/pdfs benchmark_data/references report.json - poetry run python benchmarks/verify_scores.py report.json --type marker - - - diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..ae6a1c84 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,28 @@ +name: Integration test + +on: [push] + +env: + PYTHONIOENCODING: "utf-8" + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + 
python-version: 3.11 + - name: Install python dependencies + run: | + pip install poetry + poetry install + - name: Run benchmark test + run: | + poetry run python benchmarks/overall/overall.py --max_rows 5 + poetry run python benchmarks/verify_scores.py conversion_results/benchmark/overall/overall.json --type marker + - name: Run table benchmark + run: | + poetry run python benchmarks/table/table.py --max_rows 5 + poetry run python benchmarks/verify_scores.py conversion_results/benchmark/table/table.json --type table \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af4e92e8..84137df5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,6 @@ name: CI tests on: [push] -env: - TORCH_DEVICE: "cpu" - OCR_ENGINE: "surya" - jobs: tests: runs-on: ubuntu-latest diff --git a/.github/workflows/scripts.yml b/.github/workflows/scripts.yml index 217e4221..06230580 100644 --- a/.github/workflows/scripts.yml +++ b/.github/workflows/scripts.yml @@ -2,10 +2,6 @@ name: Test CLI scripts on: [push] -env: - TORCH_DEVICE: "cpu" - OCR_ENGINE: "surya" - jobs: tests: runs-on: ubuntu-latest diff --git a/benchmarks/overall/inference.py b/benchmarks/overall/inference.py index f312429b..1b504cff 100644 --- a/benchmarks/overall/inference.py +++ b/benchmarks/overall/inference.py @@ -1,15 +1,16 @@ -import json import tempfile +import time + from bs4 import BeautifulSoup from benchmarks.overall.scoring import score_blocks from benchmarks.overall.schema import BlockScores from marker.converters.pdf import PdfConverter -def get_marker_html(marker_models: dict, pdf_bytes: bytes): +def get_marker_html(marker_models: dict, pdf_bytes: bytes, use_llm: bool): block_converter = PdfConverter( artifact_dict=marker_models, - config={"page_range": [0], "disable_tqdm": True}, + config={"page_range": [0], "disable_tqdm": True, "use_llm": use_llm}, renderer="marker.renderers.html.HTMLRenderer" ) with 
tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as f: @@ -21,16 +22,17 @@ def get_marker_html(marker_models: dict, pdf_bytes: bytes): return inner_html -def marker_html_func(model_dict, sample, **kwargs) -> BlockScores: - gt_blocks = json.loads(sample["gt_blocks"]) +def marker_scoring_func(model_dict, sample, gt_html, use_llm=False, **kwargs) -> BlockScores: pdf_bytes = sample["pdf"] # This is a single page PDF - marker_html = get_marker_html(model_dict, pdf_bytes) - gt_html = [block["html"] for block in gt_blocks] + start = time.time() + marker_html = get_marker_html(model_dict, pdf_bytes, use_llm) + total = time.time() - start scores = score_blocks(gt_html, marker_html) + scores["time"] = total return scores -def mathpix_html_func(model_dict, sample, mathpix_ds, **kwargs) -> BlockScores: +def mathpix_scoring_func(model_dict, sample, gt_html, mathpix_ds=None, **kwargs) -> BlockScores: uuid = sample["uuid"] data = None for row in mathpix_ds: @@ -41,7 +43,6 @@ def mathpix_html_func(model_dict, sample, mathpix_ds, **kwargs) -> BlockScores: raise ValueError(f"Could not find data for uuid {uuid}") mathpix_md = data["md"] - gt_blocks = json.loads(sample["gt_blocks"]) - gt_html = [block["html"] for block in gt_blocks] scores = score_blocks(gt_html, mathpix_md, convert=False) + scores["time"] = data["time"] return scores diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index bdb1fc7c..9cf6fb01 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -9,7 +9,7 @@ import tabulate from tqdm import tqdm -from benchmarks.overall.inference import marker_html_func, mathpix_html_func +from benchmarks.overall.inference import marker_scoring_func, mathpix_scoring_func from benchmarks.overall.schema import FullResult from marker.logger import configure_logging from marker.models import create_model_dict @@ -18,7 +18,7 @@ configure_logging() -def get_method_scores(ds, model_dict, max_rows=None, html_func=marker_html_func, 
**kwargs) -> FullResult: +def get_method_scores(ds, model_dict, max_rows=None, score_func=marker_scoring_func, **kwargs) -> FullResult: bench_scores = {} averages_by_type = defaultdict(list) averages_by_block_type = defaultdict(list) @@ -29,7 +29,8 @@ def get_method_scores(ds, model_dict, max_rows=None, html_func=marker_html_func, gt_blocks = json.loads(sample["gt_blocks"]) doc_type = sample["classification"] try: - scores = html_func(model_dict, sample, **kwargs) + gt_html = [block["html"] for block in gt_blocks] + scores = score_func(model_dict, sample, gt_html, **kwargs) except ValueError as e: print(f"Error with sample {idx}: {e}") continue @@ -40,10 +41,13 @@ def get_method_scores(ds, model_dict, max_rows=None, html_func=marker_html_func, bench_scores[idx] = scores + avg_time = sum([bench_scores[k]["time"] for k in bench_scores]) / len(bench_scores) return { "raw_scores": bench_scores, "averages_by_type": averages_by_type, - "averages_by_block_type": averages_by_block_type + "averages_by_block_type": averages_by_block_type, + "average_time": avg_time, + "average_score": sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores) } def print_scores(scores: FullResult, method: str): @@ -73,11 +77,13 @@ def print_scores(scores: FullResult, method: str): @click.option("--other_methods", type=str, help="Comma separated list of other methods to compare against. 
Possible values: mathpix", default="") @click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "overall"), help="Output path for results.") @click.option("--max_rows", type=int, default=None, help="Maximum number of rows to process.") +@click.option("--use_llm", is_flag=True, help="Use the LLM model for better marker quality.") def main( dataset: str, other_methods: str, result_path: str, - max_rows: int + max_rows: int, + use_llm: bool ): allowed_methods = ["mathpix", ""] methods = other_methods.split(",") @@ -88,14 +94,14 @@ def main( model_dict = create_model_dict() ds = datasets.load_dataset(dataset, split="train") - marker_scores = get_method_scores(ds, model_dict, max_rows=max_rows) + marker_scores = get_method_scores(ds, model_dict, max_rows=max_rows, use_llm=use_llm) all_scores = { "marker": marker_scores } if "mathpix" in methods: mathpix_ds = datasets.load_dataset("datalab-to/marker_benchmark_mathpix", split="train") - mathpix_scores = get_method_scores(ds, model_dict, max_rows=max_rows, html_func=mathpix_html_func, mathpix_ds=mathpix_ds) + mathpix_scores = get_method_scores(ds, model_dict, max_rows=max_rows, score_func=mathpix_scoring_func, mathpix_ds=mathpix_ds) all_scores["mathpix"] = mathpix_scores for k,v in all_scores.items(): @@ -103,8 +109,8 @@ def main( out_path = Path(result_path) out_path.mkdir(parents=True, exist_ok=True) - with open(out_path / "overall.json", "w") as f: - json.dump(all_scores, f, indent=2) + with open(out_path / "overall.json", "w", encoding="utf-8") as f: + json.dump(all_scores, f, indent=2, ensure_ascii=False) print(f"Results saved to {out_path}.") diff --git a/benchmarks/overall/schema.py b/benchmarks/overall/schema.py index 98ffc1b8..8af5bf28 100644 --- a/benchmarks/overall/schema.py +++ b/benchmarks/overall/schema.py @@ -1,4 +1,4 @@ -from typing import TypedDict, List, Dict +from typing import TypedDict, List, Dict, Optional class BlockScores(TypedDict): @@ -7,9 +7,12 @@ class 
BlockScores(TypedDict): gt: List[str] method: str overall_score: float + time: Optional[float] class FullResult(TypedDict): raw_scores: Dict[int, BlockScores] averages_by_type: Dict[str, List[float]] averages_by_block_type: Dict[str, List[float]] + average_time: float + average_score: float diff --git a/benchmarks/overall/scoring.py b/benchmarks/overall/scoring.py index 1ba78bc9..713e5fef 100644 --- a/benchmarks/overall/scoring.py +++ b/benchmarks/overall/scoring.py @@ -12,6 +12,9 @@ def kendall_tau(correct_order: List[int], actual_order: List[int]) -> float: concordant = 0 discordant = 0 + if n <= 1: + return 100 + for i in range(n): for j in range(i + 1, n): correct_sign = correct_order[i] - correct_order[j] @@ -61,18 +64,27 @@ def convert_to_md(html): return markdown def standardize_markdown(markdown): + # Replace math expressions pattern = r'(?", "\n") markdown = re.sub(r"(.*?)", r"\1", markdown) markdown = re.sub(r"(.*?)", r"\1", markdown) + markdown = re.sub(r"(.*?)", r"\1", markdown) # Remove span tags and keep content + # Clean up markdown markdown = re.sub(r"\s+", " ", markdown) markdown = re.sub(r"\n+", "\n", markdown) markdown = re.sub("\\.+", ".", markdown) # Replace repeated periods with a single period, like in table of contents markdown = re.sub("#+", "#", markdown) # Replace repeated headers with a single header markdown = re.sub(r"\$", "", markdown) # Remove equation delimiters + markdown = markdown.encode().decode('unicode-escape') # Decode unicode characters properly return markdown.strip().lower() @@ -116,10 +128,14 @@ def score_blocks(gt_html, method_html, convert=True) -> BlockScores: gt = [standardize_markdown(convert_to_md(gt)) for gt in gt_html] alignments = find_fuzzy_alignments(method_html, gt) scores = [alignment["score"] for alignment in alignments] + + # Find order score orders = [alignment["start"] for alignment in alignments] - correct_order = range(len(gt)) + correct_order = list(range(len(gt))) actual_order = sorted(range(len(gt)), 
key=lambda x: orders[x]) order_score = kendall_tau(correct_order, actual_order) + + # Weight score by sequence length gt_weights = [len(g) for g in gt] weighted_scores = [score * weight for score, weight in zip(scores, gt_weights)] @@ -131,5 +147,6 @@ def score_blocks(gt_html, method_html, convert=True) -> BlockScores: "order_score": order_score, "gt": gt, "method": method_html, - "overall_score": overall_score + "overall_score": overall_score, + "time": None } \ No newline at end of file diff --git a/benchmarks/table/table.py b/benchmarks/table/table.py index 448e32fe..75b4c613 100644 --- a/benchmarks/table/table.py +++ b/benchmarks/table/table.py @@ -49,7 +49,7 @@ def extract_tables(children: List[JSONBlockOutput]): @click.command(help="Benchmark Table to HTML Conversion") @click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "table"), help="Output path for results.") -@click.option("--dataset", type=str, default="datalab-to/fintabnet-test", help="Dataset to use") +@click.option("--dataset", type=str, default="datalab-to/fintabnet_bench_marker", help="Dataset to use") @click.option("--max_rows", type=int, default=None, help="Maximum number of PDFs to process") @click.option("--max_workers", type=int, default=16, help="Maximum number of workers to use") @click.option("--use_llm", is_flag=True, help="Use LLM for improving table recognition.") @@ -222,9 +222,9 @@ def main( "gemini": gemini_results } - out_path = Path(result_path) / "table.json" + out_path = Path(result_path) out_path.mkdir(parents=True, exist_ok=True) - with open(out_path, "w+") as f: + with open(out_path / "table.json", "w+") as f: json.dump(results, f, indent=2) print(f"Results saved to {out_path}.") diff --git a/benchmarks/verify_scores.py b/benchmarks/verify_scores.py index 913081e9..defff1c7 100644 --- a/benchmarks/verify_scores.py +++ b/benchmarks/verify_scores.py @@ -6,11 +6,9 @@ def verify_scores(file_path): with open(file_path, 'r') as file: data = 
json.load(file) - multicolcnn_score = data["marker"]["files"]["multicolcnn.pdf"]["score"] - switch_trans_score = data["marker"]["files"]["switch_trans.pdf"]["score"] - - if multicolcnn_score <= 0.34 or switch_trans_score <= 0.40: - raise ValueError("One or more scores are below the required threshold of 0.4") + marker_score = data["marker"]["average_score"] + if marker_score < 90: + raise ValueError("Marker score below 90") def verify_table_scores(file_path): From bbf416193697fba9513b07de52b9670e24c50d98 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 30 Jan 2025 13:57:56 -0500 Subject: [PATCH 04/27] Refactor benchmarks --- benchmarks/overall/__init__.py | 0 benchmarks/overall/overall.py | 80 +++++++++++++------ benchmarks/table/__init__.py | 0 benchmarks/table/inference.py | 139 +++++++++++++++++++++++++++++++++ benchmarks/table/table.py | 131 ++----------------------------- benchmarks/verify_scores.py | 2 +- 6 files changed, 199 insertions(+), 153 deletions(-) create mode 100644 benchmarks/overall/__init__.py create mode 100644 benchmarks/table/__init__.py create mode 100644 benchmarks/table/inference.py diff --git a/benchmarks/overall/__init__.py b/benchmarks/overall/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index 9cf6fb01..e1245094 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -1,13 +1,14 @@ import json import os -import traceback from collections import defaultdict from pathlib import Path +from typing import Dict import click import datasets import tabulate from tqdm import tqdm +import pypdfium2 as pdfium from benchmarks.overall.inference import marker_scoring_func, mathpix_scoring_func from benchmarks.overall.schema import FullResult @@ -28,12 +29,17 @@ def get_method_scores(ds, model_dict, max_rows=None, score_func=marker_scoring_f gt_blocks = json.loads(sample["gt_blocks"]) doc_type = sample["classification"] + try: 
gt_html = [block["html"] for block in gt_blocks] scores = score_func(model_dict, sample, gt_html, **kwargs) except ValueError as e: print(f"Error with sample {idx}: {e}") continue + except pdfium.PdfiumError as e: + print(f"Error opening pdf: {e}") + continue + averages_by_type[doc_type].append(scores["overall_score"]) for score, gt_block in zip(scores["scores"], gt_blocks): @@ -50,27 +56,48 @@ def get_method_scores(ds, model_dict, max_rows=None, score_func=marker_scoring_f "average_score": sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores) } -def print_scores(scores: FullResult, method: str): - averages_by_type = scores["averages_by_type"] - averages_by_block_type = scores["averages_by_block_type"] - bench_scores = scores["raw_scores"] - - for k in averages_by_type: - averages_by_type[k] = sum(averages_by_type[k]) / len(averages_by_type[k]) - averages_by_type = sorted(averages_by_type.items()) - - print(f"Scores for method {method}:") - print(tabulate.tabulate(averages_by_type, headers=["Document Type", "Average Score"], tablefmt="github")) - - for k in averages_by_block_type: - averages_by_block_type[k] = sum(averages_by_block_type[k]) / len(averages_by_block_type[k]) - averages_by_block_type = sorted(averages_by_block_type.items()) - - print(tabulate.tabulate(averages_by_block_type, headers=["Block Type", "Average Score"], tablefmt="github")) - - overall_average = sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores) - print(tabulate.tabulate([["Overall Average", overall_average]], tablefmt="github")) - print() +def print_scores(scores: Dict[str, FullResult], out_path: Path, default_method="marker"): + inference_types = [default_method] + [k for k in scores.keys() if k != default_method] + + document_types = list(scores[default_method]["averages_by_type"].keys()) + document_rows = [[k] for k in document_types] + for k in inference_types: + for i, doc_type in enumerate(document_types): + avg = 
sum(scores[k]["averages_by_type"][doc_type]) / max(1, len(scores[k]["averages_by_type"][doc_type])) + document_rows[i].append(avg) + + print("Document types") + document_type_table = tabulate.tabulate(document_rows, headers=["Document Type"] + inference_types, tablefmt="github") + print(document_type_table) + with open(out_path / "document_types.md", "w", encoding="utf-8") as f: + f.write(document_type_table) + + block_types = list(scores[default_method]["averages_by_block_type"].keys()) + block_rows = [[k] for k in block_types] + for k in inference_types: + for i, block_type in enumerate(block_types): + avg = sum(scores[k]["averages_by_block_type"][block_type]) / max(1, len(scores[k]["averages_by_block_type"][block_type])) + block_rows[i].append(avg) + + print("Block types") + block_type_table = tabulate.tabulate(block_rows, headers=["Block Type"] + inference_types, tablefmt="github") + print(block_type_table) + with open(out_path / "block_types.md", "w", encoding="utf-8") as f: + f.write(block_type_table) + + headers = ["Method", "Avg Score", "Avg Time"] + inference_rows = [[k] for k in inference_types] + for i, k in enumerate(inference_types): + inference_rows[i].append(scores[k]["average_score"]) + inference_rows[i].append(scores[k]["average_time"]) + + print("Overall") + overall_table = tabulate.tabulate(inference_rows, headers=headers, tablefmt="github") + print(overall_table) + with open(out_path / "overall.md", "w", encoding="utf-8") as f: + f.write(overall_table) + + print("Scores computed by aligning ground truth markdown blocks with predicted markdown for each method. 
The scores are 0-100 based on edit distance.") @click.command(help="Benchmark PDF to MD conversion.") @click.option("--dataset", type=str, help="Path to the benchmark dataset", default="datalab-to/marker_benchmark") @@ -85,6 +112,9 @@ def main( max_rows: int, use_llm: bool ): + out_path = Path(result_path) + out_path.mkdir(parents=True, exist_ok=True) + allowed_methods = ["mathpix", ""] methods = other_methods.split(",") for method in methods: @@ -104,11 +134,9 @@ def main( mathpix_scores = get_method_scores(ds, model_dict, max_rows=max_rows, score_func=mathpix_scoring_func, mathpix_ds=mathpix_ds) all_scores["mathpix"] = mathpix_scores - for k,v in all_scores.items(): - print_scores(v, k) + # Display formatted score tables + print_scores(all_scores, out_path) - out_path = Path(result_path) - out_path.mkdir(parents=True, exist_ok=True) with open(out_path / "overall.json", "w", encoding="utf-8") as f: json.dump(all_scores, f, indent=2, ensure_ascii=False) diff --git a/benchmarks/table/__init__.py b/benchmarks/table/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/table/inference.py b/benchmarks/table/inference.py new file mode 100644 index 00000000..7e228c93 --- /dev/null +++ b/benchmarks/table/inference.py @@ -0,0 +1,139 @@ +import datasets +import numpy as np +from bs4 import BeautifulSoup +import pypdfium2 as pdfium +from tqdm import tqdm +import base64 +import tempfile + +from benchmarks.table.gemini import gemini_table_rec +from marker.config.parser import ConfigParser +from marker.converters.table import TableConverter +from marker.models import create_model_dict +from marker.util import matrix_intersection_area + + +def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, max_rows: int, use_gemini: bool): + models = create_model_dict() + config_parser = ConfigParser({'output_format': 'json', "use_llm": use_llm, "table_rec_batch_size": table_rec_batch_size, "disable_tqdm": True}) + total_unaligned = 0 + 
results = [] + + dataset = datasets.load_dataset(dataset, split='train') + dataset = dataset.shuffle(seed=0) + + iterations = len(dataset) + if max_rows is not None: + iterations = min(max_rows, len(dataset)) + + for i in tqdm(range(iterations), desc='Converting Tables'): + try: + row = dataset[i] + pdf_binary = base64.b64decode(row['pdf']) + gt_tables = row['tables'] # Already sorted by reading order, which is what marker returns + + converter = TableConverter( + config=config_parser.generate_config_dict(), + artifact_dict=models, + processor_list=config_parser.get_processors(), + renderer=config_parser.get_renderer() + ) + + with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as temp_pdf_file: + temp_pdf_file.write(pdf_binary) + temp_pdf_file.seek(0) + marker_json = converter(temp_pdf_file.name).children + + doc = pdfium.PdfDocument(temp_pdf_file.name) + page_image = doc[0].render(scale=92 / 72).to_pil() + + if len(marker_json) == 0 or len(gt_tables) == 0: + print(f'No tables detected, skipping...') + total_unaligned += len(gt_tables) + continue + + marker_tables = extract_tables(marker_json) + marker_table_boxes = [table.bbox for table in marker_tables] + page_bbox = marker_json[0].bbox + w_scaler, h_scaler = page_image.width / page_bbox[2], page_image.height / page_bbox[3] + table_images = [ + page_image.crop([bbox[0] * w_scaler, bbox[1] * h_scaler, bbox[2] * w_scaler, bbox[3] * h_scaler]) for bbox + in marker_table_boxes] + + # Normalize the bboxes + for bbox in marker_table_boxes: + bbox[0] = bbox[0] / page_bbox[2] + bbox[1] = bbox[1] / page_bbox[3] + bbox[2] = bbox[2] / page_bbox[2] + bbox[3] = bbox[3] / page_bbox[3] + + gt_boxes = [table['normalized_bbox'] for table in gt_tables] + gt_areas = [(bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) for bbox in gt_boxes] + marker_areas = [(bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) for bbox in marker_table_boxes] + table_alignments = matrix_intersection_area(gt_boxes, marker_table_boxes) + + aligned_tables = [] + 
used_tables = set() + unaligned_tables = set() + for table_idx, alignment in enumerate(table_alignments): + try: + max_area = np.max(alignment) + aligned_idx = np.argmax(alignment) + except ValueError: + # No alignment found + unaligned_tables.add(table_idx) + continue + + if aligned_idx in used_tables: + # Marker table already aligned with another gt table + unaligned_tables.add(table_idx) + continue + + # Gt table doesn't align well with any marker table + gt_table_pct = gt_areas[table_idx] / max_area + if not .75 < gt_table_pct < 1.25: + unaligned_tables.add(table_idx) + continue + + # Marker table doesn't align with gt table + marker_table_pct = marker_areas[aligned_idx] / max_area + if not .75 < marker_table_pct < 1.25: + unaligned_tables.add(table_idx) + continue + + gemini_html = "" + if use_gemini: + gemini_html = gemini_table_rec(table_images[aligned_idx]) + + aligned_tables.append( + (marker_tables[aligned_idx], gt_tables[table_idx], gemini_html) + ) + used_tables.add(aligned_idx) + + total_unaligned += len(unaligned_tables) + + for marker_table, gt_table, gemini_table in aligned_tables: + gt_table_html = gt_table['html'] + + # marker wraps the table in which fintabnet data doesn't + # Fintabnet doesn't use th tags, need to be replaced for fair comparison + marker_table_soup = BeautifulSoup(marker_table.html, 'html.parser') + tbody = marker_table_soup.find('tbody') + if tbody: + tbody.unwrap() + for th_tag in marker_table_soup.find_all('th'): + th_tag.name = 'td' + marker_table_html = str(marker_table_soup) + marker_table_html = marker_table_html.replace("
", " ") # Fintabnet uses spaces instead of newlines + marker_table_html = marker_table_html.replace("\n", " ") # Fintabnet uses spaces instead of newlines + gemini_table_html = gemini_table.replace("\n", " ") # Fintabnet uses spaces instead of newlines + + results.append({ + "marker_table": marker_table_html, + "gt_table": gt_table_html, + "gemini_table": gemini_table_html + }) + except pdfium.PdfiumError: + print('Broken PDF, Skipping...') + continue + return results, total_unaligned \ No newline at end of file diff --git a/benchmarks/table/table.py b/benchmarks/table/table.py index 75b4c613..964a8c61 100644 --- a/benchmarks/table/table.py +++ b/benchmarks/table/table.py @@ -1,33 +1,27 @@ import os + +from benchmarks.table.inference import inference_tables + os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for an op, which is not supported on MPS from pathlib import Path from itertools import repeat from typing import List -import numpy as np -import base64 import time import datasets from tqdm import tqdm -import tempfile import click from tabulate import tabulate import json -from bs4 import BeautifulSoup from concurrent.futures import ProcessPoolExecutor -from pypdfium2._helpers.misc import PdfiumError -import pypdfium2 as pdfium -from marker.util import matrix_intersection_area -from marker.renderers.json import JSONOutput, JSONBlockOutput +from marker.renderers.json import JSONBlockOutput from marker.settings import settings from marker.config.parser import ConfigParser -from marker.converters.table import TableConverter from marker.models import create_model_dict from scoring import wrap_table_html, similarity_eval_html -from gemini import gemini_table_rec def update_teds_score(result, prefix: str = "marker"): prediction, ground_truth = result[f'{prefix}_table'], result['gt_table'] @@ -64,128 +58,13 @@ def main( table_rec_batch_size: int | None, use_gemini: bool = False ): - models = create_model_dict() - config_parser = 
ConfigParser({'output_format': 'json', "use_llm": use_llm, "table_rec_batch_size": table_rec_batch_size, "disable_tqdm": True}) start = time.time() dataset = datasets.load_dataset(dataset, split='train') dataset = dataset.shuffle(seed=0) - iterations = len(dataset) - if max_rows is not None: - iterations = min(max_rows, len(dataset)) - - results = [] - total_unaligned = 0 - for i in tqdm(range(iterations), desc='Converting Tables'): - try: - row = dataset[i] - pdf_binary = base64.b64decode(row['pdf']) - gt_tables = row['tables'] #Already sorted by reading order, which is what marker returns - - converter = TableConverter( - config=config_parser.generate_config_dict(), - artifact_dict=models, - processor_list=config_parser.get_processors(), - renderer=config_parser.get_renderer() - ) - - with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as temp_pdf_file: - temp_pdf_file.write(pdf_binary) - temp_pdf_file.seek(0) - marker_json = converter(temp_pdf_file.name).children - - doc = pdfium.PdfDocument(temp_pdf_file.name) - page_image = doc[0].render(scale=92/72).to_pil() - - if len(marker_json) == 0 or len(gt_tables) == 0: - print(f'No tables detected, skipping...') - total_unaligned += len(gt_tables) - continue - - marker_tables = extract_tables(marker_json) - marker_table_boxes = [table.bbox for table in marker_tables] - page_bbox = marker_json[0].bbox - w_scaler, h_scaler = page_image.width / page_bbox[2], page_image.height / page_bbox[3] - table_images = [page_image.crop([bbox[0] * w_scaler, bbox[1] * h_scaler, bbox[2] * w_scaler, bbox[3] * h_scaler]) for bbox in marker_table_boxes] - - # Normalize the bboxes - for bbox in marker_table_boxes: - bbox[0] = bbox[0] / page_bbox[2] - bbox[1] = bbox[1] / page_bbox[3] - bbox[2] = bbox[2] / page_bbox[2] - bbox[3] = bbox[3] / page_bbox[3] - - gt_boxes = [table['normalized_bbox'] for table in gt_tables] - gt_areas = [(bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) for bbox in gt_boxes] - marker_areas = [(bbox[2] - bbox[0]) * 
(bbox[3] - bbox[1]) for bbox in marker_table_boxes] - table_alignments = matrix_intersection_area(gt_boxes, marker_table_boxes) - - aligned_tables = [] - used_tables = set() - unaligned_tables = set() - for table_idx, alignment in enumerate(table_alignments): - try: - max_area = np.max(alignment) - aligned_idx = np.argmax(alignment) - except ValueError: - # No alignment found - unaligned_tables.add(table_idx) - continue - - if aligned_idx in used_tables: - # Marker table already aligned with another gt table - unaligned_tables.add(table_idx) - continue - - # Gt table doesn't align well with any marker table - gt_table_pct = gt_areas[table_idx] / max_area - if not .75 < gt_table_pct < 1.25: - unaligned_tables.add(table_idx) - continue - - # Marker table doesn't align with gt table - marker_table_pct = marker_areas[aligned_idx] / max_area - if not .75 < marker_table_pct < 1.25: - unaligned_tables.add(table_idx) - continue - - gemini_html = "" - if use_gemini: - gemini_html = gemini_table_rec(table_images[aligned_idx]) - - aligned_tables.append( - (marker_tables[aligned_idx], gt_tables[table_idx], gemini_html) - ) - used_tables.add(aligned_idx) - - total_unaligned += len(unaligned_tables) - - for marker_table, gt_table, gemini_table in aligned_tables: - gt_table_html = gt_table['html'] - - #marker wraps the table in which fintabnet data doesn't - #Fintabnet doesn't use th tags, need to be replaced for fair comparison - marker_table_soup = BeautifulSoup(marker_table.html, 'html.parser') - tbody = marker_table_soup.find('tbody') - if tbody: - tbody.unwrap() - for th_tag in marker_table_soup.find_all('th'): - th_tag.name = 'td' - marker_table_html = str(marker_table_soup) - marker_table_html = marker_table_html.replace("
", " ") # Fintabnet uses spaces instead of newlines - marker_table_html = marker_table_html.replace("\n", " ") # Fintabnet uses spaces instead of newlines - gemini_table_html = gemini_table.replace("\n", " ") # Fintabnet uses spaces instead of newlines - - results.append({ - "marker_table": marker_table_html, - "gt_table": gt_table_html, - "gemini_table": gemini_table_html - }) - except PdfiumError: - print('Broken PDF, Skipping...') - continue + results, total_unaligned = inference_tables(dataset, use_llm, table_rec_batch_size, max_rows, use_gemini) print(f"Total time: {time.time() - start}.") print(f"Could not align {total_unaligned} tables from fintabnet.") diff --git a/benchmarks/verify_scores.py b/benchmarks/verify_scores.py index defff1c7..7b72a75a 100644 --- a/benchmarks/verify_scores.py +++ b/benchmarks/verify_scores.py @@ -15,7 +15,7 @@ def verify_table_scores(file_path): with open(file_path, 'r') as file: data = json.load(file) - avg = sum([r["score"] for r in data]) / len(data) + avg = sum([r["marker_score"] for r in data["marker"]]) / len(data) if avg < 0.7: raise ValueError("Average score is below the required threshold of 0.7") From 9a8da131719e9d415c2ee3380bd13454d150d250 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 30 Jan 2025 14:23:36 -0500 Subject: [PATCH 05/27] Additional fixes --- benchmarks/overall/overall.py | 2 +- benchmarks/overall/scoring.py | 33 ++++++++++++++++++++++++++++++--- benchmarks/table/inference.py | 34 ++++++++++++++++++++++++++-------- benchmarks/table/table.py | 19 ++----------------- marker/renderers/markdown.py | 2 +- 5 files changed, 60 insertions(+), 30 deletions(-) diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index e1245094..520b626e 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -31,7 +31,7 @@ def get_method_scores(ds, model_dict, max_rows=None, score_func=marker_scoring_f doc_type = sample["classification"] try: - gt_html = [block["html"] for 
block in gt_blocks] + gt_html = [block["html"] for block in gt_blocks if len(block["html"]) > 0] scores = score_func(model_dict, sample, gt_html, **kwargs) except ValueError as e: print(f"Error with sample {idx}: {e}") diff --git a/benchmarks/overall/scoring.py b/benchmarks/overall/scoring.py index 713e5fef..3798cbf2 100644 --- a/benchmarks/overall/scoring.py +++ b/benchmarks/overall/scoring.py @@ -69,8 +69,10 @@ def standardize_markdown(markdown): markdown = re.sub(pattern, standardize_math, markdown) # Replace image urls - pattern = r'!\[(.*?)\]\((.*?)(?:\?.*?width=(\d+).*?height=(\d+).*?)\)' - markdown = re.sub(pattern, r'![/api/placeholder]', markdown) + pattern = r'!\[(.*?)\]\((https?://[^\s\)]+)\)' + markdown = re.sub(pattern, r'![link]', markdown) + markdown = strip_latex_symbols(markdown) + markdown = replace_centered_lines(markdown) # Clean up html tags markdown = markdown.replace("
", "\n") @@ -84,10 +86,35 @@ def standardize_markdown(markdown): markdown = re.sub("\\.+", ".", markdown) # Replace repeated periods with a single period, like in table of contents markdown = re.sub("#+", "#", markdown) # Replace repeated headers with a single header markdown = re.sub(r"\$", "", markdown) # Remove equation delimiters - markdown = markdown.encode().decode('unicode-escape') # Decode unicode characters properly + markdown = markdown.encode().decode('unicode-escape', errors="ignore") # Decode unicode characters properly return markdown.strip().lower() +def replace_centered_lines(text): + def replace_match(m): + content = m.group(0) + dash_count = content.count('-') + return '-' * dash_count + + pattern = r':-+:' + return re.sub(pattern, replace_match, text) + + +def strip_latex_symbols(text): + # Handle short math mode sequences first - only match $ $ with brief content + text = re.sub(r'\$\s*\\?[a-zA-Z]+\d?\s*\$', '', text) + + # Handle common patterns inside remaining math mode + patterns = [ + r'\$\s*\\?[a-zA-Z]+\d?\s*\$', # \alpha or \alpha2 in math mode + r'\$\s*\d+\\[a-zA-Z]+\s*\$', # 45\circ in math mode + r'\$\s*[a-zA-Z0-9]\\[a-zA-Z]+\s*\$' # x\dagger in math mode + ] + + pattern = '|'.join(patterns) + return re.sub(pattern, '', text) + + def standardize_math(match): try: delim = "$$" if match.group(0).startswith('$$') else "$" diff --git a/benchmarks/table/inference.py b/benchmarks/table/inference.py index 7e228c93..c6d4d7d4 100644 --- a/benchmarks/table/inference.py +++ b/benchmarks/table/inference.py @@ -1,4 +1,5 @@ -import datasets +from typing import List + import numpy as np from bs4 import BeautifulSoup import pypdfium2 as pdfium @@ -10,18 +11,27 @@ from marker.config.parser import ConfigParser from marker.converters.table import TableConverter from marker.models import create_model_dict +from marker.renderers.json import JSONBlockOutput +from marker.schema.polygon import PolygonBox from marker.util import matrix_intersection_area +def 
extract_tables(children: List[JSONBlockOutput]): + tables = [] + for child in children: + if child.block_type == 'Table': + tables.append(child) + elif child.children: + tables.extend(extract_tables(child.children)) + return tables + + def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, max_rows: int, use_gemini: bool): models = create_model_dict() config_parser = ConfigParser({'output_format': 'json', "use_llm": use_llm, "table_rec_batch_size": table_rec_batch_size, "disable_tqdm": True}) total_unaligned = 0 results = [] - dataset = datasets.load_dataset(dataset, split='train') - dataset = dataset.shuffle(seed=0) - iterations = len(dataset) if max_rows is not None: iterations = min(max_rows, len(dataset)) @@ -45,7 +55,8 @@ def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m marker_json = converter(temp_pdf_file.name).children doc = pdfium.PdfDocument(temp_pdf_file.name) - page_image = doc[0].render(scale=92 / 72).to_pil() + page_image = doc[0].render(scale=96/72).to_pil() + doc.close() if len(marker_json) == 0 or len(gt_tables) == 0: print(f'No tables detected, skipping...') @@ -55,10 +66,17 @@ def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m marker_tables = extract_tables(marker_json) marker_table_boxes = [table.bbox for table in marker_tables] page_bbox = marker_json[0].bbox - w_scaler, h_scaler = page_image.width / page_bbox[2], page_image.height / page_bbox[3] + table_images = [ - page_image.crop([bbox[0] * w_scaler, bbox[1] * h_scaler, bbox[2] * w_scaler, bbox[3] * h_scaler]) for bbox - in marker_table_boxes] + page_image.crop( + PolygonBox.from_bbox(bbox) + .rescale( + (page_bbox[2], page_bbox[3]), (page_image.width, page_image.height) + ).bbox + ) + for bbox + in marker_table_boxes + ] # Normalize the bboxes for bbox in marker_table_boxes: diff --git a/benchmarks/table/table.py b/benchmarks/table/table.py index 964a8c61..4e674c28 100644 --- a/benchmarks/table/table.py 
+++ b/benchmarks/table/table.py @@ -1,7 +1,4 @@ import os - -from benchmarks.table.inference import inference_tables - os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for an op, which is not supported on MPS from pathlib import Path @@ -15,11 +12,9 @@ from tabulate import tabulate import json from concurrent.futures import ProcessPoolExecutor -from marker.renderers.json import JSONBlockOutput -from marker.settings import settings -from marker.config.parser import ConfigParser -from marker.models import create_model_dict +from marker.settings import settings +from benchmarks.table.inference import inference_tables from scoring import wrap_table_html, similarity_eval_html @@ -31,16 +26,6 @@ def update_teds_score(result, prefix: str = "marker"): return result -def extract_tables(children: List[JSONBlockOutput]): - tables = [] - for child in children: - if child.block_type == 'Table': - tables.append(child) - elif child.children: - tables.extend(extract_tables(child.children)) - return tables - - @click.command(help="Benchmark Table to HTML Conversion") @click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "table"), help="Output path for results.") @click.option("--dataset", type=str, default="datalab-to/fintabnet_bench_marker", help="Dataset to use") diff --git a/marker/renderers/markdown.py b/marker/renderers/markdown.py index 722470c1..28895ef2 100644 --- a/marker/renderers/markdown.py +++ b/marker/renderers/markdown.py @@ -128,7 +128,7 @@ def convert_table(self, el, text, convert_as_inline): grid[row_idx + r][col_idx + c] = '' # Empty cell due to rowspan/colspan except IndexError: # Sometimes the colspan/rowspan predictions can overflow - print(f"Overflow in columns: {col_idx + c} >= {total_cols}") + print(f"Overflow in columns: {col_idx + c} >= {total_cols} or rows: {row_idx + r} >= {total_rows}") continue col_idx += colspan From 720f09a9a11427d484b1b40f12d0e0e2a49afeed Mon Sep 17 00:00:00 2001 
From: Vik Paruchuri Date: Thu, 30 Jan 2025 16:17:56 -0500 Subject: [PATCH 06/27] Bump surya --- poetry.lock | 194 ++++++++++++++++++++++++------------------------- pyproject.toml | 2 +- 2 files changed, 98 insertions(+), 98 deletions(-) diff --git a/poetry.lock b/poetry.lock index 471d43c1..fcd57b32 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3932,99 +3932,99 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} [[package]] name = "rapidfuzz" -version = "3.11.0" +version = "3.12.1" description = "rapid fuzzy string matching" optional = false python-versions = ">=3.9" files = [ - {file = "rapidfuzz-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb8a54543d16ab1b69e2c5ed96cabbff16db044a50eddfc028000138ca9ddf33"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:231c8b2efbd7f8d2ecd1ae900363ba168b8870644bb8f2b5aa96e4a7573bde19"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54e7f442fb9cca81e9df32333fb075ef729052bcabe05b0afc0441f462299114"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:906f1f2a1b91c06599b3dd1be207449c5d4fc7bd1e1fa2f6aef161ea6223f165"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed59044aea9eb6c663112170f2399b040d5d7b162828b141f2673e822093fa8"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cb1965a28b0fa64abdee130c788a0bc0bb3cf9ef7e3a70bf055c086c14a3d7e"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b488b244931d0291412917e6e46ee9f6a14376625e150056fe7c4426ef28225"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f0ba13557fec9d5ffc0a22826754a7457cc77f1b25145be10b7bb1d143ce84c6"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:3871fa7dfcef00bad3c7e8ae8d8fd58089bad6fb21f608d2bf42832267ca9663"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b2669eafee38c5884a6e7cc9769d25c19428549dcdf57de8541cf9e82822e7db"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ffa1bb0e26297b0f22881b219ffc82a33a3c84ce6174a9d69406239b14575bd5"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:45b15b8a118856ac9caac6877f70f38b8a0d310475d50bc814698659eabc1cdb"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-win32.whl", hash = "sha256:22033677982b9c4c49676f215b794b0404073f8974f98739cb7234e4a9ade9ad"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:be15496e7244361ff0efcd86e52559bacda9cd975eccf19426a0025f9547c792"}, - {file = "rapidfuzz-3.11.0-cp310-cp310-win_arm64.whl", hash = "sha256:714a7ba31ba46b64d30fccfe95f8013ea41a2e6237ba11a805a27cdd3bce2573"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8724a978f8af7059c5323d523870bf272a097478e1471295511cf58b2642ff83"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b63cb1f2eb371ef20fb155e95efd96e060147bdd4ab9fc400c97325dfee9fe1"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82497f244aac10b20710448645f347d862364cc4f7d8b9ba14bd66b5ce4dec18"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:339607394941801e6e3f6c1ecd413a36e18454e7136ed1161388de674f47f9d9"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84819390a36d6166cec706b9d8f0941f115f700b7faecab5a7e22fc367408bc3"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eea8d9e20632d68f653455265b18c35f90965e26f30d4d92f831899d6682149b"}, - {file = 
"rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b659e1e2ea2784a9a397075a7fc395bfa4fe66424042161c4bcaf6e4f637b38"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1315cd2a351144572e31fe3df68340d4b83ddec0af8b2e207cd32930c6acd037"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a7743cca45b4684c54407e8638f6d07b910d8d811347b9d42ff21262c7c23245"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5bb636b0150daa6d3331b738f7c0f8b25eadc47f04a40e5c23c4bfb4c4e20ae3"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:42f4dd264ada7a9aa0805ea0da776dc063533917773cf2df5217f14eb4429eae"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51f24cb39e64256221e6952f22545b8ce21cacd59c0d3e367225da8fc4b868d8"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-win32.whl", hash = "sha256:aaf391fb6715866bc14681c76dc0308f46877f7c06f61d62cc993b79fc3c4a2a"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:ebadd5b8624d8ad503e505a99b8eb26fe3ea9f8e9c2234e805a27b269e585842"}, - {file = "rapidfuzz-3.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:d895998fec712544c13cfe833890e0226585cf0391dd3948412441d5d68a2b8c"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f382fec4a7891d66fb7163c90754454030bb9200a13f82ee7860b6359f3f2fa8"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dfaefe08af2a928e72344c800dcbaf6508e86a4ed481e28355e8d4b6a6a5230e"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92ebb7c12f682b5906ed98429f48a3dd80dd0f9721de30c97a01473d1a346576"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a1b3ebc62d4bcdfdeba110944a25ab40916d5383c5e57e7c4a8dc0b6c17211a"}, - {file = 
"rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c6d7fea39cb33e71de86397d38bf7ff1a6273e40367f31d05761662ffda49e4"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99aebef8268f2bc0b445b5640fd3312e080bd17efd3fbae4486b20ac00466308"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4469307f464ae3089acf3210b8fc279110d26d10f79e576f385a98f4429f7d97"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:eb97c53112b593f89a90b4f6218635a9d1eea1d7f9521a3b7d24864228bbc0aa"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ef8937dae823b889c0273dfa0f0f6c46a3658ac0d851349c464d1b00e7ff4252"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d95f9e9f3777b96241d8a00d6377cc9c716981d828b5091082d0fe3a2924b43e"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:b1d67d67f89e4e013a5295e7523bc34a7a96f2dba5dd812c7c8cb65d113cbf28"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d994cf27e2f874069884d9bddf0864f9b90ad201fcc9cb2f5b82bacc17c8d5f2"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-win32.whl", hash = "sha256:ba26d87fe7fcb56c4a53b549a9e0e9143f6b0df56d35fe6ad800c902447acd5b"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:b1f7efdd7b7adb32102c2fa481ad6f11923e2deb191f651274be559d56fc913b"}, - {file = "rapidfuzz-3.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:ed78c8e94f57b44292c1a0350f580e18d3a3c5c0800e253f1583580c1b417ad2"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e60814edd0c9b511b5f377d48b9782b88cfe8be07a98f99973669299c8bb318a"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f28952da055dbfe75828891cd3c9abf0984edc8640573c18b48c14c68ca5e06"}, - {file = 
"rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e8f93bc736020351a6f8e71666e1f486bb8bd5ce8112c443a30c77bfde0eb68"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76a4a11ba8f678c9e5876a7d465ab86def047a4fcc043617578368755d63a1bc"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc0e0d41ad8a056a9886bac91ff9d9978e54a244deb61c2972cc76b66752de9c"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e8ea35f2419c7d56b3e75fbde2698766daedb374f20eea28ac9b1f668ef4f74"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd340bbd025302276b5aa221dccfe43040c7babfc32f107c36ad783f2ffd8775"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:494eef2c68305ab75139034ea25328a04a548d297712d9cf887bf27c158c388b"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5a167344c1d6db06915fb0225592afdc24d8bafaaf02de07d4788ddd37f4bc2f"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8c7af25bda96ac799378ac8aba54a8ece732835c7b74cfc201b688a87ed11152"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d2a0f7e17f33e7890257367a1662b05fecaf56625f7dbb6446227aaa2b86448b"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d0d26c7172bdb64f86ee0765c5b26ea1dc45c52389175888ec073b9b28f4305"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-win32.whl", hash = "sha256:6ad02bab756751c90fa27f3069d7b12146613061341459abf55f8190d899649f"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:b1472986fd9c5d318399a01a0881f4a0bf4950264131bb8e2deba9df6d8c362b"}, - {file = "rapidfuzz-3.11.0-cp313-cp313-win_arm64.whl", hash = 
"sha256:c408f09649cbff8da76f8d3ad878b64ba7f7abdad1471efb293d2c075e80c822"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1bac4873f6186f5233b0084b266bfb459e997f4c21fc9f029918f44a9eccd304"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f9f12c2d0aa52b86206d2059916153876a9b1cf9dfb3cf2f344913167f1c3d4"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dd501de6f7a8f83557d20613b58734d1cb5f0be78d794cde64fe43cfc63f5f2"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4416ca69af933d4a8ad30910149d3db6d084781d5c5fdedb713205389f535385"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f0821b9bdf18c5b7d51722b906b233a39b17f602501a966cfbd9b285f8ab83cd"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0edecc3f90c2653298d380f6ea73b536944b767520c2179ec5d40b9145e47aa"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4513dd01cee11e354c31b75f652d4d466c9440b6859f84e600bdebfccb17735a"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d9727b85511b912571a76ce53c7640ba2c44c364e71cef6d7359b5412739c570"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ab9eab33ee3213f7751dc07a1a61b8d9a3d748ca4458fffddd9defa6f0493c16"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6b01c1ddbb054283797967ddc5433d5c108d680e8fa2684cf368be05407b07e4"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:3857e335f97058c4b46fa39ca831290b70de554a5c5af0323d2f163b19c5f2a6"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d98a46cf07c0c875d27e8a7ed50f304d83063e49b9ab63f21c19c154b4c0d08d"}, - {file = 
"rapidfuzz-3.11.0-cp39-cp39-win32.whl", hash = "sha256:c36539ed2c0173b053dafb221458812e178cfa3224ade0960599bec194637048"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:ec8d7d8567e14af34a7911c98f5ac74a3d4a743cd848643341fc92b12b3784ff"}, - {file = "rapidfuzz-3.11.0-cp39-cp39-win_arm64.whl", hash = "sha256:62171b270ecc4071be1c1f99960317db261d4c8c83c169e7f8ad119211fe7397"}, - {file = "rapidfuzz-3.11.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f06e3c4c0a8badfc4910b9fd15beb1ad8f3b8fafa8ea82c023e5e607b66a78e4"}, - {file = "rapidfuzz-3.11.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fe7aaf5a54821d340d21412f7f6e6272a9b17a0cbafc1d68f77f2fc11009dcd5"}, - {file = "rapidfuzz-3.11.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25398d9ac7294e99876a3027ffc52c6bebeb2d702b1895af6ae9c541ee676702"}, - {file = "rapidfuzz-3.11.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a52eea839e4bdc72c5e60a444d26004da00bb5bc6301e99b3dde18212e41465"}, - {file = "rapidfuzz-3.11.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c87319b0ab9d269ab84f6453601fd49b35d9e4a601bbaef43743f26fabf496c"}, - {file = "rapidfuzz-3.11.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3048c6ed29d693fba7d2a7caf165f5e0bb2b9743a0989012a98a47b975355cca"}, - {file = "rapidfuzz-3.11.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b04f29735bad9f06bb731c214f27253bd8bedb248ef9b8a1b4c5bde65b838454"}, - {file = "rapidfuzz-3.11.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7864e80a0d4e23eb6194254a81ee1216abdc53f9dc85b7f4d56668eced022eb8"}, - {file = "rapidfuzz-3.11.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3794df87313dfb56fafd679b962e0613c88a293fd9bd5dd5c2793d66bf06a101"}, - {file = "rapidfuzz-3.11.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d71da0012face6f45432a11bc59af19e62fac5a41f8ce489e80c0add8153c3d1"}, - {file = "rapidfuzz-3.11.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff38378346b7018f42cbc1f6d1d3778e36e16d8595f79a312b31e7c25c50bd08"}, - {file = "rapidfuzz-3.11.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6668321f90aa02a5a789d4e16058f2e4f2692c5230252425c3532a8a62bc3424"}, - {file = "rapidfuzz-3.11.0.tar.gz", hash = "sha256:a53ca4d3f52f00b393fab9b5913c5bafb9afc27d030c8a1db1283da6917a860f"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dbb7ea2fd786e6d66f225ef6eef1728832314f47e82fee877cb2a793ebda9579"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ae41361de05762c1eaa3955e5355de7c4c6f30d1ef1ea23d29bf738a35809ab"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc3c39e0317e7f68ba01bac056e210dd13c7a0abf823e7b6a5fe7e451ddfc496"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:69f2520296f1ae1165b724a3aad28c56fd0ac7dd2e4cff101a5d986e840f02d4"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34dcbf5a7daecebc242f72e2500665f0bde9dd11b779246c6d64d106a7d57c99"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:773ab37fccf6e0513891f8eb4393961ddd1053c6eb7e62eaa876e94668fc6d31"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ecf0e6de84c0bc2c0f48bc03ba23cef2c5f1245db7b26bc860c11c6fd7a097c"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4dc2ebad4adb29d84a661f6a42494df48ad2b72993ff43fad2b9794804f91e45"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8389d98b9f54cb4f8a95f1fa34bf0ceee639e919807bb931ca479c7a5f2930bf"}, + {file = 
"rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:165bcdecbfed9978962da1d3ec9c191b2ff9f1ccc2668fbaf0613a975b9aa326"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:129d536740ab0048c1a06ccff73c683f282a2347c68069affae8dbc423a37c50"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b67e390261ffe98ec86c771b89425a78b60ccb610c3b5874660216fcdbded4b"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-win32.whl", hash = "sha256:a66520180d3426b9dc2f8d312f38e19bc1fc5601f374bae5c916f53fa3534a7d"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:82260b20bc7a76556cecb0c063c87dad19246a570425d38f8107b8404ca3ac97"}, + {file = "rapidfuzz-3.12.1-cp310-cp310-win_arm64.whl", hash = "sha256:3a860d103bbb25c69c2e995fdf4fac8cb9f77fb69ec0a00469d7fd87ff148f46"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6d9afad7b16d01c9e8929b6a205a18163c7e61b6cd9bcf9c81be77d5afc1067a"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb424ae7240f2d2f7d8dda66a61ebf603f74d92f109452c63b0dbf400204a437"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42149e6d13bd6d06437d2a954dae2184dadbbdec0fdb82dafe92860d99f80519"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:760ac95d788f2964b73da01e0bdffbe1bf2ad8273d0437565ce9092ae6ad1fbc"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cf27e8e4bf7bf9d92ef04f3d2b769e91c3f30ba99208c29f5b41e77271a2614"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:00ceb8ff3c44ab0d6014106c71709c85dee9feedd6890eff77c814aa3798952b"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b8b61c558574fbc093d85940c3264c08c2b857b8916f8e8f222e7b86b0bb7d12"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:346a2d8f17224e99f9ef988606c83d809d5917d17ad00207237e0965e54f9730"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d60d1db1b7e470e71ae096b6456e20ec56b52bde6198e2dbbc5e6769fa6797dc"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2477da227e266f9c712f11393182c69a99d3c8007ea27f68c5afc3faf401cc43"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:8499c7d963ddea8adb6cffac2861ee39a1053e22ca8a5ee9de1197f8dc0275a5"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:12802e5c4d8ae104fb6efeeb436098325ce0dca33b461c46e8df015c84fbef26"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-win32.whl", hash = "sha256:e1061311d07e7cdcffa92c9b50c2ab4192907e70ca01b2e8e1c0b6b4495faa37"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:c6e4ed63e204daa863a802eec09feea5448617981ba5d150f843ad8e3ae071a4"}, + {file = "rapidfuzz-3.12.1-cp311-cp311-win_arm64.whl", hash = "sha256:920733a28c3af47870835d59ca9879579f66238f10de91d2b4b3f809d1ebfc5b"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f6235b57ae3faa3f85cb3f90c9fee49b21bd671b76e90fc99e8ca2bdf0b5e4a3"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af4585e5812632c357fee5ab781c29f00cd06bea58f8882ff244cc4906ba6c9e"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5942dc4460e5030c5f9e1d4c9383de2f3564a2503fe25e13e89021bcbfea2f44"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b31ab59e1a0df5afc21f3109b6cfd77b34040dbf54f1bad3989f885cfae1e60"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:97c885a7a480b21164f57a706418c9bbc9a496ec6da087e554424358cadde445"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d844c0587d969ce36fbf4b7cbf0860380ffeafc9ac5e17a7cbe8abf528d07bb"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93c95dce8917bf428064c64024de43ffd34ec5949dd4425780c72bd41f9d969"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:834f6113d538af358f39296604a1953e55f8eeffc20cb4caf82250edbb8bf679"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a940aa71a7f37d7f0daac186066bf6668d4d3b7e7ef464cb50bc7ba89eae1f51"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ec9eaf73501c9a7de2c6938cb3050392e2ee0c5ca3921482acf01476b85a7226"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c5ec360694ac14bfaeb6aea95737cf1a6cf805b5fe8ea7fd28814706c7fa838"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6b5e176524653ac46f1802bdd273a4b44a5f8d0054ed5013a8e8a4b72f254599"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-win32.whl", hash = "sha256:6f463c6f1c42ec90e45d12a6379e18eddd5cdf74138804d8215619b6f4d31cea"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:b894fa2b30cd6498a29e5c470cb01c6ea898540b7e048a0342775a5000531334"}, + {file = "rapidfuzz-3.12.1-cp312-cp312-win_arm64.whl", hash = "sha256:43bb17056c5d1332f517b888c4e57846c4b5f936ed304917eeb5c9ac85d940d4"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:97f824c15bc6933a31d6e3cbfa90188ba0e5043cf2b6dd342c2b90ee8b3fd47c"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a973b3f5cabf931029a3ae4a0f72e3222e53d412ea85fc37ddc49e1774f00fbf"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:df7880e012228722dec1be02b9ef3898ed023388b8a24d6fa8213d7581932510"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c78582f50e75e6c2bc38c791ed291cb89cf26a3148c47860c1a04d6e5379c8e"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d7d9e6a04d8344b0198c96394c28874086888d0a2b2f605f30d1b27b9377b7d"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5620001fd4d6644a2f56880388179cc8f3767670f0670160fcb97c3b46c828af"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0666ab4c52e500af7ba5cc17389f5d15c0cdad06412c80312088519fdc25686d"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:27b4d440fa50b50c515a91a01ee17e8ede719dca06eef4c0cccf1a111a4cfad3"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:83dccfd5a754f2a0e8555b23dde31f0f7920601bfa807aa76829391ea81e7c67"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b572b634740e047c53743ed27a1bb3b4f93cf4abbac258cd7af377b2c4a9ba5b"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7fa7b81fb52902d5f78dac42b3d6c835a6633b01ddf9b202a3ca8443be4b2d6a"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b1d4fbff980cb6baef4ee675963c081f7b5d6580a105d6a4962b20f1f880e1fb"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-win32.whl", hash = "sha256:3fe8da12ea77271097b303fa7624cfaf5afd90261002314e3b0047d36f4afd8d"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:6f7e92fc7d2a7f02e1e01fe4f539324dfab80f27cb70a30dd63a95445566946b"}, + {file = "rapidfuzz-3.12.1-cp313-cp313-win_arm64.whl", hash = "sha256:e31be53d7f4905a6a038296d8b773a79da9ee9f0cd19af9490c5c5a22e37d2e5"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:bef5c91d5db776523530073cda5b2a276283258d2f86764be4a008c83caf7acd"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:841e0c2a5fbe8fc8b9b1a56e924c871899932c0ece7fbd970aa1c32bfd12d4bf"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:046fc67f3885d94693a2151dd913aaf08b10931639cbb953dfeef3151cb1027c"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4d2d39b2e76c17f92edd6d384dc21fa020871c73251cdfa017149358937a41d"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5857dda85165b986c26a474b22907db6b93932c99397c818bcdec96340a76d5"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c26cd1b9969ea70dbf0dbda3d2b54ab4b2e683d0fd0f17282169a19563efeb1"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf56ea4edd69005786e6c80a9049d95003aeb5798803e7a2906194e7a3cb6472"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fbe7580b5fb2db8ebd53819171ff671124237a55ada3f64d20fc9a149d133960"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:018506a53c3b20dcbda8c93d4484b9eb1764c93d5ea16be103cf6b0d8b11d860"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:325c9c71b737fcd32e2a4e634c430c07dd3d374cfe134eded3fe46e4c6f9bf5d"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:930756639643e3aa02d3136b6fec74e5b9370a24f8796e1065cd8a857a6a6c50"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0acbd27543b158cb915fde03877383816a9e83257832818f1e803bac9b394900"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-win32.whl", hash = "sha256:80ff9283c54d7d29b2d954181e137deee89bec62f4a54675d8b6dbb6b15d3e03"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-win_amd64.whl", hash 
= "sha256:fd37e53f0ed239d0cec27b250cec958982a8ba252ce64aa5e6052de3a82fa8db"}, + {file = "rapidfuzz-3.12.1-cp39-cp39-win_arm64.whl", hash = "sha256:4a4422e4f73a579755ab60abccb3ff148b5c224b3c7454a13ca217dfbad54da6"}, + {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b7cba636c32a6fc3a402d1cb2c70c6c9f8e6319380aaf15559db09d868a23e56"}, + {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b79286738a43e8df8420c4b30a92712dec6247430b130f8e015c3a78b6d61ac2"}, + {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dc1937198e7ff67e217e60bfa339f05da268d91bb15fec710452d11fe2fdf60"}, + {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b85817a57cf8db32dd5d2d66ccfba656d299b09eaf86234295f89f91be1a0db2"}, + {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04283c6f3e79f13a784f844cd5b1df4f518ad0f70c789aea733d106c26e1b4fb"}, + {file = "rapidfuzz-3.12.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a718f740553aad5f4daef790191511da9c6eae893ee1fc2677627e4b624ae2db"}, + {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cbdf145c7e4ebf2e81c794ed7a582c4acad19e886d5ad6676086369bd6760753"}, + {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0d03ad14a26a477be221fddc002954ae68a9e2402b9d85433f2d0a6af01aa2bb"}, + {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1187aeae9c89e838d2a0a2b954b4052e4897e5f62e5794ef42527bf039d469e"}, + {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd47dfb1bca9673a48b923b3d988b7668ee8efd0562027f58b0f2b7abf27144c"}, + {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:187cdb402e223264eebed2fe671e367e636a499a7a9c82090b8d4b75aa416c2a"}, + {file = "rapidfuzz-3.12.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6899b41bf6c30282179f77096c1939f1454836440a8ab05b48ebf7026a3b590"}, + {file = "rapidfuzz-3.12.1.tar.gz", hash = "sha256:6a98bbca18b4a37adddf2d8201856441c26e9c981d8895491b5bc857b5f780eb"}, ] [package.extras] @@ -4641,13 +4641,13 @@ snowflake = ["snowflake-connector-python (>=2.8.0)", "snowflake-snowpark-python[ [[package]] name = "surya-ocr" -version = "0.10.0" +version = "0.10.1" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "surya_ocr-0.10.0-py3-none-any.whl", hash = "sha256:ccad25a308eefd61a21b2c97fc3f5b8364887e09f197a3aaa5fee30c03f81ae1"}, - {file = "surya_ocr-0.10.0.tar.gz", hash = "sha256:966bc0c1aef346df42e458d2c1cbc95665004ea61020577e1656789107d09119"}, + {file = "surya_ocr-0.10.1-py3-none-any.whl", hash = "sha256:39fdc04ae1531e4b2ceb784e481a22941e53bb72f876fa1638677b5c4bd3c784"}, + {file = "surya_ocr-0.10.1.tar.gz", hash = "sha256:0e57975df87f0dcc17ea6ff06dfe68ff5308c6610e42608a1038f8cbbd044e35"}, ] [package.dependencies] @@ -4659,7 +4659,7 @@ pydantic = ">=2.5.3,<3.0.0" pydantic-settings = ">=2.1.0,<3.0.0" pypdfium2 = "4.30.0" python-dotenv = ">=1.0.0,<2.0.0" -torch = ">=2.4.1,<3.0.0" +torch = ">=2.5.1,<2.6.0" transformers = ">=4.41.0,<5.0.0" [[package]] @@ -4952,13 +4952,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "transformers" -version = "4.48.1" +version = "4.48.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.9.0" files = [ - {file = "transformers-4.48.1-py3-none-any.whl", hash = "sha256:24be0564b0a36d9e433d9a65de248f1545b6f6edce1737669605eb6a8141bbbb"}, - {file = "transformers-4.48.1.tar.gz", hash = 
"sha256:7c1931facc3ee8adcbf86fc7a87461d54c1e40eca3bb57fef1ee9f3ecd32187e"}, + {file = "transformers-4.48.2-py3-none-any.whl", hash = "sha256:493bc5b0268b116eff305edf6656367fc89cf570e7a9d5891369e04751db698a"}, + {file = "transformers-4.48.2.tar.gz", hash = "sha256:dcfb73473e61f22fb3366fe2471ed2e42779ecdd49527a1bdf1937574855d516"}, ] [package.dependencies] @@ -5468,4 +5468,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "9060b047f34d36d3ee1850cbbbaf2078fe14471661117b786c4e9a7661dc659a" +content-hash = "294f3036e322ab123bc681335d96606bbc2c8cb52a8a2c253874725b3180c2f7" diff --git a/pyproject.toml b/pyproject.toml index badfc9aa..82d6dc11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ torch = "~2.5.1" # 2.6.0 appears to fail with mps tqdm = "^4.66.1" ftfy = "^6.1.1" rapidfuzz = "^3.8.1" -surya-ocr = "~0.10.0" +surya-ocr = "~0.10.1" regex = "^2024.4.28" pdftext = "~0.5.1" markdownify = "^0.13.1" From cfde6d62690297a3e5d001b911aff8bd22291fab Mon Sep 17 00:00:00 2001 From: Moses Paul R Date: Sat, 1 Feb 2025 09:49:48 +0000 Subject: [PATCH 07/27] add llm text support for references, superscripts etc --- marker/processors/llm/llm_text.py | 24 ++++++++++++++++-------- marker/schema/text/line.py | 8 ++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/marker/processors/llm/llm_text.py b/marker/processors/llm/llm_text.py index 8a71b54e..02a6f1bf 100644 --- a/marker/processors/llm/llm_text.py +++ b/marker/processors/llm/llm_text.py @@ -28,10 +28,11 @@ class LLMTextProcessor(BaseLLMProcessor): * Inline math: Ensure all mathematical expressions are correctly formatted and rendered. * Formatting: Maintain consistent formatting with the text block image, including spacing, indentation, and special characters. * Other inaccuracies: If the image is handwritten then you may correct any spelling errors, or other discrepancies. -5. 
Do not remove any formatting i.e bold, italics, etc from the extracted lines unless it is necessary to correct the error. -6. Ensure that inline math is properly with inline math tags. -7. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines. -8. Output the corrected lines in JSON format with a "lines" field, as shown in the example below. +5. Do not remove any formatting i.e bold, italics, math, superscripts, subscripts, etc from the extracted lines unless it is necessary to correct an error. +6. DO not remove any ... tags, those are important for references and are coming directly from the document, you MUST always keep them. +7. Ensure that inline math is properly with inline math tags. +8. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines. +9. Output the corrected lines in JSON format with a "lines" field, as shown in the example below. 
**Example:** @@ -120,34 +121,41 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): minimum_position=0, maximum_position=0, formats=[span['type']], + url=span.get('url'), page_id=text_line.page_id, text_extraction_method="gemini", ) ) text_line.structure.append(span_block.id) - def text_to_spans(self, text): + @staticmethod + def text_to_spans(text): soup = BeautifulSoup(text, 'html.parser') tag_types = { 'b': 'bold', 'i': 'italic', - 'math': 'math' + 'math': 'math', } spans = [] for element in soup.descendants: if not len(list(element.parents)) == 1: continue + + url = element.attrs.get('href') if hasattr(element, 'attrs') else None + if element.name in tag_types: spans.append({ 'type': tag_types[element.name], - 'content': element.get_text() + 'content': element.get_text(), + 'url': url }) elif element.string: spans.append({ 'type': 'plain', - 'content': element.string + 'content': element.string, + 'url': url }) return spans diff --git a/marker/schema/text/line.py b/marker/schema/text/line.py index 30525a38..6285ee88 100644 --- a/marker/schema/text/line.py +++ b/marker/schema/text/line.py @@ -42,10 +42,18 @@ def formatted_text(self, document): for block in self.contained_blocks(document, (BlockTypes.Span,)): block_text = html.escape(block.text) + if block.has_superscript: + block_text = re.sub(r"^([0-9\W]+)(.*)", r"\1\2", block_text) + + if block.url: + block_text = f"{block_text}" + if block.italic: text += f"{block_text}" elif block.bold: text += f"{block_text}" + elif block.math: + text += f"{block_text}" else: text += block_text From 225ff44c965d4a07b320180bbecd003f9357f3ac Mon Sep 17 00:00:00 2001 From: Moses Paul R Date: Sat, 1 Feb 2025 09:51:22 +0000 Subject: [PATCH 08/27] fix typo [skip ci] --- marker/processors/llm/llm_text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marker/processors/llm/llm_text.py b/marker/processors/llm/llm_text.py index 02a6f1bf..dbbd23b4 100644 --- 
a/marker/processors/llm/llm_text.py +++ b/marker/processors/llm/llm_text.py @@ -29,7 +29,7 @@ class LLMTextProcessor(BaseLLMProcessor): * Formatting: Maintain consistent formatting with the text block image, including spacing, indentation, and special characters. * Other inaccuracies: If the image is handwritten then you may correct any spelling errors, or other discrepancies. 5. Do not remove any formatting i.e bold, italics, math, superscripts, subscripts, etc from the extracted lines unless it is necessary to correct an error. -6. DO not remove any ... tags, those are important for references and are coming directly from the document, you MUST always keep them. +6. Do not remove any ... tags, those are important for references and are coming directly from the document, you MUST always keep them. 7. Ensure that inline math is properly with inline math tags. 8. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines. 9. Output the corrected lines in JSON format with a "lines" field, as shown in the example below. From 93deddda79f48a2356e88e9a1dcd7da30810e366 Mon Sep 17 00:00:00 2001 From: Moses Paul R Date: Sat, 1 Feb 2025 11:48:15 +0000 Subject: [PATCH 09/27] refine prompt [skip ci] --- marker/processors/llm/llm_text.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/marker/processors/llm/llm_text.py b/marker/processors/llm/llm_text.py index dbbd23b4..a87ed862 100644 --- a/marker/processors/llm/llm_text.py +++ b/marker/processors/llm/llm_text.py @@ -29,10 +29,10 @@ class LLMTextProcessor(BaseLLMProcessor): * Formatting: Maintain consistent formatting with the text block image, including spacing, indentation, and special characters. * Other inaccuracies: If the image is handwritten then you may correct any spelling errors, or other discrepancies. 5. 
Do not remove any formatting i.e bold, italics, math, superscripts, subscripts, etc from the extracted lines unless it is necessary to correct an error. -6. Do not remove any ... tags, those are important for references and are coming directly from the document, you MUST always keep them. -7. Ensure that inline math is properly with inline math tags. -8. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines. -9. Output the corrected lines in JSON format with a "lines" field, as shown in the example below. +6. Ensure that inline math is properly with inline math tags. +7. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines. +8. Output the corrected lines in JSON format with a "lines" field, as shown in the example below. +9. You absolutely cannot remove any ... tags, those are extremely important for references and are coming directly from the document, you MUST always preserve them. **Example:** @@ -40,7 +40,7 @@ class LLMTextProcessor(BaseLLMProcessor): ``` { "extracted_lines": [ - "Adversarial training (AT) [23], which aims to minimize\n", + "Adversarial training (AT) [23], which aims to minimize\n", "the model's risk under the worst-case perturbations, is cur-\n", "rently the most effective approach for improving the robust-\n", "ness of deep neural networks. For a given neural network\n", @@ -55,7 +55,7 @@ class LLMTextProcessor(BaseLLMProcessor): ```json { "corrected_lines": [ - "Adversarial training (AT) [23], which aims to minimize\n", + "Adversarial training (AT) [23], which aims to minimize\n", "the model's risk under the worst-case perturbations, is cur-\n", "rently the most effective approach for improving the robust-\n", "ness of deep neural networks. 
For a given neural network\n", From 4e0fadc55482425aa027a3a862600ecf03af1650 Mon Sep 17 00:00:00 2001 From: Moses Paul R Date: Mon, 3 Feb 2025 03:53:14 +0000 Subject: [PATCH 10/27] fix llm table merging error --- marker/processors/llm/llm_table_merge.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/marker/processors/llm/llm_table_merge.py b/marker/processors/llm/llm_table_merge.py index e2012998..af512bf6 100644 --- a/marker/processors/llm/llm_table_merge.py +++ b/marker/processors/llm/llm_table_merge.py @@ -114,6 +114,9 @@ class LLMTableMergeProcessor(BaseLLMProcessor): @staticmethod def get_row_count(cells: List[TableCell]): + if not cells: + return 0 + max_rows = None for col_id in set([cell.col_id for cell in cells]): col_cells = [cell for cell in cells if cell.col_id == col_id] @@ -126,6 +129,9 @@ def get_row_count(cells: List[TableCell]): @staticmethod def get_column_count(cells: List[TableCell]): + if not cells: + return 0 + max_cols = None for row_id in set([cell.row_id for cell in cells]): row_cells = [cell for cell in cells if cell.row_id == row_id] From 277f2db312ee62dc801b58d3d80e362e7b839450 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Mon, 3 Feb 2025 17:14:52 -0500 Subject: [PATCH 11/27] Add order processor --- benchmarks/overall/inference.py | 10 +++++----- benchmarks/overall/overall.py | 2 +- benchmarks/overall/render.py | 14 +++++++++++--- benchmarks/overall/schema.py | 1 + marker/converters/pdf.py | 2 ++ marker/processors/order.py | 23 ++++++++++++++++++----- marker/schema/polygon.py | 3 +++ marker/util.py | 19 +++++++++++++++++++ 8 files changed, 60 insertions(+), 14 deletions(-) diff --git a/benchmarks/overall/inference.py b/benchmarks/overall/inference.py index 717cb3b4..03c2257c 100644 --- a/benchmarks/overall/inference.py +++ b/benchmarks/overall/inference.py @@ -23,9 +23,9 @@ def marker_scoring_func(model_dict, sample, gt_markdown, use_llm=False, **kwargs pdf_bytes = sample["pdf"] # This is a single page PDF start = 
time.time() marker_md = get_marker_markdown(model_dict, pdf_bytes, use_llm) - marker_md = clean_input(marker_md) + marker_md_clean = clean_input(marker_md) total = time.time() - start - scores = score_blocks(gt_markdown, marker_md) + scores = score_blocks(gt_markdown, marker_md_clean) scores["time"] = total scores["markdown"] = marker_md return scores @@ -41,8 +41,8 @@ def mathpix_scoring_func(model_dict, sample, gt_markdown, mathpix_ds=None, **kwa if not data: raise ValueError(f"Could not find data for uuid {uuid}") - mathpix_md = clean_input(data["md"]) - scores = score_blocks(gt_markdown, mathpix_md) + mathpix_md_clean = clean_input(data["md"]) + scores = score_blocks(gt_markdown, mathpix_md_clean) scores["time"] = data["time"] - scores["markdown"] = mathpix_md + scores["markdown"] = data["md"] return scores diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index e4c1fd14..291cfb3b 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -56,7 +56,7 @@ def get_method_scores(ds, model_dict, max_rows=None, score_func=marker_scoring_f "averages_by_type": averages_by_type, "averages_by_block_type": averages_by_block_type, "average_time": avg_time, - "average_score": sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores) + "average_score": sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores), } def print_scores(scores: Dict[str, FullResult], out_path: Path, default_method="marker"): diff --git a/benchmarks/overall/render.py b/benchmarks/overall/render.py index b49e32af..ff252266 100644 --- a/benchmarks/overall/render.py +++ b/benchmarks/overall/render.py @@ -12,6 +12,7 @@ import markdown2 from playwright.sync_api import sync_playwright +from benchmarks.overall.clean import convert_to_md, clean_input from benchmarks.overall.schema import FullResult def convert_to_html(md: str): @@ -90,7 +91,13 @@ def build_dataset(ds: datasets.Dataset, all_scores: Dict[str, 
FullResult]) -> da ds_rows = defaultdict(dict) for idx in full_idxs: - row = ds[idx] # img, gt_blocks, classification, language, uuid + row = ds[idx] + ds_rows[idx].update({ + "img": row["img"], + "classification": row["classification"], + "language": row["language"], + "uuid": row["uuid"] + }) for method in all_scores: method_row = all_scores[method]["raw_scores"][idx] ds_rows[idx].update({ @@ -99,10 +106,11 @@ def build_dataset(ds: datasets.Dataset, all_scores: Dict[str, FullResult]) -> da f"{method}_image": markdown_to_image(method_row["markdown"]), f"{method}_time": method_row["time"] }) - gt_md = "\n\n".join([clean_input(convert_to_md(block)) for block in json.loads(row["gt_blocks"])]) + gt_html = [block["html"] for block in json.loads(row["gt_blocks"]) if len(block["html"]) > 0] + gt_md = "\n\n".join([convert_to_md(block) for block in gt_html]) ds_rows[idx].update({ "gt_markdown": gt_md, - "gt_image": markdown_to_image(gt_md) + "gt_markdown_image": markdown_to_image(gt_md) }) out_dataset = datasets.Dataset.from_list([ds_rows[k] for k in full_idxs]) return out_dataset diff --git a/benchmarks/overall/schema.py b/benchmarks/overall/schema.py index d2d725f3..668f83f1 100644 --- a/benchmarks/overall/schema.py +++ b/benchmarks/overall/schema.py @@ -15,3 +15,4 @@ class FullResult(TypedDict): averages_by_block_type: Dict[str, List[float]] average_time: float average_score: float + gt_markdown: List[str] diff --git a/marker/converters/pdf.py b/marker/converters/pdf.py index 3741b760..01f69695 100644 --- a/marker/converters/pdf.py +++ b/marker/converters/pdf.py @@ -41,6 +41,7 @@ from marker.schema.registry import register_block_class from marker.util import strings_to_classes from marker.processors.llm.llm_handwriting import LLMHandwritingProcessor +from marker.processors.order import OrderProcessor class PdfConverter(BaseConverter): @@ -59,6 +60,7 @@ class PdfConverter(BaseConverter): "Enable higher quality processing with LLMs.", ] = False default_processors: 
Tuple[BaseProcessor, ...] = ( + OrderProcessor, BlockquoteProcessor, CodeProcessor, DocumentTOCProcessor, diff --git a/marker/processors/order.py b/marker/processors/order.py index b28e57c3..146eaf30 100644 --- a/marker/processors/order.py +++ b/marker/processors/order.py @@ -1,4 +1,5 @@ from statistics import mean +from collections import defaultdict from marker.processors import BaseProcessor from marker.schema import BlockTypes @@ -13,41 +14,53 @@ class OrderProcessor(BaseProcessor): def __call__(self, document: Document): for page in document.pages: + # Skip OCRed pages if page.text_extraction_method != "pdftext": continue + # Skip pages without layout slicing if not page.layout_sliced: continue - block_idxs = {} + block_idxs = defaultdict(int) for block_id in page.structure: block = document.get_block(block_id) spans = block.contained_blocks(document, (BlockTypes.Span, )) if len(spans) == 0: continue + # Avg span position in original PDF block_idxs[block_id] = (spans[0].minimum_position + spans[-1].maximum_position) / 2 for block_id in page.structure: - if block_id in block_idxs and block_idxs[block_id] > 0: + # Already assigned block id via span position + if block_idxs[block_id] > 0: continue + block = document.get_block(block_id) prev_block = document.get_prev_block(block) next_block = document.get_next_block(block) + block_idx_add = 0 + if prev_block: + block_idx_add = 1 + while prev_block and prev_block.id not in block_idxs: prev_block = document.get_prev_block(prev_block) + block_idx_add += 1 if not prev_block: + block_idx_add = -1 while next_block and next_block.id not in block_idxs: next_block = document.get_next_block(next_block) + block_idx_add -= 1 if not next_block and not prev_block: - block_idxs[block_id] = 0 + pass elif prev_block: - block_idxs[block_id] = block_idxs[prev_block.id] + 1 + block_idxs[block_id] = block_idxs[prev_block.id] + block_idx_add else: - block_idxs[block_id] = block_idxs[next_block.id] - 1 + block_idxs[block_id] = 
block_idxs[next_block.id] + block_idx_add page.structure = sorted(page.structure, key=lambda x: block_idxs[x]) diff --git a/marker/schema/polygon.py b/marker/schema/polygon.py index 2174bc6c..25e9ed31 100644 --- a/marker/schema/polygon.py +++ b/marker/schema/polygon.py @@ -126,6 +126,9 @@ def center_distance(self, other: PolygonBox, x_weight: float = 1, y_weight: floa else: return abs(self.center[0] - other.center[0]) * x_weight + abs(self.center[1] - other.center[1]) * y_weight + def tl_distance(self, other: PolygonBox): + return ((self.bbox[0] - other.bbox[0]) ** 2 + (self.bbox[1] - other.bbox[1]) ** 2) ** 0.5 + def rescale(self, old_size, new_size): # Point is in x, y format page_width, page_height = old_size diff --git a/marker/util.py b/marker/util.py index 3dbde5f8..3586c0bb 100644 --- a/marker/util.py +++ b/marker/util.py @@ -80,3 +80,22 @@ def matrix_intersection_area(boxes1: List[List[float]], boxes2: List[List[float] height = np.maximum(0, max_y - min_y) return width * height # Shape: (N, M) + + +def matrix_distance(boxes1: List[List[float]], boxes2: List[List[float]]) -> np.ndarray: + if len(boxes2) == 0: + return np.zeros((len(boxes1), 0)) + if len(boxes1) == 0: + return np.zeros((0, len(boxes2))) + + boxes1 = np.array(boxes1) # Shape: (N, 4) + boxes2 = np.array(boxes2) # Shape: (M, 4) + + boxes1_centers = (boxes1[:, :2] + boxes1[:, 2:]) / 2 # Shape: (M, 2) + boxes2_centers = (boxes2[:, :2] + boxes2[:, 2:]) / 2 # Shape: (M, 2) + + boxes1_centers = boxes1_centers[:, np.newaxis, :] # Shape: (N, 1, 2) + boxes2_centers = boxes2_centers[np.newaxis, :, :] # Shape: (1, M, 2) + + distances = np.linalg.norm(boxes1_centers - boxes2_centers, axis=2) # Shape: (N, M) + return distances From f1f93aa77acb9666013b62b84498030a27f48179 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Mon, 3 Feb 2025 17:56:23 -0500 Subject: [PATCH 12/27] Add pandoc --- .github/workflows/benchmarks.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/benchmarks.yml 
b/.github/workflows/benchmarks.yml index ae6a1c84..a7efa7e4 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -14,6 +14,10 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.11 + - name: Install apt dependencies + run: | + sudo apt-get update + sudo apt-get install -y pandoc - name: Install python dependencies run: | pip install poetry From 805d20000142030f84c4f560d056fbff884230d1 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Tue, 4 Feb 2025 11:21:17 -0500 Subject: [PATCH 13/27] Clean up benchmark, make more pluggable --- benchmarks/__init__.py | 0 benchmarks/overall/display/__init__.py | 62 ++++++++ benchmarks/overall/inference.py | 48 ------- benchmarks/overall/methods/__init__.py | 91 ++++++++++++ benchmarks/overall/methods/gt.py | 29 ++++ benchmarks/overall/methods/marker.py | 29 ++++ benchmarks/overall/methods/mathpix.py | 22 +++ benchmarks/overall/methods/schema.py | 6 + benchmarks/overall/overall.py | 165 +++++++++------------- benchmarks/overall/registry.py | 16 +++ benchmarks/overall/render.py | 117 --------------- benchmarks/overall/schema.py | 22 ++- benchmarks/overall/scorers/__init__.py | 11 ++ benchmarks/overall/{ => scorers}/clean.py | 12 -- benchmarks/overall/scorers/heuristic.py | 96 +++++++++++++ benchmarks/overall/scorers/llm.py | 148 +++++++++++++++++++ benchmarks/overall/scorers/schema.py | 6 + benchmarks/overall/scoring.py | 83 ----------- 18 files changed, 590 insertions(+), 373 deletions(-) create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/overall/display/__init__.py delete mode 100644 benchmarks/overall/inference.py create mode 100644 benchmarks/overall/methods/__init__.py create mode 100644 benchmarks/overall/methods/gt.py create mode 100644 benchmarks/overall/methods/marker.py create mode 100644 benchmarks/overall/methods/mathpix.py create mode 100644 benchmarks/overall/methods/schema.py create mode 100644 benchmarks/overall/registry.py delete mode 100644 
benchmarks/overall/render.py create mode 100644 benchmarks/overall/scorers/__init__.py rename benchmarks/overall/{ => scorers}/clean.py (93%) create mode 100644 benchmarks/overall/scorers/heuristic.py create mode 100644 benchmarks/overall/scorers/llm.py create mode 100644 benchmarks/overall/scorers/schema.py delete mode 100644 benchmarks/overall/scoring.py diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/overall/display/__init__.py b/benchmarks/overall/display/__init__.py new file mode 100644 index 00000000..25b3f73c --- /dev/null +++ b/benchmarks/overall/display/__init__.py @@ -0,0 +1,62 @@ +from pathlib import Path +from typing import Dict, List + +import tabulate + +from benchmarks.overall.schema import FullResult + +def write_table(title: str, rows: list, headers: list, out_path: Path, filename: str): + table = tabulate.tabulate(rows, headers=headers, tablefmt="github") + with open(out_path / filename, "w", encoding="utf-8") as f: + f.write(f"# {title}\n") + f.write(table) + print(title) + print(table) + + +def print_scores(result: FullResult, out_path: Path, methods: List[str], score_types: List[str], default_score_type="heuristic", default_method="marker"): + document_types = list(result["averages_by_type"][default_method][default_score_type].keys()) + headers = ["Document Type"] + for method in methods: + for score_type in score_types: + headers.append(f"{method} {score_type}") + + document_rows = [[k] for k in document_types] + for i, doc_type in enumerate(document_types): + for method in methods: + for score_type in score_types: + avg_score = sum(result["averages_by_type"][method][score_type][doc_type]) / max(1, len(result["averages_by_type"][method][score_type][doc_type])) + document_rows[i].append(avg_score) + + write_table("Document Types", document_rows, headers, out_path, "document_types.md") + + headers = ["Block Type"] + block_types = 
list(result["averages_by_block_type"][default_method][default_score_type].keys()) # all possible blocks + block_score_types = list(result["averages_by_block_type"][default_method].keys()) + for method in methods: + for score_type in block_score_types: + headers.append(f"{method} {score_type}") + + block_rows = [[k] for k in block_types] + for i, block_type in enumerate(block_types): + for method in methods: + for score_type in block_score_types: + avg_score = sum(result["averages_by_block_type"][method][score_type][block_type]) / max(1, len(result["averages_by_block_type"][method][score_type][block_type])) + block_rows[i].append(avg_score) + + write_table("Block types", block_rows, headers, out_path, "block_types.md") + + headers = ["Method", "Avg Time"] + score_types + inference_rows = [[k] for k in methods] + all_raw_scores = [result["scores"][i] for i in result["scores"]] + for i, method in enumerate(methods): + avg_time = sum(result["average_times"][method]) / max(1, len(result["average_times"][method])) + inference_rows[i].append(avg_time) + for score_type in score_types: + scores_lst = [ar[method][score_type]["score"] for ar in all_raw_scores] + avg_score = sum(scores_lst) / max(1, len(scores_lst)) + inference_rows[i].append(avg_score) + + write_table("Overall Results", inference_rows, headers, out_path, "overall.md") + + print("Scores computed by aligning ground truth markdown blocks with predicted markdown for each method. 
The scores are 0-100 based on edit distance.") \ No newline at end of file diff --git a/benchmarks/overall/inference.py b/benchmarks/overall/inference.py deleted file mode 100644 index 03c2257c..00000000 --- a/benchmarks/overall/inference.py +++ /dev/null @@ -1,48 +0,0 @@ -import tempfile -import time - -from benchmarks.overall.clean import clean_input -from benchmarks.overall.schema import BlockScores -from benchmarks.overall.scoring import score_blocks -from marker.converters.pdf import PdfConverter - -def get_marker_markdown(marker_models: dict, pdf_bytes: bytes, use_llm: bool): - block_converter = PdfConverter( - artifact_dict=marker_models, - config={"page_range": [0], "disable_tqdm": True, "use_llm": use_llm} - ) - - with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as f: - f.write(pdf_bytes) - rendered = block_converter(f.name) - - return rendered.markdown - - -def marker_scoring_func(model_dict, sample, gt_markdown, use_llm=False, **kwargs) -> BlockScores: - pdf_bytes = sample["pdf"] # This is a single page PDF - start = time.time() - marker_md = get_marker_markdown(model_dict, pdf_bytes, use_llm) - marker_md_clean = clean_input(marker_md) - total = time.time() - start - scores = score_blocks(gt_markdown, marker_md_clean) - scores["time"] = total - scores["markdown"] = marker_md - return scores - - -def mathpix_scoring_func(model_dict, sample, gt_markdown, mathpix_ds=None, **kwargs) -> BlockScores: - uuid = sample["uuid"] - data = None - for row in mathpix_ds: - if str(row["uuid"]) == str(uuid): - data = row - break - if not data: - raise ValueError(f"Could not find data for uuid {uuid}") - - mathpix_md_clean = clean_input(data["md"]) - scores = score_blocks(gt_markdown, mathpix_md_clean) - scores["time"] = data["time"] - scores["markdown"] = data["md"] - return scores diff --git a/benchmarks/overall/methods/__init__.py b/benchmarks/overall/methods/__init__.py new file mode 100644 index 00000000..742bb532 --- /dev/null +++ 
b/benchmarks/overall/methods/__init__.py @@ -0,0 +1,91 @@ +import io +import re +from typing import Tuple + +import markdown2 +from PIL import Image +from playwright.sync_api import sync_playwright + +from benchmarks.overall.methods.schema import BenchmarkResult +from marker.renderers.markdown import MarkdownRenderer + + +class BaseMethod: + def __init__(self, **kwargs): + for kwarg in kwargs: + if hasattr(self, kwarg): + setattr(self, kwarg, kwargs[kwarg]) + + @staticmethod + def convert_to_md(html: str): + md = MarkdownRenderer() + markdown = md.md_cls.convert(html) + return markdown + + def __call__(self, sample) -> BenchmarkResult: + raise NotImplementedError() + + def render(self, markdown: str): + return self.html_to_image(self.convert_to_html(markdown)) + + @staticmethod + def convert_to_html(md: str): + block_placeholders = [] + inline_placeholders = [] + + # Add placeholders for the math + def block_sub(match): + content = match.group(1) + placeholder = f"1BLOCKMATH{len(block_placeholders)}1" + block_placeholders.append((placeholder, f"$${content}$$")) + return placeholder + + def inline_sub(match): + content = match.group(1) + placeholder = f"1INLINEMATH{len(inline_placeholders)}1" + inline_placeholders.append((placeholder, f"${content}$")) + return placeholder + + md = re.sub(r'\${2}(.*?)\${2}', block_sub, md, flags=re.DOTALL) + md = re.sub(r'\$(.*?)\$', inline_sub, md) + + html = markdown2.markdown(md, extras=['tables']) + + # Replace placeholders + for placeholder, math_str in block_placeholders: + html = html.replace(placeholder, math_str) + for placeholder, math_str in inline_placeholders: + html = html.replace(placeholder, math_str) + + return html + + def html_to_image(self, html: str) -> Image.Image: + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + page.set_content(f""" + + + + + + + + + {html} + + + """) + page.set_viewport_size({"width": 1200, "height": 800}) + page.wait_for_timeout(500) # Wait for 
KaTeX to render + screenshot_bytes = page.screenshot(full_page=True) + browser.close() + + return Image.open(io.BytesIO(screenshot_bytes)) \ No newline at end of file diff --git a/benchmarks/overall/methods/gt.py b/benchmarks/overall/methods/gt.py new file mode 100644 index 00000000..6c2c6c32 --- /dev/null +++ b/benchmarks/overall/methods/gt.py @@ -0,0 +1,29 @@ +from typing import List +import json + +from PIL import Image + +from benchmarks.overall.methods import BaseMethod, BenchmarkResult + + +class GTMethod(BaseMethod): + def __call__(self, sample) -> BenchmarkResult: + gt_blocks = json.loads(sample["gt_blocks"]) + gt_html = [block["html"] for block in gt_blocks if len(block["html"]) > 0] + gt_markdown = [self.convert_to_md(block) for block in gt_html] + return { + "markdown": gt_markdown, + "time": 0 + } + + def render(self, html: List[str]) -> Image.Image: + joined = "\n\n".join(html) + html = f""" + + + +{joined} + + +""".strip() + return self.html_to_image(html) \ No newline at end of file diff --git a/benchmarks/overall/methods/marker.py b/benchmarks/overall/methods/marker.py new file mode 100644 index 00000000..afaafcfc --- /dev/null +++ b/benchmarks/overall/methods/marker.py @@ -0,0 +1,29 @@ +import tempfile +import time + +from benchmarks.overall.methods import BaseMethod, BenchmarkResult +from marker.converters.pdf import PdfConverter + + +class MarkerMethod(BaseMethod): + model_dict: dict = None + use_llm: bool = False + + def __call__(self, sample) -> BenchmarkResult: + pdf_bytes = sample["pdf"] # This is a single page PDF + block_converter = PdfConverter( + artifact_dict=self.model_dict, + config={"page_range": [0], "disable_tqdm": True, "use_llm": self.use_llm} + ) + + with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as f: + f.write(pdf_bytes) + start = time.time() + rendered = block_converter(f.name) + total = time.time() - start + + return { + "markdown": rendered.markdown, + "time": total + } + diff --git 
a/benchmarks/overall/methods/mathpix.py b/benchmarks/overall/methods/mathpix.py new file mode 100644 index 00000000..d06340f7 --- /dev/null +++ b/benchmarks/overall/methods/mathpix.py @@ -0,0 +1,22 @@ +import datasets + +from benchmarks.overall.methods import BaseMethod, BenchmarkResult + + +class MathpixMethod(BaseMethod): + mathpix_ds: datasets.Dataset = None + + def __call__(self, sample) -> BenchmarkResult: + uuid = sample["uuid"] + data = None + for row in self.mathpix_ds: + if str(row["uuid"]) == str(uuid): + data = row + break + if not data: + raise ValueError(f"Could not find data for uuid {uuid}") + + return { + "markdown": data["md"], + "time": data["time"] + } \ No newline at end of file diff --git a/benchmarks/overall/methods/schema.py b/benchmarks/overall/methods/schema.py new file mode 100644 index 00000000..d475876e --- /dev/null +++ b/benchmarks/overall/methods/schema.py @@ -0,0 +1,6 @@ +from typing import TypedDict, List + + +class BenchmarkResult(TypedDict): + markdown: str | List[str] + time: float | None \ No newline at end of file diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index 291cfb3b..911d107f 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -2,117 +2,86 @@ import os from collections import defaultdict from pathlib import Path -from typing import Dict +from typing import List import click import datasets -import tabulate -from benchmarks.overall.render import build_dataset from tqdm import tqdm -import pypdfium2 as pdfium -from benchmarks.overall.clean import convert_to_md, clean_input -from benchmarks.overall.inference import marker_scoring_func, mathpix_scoring_func +from benchmarks.overall.registry import SCORE_REGISTRY, METHOD_REGISTRY from benchmarks.overall.schema import FullResult from marker.logger import configure_logging from marker.models import create_model_dict from marker.settings import settings +from benchmarks.overall.display import print_scores 
configure_logging() -def get_method_scores(ds, model_dict, max_rows=None, score_func=marker_scoring_func, **kwargs) -> FullResult: +def get_method_scores(benchmark_dataset: datasets.Dataset, methods: List[str], score_types: List[str], artifacts: dict, max_rows=None) -> FullResult: bench_scores = {} - averages_by_type = defaultdict(list) - averages_by_block_type = defaultdict(list) - for idx, sample in tqdm(enumerate(ds), desc="Running benchmark"): + averages_by_type = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) + averages_by_block_type = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) + average_times = defaultdict(list) + markdown_by_method = defaultdict(dict) + for idx, sample in tqdm(enumerate(benchmark_dataset), desc="Running benchmark"): if max_rows is not None and idx >= max_rows: break - gt_blocks = json.loads(sample["gt_blocks"]) doc_type = sample["classification"] + gt_cls = METHOD_REGISTRY["gt"] + gt_blocks = json.loads(sample["gt_blocks"]) + gt_md = gt_cls(**artifacts)(sample)["markdown"] + + out_data = defaultdict(dict) + + for method in methods: + method_cls = METHOD_REGISTRY[method](**artifacts) + method_info = method_cls(sample) + method_md = method_info["markdown"] + average_times[method].append(method_info["time"]) + markdown_by_method[idx][method] = method_md + + for score_type in score_types: + score_cls = SCORE_REGISTRY[score_type]() + try: + scores = score_cls(sample, gt_md, method_md) + except Exception as e: + # Some scorers can fail, like the LLM one + print(f"Failed to score {method} with {score_type}: {e}") + continue - try: - gt_html = [block["html"] for block in gt_blocks if len(block["html"]) > 0] - gt_markdown = [clean_input(convert_to_md(block)) for block in gt_html] - scores = score_func(model_dict, sample, gt_markdown, **kwargs) - except ValueError as e: - print(f"Error with sample {idx}: {e}") - continue - except pdfium.PdfiumError as e: - print(f"Error opening pdf: {e}") - continue + 
out_data[method][score_type] = scores - averages_by_type[doc_type].append(scores["overall_score"]) + averages_by_type[method][score_type][doc_type].append(scores["score"]) - for score, gt_block in zip(scores["scores"], gt_blocks): - averages_by_block_type[gt_block["block_type"]].append(score) + if "by_block" in scores["specific_scores"]: # Not all scorers support this + for score, gt_block in zip(scores["specific_scores"]["by_block"], gt_blocks): + averages_by_block_type[method][score_type][gt_block["block_type"]].append(score) - bench_scores[idx] = scores + bench_scores[idx] = out_data - avg_time = sum([bench_scores[k]["time"] for k in bench_scores]) / len(bench_scores) return { - "raw_scores": bench_scores, + "scores": bench_scores, + "markdown": markdown_by_method, "averages_by_type": averages_by_type, "averages_by_block_type": averages_by_block_type, - "average_time": avg_time, - "average_score": sum([bench_scores[k]["overall_score"] for k in bench_scores]) / len(bench_scores), + "average_times": average_times, } -def print_scores(scores: Dict[str, FullResult], out_path: Path, default_method="marker"): - inference_types = [default_method] + [k for k in scores.keys() if k != default_method] - - document_types = list(scores[default_method]["averages_by_type"].keys()) - document_rows = [[k] for k in document_types] - for k in inference_types: - for i, doc_type in enumerate(document_types): - avg = sum(scores[k]["averages_by_type"][doc_type]) / max(1, len(scores[k]["averages_by_type"][doc_type])) - document_rows[i].append(avg) - - print("Document types") - document_type_table = tabulate.tabulate(document_rows, headers=["Document Type"] + inference_types, tablefmt="github") - print(document_type_table) - with open(out_path / "document_types.md", "w", encoding="utf-8") as f: - f.write(document_type_table) - - block_types = list(scores[default_method]["averages_by_block_type"].keys()) - block_rows = [[k] for k in block_types] - for k in inference_types: - for i, 
block_type in enumerate(block_types): - avg = sum(scores[k]["averages_by_block_type"][block_type]) / max(1, len(scores[k]["averages_by_block_type"][block_type])) - block_rows[i].append(avg) - - print("Block types") - block_type_table = tabulate.tabulate(block_rows, headers=["Block Type"] + inference_types, tablefmt="github") - print(block_type_table) - with open(out_path / "block_types.md", "w", encoding="utf-8") as f: - f.write(block_type_table) - - headers = ["Method", "Avg Score", "Avg Time"] - inference_rows = [[k] for k in inference_types] - for i, k in enumerate(inference_types): - inference_rows[i].append(scores[k]["average_score"]) - inference_rows[i].append(scores[k]["average_time"]) - - print("Overall") - overall_table = tabulate.tabulate(inference_rows, headers=headers, tablefmt="github") - print(overall_table) - with open(out_path / "overall.md", "w", encoding="utf-8") as f: - f.write(overall_table) - - print("Scores computed by aligning ground truth markdown blocks with predicted markdown for each method. The scores are 0-100 based on edit distance.") - @click.command(help="Benchmark PDF to MD conversion.") @click.option("--dataset", type=str, help="Path to the benchmark dataset", default="datalab-to/marker_benchmark") @click.option("--out_dataset", type=str, help="Path to the output dataset", default=None) -@click.option("--other_methods", type=str, help="Comma separated list of other methods to compare against. Possible values: mathpix", default="") +@click.option("--methods", type=str, help="Comma separated list of other methods to compare against. Possible values: marker,mathpix", default="marker") +@click.option("--scores", type=str, help="Comma separated list of scoring functions to use. 
Possible values: heuristic,llm", default="heuristic") @click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "overall"), help="Output path for results.") @click.option("--max_rows", type=int, default=None, help="Maximum number of rows to process.") @click.option("--use_llm", is_flag=True, help="Use the LLM model for better marker quality.") def main( dataset: str, out_dataset: str, - other_methods: str, + methods: str, + scores: str, result_path: str, max_rows: int, use_llm: bool @@ -120,37 +89,35 @@ def main( out_path = Path(result_path) out_path.mkdir(parents=True, exist_ok=True) - allowed_methods = ["mathpix", ""] - methods = other_methods.split(",") + methods = methods.split(",") for method in methods: - if method not in allowed_methods: - raise ValueError(f"Method {method} not allowed. Allowed methods are {allowed_methods}") - - model_dict = create_model_dict() - ds = datasets.load_dataset(dataset, split="train") - - marker_scores = get_method_scores(ds, model_dict, max_rows=max_rows, use_llm=use_llm) - all_scores = { - "marker": marker_scores + if method not in METHOD_REGISTRY: + raise ValueError(f"Method {method} not allowed. Allowed methods are {METHOD_REGISTRY.keys()}") + + # Ensure marker is always first + methods = list(set(methods)) + methods = ["marker"] + [m for m in methods if m != "marker"] + + score_types = scores.split(",") + for score_type in score_types: + if score_type not in SCORE_REGISTRY: + raise ValueError(f"Score type {score_type} not allowed. 
Allowed types are {SCORE_REGISTRY.keys()}") + + benchmark_dataset = datasets.load_dataset(dataset, split="train") + artifacts = { + "model_dict": create_model_dict(), + "mathpix_ds": datasets.load_dataset("datalab-to/marker_benchmark_mathpix", split="train"), + "use_llm": use_llm } - if "mathpix" in methods: - mathpix_ds = datasets.load_dataset("datalab-to/marker_benchmark_mathpix", split="train") - mathpix_scores = get_method_scores(ds, model_dict, max_rows=max_rows, score_func=mathpix_scoring_func, mathpix_ds=mathpix_ds) - all_scores["mathpix"] = mathpix_scores - - # Display formatted score tables - print_scores(all_scores, out_path) + result = get_method_scores(benchmark_dataset, methods, score_types, artifacts, max_rows=max_rows) - with open(out_path / "overall.json", "w", encoding="utf-8") as f: - json.dump(all_scores, f, indent=2, ensure_ascii=False) + # Display benchmark scoring tables + print_scores(result, out_path, methods, score_types) - print(f"Results saved to {out_path}.") + with open(out_path / "result.json", "w") as f: + json.dump(result, f) - # Push up comparison dataset - if out_dataset is not None: - out_ds = build_dataset(ds, all_scores) - out_ds.push_to_hub(out_dataset) if __name__ == "__main__": main() diff --git a/benchmarks/overall/registry.py b/benchmarks/overall/registry.py new file mode 100644 index 00000000..5cabeab9 --- /dev/null +++ b/benchmarks/overall/registry.py @@ -0,0 +1,16 @@ +from benchmarks.overall.methods.gt import GTMethod +from benchmarks.overall.methods.marker import MarkerMethod +from benchmarks.overall.methods.mathpix import MathpixMethod +from benchmarks.overall.scorers.heuristic import HeuristicScorer +from benchmarks.overall.scorers.llm import LLMScorer + +SCORE_REGISTRY = { + "heuristic": HeuristicScorer, + "llm": LLMScorer +} + +METHOD_REGISTRY = { + "marker": MarkerMethod, + "gt": GTMethod, + "mathpix": MathpixMethod +} \ No newline at end of file diff --git a/benchmarks/overall/render.py 
b/benchmarks/overall/render.py deleted file mode 100644 index ff252266..00000000 --- a/benchmarks/overall/render.py +++ /dev/null @@ -1,117 +0,0 @@ -import subprocess -import tempfile -import pypdfium2 as pdfium -from typing import Dict -from collections import defaultdict -import re -import io -import json - -from PIL import Image -import datasets -import markdown2 -from playwright.sync_api import sync_playwright - -from benchmarks.overall.clean import convert_to_md, clean_input -from benchmarks.overall.schema import FullResult - -def convert_to_html(md: str): - block_placeholders = [] - inline_placeholders = [] - - # Add placeholders for the math - def block_sub(match): - content = match.group(1) - placeholder = f"1BLOCKMATH{len(block_placeholders)}1" - block_placeholders.append((placeholder, f"$${content}$$")) - return placeholder - - def inline_sub(match): - content = match.group(1) - placeholder = f"1INLINEMATH{len(inline_placeholders)}1" - inline_placeholders.append((placeholder, f"${content}$")) - return placeholder - - md = re.sub(r'\${2}(.*?)\${2}', block_sub, md, flags=re.DOTALL) - md = re.sub(r'\$(.*?)\$', inline_sub, md) - - html = markdown2.markdown(md, extras=['tables']) - - # Replace placeholders - for placeholder, math_str in block_placeholders: - html = html.replace(placeholder, math_str) - for placeholder, math_str in inline_placeholders: - html = html.replace(placeholder, math_str) - - return html - - -def markdown_to_image(md: str) -> Image.Image: - html = convert_to_html(md) - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - page.set_content(f""" - - - - - - - - - {html} - - - """) - page.set_viewport_size({"width": 1200, "height": 800}) - page.wait_for_timeout(500) # Wait for KaTeX to render - screenshot_bytes = page.screenshot(full_page=True) - browser.close() - - return Image.open(io.BytesIO(screenshot_bytes)) - - -def build_dataset(ds: datasets.Dataset, all_scores: Dict[str, FullResult]) -> 
datasets.Dataset: - # Get all the dataset indices that went through inference - full_idxs = None - for method in all_scores: - result_idxs = list(all_scores[method]["raw_scores"].keys()) - if full_idxs is None: - full_idxs = sorted(result_idxs) - else: - full_idxs = [f for f in full_idxs if f in result_idxs] - - ds_rows = defaultdict(dict) - for idx in full_idxs: - row = ds[idx] - ds_rows[idx].update({ - "img": row["img"], - "classification": row["classification"], - "language": row["language"], - "uuid": row["uuid"] - }) - for method in all_scores: - method_row = all_scores[method]["raw_scores"][idx] - ds_rows[idx].update({ - f"{method}_score": method_row["overall_score"], - f"{method}_markdown": method_row["markdown"], - f"{method}_image": markdown_to_image(method_row["markdown"]), - f"{method}_time": method_row["time"] - }) - gt_html = [block["html"] for block in json.loads(row["gt_blocks"]) if len(block["html"]) > 0] - gt_md = "\n\n".join([convert_to_md(block) for block in gt_html]) - ds_rows[idx].update({ - "gt_markdown": gt_md, - "gt_markdown_image": markdown_to_image(gt_md) - }) - out_dataset = datasets.Dataset.from_list([ds_rows[k] for k in full_idxs]) - return out_dataset - diff --git a/benchmarks/overall/schema.py b/benchmarks/overall/schema.py index 668f83f1..56d99e3a 100644 --- a/benchmarks/overall/schema.py +++ b/benchmarks/overall/schema.py @@ -1,18 +1,12 @@ -from typing import TypedDict, List, Dict, Optional +from typing import TypedDict, List, Dict +from benchmarks.overall.scorers.schema import BlockScores -class BlockScores(TypedDict): - scores: List[float] - order_score: float - overall_score: float - time: Optional[float] - markdown: str - +AVG_TYPE = Dict[str, Dict[str, Dict[str, List[float]]]] class FullResult(TypedDict): - raw_scores: Dict[int, BlockScores] - averages_by_type: Dict[str, List[float]] - averages_by_block_type: Dict[str, List[float]] - average_time: float - average_score: float - gt_markdown: List[str] + scores: Dict[int, 
Dict[str, Dict[str, BlockScores]]] + averages_by_type: AVG_TYPE + averages_by_block_type: AVG_TYPE + average_times: Dict[str, List[float]] + markdown: Dict[int, Dict[str, str]] diff --git a/benchmarks/overall/scorers/__init__.py b/benchmarks/overall/scorers/__init__.py new file mode 100644 index 00000000..492bc4e4 --- /dev/null +++ b/benchmarks/overall/scorers/__init__.py @@ -0,0 +1,11 @@ +from typing import List + +from benchmarks.overall.scorers.schema import BlockScores + + +class BaseScorer: + def __init__(self): + pass + + def __call__(self, sample, gt_markdown: List[str], method_markdown: str) -> BlockScores: + raise NotImplementedError() \ No newline at end of file diff --git a/benchmarks/overall/clean.py b/benchmarks/overall/scorers/clean.py similarity index 93% rename from benchmarks/overall/clean.py rename to benchmarks/overall/scorers/clean.py index f3c6cedb..ed3a6cc2 100644 --- a/benchmarks/overall/clean.py +++ b/benchmarks/overall/scorers/clean.py @@ -5,8 +5,6 @@ import latex2mathml.converter -from marker.renderers.markdown import MarkdownRenderer - class MarkdownCleaner: def __init__(self): pass @@ -112,14 +110,4 @@ def clean_latex(latex_str): return latex_str -def convert_to_md(html): - md = MarkdownRenderer() - markdown = md.md_cls.convert(html) - return markdown - -def clean_input(markdown): - cleaner = MarkdownCleaner() - return cleaner(markdown) - - diff --git a/benchmarks/overall/scorers/heuristic.py b/benchmarks/overall/scorers/heuristic.py new file mode 100644 index 00000000..ac1bf0e0 --- /dev/null +++ b/benchmarks/overall/scorers/heuristic.py @@ -0,0 +1,96 @@ +from typing import List + +from rapidfuzz import fuzz + +from benchmarks.overall.scorers.clean import MarkdownCleaner +from benchmarks.overall.scorers.schema import BlockScores +from benchmarks.overall.scorers import BaseScorer + + +class HeuristicScorer(BaseScorer): + def __call__(self, sample, gt_markdown: List[str], method_markdown: str) -> BlockScores: + # Standardize inputs + 
gt_markdown = [self.clean_input(block) for block in gt_markdown] + method_markdown = self.clean_input(method_markdown) + + alignments = self.find_fuzzy_alignments(method_markdown, gt_markdown) + scores = [alignment["score"] for alignment in alignments] + + # Find order score + orders = [alignment["start"] for alignment in alignments] + correct_order = list(range(len(gt_markdown))) + actual_order = sorted(range(len(gt_markdown)), key=lambda x: orders[x]) + order_score = self.kendall_tau(correct_order, actual_order) + + # Weight score by sequence length + gt_weights = [len(g) for g in gt_markdown] + weighted_scores = [score * weight for score, weight in zip(scores, gt_weights)] + + # Weight the score by sequence length + overall_score = sum(weighted_scores) / max(1, sum(gt_weights)) + overall_score = overall_score * 0.8 + order_score * 0.2 + return { + "score": overall_score, + "specific_scores": { + "order": order_score, + "by_block": scores + }, + } + + @staticmethod + def kendall_tau(correct_order: List[int], actual_order: List[int]) -> float: + n = len(correct_order) + concordant = 0 + discordant = 0 + + if n <= 1: + return 100 + + for i in range(n): + for j in range(i + 1, n): + correct_sign = correct_order[i] - correct_order[j] + actual_sign = actual_order[i] - actual_order[j] + + if (correct_sign > 0 and actual_sign > 0) or (correct_sign < 0 and actual_sign < 0): + concordant += 1 + elif (correct_sign < 0 and actual_sign > 0) or (correct_sign > 0 and actual_sign < 0): + discordant += 1 + + total_pairs = (n * (n - 1)) // 2 + tau = (concordant - discordant) / total_pairs + tau = (tau + 1) / 2 # 0-1 scale + return tau * 100 # 0-100 scale + + @staticmethod + def find_fuzzy_alignments( + main_string: str, + substrings: List[str], + threshold: int = 70 + ) -> List[dict]: + alignments = [] + + for idx, substr in enumerate(substrings): + result = fuzz.partial_ratio_alignment(substr, main_string, score_cutoff=threshold) + + score = 0 + dest_start = 0 + dest_end = 0 + 
if result: + score = result.score + dest_start = result.dest_start + dest_end = result.dest_end + + alignments.append({ + "string": substr, + "start": dest_start, + "end": dest_end, + "score": score, + "idx": idx + }) + return alignments + + + @staticmethod + def clean_input(md: str): + cleaner = MarkdownCleaner() + return cleaner(md) \ No newline at end of file diff --git a/benchmarks/overall/scorers/llm.py b/benchmarks/overall/scorers/llm.py new file mode 100644 index 00000000..f0de5322 --- /dev/null +++ b/benchmarks/overall/scorers/llm.py @@ -0,0 +1,148 @@ +import json +import tempfile +import time +from typing import List + +from PIL import Image +from google.ai.generativelanguage_v1beta.types import content +from google.api_core.exceptions import ResourceExhausted +import pypdfium2 as pdfium + +from benchmarks.overall.scorers import BaseScorer, BlockScores +from marker.settings import settings + +rating_prompt = """ +You're a document analysis expert who is comparing some markdown to an image to make sure the markdown is correct. You're rating how effectively the provided markdown represents the full text and formatting in the image provided. +You're given an image, along with the extracted markdown: +- Some parts of the page may have been recognized as images and linked from the markdown, like `![](_page_0_Picture_0.jpeg)`. +- Tables will be formatted as Github flavored markdown. +- Block equations will be in LaTeX. +- The image and markdown may be in any language. +- The markdown is based on the text extracted from the document, and sometimes the document may have had bad OCR applied to it, resulting in gibberish text. + +The markdown should fully capture the meaning and formatting of the text in the image. You'll evaluate the markdown based on the image provided. + +**Instructions** +Follow this process to evaluate the markdown: +1. Carefully examine the image. +2. Carefully examine the markdown input provided. +3. 
Compare the image to the markdown representation. Does the markdown representation properly represent the important text and formatting in the image? +4. Assign component scores, as described below. + +These are the primary scores: +- Overall - the overall quality of the markdown as compared to the image. +- Text quality - the quality of the text extraction from the image. +- Formatting quality - the quality of the formatting applied to the markdown, as compared to the image. + +Depending on which elements are present in the markdown, you will assign element-specific scores. +- Tables - how effectively the tables have been extracted and formatted. +- Forms - how effectively the forms have extracted and formatted. +- Equations - how effectively block equations have been converted to LaTeX. +- Section headers - if all of the section headers have been detected, and the right levels set. +- Lists - if the lists have been properly extracted and formatted. +- Images - if images are identified and placed correctly. + +Notes on scoring: +- To get a 5/5, all of the important text from the image must appear in the markdown, and the formatting should be correct (minor mistakes okay). It's okay to omit some text that isn't important to the meaning, like page numbers and chapter headings. If the entire page is an image, it's okay if the markdown is just a link to the image, unless the image would be better represented as text. +- A 3/5 may have small missing text elements from the markdown and/or moderate formatting issues. +- A 1/5 will have major missing text segments from the markdown or completely unreadable formatting. +- Use 0/5 if a field isn't applicable, like if the image doesn't contain a table. + +Output json, like in the example below. + +**Example** +Input +```markdown +# Section 1 +This is some *markdown* extracted from a document. 
Here is a block equation: +$$\frac{ab \cdot x^5 + x^2 + 2 \cdot x + 123}{t}$$ +``` +Output +```json +{ + "image_description": "In the image, there is a section header 'Section 1', followed by some text and a block equation.", + "markdown_description": "In the markdown, there is a section header 'Section 1', followed by some text and a block equation.", + "comparison": "The text and formatting matches the image. There are no formatting or text extraction issues. The equations and section headers are correct.", + "overall": 5, + "text": 5, + "formatting": 5, + "section_headers": 5, + "tables": 0, + "forms": 0, + "equations": 5, + "lists": 0, + "images": 0 +} +``` +**Input** +```markdown +{{markdown}} +``` +**Output** +""" + +comparison_keys = ["comparison"] +description_keys = ["image_description", "markdown_description"] +text_keys = comparison_keys + description_keys +score_keys = ["overall", "text", "formatting", "section_headers", "tables", "forms", "equations", + "lists", "images"] + + +class LLMScorer(BaseScorer): + def __call__(self, sample, gt_markdown: List[str], markdown: str) -> BlockScores: + pdf_bytes = sample["pdf"] + with tempfile.NamedTemporaryFile(suffix=".pdf") as f: + f.write(pdf_bytes) + f.flush() + f.seek(0) + doc = pdfium.PdfDocument(f.name) + img = doc[0].render(scale=96/72).to_pil() + doc.close() + + return self.llm_rater(img, markdown) + + + def llm_rater(self, img: Image.Image, markdown: str) -> BlockScores: + req_keys = text_keys + score_keys + properties = {} + for key in req_keys: + content_type = content.Type.INTEGER if key in score_keys else content.Type.STRING + properties[key] = content.Schema(type=content_type) + + response_schema = content.Schema( + type=content.Type.OBJECT, + required=req_keys, + properties=properties + ) + + prompt = rating_prompt.replace("{{markdown}}", markdown) + response = self.llm_response_wrapper([img, prompt], response_schema) + assert all([k in response for k in req_keys]), f"Missing keys in response: 
{response}" + return { + "score": response["overall"], + "specific_scores": response, + } + + def llm_response_wrapper(self, prompt, response_schema, depth=0): + import google.generativeai as genai + genai.configure(api_key=settings.GOOGLE_API_KEY) + gemini_model = genai.GenerativeModel("gemini-1.5-flash") + try: + responses = gemini_model.generate_content( + prompt, + stream=False, + generation_config={ + "temperature": 0, + "response_schema": response_schema, + "response_mime_type": "application/json", + }, + request_options={'timeout': 60} + ) + output = responses.candidates[0].content.parts[0].text + return json.loads(output) + except ResourceExhausted as e: + print(f"Hit Gemini rate limit, waiting 120 seconds") + time.sleep(120) + if depth > 2: + raise e + return self.llm_response_wrapper(prompt, response_schema, depth + 1) \ No newline at end of file diff --git a/benchmarks/overall/scorers/schema.py b/benchmarks/overall/scorers/schema.py new file mode 100644 index 00000000..74e814fc --- /dev/null +++ b/benchmarks/overall/scorers/schema.py @@ -0,0 +1,6 @@ +from typing import TypedDict, List, Optional, Dict + + +class BlockScores(TypedDict): + score: float + specific_scores: Dict[str, float | List[float]] diff --git a/benchmarks/overall/scoring.py b/benchmarks/overall/scoring.py deleted file mode 100644 index d268ef11..00000000 --- a/benchmarks/overall/scoring.py +++ /dev/null @@ -1,83 +0,0 @@ -from typing import List - -from rapidfuzz import fuzz - -from benchmarks.overall.clean import convert_to_md, MarkdownCleaner -from benchmarks.overall.schema import BlockScores - - -def kendall_tau(correct_order: List[int], actual_order: List[int]) -> float: - n = len(correct_order) - concordant = 0 - discordant = 0 - - if n <= 1: - return 100 - - for i in range(n): - for j in range(i + 1, n): - correct_sign = correct_order[i] - correct_order[j] - actual_sign = actual_order[i] - actual_order[j] - - if (correct_sign > 0 and actual_sign > 0) or (correct_sign < 0 and 
actual_sign < 0): - concordant += 1 - elif (correct_sign < 0 and actual_sign > 0) or (correct_sign > 0 and actual_sign < 0): - discordant += 1 - - total_pairs = (n * (n - 1)) // 2 - tau = (concordant - discordant) / total_pairs - tau = (tau + 1) / 2 # 0-1 scale - return tau * 100 # 0-100 scale - - -def find_fuzzy_alignments( - main_string: str, - substrings: List[str], - threshold: int = 70 -) -> List[dict]: - alignments = [] - - for idx, substr in enumerate(substrings): - result = fuzz.partial_ratio_alignment(substr, main_string, score_cutoff=threshold) - - score = 0 - dest_start = 0 - dest_end = 0 - if result: - score = result.score - dest_start = result.dest_start - dest_end = result.dest_end - - alignments.append({ - "string": substr, - "start": dest_start, - "end": dest_end, - "score": score, - "idx": idx - }) - return alignments - - -def score_blocks(gt_markdown: List[str], method_markdown: str) -> BlockScores: - alignments = find_fuzzy_alignments(method_markdown, gt_markdown) - scores = [alignment["score"] for alignment in alignments] - - # Find order score - orders = [alignment["start"] for alignment in alignments] - correct_order = list(range(len(gt_markdown))) - actual_order = sorted(range(len(gt_markdown)), key=lambda x: orders[x]) - order_score = kendall_tau(correct_order, actual_order) - - # Weight score by sequence length - gt_weights = [len(g) for g in gt_markdown] - weighted_scores = [score * weight for score, weight in zip(scores, gt_weights)] - - # Weight the score by sequence length - overall_score = sum(weighted_scores) / max(1, sum(gt_weights)) - overall_score = overall_score * 0.8 + order_score * 0.2 - return { - "scores": scores, - "order_score": order_score, - "overall_score": overall_score, - "time": None - } \ No newline at end of file From 75633ca3bfdf486b15df8eff3e2af00c538f88de Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Tue, 4 Feb 2025 13:25:54 -0500 Subject: [PATCH 14/27] Finalize dataset uploading --- 
benchmarks/overall/display/__init__.py | 62 -------------------------- benchmarks/overall/display/dataset.py | 39 ++++++++++++++++ benchmarks/overall/display/table.py | 62 ++++++++++++++++++++++++++ benchmarks/overall/overall.py | 9 +++- benchmarks/verify_scores.py | 4 +- 5 files changed, 112 insertions(+), 64 deletions(-) create mode 100644 benchmarks/overall/display/dataset.py create mode 100644 benchmarks/overall/display/table.py diff --git a/benchmarks/overall/display/__init__.py b/benchmarks/overall/display/__init__.py index 25b3f73c..e69de29b 100644 --- a/benchmarks/overall/display/__init__.py +++ b/benchmarks/overall/display/__init__.py @@ -1,62 +0,0 @@ -from pathlib import Path -from typing import Dict, List - -import tabulate - -from benchmarks.overall.schema import FullResult - -def write_table(title: str, rows: list, headers: list, out_path: Path, filename: str): - table = tabulate.tabulate(rows, headers=headers, tablefmt="github") - with open(out_path / filename, "w", encoding="utf-8") as f: - f.write(f"# {title}\n") - f.write(table) - print(title) - print(table) - - -def print_scores(result: FullResult, out_path: Path, methods: List[str], score_types: List[str], default_score_type="heuristic", default_method="marker"): - document_types = list(result["averages_by_type"][default_method][default_score_type].keys()) - headers = ["Document Type"] - for method in methods: - for score_type in score_types: - headers.append(f"{method} {score_type}") - - document_rows = [[k] for k in document_types] - for i, doc_type in enumerate(document_types): - for method in methods: - for score_type in score_types: - avg_score = sum(result["averages_by_type"][method][score_type][doc_type]) / max(1, len(result["averages_by_type"][method][score_type][doc_type])) - document_rows[i].append(avg_score) - - write_table("Document Types", document_rows, headers, out_path, "document_types.md") - - headers = ["Block Type"] - block_types = 
list(result["averages_by_block_type"][default_method][default_score_type].keys()) # all possible blocks - block_score_types = list(result["averages_by_block_type"][default_method].keys()) - for method in methods: - for score_type in block_score_types: - headers.append(f"{method} {score_type}") - - block_rows = [[k] for k in block_types] - for i, block_type in enumerate(block_types): - for method in methods: - for score_type in block_score_types: - avg_score = sum(result["averages_by_block_type"][method][score_type][block_type]) / max(1, len(result["averages_by_block_type"][method][score_type][block_type])) - block_rows[i].append(avg_score) - - write_table("Block types", block_rows, headers, out_path, "block_types.md") - - headers = ["Method", "Avg Time"] + score_types - inference_rows = [[k] for k in methods] - all_raw_scores = [result["scores"][i] for i in result["scores"]] - for i, method in enumerate(methods): - avg_time = sum(result["average_times"][method]) / max(1, len(result["average_times"][method])) - inference_rows[i].append(avg_time) - for score_type in score_types: - scores_lst = [ar[method][score_type]["score"] for ar in all_raw_scores] - avg_score = sum(scores_lst) / max(1, len(scores_lst)) - inference_rows[i].append(avg_score) - - write_table("Overall Results", inference_rows, headers, out_path, "overall.md") - - print("Scores computed by aligning ground truth markdown blocks with predicted markdown for each method. 
The scores are 0-100 based on edit distance.") \ No newline at end of file diff --git a/benchmarks/overall/display/dataset.py b/benchmarks/overall/display/dataset.py new file mode 100644 index 00000000..88fe6cad --- /dev/null +++ b/benchmarks/overall/display/dataset.py @@ -0,0 +1,39 @@ +import json +from typing import List + +import datasets +from tqdm import tqdm + +from benchmarks.overall.registry import METHOD_REGISTRY +from benchmarks.overall.schema import FullResult + + +def build_dataset(bench_dataset: datasets.Dataset, result: FullResult, score_types: List[str]) -> datasets.Dataset: + rows = [] + for idx, sample in tqdm(enumerate(bench_dataset), desc="Building dataset"): + if idx not in result["markdown"]: + continue + + row = { + "uuid": sample["uuid"], + "classification": sample["classification"], + "language": sample["language"], + "img": sample["img"], + } + for method in result["markdown"][idx]: + if method == "gt": + continue + + method_cls = METHOD_REGISTRY[method]() + md = result["markdown"][idx][method] + method_img = method_cls.render(result["markdown"][idx][method]) + row[f"{method}_md"] = md + row[f"{method}_img"] = method_img + + for score_type in score_types: + row[f"{method}_{score_type}"] = result["scores"][idx][method][score_type]["score"] + row[f"{method}_{score_type}_detail"] = json.dumps(result["scores"][idx][method][score_type]["specific_scores"]) + rows.append(row) + ds = datasets.Dataset.from_list(rows) + return ds + diff --git a/benchmarks/overall/display/table.py b/benchmarks/overall/display/table.py new file mode 100644 index 00000000..25b3f73c --- /dev/null +++ b/benchmarks/overall/display/table.py @@ -0,0 +1,62 @@ +from pathlib import Path +from typing import Dict, List + +import tabulate + +from benchmarks.overall.schema import FullResult + +def write_table(title: str, rows: list, headers: list, out_path: Path, filename: str): + table = tabulate.tabulate(rows, headers=headers, tablefmt="github") + with open(out_path / filename, 
"w", encoding="utf-8") as f: + f.write(f"# {title}\n") + f.write(table) + print(title) + print(table) + + +def print_scores(result: FullResult, out_path: Path, methods: List[str], score_types: List[str], default_score_type="heuristic", default_method="marker"): + document_types = list(result["averages_by_type"][default_method][default_score_type].keys()) + headers = ["Document Type"] + for method in methods: + for score_type in score_types: + headers.append(f"{method} {score_type}") + + document_rows = [[k] for k in document_types] + for i, doc_type in enumerate(document_types): + for method in methods: + for score_type in score_types: + avg_score = sum(result["averages_by_type"][method][score_type][doc_type]) / max(1, len(result["averages_by_type"][method][score_type][doc_type])) + document_rows[i].append(avg_score) + + write_table("Document Types", document_rows, headers, out_path, "document_types.md") + + headers = ["Block Type"] + block_types = list(result["averages_by_block_type"][default_method][default_score_type].keys()) # all possible blocks + block_score_types = list(result["averages_by_block_type"][default_method].keys()) + for method in methods: + for score_type in block_score_types: + headers.append(f"{method} {score_type}") + + block_rows = [[k] for k in block_types] + for i, block_type in enumerate(block_types): + for method in methods: + for score_type in block_score_types: + avg_score = sum(result["averages_by_block_type"][method][score_type][block_type]) / max(1, len(result["averages_by_block_type"][method][score_type][block_type])) + block_rows[i].append(avg_score) + + write_table("Block types", block_rows, headers, out_path, "block_types.md") + + headers = ["Method", "Avg Time"] + score_types + inference_rows = [[k] for k in methods] + all_raw_scores = [result["scores"][i] for i in result["scores"]] + for i, method in enumerate(methods): + avg_time = sum(result["average_times"][method]) / max(1, len(result["average_times"][method])) + 
inference_rows[i].append(avg_time) + for score_type in score_types: + scores_lst = [ar[method][score_type]["score"] for ar in all_raw_scores] + avg_score = sum(scores_lst) / max(1, len(scores_lst)) + inference_rows[i].append(avg_score) + + write_table("Overall Results", inference_rows, headers, out_path, "overall.md") + + print("Scores computed by aligning ground truth markdown blocks with predicted markdown for each method. The scores are 0-100 based on edit distance.") \ No newline at end of file diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index 911d107f..04065a6b 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -8,12 +8,13 @@ import datasets from tqdm import tqdm +from benchmarks.overall.display.dataset import build_dataset from benchmarks.overall.registry import SCORE_REGISTRY, METHOD_REGISTRY from benchmarks.overall.schema import FullResult from marker.logger import configure_logging from marker.models import create_model_dict from marker.settings import settings -from benchmarks.overall.display import print_scores +from benchmarks.overall.display.table import print_scores configure_logging() @@ -32,6 +33,7 @@ def get_method_scores(benchmark_dataset: datasets.Dataset, methods: List[str], s gt_cls = METHOD_REGISTRY["gt"] gt_blocks = json.loads(sample["gt_blocks"]) gt_md = gt_cls(**artifacts)(sample)["markdown"] + markdown_by_method[idx]["gt"] = gt_md out_data = defaultdict(dict) @@ -115,9 +117,14 @@ def main( # Display benchmark scoring tables print_scores(result, out_path, methods, score_types) + # Write to json with open(out_path / "result.json", "w") as f: json.dump(result, f) + if out_dataset: + dataset = build_dataset(benchmark_dataset, result, score_types) + dataset.push_to_hub(out_dataset) + if __name__ == "__main__": main() diff --git a/benchmarks/verify_scores.py b/benchmarks/verify_scores.py index 7b72a75a..088f137e 100644 --- a/benchmarks/verify_scores.py +++ 
b/benchmarks/verify_scores.py @@ -6,7 +6,9 @@ def verify_scores(file_path): with open(file_path, 'r') as file: data = json.load(file) - marker_score = data["marker"]["average_score"] + raw_scores = [data["scores"][k] for k in data["scores"]] + marker_scores = [r["marker"]["heuristic"]["score"] for r in raw_scores] + marker_score = sum(marker_scores) / len(marker_scores) if marker_score < 90: raise ValueError("Marker score below 90") From d49df6cfbc3cd02b104cdbc5341fa42d39a02b6e Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Wed, 5 Feb 2025 20:51:21 -0500 Subject: [PATCH 15/27] Benchmark fixes --- benchmarks/overall/display/dataset.py | 15 +++++-- benchmarks/overall/display/table.py | 8 +++- benchmarks/overall/methods/__init__.py | 15 +++++-- benchmarks/overall/overall.py | 56 +++++++++++++++----------- benchmarks/overall/scorers/llm.py | 2 +- benchmarks/table/gemini.py | 2 +- marker/builders/llm_layout.py | 2 +- marker/processors/llm/__init__.py | 2 +- 8 files changed, 67 insertions(+), 35 deletions(-) diff --git a/benchmarks/overall/display/dataset.py b/benchmarks/overall/display/dataset.py index 88fe6cad..e9fcabdd 100644 --- a/benchmarks/overall/display/dataset.py +++ b/benchmarks/overall/display/dataset.py @@ -8,12 +8,15 @@ from benchmarks.overall.schema import FullResult -def build_dataset(bench_dataset: datasets.Dataset, result: FullResult, score_types: List[str]) -> datasets.Dataset: +def build_dataset(bench_dataset: datasets.Dataset, result: FullResult, score_types: List[str], max_rows: int | None = None) -> datasets.Dataset: rows = [] for idx, sample in tqdm(enumerate(bench_dataset), desc="Building dataset"): if idx not in result["markdown"]: continue + if max_rows is not None and idx >= max_rows: + break + row = { "uuid": sample["uuid"], "classification": sample["classification"], @@ -31,8 +34,14 @@ def build_dataset(bench_dataset: datasets.Dataset, result: FullResult, score_typ row[f"{method}_img"] = method_img for score_type in score_types: - 
row[f"{method}_{score_type}"] = result["scores"][idx][method][score_type]["score"] - row[f"{method}_{score_type}_detail"] = json.dumps(result["scores"][idx][method][score_type]["specific_scores"]) + try: + row[f"{method}_{score_type}"] = result["scores"][idx][method][score_type]["score"] + except KeyError: + row[f"{method}_{score_type}"] = -1.0 # Missing score + try: + row[f"{method}_{score_type}_detail"] = json.dumps(result["scores"][idx][method][score_type]["specific_scores"]) + except KeyError: + row[f"{method}_{score_type}_detail"] = "" # Missing detail rows.append(row) ds = datasets.Dataset.from_list(rows) return ds diff --git a/benchmarks/overall/display/table.py b/benchmarks/overall/display/table.py index 25b3f73c..5d704214 100644 --- a/benchmarks/overall/display/table.py +++ b/benchmarks/overall/display/table.py @@ -53,7 +53,13 @@ def print_scores(result: FullResult, out_path: Path, methods: List[str], score_t avg_time = sum(result["average_times"][method]) / max(1, len(result["average_times"][method])) inference_rows[i].append(avg_time) for score_type in score_types: - scores_lst = [ar[method][score_type]["score"] for ar in all_raw_scores] + scores_lst = [] + for ar in all_raw_scores: + try: + # Sometimes a few llm scores are missing + scores_lst.append(ar[method][score_type]["score"]) + except KeyError: + continue avg_score = sum(scores_lst) / max(1, len(scores_lst)) inference_rows[i].append(avg_score) diff --git a/benchmarks/overall/methods/__init__.py b/benchmarks/overall/methods/__init__.py index 742bb532..a5a3f53f 100644 --- a/benchmarks/overall/methods/__init__.py +++ b/benchmarks/overall/methods/__init__.py @@ -1,4 +1,5 @@ import io +import random import re from typing import Tuple @@ -63,7 +64,9 @@ def html_to_image(self, html: str) -> Image.Image: with sync_playwright() as p: browser = p.chromium.launch() page = browser.new_page() - page.set_content(f""" + html_str = f""" + + @@ -74,16 +77,22 @@ def html_to_image(self, html: str) -> Image.Image: 
{html} - """) + + """.strip() page.set_viewport_size({"width": 1200, "height": 800}) + page.set_content(html_str) + page.wait_for_load_state("domcontentloaded") page.wait_for_timeout(500) # Wait for KaTeX to render screenshot_bytes = page.screenshot(full_page=True) browser.close() diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index 04065a6b..d73903b9 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -37,29 +37,35 @@ def get_method_scores(benchmark_dataset: datasets.Dataset, methods: List[str], s out_data = defaultdict(dict) - for method in methods: - method_cls = METHOD_REGISTRY[method](**artifacts) - method_info = method_cls(sample) - method_md = method_info["markdown"] - average_times[method].append(method_info["time"]) - markdown_by_method[idx][method] = method_md - - for score_type in score_types: - score_cls = SCORE_REGISTRY[score_type]() - try: - scores = score_cls(sample, gt_md, method_md) - except Exception as e: - # Some scorers can fail, like the LLM one - print(f"Failed to score {method} with {score_type}: {e}") - continue - - out_data[method][score_type] = scores - - averages_by_type[method][score_type][doc_type].append(scores["score"]) - - if "by_block" in scores["specific_scores"]: # Not all scorers support this - for score, gt_block in zip(scores["specific_scores"]["by_block"], gt_blocks): - averages_by_block_type[method][score_type][gt_block["block_type"]].append(score) + try: + for method in methods: + method_cls = METHOD_REGISTRY[method](**artifacts) + method_info = method_cls(sample) + method_md = method_info["markdown"] + average_times[method].append(method_info["time"]) + markdown_by_method[idx][method] = method_md + + for score_type in score_types: + score_cls = SCORE_REGISTRY[score_type]() + try: + scores = score_cls(sample, gt_md, method_md) + except Exception as e: + # Some scorers can fail, like the LLM one + print(f"Failed to score {method} with {score_type}: {e}") + continue + 
+ out_data[method][score_type] = scores + + averages_by_type[method][score_type][doc_type].append(scores["score"]) + + if "by_block" in scores["specific_scores"]: # Not all scorers support this + for score, gt_block in zip(scores["specific_scores"]["by_block"], gt_blocks): + averages_by_block_type[method][score_type][gt_block["block_type"]].append(score) + except Exception as e: + print(f"Failed to process {idx}: {e}") + if idx in markdown_by_method: + del markdown_by_method[idx] + continue bench_scores[idx] = out_data @@ -122,7 +128,9 @@ def main( json.dump(result, f) if out_dataset: - dataset = build_dataset(benchmark_dataset, result, score_types) + if use_llm: + out_dataset += "_llm" + dataset = build_dataset(benchmark_dataset, result, score_types, max_rows=max_rows) dataset.push_to_hub(out_dataset) diff --git a/benchmarks/overall/scorers/llm.py b/benchmarks/overall/scorers/llm.py index f0de5322..5da61c41 100644 --- a/benchmarks/overall/scorers/llm.py +++ b/benchmarks/overall/scorers/llm.py @@ -126,7 +126,7 @@ def llm_rater(self, img: Image.Image, markdown: str) -> BlockScores: def llm_response_wrapper(self, prompt, response_schema, depth=0): import google.generativeai as genai genai.configure(api_key=settings.GOOGLE_API_KEY) - gemini_model = genai.GenerativeModel("gemini-1.5-flash") + gemini_model = genai.GenerativeModel("gemini-2.0-flash") try: responses = gemini_model.generate_content( prompt, diff --git a/benchmarks/table/gemini.py b/benchmarks/table/gemini.py index 9e2591ee..c58f2a92 100644 --- a/benchmarks/table/gemini.py +++ b/benchmarks/table/gemini.py @@ -32,7 +32,7 @@ def gemini_table_rec(image: Image.Image): } ) - model = genai.GenerativeModel("gemini-1.5-flash") + model = genai.GenerativeModel("gemini-2.0-flash") responses = model.generate_content( [image, prompt], # According to gemini docs, it performs better if the image is the first element diff --git a/marker/builders/llm_layout.py b/marker/builders/llm_layout.py index c9aae671..5e9881fd 100644 
--- a/marker/builders/llm_layout.py +++ b/marker/builders/llm_layout.py @@ -37,7 +37,7 @@ class LLMLayoutBuilder(LayoutBuilder): model_name: Annotated[ str, "The name of the Gemini model to use.", - ] = "gemini-1.5-flash" + ] = "gemini-2.0-flash" max_retries: Annotated[ int, "The maximum number of retries to use for the Gemini model.", diff --git a/marker/processors/llm/__init__.py b/marker/processors/llm/__init__.py index 5f36139e..c53d2cca 100644 --- a/marker/processors/llm/__init__.py +++ b/marker/processors/llm/__init__.py @@ -23,7 +23,7 @@ class BaseLLMProcessor(BaseProcessor): model_name: Annotated[ str, "The name of the Gemini model to use.", - ] = "gemini-1.5-flash" + ] = "gemini-2.0-flash" max_retries: Annotated[ int, "The maximum number of retries to use for the Gemini model.", From de9651e8aa41e11a2f2053c73df8bc0a37cc4415 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 6 Feb 2025 11:42:45 -0500 Subject: [PATCH 16/27] Cleanup texify integration --- marker/processors/equation.py | 33 +++++++++------------------------ marker/settings.py | 8 -------- 2 files changed, 9 insertions(+), 32 deletions(-) diff --git a/marker/processors/equation.py b/marker/processors/equation.py index 6bd79fa7..ebc53e54 100644 --- a/marker/processors/equation.py +++ b/marker/processors/equation.py @@ -57,8 +57,6 @@ def __call__(self, document: Document): predictions = self.get_latex_batched(equation_data) for prediction, equation_d in zip(predictions, equation_data): conditions = [ - self.get_total_texify_tokens(prediction) < self.model_max_length, - # Make sure we didn't get to the overall token max, indicates run-on len(prediction) > equation_d["token_count"] * .4, len(prediction.strip()) > 0 ] @@ -78,28 +76,15 @@ def get_batch_size(self): return 2 def get_latex_batched(self, equation_data: List[dict]): - predictions = [""] * len(equation_data) - batch_size = self.get_batch_size() - - for i in tqdm(range(0, len(equation_data), batch_size), desc="Recognizing equations", 
disable=self.disable_tqdm): - # Dynamically set max length to save inference time - min_idx = i - max_idx = min(min_idx + batch_size, len(equation_data)) - - batch_equations = equation_data[min_idx:max_idx] - batch_images = [eq["image"] for eq in batch_equations] - - model_output = self.texify_model( - batch_images - ) - - for j, output in enumerate(model_output): - token_count = self.get_total_texify_tokens(output.text) - if token_count >= self.model_max_length - 1: - output.text = "" - - image_idx = i + j - predictions[image_idx] = output.text + inference_images = [eq["image"] for eq in equation_data] + model_output = self.texify_model(inference_images, batch_size=self.get_batch_size()) + predictions = [output.text for output in model_output] + + for i, pred in enumerate(predictions): + token_count = self.get_total_texify_tokens(pred) + # If we're at the max token length, the prediction may be repetitive or invalid + if token_count >= self.model_max_length - 1: + predictions[i] = "" return predictions def get_total_texify_tokens(self, text): diff --git a/marker/settings.py b/marker/settings.py index 2d416b90..2b1eda90 100644 --- a/marker/settings.py +++ b/marker/settings.py @@ -46,14 +46,6 @@ def MODEL_DTYPE(self) -> torch.dtype: else: return torch.float32 - # Texify model - TEXIFY_MODEL_NAME: str = "vikp/texify" - - @computed_field - @property - def TEXIFY_DTYPE(self) -> torch.dtype: - return torch.float32 if self.TORCH_DEVICE_MODEL == "cpu" else torch.float16 - class Config: env_file = find_dotenv("local.env") extra = "ignore" From dac0f79efc0c0cc19bc2b7ce49b6885d06af2aa1 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 6 Feb 2025 12:26:38 -0500 Subject: [PATCH 17/27] Update property name --- marker/processors/table.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/marker/processors/table.py b/marker/processors/table.py index 75b723c0..11259f54 100644 --- a/marker/processors/table.py +++ b/marker/processors/table.py @@ -29,7 
+29,7 @@ class TableProcessor(BaseProcessor): bool, "Whether to detect boxes for the table recognition model.", ] = False - detector_batch_size: Annotated[ + detection_batch_size: Annotated[ int, "The batch size to use for the table detection model.", "Default is None, which will use the default batch size for the model." @@ -318,7 +318,7 @@ def assign_ocr_lines(self, ocr_blocks: list): [None] * len(det_images), self.detection_model, recognition_batch_size=self.get_recognition_batch_size(), - detection_batch_size=self.get_detector_batch_size() + detection_batch_size=self.get_detection_batch_size() ) for block, ocr_res in zip(ocr_blocks, ocr_results): @@ -333,9 +333,9 @@ def assign_ocr_lines(self, ocr_blocks: list): block["table_text_lines"] = table_cells - def get_detector_batch_size(self): - if self.detector_batch_size is not None: - return self.detector_batch_size + def get_detection_batch_size(self): + if self.detection_batch_size is not None: + return self.detection_batch_size elif settings.TORCH_DEVICE_MODEL == "cuda": return 4 return 4 From 4ceab296d9379dc54ff43b6e47746f556228d118 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 6 Feb 2025 13:34:00 -0500 Subject: [PATCH 18/27] Bump to newer google client lib --- benchmarks/overall/scorers/llm.py | 37 +- benchmarks/table/gemini.py | 35 +- marker/builders/llm_layout.py | 31 +- marker/converters/pdf.py | 1 - marker/processors/llm/llm_complex.py | 20 +- marker/processors/llm/llm_equation.py | 19 +- marker/processors/llm/llm_form.py | 21 +- marker/processors/llm/llm_handwriting.py | 20 +- .../processors/llm/llm_image_description.py | 19 +- marker/processors/llm/llm_table.py | 18 +- marker/processors/llm/llm_table_merge.py | 39 +- marker/processors/llm/llm_text.py | 153 ----- marker/processors/llm/utils.py | 57 +- poetry.lock | 596 +++++++----------- pyproject.toml | 2 +- tests/processors/test_llm_processors.py | 20 - 16 files changed, 351 insertions(+), 737 deletions(-) delete mode 100644 
marker/processors/llm/llm_text.py diff --git a/benchmarks/overall/scorers/llm.py b/benchmarks/overall/scorers/llm.py index 5da61c41..8ee8d138 100644 --- a/benchmarks/overall/scorers/llm.py +++ b/benchmarks/overall/scorers/llm.py @@ -4,8 +4,8 @@ from typing import List from PIL import Image -from google.ai.generativelanguage_v1beta.types import content -from google.api_core.exceptions import ResourceExhausted +from google.genai.errors import APIError +from google import genai import pypdfium2 as pdfium from benchmarks.overall.scorers import BaseScorer, BlockScores @@ -106,15 +106,14 @@ def llm_rater(self, img: Image.Image, markdown: str) -> BlockScores: req_keys = text_keys + score_keys properties = {} for key in req_keys: - content_type = content.Type.INTEGER if key in score_keys else content.Type.STRING - properties[key] = content.Schema(type=content_type) - - response_schema = content.Schema( - type=content.Type.OBJECT, - required=req_keys, - properties=properties - ) + content_type = "INTEGER" if key in score_keys else "STRING" + properties[key] = {"type": content_type} + response_schema = { + "required": req_keys, + "properties": properties, + "type": "OBJECT" + } prompt = rating_prompt.replace("{{markdown}}", markdown) response = self.llm_response_wrapper([img, prompt], response_schema) assert all([k in response for k in req_keys]), f"Missing keys in response: {response}" @@ -124,23 +123,23 @@ def llm_rater(self, img: Image.Image, markdown: str) -> BlockScores: } def llm_response_wrapper(self, prompt, response_schema, depth=0): - import google.generativeai as genai - genai.configure(api_key=settings.GOOGLE_API_KEY) - gemini_model = genai.GenerativeModel("gemini-2.0-flash") + client = genai.Client( + api_key=settings.GOOGLE_API_KEY, + http_options={"timeout": 60000} + ) try: - responses = gemini_model.generate_content( - prompt, - stream=False, - generation_config={ + responses = client.models.generate_content( + model="gemini-2.0-flash", + contents=prompt, + 
config={ "temperature": 0, "response_schema": response_schema, "response_mime_type": "application/json", }, - request_options={'timeout': 60} ) output = responses.candidates[0].content.parts[0].text return json.loads(output) - except ResourceExhausted as e: + except APIError as e: print(f"Hit Gemini rate limit, waiting 120 seconds") time.sleep(120) if depth > 2: diff --git a/benchmarks/table/gemini.py b/benchmarks/table/gemini.py index c58f2a92..5832a90f 100644 --- a/benchmarks/table/gemini.py +++ b/benchmarks/table/gemini.py @@ -1,7 +1,10 @@ import json from PIL import Image -import google.generativeai as genai -from google.ai.generativelanguage_v1beta.types import content +from google import genai +from google.genai import types +from io import BytesIO +from pydantic import BaseModel + from marker.settings import settings prompt = """ @@ -19,30 +22,26 @@ 3. Output only the HTML for the table, starting with the tag and ending with the
tag. """.strip() -genai.configure(api_key=settings.GOOGLE_API_KEY) +class TableSchema(BaseModel): + table_html: str def gemini_table_rec(image: Image.Image): - schema = content.Schema( - type=content.Type.OBJECT, - required=["table_html"], - properties={ - "table_html": content.Schema( - type=content.Type.STRING, - ) - } + client = genai.Client( + api_key=settings.GOOGLE_API_KEY, + http_options={"timeout": 60000} ) - model = genai.GenerativeModel("gemini-2.0-flash") + image_bytes = BytesIO() + image.save(image_bytes, format="PNG") - responses = model.generate_content( - [image, prompt], # According to gemini docs, it performs better if the image is the first element - stream=False, - generation_config={ + responses = client.models.generate_content( + model="gemini-2.0-flash", + contents=[types.Part.from_bytes(data=image_bytes.getvalue(), mime_type="image/png"), prompt], # According to gemini docs, it performs better if the image is the first element + config={ "temperature": 0, - "response_schema": schema, + "response_schema": TableSchema, "response_mime_type": "application/json", }, - request_options={'timeout': 60} ) output = responses.candidates[0].content.parts[0].text diff --git a/marker/builders/llm_layout.py b/marker/builders/llm_layout.py index 5e9881fd..8dbccc1f 100644 --- a/marker/builders/llm_layout.py +++ b/marker/builders/llm_layout.py @@ -1,10 +1,10 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Annotated -from google.ai.generativelanguage_v1beta.types import content from surya.layout import LayoutPredictor from surya.ocr_error import OCRErrorPredictor from tqdm import tqdm +from pydantic import BaseModel from marker.builders.layout import LayoutBuilder from marker.processors.llm import GoogleModel @@ -41,7 +41,7 @@ class LLMLayoutBuilder(LayoutBuilder): max_retries: Annotated[ int, "The maximum number of retries to use for the Gemini model.", - ] = 3 + ] = 2 max_concurrency: Annotated[ int, "The maximum number 
of concurrent requests to make to the Gemini model.", @@ -158,21 +158,15 @@ def process_block_complex_relabeling(self, document: Document, page: PageGroup, def process_block_relabeling(self, document: Document, page: PageGroup, block: Block, prompt: str): image = self.extract_image(document, block) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["image_description", "label"], - properties={ - "image_description": content.Schema( - type=content.Type.STRING, - ), - "label": content.Schema( - type=content.Type.STRING, - ), - }, - ) - response = self.model.generate_response(prompt, image, block, response_schema) + response = self.model.generate_response( + prompt, + image, + block, + LayoutSchema, + max_retries=self.max_retries, + timeout=self.timeout + ) generated_label = None if response and "label" in response: generated_label = response["label"] @@ -188,3 +182,8 @@ def process_block_relabeling(self, document: Document, page: PageGroup, block: B def extract_image(self, document: Document, image_block: Block, expand: float = 0.01): return image_block.get_image(document, highres=False, expansion=(expand, expand)) + + +class LayoutSchema(BaseModel): + image_description: str + label: str diff --git a/marker/converters/pdf.py b/marker/converters/pdf.py index 01f69695..54003b82 100644 --- a/marker/converters/pdf.py +++ b/marker/converters/pdf.py @@ -76,7 +76,6 @@ class PdfConverter(BaseConverter): LLMTableMergeProcessor, LLMFormProcessor, TextProcessor, - LLMTextProcessor, LLMComplexRegionProcessor, LLMImageDescriptionProcessor, LLMEquationProcessor, diff --git a/marker/processors/llm/llm_complex.py b/marker/processors/llm/llm_complex.py index 6d58a077..72966d62 100644 --- a/marker/processors/llm/llm_complex.py +++ b/marker/processors/llm/llm_complex.py @@ -1,9 +1,8 @@ import markdown2 +from pydantic import BaseModel from marker.processors.llm import BaseLLMProcessor -from google.ai.generativelanguage_v1beta.types import content - 
from marker.schema import BlockTypes from marker.schema.blocks import Block from marker.schema.document import Document @@ -55,18 +54,8 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): text = block.raw_text(document) prompt = self.complex_region_prompt.replace("{extracted_text}", text) image = self.extract_image(document, block) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["corrected_markdown"], - properties={ - "corrected_markdown": content.Schema( - type=content.Type.STRING - ) - }, - ) - response = self.model.generate_response(prompt, image, block, response_schema) + response = self.model.generate_response(prompt, image, block, ComplexSchema) if not response or "corrected_markdown" not in response: block.update_metadata(llm_error_count=1) @@ -85,4 +74,7 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): # Convert LLM markdown to html corrected_markdown = corrected_markdown.strip().lstrip("```markdown").rstrip("```").strip() - block.html = markdown2.markdown(corrected_markdown, extras=["tables"]) \ No newline at end of file + block.html = markdown2.markdown(corrected_markdown, extras=["tables"]) + +class ComplexSchema(BaseModel): + corrected_markdown: str \ No newline at end of file diff --git a/marker/processors/llm/llm_equation.py b/marker/processors/llm/llm_equation.py index 74cfc4a3..89d0318d 100644 --- a/marker/processors/llm/llm_equation.py +++ b/marker/processors/llm/llm_equation.py @@ -1,6 +1,6 @@ -from marker.processors.llm import BaseLLMProcessor +from pydantic import BaseModel -from google.ai.generativelanguage_v1beta.types import content +from marker.processors.llm import BaseLLMProcessor from marker.schema import BlockTypes from marker.schema.blocks import Equation @@ -67,18 +67,8 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Equation prompt = self.equation_latex_prompt.replace("{equation}", text) image = 
self.extract_image(document, block) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["html_equation"], - properties={ - "html_equation": content.Schema( - type=content.Type.STRING - ) - }, - ) - response = self.model.generate_response(prompt, image, block, response_schema) + response = self.model.generate_response(prompt, image, block, EquationSchema) if not response or "html_equation" not in response: block.update_metadata(llm_error_count=1) @@ -89,3 +79,6 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Equation block.update_metadata(llm_error_count=1) return block.html = html_equation + +class EquationSchema(BaseModel): + html_equation: str diff --git a/marker/processors/llm/llm_form.py b/marker/processors/llm/llm_form.py index fc66f155..8fb4a32a 100644 --- a/marker/processors/llm/llm_form.py +++ b/marker/processors/llm/llm_form.py @@ -1,6 +1,6 @@ -from marker.processors.llm import BaseLLMProcessor +from pydantic import BaseModel -from google.ai.generativelanguage_v1beta.types import content +from marker.processors.llm import BaseLLMProcessor from marker.schema import BlockTypes from marker.schema.blocks import Block @@ -73,18 +73,8 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): prompt = self.form_rewriting_prompt.replace("{block_html}", block_html) image = self.extract_image(document, block) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["corrected_html"], - properties={ - "corrected_html": content.Schema( - type=content.Type.STRING - ) - }, - ) - response = self.model.generate_response(prompt, image, block, response_schema) + response = self.model.generate_response(prompt, image, block, FormSchema) if not response or "corrected_html" not in response: block.update_metadata(llm_error_count=1) @@ -102,4 +92,7 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): return corrected_html = 
corrected_html.strip().lstrip("```html").rstrip("```").strip() - block.html = corrected_html \ No newline at end of file + block.html = corrected_html + +class FormSchema(BaseModel): + corrected_html: str \ No newline at end of file diff --git a/marker/processors/llm/llm_handwriting.py b/marker/processors/llm/llm_handwriting.py index 10a0c25b..760efb35 100644 --- a/marker/processors/llm/llm_handwriting.py +++ b/marker/processors/llm/llm_handwriting.py @@ -1,9 +1,8 @@ import markdown2 +from pydantic import BaseModel from marker.processors.llm import BaseLLMProcessor -from google.ai.generativelanguage_v1beta.types import content - from marker.schema import BlockTypes from marker.schema.blocks import Handwriting, Text from marker.schema.document import Document @@ -49,18 +48,8 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Handwrit prompt = self.handwriting_generation_prompt image = self.extract_image(document, block) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["markdown"], - properties={ - "markdown": content.Schema( - type=content.Type.STRING - ) - }, - ) - - response = self.model.generate_response(prompt, image, block, response_schema) + + response = self.model.generate_response(prompt, image, block, HandwritingSchema) if not response or "markdown" not in response: block.update_metadata(llm_error_count=1) @@ -73,3 +62,6 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Handwrit markdown = markdown.strip().lstrip("```markdown").rstrip("```").strip() block.html = markdown2.markdown(markdown, extras=["tables"]) + +class HandwritingSchema(BaseModel): + markdown: str diff --git a/marker/processors/llm/llm_image_description.py b/marker/processors/llm/llm_image_description.py index a08e0dc9..c125df0f 100644 --- a/marker/processors/llm/llm_image_description.py +++ b/marker/processors/llm/llm_image_description.py @@ -1,6 +1,6 @@ -from marker.processors.llm import BaseLLMProcessor 
+from pydantic import BaseModel -from google.ai.generativelanguage_v1beta.types import content +from marker.processors.llm import BaseLLMProcessor from marker.schema import BlockTypes from marker.schema.blocks import Block @@ -49,18 +49,8 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): prompt = self.image_description_prompt.replace("{raw_text}", block.raw_text(document)) image = self.extract_image(document, block) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["image_description"], - properties={ - "image_description": content.Schema( - type=content.Type.STRING - ) - }, - ) - response = self.model.generate_response(prompt, image, block, response_schema) + response = self.model.generate_response(prompt, image, block, ImageSchema) if not response or "image_description" not in response: block.update_metadata(llm_error_count=1) @@ -72,3 +62,6 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): return block.description = image_description + +class ImageSchema(BaseModel): + image_description: str diff --git a/marker/processors/llm/llm_table.py b/marker/processors/llm/llm_table.py index e0c738a0..584e6c04 100644 --- a/marker/processors/llm/llm_table.py +++ b/marker/processors/llm/llm_table.py @@ -1,8 +1,8 @@ from typing import Annotated, List, Tuple from bs4 import BeautifulSoup -from google.ai.generativelanguage_v1beta.types import content from PIL import Image +from pydantic import BaseModel from marker.processors.llm import BaseLLMProcessor from marker.schema import BlockTypes @@ -133,18 +133,7 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Table): def rewrite_single_chunk(self, page: PageGroup, block: Block, block_html: str, children: List[TableCell], image: Image.Image): prompt = self.table_rewriting_prompt.replace("{block_html}", block_html) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - 
required=["corrected_html"], - properties={ - "corrected_html": content.Schema( - type=content.Type.STRING - ) - }, - ) - - response = self.model.generate_response(prompt, image, block, response_schema) + response = self.model.generate_response(prompt, image, block, TableSchema) if not response or "corrected_html" not in response: block.update_metadata(llm_error_count=1) @@ -246,3 +235,6 @@ def parse_html_table(self, html_text: str, block: Block, page: PageGroup) -> Lis cur_col += colspan return cells + +class TableSchema(BaseModel): + correct_html: str diff --git a/marker/processors/llm/llm_table_merge.py b/marker/processors/llm/llm_table_merge.py index a3f74396..c34ea1dc 100644 --- a/marker/processors/llm/llm_table_merge.py +++ b/marker/processors/llm/llm_table_merge.py @@ -1,7 +1,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Annotated, List, Tuple, Literal -from google.ai.generativelanguage_v1beta.types import content +from pydantic import BaseModel from tqdm import tqdm from PIL import Image @@ -234,36 +234,11 @@ def process_rewriting(self, document: Document, blocks: List[Block]): prompt = self.table_merge_prompt.replace("{{table1}}", start_html).replace("{{table2}}", curr_html) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["table1_description", "table2_description", "explanation", "merge", "direction"], - properties={ - "table1_description": content.Schema( - type=content.Type.STRING - ), - "table2_description": content.Schema( - type=content.Type.STRING - ), - "explanation": content.Schema( - type=content.Type.STRING - ), - "merge": content.Schema( - type=content.Type.STRING, - enum=["true", "false"] - ), - "direction": content.Schema( - type=content.Type.STRING, - enum=["bottom", "right"] - ), - }, - ) - response = self.model.generate_response( prompt, [start_image, curr_image], curr_block, - response_schema + MergeSchema, ) if not response or ("direction" not in response or 
"merge" not in response): @@ -335,4 +310,12 @@ def join_images(image1: Image.Image, image2: Image.Image, direction: Literal['ri new_img = Image.new('RGB', (new_width, new_height), 'white') new_img.paste(image1, (0, 0)) new_img.paste(image2, (0, h1)) - return new_img \ No newline at end of file + return new_img + + +class MergeSchema(BaseModel): + table1_description: str + table2_description: str + explanation: str + merge: Literal["true", "false"] + direction: Literal["bottom", "right"] \ No newline at end of file diff --git a/marker/processors/llm/llm_text.py b/marker/processors/llm/llm_text.py deleted file mode 100644 index 8a71b54e..00000000 --- a/marker/processors/llm/llm_text.py +++ /dev/null @@ -1,153 +0,0 @@ -import json -import textwrap - -from marker.processors.llm import BaseLLMProcessor -from bs4 import BeautifulSoup -from google.ai.generativelanguage_v1beta.types import content -from marker.schema import BlockTypes -from marker.schema.blocks import Block -from marker.schema.document import Document -from marker.schema.groups.page import PageGroup -from marker.schema.registry import get_block_class -from marker.schema.text.span import Span - - -class LLMTextProcessor(BaseLLMProcessor): - block_types = (BlockTypes.TextInlineMath,) - text_math_rewriting_prompt = """You are a text correction expert specializing in accurately reproducing text from images. -You will receive an image of a text block and a set of extracted lines corresponding to the text in the image. -Your task is to correct any errors in the extracted lines, including math, formatting, and other inaccuracies, and output the corrected lines in a JSON format. -The number of output lines MUST match the number of input lines. Stay as faithful to the original text as possible. - -**Instructions:** - -1. Carefully examine the provided text block image . -2. Analyze the extracted lines. -3. For each extracted line, compare it to the corresponding line in the image. -4. 
Correct any errors in the extracted line, including: - * Inline math: Ensure all mathematical expressions are correctly formatted and rendered. - * Formatting: Maintain consistent formatting with the text block image, including spacing, indentation, and special characters. - * Other inaccuracies: If the image is handwritten then you may correct any spelling errors, or other discrepancies. -5. Do not remove any formatting i.e bold, italics, etc from the extracted lines unless it is necessary to correct the error. -6. Ensure that inline math is properly with inline math tags. -7. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines. -8. Output the corrected lines in JSON format with a "lines" field, as shown in the example below. - -**Example:** - -Input: -``` -{ - "extracted_lines": [ - "Adversarial training (AT) [23], which aims to minimize\n", - "the model's risk under the worst-case perturbations, is cur-\n", - "rently the most effective approach for improving the robust-\n", - "ness of deep neural networks. For a given neural network\n", - "f(x, w) with parameters w, the optimization objective of\n", - "AT can be formulated as follows:\n" - ] -} -``` - -Output: - -```json -{ - "corrected_lines": [ - "Adversarial training (AT) [23], which aims to minimize\n", - "the model's risk under the worst-case perturbations, is cur-\n", - "rently the most effective approach for improving the robust-\n", - "ness of deep neural networks. 
For a given neural network\n", - "f(x, w) with parameters w, the optimization objective of\n", - "AT can be formulated as follows:\n" - ] -} -``` - -**Input:** -```json -{extracted_lines} -``` -""" - - def process_rewriting(self, document: Document, page: PageGroup, block: Block): - SpanClass: Span = get_block_class(BlockTypes.Span) - - text_lines = block.contained_blocks(document, (BlockTypes.Line,)) - extracted_lines = [line.formatted_text(document) for line in text_lines] - - prompt = self.text_math_rewriting_prompt.replace("{extracted_lines}", json.dumps({"extracted_lines": extracted_lines}, indent=2)) - image = self.extract_image(document, block) - response_schema = content.Schema( - type=content.Type.OBJECT, - enum=[], - required=["corrected_lines"], - properties={ - "corrected_lines": content.Schema( - type=content.Type.ARRAY, - items=content.Schema( - type=content.Type.STRING, - ), - ) - }, - ) - - response = self.model.generate_response(prompt, image, block, response_schema) - if not response or "corrected_lines" not in response: - block.update_metadata(llm_error_count=1) - return - - corrected_lines = response["corrected_lines"] - if not corrected_lines or len(corrected_lines) != len(extracted_lines): - block.update_metadata(llm_error_count=1) - return - - for text_line, corrected_text in zip(text_lines, corrected_lines): - text_line.structure = [] - corrected_spans = self.text_to_spans(corrected_text) - - for span_idx, span in enumerate(corrected_spans): - if span_idx == len(corrected_spans) - 1: - span['content'] += "\n" - - span_block = page.add_full_block( - SpanClass( - polygon=text_line.polygon, - text=span['content'], - font='Unknown', - font_weight=0, - font_size=0, - minimum_position=0, - maximum_position=0, - formats=[span['type']], - page_id=text_line.page_id, - text_extraction_method="gemini", - ) - ) - text_line.structure.append(span_block.id) - - def text_to_spans(self, text): - soup = BeautifulSoup(text, 'html.parser') - - tag_types = { - 
'b': 'bold', - 'i': 'italic', - 'math': 'math' - } - spans = [] - - for element in soup.descendants: - if not len(list(element.parents)) == 1: - continue - if element.name in tag_types: - spans.append({ - 'type': tag_types[element.name], - 'content': element.get_text() - }) - elif element.string: - spans.append({ - 'type': 'plain', - 'content': element.string - }) - - return spans diff --git a/marker/processors/llm/utils.py b/marker/processors/llm/utils.py index da7be67f..feefbec9 100644 --- a/marker/processors/llm/utils.py +++ b/marker/processors/llm/utils.py @@ -1,13 +1,16 @@ import json import time +from io import BytesIO from typing import List import PIL -import google.generativeai as genai -from google.ai.generativelanguage_v1beta.types import content -from google.api_core.exceptions import ResourceExhausted +from google import genai +from google.genai import types +from google.genai.errors import APIError +from pydantic import BaseModel from marker.schema.blocks import Block +from marker.settings import settings class GoogleModel: @@ -17,45 +20,59 @@ def __init__(self, api_key: str, model_name: str): self.api_key = api_key self.model_name = model_name - self.model = self.configure_google_model() - def configure_google_model(self): - genai.configure(api_key=self.api_key) - return genai.GenerativeModel(self.model_name) + def get_google_client(self, timeout: int = 60): + return genai.Client( + api_key=settings.GOOGLE_API_KEY, + http_options={"timeout": timeout * 1000} # Convert to milliseconds + ) + + def img_to_bytes(self, img: PIL.Image.Image): + image_bytes = BytesIO() + img.save(image_bytes, format="PNG") + return image_bytes.getvalue() def generate_response( self, prompt: str, image: PIL.Image.Image | List[PIL.Image.Image], block: Block, - response_schema: content.Schema, - max_retries: int = 3, + response_schema: type[BaseModel], + max_retries: int = 2, timeout: int = 60 ): if not isinstance(image, list): image = [image] + + client = 
self.get_google_client(timeout=timeout) + image_parts = [types.Part.from_bytes(data=self.img_to_bytes(img), mime_type="image/png") for img in image] + tries = 0 while tries < max_retries: try: - responses = self.model.generate_content( - image + [prompt], # According to gemini docs, it performs better if the image is the first element - stream=False, - generation_config={ + responses = client.models.generate_content( + model="gemini-2.0-flash", + contents=image_parts + [prompt], # According to gemini docs, it performs better if the image is the first element + config={ "temperature": 0, "response_schema": response_schema, "response_mime_type": "application/json", - }, - request_options={'timeout': timeout} + } ) output = responses.candidates[0].content.parts[0].text total_tokens = responses.usage_metadata.total_token_count block.update_metadata(llm_tokens_used=total_tokens, llm_request_count=1) return json.loads(output) - except ResourceExhausted as e: - tries += 1 - wait_time = tries * 3 - print(f"ResourceExhausted: {e}. Retrying in {wait_time} seconds... (Attempt {tries}/{max_retries})") - time.sleep(wait_time) + except APIError as e: + if e.code == 429: + # Rate limit exceeded + tries += 1 + wait_time = tries * 3 + print(f"APIError: {e}. Retrying in {wait_time} seconds... 
(Attempt {tries}/{max_retries})") + time.sleep(wait_time) + else: + print(e) + break except Exception as e: print(e) break diff --git a/poetry.lock b/poetry.lock index f7db7b6c..5904e131 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,87 +13,92 @@ files = [ [[package]] name = "aiohttp" -version = "3.11.11" +version = "3.11.12" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" files = [ - {file = "aiohttp-3.11.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a60804bff28662cbcf340a4d61598891f12eea3a66af48ecfdc975ceec21e3c8"}, - {file = "aiohttp-3.11.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b4fa1cb5f270fb3eab079536b764ad740bb749ce69a94d4ec30ceee1b5940d5"}, - {file = "aiohttp-3.11.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:731468f555656767cda219ab42e033355fe48c85fbe3ba83a349631541715ba2"}, - {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb23d8bb86282b342481cad4370ea0853a39e4a32a0042bb52ca6bdde132df43"}, - {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f047569d655f81cb70ea5be942ee5d4421b6219c3f05d131f64088c73bb0917f"}, - {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd7659baae9ccf94ae5fe8bfaa2c7bc2e94d24611528395ce88d009107e00c6d"}, - {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af01e42ad87ae24932138f154105e88da13ce7d202a6de93fafdafb2883a00ef"}, - {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5854be2f3e5a729800bac57a8d76af464e160f19676ab6aea74bde18ad19d438"}, - {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6526e5fb4e14f4bbf30411216780c9967c20c5a55f2f51d3abd6de68320cc2f3"}, - {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:85992ee30a31835fc482468637b3e5bd085fa8fe9392ba0bdcbdc1ef5e9e3c55"}, - {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:88a12ad8ccf325a8a5ed80e6d7c3bdc247d66175afedbe104ee2aaca72960d8e"}, - {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0a6d3fbf2232e3a08c41eca81ae4f1dff3d8f1a30bae415ebe0af2d2458b8a33"}, - {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84a585799c58b795573c7fa9b84c455adf3e1d72f19a2bf498b54a95ae0d194c"}, - {file = "aiohttp-3.11.11-cp310-cp310-win32.whl", hash = "sha256:bfde76a8f430cf5c5584553adf9926534352251d379dcb266ad2b93c54a29745"}, - {file = "aiohttp-3.11.11-cp310-cp310-win_amd64.whl", hash = "sha256:0fd82b8e9c383af11d2b26f27a478640b6b83d669440c0a71481f7c865a51da9"}, - {file = "aiohttp-3.11.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ba74ec819177af1ef7f59063c6d35a214a8fde6f987f7661f4f0eecc468a8f76"}, - {file = "aiohttp-3.11.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4af57160800b7a815f3fe0eba9b46bf28aafc195555f1824555fa2cfab6c1538"}, - {file = "aiohttp-3.11.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffa336210cf9cd8ed117011085817d00abe4c08f99968deef0013ea283547204"}, - {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81b8fe282183e4a3c7a1b72f5ade1094ed1c6345a8f153506d114af5bf8accd9"}, - {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3af41686ccec6a0f2bdc66686dc0f403c41ac2089f80e2214a0f82d001052c03"}, - {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70d1f9dde0e5dd9e292a6d4d00058737052b01f3532f69c0c65818dac26dc287"}, - {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:249cc6912405917344192b9f9ea5cd5b139d49e0d2f5c7f70bdfaf6b4dbf3a2e"}, - {file = 
"aiohttp-3.11.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0eb98d90b6690827dcc84c246811feeb4e1eea683c0eac6caed7549be9c84665"}, - {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec82bf1fda6cecce7f7b915f9196601a1bd1a3079796b76d16ae4cce6d0ef89b"}, - {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9fd46ce0845cfe28f108888b3ab17abff84ff695e01e73657eec3f96d72eef34"}, - {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bd176afcf8f5d2aed50c3647d4925d0db0579d96f75a31e77cbaf67d8a87742d"}, - {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:ec2aa89305006fba9ffb98970db6c8221541be7bee4c1d027421d6f6df7d1ce2"}, - {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:92cde43018a2e17d48bb09c79e4d4cb0e236de5063ce897a5e40ac7cb4878773"}, - {file = "aiohttp-3.11.11-cp311-cp311-win32.whl", hash = "sha256:aba807f9569455cba566882c8938f1a549f205ee43c27b126e5450dc9f83cc62"}, - {file = "aiohttp-3.11.11-cp311-cp311-win_amd64.whl", hash = "sha256:ae545f31489548c87b0cced5755cfe5a5308d00407000e72c4fa30b19c3220ac"}, - {file = "aiohttp-3.11.11-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e595c591a48bbc295ebf47cb91aebf9bd32f3ff76749ecf282ea7f9f6bb73886"}, - {file = "aiohttp-3.11.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3ea1b59dc06396b0b424740a10a0a63974c725b1c64736ff788a3689d36c02d2"}, - {file = "aiohttp-3.11.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8811f3f098a78ffa16e0ea36dffd577eb031aea797cbdba81be039a4169e242c"}, - {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7227b87a355ce1f4bf83bfae4399b1f5bb42e0259cb9405824bd03d2f4336a"}, - {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d40f9da8cabbf295d3a9dae1295c69975b86d941bc20f0a087f0477fa0a66231"}, 
- {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffb3dc385f6bb1568aa974fe65da84723210e5d9707e360e9ecb51f59406cd2e"}, - {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8f5f7515f3552d899c61202d99dcb17d6e3b0de777900405611cd747cecd1b8"}, - {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3499c7ffbfd9c6a3d8d6a2b01c26639da7e43d47c7b4f788016226b1e711caa8"}, - {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8e2bf8029dbf0810c7bfbc3e594b51c4cc9101fbffb583a3923aea184724203c"}, - {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b6212a60e5c482ef90f2d788835387070a88d52cf6241d3916733c9176d39eab"}, - {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d119fafe7b634dbfa25a8c597718e69a930e4847f0b88e172744be24515140da"}, - {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:6fba278063559acc730abf49845d0e9a9e1ba74f85f0ee6efd5803f08b285853"}, - {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92fc484e34b733704ad77210c7957679c5c3877bd1e6b6d74b185e9320cc716e"}, - {file = "aiohttp-3.11.11-cp312-cp312-win32.whl", hash = "sha256:9f5b3c1ed63c8fa937a920b6c1bec78b74ee09593b3f5b979ab2ae5ef60d7600"}, - {file = "aiohttp-3.11.11-cp312-cp312-win_amd64.whl", hash = "sha256:1e69966ea6ef0c14ee53ef7a3d68b564cc408121ea56c0caa2dc918c1b2f553d"}, - {file = "aiohttp-3.11.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:541d823548ab69d13d23730a06f97460f4238ad2e5ed966aaf850d7c369782d9"}, - {file = "aiohttp-3.11.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:929f3ed33743a49ab127c58c3e0a827de0664bfcda566108989a14068f820194"}, - {file = "aiohttp-3.11.11-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:0882c2820fd0132240edbb4a51eb8ceb6eef8181db9ad5291ab3332e0d71df5f"}, - {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b63de12e44935d5aca7ed7ed98a255a11e5cb47f83a9fded7a5e41c40277d104"}, - {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa54f8ef31d23c506910c21163f22b124facb573bff73930735cf9fe38bf7dff"}, - {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a344d5dc18074e3872777b62f5f7d584ae4344cd6006c17ba12103759d407af3"}, - {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7fb429ab1aafa1f48578eb315ca45bd46e9c37de11fe45c7f5f4138091e2f1"}, - {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c341c7d868750e31961d6d8e60ff040fb9d3d3a46d77fd85e1ab8e76c3e9a5c4"}, - {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ed9ee95614a71e87f1a70bc81603f6c6760128b140bc4030abe6abaa988f1c3d"}, - {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:de8d38f1c2810fa2a4f1d995a2e9c70bb8737b18da04ac2afbf3971f65781d87"}, - {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a9b7371665d4f00deb8f32208c7c5e652059b0fda41cf6dbcac6114a041f1cc2"}, - {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:620598717fce1b3bd14dd09947ea53e1ad510317c85dda2c9c65b622edc96b12"}, - {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bf8d9bfee991d8acc72d060d53860f356e07a50f0e0d09a8dfedea1c554dd0d5"}, - {file = "aiohttp-3.11.11-cp313-cp313-win32.whl", hash = "sha256:9d73ee3725b7a737ad86c2eac5c57a4a97793d9f442599bea5ec67ac9f4bdc3d"}, - {file = "aiohttp-3.11.11-cp313-cp313-win_amd64.whl", hash = "sha256:c7a06301c2fb096bdb0bd25fe2011531c1453b9f2c163c8031600ec73af1cc99"}, - {file = 
"aiohttp-3.11.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3e23419d832d969f659c208557de4a123e30a10d26e1e14b73431d3c13444c2e"}, - {file = "aiohttp-3.11.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21fef42317cf02e05d3b09c028712e1d73a9606f02467fd803f7c1f39cc59add"}, - {file = "aiohttp-3.11.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1f21bb8d0235fc10c09ce1d11ffbd40fc50d3f08a89e4cf3a0c503dc2562247a"}, - {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1642eceeaa5ab6c9b6dfeaaa626ae314d808188ab23ae196a34c9d97efb68350"}, - {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2170816e34e10f2fd120f603e951630f8a112e1be3b60963a1f159f5699059a6"}, - {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8be8508d110d93061197fd2d6a74f7401f73b6d12f8822bbcd6d74f2b55d71b1"}, - {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eed954b161e6b9b65f6be446ed448ed3921763cc432053ceb606f89d793927e"}, - {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6c9af134da4bc9b3bd3e6a70072509f295d10ee60c697826225b60b9959acdd"}, - {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:44167fc6a763d534a6908bdb2592269b4bf30a03239bcb1654781adf5e49caf1"}, - {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:479b8c6ebd12aedfe64563b85920525d05d394b85f166b7873c8bde6da612f9c"}, - {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:10b4ff0ad793d98605958089fabfa350e8e62bd5d40aa65cdc69d6785859f94e"}, - {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:b540bd67cfb54e6f0865ceccd9979687210d7ed1a1cc8c01f8e67e2f1e883d28"}, - {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:1dac54e8ce2ed83b1f6b1a54005c87dfed139cf3f777fdc8afc76e7841101226"}, - {file = "aiohttp-3.11.11-cp39-cp39-win32.whl", hash = "sha256:568c1236b2fde93b7720f95a890741854c1200fba4a3471ff48b2934d2d93fd3"}, - {file = "aiohttp-3.11.11-cp39-cp39-win_amd64.whl", hash = "sha256:943a8b052e54dfd6439fd7989f67fc6a7f2138d0a2cf0a7de5f18aa4fe7eb3b1"}, - {file = "aiohttp-3.11.11.tar.gz", hash = "sha256:bb49c7f1e6ebf3821a42d81d494f538107610c3a705987f53068546b0e90303e"}, + {file = "aiohttp-3.11.12-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:aa8a8caca81c0a3e765f19c6953416c58e2f4cc1b84829af01dd1c771bb2f91f"}, + {file = "aiohttp-3.11.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:84ede78acde96ca57f6cf8ccb8a13fbaf569f6011b9a52f870c662d4dc8cd854"}, + {file = "aiohttp-3.11.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:584096938a001378484aa4ee54e05dc79c7b9dd933e271c744a97b3b6f644957"}, + {file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:392432a2dde22b86f70dd4a0e9671a349446c93965f261dbaecfaf28813e5c42"}, + {file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:88d385b8e7f3a870146bf5ea31786ef7463e99eb59e31db56e2315535d811f55"}, + {file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b10a47e5390c4b30a0d58ee12581003be52eedd506862ab7f97da7a66805befb"}, + {file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b5263dcede17b6b0c41ef0c3ccce847d82a7da98709e75cf7efde3e9e3b5cae"}, + {file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50c5c7b8aa5443304c55c262c5693b108c35a3b61ef961f1e782dd52a2f559c7"}, + {file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d1c031a7572f62f66f1257db37ddab4cb98bfaf9b9434a3b4840bf3560f5e788"}, + {file = 
"aiohttp-3.11.12-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:7e44eba534381dd2687be50cbd5f2daded21575242ecfdaf86bbeecbc38dae8e"}, + {file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:145a73850926018ec1681e734cedcf2716d6a8697d90da11284043b745c286d5"}, + {file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:2c311e2f63e42c1bf86361d11e2c4a59f25d9e7aabdbdf53dc38b885c5435cdb"}, + {file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ea756b5a7bac046d202a9a3889b9a92219f885481d78cd318db85b15cc0b7bcf"}, + {file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:526c900397f3bbc2db9cb360ce9c35134c908961cdd0ac25b1ae6ffcaa2507ff"}, + {file = "aiohttp-3.11.12-cp310-cp310-win32.whl", hash = "sha256:b8d3bb96c147b39c02d3db086899679f31958c5d81c494ef0fc9ef5bb1359b3d"}, + {file = "aiohttp-3.11.12-cp310-cp310-win_amd64.whl", hash = "sha256:7fe3d65279bfbee8de0fb4f8c17fc4e893eed2dba21b2f680e930cc2b09075c5"}, + {file = "aiohttp-3.11.12-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:87a2e00bf17da098d90d4145375f1d985a81605267e7f9377ff94e55c5d769eb"}, + {file = "aiohttp-3.11.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b34508f1cd928ce915ed09682d11307ba4b37d0708d1f28e5774c07a7674cac9"}, + {file = "aiohttp-3.11.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:936d8a4f0f7081327014742cd51d320296b56aa6d324461a13724ab05f4b2933"}, + {file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de1378f72def7dfb5dbd73d86c19eda0ea7b0a6873910cc37d57e80f10d64e1"}, + {file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b9d45dbb3aaec05cf01525ee1a7ac72de46a8c425cb75c003acd29f76b1ffe94"}, + {file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:930ffa1925393381e1e0a9b82137fa7b34c92a019b521cf9f41263976666a0d6"}, + {file = 
"aiohttp-3.11.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8340def6737118f5429a5df4e88f440746b791f8f1c4ce4ad8a595f42c980bd5"}, + {file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4016e383f91f2814e48ed61e6bda7d24c4d7f2402c75dd28f7e1027ae44ea204"}, + {file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3c0600bcc1adfaaac321422d615939ef300df81e165f6522ad096b73439c0f58"}, + {file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:0450ada317a65383b7cce9576096150fdb97396dcfe559109b403c7242faffef"}, + {file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:850ff6155371fd802a280f8d369d4e15d69434651b844bde566ce97ee2277420"}, + {file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:8fd12d0f989c6099e7b0f30dc6e0d1e05499f3337461f0b2b0dadea6c64b89df"}, + {file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:76719dd521c20a58a6c256d058547b3a9595d1d885b830013366e27011ffe804"}, + {file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:97fe431f2ed646a3b56142fc81d238abcbaff08548d6912acb0b19a0cadc146b"}, + {file = "aiohttp-3.11.12-cp311-cp311-win32.whl", hash = "sha256:e10c440d142fa8b32cfdb194caf60ceeceb3e49807072e0dc3a8887ea80e8c16"}, + {file = "aiohttp-3.11.12-cp311-cp311-win_amd64.whl", hash = "sha256:246067ba0cf5560cf42e775069c5d80a8989d14a7ded21af529a4e10e3e0f0e6"}, + {file = "aiohttp-3.11.12-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e392804a38353900c3fd8b7cacbea5132888f7129f8e241915e90b85f00e3250"}, + {file = "aiohttp-3.11.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8fa1510b96c08aaad49303ab11f8803787c99222288f310a62f493faf883ede1"}, + {file = "aiohttp-3.11.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dc065a4285307607df3f3686363e7f8bdd0d8ab35f12226362a847731516e42c"}, + {file = 
"aiohttp-3.11.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddb31f8474695cd61fc9455c644fc1606c164b93bff2490390d90464b4655df"}, + {file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dec0000d2d8621d8015c293e24589d46fa218637d820894cb7356c77eca3259"}, + {file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e3552fe98e90fdf5918c04769f338a87fa4f00f3b28830ea9b78b1bdc6140e0d"}, + {file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dfe7f984f28a8ae94ff3a7953cd9678550dbd2a1f9bda5dd9c5ae627744c78e"}, + {file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a481a574af914b6e84624412666cbfbe531a05667ca197804ecc19c97b8ab1b0"}, + {file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1987770fb4887560363b0e1a9b75aa303e447433c41284d3af2840a2f226d6e0"}, + {file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:a4ac6a0f0f6402854adca4e3259a623f5c82ec3f0c049374133bcb243132baf9"}, + {file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c96a43822f1f9f69cc5c3706af33239489a6294be486a0447fb71380070d4d5f"}, + {file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a5e69046f83c0d3cb8f0d5bd9b8838271b1bc898e01562a04398e160953e8eb9"}, + {file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:68d54234c8d76d8ef74744f9f9fc6324f1508129e23da8883771cdbb5818cbef"}, + {file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c9fd9dcf9c91affe71654ef77426f5cf8489305e1c66ed4816f5a21874b094b9"}, + {file = "aiohttp-3.11.12-cp312-cp312-win32.whl", hash = "sha256:0ed49efcd0dc1611378beadbd97beb5d9ca8fe48579fc04a6ed0844072261b6a"}, + {file = "aiohttp-3.11.12-cp312-cp312-win_amd64.whl", hash = 
"sha256:54775858c7f2f214476773ce785a19ee81d1294a6bedc5cc17225355aab74802"}, + {file = "aiohttp-3.11.12-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:413ad794dccb19453e2b97c2375f2ca3cdf34dc50d18cc2693bd5aed7d16f4b9"}, + {file = "aiohttp-3.11.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4a93d28ed4b4b39e6f46fd240896c29b686b75e39cc6992692e3922ff6982b4c"}, + {file = "aiohttp-3.11.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d589264dbba3b16e8951b6f145d1e6b883094075283dafcab4cdd564a9e353a0"}, + {file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5148ca8955affdfeb864aca158ecae11030e952b25b3ae15d4e2b5ba299bad2"}, + {file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:525410e0790aab036492eeea913858989c4cb070ff373ec3bc322d700bdf47c1"}, + {file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bd8695be2c80b665ae3f05cb584093a1e59c35ecb7d794d1edd96e8cc9201d7"}, + {file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0203433121484b32646a5f5ea93ae86f3d9559d7243f07e8c0eab5ff8e3f70e"}, + {file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40cd36749a1035c34ba8d8aaf221b91ca3d111532e5ccb5fa8c3703ab1b967ed"}, + {file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a7442662afebbf7b4c6d28cb7aab9e9ce3a5df055fc4116cc7228192ad6cb484"}, + {file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:8a2fb742ef378284a50766e985804bd6adb5adb5aa781100b09befdbfa757b65"}, + {file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2cee3b117a8d13ab98b38d5b6bdcd040cfb4181068d05ce0c474ec9db5f3c5bb"}, + {file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:f6a19bcab7fbd8f8649d6595624856635159a6527861b9cdc3447af288a00c00"}, + {file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e4cecdb52aaa9994fbed6b81d4568427b6002f0a91c322697a4bfcc2b2363f5a"}, + {file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:30f546358dfa0953db92ba620101fefc81574f87b2346556b90b5f3ef16e55ce"}, + {file = "aiohttp-3.11.12-cp313-cp313-win32.whl", hash = "sha256:ce1bb21fc7d753b5f8a5d5a4bae99566386b15e716ebdb410154c16c91494d7f"}, + {file = "aiohttp-3.11.12-cp313-cp313-win_amd64.whl", hash = "sha256:f7914ab70d2ee8ab91c13e5402122edbc77821c66d2758abb53aabe87f013287"}, + {file = "aiohttp-3.11.12-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7c3623053b85b4296cd3925eeb725e386644fd5bc67250b3bb08b0f144803e7b"}, + {file = "aiohttp-3.11.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67453e603cea8e85ed566b2700efa1f6916aefbc0c9fcb2e86aaffc08ec38e78"}, + {file = "aiohttp-3.11.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6130459189e61baac5a88c10019b21e1f0c6d00ebc770e9ce269475650ff7f73"}, + {file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9060addfa4ff753b09392efe41e6af06ea5dd257829199747b9f15bfad819460"}, + {file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34245498eeb9ae54c687a07ad7f160053911b5745e186afe2d0c0f2898a1ab8a"}, + {file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dc0fba9a74b471c45ca1a3cb6e6913ebfae416678d90529d188886278e7f3f6"}, + {file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a478aa11b328983c4444dacb947d4513cb371cd323f3845e53caeda6be5589d5"}, + {file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c160a04283c8c6f55b5bf6d4cad59bb9c5b9c9cd08903841b25f1f7109ef1259"}, + {file = 
"aiohttp-3.11.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:edb69b9589324bdc40961cdf0657815df674f1743a8d5ad9ab56a99e4833cfdd"}, + {file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:4ee84c2a22a809c4f868153b178fe59e71423e1f3d6a8cd416134bb231fbf6d3"}, + {file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bf4480a5438f80e0f1539e15a7eb8b5f97a26fe087e9828e2c0ec2be119a9f72"}, + {file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:e6b2732ef3bafc759f653a98881b5b9cdef0716d98f013d376ee8dfd7285abf1"}, + {file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f752e80606b132140883bb262a457c475d219d7163d996dc9072434ffb0784c4"}, + {file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ab3247d58b393bda5b1c8f31c9edece7162fc13265334217785518dd770792b8"}, + {file = "aiohttp-3.11.12-cp39-cp39-win32.whl", hash = "sha256:0d5176f310a7fe6f65608213cc74f4228e4f4ce9fd10bcb2bb6da8fc66991462"}, + {file = "aiohttp-3.11.12-cp39-cp39-win_amd64.whl", hash = "sha256:74bd573dde27e58c760d9ca8615c41a57e719bff315c9adb6f2a4281a28e8798"}, + {file = "aiohttp-3.11.12.tar.gz", hash = "sha256:7603ca26d75b1b86160ce1bbe2787a0b706e592af5b2504e12caa88a217767b0"}, ] [package.dependencies] @@ -339,31 +344,32 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "babel" -version = "2.16.0" +version = "2.17.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" files = [ - {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"}, - {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"}, + {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, + {file = "babel-2.17.0.tar.gz", hash = 
"sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, ] [package.extras] -dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] +dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"] [[package]] name = "beautifulsoup4" -version = "4.12.3" +version = "4.13.3" description = "Screen-scraping library" optional = false -python-versions = ">=3.6.0" +python-versions = ">=3.7.0" files = [ - {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, - {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, + {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, + {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, ] [package.dependencies] soupsieve = ">1.2" +typing-extensions = ">=4.0.0" [package.extras] cchardet = ["cchardet"] @@ -1060,79 +1066,6 @@ gitdb = ">=4.0.1,<5" doc = ["sphinx (>=7.1.2,<7.2)", "sphinx-autodoc-typehints", "sphinx_rtd_theme"] test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] -[[package]] -name = "google-ai-generativelanguage" -version = "0.6.15" -description = "Google Ai Generativelanguage API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_ai_generativelanguage-0.6.15-py3-none-any.whl", hash = "sha256:5a03ef86377aa184ffef3662ca28f19eeee158733e45d7947982eb953c6ebb6c"}, - {file = "google_ai_generativelanguage-0.6.15.tar.gz", hash = "sha256:8f6d9dc4c12b065fe2d0289026171acea5183ebf2d0b11cefe12f3821e159ec3"}, -] - -[package.dependencies] -google-api-core = {version = 
">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = [ - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, -] -protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" - -[[package]] -name = "google-api-core" -version = "2.24.1" -description = "Google API client core library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_api_core-2.24.1-py3-none-any.whl", hash = "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1"}, - {file = "google_api_core-2.24.1.tar.gz", hash = "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"}, -] - -[package.dependencies] -google-auth = ">=2.14.1,<3.0.dev0" -googleapis-common-protos = ">=1.56.2,<2.0.dev0" -grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, - {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, -] -grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, - {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, -] -proto-plus = [ - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, -] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" -requests = ">=2.18.0,<3.0.0.dev0" - -[package.extras] -async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"] -grpc = ["grpcio (>=1.33.2,<2.0dev)", 
"grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] -grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] -grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] - -[[package]] -name = "google-api-python-client" -version = "2.160.0" -description = "Google API Client Library for Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_api_python_client-2.160.0-py2.py3-none-any.whl", hash = "sha256:63d61fb3e4cf3fb31a70a87f45567c22f6dfe87bbfa27252317e3e2c42900db4"}, - {file = "google_api_python_client-2.160.0.tar.gz", hash = "sha256:a8ccafaecfa42d15d5b5c3134ced8de08380019717fc9fb1ed510ca58eca3b7e"}, -] - -[package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0" -google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0.dev0" -google-auth-httplib2 = ">=0.2.0,<1.0.0" -httplib2 = ">=0.19.0,<1.dev0" -uritemplate = ">=3.0.1,<5" - [[package]] name = "google-auth" version = "2.38.0" @@ -1158,59 +1091,21 @@ reauth = ["pyu2f (>=0.1.5)"] requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] -name = "google-auth-httplib2" -version = "0.2.0" -description = "Google Authentication Library: httplib2 transport" -optional = false -python-versions = "*" -files = [ - {file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"}, - {file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"}, -] - -[package.dependencies] -google-auth = "*" -httplib2 = ">=0.19.0" - -[[package]] -name = "google-generativeai" -version = "0.8.4" -description = "Google Generative AI High level API client library and tools." 
+name = "google-genai" +version = "1.0.0" +description = "GenAI Python SDK" optional = false python-versions = ">=3.9" files = [ - {file = "google_generativeai-0.8.4-py3-none-any.whl", hash = "sha256:e987b33ea6decde1e69191ddcaec6ef974458864d243de7191db50c21a7c5b82"}, + {file = "google_genai-1.0.0-py3-none-any.whl", hash = "sha256:e9c3abd48f46ecb2b0a51efa7f65c6830b50f9784df603a91019b43918a7531f"}, + {file = "google_genai-1.0.0.tar.gz", hash = "sha256:15712abb808f891a14eafc9edf21b8cf92ea952f627dd0e2e939657efd234acd"}, ] [package.dependencies] -google-ai-generativelanguage = "0.6.15" -google-api-core = "*" -google-api-python-client = "*" -google-auth = ">=2.15.0" -protobuf = "*" -pydantic = "*" -tqdm = "*" -typing-extensions = "*" - -[package.extras] -dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "pyyaml"] - -[[package]] -name = "googleapis-common-protos" -version = "1.66.0" -description = "Common protobufs used in Google APIs" -optional = false -python-versions = ">=3.7" -files = [ - {file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"}, - {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"}, -] - -[package.dependencies] -protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" - -[package.extras] -grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] +google-auth = ">=2.14.1,<3.0.0dev" +pydantic = ">=2.0.0,<3.0.0dev" +requests = ">=2.28.1,<3.0.0dev" +websockets = ">=13.0,<15.0dev" [[package]] name = "greenlet" @@ -1298,89 +1193,6 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] -[[package]] -name = "grpcio" -version = "1.70.0" -description = "HTTP/2-based RPC framework" -optional = false -python-versions = ">=3.8" -files = [ - {file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = 
"sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"}, - {file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"}, - {file = "grpcio-1.70.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:374d014f29f9dfdb40510b041792e0e2828a1389281eb590df066e1cc2b404e5"}, - {file = "grpcio-1.70.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2af68a6f5c8f78d56c145161544ad0febbd7479524a59c16b3e25053f39c87f"}, - {file = "grpcio-1.70.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce7df14b2dcd1102a2ec32f621cc9fab6695effef516efbc6b063ad749867295"}, - {file = "grpcio-1.70.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c78b339869f4dbf89881e0b6fbf376313e4f845a42840a7bdf42ee6caed4b11f"}, - {file = "grpcio-1.70.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:58ad9ba575b39edef71f4798fdb5c7b6d02ad36d47949cd381d4392a5c9cbcd3"}, - {file = "grpcio-1.70.0-cp310-cp310-win32.whl", hash = "sha256:2b0d02e4b25a5c1f9b6c7745d4fa06efc9fd6a611af0fb38d3ba956786b95199"}, - {file = "grpcio-1.70.0-cp310-cp310-win_amd64.whl", hash = "sha256:0de706c0a5bb9d841e353f6343a9defc9fc35ec61d6eb6111802f3aa9fef29e1"}, - {file = "grpcio-1.70.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:17325b0be0c068f35770f944124e8839ea3185d6d54862800fc28cc2ffad205a"}, - {file = "grpcio-1.70.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:dbe41ad140df911e796d4463168e33ef80a24f5d21ef4d1e310553fcd2c4a386"}, - {file = "grpcio-1.70.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5ea67c72101d687d44d9c56068328da39c9ccba634cabb336075fae2eab0d04b"}, - {file = "grpcio-1.70.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb5277db254ab7586769e490b7b22f4ddab3876c490da0a1a9d7c695ccf0bf77"}, - {file = "grpcio-1.70.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e7831a0fc1beeeb7759f737f5acd9fdcda520e955049512d68fda03d91186eea"}, - {file = "grpcio-1.70.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:27cc75e22c5dba1fbaf5a66c778e36ca9b8ce850bf58a9db887754593080d839"}, - {file = "grpcio-1.70.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d63764963412e22f0491d0d32833d71087288f4e24cbcddbae82476bfa1d81fd"}, - {file = "grpcio-1.70.0-cp311-cp311-win32.whl", hash = "sha256:bb491125103c800ec209d84c9b51f1c60ea456038e4734688004f377cfacc113"}, - {file = "grpcio-1.70.0-cp311-cp311-win_amd64.whl", hash = "sha256:d24035d49e026353eb042bf7b058fb831db3e06d52bee75c5f2f3ab453e71aca"}, - {file = "grpcio-1.70.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:ef4c14508299b1406c32bdbb9fb7b47612ab979b04cf2b27686ea31882387cff"}, - {file = "grpcio-1.70.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:aa47688a65643afd8b166928a1da6247d3f46a2784d301e48ca1cc394d2ffb40"}, - {file = "grpcio-1.70.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:880bfb43b1bb8905701b926274eafce5c70a105bc6b99e25f62e98ad59cb278e"}, - {file = "grpcio-1.70.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e654c4b17d07eab259d392e12b149c3a134ec52b11ecdc6a515b39aceeec898"}, - {file = "grpcio-1.70.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2394e3381071045a706ee2eeb6e08962dd87e8999b90ac15c55f56fa5a8c9597"}, - {file = "grpcio-1.70.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b3c76701428d2df01964bc6479422f20e62fcbc0a37d82ebd58050b86926ef8c"}, - {file = "grpcio-1.70.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac073fe1c4cd856ebcf49e9ed6240f4f84d7a4e6ee95baa5d66ea05d3dd0df7f"}, - {file = "grpcio-1.70.0-cp312-cp312-win32.whl", hash = "sha256:cd24d2d9d380fbbee7a5ac86afe9787813f285e684b0271599f95a51bce33528"}, - {file = "grpcio-1.70.0-cp312-cp312-win_amd64.whl", hash = "sha256:0495c86a55a04a874c7627fd33e5beaee771917d92c0e6d9d797628ac40e7655"}, - {file = 
"grpcio-1.70.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa573896aeb7d7ce10b1fa425ba263e8dddd83d71530d1322fd3a16f31257b4a"}, - {file = "grpcio-1.70.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:d405b005018fd516c9ac529f4b4122342f60ec1cee181788249372524e6db429"}, - {file = "grpcio-1.70.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f32090238b720eb585248654db8e3afc87b48d26ac423c8dde8334a232ff53c9"}, - {file = "grpcio-1.70.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfa089a734f24ee5f6880c83d043e4f46bf812fcea5181dcb3a572db1e79e01c"}, - {file = "grpcio-1.70.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f19375f0300b96c0117aca118d400e76fede6db6e91f3c34b7b035822e06c35f"}, - {file = "grpcio-1.70.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:7c73c42102e4a5ec76608d9b60227d917cea46dff4d11d372f64cbeb56d259d0"}, - {file = "grpcio-1.70.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:0a5c78d5198a1f0aa60006cd6eb1c912b4a1520b6a3968e677dbcba215fabb40"}, - {file = "grpcio-1.70.0-cp313-cp313-win32.whl", hash = "sha256:fe9dbd916df3b60e865258a8c72ac98f3ac9e2a9542dcb72b7a34d236242a5ce"}, - {file = "grpcio-1.70.0-cp313-cp313-win_amd64.whl", hash = "sha256:4119fed8abb7ff6c32e3d2255301e59c316c22d31ab812b3fbcbaf3d0d87cc68"}, - {file = "grpcio-1.70.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:8058667a755f97407fca257c844018b80004ae8035565ebc2812cc550110718d"}, - {file = "grpcio-1.70.0-cp38-cp38-macosx_10_14_universal2.whl", hash = "sha256:879a61bf52ff8ccacbedf534665bb5478ec8e86ad483e76fe4f729aaef867cab"}, - {file = "grpcio-1.70.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:0ba0a173f4feacf90ee618fbc1a27956bfd21260cd31ced9bc707ef551ff7dc7"}, - {file = "grpcio-1.70.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:558c386ecb0148f4f99b1a65160f9d4b790ed3163e8610d11db47838d452512d"}, - {file = 
"grpcio-1.70.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:412faabcc787bbc826f51be261ae5fa996b21263de5368a55dc2cf824dc5090e"}, - {file = "grpcio-1.70.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3b0f01f6ed9994d7a0b27eeddea43ceac1b7e6f3f9d86aeec0f0064b8cf50fdb"}, - {file = "grpcio-1.70.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7385b1cb064734005204bc8994eed7dcb801ed6c2eda283f613ad8c6c75cf873"}, - {file = "grpcio-1.70.0-cp38-cp38-win32.whl", hash = "sha256:07269ff4940f6fb6710951116a04cd70284da86d0a4368fd5a3b552744511f5a"}, - {file = "grpcio-1.70.0-cp38-cp38-win_amd64.whl", hash = "sha256:aba19419aef9b254e15011b230a180e26e0f6864c90406fdbc255f01d83bc83c"}, - {file = "grpcio-1.70.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:4f1937f47c77392ccd555728f564a49128b6a197a05a5cd527b796d36f3387d0"}, - {file = "grpcio-1.70.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:0cd430b9215a15c10b0e7d78f51e8a39d6cf2ea819fd635a7214fae600b1da27"}, - {file = "grpcio-1.70.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:e27585831aa6b57b9250abaf147003e126cd3a6c6ca0c531a01996f31709bed1"}, - {file = "grpcio-1.70.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1af8e15b0f0fe0eac75195992a63df17579553b0c4af9f8362cc7cc99ccddf4"}, - {file = "grpcio-1.70.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbce24409beaee911c574a3d75d12ffb8c3e3dd1b813321b1d7a96bbcac46bf4"}, - {file = "grpcio-1.70.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ff4a8112a79464919bb21c18e956c54add43ec9a4850e3949da54f61c241a4a6"}, - {file = "grpcio-1.70.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5413549fdf0b14046c545e19cfc4eb1e37e9e1ebba0ca390a8d4e9963cab44d2"}, - {file = "grpcio-1.70.0-cp39-cp39-win32.whl", hash = "sha256:b745d2c41b27650095e81dea7091668c040457483c9bdb5d0d9de8f8eb25e59f"}, - {file = "grpcio-1.70.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:a31d7e3b529c94e930a117b2175b2efd179d96eb3c7a21ccb0289a8ab05b645c"}, - {file = "grpcio-1.70.0.tar.gz", hash = "sha256:8d1584a68d5922330025881e63a6c1b54cc8117291d382e4fa69339b6d914c56"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.70.0)"] - -[[package]] -name = "grpcio-status" -version = "1.70.0" -description = "Status proto mapping for gRPC" -optional = false -python-versions = ">=3.8" -files = [ - {file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"}, - {file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.5.5" -grpcio = ">=1.70.0" -protobuf = ">=5.26.1,<6.0dev" - [[package]] name = "h11" version = "0.14.0" @@ -1413,20 +1225,6 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] trio = ["trio (>=0.22.0,<1.0)"] -[[package]] -name = "httplib2" -version = "0.22.0" -description = "A comprehensive HTTP client library." 
-optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, - {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, -] - -[package.dependencies] -pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} - [[package]] name = "httpx" version = "0.28.1" @@ -1545,13 +1343,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.31.0" +version = "8.32.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.31.0-py3-none-any.whl", hash = "sha256:46ec58f8d3d076a61d128fe517a51eb730e3aaf0c184ea8c17d16e366660c6a6"}, - {file = "ipython-8.31.0.tar.gz", hash = "sha256:b6a2274606bec6166405ff05e54932ed6e5cfecaca1fc05f2cacde7bb074d70b"}, + {file = "ipython-8.32.0-py3-none-any.whl", hash = "sha256:cae85b0c61eff1fc48b0a8002de5958b6528fa9c8defb1894da63f42613708aa"}, + {file = "ipython-8.32.0.tar.gz", hash = "sha256:be2c91895b0b9ea7ba49d33b23e2040c352b33eb6a519cca7ce6e0c743444251"}, ] [package.dependencies] @@ -1818,17 +1616,18 @@ test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout" [[package]] name = "jupyter-events" -version = "0.11.0" +version = "0.12.0" description = "Jupyter Event System library" optional = false python-versions = ">=3.9" files = [ - {file = "jupyter_events-0.11.0-py3-none-any.whl", hash = "sha256:36399b41ce1ca45fe8b8271067d6a140ffa54cec4028e95491c93b78a855cacf"}, - {file = "jupyter_events-0.11.0.tar.gz", hash = "sha256:c0bc56a37aac29c1fbc3bcfbddb8c8c49533f9cf11f1c4e6adadba936574ab90"}, + {file = "jupyter_events-0.12.0-py3-none-any.whl", hash = 
"sha256:6464b2fa5ad10451c3d35fabc75eab39556ae1e2853ad0c0cc31b656731a97fb"}, + {file = "jupyter_events-0.12.0.tar.gz", hash = "sha256:fc3fce98865f6784c9cd0a56a20644fc6098f21c8c33834a8d9fe383c17e554b"}, ] [package.dependencies] jsonschema = {version = ">=4.18.0", extras = ["format-nongpl"]} +packaging = "*" python-json-logger = ">=2.0.4" pyyaml = ">=5.3" referencing = "*" @@ -2468,13 +2267,13 @@ dill = ">=0.3.8" [[package]] name = "narwhals" -version = "1.24.1" +version = "1.25.2" description = "Extremely lightweight compatibility layer between dataframe libraries" optional = false python-versions = ">=3.8" files = [ - {file = "narwhals-1.24.1-py3-none-any.whl", hash = "sha256:d8983fe14851c95d60576ddca37c094bd4ed24ab9ea98396844fb20ad9aaf184"}, - {file = "narwhals-1.24.1.tar.gz", hash = "sha256:b09b8253d945f23cdb683a84685abf3afb9f96114d89e9f35dc876e143f65007"}, + {file = "narwhals-1.25.2-py3-none-any.whl", hash = "sha256:e645f7fc1f8c0a3563a6cdcd0191586cdf88470ad90f0818abba7ceb6c181b00"}, + {file = "narwhals-1.25.2.tar.gz", hash = "sha256:37594746fc06fe4a588967a34a2974b1f3a7ad6ff1571b6e31ac5e58c9591000"}, ] [package.extras] @@ -3176,23 +2975,23 @@ type = ["mypy (>=1.11.2)"] [[package]] name = "playwright" -version = "1.49.1" +version = "1.50.0" description = "A high-level API to automate web browsers" optional = false python-versions = ">=3.9" files = [ - {file = "playwright-1.49.1-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:1041ffb45a0d0bc44d698d3a5aa3ac4b67c9bd03540da43a0b70616ad52592b8"}, - {file = "playwright-1.49.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9f38ed3d0c1f4e0a6d1c92e73dd9a61f8855133249d6f0cec28648d38a7137be"}, - {file = "playwright-1.49.1-py3-none-macosx_11_0_universal2.whl", hash = "sha256:3be48c6d26dc819ca0a26567c1ae36a980a0303dcd4249feb6f59e115aaddfb8"}, - {file = "playwright-1.49.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:753ca90ee31b4b03d165cfd36e477309ebf2b4381953f2a982ff612d85b147d2"}, - {file = 
"playwright-1.49.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd9bc8dab37aa25198a01f555f0a2e2c3813fe200fef018ac34dfe86b34994b9"}, - {file = "playwright-1.49.1-py3-none-win32.whl", hash = "sha256:43b304be67f096058e587dac453ece550eff87b8fbed28de30f4f022cc1745bb"}, - {file = "playwright-1.49.1-py3-none-win_amd64.whl", hash = "sha256:47b23cb346283278f5b4d1e1990bcb6d6302f80c0aa0ca93dd0601a1400191df"}, + {file = "playwright-1.50.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:f36d754a6c5bd9bf7f14e8f57a2aea6fd08f39ca4c8476481b9c83e299531148"}, + {file = "playwright-1.50.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:40f274384591dfd27f2b014596250b2250c843ed1f7f4ef5d2960ecb91b4961e"}, + {file = "playwright-1.50.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:9922ef9bcd316995f01e220acffd2d37a463b4ad10fd73e388add03841dfa230"}, + {file = "playwright-1.50.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:8fc628c492d12b13d1f347137b2ac6c04f98197ff0985ef0403a9a9ee0d39131"}, + {file = "playwright-1.50.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcff35f72db2689a79007aee78f1b0621a22e6e3d6c1f58aaa9ac805bf4497c"}, + {file = "playwright-1.50.0-py3-none-win32.whl", hash = "sha256:3b906f4d351260016a8c5cc1e003bb341651ae682f62213b50168ed581c7558a"}, + {file = "playwright-1.50.0-py3-none-win_amd64.whl", hash = "sha256:1859423da82de631704d5e3d88602d755462b0906824c1debe140979397d2e8d"}, ] [package.dependencies] -greenlet = "3.1.1" -pyee = "12.0.0" +greenlet = ">=3.1.1,<4.0.0" +pyee = ">=12,<13" [[package]] name = "pluggy" @@ -3328,23 +3127,6 @@ files = [ {file = "propcache-0.2.1.tar.gz", hash = "sha256:3f77ce728b19cb537714499928fe800c3dda29e8d9428778fc7c186da4c09a64"}, ] -[[package]] -name = "proto-plus" -version = "1.26.0" -description = "Beautiful, Pythonic protocol buffers" -optional = false -python-versions = ">=3.7" -files = [ - {file = "proto_plus-1.26.0-py3-none-any.whl", hash = 
"sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"}, - {file = "proto_plus-1.26.0.tar.gz", hash = "sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22"}, -] - -[package.dependencies] -protobuf = ">=3.19.0,<6.0.0dev" - -[package.extras] -testing = ["google-api-core (>=1.31.5)"] - [[package]] name = "protobuf" version = "5.29.3" @@ -3683,13 +3465,13 @@ jupyter = ["ipykernel (>=5.1.2)", "ipython (>=5.8.0)", "ipywidgets (>=7,<8)", "t [[package]] name = "pyee" -version = "12.0.0" +version = "12.1.1" description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" optional = false python-versions = ">=3.8" files = [ - {file = "pyee-12.0.0-py3-none-any.whl", hash = "sha256:7b14b74320600049ccc7d0e0b1becd3b4bd0a03c745758225e31a59f4095c990"}, - {file = "pyee-12.0.0.tar.gz", hash = "sha256:c480603f4aa2927d4766eb41fa82793fe60a82cbfdb8d688e0d08c55a534e145"}, + {file = "pyee-12.1.1-py3-none-any.whl", hash = "sha256:18a19c650556bb6b32b406d7f017c8f513aceed1ef7ca618fb65de7bd2d347ef"}, + {file = "pyee-12.1.1.tar.gz", hash = "sha256:bbc33c09e2ff827f74191e3e5bbc6be7da02f627b7ec30d86f5ce1a6fb2424a3"}, ] [package.dependencies] @@ -3712,20 +3494,6 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] -[[package]] -name = "pyparsing" -version = "3.2.1" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -optional = false -python-versions = ">=3.9" -files = [ - {file = "pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1"}, - {file = "pyparsing-3.2.1.tar.gz", hash = "sha256:61980854fd66de3a90028d679a954d5f2623e83144b5afe5ee86f43d762e5f0a"}, -] - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - [[package]] name = "pypdfium2" version = "4.30.0" @@ -3880,17 +3648,18 @@ files = [ [[package]] name = "pywinpty" -version = "2.0.14" +version = "2.0.15" description = "Pseudo terminal support 
for Windows from Python." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pywinpty-2.0.14-cp310-none-win_amd64.whl", hash = "sha256:0b149c2918c7974f575ba79f5a4aad58bd859a52fa9eb1296cc22aa412aa411f"}, - {file = "pywinpty-2.0.14-cp311-none-win_amd64.whl", hash = "sha256:cf2a43ac7065b3e0dc8510f8c1f13a75fb8fde805efa3b8cff7599a1ef497bc7"}, - {file = "pywinpty-2.0.14-cp312-none-win_amd64.whl", hash = "sha256:55dad362ef3e9408ade68fd173e4f9032b3ce08f68cfe7eacb2c263ea1179737"}, - {file = "pywinpty-2.0.14-cp313-none-win_amd64.whl", hash = "sha256:074fb988a56ec79ca90ed03a896d40707131897cefb8f76f926e3834227f2819"}, - {file = "pywinpty-2.0.14-cp39-none-win_amd64.whl", hash = "sha256:5725fd56f73c0531ec218663bd8c8ff5acc43c78962fab28564871b5fce053fd"}, - {file = "pywinpty-2.0.14.tar.gz", hash = "sha256:18bd9529e4a5daf2d9719aa17788ba6013e594ae94c5a0c27e83df3278b0660e"}, + {file = "pywinpty-2.0.15-cp310-cp310-win_amd64.whl", hash = "sha256:8e7f5de756a615a38b96cd86fa3cd65f901ce54ce147a3179c45907fa11b4c4e"}, + {file = "pywinpty-2.0.15-cp311-cp311-win_amd64.whl", hash = "sha256:9a6bcec2df2707aaa9d08b86071970ee32c5026e10bcc3cc5f6f391d85baf7ca"}, + {file = "pywinpty-2.0.15-cp312-cp312-win_amd64.whl", hash = "sha256:83a8f20b430bbc5d8957249f875341a60219a4e971580f2ba694fbfb54a45ebc"}, + {file = "pywinpty-2.0.15-cp313-cp313-win_amd64.whl", hash = "sha256:ab5920877dd632c124b4ed17bc6dd6ef3b9f86cd492b963ffdb1a67b85b0f408"}, + {file = "pywinpty-2.0.15-cp313-cp313t-win_amd64.whl", hash = "sha256:a4560ad8c01e537708d2790dbe7da7d986791de805d89dd0d3697ca59e9e4901"}, + {file = "pywinpty-2.0.15-cp39-cp39-win_amd64.whl", hash = "sha256:d261cd88fcd358cfb48a7ca0700db3e1c088c9c10403c9ebc0d8a8b57aa6a117"}, + {file = "pywinpty-2.0.15.tar.gz", hash = "sha256:312cf39153a8736c617d45ce8b6ad6cd2107de121df91c455b10ce6bba7a39b2"}, ] [[package]] @@ -4752,13 +4521,13 @@ full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart [[package]] name = 
"streamlit" -version = "1.41.1" +version = "1.42.0" description = "A faster way to build and share data apps" optional = false python-versions = "!=3.9.7,>=3.9" files = [ - {file = "streamlit-1.41.1-py2.py3-none-any.whl", hash = "sha256:0def00822480071d642e6df36cd63c089f991da3a69fd9eb4ab8f65ce27de4e0"}, - {file = "streamlit-1.41.1.tar.gz", hash = "sha256:6626d32b098ba1458b71eebdd634c62af2dd876380e59c4b6a1e828a39d62d69"}, + {file = "streamlit-1.42.0-py2.py3-none-any.whl", hash = "sha256:edf333fd3525b7c64b19e1156b483a1a93cbdb09a3a06f26478388d68f971090"}, + {file = "streamlit-1.42.0.tar.gz", hash = "sha256:8c48494ccfad33e7d0bc5873151800b203cb71203bfd42bc7418940710ca4970"}, ] [package.dependencies] @@ -4779,11 +4548,11 @@ rich = ">=10.14.0,<14" tenacity = ">=8.1.0,<10" toml = ">=0.10.1,<2" tornado = ">=6.0.3,<7" -typing-extensions = ">=4.3.0,<5" +typing-extensions = ">=4.4.0,<5" watchdog = {version = ">=2.1.5,<7", markers = "platform_system != \"Darwin\""} [package.extras] -snowflake = ["snowflake-connector-python (>=2.8.0)", "snowflake-snowpark-python[modin] (>=1.17.0)"] +snowflake = ["snowflake-connector-python (>=3.3.0)", "snowflake-snowpark-python[modin] (>=1.17.0)"] [[package]] name = "surya-ocr" @@ -5235,17 +5004,6 @@ files = [ [package.extras] dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake8-commas", "flake8-comprehensions", "flake8-continuation", "flake8-datetimez", "flake8-docstrings", "flake8-import-order", "flake8-literal", "flake8-modern-annotations", "flake8-noqa", "flake8-pyproject", "flake8-requirements", "flake8-typechecking-import", "flake8-use-fstring", "mypy", "pep8-naming", "types-PyYAML"] -[[package]] -name = "uritemplate" -version = "4.1.1" -description = "Implementation of RFC 6570 URI Templates" -optional = false -python-versions = ">=3.6" -files = [ - {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, - {file = 
"uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, -] - [[package]] name = "urllib3" version = "2.3.0" @@ -5373,6 +5131,84 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] optional = ["python-socks", "wsaccel"] test = ["websockets"] +[[package]] +name = "websockets" +version = "14.2" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "websockets-14.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e8179f95323b9ab1c11723e5d91a89403903f7b001828161b480a7810b334885"}, + {file = "websockets-14.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d8c3e2cdb38f31d8bd7d9d28908005f6fa9def3324edb9bf336d7e4266fd397"}, + {file = "websockets-14.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:714a9b682deb4339d39ffa674f7b674230227d981a37d5d174a4a83e3978a610"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e53c72052f2596fb792a7acd9704cbc549bf70fcde8a99e899311455974ca3"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fbd68850c837e57373d95c8fe352203a512b6e49eaae4c2f4088ef8cf21980"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b27ece32f63150c268593d5fdb82819584831a83a3f5809b7521df0685cd5d8"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4daa0faea5424d8713142b33825fff03c736f781690d90652d2c8b053345b0e7"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bc63cee8596a6ec84d9753fd0fcfa0452ee12f317afe4beae6b157f0070c6c7f"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a570862c325af2111343cc9b0257b7119b904823c675b22d4ac547163088d0d"}, + {file = 
"websockets-14.2-cp310-cp310-win32.whl", hash = "sha256:75862126b3d2d505e895893e3deac0a9339ce750bd27b4ba515f008b5acf832d"}, + {file = "websockets-14.2-cp310-cp310-win_amd64.whl", hash = "sha256:cc45afb9c9b2dc0852d5c8b5321759cf825f82a31bfaf506b65bf4668c96f8b2"}, + {file = "websockets-14.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3bdc8c692c866ce5fefcaf07d2b55c91d6922ac397e031ef9b774e5b9ea42166"}, + {file = "websockets-14.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c93215fac5dadc63e51bcc6dceca72e72267c11def401d6668622b47675b097f"}, + {file = "websockets-14.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c9b6535c0e2cf8a6bf938064fb754aaceb1e6a4a51a80d884cd5db569886910"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a52a6d7cf6938e04e9dceb949d35fbdf58ac14deea26e685ab6368e73744e4c"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f05702e93203a6ff5226e21d9b40c037761b2cfb637187c9802c10f58e40473"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22441c81a6748a53bfcb98951d58d1af0661ab47a536af08920d129b4d1c3473"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd9b868d78b194790e6236d9cbc46d68aba4b75b22497eb4ab64fa640c3af56"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1a5a20d5843886d34ff8c57424cc65a1deda4375729cbca4cb6b3353f3ce4142"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34277a29f5303d54ec6468fb525d99c99938607bc96b8d72d675dee2b9f5bf1d"}, + {file = "websockets-14.2-cp311-cp311-win32.whl", hash = "sha256:02687db35dbc7d25fd541a602b5f8e451a238ffa033030b172ff86a93cb5dc2a"}, + {file = "websockets-14.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:862e9967b46c07d4dcd2532e9e8e3c2825e004ffbf91a5ef9dde519ee2effb0b"}, + {file = "websockets-14.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f20522e624d7ffbdbe259c6b6a65d73c895045f76a93719aa10cd93b3de100c"}, + {file = "websockets-14.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:647b573f7d3ada919fd60e64d533409a79dcf1ea21daeb4542d1d996519ca967"}, + {file = "websockets-14.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af99a38e49f66be5a64b1e890208ad026cda49355661549c507152113049990"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:091ab63dfc8cea748cc22c1db2814eadb77ccbf82829bac6b2fbe3401d548eda"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b374e8953ad477d17e4851cdc66d83fdc2db88d9e73abf755c94510ebddceb95"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a39d7eceeea35db85b85e1169011bb4321c32e673920ae9c1b6e0978590012a3"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0a6f3efd47ffd0d12080594f434faf1cd2549b31e54870b8470b28cc1d3817d9"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:065ce275e7c4ffb42cb738dd6b20726ac26ac9ad0a2a48e33ca632351a737267"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9d0e53530ba7b8b5e389c02282f9d2aa47581514bd6049d3a7cffe1385cf5fe"}, + {file = "websockets-14.2-cp312-cp312-win32.whl", hash = "sha256:20e6dd0984d7ca3037afcb4494e48c74ffb51e8013cac71cf607fffe11df7205"}, + {file = "websockets-14.2-cp312-cp312-win_amd64.whl", hash = "sha256:44bba1a956c2c9d268bdcdf234d5e5ff4c9b6dc3e300545cbe99af59dda9dcce"}, + {file = "websockets-14.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f1372e511c7409a542291bce92d6c83320e02c9cf392223272287ce55bc224e"}, + {file = 
"websockets-14.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4da98b72009836179bb596a92297b1a61bb5a830c0e483a7d0766d45070a08ad"}, + {file = "websockets-14.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8a86a269759026d2bde227652b87be79f8a734e582debf64c9d302faa1e9f03"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86cf1aaeca909bf6815ea714d5c5736c8d6dd3a13770e885aafe062ecbd04f1f"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b0f6c3ba3b1240f602ebb3971d45b02cc12bd1845466dd783496b3b05783a5"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c3e101c246aa85bc8534e495952e2ca208bd87994650b90a23d745902db9a"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eabdb28b972f3729348e632ab08f2a7b616c7e53d5414c12108c29972e655b20"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2066dc4cbcc19f32c12a5a0e8cc1b7ac734e5b64ac0a325ff8353451c4b15ef2"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ab95d357cd471df61873dadf66dd05dd4709cae001dd6342edafc8dc6382f307"}, + {file = "websockets-14.2-cp313-cp313-win32.whl", hash = "sha256:a9e72fb63e5f3feacdcf5b4ff53199ec8c18d66e325c34ee4c551ca748623bbc"}, + {file = "websockets-14.2-cp313-cp313-win_amd64.whl", hash = "sha256:b439ea828c4ba99bb3176dc8d9b933392a2413c0f6b149fdcba48393f573377f"}, + {file = "websockets-14.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7cd5706caec1686c5d233bc76243ff64b1c0dc445339bd538f30547e787c11fe"}, + {file = "websockets-14.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec607328ce95a2f12b595f7ae4c5d71bf502212bddcea528290b35c286932b12"}, + {file = "websockets-14.2-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:da85651270c6bfb630136423037dd4975199e5d4114cae6d3066641adcc9d1c7"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ecadc7ce90accf39903815697917643f5b7cfb73c96702318a096c00aa71f5"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1979bee04af6a78608024bad6dfcc0cc930ce819f9e10342a29a05b5320355d0"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dddacad58e2614a24938a50b85969d56f88e620e3f897b7d80ac0d8a5800258"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:89a71173caaf75fa71a09a5f614f450ba3ec84ad9fca47cb2422a860676716f0"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6af6a4b26eea4fc06c6818a6b962a952441e0e39548b44773502761ded8cc1d4"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:80c8efa38957f20bba0117b48737993643204645e9ec45512579132508477cfc"}, + {file = "websockets-14.2-cp39-cp39-win32.whl", hash = "sha256:2e20c5f517e2163d76e2729104abc42639c41cf91f7b1839295be43302713661"}, + {file = "websockets-14.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4c8cef610e8d7c70dea92e62b6814a8cd24fbd01d7103cc89308d2bfe1659ef"}, + {file = "websockets-14.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d7d9cafbccba46e768be8a8ad4635fa3eae1ffac4c6e7cb4eb276ba41297ed29"}, + {file = "websockets-14.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c76193c1c044bd1e9b3316dcc34b174bbf9664598791e6fb606d8d29000e070c"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd475a974d5352390baf865309fe37dec6831aafc3014ffac1eea99e84e83fc2"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:2c6c0097a41968b2e2b54ed3424739aab0b762ca92af2379f152c1aef0187e1c"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7ff794c8b36bc402f2e07c0b2ceb4a2424147ed4785ff03e2a7af03711d60a"}, + {file = "websockets-14.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dec254fcabc7bd488dab64846f588fc5b6fe0d78f641180030f8ea27b76d72c3"}, + {file = "websockets-14.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbe03eb853e17fd5b15448328b4ec7fb2407d45fb0245036d06a3af251f8e48f"}, + {file = "websockets-14.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3c4aa3428b904d5404a0ed85f3644d37e2cb25996b7f096d77caeb0e96a3b42"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:577a4cebf1ceaf0b65ffc42c54856214165fb8ceeba3935852fc33f6b0c55e7f"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad1c1d02357b7665e700eca43a31d52814ad9ad9b89b58118bdabc365454b574"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f390024a47d904613577df83ba700bd189eedc09c57af0a904e5c39624621270"}, + {file = "websockets-14.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3c1426c021c38cf92b453cdf371228d3430acd775edee6bac5a4d577efc72365"}, + {file = "websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b"}, + {file = "websockets-14.2.tar.gz", hash = "sha256:5059ed9c54945efb321f097084b4c7e52c246f2c869815876a69d1efc4ad6eb5"}, +] + [[package]] name = "widgetsnbextension" version = "4.0.13" @@ -5615,4 +5451,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "589d4265c99bb94e935eeae053707638d72da1eaca38f0d60c832210703bd5bc" +content-hash = 
"0ab5205db01e1abea947536074593b29b16347a16ca5e9489c024a2c3a05df8f" diff --git a/pyproject.toml b/pyproject.toml index 9922923a..98156269 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,10 +31,10 @@ regex = "^2024.4.28" pdftext = "~0.5.1" markdownify = "^0.13.1" click = "^8.1.7" -google-generativeai = "^0.8.3" markdown2 = "^2.5.2" filetype = "^1.2.0" scikit-learn = "^1.6.1" +google-genai = "^1.0.0" [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" diff --git a/tests/processors/test_llm_processors.py b/tests/processors/test_llm_processors.py index f8d0bc38..86871919 100644 --- a/tests/processors/test_llm_processors.py +++ b/tests/processors/test_llm_processors.py @@ -6,7 +6,6 @@ from marker.processors.llm.llm_form import LLMFormProcessor from marker.processors.llm.llm_image_description import LLMImageDescriptionProcessor from marker.processors.llm.llm_table import LLMTableProcessor -from marker.processors.llm.llm_text import LLMTextProcessor from marker.processors.table import TableProcessor from marker.renderers.markdown import MarkdownRenderer from marker.schema import BlockTypes @@ -97,25 +96,6 @@ def test_llm_table_processor(pdf_document, detection_model, table_rec_model, rec assert "Value 1 $x$" in markdown -@pytest.mark.filename("adversarial.pdf") -@pytest.mark.config({"page_range": [0]}) -def test_llm_text_processor(pdf_document, mocker): - inline_math_block = pdf_document.contained_blocks((BlockTypes.TextInlineMath,))[0] - text_lines = inline_math_block.contained_blocks(pdf_document, (BlockTypes.Line,)) - corrected_lines = ["Text"] * len(text_lines) - - mock_cls = Mock() - mock_cls.return_value.generate_response.return_value = {"corrected_lines": corrected_lines} - mocker.patch("marker.processors.llm.GoogleModel", mock_cls) - - processor = LLMTextProcessor({"use_llm": True, "google_api_key": "test"}) - processor(pdf_document) - - contained_spans = text_lines[0].contained_blocks(pdf_document, (BlockTypes.Span,)) - assert contained_spans[0].text 
== "Text\n" # Newline inserted at end of line - assert contained_spans[0].formats == ["italic"] - - @pytest.mark.filename("A17_FlightPlan.pdf") @pytest.mark.config({"page_range": [0]}) def test_llm_caption_processor_disabled(pdf_document): From 364525d214f35ed9c887e2aa1581d35f4f56eec0 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 6 Feb 2025 14:11:36 -0500 Subject: [PATCH 19/27] Remove old import --- marker/converters/pdf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/marker/converters/pdf.py b/marker/converters/pdf.py index 54003b82..da7198a2 100644 --- a/marker/converters/pdf.py +++ b/marker/converters/pdf.py @@ -28,7 +28,6 @@ from marker.processors.llm.llm_form import LLMFormProcessor from marker.processors.llm.llm_image_description import LLMImageDescriptionProcessor from marker.processors.llm.llm_table import LLMTableProcessor -from marker.processors.llm.llm_text import LLMTextProcessor from marker.processors.page_header import PageHeaderProcessor from marker.processors.reference import ReferenceProcessor from marker.processors.sectionheader import SectionHeaderProcessor From e875e5e0674b4da100c9746dbf75e15fdc6d27c2 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Thu, 6 Feb 2025 14:17:36 -0500 Subject: [PATCH 20/27] Test fixes --- benchmarks/overall/overall.py | 7 +++++-- marker/processors/equation.py | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index d73903b9..cbfaf31b 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -114,10 +114,13 @@ def main( benchmark_dataset = datasets.load_dataset(dataset, split="train") artifacts = { "model_dict": create_model_dict(), - "mathpix_ds": datasets.load_dataset("datalab-to/marker_benchmark_mathpix", split="train"), - "use_llm": use_llm + "use_llm": use_llm, + "mathpix_ds": None } + if "mathpix" in methods: + artifacts["mathpix_ds"] = 
datasets.load_dataset("datalab-to/marker_benchmark_mathpix", split="train") + result = get_method_scores(benchmark_dataset, methods, score_types, artifacts, max_rows=max_rows) # Display benchmark scoring tables diff --git a/marker/processors/equation.py b/marker/processors/equation.py index ebc53e54..20ac0fb4 100644 --- a/marker/processors/equation.py +++ b/marker/processors/equation.py @@ -54,6 +54,9 @@ def __call__(self, document: Document): "token_count": token_count }) + if len(equation_data) == 0: + return + predictions = self.get_latex_batched(equation_data) for prediction, equation_d in zip(predictions, equation_data): conditions = [ From 3d4807a4ec3212d0d70276896060b59d872a3a2b Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Fri, 7 Feb 2025 11:04:16 -0500 Subject: [PATCH 21/27] Add back llm text processor --- .github/workflows/benchmarks.yml | 2 +- marker/builders/layout.py | 6 +- marker/converters/pdf.py | 2 + marker/processors/llm/__init__.py | 4 +- marker/processors/llm/llm_text.py | 144 ++++++++++++++++++++++++ marker/processors/llm/utils.py | 4 +- tests/processors/test_llm_processors.py | 21 +++- 7 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 marker/processors/llm/llm_text.py diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index a7efa7e4..5b76ff15 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -25,7 +25,7 @@ jobs: - name: Run benchmark test run: | poetry run python benchmarks/overall/overall.py --max_rows 5 - poetry run python benchmarks/verify_scores.py conversion_results/benchmark/overall/overall.json --type marker + poetry run python benchmarks/verify_scores.py conversion_results/benchmark/overall/result.json --type marker - name: Run table benchmark run: | poetry run python benchmarks/table/table.py --max_rows 5 diff --git a/marker/builders/layout.py b/marker/builders/layout.py index ff4af17d..33e4e622 100644 --- a/marker/builders/layout.py +++ 
b/marker/builders/layout.py @@ -22,7 +22,7 @@ class LayoutBuilder(BaseBuilder): """ A builder for performing layout detection on PDF pages and merging the results into the document. """ - batch_size: Annotated[ + layout_batch_size: Annotated[ Optional[int], "The batch size to use for the layout model.", "Default is None, which will use the default batch size for the model." @@ -67,8 +67,8 @@ def __call__(self, document: Document, provider: PdfProvider): self.merge_blocks(document.pages, provider.page_lines) def get_batch_size(self): - if self.batch_size is not None: - return self.batch_size + if self.layout_batch_size is not None: + return self.layout_batch_size elif settings.TORCH_DEVICE_MODEL == "cuda": return 6 return 6 diff --git a/marker/converters/pdf.py b/marker/converters/pdf.py index da7198a2..01f69695 100644 --- a/marker/converters/pdf.py +++ b/marker/converters/pdf.py @@ -28,6 +28,7 @@ from marker.processors.llm.llm_form import LLMFormProcessor from marker.processors.llm.llm_image_description import LLMImageDescriptionProcessor from marker.processors.llm.llm_table import LLMTableProcessor +from marker.processors.llm.llm_text import LLMTextProcessor from marker.processors.page_header import PageHeaderProcessor from marker.processors.reference import ReferenceProcessor from marker.processors.sectionheader import SectionHeaderProcessor @@ -75,6 +76,7 @@ class PdfConverter(BaseConverter): LLMTableMergeProcessor, LLMFormProcessor, TextProcessor, + LLMTextProcessor, LLMComplexRegionProcessor, LLMImageDescriptionProcessor, LLMEquationProcessor, diff --git a/marker/processors/llm/__init__.py b/marker/processors/llm/__init__.py index c53d2cca..21ee04f3 100644 --- a/marker/processors/llm/__init__.py +++ b/marker/processors/llm/__init__.py @@ -27,7 +27,7 @@ class BaseLLMProcessor(BaseProcessor): max_retries: Annotated[ int, "The maximum number of retries to use for the Gemini model.", - ] = 3 + ] = 1 max_concurrency: Annotated[ int, "The maximum number of 
concurrent requests to make to the Gemini model.", @@ -35,7 +35,7 @@ class BaseLLMProcessor(BaseProcessor): timeout: Annotated[ int, "The timeout for requests to the Gemini model.", - ] = 60 + ] = 15 image_expansion_ratio: Annotated[ float, "The ratio to expand the image by when cropping.", diff --git a/marker/processors/llm/llm_text.py b/marker/processors/llm/llm_text.py new file mode 100644 index 00000000..61226e12 --- /dev/null +++ b/marker/processors/llm/llm_text.py @@ -0,0 +1,144 @@ +import json +from typing import List + +from pydantic import BaseModel + +from marker.processors.llm import BaseLLMProcessor +from bs4 import BeautifulSoup +from marker.schema import BlockTypes +from marker.schema.blocks import Block +from marker.schema.document import Document +from marker.schema.groups.page import PageGroup +from marker.schema.registry import get_block_class +from marker.schema.text.span import Span + + +class LLMTextProcessor(BaseLLMProcessor): + block_types = (BlockTypes.TextInlineMath,) + text_math_rewriting_prompt = """You are a text correction expert specializing in accurately reproducing text from images. +You will receive an image of a text block and a set of extracted lines corresponding to the text in the image. +Your task is to correct any errors in the extracted lines, including math, formatting, and other inaccuracies, and output the corrected lines in a JSON format. +The number of output lines MUST match the number of input lines. Stay as faithful to the original text as possible. + +**Instructions:** + +1. Carefully examine the provided text block image . +2. Analyze the extracted lines. +3. For each extracted line, compare it to the corresponding line in the image. +4. Correct any errors in the extracted line, including: + * Inline math: Ensure all mathematical expressions are correctly formatted and rendered. + * Formatting: Maintain consistent formatting with the text block image, including spacing, indentation, and special characters. 
+ * Other inaccuracies: If the image is handwritten then you may correct any spelling errors, or other discrepancies. +5. Do not remove any formatting i.e bold, italics, etc from the extracted lines unless it is necessary to correct the error. +6. Ensure that inline math is properly with inline math tags. +7. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines. +8. Output the corrected lines in JSON format with a "lines" field, as shown in the example below. + +**Example:** + +Input: +``` +{ + "extracted_lines": [ + "Adversarial training (AT) [23], which aims to minimize\n", + "the model's risk under the worst-case perturbations, is cur-\n", + "rently the most effective approach for improving the robust-\n", + "ness of deep neural networks. For a given neural network\n", + "f(x, w) with parameters w, the optimization objective of\n", + "AT can be formulated as follows:\n" + ] +} +``` + +Output: + +```json +{ + "corrected_lines": [ + "Adversarial training (AT) [23], which aims to minimize\n", + "the model's risk under the worst-case perturbations, is cur-\n", + "rently the most effective approach for improving the robust-\n", + "ness of deep neural networks. 
For a given neural network\n", + "f(x, w) with parameters w, the optimization objective of\n", + "AT can be formulated as follows:\n" + ] +} +``` + +**Input:** +```json +{extracted_lines} +``` +""" + + def process_rewriting(self, document: Document, page: PageGroup, block: Block): + SpanClass: Span = get_block_class(BlockTypes.Span) + + text_lines = block.contained_blocks(document, (BlockTypes.Line,)) + extracted_lines = [line.formatted_text(document) for line in text_lines] + + prompt = self.text_math_rewriting_prompt.replace("{extracted_lines}", json.dumps({"extracted_lines": extracted_lines}, indent=2)) + image = self.extract_image(document, block) + + response = self.model.generate_response(prompt, image, block, LLMTextSchema) + if not response or "corrected_lines" not in response: + block.update_metadata(llm_error_count=1) + return + + corrected_lines = response["corrected_lines"] + if not corrected_lines or len(corrected_lines) != len(extracted_lines): + block.update_metadata(llm_error_count=1) + return + + for text_line, corrected_text in zip(text_lines, corrected_lines): + text_line.structure = [] + corrected_spans = self.text_to_spans(corrected_text) + + for span_idx, span in enumerate(corrected_spans): + if span_idx == len(corrected_spans) - 1: + span['content'] += "\n" + + span_block = page.add_full_block( + SpanClass( + polygon=text_line.polygon, + text=span['content'], + font='Unknown', + font_weight=0, + font_size=0, + minimum_position=0, + maximum_position=0, + formats=[span['type']], + page_id=text_line.page_id, + text_extraction_method="gemini", + ) + ) + text_line.structure.append(span_block.id) + + def text_to_spans(self, text): + soup = BeautifulSoup(text, 'html.parser') + + tag_types = { + 'b': 'bold', + 'i': 'italic', + 'math': 'math' + } + spans = [] + + for element in soup.descendants: + if not len(list(element.parents)) == 1: + continue + if element.name in tag_types: + spans.append({ + 'type': tag_types[element.name], + 'content': 
element.get_text() + }) + elif element.string: + spans.append({ + 'type': 'plain', + 'content': element.string + }) + + return spans + +class LLMTextSchema(BaseModel): + corrected_lines: List[str] \ No newline at end of file diff --git a/marker/processors/llm/utils.py b/marker/processors/llm/utils.py index feefbec9..a36bdb4d 100644 --- a/marker/processors/llm/utils.py +++ b/marker/processors/llm/utils.py @@ -38,8 +38,8 @@ def generate_response( image: PIL.Image.Image | List[PIL.Image.Image], block: Block, response_schema: type[BaseModel], - max_retries: int = 2, - timeout: int = 60 + max_retries: int = 1, + timeout: int = 15 ): if not isinstance(image, list): image = [image] diff --git a/tests/processors/test_llm_processors.py b/tests/processors/test_llm_processors.py index 86871919..61197a98 100644 --- a/tests/processors/test_llm_processors.py +++ b/tests/processors/test_llm_processors.py @@ -6,6 +6,7 @@ from marker.processors.llm.llm_form import LLMFormProcessor from marker.processors.llm.llm_image_description import LLMImageDescriptionProcessor from marker.processors.llm.llm_table import LLMTableProcessor +from marker.processors.llm.llm_text import LLMTextProcessor from marker.processors.table import TableProcessor from marker.renderers.markdown import MarkdownRenderer from marker.schema import BlockTypes @@ -148,4 +149,22 @@ def test_llm_complex_region_processor(pdf_document, mocker): renderer = MarkdownRenderer() rendered_md = renderer(pdf_document).markdown - assert md in rendered_md \ No newline at end of file + assert md in rendered_md + +@pytest.mark.filename("adversarial.pdf") +@pytest.mark.config({"page_range": [0]}) +def test_llm_text_processor(pdf_document, mocker): + inline_math_block = pdf_document.contained_blocks((BlockTypes.TextInlineMath,))[0] + text_lines = inline_math_block.contained_blocks(pdf_document, (BlockTypes.Line,)) + corrected_lines = ["Text"] * len(text_lines) + + mock_cls = Mock() + 
mock_cls.return_value.generate_response.return_value = {"corrected_lines": corrected_lines} + mocker.patch("marker.processors.llm.GoogleModel", mock_cls) + + processor = LLMTextProcessor({"use_llm": True, "google_api_key": "test"}) + processor(pdf_document) + + contained_spans = text_lines[0].contained_blocks(pdf_document, (BlockTypes.Span,)) + assert contained_spans[0].text == "Text\n" # Newline inserted at end of line + assert contained_spans[0].formats == ["italic"] \ No newline at end of file From 358b16325db4107bad983645bc037448fbb6f875 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Sun, 9 Feb 2025 16:37:00 -0500 Subject: [PATCH 22/27] More bench options --- benchmarks/overall/methods/llamaparse.py | 22 ++++++++++++++++++++++ benchmarks/overall/overall.py | 16 +++++++++++----- benchmarks/overall/registry.py | 4 +++- poetry.lock | 12 ++++++------ 4 files changed, 42 insertions(+), 12 deletions(-) create mode 100644 benchmarks/overall/methods/llamaparse.py diff --git a/benchmarks/overall/methods/llamaparse.py b/benchmarks/overall/methods/llamaparse.py new file mode 100644 index 00000000..e2b1e43a --- /dev/null +++ b/benchmarks/overall/methods/llamaparse.py @@ -0,0 +1,22 @@ +import datasets + +from benchmarks.overall.methods import BaseMethod, BenchmarkResult + + +class LlamaParseMethod(BaseMethod): + llamaparse_ds: datasets.Dataset = None + + def __call__(self, sample) -> BenchmarkResult: + uuid = sample["uuid"] + data = None + for row in self.llamaparse_ds: + if str(row["uuid"]) == str(uuid): + data = row + break + if not data: + raise ValueError(f"Could not find data for uuid {uuid}") + + return { + "markdown": data["md"], + "time": data["time"] + } \ No newline at end of file diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index cbfaf31b..b61e5486 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -80,7 +80,7 @@ def get_method_scores(benchmark_dataset: datasets.Dataset, methods: List[str], s 
@click.command(help="Benchmark PDF to MD conversion.") @click.option("--dataset", type=str, help="Path to the benchmark dataset", default="datalab-to/marker_benchmark") @click.option("--out_dataset", type=str, help="Path to the output dataset", default=None) -@click.option("--methods", type=str, help="Comma separated list of other methods to compare against. Possible values: marker,mathpix", default="marker") +@click.option("--methods", type=str, help="Comma separated list of other methods to compare against. Possible values: marker,mathpix,llamaparse", default="marker") @click.option("--scores", type=str, help="Comma separated list of scoring functions to use. Possible values: heuristic,llm", default="heuristic") @click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "overall"), help="Output path for results.") @click.option("--max_rows", type=int, default=None, help="Maximum number of rows to process.") @@ -103,8 +103,9 @@ def main( raise ValueError(f"Method {method} not allowed. 
Allowed methods are {METHOD_REGISTRY.keys()}") # Ensure marker is always first - methods = list(set(methods)) - methods = ["marker"] + [m for m in methods if m != "marker"] + all_methods = list(set(methods)) + methods = ["marker"] if "marker" in all_methods else [] + methods += [m for m in all_methods if m != "marker"] score_types = scores.split(",") for score_type in score_types: @@ -115,16 +116,21 @@ def main( artifacts = { "model_dict": create_model_dict(), "use_llm": use_llm, - "mathpix_ds": None + "mathpix_ds": None, + "llamaparse_ds": None, } if "mathpix" in methods: artifacts["mathpix_ds"] = datasets.load_dataset("datalab-to/marker_benchmark_mathpix", split="train") + if "llamaparse" in methods: + artifacts["llamaparse_ds"] = datasets.load_dataset("datalab-to/marker_benchmark_llamaparse", split="train") + + print(f"Running benchmark with methods: {methods} and scores: {score_types}") result = get_method_scores(benchmark_dataset, methods, score_types, artifacts, max_rows=max_rows) # Display benchmark scoring tables - print_scores(result, out_path, methods, score_types) + print_scores(result, out_path, methods, score_types, default_method=methods[0], default_score_type=score_types[0]) # Write to json with open(out_path / "result.json", "w") as f: diff --git a/benchmarks/overall/registry.py b/benchmarks/overall/registry.py index 5cabeab9..fd959da8 100644 --- a/benchmarks/overall/registry.py +++ b/benchmarks/overall/registry.py @@ -1,4 +1,5 @@ from benchmarks.overall.methods.gt import GTMethod +from benchmarks.overall.methods.llamaparse import LlamaParseMethod from benchmarks.overall.methods.marker import MarkerMethod from benchmarks.overall.methods.mathpix import MathpixMethod from benchmarks.overall.scorers.heuristic import HeuristicScorer @@ -12,5 +13,6 @@ METHOD_REGISTRY = { "marker": MarkerMethod, "gt": GTMethod, - "mathpix": MathpixMethod + "mathpix": MathpixMethod, + "llamaparse": LlamaParseMethod } \ No newline at end of file diff --git a/poetry.lock 
b/poetry.lock index 5904e131..8322e04f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4556,13 +4556,13 @@ snowflake = ["snowflake-connector-python (>=3.3.0)", "snowflake-snowpark-python[ [[package]] name = "surya-ocr" -version = "0.10.2" +version = "0.10.3" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "surya_ocr-0.10.2-py3-none-any.whl", hash = "sha256:fbb590ae92b2a785e75ca25a53dd2ff59b1f56ec017a22f6127c9c7c62a1b910"}, - {file = "surya_ocr-0.10.2.tar.gz", hash = "sha256:ddbaf5d2f2cc0a08992446f889f782aa81e9e1cfa3fd957c124273365d411057"}, + {file = "surya_ocr-0.10.3-py3-none-any.whl", hash = "sha256:9831e6aca929f60374385cf40ce79a7a70eefab4f8508fe6948bf49a33487937"}, + {file = "surya_ocr-0.10.3.tar.gz", hash = "sha256:c78b3db6daaf324fd7c976e8ac100a15827cb070339744d76f3bedca00e7aad9"}, ] [package.dependencies] @@ -4871,13 +4871,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "transformers" -version = "4.48.2" +version = "4.48.3" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.9.0" files = [ - {file = "transformers-4.48.2-py3-none-any.whl", hash = "sha256:493bc5b0268b116eff305edf6656367fc89cf570e7a9d5891369e04751db698a"}, - {file = "transformers-4.48.2.tar.gz", hash = "sha256:dcfb73473e61f22fb3366fe2471ed2e42779ecdd49527a1bdf1937574855d516"}, + {file = "transformers-4.48.3-py3-none-any.whl", hash = "sha256:78697f990f5ef350c23b46bf86d5081ce96b49479ab180b2de7687267de8fd36"}, + {file = "transformers-4.48.3.tar.gz", hash = "sha256:a5e8f1e9a6430aa78215836be70cecd3f872d99eeda300f41ad6cc841724afdb"}, ] [package.dependencies] From cd98de16738901150ccf19e65a7473aace6c53d1 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Mon, 10 Feb 2025 14:46:58 -0500 Subject: [PATCH 23/27] Additional benchmark types --- README.md | 37 +++++----- 
benchmarks/overall/download/__init__.py | 0 benchmarks/overall/download/base.py | 60 ++++++++++++++++ benchmarks/overall/download/llamaparse.py | 64 +++++++++++++++++ benchmarks/overall/download/main.py | 23 +++++++ benchmarks/overall/download/mathpix.py | 80 ++++++++++++++++++++++ benchmarks/overall/methods/docling.py | 26 +++++++ benchmarks/overall/registry.py | 4 +- benchmarks/table/inference.py | 5 +- benchmarks/throughput/__init__.py | 0 benchmarks/throughput/main.py | 39 +++++++++++ data/images/overall.png | Bin 27412 -> 22589 bytes marker/builders/layout.py | 10 ++- 13 files changed, 326 insertions(+), 22 deletions(-) create mode 100644 benchmarks/overall/download/__init__.py create mode 100644 benchmarks/overall/download/base.py create mode 100644 benchmarks/overall/download/llamaparse.py create mode 100644 benchmarks/overall/download/main.py create mode 100644 benchmarks/overall/download/mathpix.py create mode 100644 benchmarks/overall/methods/docling.py create mode 100644 benchmarks/throughput/__init__.py create mode 100644 benchmarks/throughput/main.py diff --git a/README.md b/README.md index be37250f..3469f773 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,9 @@ It only uses models where necessary, which improves speed and accuracy. ![Benchmark overall](data/images/overall.png) -The above results are with marker setup so it takes ~7GB of VRAM on an A10. +Marker benchmarks favorably compared to cloud services like Llamaparse and Mathpix. + +The above results are running single PDF pages serially. Marker is significantly faster when running in batch mode, with a projected throughput of 122 pages/second on an H100 (.18 seconds per page across 22 processes). See [below](#benchmarks) for detailed speed and accuracy benchmarks, and instructions on how to run your own benchmarks. @@ -377,30 +379,31 @@ There are some settings that you may find useful if things aren't working the wa Pass the `debug` option to activate debug mode. 
This will save images of each page with detected layout and text, as well as output a json file with additional bounding box information. # Benchmarks -## Overall PDF Conversion -Benchmarking PDF extraction quality is hard. I've created a test set by finding books and scientific papers that have a pdf version and a latex source. I convert the latex to text, and compare the reference to the output of text extraction methods. It's noisy, but at least directionally correct. - -**Speed** - -| Method | Average Score | Time per page | Time per document | -|---------|----------------|---------------|------------------| -| marker | 0.625115 | 0.234184 | 21.545 | -**Accuracy** +## Overall PDF Conversion +We created a [benchmark set](https://huggingface.co/datasets/datalab-to/marker_benchmark) by extracting single PDF pages from common crawl. -| Method | thinkpython.pdf | switch_trans.pdf | thinkdsp.pdf | crowd.pdf | thinkos.pdf | multicolcnn.pdf | -|---------|----------------|-----------------|--------------|------------|-------------|----------------| -| marker | 0.720347 | 0.592002 | 0.70468 | 0.515082 | 0.701394 | 0.517184 | +| Method | Avg Time | Heuristic Score | LLM Score | +|------------|------------|-----------------|-----------| +| marker | 2.83837 | 95.6709 | 4.23916 | +| llamaparse | 23.348 | 84.2442 | 3.97619 | +| mathpix | 6.36223 | 86.4281 | 4.15626 | +| docling | 3.86 | 87.7347 | 3.72222 | Peak GPU memory usage during the benchmark is `6GB` for marker. Benchmarks were run on an A10. -**Throughput** +## Throughput + +We benchmarked throughput using a [single long PDF](https://www.greenteapress.com/thinkpython/thinkpython.pdf). -Marker takes about 6GB of VRAM on average per task, so you can convert 8 documents in parallel on an A6000. 
+| Method | Time per page | Time per document | VRAM used | +|---------|---------------|-------------------|---------- | +| marker | 0.18 | 43.42 | 3.17GB | -![Benchmark results](data/images/per_doc.png) +The projected throughput is 122 pages per second on an H100 - we can run 22 individual processes. ## Table Conversion + Marker can extract tables from PDFs using `marker.converters.table.TableConverter`. The table extraction performance is measured by comparing the extracted HTML representation of tables against the original HTML representations using the test split of [FinTabNet](https://developer.ibm.com/exchanges/data/all/fintabnet/). The HTML representations are compared using a tree edit distance based metric to judge both structure and content. Marker detects and identifies the structure of all tables in a PDF page and achieves these scores: | Avg score | Total tables | use_llm | @@ -433,7 +436,7 @@ python benchmarks/overall.py data/pdfs data/references report.json The processed FinTabNet dataset is hosted [here](https://huggingface.co/datasets/datalab-to/fintabnet-test) and is automatically downloaded. 
Run the benchmark with: ```shell -python benchmarks/table/table.py table_report.json --max_rows 1000 +python benchmarks/table/table.py --max_rows 1000 ``` # Thanks diff --git a/benchmarks/overall/download/__init__.py b/benchmarks/overall/download/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/overall/download/base.py b/benchmarks/overall/download/base.py new file mode 100644 index 00000000..cc3f3557 --- /dev/null +++ b/benchmarks/overall/download/base.py @@ -0,0 +1,60 @@ +import json +from json import JSONDecodeError +from pathlib import Path + +import datasets +from tqdm import tqdm + + +class Downloader: + cache_path: Path = Path("cache") + service: str + + def __init__(self, api_key, app_id, max_rows: int = 2200): + self.cache_path.mkdir(exist_ok=True) + self.max_rows = max_rows + self.api_key = api_key + self.app_id = app_id + self.ds = datasets.load_dataset("datalab-to/marker_benchmark", split="train") + + def get_html(self, pdf_bytes): + raise NotImplementedError + + def upload_ds(self): + rows = [] + for file in self.cache_path.glob("*.json"): + with open(file, "r") as f: + data = json.load(f) + rows.append(data) + + out_ds = datasets.Dataset.from_list(rows, features=datasets.Features({ + "md": datasets.Value("string"), + "uuid": datasets.Value("string"), + "time": datasets.Value("float"), + })) + out_ds.push_to_hub(f"datalab-to/marker_benchmark_{self.service}") + + def generate_data(self): + max_rows = 2200 + for idx, sample in tqdm(enumerate(self.ds), desc=f"Saving {self.service} results"): + cache_file = self.cache_path / f"{idx}.json" + if cache_file.exists(): + continue + + pdf_bytes = sample["pdf"] # This is a single page PDF + try: + out_data = self.get_html(pdf_bytes) + except JSONDecodeError as e: + print(f"Error with sample {idx}: {e}") + continue + out_data["uuid"] = sample["uuid"] + + with cache_file.open("w") as f: + json.dump(out_data, f) + + if idx >= max_rows: + break + + def __call__(self): + 
self.generate_data() + self.upload_ds() diff --git a/benchmarks/overall/download/llamaparse.py b/benchmarks/overall/download/llamaparse.py new file mode 100644 index 00000000..a6b65867 --- /dev/null +++ b/benchmarks/overall/download/llamaparse.py @@ -0,0 +1,64 @@ +import io +import os +import time + +import requests + +from benchmarks.overall.download.base import Downloader + + +class LlamaParseDownloader(Downloader): + service = "llamaparse" + + def get_html(self, pdf_bytes): + rand_name = str(time.time()) + ".pdf" + start = time.time() + buff = io.BytesIO(pdf_bytes) + md = upload_and_parse_file(self.api_key, rand_name, buff) + end = time.time() + if isinstance(md, bytes): + md = md.decode("utf-8") + + return { + "md": md, + "time": end - start, + } + + +def upload_and_parse_file(api_key: str, fname: str, buff, max_retries: int = 180, delay: int = 1): + headers = { + "Authorization": f"Bearer {api_key}", + "Accept": "application/json" + } + + # Upload file + files = { + 'file': (fname, buff, 'application/pdf') + } + response = requests.post( + 'https://api.cloud.llamaindex.ai/api/v1/parsing/upload', + headers=headers, + files=files + ) + response.raise_for_status() + job_id = response.json()['id'] + + # Poll for completion + for _ in range(max_retries): + status_response = requests.get( + f'https://api.cloud.llamaindex.ai/api/v1/parsing/job/{job_id}', + headers=headers + ) + status_response.raise_for_status() + if status_response.json()['status'] == 'SUCCESS': + # Get results + result_response = requests.get( + f'https://api.cloud.llamaindex.ai/api/v1/parsing/job/{job_id}/result/markdown', + headers=headers + ) + result_response.raise_for_status() + return result_response.json()['markdown'] + + time.sleep(delay) + + raise TimeoutError("Job did not complete within the maximum retry attempts") \ No newline at end of file diff --git a/benchmarks/overall/download/main.py b/benchmarks/overall/download/main.py new file mode 100644 index 00000000..01a31c37 --- /dev/null 
+++ b/benchmarks/overall/download/main.py @@ -0,0 +1,23 @@ +import click + +from benchmarks.overall.download.llamaparse import LlamaParseDownloader +from benchmarks.overall.download.mathpix import MathpixDownloader + + +@click.command("Download data from inference services") +@click.argument("service", type=click.Choice(["mathpix", "llamaparse"])) +@click.argument("--max_rows", type=int, default=2200) +@click.argument("--api_key", type=str, default=None) +@click.argument("--app_id", type=str, default=None) +def main(service: str, max_rows: int, api_key: str, app_id: str): + registry = { + "mathpix": MathpixDownloader, + "llamaparse": LlamaParseDownloader + } + downloader = registry[service](api_key, app_id, max_rows=max_rows) + + # Generate data and upload to hub + downloader() + +if __name__ == "__main__": + main() diff --git a/benchmarks/overall/download/mathpix.py b/benchmarks/overall/download/mathpix.py new file mode 100644 index 00000000..204424ac --- /dev/null +++ b/benchmarks/overall/download/mathpix.py @@ -0,0 +1,80 @@ +import json +import time + +import requests + +from benchmarks.overall.download.base import Downloader + + +class MathpixDownloader(Downloader): + service = "mathpix" + + def get_html(self, pdf_bytes): + headers = { + "app_id": self.app_id, + "app_key": self.api_key, + } + start = time.time() + pdf_id = mathpix_request(pdf_bytes, headers) + status = mathpix_status(pdf_id, headers) + if status in ["processing", "error"]: + md = "" + else: + md = mathpix_results(pdf_id, headers) + end = time.time() + if isinstance(md, bytes): + md = md.decode("utf-8") + + return { + "md": md, + "time": end - start + } + +def mathpix_request(buffer, headers): + response = requests.post("https://api.mathpix.com/v3/pdf", + headers=headers, + data={ + "options_json": json.dumps( + { + "conversion_formats": { + "md": True, + "html": True + } + } + ) + }, + files={ + "file": buffer + } + ) + data = response.json() + pdf_id = data["pdf_id"] + return pdf_id + +def 
mathpix_status(pdf_id, headers): + max_iters = 120 + i = 0 + status = "processing" + status2 = "processing" + while i < max_iters: + time.sleep(1) + response = requests.get(f"https://api.mathpix.com/v3/converter/{pdf_id}", + headers=headers + ) + status_resp = response.json() + if "conversion_status" not in status_resp: + continue + status = status_resp["conversion_status"]["md"]["status"] + status2 = status_resp["conversion_status"]["html"]["status"] + if status == "completed" and status2 == "completed": + break + elif status == "error" or status2 == "error": + break + out_status = "completed" if status == "completed" and status2 == "completed" else "error" + return out_status + +def mathpix_results(pdf_id, headers, ext="md"): + response = requests.get(f"https://api.mathpix.com/v3/converter/{pdf_id}.{ext}", + headers=headers + ) + return response.content diff --git a/benchmarks/overall/methods/docling.py b/benchmarks/overall/methods/docling.py new file mode 100644 index 00000000..f36ee041 --- /dev/null +++ b/benchmarks/overall/methods/docling.py @@ -0,0 +1,26 @@ +import tempfile +import time + +from benchmarks.overall.methods import BaseMethod, BenchmarkResult + + +class DoclingMethod(BaseMethod): + model_dict: dict = None + use_llm: bool = False + + def __call__(self, sample) -> BenchmarkResult: + from docling.document_converter import DocumentConverter + pdf_bytes = sample["pdf"] # This is a single page PDF + converter = DocumentConverter() + + with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb") as f: + f.write(pdf_bytes) + start = time.time() + result = converter.convert(f.name) + total = time.time() - start + + return { + "markdown": result.document.export_to_markdown(), + "time": total + } + diff --git a/benchmarks/overall/registry.py b/benchmarks/overall/registry.py index fd959da8..02184ad3 100644 --- a/benchmarks/overall/registry.py +++ b/benchmarks/overall/registry.py @@ -1,3 +1,4 @@ +from benchmarks.overall.methods.docling import DoclingMethod from 
benchmarks.overall.methods.gt import GTMethod from benchmarks.overall.methods.llamaparse import LlamaParseMethod from benchmarks.overall.methods.marker import MarkerMethod @@ -14,5 +15,6 @@ "marker": MarkerMethod, "gt": GTMethod, "mathpix": MathpixMethod, - "llamaparse": LlamaParseMethod + "llamaparse": LlamaParseMethod, + "docling": DoclingMethod } \ No newline at end of file diff --git a/benchmarks/table/inference.py b/benchmarks/table/inference.py index c6d4d7d4..e2626d10 100644 --- a/benchmarks/table/inference.py +++ b/benchmarks/table/inference.py @@ -121,7 +121,10 @@ def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m gemini_html = "" if use_gemini: - gemini_html = gemini_table_rec(table_images[aligned_idx]) + try: + gemini_html = gemini_table_rec(table_images[aligned_idx]) + except Exception as e: + print(f'Gemini failed: {e}') aligned_tables.append( (marker_tables[aligned_idx], gt_tables[table_idx], gemini_html) diff --git a/benchmarks/throughput/__init__.py b/benchmarks/throughput/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/throughput/main.py b/benchmarks/throughput/main.py new file mode 100644 index 00000000..6e07054b --- /dev/null +++ b/benchmarks/throughput/main.py @@ -0,0 +1,39 @@ +import time +import torch + +import click +import pypdfium2 as pdfium + +from marker.converters.pdf import PdfConverter +from marker.models import create_model_dict + + +@click.command(help="Benchmark PDF to MD conversion throughput.") +@click.argument("pdf_path", type=str) +def main(pdf_path): + print(f"Converting {pdf_path} to markdown...") + pdf = pdfium.PdfDocument(pdf_path) + page_count = len(pdf) + pdf.close() + model_dict = create_model_dict() + torch.cuda.reset_peak_memory_stats() + + times = [] + for i in range(10): + block_converter = PdfConverter( + artifact_dict=model_dict, + config={"disable_tqdm": True} + ) + start = time.time() + block_converter(pdf_path) + total = time.time() - start + 
times.append(total) + + max_gpu_vram = torch.cuda.max_memory_allocated() / 1024 ** 3 + + print(f"Converted {page_count} pages in {sum(times)/len(times):.2f} seconds.") + print(f"Max GPU VRAM: {max_gpu_vram:.2f} GB") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/data/images/overall.png b/data/images/overall.png index 0946421ac30c97d8bba96f27b9a2949346d0d672..5858e9ce3c3f68865bd5948c3ef3767bb2870752 100644 GIT binary patch literal 22589 zcmeFZbySpH+crK!ii9FcmxMHklr$#Nxk=Mf_A+#hS90 zOX}$>*e1uDWHFA=1LHINd5Xp zYA#miufO&}@RFE+`~*`b^pcbiA40-ocx9|&K&KqWhSIEi*5-z4PRbgFcn`O@F!tu^Txz&i{Uf1MDWbq+*=(nRY?$|@Hlugb&w5PrDR+>JUux= zzQQL6ZsT|P6j!WpP9A9< zj!*Z5GRi3#8yoL@x45<>)Og2(2OBS`K$@AH&m?d)H{NsilYWfja(~~6@|V*=qmY=A z3D1#Jq+5dP65IA%oK1h#x5Y8%2>}`r*mb!O+8~LD6#X;QfXQ52OxJ6NLaPzMo@$Sy zPZ%m9sasq2Jw+A|dn%l53c!>sC{l7!hClh@8*~5DgFbQDhWSx<8ip&e%Pwhml`%KJ zQQ1Vt#!_l49F7en9qhD=u181dxn@3PcXa4l87e>y7g?;X_m+XtlWIP6M|!|!^ATR4|X|sZcN*qakurD zASxkQz3*fbra5|5OXD2O+U_O23QHOq{t{;OFY(gBV zbkN2afBsrE`D}z` zc;c21PM=Xazaj*|%)UsXD)!_m60frjN1O7P$Nf8gvA3LNe22%Hg9&P9(J{v^IrnrD zSMs{UTw9)3kGbVgegmg=aa-lt>Cvv9sBz0>Z3?y>F}WvM+1c4gCxdxLCdgO;lhfnF zxfs)gJnV>29)w_{^y_A$O1jbvqwgm}s0JR-Cn8QOW*uSk6l@HW7)^tP=3(Npr zrLGNclXSW)XO(d3bd|cF9Zi%FVj9?uSe0lQFf29kI;`;D;OldsNlF=DWVwD zSF@5xu^UIMpRQG`7hh~WI-tg}YO1nc?$2@zRTE;oO;dHUH^TK+t%NnM7q5lcICgs4 z3wrF%y?X|BX5o6PwWQwj@7R1#n=ZOp_TEqa(kmpw-2QqxBT(H_n5DfTLSsp+vDM;( zMlWWta7DJdq_F4GHA&$I!Tu_{V1Jb)BJ;{W@ONxdsL;7@c$;@6zY-T}ztS^)PgR(7 z>1JbdS+|o~<4x{;J&CwIUyIv_X;*vep{XkB3ynbs$L2lhF{&O*8gsr>*17X!9^%(K z4CpHmQ|^ae#}|ePhp$TET%0>TY&NJ&POkS1k)C$pvcf$Jb8w!04bFseaL%>&BrZO%NcAr z*;!KVw2o1z!zRX%!L2(qwS^SD`b+ea3KTP5ovK?}){_e@OuPK&Q?W`s4_ozzC)g4T z$*4y%H8NIePs`>LK3QBqlydL*tHe*6N^DcKvJN$_VU5-^_d=IArf!;<-sEn7WwhFr z^4evqnPt^wLvpR=%yWd)w(5Yc5L4%2_LYGIi$)9K8fL}FLHPZh<8D?cEb=_mjJ-*G zf7Iw|WH83tbWD5tNojA~VZViYZH}A)0L<(=Ut&BIXXlL}*2E z>slHc7LJrTnD*owD{fEvTo`V_Bfv}(b6&67A3hU?E!-P9+>nsq5nZ}#dMhpIh3MO~ 
zOI>(fT^e}{PZ#_aa?3~=Wdm~~BENmot#ERxt(A}NVEHK97)?WgNnh@`{J_~7)-Q?b z3Tb1J*u--i(=lC;X0AD$i(7Sp3}fODt@SU&MThvo3EAq!lmME5Ep&BGI*2ZvKElsxp{p zF;d$)Z~m|r^TOcXiwU+8*i&h#*{yfmDaQ_kANT^uc#6F= z)>*eJ6I|h?58#<31Na;B1v;-4J?6?cBr{*vVs|)*m*TXQ@?QTwx>h<(rHA0^B@aSl*4R^A za`hUxJiqgr&IoDOHIJi_iQ?HXiP-(U<$;cVE0iOSy_hVWTntt)Vqu-FCtN!?z$kTP zyNvroZJBXLOYP;#*ZHf`XUYhM4`di-DEsyxqg0w1uPxzTe3bhEyatcHmO1`{6@k^c zfks`)CE-;JOj4BXFager6#qOKVRo7t#scRRj807~y*sy3Z*+Ti-%{_)moZY6NoRb2 zo%eQ{VfUtS2pmBz#rH8KFX^`0)k6YyiLyp-F#tGQ%R7mtq2m4vyI;hNgSB1em;&OpT>`GMK>sGv3=imz;CI=5B4klnpnr7L2 zi1Hr5CtPCk&BO@&S;{1%q;JkDG^uw|RLVYZ@A|sVb-(yP7w%H~HviQE7Ws2^uZ7&_ zA&R)r&(!W3r;AF925clvWNgqR{~Ki@*i7o>5kk+NZ0VOX(g|zdqPcGc&xA{cqUkU=;6WVSWjpiV{a%NPC627 z=&)lzuc1hnXgf4=Sfkvz3(BOm*p4oU@4U@&Zacp%@^B4d#>2X7E&B=ru_E@kVVvlC z_atRPE?oQ}9a|W|%*_kKk}TtQXi`HQ`8dM8$cSYtCEU{E2Gl%VLP#7}%9peG&@oU2 zLE6>x`z_Cq`?}uJ^=5n?zbqg3lKky9<<~RQ&b#6nRmr+ZG(IHMLS?8mlaJk;0DC{! zUS3(@$CQ0GL@%4tYOx??Q@JTSLZN=HAVckg0gStrno(HRMhb&aC`kNDLVHLi1q*H` zcVmn!LM4VMw*3teHna-HbosGtzh5XwWH&1#vMVtmV9P>DjI6&=bi6(TXQh@nn!y%tsJP2#yoni4u#Fyj#EwX7=@_ zS8a@J1C+l8vy=agjwu74&K*i9`}KL*mmPRgtrDu4i!xg*NwMF7?)&9o1;l!yMo2 zc{@#h(m#2otCb;cyW7u_Y$lzx7*D<+3{R(#n*J8_2~!W}f)rQRHQdg4%=vm@wMcXQ z=YifC0pW~vQX2ZDg0;+QMiac<3JydN+yR7Ods@+k=TaIbaZ-dxi=Fxe@dQmGa=+nd zZ0CCN*yLjq=LH=}+|DlSd(`-d0(_m$2X-s4HG=2^cO`A9#x)3KlM@7)*QH`FR(KT) zdeFSTE**2#RcoPMHq%_1PASP(@T6kmBpX|C>E;RRu8-hL&t>$Ey`r#xk|#Nacf{jyccX4J z^WM1WoJ_Pqy^EpdwZOCJr8`+$gg>UH3zV zY^qi4fe5E28nUxE`gr{3U1uWjx#1oO2dfXN?ibqV?ZijVrd+#dbx`oY(=8fH+Xn zk#(i>PExZS!<^zT&%EQ(LsKY^ zX{w3bEB!JtW*gM<8n<-6CNx0^&*dUrFz~2(CWsS}o3^w>52&SjZedWzCx%n2agVkI zwR&F@X>nFSOdqALbzba=xu<8|7Q;OwY|xy>c&}vP9#nt)i?b>XqdH+=m%p0zhfi?6 z^guPwMgD?Tj${IRF)2!8|NObF(?e;Z@Ue_qXmq@?#FZh-wY3SZ@?**E75jEN5dIKhIF6x9!oGuLAr1HZL970$eH9UT z7fx5Gy>!RcfiH!7-7xn8#J0Wgp?eJ$xmivbt$Sjjx%-(4T*&7p zTbX^Al0jH?cr{$&#SH~5W7}3|#kh`r&N$%rCm2v2LSn%uhAX&uLWXpLr z{BV{MDk3k9aheqn)t48aliyUYW_t}245w@1bPPwScm!kc+sYzl9i^&1Y4l#13=xqZ 
z`+ks#*;a4+xsscT*4tuSAJUF?1Q*vTWLEZlA{_+xKve);cG(?gO)7>Q#&1f29so5nq<9FrXot_)M zMLHDHMT@k~_hjEQf!ep=&A#!B%)cWZkglyRVXi&O{QzVxSoEsGa?&MZDxy1Mg_z;O z!2w%$MMK3EeIRPX=i5Vz8zUTAZk#Zj=oTWT3S2@0PVEt9auiGlCJ)Hp+FN7s9MHMb zTH#tH&~>sdlR}Zi%eaF;$rBgy{-1f>cqQf+xS7G ze04NJ`f`9uXp8=NyrXwa0y@%jez`KvwV|o%WQ%G9+zLKzdVwqoujCsl6zVDeA1o6Ma^mkp)iFD8Pp5j!LbkUFI0nTskKC2=j?w{N9@0cw#2OT zXx9TgBRN@`ds$lYw8<$hnM+yMG1VfuugxUpLtbG&vK|py%B;Yo&S8G#r{n6BZg5qZ zdN=Cq77+si3xSRQ#6PJZh2UPB;pK#R$6htkth_!FOnp+H(6bIWe2ix>#+PmIX0Uc7 z)iV?|sTS#xiApVbKwn>m0K(kok@J^iu|}H0FQ4r&5{9}kPT{Ht36CzSjwoJ-7r)gp z-7CZ?*7~|IT)S` zEd@7YpdAw0x|RAm*HI0ooo9Id(fo6WbFs)%H|EJ`t6sPAFh1%_t!2rk#lNs7z^-LsYAtn}v7Yb}Yo+uD^4-uc?`Sd^ zSh(ok$}f^q)slp!S(Xwmu}Ml}j_Qv4iC{*|kaj}_VV~+Oz^GwHeU~A~U1NAFp+nQY z5xky0x~7@rDTDyFp$i>?^O?}YOTB$7V%{(@Yhf&OhgKY!2s(GR^A5faZ3_LyiAq&= zcJ9T>hMJlXnq!Uy-&ODV!~~(_P`y>86rE2%F3HY&)kTV2$DA<(6jTFg$wbyyukCdRl6hmCqB6WVWS)kOMC zIxWVN@jOycdggOOS%bEw0(4>Z^(Q2P(G#Ju*E301T|O)(?2>Z0Fnv5A^^TGq3afX_ zt&irN;5FGnE^dE_3q0&g`4&$SrIU3DvlI8DN*vFgj-^DOQ}JQI{9I~_*SJ&gq0Z3l z?KmTWuAazLSvEFEm8WXN;Rw-oTcTTwm%~0wBErhr-7ZGhMIj|ox~tvHo@K&+8?vk*4m4CN<60PJZ8M@Syn;i{Bn9tj`O49o5)pdlWeb5 z*X^MJi8vAchn|Qs({b@RODMh=R%b&A#1T?; ziduI1vUjvzGhsQHTZbWV#IQ|sDNp51)k+>wkb4r#5ce6?XjseC2olj%X#FUl|m>)1ia2B7d-L}GuUm+@W`zXw;)JD zrUZ{YtvIsfLg#h8iq1f7$`BgSe(C+Mv;7em3s0dibj~La(RIg93gbdiubmSCUVGdN z+0OL*{BE3cvO$!_MASF$t?y!C$cE5bY4kq2Cl1ONM|w___9d@F6v3ubgmBHO)j^bT zTbxXMa0v6Sbo=920Pj2fqrtrgNmc&Aj?vM9BEu~QhQwg=p#;%vm z({6tyQ61c+Z;RZb*wL?WUT;UuOo4LMkKM^mo`e81hb|x*_3)7b@0f}{z10tr8+S+Z zvGj972x?xx`SP9_w%OX+mcE7Jq<9w}xBSmFa#n}Ly6e6mU-9E}*_tUA^~qOMIXgXB z=Grp;_O=$gj?np`T0#cbwS~}Yd6O}{g!T#E2DLH3vxW%Fw;C=+a6BBgkdMXjAT7uU zw^1i4OK3}?6?f%FZOTMIM7{%Tcj+~NI}sDCBvTzfy}TY^c`gaN^Op0W&+w&#umuV- zCMZl6V_`~^4T9vPH$PuXFC=Ifr!W~^HSAw%GLOnHAaW)*#zpWNeH&~Xe3GFYCn%W3 zZ8arw(PfUZ>q1#Ymi1_9SEO!j^zy9qIRCPv{LrHcr>s{NCENGn{}OtH5s=3annK91 z1=tPom*FPw1&@luUPu*QmN(fy*B`r4s$IJ({WWIYdUvVM$;)|7?EJj~zE|M{fKRxg 
zh2e4-3*?+|qDPR`B8Z5V%6X{@QJ+i7#&uK`3UBjkr`D)!D!bC{OJi=HQA-pK8=bpXr4v|Vr<&9B{zfth!GJx^8#(LlW0ln$ zx(Rs>zFN*tb$7z6e!n(WC+{N&P3|A>EDMmP!_KK3$Bkn9W~ga3Pceb9sG3wPwZWDL z;yF>L^)nhthtcDa9WOpF$+>mpZjNpuI~romOOpTndjW_CKo+!3Mh(6B9-qclP+S(P z**aWcbNd}b0NryTA_maH0f0$b5RX-6G${Rns|HY5ErwYRc4xDF3It`i>UgvP=VjSX z>!G7-d6}KBxx-Ql9VJNf{}zDG|*nrHaXe!{*x&j?U?UcOhfvev!am z2MaBqnCQirr<;#Fo3o~mWi5o1Cj{ViuIwr!*88*KR!jFm&q2xcmA+3=6hhYV`_{FU zdJvBYbVxA4gX=IB;Mkpd0GW+{P5}7me7kYWWTAOa+gE>*)sE&cmXXsUqsHL9YodM& z{Gm&r);&bEko&q6cIEG1LPnM3ub$^@TRaN0)x z+ooP(DRG{t5ZczgS2N+htvpgEx%rjEF+`Ct{1;)8O5t+C_sXLIx7?HkSiQ`I5TaTC ztW~gbd1JK9!9nUVgbPme7_#cf3q@7!j~5Wn7Fi4odtWbcpu6$ZZwO#wRa!+(;{Ee- zj7(^jGJvNWVf&`gWV%rwZRKEfVl060*CQiIisIBVMC~q{KFHCta$qUbrU~T`F|j6D zl?ZV4Sn6dH-BmmZ>OcB`)d|H*0z2BY_a1zXbTv!6q}cIYaGYm^b_Gf3HQg^PTnv^k ze<5|?bARdYD=Nj6m80Bdk9K&6cG z$j#U|El3lyZ*{n+)n7@D8BQb*Sp@)=;)}tbNdcxXi>xi#x_s0u5vw!kEtss77f>{X z&24=DVzSWreoxu2Md@-tu=5vR!rd))f7yo*ACBx_7w3eNO3ee%0Mq6AEs$caXh4r| zz2>ZXu{$k5tI#Zz;<^ERHSlO})qz*_he3MnT>ueRoO zDZcvIhOEa$AKfq!oT28FkgXg+>(NL1uIXMZ%CS$sml?N4rh{K=!eEep&f2wlr52oZ zBSC+t_4;@fmo#Q4fonk;^{-um-O2Xu=j+50UG_^5c1X{9tHj9bIFsCN^evDVv{)cr zE=(%5r!jz;q{dIG;=U%OVd`>%)+?VcG3ff}EPVX=GVgOmD;{XI+)~P z^i;2iH}~IE!VBxa#om~$Taj#|K?HOAW1IdJn}2^eozXb$6g2Qy_QtH{!l&=NCK_t= zH#}ql^NGs^-+K%}aQ*?Hp}*GuAqTV&B4YL%XnrBzk8eb)R_snF7(b~LOY85@(_s29 zWd9ie;OiOwE8_EigV<#tac2Jx;lIB-Lmn;l{sz2%u!hXvgZ9tJ|Fs5a!TQJJ|Ksic z14I8;z8ct}HT&akCf%tDChhUUy9Ni7bv`CrGtDN=@0jOShDM3>e}M;@LjQQv9}^7} zjPv@q6$k=LQxSCk$zYkod>b5!$tktJHa5bkUoE!94dwsiL;o>7K&HWxK&zNghuC(Y zR?)Kvj};J{=>R+D0F;_^CCiuqR(8yh^gtD2uYzqA+n=Q!7aJQ}KJR`>3YQ7KcNc;H zk#_LM1AdQ+N4NYb(8va8t$a->+JLSVX94t#4bR7O>y#Swh@xc|Xj3rx`03Mf!nOj0 z<}G9xB=2ZRqy3wEfA48M&}@V5&!G@|o=YL>^jWJMc&hd@eiZy>Z%--!$m|_{XbO06 z{xzDhf3f9XR~mrnIOq$C5wwcMHZ^!YR_?gRLbC!|+^inI&I!9u^lzIVe6IR29$I$& z@7(`@sYdmmfHL3SH{%QyME(i8&`l$j&j9OX^{tuV)<5w5A0lAc5QG1OO^C~F;r;8j zwXq8195N2c7$Rl}`mKz|z0OV+TC>a;u{s~J{VmKL^PsVDRQDe|7(N$yQ~~msc+n_) zzrWC?Uj@egLf4lSzy-HqU_eTV5+0VC!~XKSEfE~ZQoCs-&=7(guOd7PW?}>+clQr) 
z0pJLLKMr*5;WxBK@swN?x1W(mx4fa#t0yAbzaKJ%oO!%0iXu>T!R!29mJz{YQz4w1BG)Br+7XubL@^ceubAN!{P%d9r~!#28iXL$=W6e`Kcywa+Co`*dfBGIyH6kgx;AF2kd1HjoL z1P^}5kH#$1c4|KN1KXp~q+331OMRbK*Fg$)J?xtDjT6)^ALw4prE!+POTxS)1+-rK zhxZVqRAjkg3l5*cdgXR7dWX|od~Kgp27=+Fus} zY@DZN;#uAx3+HpYmv0z-!Zze3_$Sjd{qR(NGTcuVI*+r^lc72yBg<#nUKm%x=vh8- zfkx!%aEXo0!)-=&p;D6W-b@W7UxyeYTBQI{_m8;znt;NP_W2|f-4^Jkeb2fy7q+PG zCfbFTr^jX@+h%f~E))Iaa-iY=PLjYmsB+mhDX<*6;dZ2-j2tZs+#zIWx2Zl%S030H z!&V45_iw0#jk(iz_h0z}_%VM<@xMRw{}D4R|Nn~SK$B`WJ{dRv@<5Kze7o+NJhxt@ z8E_FTNqO`vMMr=(|EcExOUVFn9Q#JXqMs9Os1(@Gwv;!PSPmJM!xjzENrmWef$6d{ z>Ngnv)2>ye2~_#_*&|jxGjxFL9u|d!*4{iB8gU6x}?&vofRs_DFlt z&m8yfuMMJCzZ}-g`M9u&fRewd7yJ^JFz;qUZ)DKAOF`u?@q8o6<9KJWXYUJIfL zc7I*`Pel9aoc+que@8X=B(JL2=1Z*-Ys9PXn7BmIRwKnjLTCi57g(f`PDa43rOLg| zPT%}bkqvZp5xr72-{kS>(fWu`ev<8YrOVzXXv3A8NT*)d`}c`OJ!Ae?4g3!r3G9x4 z6~F-hhOR}1H-ldKxR#c?+B{l`SWGs+`lt*HD9gRI zWHgEQ)PV%$FPUTLoPb4twCH+ePk2<+$YmmRA>Rvh)<;^Xkq_aR|LG|J_JyUYN}d!h zcE0y110AkbOtI}ze)l^P#F3F)+AGTtlWtKbAd)k@7fb)5G#`S5&R(J7F%XkTm+?HK z*C*7;NU({iuKr#1@Ph81=<+7yTlpbKw8D3T_iAWa^HM~+#T%$zKtX6C6#z#wZvQQX z0Si+|a)_Lf!RVb4?m3ss_23VO;lJ`05h6JEzyI%l=WPEU_bYzV&x8#dlYiuKp3q8H zirn%lo?>GQlI%m${rmU-!pZ;0WM!YaKB%QAeR`19mS@zsL_1Mr5oJDBUbNbjLI*VF zzsFqc2fnys2TWfd+zb8UU?V!Ad!6}A+Q`voGCn&$wPtYzc;d#0gU&x2f0|i;CnZ3H zL4%@^+D9gs{6czzz$VcG*)baj2cLj|z}_a{={Pox^heLHJzJ%{@IPk}*J70}mw{gC z;x9PyWt$kpCeE$;35=?g#Chr4vzs6oSuy?xM8s?X>O;VJ!lT6Ha5f4FR2?#2*uj#B z-wT~*wIxbUyl^TG$NZ^m{&5zeMhr;KYfLoLzz^z|jy;T7#|G&@P|1ZP}zW02;^&PG^#o|k5oA4EDjfa z8p?g*QWyzMr>-vZnJ6AJ-mOvPh74yW^5d?0uI*WN$hN0>}K?pFS*oS}&;rI^6ysWd22y4I~!Gw0-*a`t_kQEDeyCP1mgdaMndk)YAR z&0Vy%;p)FS2ziSOT`B1gbMyppV{a%XY@-cEL=MPpo}mf3N3e9iDOSJ&z?m1;hBf=Y z&+`3LtCi8Ro^kD$O?>x|+0;@y;9%AA7Q!W1GZ^G7I(ati>Hj(<|LlAI7mEBRPUxNN z?bsmSzPV~rTMnH|+vQ1XiSPLUj-`aO>mHV$`@6gRXTH4m%&pI9I}K0&Mk2*Fh4y&Q z%yLT(pOA7@GqtD(r|YorKTSHU9%CUKSbu%NA8FHeKzt~J@69EJ|@>>5&^FW3wo!dil7D$XX5168X&w5oN%QR7mWIz=F59){S|Ce27 z2CPvEAh=cIs$#8Zl*fFzl~!W%dDR^^x7eiYD0;?_~#*T)z9OY!Jqek 
z)|$hWJn*IfC*)j?R!fk_hud7&qaq@P_=AA_7#Wb6$N*UcFg<=4*P|E_?UB$N?E`%3 zZSn$nwcqANFiFz>`h*QSg*h4&8h_4+DvbEykN!wNf!FK+Op#9>n}BkS*3~>0I|W;!L=mv7#?$L z3)m8?M55ITu;M>`*+=knZW5OSe)D%O-(QifSZn#A%I#!4G>?wD&C~&s6&5eUk?65}BAE$}>XJrb>0tlS&6_)*+^bCWVA7#I`3 z?5H4Yvn6JnR4t2{ewI!0)p}rADS^f$u$pcR>*43${Wh(B)wqCN9eeHy8&t}pNea`zk~1B%a5)7B5Cm(~*{FZ|!C*obqXSn?{iv$~qbP=;F)l8afL)4u3 z2@s+`?YZAIGhHRPiKv2zwut5=d0%huVBfK6d%S<;V@S}?hyp{x5B+0X{3&6Xfn5;ubyAz$o9%6kUCox9U*}6Z2Kkq+R z<*%7ntD%Ls0jK~slYT%P#C_lwdggkvJAizNN5U!QAk=$%IBxJi*8yF-{EcCzSYibw z#~eA8hU^?1%d zV@&LrzFM1V`!vIDgIcBVmyL8$icdW?-}IrZY=qdi9sH4Q>3>V9(3==o(=kq0>k?=NYo|F=U?E{wr%_dPbGW8_AHtkPHj;HP#A9g%+o3fnVlK?ebYv?*@C>=_fc&Ui6O^zQ z!TcjA0o@VOJFkrnaq5(I!WO$5vXils(Js?NWcRt;VN%<6&>L}bwEqwbu=+1kI|f}v z&w~sv&z^_fm(tJrCrR2|02wy=VXeQ^`t8pDX_mMEUIx`q zFw*OP)Rz9(;4B>&W+6WakHqAUiuE5Ne@5%aD}ijg{tpx4&k~0ievKbG6=>d;6Au2mq?2%s4>Z0s!!M0fD`d z;DTH8px12GQ4LZK%={f7w$QQlKpwXh?VF0b?&Kz1e{x#|ca$E!w(AQKVaM&%#qJye z;11)0#Ns4<{@mu;@aD>H0*TZB2CTvY(rJvb0G`NKP6`nRYN{toTS(gr7+9A)9z(|T z&^GDtax!EG2xdz;SJaNVwwm+G;1jxxEp+!4HL#|B_z$8^hl!vBS||GnY6FR{S@cV( ze!lr)XnW+@cEcJzG9v+De#$^pf-pYaDjO;bA*=J!Tkdd-v2mi6Fz43Y(Z|dw(-Vl8 zJ5KWApBFtPbEw{?BON2FI1VkFbg5)r+i;sZ{wnIwadcj=$f!_A;`tXHeb1`Hm4yz{ zF)vBfI?}XN(!$DXzjE#|swYLlYAwOT*($?{{gLT&>I)P%QoA5Xh%+!P<`B-~yj5=d z#fbsr1SEY^E;#k?jWW4<=dvcK`Mi#fskQr4W@}i3^G-dtVq)fePZg1yv3zwFle-Oy zbH|~pbM84cV1OsLtPgo!WLETkUznD$%Q$=1zJH?aP;Ku#r;{>Ix6y!4hU|(>nVVx1 zid&Y|kJ32JDatGu2q}{&Mjbh)oVFM0_V*{4?u-@$MRIC&v%)3Z_CBMF_%B|i$~ z67J8(9D4Ozmcgb#f*wAbvtU|G;N4_M2u@IxelGre#-4FZ){uSLf7<4feeQJGsQ|gP zA3Rvh?s|Gl|Hj;LyHMLVqxYk zpUiH}=uzO|)@n#h$MMLCaL1KKtG1vEg#u%4y9M)<;%Aj>`fg*ihmkIer#yYAR+-&e z+-gISv&IbN5IS8OuMi0@q=){hu(+MYny`Kya3lFwU5ojI3ahV@P8VS1)q z7M`bbT0gBqIwzvCxt6=Oyt0%LC-r0c*Y#aCIP!WYa!O9i%SukFgy*RRRv!Veg{*&t zpVF6a0K{K3aD_)uE%ebH(y$Hmg$%oU=z9x5F|5La+m)dGM335_dcR<=__JKA7w@sE zv$(N@$9C0R;<}5-e3{{azQu_D-k9qM*d}YY9IDDoK4mHDBf0ivu6tPTk4RXQpH7xP zN9ji=&z2w+dG=bx_r+)*X%Dob!eea2PqX6l^)E|0`5Z3!=nLqVF5~C*>JO)CCzLnr 
zEKP9Ck1p%J9^UE#I#GnwX$u1e3UKIZ9kOVtH#2Og(xo7X9kXA$uGnV6noiPg>ZOM` z4POl(wvuLS;=?Wf>4?08FC_O(rhRu!Y6DJ?%^#=dr0=g;y!n2&Lzrx4x@chYp!w{q z;CVJG_gf%4ozWbR&QtU{uOfx-&nDPFvyZt3cBDD8g|&=Kl;WPGAKSk<5l6Z6>fF~F zbbLPzBcA?rMWb?i4#}q&#a%>*=@RaKEfIOfntG~UoHASWDEoBdrB)QP|;I`m>AgW5wNy)7G%@ zNeAzOrsaF0^~{N%%KW?`!XxS)p{gaIGic!pa}MZij0GJvGIT1hXEky z7W{J+b{jOs>6!b;3R1B=x><)UOx}Fayf(b7Ub10nHG-L4V*Mqi#)w^@w)-2yTg_@Q<-!v?E*2f-4r_;hdIQ4o=urYH|^)yT^2jk)}d&yS{#^}S9UZ^20LA%=N^6&U6;DxP*8rHg%_MN)0c zqU=Kna87ggIpts6B~60XIjs)IfGcHWg3M`oOP{Y64YYx3^9pTBb%lPK7XMD0X6{?k zdzizeG2SDXy3Z&#t8y_LIz0FI9CjAE7JAep(fwn%lxW|4EJu>9EAFOrRobb2do7AxQgBTx5XZw}f8&O-=)1?O+`{=oO6iL>glme8#H*w$xfg??L2q@}&u#kSKpYT$Z|PWU&*>?@+HxsKO6 zn;ulA#%Jr-NT9lV-#}=0qxKLE8I_}s1G<9bwGUb$jv60lAh9j2t14wFB!sLbGHLLS znAtocvlRH3TM#01@m=;WIWxKtQ$7KpM`Q%iMFok{w*Z;`rCp=d3iV;q(w52(h zbdkUy$81m2x-CdK)pr@(aa2@rmB95-CHhK|xUs6kVm@m|&G9S`64(zb@O|{PF?`lK zC53Btw3i$&KbXcb*r`c1;B~uoJtwn_YZsy{cf0V_IlfShEz^YOQ+T4WDL1x_OW7pY z7|*k*xpWJb8qt*4&X*H68`i4bO@6U9nD=EaiFZui=iBP@BB-ZWvR=8s`B5xi*)Z)` zK9bt~+jlVvAZeG>xTJOPMYLXE)!v}t*^RPj*yV^2pbMwL(ZPcHNdPU^b=vOONtikn zTJai7GBam~Wv6CFVq3^KObgOC(K_|`o=w>kBFUIV*lrX>s=DEoeto+yJSg2^K9t@I zW}N^gRxtGTi?N>iY;{^ombU*PfPNZ!O>NGS6Nl~2meRgF$-VH6yU7~wd)|8+y$RB>%ck?{Sqy)n!xUCXSc|zZ`50m) ztrl$^K6Pa_+-SAl&>?QRe=Y+P-3`7{8?emOz>@qhMDny$8DaOS zOJRhbUH3fA-S-&5c$cpW5PisPATcftFaDfjogaspX(AoxeNkNyUZQPN)yK`>=lNyx z&5)kS9lzvD@$W6S#P3BZrs?f&PP_u*uSS!q zk(>=m3mziWsa>zd2qy!AnMrB!*PE$q#B&rkKk+6U z>F?SECnV4DC~)XZU+i)weU@9oq(+kNWxnAU7XG}U_SJGb_jPHbrti9-L+#Zaxco;R z91ryMEXq9)75Rrr)9?A%neV+v8rn9{2gL9VyYH2oocNk>PLUK`b;l)eKa^PTqG;+m zd)Be4!+YQ{6Dqf&?CE*3tUt0WQ|oI6V@oT&sGCk`l>P!nXFqd@u&6ImC|Aeq)Kn*> z$eYC%<_|QuYjsuUD#Ml%J4O(-MwL=Y*M{BLU9OVwD;+b!e8yi7`8Et&CG5NIvb|!) 
zV`z*)wYCxC%P-t}bWZkl{2mPxCBtMis&)7O1d|4A`J(KSUrJqO@KMt-^N&=8c&7-|% z@Jrem+rMj z>^MR6s?@|D*E46%vy#7fFH^sy`ivYo(zgE^aK$Uq4<38$FoUS!NNh(i0D{IBVu8R zzKi-|dY2#pIpU;mFmu8ngS%vw(w2416cKr6(7@s}>MlVO_RcM(K|~TnpF;n^Px@iX zr(5(Tc&;yinYv3qbN~JK+v~+3nCLUx&r?m*33PCLONJFJ<6Mxn&@nc6elfK6Sj7$Nu)l(@600EkbzBBDe zE*kk(Oa&`#qv=G$N%Vomg4ADq7xhJ*W!e{5WN{5a0tOvKZ%RqP8jB^_>zXJu?H7Xy z1`%rWn@+%=n+2x+D)L-kZm>tBm2@@wUdnN)?Y2RHgqb3u?+kcJ_hIKNBGJut@S6&N zM&u9y(9&fw9m1{#bTU8+(?{Gag=jR9=TZ{s0GvBhjFc7fzF9oUUf5V_S^-1Ap+M-e z01+b1bhf|=fdU4RSilJTT+@hAflCQOq~;^aD-mWE*btp3!bhh|1dDB+N=+;*BHx*7K`I)9 zU+wT9B~_GH`WtffnK>f01yH2img0ha#G-vzgvdKN@-RtrzqyeBI;CO}B~zWGDgew( z8B-vWz}C!dCEA%ISq$m<1^iBvHq&dpNzoy9k$7JTaUm6p2v#W%B&6huh$>NLb`>JUh(&UX z7A>;}nt>{=VE7Y0xz4OjHZE`*pnMD2Ow6~%pm690L zxjcN*64)h&TOgBe#9)ubzooKEjwCfV(YFH5z%lovp#No|98qV|tw`elpr!Ol0Nu<* zB!&?#`zn$n$pRy8k^(f}ci(+B=a~wwk*_2rPehbifCV%&t;qscwt1q{WzihF#xOuu zuD&ECPt;3x$zhPoLVaeBT$)jNJltAeJO4nPTt@JaaQ6)+-RiqTafh;H_J?M&BZ( zNCN0)E*8@ean6Qla~Uq-2Z8)bi!aZQZ5LvgIU$V(Ky5wJCBXNy!znyX@w~ zjX(ftW|CNY#DG{ggi5~z&?yl4R}P}tteq)fNs^j}NUw4b&9;+7v$VIdBsCAQ@Yg{! 
zQD^#M4SY*O0_c>;K{SgP(z3!;l1fDmqS>mBePWgKEJ@8r4x-D;%gw=q2esy?L?nPt zk$`A!G9XD(^ApfaPPC#|lGOY}ItMw>Qt3zlElH9jNs=T<4Uhm@k|arzBuSDQAOW-_ zNs=TN(m_C}((ha!?tL=u*=LXQ-}B%9{A1kvjqGg1ue{&$KF?Zn%{kY~4Mq8* zE0%3o#$Yg3$Q(PQ%wYU2jKNqa|Lr&Ujln?5cKn}&_2E<2D&_{(cDh#j3^`qE3lno| z6GOdCw)$3=49(3%gmw$<64-Rn+S=mM9${hAe>_3R-0Ffb*OtvTc$1|T$22Z67^`&Y ze+wd|BMcb~qhDkW{j7TVQGc`jDb3WA0$7v59az4?>V znYuInmx896h69z_r4l5%q}CN|XIX&jz+%^zwxVt6H=wpWvxyn~oNG-MDe*gK2;_yB0p$g3+ndd2aUV$0NLp&u{j4;S*MrWz)y5 z)$80-C_KAymAJULHtalq{f8fZVApCf%^h!X6z-ziRp>q zREJR`=b@K8V*PQN=_haeu*EkpP)J=}UF3L9!izuuj5Ke}d$`;3>GaHu&1kQhX;X$$ zkkil&e$~h)l8$4Miopjwg+#6x*TqMf)F&-lwMzZap+n)$KdfG@mUG#%Z@9B8pmD8y zcvx5umdVP-=H3gv%m{~VCu1Ys+!)R?qjlrEUaz=N9(XT3J-zVuMt|LfyUrJXclhvO zz6h5Z?yXx(ii(bkTzIXT?=oZ6F3Moo)n-_pd%VwH`KT9X&F@#1sAXE|ANBuDprmwI zxW{4BT29XR4=+57`x{cDHPg-fbgoROciwmXaqn6S>jnF@heoOt8uztk*~FYo(0&{& zDV=TIW^#0kO!>ayTRAy7Bd_-kUc^S|tq8fsrr*%fmylokK;%Mo zs&O5=?RbH)UR8E1Vf?G_Of9R9P!@=G0-2b!yqR{UcL@`K^h88zaoj z-#+^pJH^M}-+$k}ebw1^=8vUaXXCYb1O!eTzPYC2eZjTSFKu2i7b~9j*2MTX2dTxJ zjN{e{8j-DRG|!Jaa(lze($eDt0|U!gSgNt+A|LHKO4hE}D0X7YaX;0Hr-w(|?<(~E zd1H0bDFL=Tr9GA(ew!TaIhOVm4zgZ6S@+)wR6oZ&O) zDch^R{T6xtrH_@Joqv9QzSin1gLzZ(Hoet%0s;bzsw0ELj{6&Q-@Si7^83w4ip$Ft z;^X6^&txXGI1S5%MMNCWnjNfdY}EAGrTfR<`l9CAioDcMow|?Z=C0Rna5(7TkQFW$ z5Ppr*;h343+0f_D6}Uk=rya8szbS}Ti@|Pl&dkp4D_A9+VrJf)l`3Y|bib#35o1#q zV!?id1$A1=Y88cME9am;|2%Z{+O-(H7dNrLu^&5_Ybohi+t7uD!_m#*bo%sjW@cuT zbx&n)Q)co?E-3*S+-|M#<3BdtKZ-+ks?kh*)x_r_u9~pptoMXE9y9AvVvnzb$CB^)=3MCXJZ4+0bzA#XJ1S&m+%q^bDMhg>*D3U4?1g{^*l&rwyC4Y#b&!{B645{OJ7Y_az%djeN_52Na|x4mnSbC>=X?ofcaw z&q;&MC$?V-f`EWvtQ5}g+HEIKV9o1hXQo6ib)MkZYx`=?(v)$V*@-SLy0bEOw>nJq zrw{gy3~pSlBrGf}Q&+d@GJ;S|jJj}GcsRe9*h#G{>xw!Z*Erk$295FM%a>QN>No|@ zP36z26u7TQb(wJp=ojba=8n`YaK{d#aGH~wJKCS#8tpnaD=RNwd;R;(W}2-CKxzrv zlG&|wxx1YwQ#c(z?yz@sG#=@$;6E9A-;-1FVb7miMV74MR82Q)9{HGD9<2~0b<@-H zq`#0>idBbfs#){t%F)zi55>*nSM$hUeDmaIex1A=G4qz#>({@1yvIhiDqQX^?u)_e zM^RmYj`H}Ga@Xs}TV!yME3r+jhuUAB5Z1jHiqobc%(h<4T@!`YMUluKi#=hU<^yOLJWbd}z*~vbmil7~} 
z;kfb@bSXzEZ&|!}adV=4r%wIexh$uNByPjr>L^NIskyTE`00n0?7N>}gS{QKKlUmg z@%rd`hSjTgzhAknIdgxsr>Y2f=GW1QJDZQP$hc#z@7%wC(bVhu_3hkjh{s~_TGE0UoIC^Rf!y&%0GPc=tiGg&!6w#EGk;l zAf;cj=Acia-iu|~IuW?@?%e0FHPW5?UF)71lx=8x^+2TXb6e5vw{K4sl$MrS*x8BW zO9}&yv>|YbO<}WcSg>Hh5yZw=)4jGIaYdVt+^-2E|~VU=I1MASsS)} ztWOqm&bAvgyX@d#a5*;s2fFOpDS^V}!Z{X=QY_6L1ND{}0I8^`9-HwKl_*Dct7 z3+I{5=T?yGY<;{X{*26f_SjB80S!IV5%gN<42P}U%X($YFv2z=;fyeEfz6Bzi_!uS9qTnIQ8s#xN}FzJ+=n(>TIiyk`t}v zFYofKkSPo>&$I6mU&Sas*_duF&dnC1o*;>*olLji<2{Tw9giBX0Zg6jlxKjOAQt zM8rmX!lka)qTFnQIm537r=~v@ta9gW!?t`@?{bcFEeD6GX?F8I?&Q|~J9qB5ztG#^ z$+6eK)VMbGfl|svrGT`6z8$yvE{Otl1WYvIWZYi3c-i^M?9T?Y8FtJaa4G6=eC`~M zOgc0&k|LFF$r=z5xpH4b|qaocK z+rp$p1(DIm$H$~_CLL??ayv`F;a6pfzy^qMDjBx@51Xg_`BVx!H#TqdUk31a{PX9} zKJl&|O_ZpGwVOIGEnT_t)Qt+CPB|7B7-)nnRQC3lUw+|N4n5}XdUSSn zcG=RUFRf!Y2F;ZDyD}KB5^>@tzI=(qcJYbV5yGAje^FS-FDO{nVE)o)OEh+yd-4sl zkC#Fe@JEs9_`J;W=3*00tKl;r5T__s-~@6b-Wb;j$y5UCr05oWGul_D&{682uP>SF zIDS%A_GVd_3ASP;B6*NN!nTrWj|tXc;dXW4l%@-_l0BKW$e>!|K68omsxToLwF>Z6@nM^AUhyIPo%_Hxwtcu?M%R=O1 zrJ889j`op!mKnlb#tFIM&DnO_wrqLP;4=KT&ugug0-eeEv#UO%3o-6X~#Z&zTx=$flv#g$8 z!sVn`Icl@a$=TWF)4QK%Ci@aagvY)0@3I{fpwVjPj=5m0amY`Jb1+FZ|;gA#z;3$Vh4Y(!kt=w%%SNk>Hm% zk8;}o=`il^It64wXe?SI+2G-hbMA;4f~>5ph=j)P|F~+K>p7A?J`89ag8=woEVG&8 zw5%`hk0@Pw^g<Lp(M{NY6dKr_YoxYMb!z{yqBQ7Wl6eJ9uQDYpmeY?hSNLiJF4 zTsYUaXKdr$$ZU>m)hOlkGc4n+s}lqAAI~73Ib+)mW^ppyX7c7{9Vv4rCMMpzeY@2vtTOL1kYqB<~ocjo0!A~+IK!^9IyktLWCB{l(u;1#;w6B8>brOBcz>k zxv0Fuw$;1YNHa|KUJ6o?s9DpOwAF#J8K3Lb6SQXn3l}inUcka(Yx<7L>xgS+Sw~?j zhT|T;Y;M-N$FEiyDCuanSDRjM+9*cZrc*gBpgH3_?~WZ6;WIcz8u3%<$p6pTOQd1|?f|+rx8Ck+z3}S3Zq@AXOSsOj7si=A?kDily7Ixj@zrbAg!VV4U&x9>%|i%v zc6Q1r+LC7gSJQWUPNgTE zD_W@`;3v@B{4K*<5|I2UfE^J4Z&VZ{@4epv$g7oM@w1N0L>M<8EvG4Yj7DKPiL;rC@SbW|RTwXF3`?I?H=!=CJ3A`M*@NLk8(MyVlUJXHa}O$B5GK92$=qSTHfns zqq_LVE%sd;_?#ms?yHfB*REX~R&sA!A5K~7*m4#Y)r%J+sdUPpn{9MB{$XS5k>=?Y zmF+L@j-ptK1jHrqN(CBnpCOXv>12c3uOIKVYkauJy88(g4@*|E7mxTFxw^VK&&`Y* zwd7>V2kzsOarkh9X9RJubyWWzSOcO{a6EYscV$?G&b)afajE_Nd)=9nqqbB{jSjZz z5Ut_lUV` 
zOe|^#?nPz;&OJ=GjjBNXp5#-x4yhZ&%}jFjMNuVEaL=-M|ErD5Cv#N9zJRRj1Wuim z+;$m}6xZZ|N;m=#6bmCyo5rB*VHo(47`wrihtjTjbl`$qCS~LT#843W)9II-n;!0c z6MSW#q$JgdRFI+SiAJo*o1HYyuC0$c8TSyOAXF*jNDqqcp--RQVE63IUGs5fyj2~$ zSrt{#3ijQKKn_GE~G0!3^yNa$&Jg;-nq9sp92V8dah*YtNht(IMlbJ#wQmI zJCryc%ekE*)h z)99!uim-EMa5)5U)+m z6sEEF)*rmKQWO>I3Z&WhgBO8Yj+{OFa*&4yq)sY&>;h!sy3$yVk1in%MFUt>TPLQz_=HEf$w9i3p)iE|t>N)pbW`wjBGYL8S?*eKB_yHa%ZK&06N= z#GjD}L3*m~2U7PJ))CWo9#t!7ZEc;L`VtipxD$+oN<JS8SVi?W*Qgjr3Lmd3O$_ zXdzMvi_8&g>y*^i{>~pA#|K0KmYKH?M`_nLEbGrsBR#VfZf7ZYn|#5m0!KFFRHB|c zXc{H36z5*u-@#URLPloCO;%y5dp016yeugyp5At?OQ)+mNSs@P3yYKAmLYzQ$x4*# zt>4<3#CYz^w{`0g=c%z8td&#gVWHt)aU5gJnzJYvf+k2X*=Sc8DuYVB7BrxFCU35_ zv*0hNP+pdm2{vl)l@1UyeHeb?!BfDm*5QV48H^?m*V&1q`X#P;EHdi;q^_vDZy z!tGk@kIqe$yWan90fXIaVyI)!rOsPrA9^M#W_HgV7}Es>T=-C#F%-+E5`Gb=z^Fbc zj39$~{>;f%m#MgKR6V=pJQ^qYYfb{MTu=g``~U=(jAHUg!) zDr#`06DMwklv?iY)M0%59T&uWZ0A76X8!ct`nSLTPAgASg&$B3n~n%PkV))XEAehy zSPI^toK_7#!6qTm5i~e@e=lC^sBhEaJ9r=p$If6%i;Kb7KWjPt#S+C4IL`a-nX4I$ z3{AU-;vLzF(I624khgPka;}VrHqpKgs|$iF`Sq5e<(={j#&d0r$MnSBLc=aR=|n7p z;nqCVxW(kvWBg@vR#gt)oWS+@OLv?4CV0*Kk0*}uE}s9Xsv}!8EQt~~NR><0Y!eT0 z93DM>3?84J@J{NF&slByb)WYHSLd&H4+b5uW$V^GS!<3F3k2vq-260&`A(Yb39ULw zZI>S__Uf2TFA@g2Le@UC{=+7B-ENfph*T-u#N@4v$DYDFAHxf{l7{I8TY(E}UEgVI zYkLauv|a*t$^BT$`7{972W2UGa&g3N?fm@nxQykGTiXjoqpmJnYh|$|<9%W4;=1LF z@QnSlFFsGs@a*-hTN<|WcRsiARkK3$N`|<_t;=)8p83oFDs*V9TfI62B!@&63-d`9 zcbgn?ULWYvP_-Np56^3yK}D z$W#J9QQc_MSF17go4b3%M-f06A|;WDmwo%~vox>uI}QLEy#7RqO}(!+t_If#Fdzr2 zmyeInq3yOfVFIvfGCrJ_-z^|f225};qJ&wq>xVyYfGx1G%`;^%h8{zyI_=up`Vr7d zT~*aLUOP9MRr~VKD7|QPf!GaR-Tx7!{1VmW490Hl^6QHHUZ2OtP9n#xS-pC5!S?ES zt-bdIG%CZ(v2R3CxdM>NL9t2Q8V_(OHdowNmk^{ojoTcFsENut0^iTTR2ub?kWSvC z_gv;iM$u(a*V8w!G8k{Wx`JGZ+!&oeW~t7yF}CTcJnRDzB;Gu4;)usOK_9>eq^_p` zjmBUZJ^9%fZcew`MsH!0Jpr$2SnU0yPiSOhBugXssRW$Oi`X&zD&e=lP_6y$yC=}k z0$dQSUO!7f?nbs6MTyw=f>lSgK1qLz46CrVFSqk>nRsnlz<6d=2EvO}OG}GbIf7m+ zs<|i}6pys;%?b zQji+vIyaL(1}0bHQYWp3&-uC!`ttJflPJEclP@Twn$#=fcB(wssjnJxWCer}ej%aP 
zoz>C|hWD_tippqDM37N?@f|?BhQxE%D9gNich)!v^g$HBNCRjo(}PjIzH7olLr(?q z@bGk9>_;@^VNYoM$+zSR;$5nFt5jc*`AOt&oHj$i3VyAu6yh8lMlNO-={!683lU^r z`Te2MQDl)2TK3a>`k@P9PFNr+#D@ zM2(Zvv}w`JTeqrzTe{jR5}9O3+_TQqay)lmNkPFu#B#O<)L^vVp#t!3+O+@u`}Yog zap@m9^Fj4Yf^Z*sb8uA@mYE{!R=J0vJI)pI?%b)29IuWXK2j#$THM+yy|?8D^AM6NlIT1gLq7j&i*(rVQ7aAq;@MP(gtjtV4etuacC8e$E2=$}3z1`PAgW9oUM{QStxcN!2O~meCX^e3Zbi8sM z)4BNo7iOl$Nn!lEss<^(b_UxififoKUpy86K&)%d&2wh1%D{2z@GIctvI$Mpdoj}AY3%$*e4TFj0c~9fWYyDJK!Ur(DVU*)t5F!6qHA0>P znzTGLfyA;j0<>OIlaas%n`InOVW!#*YUhsDYHG|L&9iuaKto_%*#nWIP)wthLfnap zTd?SxQ&Slk85jTfZE0;6DoaEtkGwA*meWOpN56wwTUlATy|2%4&>Xmwax;5TADhI; zP%fBd<4^B?CteX^d>q8Ep2Q-~D%87X*+JK?T?1pI@5~P6F!bi7Yw$v-5G#O1FliWv z!i8e6F$I*^DN^oEpFZtZ*ply>k2tvn{tDy9&0hO_f_0GZ7A{%@YM&4`l~^}DJhDCP zm>^+)%k$4!g4f)Y37CZk44zemsuGv+LhnvOLc-|#wzhN1FES4%82u znuZTINJ#AGlB^jBWF)7J+T$iu>Y$Sj<=7__4826b{o5NvD~HRa%}w`0pOXth{b(T3 z&xCE865d^&aUBTA9>nJx|9Sm;qo$0+g$oy&)hA#HiGVs8dn%afhn}7ue6>5!yb?}K zMTCa(N=Vdq-r=f_)lfmfTnRUUxvAs(tE?%u{io#~?veq!^9BmmF6T*We0)hm1Ood* zDQ6sshA?n=ef1ZD?g5$GIF<)W-U|$j2lZ&wn&%99_a1p0Wa?BjWuj5|gEff9OM7q# zHi#IUaOe7sEH_}Gh_|vqnVVgu#avpe&7O60U3p8xzFEZ?3(|^LOl&M|2Cl zM4&pbK(q^1Nl$ODnngHbsz^{Nt)5O#Re~+Sm6$negCKGK{DKvfwA%jtZiSv4G0>G7 z1RfHt4^boPE4;zU3Ii3Qk?wf;a(7HZUL~kDH5?>TC5SgJAa%h#rA#H=gsrcQK0qkt7`(N+flleI0_Kk3u*PNYR z;wKnWTzpKXPQlU`oH^?4SQsoQ3hlA|cpscO+qGP;>8(;y+R$;-fJPu@!~xZJ1LfIS zYcM~l-8i%Va&vHSHPqWJGJ>a5o*8<1dZuZMv$3-B@I=fBI@>^OU9o=qerRv}+Bq4B z5~=pX2EalEu}QV~D>rE$UmD?=&c18>e|;c)HoHwFdb$;NaS? 
zB{a);S@Zb(bG==^lhyyw*|_tM#4p**Uo}17@nL53-dQ1bhl!S%@Uy@F3h}BE^fYZr z9Lf^C#NAo3PEZOVCimi!@z=^Aia4}@cmThCM$PDYzKcp%In*odgM$gEje5YjdI~M^ z0RRp~5^WT)WhU)n9XxyPd=azIT_bQx5p`G{n9`7xlwC*t?N`?kbP5 zY%8>~w5)(^69Waz2zZJ%aB8_PpUMcxRA4}jhBVUvaH+B0-rvC+So6z0B)l`~&nqbO zKX*Z$6^_uihD7<{@$vN8`t2-KTN0I39ei+!OeIW#9c^tP*f}vE{KD%Hq%1^UlbN#k zD%nYD5_ELHIx0mehru+X1x!$sHB)?N^Gz==b$E9HK zbQ6KyVGG6uNzDix!Pb-X%|t zFzJ0~a+|>%7U?=S3z^8D-W34V2pDQB-Df=99F?Bm5zYg79nb{`OP_7~@og}Ii+)zQ z<6v$C5M(z<5MORWX($0|A(o3GTCgXuG2ehOKETAMd@??&PY>PD90Ob80rteS;lP0d znAd;5k zVkYQjyo+xZzaYDUbKfb)v5!h7CMIc~8`;?S6ciNt3d+lcHH(%oKf`Tiz(dgpOwS~6 zCsSF3OC-I&udgrWx8MF2kcNNZ)EYEM|Mb%VFc!5%D7TX1%;vAJi36zKUH2khemc$M z0htXZrlwjggK$3K&ZrGHh2d@4vSlSH`5=hEbeK?V!oxz~T0Ea9v?UVhVNq5D2OigV zz}~^31cJH`0m^%VnkRuDeL_KD#-K!exW^_McO(M#Fw@pNEu3uIw^Q@)=H|@q5Kah$ ztV85GA=gV0=r6PW)vT-$NZ}em;zrfS;J~YfNDgzZsi1bwDHsXz`#*Hgzeh&??vWFG z1=|PSUI3%V){tf0^AtOxegEP4zu#WqocKm!{^udqzcRz~zx!XZz%yU^UpCYiUoG_F z683nZHyjCN(q;;D4lOMT;izMGep9i2`VkO|S20qGj8W zzO+xDES#ZdoKCrTr#3++0rYA#H071C>u?}XE;tG@k>QrD0xuWsgo_IQnzAbsS__6@ z(R>mMTgkaEa;JVtH@T=2v~$-070UdTZ)^QGzRe?~1o83N?+?IiF)|flmR$Axjnx1su(AW5s5H zzZ`>0&y4Vjpn0sZ0BmP9wkq)m#FM{xaTs-Ws#z;)WpILP)~vB^v;=k1fg)lUU$A)T z{0A;a?Iw$IO&$6K{Hrp=5uo%WFFXm*`AAmm63Sa-eZ5hryRYPk#?czc!KDstoX^c~ zbX;e`5V5+UMEXJfez?oe-E&uR8Hu5m|)E zuYv*>q0}cJKvTY&sfRJ#_H8WxC+hf21|%E^`0ewhCtZ}EB#Ss>eEU^%>*u066xQ3ZzV z4k)HwGPdw*N*_NzG2kf9zT09y6xci5s6me*?N@>FgSloa*-XLfly?4rrw_SZgS1@%nomfE`i@{ON>1N{V7Tuc z2tdAh`%S$1R6*twpks6k8n=9eMArw!y)g;V;6{ByL3zc%6H%|d34zdT9947_FnD^u3V5)?hYno? 
zg$6(_615X=Qk1yY7g5#^b+y@L$OY{mKRyyXjMZx2Vz(^RtP>_2h&3H&(GtWMUy^+= zO4IVvrPBCd{BbCUbbivqfTbc(_Y!yqv@(!KBp29lTj) zGjtk~75j#%YlZ3tA=mFf9D*g@6Afg~sM6>2CHZx<96^w}tH`Wjx)H4b;R-?VfMvXZ zB~vj()^~KieoRQi5UebzTvYK~2y#A&7lKrK*!k)~2-*KD^idhzFMQd;L6-uscmASz zRrqaGgLmhbO;pEu^d3n+fFj*jR!O}9=0MmFR@ZS>J6Cw?4%no1LT9?WcVdlCoU`NY z=;#PT8^`&JI-qIL50wJ`{2u-_e zTb<3p3OjT@xR7Gjyay(#{XS$vj60L545!IU=1CdyfsRBXRl_sgD-PoWak%h{usk%` zAXm2U#g?ZqfMtIPEs6T5h{(f5BOmdDPG2HRD>Q>lhPEvAnt4uA8P3NP`1RKEs~$sh zKrJfezHT~aC-ir)6j)e)Z-C|yKU9?SeL}p5HksTOe`2JWgnPsr0D#k@|%R&EGc`$QLnHm>O;4=)%i0FiNWr6TTh_{&z*T= zhL>aux84oK$>!K&eGF3CNmQ$4X@tk^AHfpV9hcrxiNkJ<9tk)SsYND7e-1}JitPRk z3Xk_}(a~uY$QQz**Ap2Eo7iD|Gi4~Y(6Hhfqtz4Q@jk?AuZ;Z;D}KgSCodh2T($T! z8l;_ptfm;)w>kNFw=Fb#Dm2sVH zlPv7JKgAaX?XypU)DHbi*EAcQZs1u3KwZH2dx$dlv11v1b$E+ndE0BsaEZqD)+HnW z5K#vjwY7~}4m3$*7aBFD#c4bCiy|7njGw{L#n;p6D|q>GARV{nY&6ar?A?bU4pO6- z6gmh@HckO7*J5)=;vUqe0BQ6Q+lu~_6j;fq2aq)Kv^t#X3|4%3J9xJVL6ZUHhC@;) z1O&ntMCC%yfnd;@WR64vJtDUJ?%lg(ytGe`0O7zK_`!s09F#l`mp`4TI&H_YhCtwU zk{bcgNz~_-si>%k*3QjJGi#2`~`}mW)Yg0YCAi-NmU*;E^(&ex%kf1XN0t8 zRr(p%4DH8c9-`q?h+20m=6?EPIWaCsD`>^hOP*3kh`kCNgP;P^>lhYb5F4xi>n>TM3)&1kQy?J6-(A=cGzCic)xS{WqHy9+I zI=|i>+Xr6xq=YPi%k6iLrhvj?rs;bqzitqP`WE`yV{QA6wYRyXwQ*)oA|t86Y=oY< z9*|KD*>(w({?RSK3#1DSp4E0A8eJWr1l7uTn!aY7Q!tYJD?K}28wlu+BHQ92!%X?L z?nYc}T7`D1XrKvRAt8BSgG$`iNazB6NSM?Y)ms}ELyA3wWh-0T*Om9t$nyr~S85zH z)|_v}ctQ7)X-&8R zYrp?KnEJQO6P5o|$_^>T zjrF!>4X*C42%)xL5b75$y25zdzbp(d^LA!(^-X1Ea!|H!-n?mb;%EFi{r&mDl}PL= z(9=ay#?jpZinGk`DgFu%?{ok>a^k@2%hmulZk%$Mto~8C`M)i0`1HtK`@KLyt3|i$y^C4ngQ5!uIC%>)zb= z_*9Nii{vxif~4I2lv>Y2hePjh1s0ud3|X8Z^ZDSAgd7KZvi)AO{~%bytB8XM`LhTK zNMJrAki-nXe0mR&T0&_va8=+Ay+6ODeJ4FLa+i_>Iy<95Jy3(B?Lbpc_Yf|V5}3og4Y-_iV2mOAIC|2#dfs2-A}yUf*KF?ZnQ7I^ zw>g*uRB%*}f}LSpT(*39spms{%1{o8x*o!LZxDc@$aRF6PzV2c=$ki+gsmyuh8(9H zu4J3QhkYyrsJ8o{BRn>u=Zn2f_z*=sOzQifRvv6hjVYv08K`>C z7BR7sJyH-XQt*oxzoF=zl9KZEJsi*kRs-Z(_DDhnsn(Z_mQv8y2){3irq%Vrw;+9& zjg8ssKtd~CzYD5*G`dyiF{}0vqF^*C7GTy43tgBz0#uPi+|B{<=1z157@?t!A`K9W 
ztdGm|=c8MAc={mym45|0RS^o?5J7<;o*A}+q=#Qe4XwsY)WCwA70PDalYkTIGBc`9 z@;NX=ElwD*PBU-|z!+A1V*?K0&@Jd^eQ5dyN`M59KkR+-6IQ$E+0+R>8G?u{m)mZC zj7M7BRNjctNvj;3i$aa!=pud`8w-}~a$huw#g9|d4o)W2hC?B?qf%h9Z9BcFt4ay1y_+i4p#}^S3V+*lsnvK zBQ&oq+D5DF-yB>`-CS_c8N#+DmcHE#$}`^OLEH;`w{}qkxin8M>39@X)X7GkS-_pO zTwDoJDiM3KnBe)Xv*#D6z`%RSEdAv0IdS6E{DzAK@Mem{RDPlppcuRz>7ei$n&f?= zUEPPH2)idWHKSj@BHS4PiL5_KF@&`C#h`(bPB&Bev*1NtfwJZz&KGIoNMmSKh@u7( zlqw`kGizS&u8+;x>hQuLHKZE4qKlb>xhpd9QAfNu#i;V2X@Jsow z9;KDzgU^|aEb>88aoC~cPpkt_V8KyOoAz|F46M^DL;_KRT$kOJ-PWI>4%CtI6hmZU&fGdXB7GKYa zkDp&Sf}O07te*h+gJpeaUId8ny-B<9Nh*l4Bya=R-{U6q3zq?zfpAn30KEhoms3nQ z4nqRn#A7}ic3pfzCTf(-D3*S*+llisaT5m?F_4mu%;`+4umcAVszoaPh$tctPsppU zM{aJqOB!}6e{Q))yOmJXD&akdd{+lCm3Jj({^Z-W4x^C9w+HjRlnNBMBYi@lww02X zeEwok?ZvvH^CxNcOWP{y+dvVS-tr4_+$L$9yeP~eh(uo&^y)_?zI-7lAdY3ZXy^c) zC1=C?>$a~A_?utxi5md>xhidZ&_YZs@g770UJW@c0T`0d3GS8O-Kx*dX-l^#Wu5wL1D8e3-{1%>HyvY z*uD>DfC?xMH84RUD3+qT2DSlaFL)Fp6IF?8)~zel`hxFgrVFM5WfX+ZJns(CFqr7# zfX=$nUs8?47Y*IN=kLI+Iw}N6yYoC;;`As!_))vN2}y#W29A`~6LwrgP8oi)&ZEPx zF;j#KPZA#J9dN|6GxkDOOSX6EZxBUC{BiOZ!Nka}3aZ#`f+@b8fFOE}#Z*CZ)R+80 z&gfwylh2}P1kvq5cSB&AWw=#8(>+MKLWMs5bRP|e$oRigj~FFS#4sCc+(t%tW?$(z z_}frw2Sxa!$reC7S}CjXm&))Ln|7Xn!yLDO5ErfKl9Tgk;b0Uo4&eW*@P=U2sr#sx zsB1tN8nOb8o zv;gYS|M?z;n9_l(uZ2-i~`a1;&>oZ zrPq(aENpOGmp)YqMCd4-+kL5Z+EuU_b$~-f4j~BQDFiw0xhfD38Q5?XVT!6}Y5ZDh zg<+?MM_|2Fw1_WJ#ol|j9#=x=1gP785xyIv z-5KJ8D!zb7N_IvD`NkGdt+(5~*;rOCNJY9GEJ~eh4*)ytIoW1fkVpa z6k3&ixTzl--l+!&$nEcX8PDb0BssJexMhncu4Urwi7y&@dUJO-<_kJN{Boh)MVpk_)$uQ`d3uKPgJ0aC(40k`Q!ZI z*L&+vB8u(TrnRQYP^Z!8ODP6WfcTsWeOedF>|nZ75GJ;0Nxzsj5Epj9yD~JPMNv_K zI~LpCex7nSJY=WyoFwd8@FgZZ0mzWUy;s}NEi;7XL#Tl~|47bqrVS}~l9K_8A9i$C z;tA}33^1o_jZI;)kSLA=O$nLbVYmp33;{uFG|yc?gx8#N$|ll%CuIowzUX!&4iLbh z4qtuHAqVZjfFP&Qs7N9b40|K!(>$H7lk1>{07r%8%{6>aQ7NFg^L&veE!;Zr{Lr7` z`VK>rZ5hL_+-_|UGl`_^0KOG4!CUX#IbsedhCt<|9!#w^1TlH)&Ola5!Du0L?a6|a zdjkX@BtyMjpg_Yw(innu*keD$t+Qz%64y<*$Tx4}fLR9y`6E&2JeJT>@%(4h;p+HB znGIh;**EUt>HH34Zyl{Qb-I!x1IQPanbK^|1oSJC@16QZUxM8y*%!^jTV%+vtD5Uz 
zV_Khd7=+%dq5B4}9`k$7ZarGK*n8MBYt))7;SJ;?Cl9s|;0q4957v~3P_h!hGF>YL_Cf_fA{9BQ(es{<&r@uO0NkYg z^2JuX_g<`y#{D|W16MO#$rS_1p)OPX;zaWyQ#*{+UqJK1P8`I8v+1siKw1DTF+io+^lafu7*c~h*O(Dm^OO$^!Da8Rhq*C2hk37}qA#HFj{IoVT}H{CgWHl;^w zLz*`9@5v9_W@0}!@#stV>PZrcn<@gCy@sCU-^@7d?k{{J=yj<6elCU(y#+qWj{pw;y z_@GBoiCVBKe57b4H)+7X$YhU{dOk}5-Ch)JR$)_%$RbbT@!YgMTCvv*Gwyi#_+C!# zcA3ryIX?e!#pCQ*;l8;jF0mKSR{3E=f((PGe<8~Y91dVm887w(Jv4k{Ui*@shKym_ zK<@V^Pc|)4!|ubwE&Kf&#a<*GYEi&)2+C033atygnKE_4x#W^fokJ3gp~8`Ts#KDc z-QRIg$LTY9&N!4%aL{AG2P=(`PAV;Rk#ZY?8l@lLZQdqBY@g^8A8r`Rs2}PyI@k$n zqt5OP@sc8;;b1R?aD3}#%kGEDf8prv_DG>WtmJo!7xd@?v8%&Iz$1`H*PYoIBUsyncj zf@BbKJ980vZo|II&q&7r_UywZq`3yI=KTA6(Qg9Nqi z8o3Gn?mF#&>MaUdlZ0G=b*4vjExgU(T9bSFU{(F{<%=dWqk)U+8*XoJXODub%}s=G zs8uHU&0&=zQ%P4MHIw%A7&?E!l%j)I@%L==RvEp6?kvoCb2L&9-!D0BYI2evjeW@C zVl%6M_<@>ONiu)x{N+7_tMi`oKAW8^ZJy3e|41mSnF)nvxzKI?|JGmfzbile zOH<2#^M{Tf=B4>6KvE@;DabMnIo-N657`olRE$Q&u8#Sdcyg^S^HM2jj1Z1NBaeuA z#mU_U4IWH!UWXJc%S`v2C43pH$yY~rkohW_WD3=yw=q3|2A_a@AUF#~b3kxB@st2W ztW+r^z7ym%r^k3nFtf5DU7y-d1E7||2}-z8bHMKNhr{66es*zjp?M@OnBk&1CI_7r zCg`%S5X7?s84Ugc6w0L*;df!?{?OJY(i{QG{y*$3I(pAuPs2mwN({cFu7s;_Z+^u=D)XsW^(=oLSaPQ(ULz9?nGfFPs5^ApZS{*#MGhs&mr zg`N-qFkd(Ve+#pMX?P=sUZfaSjReh2AH{z0<}M48)O&%EvQ3J$>*8N7p7|9ocyoXh`khrV8e zd^aS?e5M=C8TdQ$Jn9cvIK<0L-TTOs;^Ry3G7$f2_IvQx2Iz`C3xN3N&9qAZj!W@m za(kws95=={0^rQ*a6vJLlKUC)RqcPPWj1w$Vcb|G^azb0E(xnc7*I)5pjA=r{EG$! 
zb@2Gl=a}O+ypNOh88m*16kZ?79fd(NL^(jg4F$|M#LOS;cIJdw*wZ1_OGKZ?r*E4_ zBhg59NJD;&g6ll!e4snw;i@&ARjx#L$J+sgo zPT#1QiwT72$Nxr)Ha`9TaC|+G_iY#kPr|i*82gghsR?hJS)(hQ|D=xr8bK76qiLa{ zQK&@Snu=O7VL*3dLzE^pfGU4TrV$W?qEXILNI{)1(w~oRLYmME)Q5RA*<>aKsiK6Y zhaH(#c>PpR?j;J;$-DRYI&?j@+jaIiW3<0f9gc!GkTq2yWR*SMD_%8AL|obg=;nCmN`9;PD-M>; zNAf2^7XDfD2>Ykn-m&XlQ% ziDvy8#?Vd$`1aqmoOnGE^Z_(Vag~VOwE5rP`Oe;1@xOUTO`UCNf1_B!^8NSUi(NPg zBCiilG7$DYFZ3+W>KETeGg$`=u3(Hx3knJkeT)ZHgf$$#; zU}WL>Djgq`_RgiVxIGg!&99iZ>BFbeaT@Oi3M>-SOdQ~c+GX0X3!p*nUs&c;l6)N%UwkC0s9_9Hg(9 zx>(creeng1Sq!eSKMAu4WQsuNG3XWKCWNGgdS!U|A`4A%7hBj(;Dd9Tu!7fRiJFZu zJea+26GNb3xHMX;F4Ftp0$L)tRNHicdi1clrrjYHby43qal-JGPT1AXN$P+0*c8WMmhyb7*cWr^)YkFlb=C zyDo+e*3g)}M5KuT=gILwa||5^TrYr4iHk+15C}JUqIpqf-R&SPOaMpCumPkEy+2F} z22Ed5NkRO?&O8_ zb5P`I`nPGf0J`{*w(8H*jEJ1?<+I*uRdne-H6q zbqdD*C9_*--<;B{CaiumW`pLIP;V{vEQC4y2M>&@5>NuLjoF}E1jPdl(WI5+*G!A$ z?g9Dg6N*0R!Uo%6DZ34`fe90Ps|*GYWJ75|(dA1K8Wb+M=*)9R z{XkP2)a_N@8_7sj-~WUni;##jLO zB%fk3Ry1+|nPN%j!v_?DFoMAd$-q7>k86|k16VpheN!LWX{Hk#Z&5$eh`~)X9JMdq z8S+1o0X9?MzHHE{(>IK#_M+=tjoQ)NXbRWs?FQKSBam4&8_l#|sr%4aCphLDo%|UV zb7x!r)~@|3z`OC;8;POC(1hril2y{e!3buB4s-{Qf(i4;B=mNgOf)}SiV8018hj{B z0qh|cF^5Z`e}SB+(gO|A&w8fuz&{-#vh)-fL(!q!U%9f!zV7aY2SuT@8@ZKDByr13*{ zCZ`m%h#1@qnz@TX4M7~{ON+%()?B=#9)To7W=KVhe#Rc$g|GxqQbnigC>Zw=#D^@~ zmsl=>etou34ygH&Bmlb4%qEQ@&wJc`4ohKwrvJV~4d%O(ph^?R%Fw$*)B#aHQn)t?vlMqa3Knj}m$?;BFK9+T$I|*+$u$P{e;q+DP2tOBmiEsi$Y9{MQ hKmET_^bE``;9*2OFq^f5sfWRkIV^uD{=nH^{|7=-5TpPA diff --git a/marker/builders/layout.py b/marker/builders/layout.py index 33e4e622..0eba225a 100644 --- a/marker/builders/layout.py +++ b/marker/builders/layout.py @@ -36,7 +36,7 @@ class LayoutBuilder(BaseBuilder): float, "The minimum coverage ratio required for the layout model to consider", "the lines from the PdfProvider valid.", - ] = .1 + ] = .25 document_ocr_threshold: Annotated[ float, "The minimum ratio of pages that must pass the layout coverage check", @@ -140,7 +140,11 @@ def merge_blocks(self, document_pages: 
List[PageGroup], provider_page_lines: Pro good_pages = [] for (document_page, ocr_error_detection_label) in zip(document_pages, ocr_error_detection_labels): provider_lines = provider_page_lines.get(document_page.page_id, []) - good_pages.append(bool(provider_lines) and self.check_layout_coverage(document_page, provider_lines) and (ocr_error_detection_label != "bad")) + good_pages.append( + bool(provider_lines) and + self.check_layout_coverage(document_page, provider_lines) and + (ocr_error_detection_label != "bad") + ) ocr_document = sum(good_pages) / len(good_pages) < self.document_ocr_threshold for idx, document_page in enumerate(document_pages): @@ -180,7 +184,7 @@ def check_layout_coverage( large_text_blocks += 1 coverage_ratio = covered_blocks / total_blocks if total_blocks > 0 else 1 - text_okay = coverage_ratio >= self.layout_coverage_threshold + text_okay = coverage_ratio > self.layout_coverage_threshold # Model will sometimes say there is a single block of text on the page when it is blank if not text_okay and (total_blocks == 1 and large_text_blocks == 1): From cc88b74edcc08432174559dc8c06b6b6aaf01741 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Mon, 10 Feb 2025 17:43:50 -0500 Subject: [PATCH 24/27] Add elo ratings --- README.md | 18 +- benchmarks/overall/elo.py | 216 +++++++++++++++++++++++ benchmarks/overall/overall.py | 2 +- benchmarks/table/inference.py | 22 ++- marker/processors/llm/llm_table.py | 8 +- marker/processors/llm/llm_table_merge.py | 2 +- marker/renderers/markdown.py | 2 +- 7 files changed, 256 insertions(+), 14 deletions(-) create mode 100644 benchmarks/overall/elo.py diff --git a/README.md b/README.md index 3469f773..e7178f96 100644 --- a/README.md +++ b/README.md @@ -406,10 +406,11 @@ The projected throughput is 122 pages per second on an H100 - we can run 22 indi Marker can extract tables from PDFs using `marker.converters.table.TableConverter`. 
The table extraction performance is measured by comparing the extracted HTML representation of tables against the original HTML representations using the test split of [FinTabNet](https://developer.ibm.com/exchanges/data/all/fintabnet/). The HTML representations are compared using a tree edit distance based metric to judge both structure and content. Marker detects and identifies the structure of all tables in a PDF page and achieves these scores: -| Avg score | Total tables | use_llm | -|-----------|--------------|---------| -| 0.822 | 54 | False | -| 0.887 | 54 | True | +| Method | Avg score | Total tables | +|------------------|-----------|--------------| +| marker | 0.822 | 54 | +| marker w/use_llm | 0.887 | 54 | +| gemini | | 54 | The `--use_llm` flag can significantly improve table recognition performance, as you can see. @@ -429,9 +430,16 @@ poetry install Download the benchmark data [here](https://drive.google.com/file/d/1ZSeWDo2g1y0BRLT7KnbmytV2bjWARWba/view?usp=sharing) and unzip. Then run the overall benchmark like this: ```shell -python benchmarks/overall.py data/pdfs data/references report.json +python benchmarks/overall.py --methods marker --scores heuristic,llm ``` +Options: + +- `--use_llm` use an llm to improve the marker results. +- `--max_rows` how many rows to process for the benchmark. +- `--methods` can be `llamaparse`, `mathpix`, `docling`, `marker`. Comma separated. +- `--scores` which scoring functions to use, can be `--llm`, `--heuristic`. + ### Table Conversion The processed FinTabNet dataset is hosted [here](https://huggingface.co/datasets/datalab-to/fintabnet-test) and is automatically downloaded. 
Run the benchmark with: diff --git a/benchmarks/overall/elo.py b/benchmarks/overall/elo.py new file mode 100644 index 00000000..09316dba --- /dev/null +++ b/benchmarks/overall/elo.py @@ -0,0 +1,216 @@ +import json +import random +import time +from dataclasses import dataclass +from typing import List, Dict, Tuple, Literal +from PIL import Image + +import click +import datasets +from google import genai +from google.genai.errors import APIError +from pydantic import BaseModel +from tqdm import tqdm + +from marker.settings import settings + +rating_prompt = """ +You're a document analysis expert who is comparing two different markdown samples to an image to see which one represents the content of the image better. The markdown will be called version A and version B. + +Here are some notes on the image and markdown: +- Some parts of the page may have been recognized as images and linked from the markdown, like `![](_page_0_Picture_0.jpeg)`. +- Tables will be formatted as Github flavored markdown. +- Block equations will be in LaTeX. +- The image and markdown may be in any language. +- The markdown is based on the text extracted from the document, and sometimes the document may have had bad OCR applied to it, resulting in gibberish text. + +The markdown should fully capture the meaning and formatting of the text in the image. You'll evaluate the markdown based on the image provided. + +**Instructions** +Follow this process to evaluate the markdown: +1. Carefully examine the image. +2. Carefully examine the first markdown input provided. +3. Describe how well version a represents the image. +4. Carefully examine the second markdown input provided. +5. Describe how well version B represents the image. +6. Compare version A and version B. +7. Decide which markdown representation is better, based on the criteria below. Output version_a if version a is better, and version_b if version b is better. 
+ +Use these criteria when judging the markdown: +- Overall - the overall quality of the markdown as compared to the image. +- Text quality - the quality of the text extraction from the image. +- Formatting quality - the quality of the formatting applied to the markdown, as compared to the image. +- Tables - how effectively the tables have been extracted and formatted. +- Forms - how effectively the forms have extracted and formatted. +- Equations - how effectively block equations have been converted to LaTeX. +- Lists - if the lists have been properly extracted and formatted. +- Images - if images are identified and placed correctly. + +Notes on scoring: +- Perfect markdown will include all of the important text from the image, and the formatting will be correct (minor mistakes okay). It's okay to omit some text that isn't important to the meaning, like page numbers and chapter headings. If the entire page is an image, it's okay if the markdown is just a link to the image, unless the image would be better represented as text. +- Bad markdown will have major missing text segments from the markdown or completely unreadable formatting. + +Output json, like in the example below. + +**Example** +Version A +```markdown +# *Section 1* +This is some *markdown* extracted from a document. Here is a block equation: +$$\frac{ab \cdot x^5 + x^2 + 2 \cdot x + 123}{t}$$ +``` +Version B +```markdown +# Section 1 +This is some markdown extracted from a document. Here is a block equation: +$$\frac{ab \cdot x^5 + x^2 + 2 \cdot x + 123}{t}$$ +``` +Output +```json +{ + "image_description": "In the image, there is a section header 'Section 1', followed by some text and a block equation.", + "version_a_description": "In the markdown, there is a section header 'Section 1', followed by some text and a block equation.", + "version_b_description": "In the markdown, there is a section header 'Section 1', followed by some text and a block equation. 
The formatting in version b is slightly different from the image.", + "comparison": "Version A is better than version B. The text and formatting in version A matches the image better than version B.", + "winner": "version_a", +} +``` +**Input** +Version A +```markdown +{{version_a}} +``` +Version B +```markdown +{{version_b}} +``` +**Output** +""" + +class ComparerSchema(BaseModel): + image_description: str + version_a_description: str + version_b_description: str + comparison: str + winner: Literal["version_a", "version_b"] + + +class Comparer: + def __init__(self): + pass + + def __call__( + self, + img: Image.Image, + version_a: str, + version_b: str + ) -> str | None: + hydrated_prompt = rating_prompt.replace("{{version_a}}", version_a).replace("{{version_b}}", version_b) + rating = self.llm_rater(img, hydrated_prompt) + return rating + + + def llm_rater(self, img: Image.Image, prompt: str): + response = self.llm_response_wrapper( + [img, prompt], + ComparerSchema + ) + assert "winner" in response, f"Response missing 'winner' key: {response}" + return response["winner"] + + def llm_response_wrapper( + self, + prompt, + response_schema, + ): + client = genai.Client( + api_key=settings.GOOGLE_API_KEY, + http_options={"timeout": 60000} + ) + try: + responses = client.models.generate_content( + model="gemini-2.0-flash", + contents=prompt, + config={ + "temperature": 0, + "response_schema": response_schema, + "response_mime_type": "application/json", + }, + ) + output = responses.candidates[0].content.parts[0].text + return json.loads(output) + except APIError as e: + print(f"Hit Gemini rate limit") + return + +@dataclass +class Method: + name: str + rating: float = 1500 + k_factor: float = 32 + + +class EloSystem: + def __init__(self, player_names: List[str]): + self.methods = {name: Method(name) for name in player_names} + + def expected_score(self, rating_a: float, rating_b: float) -> float: + return 1 / (1 + 10 ** ((rating_b - rating_a) / 400)) + + def 
update_ratings(self, winner: str, loser: str) -> Tuple[float, float]: + method_a = self.methods[winner] + method_b = self.methods[loser] + + expected_a = self.expected_score(method_a.rating, method_b.rating) + expected_b = self.expected_score(method_b.rating, method_a.rating) + + # Winner gets score of 1, loser gets 0 + method_a.rating += method_a.k_factor * (1 - expected_a) + method_b.rating += method_b.k_factor * (0 - expected_b) + + return method_a.rating, method_b.rating + + +@click.command("Calculate ELO scores for document conversion methods") +@click.argument("dataset", type=str) +@click.option("--methods", type=str, help="List of methods to compare: comma separated like marker,mathpix") +@click.option("--row_samples", type=int, default=2, help="Number of samples per row") +@click.option("--max_rows", type=int, default=100, help="Maximum number of rows to process") +def main( + dataset: str, + methods: str, + row_samples: int, + max_rows: int +): + ds = datasets.load_dataset(dataset, split="train") + method_lst = methods.split(",") + elo = EloSystem(method_lst) + comparer = Comparer() + + for i in tqdm(range(min(len(ds), max_rows)), desc="Calculating ELO"): + row = ds[i] + for j in range(row_samples): + method_a = random.choice(method_lst) + method_b = random.choice(method_lst) + if method_a == method_b: + continue + + method_a_md = row[f"{method_a}_md"] + method_b_md = row[f"{method_b}_md"] + winner = comparer(row["img"], method_a_md, method_b_md) + if not winner: + continue + + if winner == "version_a": + elo.update_ratings(method_a, method_b) + else: + elo.update_ratings(method_b, method_a) + if i % 10 == 0: + print(elo.methods) + + # Print out ratings + print(elo.methods) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/benchmarks/overall/overall.py b/benchmarks/overall/overall.py index b61e5486..481753e3 100644 --- a/benchmarks/overall/overall.py +++ b/benchmarks/overall/overall.py @@ -80,7 +80,7 @@ def 
get_method_scores(benchmark_dataset: datasets.Dataset, methods: List[str], s @click.command(help="Benchmark PDF to MD conversion.") @click.option("--dataset", type=str, help="Path to the benchmark dataset", default="datalab-to/marker_benchmark") @click.option("--out_dataset", type=str, help="Path to the output dataset", default=None) -@click.option("--methods", type=str, help="Comma separated list of other methods to compare against. Possible values: marker,mathpix,llamaparse", default="marker") +@click.option("--methods", type=str, help="Comma separated list of other methods to compare against. Possible values: marker,mathpix,llamaparse,docling", default="marker") @click.option("--scores", type=str, help="Comma separated list of scoring functions to use. Possible values: heuristic,llm", default="heuristic") @click.option("--result_path", type=str, default=os.path.join(settings.OUTPUT_DIR, "benchmark", "overall"), help="Output path for results.") @click.option("--max_rows", type=int, default=None, help="Maximum number of rows to process.") diff --git a/benchmarks/table/inference.py b/benchmarks/table/inference.py index e2626d10..cfb57aa2 100644 --- a/benchmarks/table/inference.py +++ b/benchmarks/table/inference.py @@ -11,6 +11,8 @@ from marker.config.parser import ConfigParser from marker.converters.table import TableConverter from marker.models import create_model_dict +from marker.processors.llm.llm_table import LLMTableProcessor +from marker.processors.table import TableProcessor from marker.renderers.json import JSONBlockOutput from marker.schema.polygon import PolygonBox from marker.util import matrix_intersection_area @@ -42,10 +44,14 @@ def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m pdf_binary = base64.b64decode(row['pdf']) gt_tables = row['tables'] # Already sorted by reading order, which is what marker returns + # Only use the basic table processors converter = TableConverter( config=config_parser.generate_config_dict(), 
artifact_dict=models, - processor_list=config_parser.get_processors(), + processor_list=[ + "marker.processors.table.TableProcessor", + "marker.processors.llm.llm_table.LLMTableProcessor", + ], renderer=config_parser.get_renderer() ) @@ -67,6 +73,11 @@ def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m marker_table_boxes = [table.bbox for table in marker_tables] page_bbox = marker_json[0].bbox + if len(marker_tables) != len(gt_tables): + print(f'Number of tables do not match, skipping...') + total_unaligned += len(gt_tables) + continue + table_images = [ page_image.crop( PolygonBox.from_bbox(bbox) @@ -102,6 +113,11 @@ def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m unaligned_tables.add(table_idx) continue + if max_area <= .01: + # No alignment found + unaligned_tables.add(table_idx) + continue + if aligned_idx in used_tables: # Marker table already aligned with another gt table unaligned_tables.add(table_idx) @@ -109,13 +125,13 @@ def inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m # Gt table doesn't align well with any marker table gt_table_pct = gt_areas[table_idx] / max_area - if not .75 < gt_table_pct < 1.25: + if not .85 < gt_table_pct < 1.15: unaligned_tables.add(table_idx) continue # Marker table doesn't align with gt table marker_table_pct = marker_areas[aligned_idx] / max_area - if not .75 < marker_table_pct < 1.25: + if not .85 < marker_table_pct < 1.15: unaligned_tables.add(table_idx) continue diff --git a/marker/processors/llm/llm_table.py b/marker/processors/llm/llm_table.py index 584e6c04..a6f0718b 100644 --- a/marker/processors/llm/llm_table.py +++ b/marker/processors/llm/llm_table.py @@ -42,13 +42,13 @@ class LLMTableProcessor(BaseLLMProcessor): - If you see any math in a table cell, fence it with the tag. Block math should be fenced with . - Replace any images with a description, like "Image: [description]". 
- Only use the tags th, td, tr, br, span, i, b, math, and table. Only use the attributes display, style, colspan, and rowspan if necessary. You can use br to break up text lines in cells. +- If you see a dollar sign ($), or a percent sign (%) associated with a number, combine it with the number it is associated with in a single column versus splitting it into multiple columns. **Instructions:** 1. Carefully examine the provided text block image. 2. Analyze the html representation of the table. -3. If the html representation is largely correct, or you cannot read the image properly, then write "No corrections needed." -4. If the html representation contains errors, generate the corrected html representation. -5. Output only either the corrected html representation or "No corrections needed." +3. Write a comparison of the image and the html representation. +4. If the html representation is largely correct, or you cannot read the image properly, then write "No corrections needed." If the html representation contains errors, generate the corrected html representation. Output only either the corrected html representation or "No corrections needed." **Example:** Input: ```html @@ -67,6 +67,7 @@ class LLMTableProcessor(BaseLLMProcessor): ``` Output: ```html +Comparison: The image shows a table with 2 rows and 3 columns. The text and formatting of the html table matches the image. No corrections needed. 
``` **Input:** @@ -237,4 +238,5 @@ def parse_html_table(self, html_text: str, block: Block, page: PageGroup) -> Lis return cells class TableSchema(BaseModel): + description: str correct_html: str diff --git a/marker/processors/llm/llm_table_merge.py b/marker/processors/llm/llm_table_merge.py index ca8c2ebb..c978a906 100644 --- a/marker/processors/llm/llm_table_merge.py +++ b/marker/processors/llm/llm_table_merge.py @@ -39,7 +39,7 @@ class LLMTableMergeProcessor(BaseLLMProcessor): horizontal_table_distance_threshold: Annotated[ int, "The maximum distance between table edges for adjacency." - ] = 20 + ] = 10 column_gap_threshold: Annotated[ int, "The maximum gap between columns to merge tables" diff --git a/marker/renderers/markdown.py b/marker/renderers/markdown.py index 28895ef2..3d10af9f 100644 --- a/marker/renderers/markdown.py +++ b/marker/renderers/markdown.py @@ -99,7 +99,7 @@ def convert_table(self, el, text, convert_as_inline): for r in range(int(cell.get('rowspan', 1)) - 1): rowspan_cols[i + r] += colspan # Add the colspan to the next rows, so they get the correct number of columns colspans.append(row_cols) - total_cols = max(colspans) + total_cols = max(colspans) if colspans else 0 grid = [[None for _ in range(total_cols)] for _ in range(total_rows)] From 9e4247754985f86989d4155c3f40defba6dd8801 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Tue, 11 Feb 2025 10:09:28 -0500 Subject: [PATCH 25/27] README updates --- README.md | 56 ++++++++++++++++++++++---------- benchmarks/overall/elo.py | 43 +++++++++++++++---------- benchmarks/table/inference.py | 4 ++- data/images/overall.png | Bin 22589 -> 50718 bytes data/images/per_doc.png | Bin 47320 -> 89347 bytes marker/processors/table.py | 59 ++++++++++++++++++++++++++++++++++ 6 files changed, 127 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index e7178f96..b9ab54f0 100644 --- a/README.md +++ b/README.md @@ -32,9 +32,9 @@ It only uses models where necessary, which improves speed and 
accuracy. ## Performance -![Benchmark overall](data/images/overall.png) + -Marker benchmarks favorably compared to cloud services like Llamaparse and Mathpix. +Marker benchmarks favorably compared to cloud services like Llamaparse and Mathpix, as well as other open source tools. The above results are running single PDF pages serially. Marker is significantly faster when running in batch mode, with a projected throughput of 122 pages/second on an H100 (.18 seconds per page across 22 processes). @@ -381,16 +381,33 @@ Pass the `debug` option to activate debug mode. This will save images of each p # Benchmarks ## Overall PDF Conversion -We created a [benchmark set](https://huggingface.co/datasets/datalab-to/marker_benchmark) by extracting single PDF pages from common crawl. -| Method | Avg Time | Heuristic Score | LLM Score | -|------------|------------|-----------------|-----------| -| marker | 2.83837 | 95.6709 | 4.23916 | -| llamaparse | 23.348 | 84.2442 | 3.97619 | -| mathpix | 6.36223 | 86.4281 | 4.15626 | -| docling | 3.86 | 87.7347 | 3.72222 | - -Peak GPU memory usage during the benchmark is `6GB` for marker. Benchmarks were run on an A10. +We created a [benchmark set](https://huggingface.co/datasets/datalab-to/marker_benchmark) by extracting single PDF pages from common crawl. We scored based on a heuristic that aligns text with ground truth text segments, and an LLM as a judge scoring method. + +| Method | Avg Time | Heuristic Score | LLM Score | +|------------|----------|-----------------|-----------| +| marker | 2.83837 | 95.6709 | 4.23916 | +| llamaparse | 23.348 | 84.2442 | 3.97619 | +| mathpix | 6.36223 | 86.4281 | 4.15626 | +| docling | 3.69949 | 86.7073 | 3.70429 | + +Benchmarks were run on an H100 for markjer and docling - llamaparse and mathpix used their cloud services. 
We can also look at it by document type: + + + +| Document Type | Marker heuristic | Marker LLM | Llamaparse Heuristic | Llamaparse LLM | Mathpix Heuristic | Mathpix LLM | Docling Heuristic | Docling LLM | +|----------------------|------------------|------------|----------------------|----------------|-------------------|-------------|-------------------|-------------| +| Scientific paper | 96.6737 | 4.34899 | 87.1651 | 3.96421 | 91.2267 | 4.46861 | 92.135 | 3.72422 | +| Book page | 97.1846 | 4.16168 | 90.9532 | 4.07186 | 93.8886 | 4.35329 | 90.0556 | 3.64671 | +| Other | 95.1632 | 4.25076 | 81.1385 | 4.01835 | 79.6231 | 4.00306 | 83.8223 | 3.76147 | +| Form | 88.0147 | 3.84663 | 66.3081 | 3.68712 | 64.7512 | 3.33129 | 68.3857 | 3.40491 | +| Presentation | 95.1562 | 4.13669 | 81.2261 | 4 | 83.6737 | 3.95683 | 84.8405 | 3.86331 | +| Financial document | 95.3697 | 4.39106 | 82.5812 | 4.16111 | 81.3115 | 4.05556 | 86.3882 | 3.8 | +| Letter | 98.4021 | 4.5 | 93.4477 | 4.28125 | 96.0383 | 4.45312 | 92.0952 | 4.09375 | +| Engineering document | 93.9244 | 4.04412 | 77.4854 | 3.72059 | 80.3319 | 3.88235 | 79.6807 | 3.42647 | +| Legal document | 96.689 | 4.27759 | 86.9769 | 3.87584 | 91.601 | 4.20805 | 87.8383 | 3.65552 | +| Newspaper page | 98.8733 | 4.25806 | 84.7492 | 3.90323 | 96.9963 | 4.45161 | 92.6496 | 3.51613 | +| Magazine page | 98.2145 | 4.38776 | 87.2902 | 3.97959 | 93.5934 | 4.16327 | 93.0892 | 4.02041 | ## Throughput @@ -400,7 +417,7 @@ We benchmarked throughput using a [single long PDF](https://www.greenteapress.co |---------|---------------|-------------------|---------- | | marker | 0.18 | 43.42 | 3.17GB | -The projected throughput is 122 pages per second on an H100 - we can run 22 individual processes. +The projected throughput is 122 pages per second on an H100 - we can run 22 individual processes given the VRAM used. 
## Table Conversion @@ -408,9 +425,9 @@ Marker can extract tables from PDFs using `marker.converters.table.TableConverte | Method | Avg score | Total tables | |------------------|-----------|--------------| -| marker | 0.822 | 54 | +| marker | 0.816 | 99 | | marker w/use_llm | 0.887 | 54 | -| gemini | | 54 | +| gemini | 0.829 | 99 | The `--use_llm` flag can significantly improve table recognition performance, as you can see. @@ -438,15 +455,20 @@ Options: - `--use_llm` use an llm to improve the marker results. - `--max_rows` how many rows to process for the benchmark. - `--methods` can be `llamaparse`, `mathpix`, `docling`, `marker`. Comma separated. -- `--scores` which scoring functions to use, can be `--llm`, `--heuristic`. +- `--scores` which scoring functions to use, can be `llm`, `heuristic`. Comma separated. ### Table Conversion The processed FinTabNet dataset is hosted [here](https://huggingface.co/datasets/datalab-to/fintabnet-test) and is automatically downloaded. Run the benchmark with: ```shell -python benchmarks/table/table.py --max_rows 1000 +python benchmarks/table/table.py --max_rows 100 ``` +Options: + +- `--use_llm` uses an llm with marker to improve accuracy. +- `--use_gemini` also benchmarks gemini 2.0 flash. + # Thanks This work would not have been possible without amazing open source models and datasets, including (but not limited to): @@ -456,4 +478,4 @@ This work would not have been possible without amazing open source models and da - Pypdfium2/pdfium - DocLayNet from IBM -Thank you to the authors of these models and datasets for making them available to the community! +Thank you to the authors of these models and datasets for making them available to the community! 
\ No newline at end of file diff --git a/benchmarks/overall/elo.py b/benchmarks/overall/elo.py index 09316dba..9eea3b55 100644 --- a/benchmarks/overall/elo.py +++ b/benchmarks/overall/elo.py @@ -106,7 +106,11 @@ def __call__( version_b: str ) -> str | None: hydrated_prompt = rating_prompt.replace("{{version_a}}", version_a).replace("{{version_b}}", version_b) - rating = self.llm_rater(img, hydrated_prompt) + try: + rating = self.llm_rater(img, hydrated_prompt) + except Exception as e: + print(f"Error: {e}") + return return rating @@ -142,6 +146,9 @@ def llm_response_wrapper( except APIError as e: print(f"Hit Gemini rate limit") return + except Exception as e: + print(f"Error: {e}") + return @dataclass class Method: @@ -189,22 +196,24 @@ def main( for i in tqdm(range(min(len(ds), max_rows)), desc="Calculating ELO"): row = ds[i] - for j in range(row_samples): - method_a = random.choice(method_lst) - method_b = random.choice(method_lst) - if method_a == method_b: - continue - - method_a_md = row[f"{method_a}_md"] - method_b_md = row[f"{method_b}_md"] - winner = comparer(row["img"], method_a_md, method_b_md) - if not winner: - continue - - if winner == "version_a": - elo.update_ratings(method_a, method_b) - else: - elo.update_ratings(method_b, method_a) + # Avoid any bias in ordering + random.shuffle(method_lst) + + for j, method_a in enumerate(method_lst[:-1]): + for z, method_b in enumerate(method_lst[j:]): + if method_a == method_b: + continue + + method_a_md = row[f"{method_a}_md"] + method_b_md = row[f"{method_b}_md"] + winner = comparer(row["img"], method_a_md, method_b_md) + if not winner: + continue + + if winner == "version_a": + elo.update_ratings(method_a, method_b) + else: + elo.update_ratings(method_b, method_a) if i % 10 == 0: print(elo.methods) diff --git a/benchmarks/table/inference.py b/benchmarks/table/inference.py index cfb57aa2..0c6432d7 100644 --- a/benchmarks/table/inference.py +++ b/benchmarks/table/inference.py @@ -160,8 +160,10 @@ def 
inference_tables(dataset, use_llm: bool, table_rec_batch_size: int | None, m tbody.unwrap() for th_tag in marker_table_soup.find_all('th'): th_tag.name = 'td' + for br_tag in marker_table_soup.find_all('br'): + br_tag.replace_with(marker_table_soup.new_string('')) + marker_table_html = str(marker_table_soup) - marker_table_html = marker_table_html.replace("
", " ") # Fintabnet uses spaces instead of newlines marker_table_html = marker_table_html.replace("\n", " ") # Fintabnet uses spaces instead of newlines gemini_table_html = gemini_table.replace("\n", " ") # Fintabnet uses spaces instead of newlines diff --git a/data/images/overall.png b/data/images/overall.png index 5858e9ce3c3f68865bd5948c3ef3767bb2870752..1e2a9cce0991302a3545e456dff0d8781c0878f2 100644 GIT binary patch literal 50718 zcmeFacU)9g*EWnAMMXq|f>Ko!RLV#fq^L9%2gE{=Ceoz$UM+}-fQW!FG(kY6_YO+$ z9fnTm=mY)0dagWf=t-qex5%cPT zo3D86^E*#&KRtctzLZFlAzi*uTc{eV*&r~X%Wv3-zDFg4eGr~ zH3CG+bNS@?X?e?fIZ=u`dHNq4)}A4JS4H4-mXtc=fVRkJ@X8cJPLu@mR#Zm+uQ^dJ z_R9-JeOq=?y|fUC%^J;2&kLK4I*7MvloE5AZk9(eB~`MiOgG7ptghf!X>zAy4BiRB zhh9L7S!GqQWEJ+t9wke05zuaYX;?ySF2#bm2JdSi>Nva*#j^8VzP?u~_9l(>2!ID(UYc1GQ zNGiEQoH^y{++^@-w1rdN(g4cZi;>oT?s?ZQ1O$&=-6v9C(l3N^R8wk2xdc7Qne93? zHQIS@aWege4+eC}d?^(j1+kFx!IeExREdogKUyd9aD_|y@WR&%IMJLj((aBdI zYktYxYFxpbR+yEocK)e;XJ*l$Y1Dv%M&+~JJ5T=Rm>%HY-*sIlW^Z?%Ib$+aT+A|6 z%J};Mg~a|8jW9Q^?W^OotIxU9T$ZX+TQx9a88vn+T}|}jZ~4dGY(kfmjx0PGVk=M_ zwf1tI%{;HXc*<&`jG>RG-!?MHTDxO*yo9!-b&QQvsXAe%O~={5eiCDJih#gWqFFe}bI!o+`gL*hM2hW}jD`16efDyRdRRpZVmQ;f3aK$o zLmSI(mED--q%U}K`k#p3S6*GfT4T?a@`P`T ze;0u%PLT^g0o!o}A0Z#Bjb{`|;mCI;p`~$E(Q4!-tNL|pV#B$!a=9x#Bo6!Zv&W0m zC999oYP<8#w4^UbCsjYBCFl>}eGHL_;?=<8$~#YbuP~;0;OH~?mM%=7DX^-=kG2g3 zJJ=65MPbjjdZ)RtY)3DfMPa9J5;j_CtaGzTyGf6YxcS;Wfz;yr7ju!;E9ZZa>lqF%8PB- zn;O`cF(b#FCa%dS?#&%{GbpT~;;k95NGqn$DGa^tPl{Et(MzAKKBfAWMbK-{Vq~EC z^$wPy$2D71i0}jiweu~OeQpJ;B~+w&E1h_I@}yoDhM<>tS}tAL?30Z9TT6+#J8G}? 
z4VG=2&qZ^!9@cQ{k5=nrui+z|JzpTpGI1eEj!mBab=f#6{l#0nC2t97snW!Q%4aKd z$V?1`N}gX@hI9fW9xxnL3hEGxjFU=5?FVy z8VcjvXV~Nfh|bejC~n~FHz^;)7|@`84KS(ZNgxxPD5dMueArerGV?gwmuMHoyzPOi z$<_JbRhM9s+{I3OG0`sLGhv<%E0e(q7PXe1zl5Ayi+4T|=3Vre(GK(?kqTwxK@KbP zbIYU*5|vZ-`N>)hH!qbXTpkubd70_Pf z5vuN<3%R__vXve^BWl08*j+NR@Om!Jemcsh{o`Ixw&CKc3zuRRdBK69^b}3p)8p1Y z$yGOSyOT{PW_PEs??xr1H2?ADbMh4#!}QiZ+4@8oCsBDsQX2h%R)c;<8X38c%sSZe zdZR?qs31RS8;`UuYeurl`i@(Cq7-{3CRh3y1Ya3=bqDHt-OSmYA$tH%7fejy+1GC= zR{%u-FS#++bBS`LR*0ks`-HOIP+zzH!b~aaY=?=Y9p*rV`}x%7QZLep>+W`=8ABBA zOXR#!M`m9{bn2ybzHe7+P7VdvKAo~c?Pp_(9m za_hWQ*{AfGx>>`auJAW(j*W};TUT3a?4`R`dRtdMaNzyOf$=*mUqvsJy173%;UeqqjwwQ9pNn(DD%z zW5a{W<4-j+ImlWkvb_9etc#ylwt3|-F3%n+$_bFy7~Mp0dZ-u536Bb2PJvxlG-$Z> zz4n(Gi#=<6$*HDQ1DTFDz;32>TF=V(tWw2gdg_CCx}nGF8ZQK550W|`t1mqDo&=faX=O2;NAp{-@ds%T$boYmORUaw>` zl2Tj4Y%yDAHC<`s#(|S>!cEMd_wlw_p2kkcIz2v5U|d3RdOs9`mv+hdx4G5}ulCWd z?#t7h=r0>LH_YG{FP*Tbn6EvAdr{_%yDpT&C3L#^j2CuT_jwCNdr z10TK1w&j(2eld#jBsrqZ$Qh`~*l@>z{+@kwcxVJ`bT1+}#hDXk%@;N1@XuxjSE4syw@PP;ajO?jgIvA2$*$chPQvfG^TAbxQ)3pF*_+-TvcqsBvS zb;+8s3o?nNZ%FhtlttG0D~jDQ^rb=?>a8@6YyeS04)7j-?7)vqH4r4Jfc2YQ3umhG_{ZeoL#ywW-O8M+W}c$6HoPa#pav7 z`x{gC<2g-9`EkZ9Z0$rsiQ=IthR@Sg(}H5LRqi}eJ3U1Qq_e09R({a$qv$fAIABWHfBR(Cq;oBr7_p^ z_+LiP+>pwc&KVbau%uwjvV5aXdc$fdokyW6B{nna;Pc}oq7cw-LG2B90+lm$Iop!4 zD3K+9z^qT`?M}Y2H|ZKWeLHkBHY;=)NMVMb#92$XBbAe8R-jSw{%O65+!%_vOqZY| zaqYd_dGqKB)wO&%`?9k*aOiDlVybo@&%oKN0C7Rtz%qxTdomSGTA6h}J2`h9;lFlrBB|CIK>rG*Wm6!zb z1LS+Bel@$pgsZod@gMt;R8u^=I)HL$O{NYxx|YdfO1)EL2F&$h_Ee@EczWjwo?Pc( zbV2N1YR=u-rB=rMN1%QG68WjtzjQx&K@zF1;L_FonNP$nLPTYo7m zY)R+Zxn>`lRuyOJ-b&N5G^l~tyDoQFs>naQz#~C(k-p@dmpt25pKJ24=9T+=TV*dl z6~8;b*u84c#1y_&PvL^7M*Devd%R7zYei%kyVNh8!sLnlW{mx_hiYUz@8985P?(Tm zVJQ{a64-b7DAr!nSIJr{{r&5x-g&4_l>|i}+Ve!DR%dYt^((OD78A|ItVsN#Ra=oh zz-8m-auk0sm|2Z!Vt;r{o+<&sQCi4h7m>nmI`+X521SzCWzlViYwzC>Fu`AM>#Hu$ z0UN!#;k65kxM#V5oCJE}GR<}+%s%>dO3$p9Y8bvpSA6Js=6<`|_p5qhOlrU17}R3L z{)+E!yiyG>Lj`m0u4soP+^3m&p0)cyulqyWnVCCfrgi z?ZOeB2Bjui^MVV=6w|CQUhNrh^L 
zV3ZnH&PDoK3-6R^i$mv<6gBcIR@xRzhc;;$mqyzV5DZhDGf^;(H7ZUM)xK2b+NOBJ zn8Az5kDg~1z_mQ}GAeQKUO*VcrpGC@1Nj-Yd1^jtIwNNd=_ZmYcn(J$VHn6zp5Qs- z?xp#J<50%2lJXjq>J!(|O@uoindwU-i}YAsQTdbPaJdQoCb7`H(%Jx5eLMAJ|6`$E zV{QWVulTOhJJmW(g@yQJMwf~y37PL5rD>_W*-q}o)|6n&=K7~&7ZtJPx z-M~}r1MpsFT6qeeO`1EC66ds6TK3@7hgS9-lw3>*O@S`?M4w#^l&BN-jhwIcV`s>o z?Xu|sXnpF?p_}U7(O#8aACe3KCp#e=QHTBF2s36lOw7aGZ5-a0op13&l1kq44AJu9 zEXm|xRTOO!Y+Wbj)Rsd_;~ow>@>ZAe1zRX}ckMj&A^v_+GReH#g9nObSNAJzDK!)p z{%~BQ3XgpD?Q@$5cJEGzI@;>Hokv*}D5vKYJEp{0GG64O1x_N&T91#szDry%f`06P zI0}AW(&4bW{9Hawzh@e&9A}_IGC{d}cUI^z_Uxl;Z>??qv^y}=g~iEE@5F-k8BMeP zQct6c3_v8DUDQ^x8t_&DdccEybCIHX>yf2F?mW5Sbbm2gJ5#htdiYylZ0P)T+Yn(%Aqw7X*LNNA>){Xx(nsH1x}mzM|8yH`?km4lN8H@(u{}wgA&*_Ho?0 zWC@%G#+~Q=%dq@RTr8y^Dkk&&YZZLXtUKwcsSFBg0b9#`CvKdUHE&hNEqfu90=i^s zq}}3qja)sZZs=@bta|g(=pMA`{>M8+SE4sc@{J4cxL3v9%@+Wqg|!9svHCdJZ!P_g z^X2ad=bwIPhXQ^JfuqD7vB9!=3e2zV*|S!_ogE*Ws6aUr(pVnIo~jq0R;8$6K(M6?`|3a_u#Hb? zJ$9fT9(UVZUKC_Ay1&?+nlb{7A)}WJqN41t?^uCYh$~S5s4;HR@}V`U;EE*w-kDB| z?voiRD@*gln%aXA82#-e{8N{fIt~vB3_ers9ZCZ%8)g5~%UaQ&-%JFdhO6N2^nVPpF)2n2AYkJ08o5I!9^;TpoJyR4-69+fR)odv$fNOwa(N z$8_x=AUIM)SKu0YLG`vV9a?|h0PR1hcd!QUsY<~>YbrdF*8|l~daj%OHiG0sI)in> z%9T`B{hmF^4HB-(Lwcckt1FA+<`Fid8IieYvYc)+tYQH$sJoR(L1YM|^r(aqK5jsP zgGO~+G*OydA>aMf=spwq$Ml&U`{~K|^q(yNqK>u;f-UnJ8uckPn)pCFpFM-kTS>`PTsRUHmuoGn?tQISGr<*=d(u94vtul}xiZWpcb}(o zieBcs2$imEt8p`g7OBbXvc@&#DW?OX&mYag)6o9PfCvu59hPcA`k`1FCSXZ@o72h` zQ>^1(MoY;F`Oua@k?WDbw6ovp@RK?=i439-k%G%wvNv6hyl0;rurLL7&*|8eh{EnD zj?y%sGtCWHeY#^1(%LyP*q*m5f{DDi#EmkN#Lad#tJ_T{l3POnwjHM0-zMf|y=+K! 
z>5J2Hq>A*6x74)S5zc(YBE3$OQr+K)g>>hDP5aso zuQD!Y>M&oSJf&jRUoD<7Dd}_g7spufwZ(n**iSpvUt{oH}^Z3(Ek@H%0S)1I3tW_{{0mR4ietl2{+brwNnM=_LJbuA$F{--3Hsw1V)y!^08Bbn& zPRmU5gzxip_T9a)w&`&eXMeZL%;@N2lJkUAhNrVj50|8xVej)r%V-3aU$;u^xWiQN z0(EnPrIb@;l$3WuHh|dio2g81>$m&yA0basmv)dZE!w(#LrSjuN{s}0|Jf4X+^Hiu z`$2?~xMnPgY)yqy;hkC`;`p-~{+eer9y*Mg1QV&2xHo0BiTD&y^BBMAQqWwQ8#nqj z)V6ukr-e~EV9n!uQ0V@>o&smAaz)HcXO;y?-dqtX9|R>_DmT&N0fi98Lpq&hG1eG^ zT&pa!DbMI5_UZ??{T0@=F)PZRB$p;u>!ie}_xi>^_S>Df)6|D^j^Og8@Jd{?>OAl< zdq*KO&8@E9B@x({+H%u1ek8U1zNFwXherI;sYB#xnl(YxzJ{LduIjHa#qzf;+V#>5 zo36<;DOcnP8fT5YlnU$BE1r0B$xrG?kv!c*){~J$2?hv1EWA3SDQn^KV2w3IZV$eR zb60}R#)`44Xn#SBkmnqzVI;x`aYP? zFQp;^;POm++`G4-lKj(G1NIyww#{glrP+b1vThb4bd2^B1C5rKlg;F`kMOR`9CZT) zPXq98Hjz9+Co-pXB&vrIy>UIRt^}t9PR#CW7TZ}}}&4$k}_{oGPKs9PA>}?!(K^0*OJU)Hyk+|Fi?d&3a zt7dSFfjHEP@>{0wu1E;X_s4gdw{z-k8VmhTe{IoWB(`LsQg*~QnlUcR3`dq1Br*zjBjeLrC)4d3;v+ z`UGI0cu3)E;3pF{j1vS+Nm}Wl21!9;MCOt;$eGS(t6(DK0;$Ikz%Vy8yTi(oA>Yj_ETlJ%0fX<3T^o`%pU<;{5dq6ypCil0I(%)-M(sXXu&thiETbk&Bysmqal)zE)C+?4&x-=jhLm#LjgpDu(hSW5cZUj8m zd>)`@K~g*sVA<1Co(f$U*s}CwNcI-P5q9v~>%I#Rk@{_yZ^?{1CivcHDDDM8YZ*Gxi(*I0E(YUq`B;H|coU-kSTllV=zRN5rTQcLJDC4NQ;Lz_W zmQRn-{nbZK!w)%3#q^7yQ!90&vH<|~G+vP;wp*NW(+Q6QP}kF8Qsbr*C$ss#*eTC- z!hY_n2EB4w3B=nnv_l<Pm#`_GVjz;$!|@q9SW zPYyW)VTXzayV3M122}cQcj9lny%W)gsKD`ni|+$`V`a+vkf?&Bq`2&f3k|GprHTPh zHn)MZCCmO1lUq6jypo}_`D(SP?WVBTPahzl(vq*J#`572lbdh9p^OQt1O@Zbj-A>5+Jpf;T51_ z_6ON7yqjA9NVy#W3l!CjYb@4ab)dXOTe4Py0M#jT-Q{PH(^v!GPV|TgF6>56J-PJz zK+JrZbQvnjt(&&&*ovU}0J&Nv(q)8cxx0l}yDb$dGUFVUap?9oI^2zk5LdBeB1OtS z>;p$~CAU@zV+kwE#k|$UJXE7_9=I@W<{Tn%@GTxeya$*mNFve=Mq{WK8eW>4IE~+I zAiM`qDtl$&vCz(KNTpD)dw$KFvz;+|m%!4PP!8jYUA-`h@b}5G>kwxh)TpI~ZCa5< zai_FH;~=Vy-@Q!Nz0jQ2yJIGGZm!Rxk3`7?98Z2*e!mRon+fncJ_VaML^Z|HmB!sb z_U8jTMoYU8iy8KB%R5gZxD?;Dxks6tZ($lo9w8Y_qb&JVm>!=-;ws9qH1HRs1RaJ# ze{RAzk7^X6Q*ndo117xHhlG;%RlEI%2c8295imT6g3yN40~KHpXg@M=w zKb&Y<0qjvn@`%4qtP5>v+gbgbr?g7#b>H0RGK${8A9cDFNojfKH|Ev=Dz2!tQY`H8bbW3K$-N=LCLgEWO_MqkLwbKrJ_#(_^pIG 
z=9hln!sX6G7dWYWH>YJLwn6pk9GUMFy*|q8tY7lnP9Fy&zH^J|} zThp`UWxNBeuhA>c!glS@ZXiTez^vmsglmYDw2dWA7Nr-D$(d;zuR*v2vvPI6K%>6m zqUE@}73-xeUPxa4xsuI^g5X8Kq^1;srgud&txTg zUm`E{K?m|g`eg_&&n=psyC~7D6<4v;pQ<{gSo%!c8-cuL+FlUV%oGYPlc{%DTgw|@q6uQrwuu0g! zNvT@B^zlKJxiKJy5oAaE2qedPAX!m`)79!OzN!ABF{QlE_=bCTk7PbCAROWSW8xXh z4`XGl(Woc2Q6pOb=NMU@PqD3hG~|l}n%hpEtxJi*)%UiM-eg~d^hJ_mJ^hfJL%Wpg z)c|4)Jz22&Ix6-dIg6TAAVnrkbV7r~sUqjn)ZRdgE*G~+ClIE`aEWW$= zr7Tc9^{I5t?nj$iG1I=G$bibD;6A1PzDx@|P41Wzs*ps?>QBD=hexGscZLEjloW!1 z1)@pdIZhI(07gv#jzbn_${jqc{m!^?SPn>tTeXDq^Sr3>Rb=$gM(>z^u?qBsHq_vILvLW^hXIV@M9J%8x_PMtI zvVQE`b>xJ9vcm&CrNTs=z%Q246_5iV{AS-Gl*WzD#q=>8i~Vn(K->VwP)$p%mng2k z%&P$MEH&m~{nMt>yC?D{yyI$x4G3$Bb?Lyymee#0M`g}}h19B9!K5m_+kSEUhLm9u z-RwhzGzb`O+x=Bd|AW2q;n5B#rUi~BdT!cz@&J|nTo1Y(@v)pFz3$(;3}H6jCDsbE zbFV`QxVCiLSj5q#R_YnsKsaZii?ObI{vIFV(gmjvM)tYMwxt{>G_G9g%9Ep!RoLU1tH`$VsBB&U2`pPG^bBcPTa5J4IK2KoA$5bT&=DvsdWDfTUba_HL^j2xeI9`I>fN z-`U6e@t}FBHq7+7pJGAq`!p5|3#Z3QKVwSs!mr9}{o%1QSv!$ZC58=ASkhM(aMOc6 z=L*C%;<3$iJ{r#|Pv?Qn@_h*+eT}OarF9sK+sSMG?*7fMeysB){Me3kfUcne;{3M0 zVmH^tfiq7Kx*Fg-7T4{S=3>7zUQ&YhW7m|hYlmCA7dwjBn%}pBsOb3H7qB?;;49SI zS=)2hoVu3yMzXxfU`;SBrP9RXGyv`rc>OX^A}A2+<^oPGNu%iE)RpDtRBFN3LY4(sC=ztruI*@)xaeecI|q2Lcz(B&#)V@k{xC!+x& zW)7dXB&S;c;=EBi6k~OUYFi1ZO)s4UeG`*vCc2I%TlRa(4NX)!fh)>q!=^2Tad6Z| z(%r4X>=ve6#NTAk^}3?Fu@5>%4iTI3q&6jysrenVQ<}#w!O>QD*|U>Ck?2o%;PG(? 
zP=(HuWfR*AoJo+NT+RVL7-}EqCtPPqb0&XV1MS7trM1t63J@zZ{l#aypf!M69|X)f zg9<;*pjRX@4ca`u2wG7vYq~a#1-bBScFQbySvdHEdw@68o{QD!&ol8c*?Wk9kE(ogdz0$jY<@N3DWqIS4jZ~%t~z}E8zE*FU+H&X z5Eupn@fsxo$eI$>mLdX{DP2k)W3}a!CGPg}VrNPnUK>)yA(FP~wN?3(kGB-gx~2hg zhFj9Pb|DJsiTF!$Po8_W-4#Jn;sRG0Aq~@Yfln-oB4wc9E$k+w{(Uw6AI}mbP6$l7 z>b-?7Mj4ysC>50|gys=6p6&pd*9fBlj<4;`RK^dwsP^+)o62g1d*>r(p@zqD?6+Dt z^Ct|t`0Q>)7OaP;+LXHayXO4JA;3`l$E9YWsncIWe+n!J1@YVh-(_^n_xdKq7R00M z@6AH8?By^^>7gU65$3Uv^-wgN38O8U0VY7gEW}HEg_{A!O%je+BiWyuljbvXg?0ag ziah9f=^Kct1#V$CtiheH(R?NsuS0DtA+o$z)+wOxo$4aG--Gr4Js+xA+j`eLu=YF4 zy2jakrpi90)iv-%y3RUgh9p5{_WU~ww8o}oOq`!9ck*xq(3ZNsZ0DR;5pKBwe^#~AY!2)Xut%I$sW}sO< z0*c2#n!OL@6${{&oy3~&o)xi@9X+-NF+S6}pJN;Z8KvQkzm7zFg+o||Kb*;u7+yM) zMT7i=1Qd5mqzbZ;0M;ni9*hb3h_r}wWh}4)y>*svFrkDd$^iG^5M1V&zp+Hv-Rzpx zkMKNZ?;Gc?YXn!|@|H&o&?RNM(?AbJYsu3g-i)OT9dNs7CU|VVaA%E^25Na8Djly+ zB@INGSn^Cq;jClp++i3IuT5<%>DtMMpY)N~%P#;9m=m{ZL*k(rX-?~erX3_I5sNLm z>ZP{Q2LQcqZ4&57CsrJ+n3WD!cJ}j?Ek&&7*Pqb|uYwiqUK{8LEz1tP`Vc^6&(CdvG4c}4> zEzXJMK%M>tsL%GYkt))NazPn3XAK^Dl~35fBTf^jTL*W9GaPUKVt0zr<0LB)Dnqu% z*U=r~{FS71Hd~{QECwRoAe$#!0X@a4T70jHtDPwt`r7J{aMA%j&V_hhV@?I-gTDK9 zt;&9n#32oQ_2qz0kURMa3*=uGXWqM-jWlj7GNAqUIJkp*FRUkP&0v%dojV=LF~!gy zNnXC(xTZ&ayW^{R%g2h&tPuu?TD2xR0RaRo>Zl=vqhi8mrIjhrs>2|VtOg|GLhS>N zIvFU|B2wDAY$wrnvd(JM7imo47<}iLVYkaYQD2h+{skIq6 z1-e!S0a(cMK`$Qor}Q5rwICc`JV3?50mT=`x@e+bwegsJge2=BQ;pb=QOJ2Gu}(~i z^fT~MJ>$1APX`K0C{8ISABuW8K@AY`RDgiqZqaEbQ@Q%~E}=8@uQ^*y4sr&(()-~V z`zU#?pP>Q$PzP|?sSH;7t#KiMGCVe0$g|gRevwunx*43tCRzVs$9|S=zda_97(Pj^UrtFSdJ16)x_%AXEGMQ&BWu3!E)u6W z^2F?fRW0P2Kdi`qRfAQ4>3%?-ULs&0R>C_)YQPL|e?DRanI+lJc33~kXk~tA&Ffd` z&&kL)-FO9x`3|-qvw2^NZ!o{aZ4_L}cnn9S@iFg<69HbL>(vxJu;(nqR01{b0Cpwpm`StrH><^A# zhX1gL=Ra~Aze4NlN8`U*h=1SVqmuFOefal2fP>|8oes9sFFe@4C+pkf`)`E#A3{5P z@@4(It3UeF{(p8KpopSn*SOfCn^Xn!L!x(e0NOpm+UZH``pMT3>yA48$dup^oPHLYp@T&g@Foy zx9b7hlR=V8F4F9qVKUSVrG^E@{oR8iPO_s1ztB=&3*ldQus;!rZbi8WfF2g1XLal* 
z)1_bkREhZ9EbyI7MqMXOngiZ(L?T84;SNOLLcC)b_c~j!X^aoW@@Q5E6bXZW7J|v-+CG9dXg`*IB*9GI|w^Ug|ce6%2S|JrVuKM2mb#HstVHM+HK? zcK;x9>A1p8Wk!5NYbFzZ>#406{YTGWEM{ z53bP0G_W|^sQ9V-xTLvgzWpIr>Rt*wy4*l#rkSgFwp#dugwy9inf{~y=4;gq=zQ8~ zXqzR1CgU5nV=bQ=Tr(F|+G4tE_$DtFSUAOlGu!!!1W&k@y!;P^E$l-Qfw=>NNk{P( zkd=$#l)>qoj)!iat;mENgsN7^SBJ7~JD8Loh^MNl8@X-1@Zj_E{P>2HJ~AGM2_tK4 zWID{u>&qNkvm>o5BkvZG5f_Hg=g+Mdm2k58N)uf4w`4D(iFB{bq>bApBNRHV-INr# zMEQF+PauYGmQHi9``(9^-YdZoiey;Xkb7UY?^^4^cLJ@B)1GD+yJgpq>Ln?_qnp-g zK8_R73iY7kF@AnWV0x{w=i9UT9qs6AAKcfxZc3;@cW%!!Rd=m6E@8DH=_`MZLB27Id_gfW&?v&NCS8fTQS3#U zOAzz_dP~>u{4q%E0M|U#!YrP4unGf>B&p+TEmJ=M4t{iDsp;S(TZ4U?z6Q|Q+XK@9 zblOl!bz+b9?Un}jvn66rDu78b7 zmDl@_@%~yqovh-XR{7@?7Hl>+iO12Pvb?lt@}#$h4#DX`g?~mJx`>_vCq*&0A2Q_e znwbAEjO|bhNXSJdd4ddws#dEJ`bTgF0=^MN7#Vlb0`~doSb7?}sX7=c!F>;eYIGdj3E(JmGwEYSYJ7$#28(cQiM$S1$JT=NXE^ z<&^B_)pHn0kxX-; z40^MQ_75X9PYwU9!tyisZ_Mt$vj_Y<6@au;Z2RPwfMzLi-~?_hPisX6k|FLV#9WU= z!gwerkO2>wxq?Lhn6w!B&s+i0RQiV{U-#Gk1Re6f_Z?O}ax6AvMKR&0P{fBH81EX` z;j&{pukh=g*LLYdpETRDZ$GuG+2I$?QoEBwu3aSBm$LWLG0U^oHD^D*xbfAYo|EtZL5Q)3i@E%y_|#Z zvWa1>+DKhDbHknBglrp^2jGf^`^AKP6d;)wn}JsR2;&lFW!Ljer2Q_>_1c*m3Z!kc zk8r2k?;0f;nZ}`tIfzV!;a3~q+|X+0kensX!gtR~S8PI{M0ilv73~I~g~ix!-lt6hjY*{0#KJB?4=dXgS_%+~Y+4Ua?uc zT_qEmoge84_1!fhhBnB)Oz`yPu5e4L{E`}NmqTK+!Zc-0ZkWRfQueVQQnPthxF1fc4qo=~$d0#VKPk!0@ zn-VQ0u(~&k9uA`7^lv?5K{iKQ?c5E8O~{3@fHrHyu0=Vf^@i0omCz&^*&X!Bg8Q(y z;lecfhLw~bX^{;-fM5OM__jXPiZaE;zLK@%DKu2YAhRQ_8wy&UHDrukn0nXv*S|8X zIx-}llX4iT<)H;zerhn~GKvW-kIvOR=Fog^s8Y9E`)}=TY|f~eIxx7$d=-q(%Rw($ zA@pcD7zc-o`}z!2_(}@o!c?Ae4UC?&1Ptjj?sMbjiqw7;X(`uTbj~~V`>XQv|33)i+7Ev;gvp@`{;DhGmYEs$gc?YG3jAS=y@Zw{_>5d!3;l( zAa;Xl6bVl_XkpmLO)56IkpFYLO*d54|F56D1Q*~GzuD^lk6w6JTRPB_70;b%uA9T0 zqQRO|t4otZ9mt{Z_LAUz^#Pb`b$EczQKT6YIUj=^nnHY+lim>gRXAdxrvPIX2JVqc z9EH~MuIt=&Feu^!EIL2Bew@A1zFAWKzh{%|Z!c&X_MZZ2E_4~(3OGEF#Q5NHlmp-3&z7} zbVfZz#tIFPtd5ZkQDLh5Id%N=W*p5q8zbL<78rJe~<={i$y^&8*D+w$;!}2c>}o5>a}_dq z3r)rY-ZDNg#Vn6+HlrqRni1>q1N=zrnc`B-Sf}!5pC?ZXoQZ)|`IVr{x$vYV-y`Ba 
z7rE+i^H)GKn4IvdvDGl{hQ{E8`7C5Qa{!e*?GLgN*FF7EgAeZ%o(gEJ-*CMj1cc@i z=*E$Q;iTz!@<_gUVLDGuu1!JQ>O!0^FpWwi+6HA8X;+B;zlG3ziA)h!xBO=?K%lc7$M`_|(8NUj|@I$}wkZb6MmF(dQTx z&~Xy&*!3p}K8?$M>@Mw7s0*GWN6QUZB9QY9onZBX!hXDnT=2s--1|iJ3jcOo zRGlR6yBCSjFu)aeS`a4mmLrpXXSrJmJQ|r!ocPL^D9C>kGwe#nd>qil6{|W|rK`Cx{|eXn4}kX| zJJMya0Uvi5CwGvF&7LSX24{kI_hL>rBLE(``D%;YFpI!=nB6ys66kw=gjbTfu%ZWP7wKWILhu7jnY4}??B=0GKNvboXvv2!2`7n?Cqg`=1M~;=Vmi0 zW||tW0*w%+VEA_P0NAvd!TF=d1B0?F!f2O=VOEi?x^Xb^-$!LwE7E1qwDs*(%>YMM zjZBvyb=v6as6}4Z2S~STSOL7!idJullrAC9ia_fd2>l~xLg3clCVBpqAO0ulE+>Dr zpDUpCk;!>~O&w-CFd`3Rk;}F;nun+I##ms+99HqG%r$nSCJeLPFbS;!nR;u1TUy+x z27J-?LCv4DXGXs)6$mZ~$tB;Cn(yph?lhhs@bv{3QjYCAapmLPt1Gk9v82c|qkO;v zo}~b7zyp9Djs`C@MwWxMOKxQXwQ6h}>^5(=f$eXIZ6`N&f&JeH_)aV9bas^8JMjt_ z`OZ3j5;wcKK?v9cCBU%G6g^>{j12HQQqBm&#k>=CYmG0H7F1?&anFu`mb^wVR z{j$Y)qFvAby6V@rRW|;{8F;SVI4w7(XZk7A^wL1^Tp`0$EQu=25@H6}gYvMS3adEP zJzvd}CStGbxZ`gKobHa0jI)sn+{8VnhuGjKFm0;=sybmoLrVaPw3JkoP&C(Tyaa^$ zXE&FI{H^OsH&mIwD>!$Tr$k_>hXfjYTh*%*h{q!x79!Q*DJ}lC0n>{cb^i{LCH969 zX0AIc5+nyzC@72M9_$)-w|FYzN`@AwUa;H1^>_4Cy5Hb|QAd#^ZH3M1x~88q-E52i zQDh4H5(pMrdlaSaXE$>Y#aZ<%Nm@5zE z==C?0UAi8Fl|h5@-Xd2F(QSe0SCV?wMgkc13LkGIvk0ubAr!a-ymhd~6S|AnDp3wRyKxzN80b|8-YyrJU%$ZhbpNm3>t5NSOxeCFzg|7*7- zZjychOvBfA!lNz{0cCMJya8)hoQ2gs2gY3a_;4o93ff*x-#se2}Dy)C6|NX@; zo?VSu^KkNj%d8y6Z(y}a!E|VVf`KCHRt=c0tNQ6Z_b1+x*vBJUB=Xp-#;jPc-|HoR zaR~vx3ShV}!$?~J#5%h{Dw`rkB`Kw)Uh*2ARDrggU&E%qxX}PPtx)ETQx1SFJJ93< zBxTE1Yut>o@IUtMPb2xlk*E8ZxFa)Us^W5BM*0v{unMr?Rm!%-*P^6Teh1UQ!t_xt zexdg!a>EY@?Jv4!og*lN`4Q*a2Z@bI?TQ}mtr~fM|Nhs29(+arg|5hfNRIJZP%F-$ zr`&h~iQrR zIcqG7+1C`sym{``h-ifj zndVFAgg9YM`+I)m%a#HMLW&e>AqUz?yi52;jntu82ZfO088C5uuw&>pAQU~Pr= z=5kfq#g>Ug-<;?Ucut4OXp+DWBuc#Bh)Dz7kRZ4qErQq%?GI!oWpXQ zS3_K?`q8A*B!~d93FK8G4E@(SD&pGvY$MPqkc3Ac4%Q$emM|X3gW`RW(iw`YV84z? 
zrS=zHLzwQR!Ty$oAFwBBtZOnJgkWYFWjiwy=0uo57Gf2;1kcH<%aV{y(m+qn1{pQI zzION7H@nAR-GTUZD5FG*Q20h*_QrT1LQDzF_nHk@Y!b1|c{$f+d6?|CKN=s<%EaBf zQU;L8;qxse0ukJ3+4iDy&L@fZHhz5<7OTFf$zR`v_Fd3YbAtySzF7|Y;Uteal)#+i zzsT4xV;yp+U?{D1dXJ~9qs|}YYCDc>=4sxeP$OdtNNJ!iL*>HPTXP1p?$P`zC8QmE zpHV~3e7%wBnP$r~^px1m4Y7ap$V}CZaUsup8Gy%L+FGqHcE@qYcs;M)=s|#7KN1Dq zok$$!3I^UuI#{>DW8EllDKYO~yWlX)1K0kU=0JW97X3@6L!SAc(oM8PLFtL65@2y)u@{f|t&l{D19zcR-U@ z`!-6IDocxs$a1uzqM}UMiXdu5!~wFk2#Acxl9>cW!3ip|5fBF;5D;WTB1R2@5R@e| z>K$fZ!~U>HS)^0w#^OmOONV^we34&88DF|HA^d?mR}h z^BnJJ^R5rbS7)utFmOzjq#JW>(2JOU&|-dz9`bc9yR@hmcy`l0AD{oB8u16yFj3T4 ziu3tt5BhQQh>0YkLK-$#S6fnN5|15qw3aoo^08Iz2;g1Q()Cx+Uc@xF?wqW&W@k4I z;0)8PK%K>R$_y!&_|VdPzR&wq%jBVbER3l<<&E)-M!4@&pUWZ^Dx>E16P_&b~Sp>5y)z>#t*#h+R zsv0@WZUI`|DcY}{>aD-;R4aaS)BYM)CI~lwEDGj6VvM*ZMe;RVQxxvj`@ZmjOrI*p zm>YMigAAszSN`iA{c9*Q*WXJjAc%;ZJ$S7l)fyP)KRk)2m!hg&5BE=IW_`hb0L=RT z0eixAB{qF3*eR7tXs%9=OAMuI4!_pnu&EqO^#rso<1aKjgxTZ@Tb%R=__pKwn_F}; zB^gzSpg$0TZ=853Wn0^S!YYV%t1VP^w;^Ok58jsX;+ zk~Q6)f>Nf{56Im2INF5B%$p=OQls8kK;EGm)Yfkd*qr!-Vn_0lIf&h6k7)dP`VF{nkrcYhVa{K@*Gdf}S6nijg*7J?#N1syJd8}LLUukedT zK>x~+s@4hWv*)%)nP5u*B2WP;l8=#;E>ZYv7soaT2Iy`Yvq;~=eB|pJqK4IB)f?6F zJ|JO35#U8z4}Dc~fW!V*{E?p@yIht7SZBIf?m<)xqUB_90Z!B8`%uhtu6>7|t1Rf$ z`T6v#Q<_@YfztDd$h7C)p|s5S`8CS#pM@jNpz+EKfIGi7?X$3~zks$5`BWoXoo-+o zh7E+feaL=RK7a$#=>M1uMZ?J`lExDsr8y`QQ09vvJxBe*nl%dbJw7#M&7O>O$BZ6F z5XZ=K#_^8=vE}R94+KGfPGk|t(E!d|)nexM;*p~+g}tjveKItLP03pzX`hd?fvo*P z&NVBQaw@dJ`r?Li+E4^m1r+U{wYz5+uKXOj{o`k_qjK1aUeve#!M9+fR?rDWqQRJ{ z_(l^P2QlNwlx{Bct1$HCphecddj>(KLlmqt4strTgrWs_r>5+;Bo1^tj-q~Ig}9!ZT`+O-7!nhT`9jSeC#;1=jb!(eVz1lKB|4ZPk;B!^HX*`)x; zs&T7DLDmR_##JmtbiB0Xzf}_d$}(^<#ocC~^S>`f?!NMS?DRuvCBh)$>{LmV$%uT# z%p=y~Gv3F~ zq3gEK>Gd#z;Z|^HL4f9vEJo%|fXrLTd^L4GL@NG;?EB5#GN}UTVP->x>{C2Z%O!#!6{b&=03`MP z%ZtLT_27WH*!Rc714suDTHVYA3Pa1X0>nCxSWU$=-UGC69#SZ(8in@QHpnd!S~-$b zZ+!+p^o}Dc1d|}tnC!ENi3DCa0-O03t*nbcAmAW<0+n-7WXQ*QW|WbH;p=Dpr&JvT zXt=r&f$sjne72-O?IH#-O7K*FSK$1oH|0OV=6|>|zhkz3Ic$GMY~@zA3V?JP^{*Vv 
zH!j`pSi1k_82(0s_;Ne{$rJkx`NFNg{*}Y~);;`>3<)tve^NyL#3_E0i=f{B;0Asp z5p!!c|2sDSJU0J^_4?njp?~7~{1?CcKWp=AJ>hGo_rGKFzhm=flO+CsvNp=l{@*Lj zJc2+0O##K`18qo47|rWeS}9SjNpD5MP8GT}RM?T825tNBW2DaszWzVr-H<-L!Dg1& zH(H8%A3tJDzOk(c*cJ&v5tN)X$Y@rA0JF6fNE$~XAtaZ=vUnn%udZZxB`TpXo9!xvza4AXx!GohdK_a@7iqSzu}- zqTxTQ_TD>Y*EVc|OdIS4sp@F=5mmC!VISyYsP}(lBG}#k8ka9Gx`0*w2A@SaVn0Fl z8zCsd+`VBvpX76b0Yx9}{#k^_1&LkcJD_U;fy=6TSNBa-Qu|C^emF8xvicbfK|dow z)F<>m?8x?CiwDXPw+7ix-*`aI0`I^5fFQvH!EmFgSWLG#CCYt%8_MVBsYCSUlw=oG zQ1j}SPzMd}A zoHs_nAI5PkT+R64an7w{b84B1 z)Tf;G_sR0?u$D?5yhA#t)_PAmxw%NqOVi7N;xVelVu#00aylKt;H4hd7%7!r(z#87 z)Mn~ZLC&XF?vfmQd!EGz!7HiG1a;>mlOcshL@!Q8x>5TS6do-57abhBq6dx48dZ|e zu8x4iqO02Or3%}z3@xyiJ70DyYL1^Y;L|uveBem$wC?1LPrtBHk{sVyyKqmNu>=2kQ|5}zg9nBTvdgP*2Fb8825Frv7}FlTm3*3eYky(%E{hJ89L!B zRyIl`E!n#e>bx@ai=ISm7xJFI9qNAv|*0R#dWU0 z{ux>qka{V|PR`=ESlV=K5Uk`S+wz0{>UPM)3hPNcAO2+90wgldIYQlg3_)@Ns@vr+-} zFnz};VM#MoM=Fi9b#~en8EepAR}AXBv7tyAClV=khRd2LOLL%JG+%$e zteV5n<|O+=1Hl2qHo?+NdXXZ3A$ZyT)?m^jY4(IP=b4X2J3#OaL)xIST}7fbje8O1 z+H%GOcR0ZU_7Z{@!R9DRyjjSS;#2fLSu@_5y_pV++HAYA0-Cgrg`n8nA6L$?r23lp zyitfZijcF`D%9DvJYoA#HxtHxlrk7d1$MH4q>1zI*&ywpg%hnyA=%;g1X^JFy)eoC zT_Ool{__kn7SH9za|ZJ7TQ%V z_>?-%3(lZ6E4eReV|YGjQ1&8p=&1`o;%kIsb4x7;n;kQ^LEXt*6*R3X0oXUYag~BU zbVr=pE2c+LSigqM00C&v;rg0tvFv_qyn9JJWgJ>F+mYr-B^RA|at_a6g08m~@=X#uyH7h?hYfgX z!;Q$vBS$WUD{&DMdLtKYkkrZAO$@GT)-u@=dydegn{af<{N|9tL)y~UAefG244Dv` z{f$yl?$Hn8ZwwqT^gTMY0p%{XF^90WKCO@dfT>vjKx4wDdeU=@i1J=^`mmnk(_!W> zi&g!B8?qf*!FQ!xyc=@jPawpL>?LzBvbt8oku3+E315}{XJ$BbCW@VbLOV`QnBXVF z`$o?Y=86gC(5Iv-k%r7!Ln=i`F*f!_P09EbIj4BB{*`-neD~=v@w23)z8t0kxM_O` z(n7HV#}1<3j4EaN7)(tu^BzJZ=sr2&bXbeg!);ETUd@w3xLv}iLb=4#5cK*EpPz^j zuNku|u?)E?1#enxRz9AVQKgUx5dyi#)NK9O1N^oTL6rC2DIZTRk-QO=hyi#hw;51X zD7+GHG;=JW375%)4xe)c%#3M?lg&gp@lUpzWcxRd;{#GPOFAJagG%Ka2_Q!)OMo`xQuoMz1$nzYBx*oS3C(z=Pl+9iy|0*S(BXLmiIu;6kFeloS9YudFH z{R?WSp2UsJ6+5Q%lB_Hq#3!Up0v@VJl~W@E7O>Cx2L*as11FtU(Sd-4dguY2iO*6{ z*i_2R6GJP@hE=SUKGHIFDxe}$P1NEA$$5P{=O$7zSEpv`MET@R!%XSZ?yiYzCYs2; 
zjpRaP4$$Pzo@10JDzOCiFkw6N&?|`eXr?fyQ(JL4$C@*YHMpcn1d3yfNmQNvyJ1zK z1I=^A zC6f49Qro$HD9UbtzUx6J@k)>|AG1wJ`90r07TGLe3HqIlRPDTeDWfk|Cci#xl8jlL zf=73O>BP9(c5#O)aiJ~%xUxVPRforTiKl0BGC1Av(a1o72oPN@frCalYn7V?(-4pA=*N11@c)4skKpw@yXJ1<7KXfeN9Il)V z>B0Hx3Fgq{jJ)Qj{;XkZ6!;+Fr{s5&Bl)4wFLSCU(OgqScc!8y((3FBhF>3iN}IpJ zEZiwV^R+GWZ@GDf)tp$ii|3wSgb3;oK8h;Nb5h7c2T-zC5m*2rE`LpjbM6VXhM;y@|0dm zyge0h1g1qEn@xCB60JuXAtCc89;XByf4E}`r8#x07INBJjl&)N;2T}=xD(n_(d0uR zjr^+h?$7E`S$6s*n80G9kh&tObkXWHivDpmRCXeT7!*W2nMmor5=7?{l-@pFtnQQ~ ze4D8^59kOF#(1!HtG@-s*tpx0qGgo1p7quclU>4LQRN^7+l07dD_mjU8iwSl@6)MG z1joatSqTY^VVsbIQQV$L_F8BAT!l4IT) z1M8PxeYMf#yLu88dioANy4+vA|Yr0;ai@!rUihbsV^g{n3`ZuYpUb0 z2plhWT&6FcD5hL*Z+ib7n`1@Zb5h%+9dLa2u=6CWQ%F6(YL)Ihz_#zV(lgmZncfM{NhOk*?kRK2RRjiK>Mt;2F>3A^Pup3v)1#kt?wbU#;PlHf$^jX&s=ou+!IY{(xgH~es&B8XbyLaG z(|z3)BfA>ik2kpYL-*A+%!UUIo>GuCCd|~-tT`XGZ-74gppylm881^3tg#d%q!Wc? zG_J7Ai$yjiL=hkejhhm%yycBnN02*KYy|u1CK2?yDE_DWK5@f2{d^-SYzkS5j~5vxDQYK>*R^I3wVqk|CLD&@t;k+l>64t(( zn9RprdwdER;{tpi#8J;G<>LCQ3qPz@+P{0z z(A^?E!OMETWUbk-Ah2LXMz+$jXHK3Ao;m%{IKb$qM@4g5f6hdWlxix_=ML;IRB%VX zKi#JiuR!)`1tEgA%S%MkzuUNpQ5$%Y!)94pTHcq$f{*@K_!^S&13Ot?pn zURCTop*8~r)?|r?+1ZEIZGXQ^fA|1)YV8ptqto<)>3s$?8f^?1-mb-7RH6LkUE_P@V3v$NNh zM6+Db=ME-KJ^~nAYmq_P3Wa8g{d2tG;o%LB?+2%hyr(KN=jzSZpIh` zg8&&a6es5gFM!pJ;+*mtZMIF&o*Pl{0#JJ)Kq70Dl$C=>WriTCaZ}zQtgDi8-0j)_TJDiTlH4lzfzR0Sltb=Lu!G2d>&1ju+Q61BZBUHEdw#q(@(C z|2|sDF=DilcN~Zc4snld||<1drOYKpEkHZ&B4NUAq)O+~xnI^ z#KvSM)m2vOlI7b@J&$j;jkN5u^6p%e@7$s+>;B%%sHn*w7-nN{#8DoQ`8{Frnv|nH zX02ms^9n!rcxZAG7%?Q}7Bql%PpWjkXvi>c1G3`%iN4F6jlNon?oS=hmfv2tsv(A9 zY-)E3dJ-<<9c4K4e3v{q?{b}C_46G8;vk_HK2Zwr*gOM;CKi(hbsTyeTBxC1q>YZ~ znT#uRh;UB=0Zzht;++-TlYm)AWa(o1g#rgme`vMv*vsjSv~I0SJ=O+FK|~hlXg_ys z#NDr2fynqM%i?T2YJ;&!IT>3P9UrtFe{#ugr~zj=3>^Uhza!=a=O2WB9}y8@=5@|) zGW+Mo+Z7DR-9_d!?5icrYqLP%$z`JJb*HQPg^{eq3T2E$-Bj8ldMSw77a2X=XL{^z zOw5{=CrbRZ^DDoXNjR{>9n>RPHmdBPl%lf)JefqQ&YICPL6NSd!-~H)ILEf-n zFofM?gQy+r;#SJM%IO~OB#uEIlVj@g{$0M>^Ryq2-GA&E0wewI%xtUH&(pRj_c6@Dv5fro?A*rV 
zTWWJ18}A`&m1>yoSamg`s)OJ#xW}YlM=)T`nl(<&a!N6GfWz_{afKJq85V`|qvqus zHA_NP?|fui>lb%#cq zeS>!{fMMrZ_FCZ)Z9dqC#?89~RIqUmMnA;T~=Hrv!C z{LY}pSwNfr1Y>*OG%$;xn3?ftYXScIiPj^V94&P;&o3g(J$bUR_%JAX4n}M9-FDf|)Gu+UeR2;ybTduqBS;Iq zO%F{r+hX#A(z#_yjx_@DE{D~EmpUvePz%(TexvA)lb4xSDWe|O_8R%viL^w`htmoR zo2ue9TN(&{u;Xn|2ytpC29b_cE6n(I7QFG^UybkZO3%kmYwtYQ_oHvY51O+>Wk2`p zp*HCfcxNvQgo2Zj>+$mcbbH#lzI0+Yre6xKeA42LSFT*aB_+LdZZA=_wKgyiRJmZN zM>Nk#-NN5e>gQEpV2kZfz~VhGh_=P87yto|69;P&J$1>!3(XsE*_A!ad7>sd+MJss z)Cs883jo>6=j#R6b-HR>8+H1^OTaA0WS8dUHLR%`7)?AR{5C+Ye0+8t0lW`ML@cBL zp6@jQI=`5dc8}NCj;rx{F@kbiwz&27_HG{DSB3>&XB_d!Eh{Y(-z-(M5N`FeqkF>N zrECtAs=IW%tIg@}fc{*_(5{um>Z^KG2tU7{KtZW(J` z5G#tC3Z%^LXzif<&26ijJiQRHgq)e z>C=Nv`X|qjgtg~8zzBA#c?>MWVMULaT)0wj@ZB15@nbWtTJx`C*I$*DZ$%+-9O5FE z>-XeyxM%O}UQGV>#qaOrC*@FLOh@Zd!xp^^H?eZ)Jy`q1;D!Xt@M@@(W^)@v)0cHa zQdEyn+e_QBP7_B)FQ*I*o$~^6jTx^9&u(x4EPV_wkC{%;T&Mt_yv;I69~7jUW6-qr zf&xGmIR<@#co)NB7K$m@J(4H}k-;6aOD0-tnBHcUKMNloQj^SeYLd4zUa-~>`Tm3j zJkhZ28;BPpc3urBF}AiwN#3kqP?DFY+=}yjw4#0K1V||D!%u_`2&iQnR$aV){rc8d z;%nA?@Dl0bxouxg^Mn+Io_zezJi8~)6!nKuCiY->cVj9%C^AH_oP6w1 zc!w(kg4V;h`ntN4JNP790-MHb535%hfabG?^7Uo(y;(w1>x6jT>SJX17=k>ED*}c> z@)EOrfdl(X&Q9HV8zxf>p+w`#RjZ1?>Z_c3!9#}rt74DAt}R=(yrjO+R=l?$ zpl1UiEO>r#!2E77i(*1Mu7+sHdyQE%<~o|>yR@mzrMq+hbLi%9eOhGr&1*XT(6x5~ zq2kNm@$g6?J9kw^USjuM_;0!~DyVp+vhP!B^BX42O1z(;ja8!?tka%8b$k8#H4en( zF5Hw*^VlSS(vjc^XkGXD|9*fye+9$SZa7l#rHf`j+xq1ws6K^MU6B1qR_&KC0f*MI zawbMl8z2>}AjeDWUV`G`af3zktc@=}&%+4V?;w%27hZOSqo11YM4ZnN{{XaJ*75)V literal 22589 zcmeFZbySpH+crK!ii9FcmxMHklr$#Nxk=Mf_A+#hS90 zOX}$>*e1uDWHFA=1LHINd5Xp zYA#miufO&}@RFE+`~*`b^pcbiA40-ocx9|&K&KqWhSIEi*5-z4PRbgFcn`O@F!tu^Txz&i{Uf1MDWbq+*=(nRY?$|@Hlugb&w5PrDR+>JUux= zzQQL6ZsT|P6j!WpP9A9< zj!*Z5GRi3#8yoL@x45<>)Og2(2OBS`K$@AH&m?d)H{NsilYWfja(~~6@|V*=qmY=A z3D1#Jq+5dP65IA%oK1h#x5Y8%2>}`r*mb!O+8~LD6#X;QfXQ52OxJ6NLaPzMo@$Sy zPZ%m9sasq2Jw+A|dn%l53c!>sC{l7!hClh@8*~5DgFbQDhWSx<8ip&e%Pwhml`%KJ zQQ1Vt#!_l49F7en9qhD=u181dxn@3PcXa4l87e>y7g?;X_m+XtlWIP6M|!|!^ATR4|X|sZcN*qakurD 
zASxkQz3*fbra5|5OXD2O+U_O23QHOq{t{;OFY(gBV zbkN2afBsrE`D}z` zc;c21PM=Xazaj*|%)UsXD)!_m60frjN1O7P$Nf8gvA3LNe22%Hg9&P9(J{v^IrnrD zSMs{UTw9)3kGbVgegmg=aa-lt>Cvv9sBz0>Z3?y>F}WvM+1c4gCxdxLCdgO;lhfnF zxfs)gJnV>29)w_{^y_A$O1jbvqwgm}s0JR-Cn8QOW*uSk6l@HW7)^tP=3(Npr zrLGNclXSW)XO(d3bd|cF9Zi%FVj9?uSe0lQFf29kI;`;D;OldsNlF=DWVwD zSF@5xu^UIMpRQG`7hh~WI-tg}YO1nc?$2@zRTE;oO;dHUH^TK+t%NnM7q5lcICgs4 z3wrF%y?X|BX5o6PwWQwj@7R1#n=ZOp_TEqa(kmpw-2QqxBT(H_n5DfTLSsp+vDM;( zMlWWta7DJdq_F4GHA&$I!Tu_{V1Jb)BJ;{W@ONxdsL;7@c$;@6zY-T}ztS^)PgR(7 z>1JbdS+|o~<4x{;J&CwIUyIv_X;*vep{XkB3ynbs$L2lhF{&O*8gsr>*17X!9^%(K z4CpHmQ|^ae#}|ePhp$TET%0>TY&NJ&POkS1k)C$pvcf$Jb8w!04bFseaL%>&BrZO%NcAr z*;!KVw2o1z!zRX%!L2(qwS^SD`b+ea3KTP5ovK?}){_e@OuPK&Q?W`s4_ozzC)g4T z$*4y%H8NIePs`>LK3QBqlydL*tHe*6N^DcKvJN$_VU5-^_d=IArf!;<-sEn7WwhFr z^4evqnPt^wLvpR=%yWd)w(5Yc5L4%2_LYGIi$)9K8fL}FLHPZh<8D?cEb=_mjJ-*G zf7Iw|WH83tbWD5tNojA~VZViYZH}A)0L<(=Ut&BIXXlL}*2E z>slHc7LJrTnD*owD{fEvTo`V_Bfv}(b6&67A3hU?E!-P9+>nsq5nZ}#dMhpIh3MO~ zOI>(fT^e}{PZ#_aa?3~=Wdm~~BENmot#ERxt(A}NVEHK97)?WgNnh@`{J_~7)-Q?b z3Tb1J*u--i(=lC;X0AD$i(7Sp3}fODt@SU&MThvo3EAq!lmME5Ep&BGI*2ZvKElsxp{p zF;d$)Z~m|r^TOcXiwU+8*i&h#*{yfmDaQ_kANT^uc#6F= z)>*eJ6I|h?58#<31Na;B1v;-4J?6?cBr{*vVs|)*m*TXQ@?QTwx>h<(rHA0^B@aSl*4R^A za`hUxJiqgr&IoDOHIJi_iQ?HXiP-(U<$;cVE0iOSy_hVWTntt)Vqu-FCtN!?z$kTP zyNvroZJBXLOYP;#*ZHf`XUYhM4`di-DEsyxqg0w1uPxzTe3bhEyatcHmO1`{6@k^c zfks`)CE-;JOj4BXFager6#qOKVRo7t#scRRj807~y*sy3Z*+Ti-%{_)moZY6NoRb2 zo%eQ{VfUtS2pmBz#rH8KFX^`0)k6YyiLyp-F#tGQ%R7mtq2m4vyI;hNgSB1em;&OpT>`GMK>sGv3=imz;CI=5B4klnpnr7L2 zi1Hr5CtPCk&BO@&S;{1%q;JkDG^uw|RLVYZ@A|sVb-(yP7w%H~HviQE7Ws2^uZ7&_ zA&R)r&(!W3r;AF925clvWNgqR{~Ki@*i7o>5kk+NZ0VOX(g|zdqPcGc&xA{cqUkU=;6WVSWjpiV{a%NPC627 z=&)lzuc1hnXgf4=Sfkvz3(BOm*p4oU@4U@&Zacp%@^B4d#>2X7E&B=ru_E@kVVvlC z_atRPE?oQ}9a|W|%*_kKk}TtQXi`HQ`8dM8$cSYtCEU{E2Gl%VLP#7}%9peG&@oU2 zLE6>x`z_Cq`?}uJ^=5n?zbqg3lKky9<<~RQ&b#6nRmr+ZG(IHMLS?8mlaJk;0DC{! 
zUS3(@$CQ0GL@%4tYOx??Q@JTSLZN=HAVckg0gStrno(HRMhb&aC`kNDLVHLi1q*H` zcVmn!LM4VMw*3teHna-HbosGtzh5XwWH&1#vMVtmV9P>DjI6&=bi6(TXQh@nn!y%tsJP2#yoni4u#Fyj#EwX7=@_ zS8a@J1C+l8vy=agjwu74&K*i9`}KL*mmPRgtrDu4i!xg*NwMF7?)&9o1;l!yMo2 zc{@#h(m#2otCb;cyW7u_Y$lzx7*D<+3{R(#n*J8_2~!W}f)rQRHQdg4%=vm@wMcXQ z=YifC0pW~vQX2ZDg0;+QMiac<3JydN+yR7Ods@+k=TaIbaZ-dxi=Fxe@dQmGa=+nd zZ0CCN*yLjq=LH=}+|DlSd(`-d0(_m$2X-s4HG=2^cO`A9#x)3KlM@7)*QH`FR(KT) zdeFSTE**2#RcoPMHq%_1PASP(@T6kmBpX|C>E;RRu8-hL&t>$Ey`r#xk|#Nacf{jyccX4J z^WM1WoJ_Pqy^EpdwZOCJr8`+$gg>UH3zV zY^qi4fe5E28nUxE`gr{3U1uWjx#1oO2dfXN?ibqV?ZijVrd+#dbx`oY(=8fH+Xn zk#(i>PExZS!<^zT&%EQ(LsKY^ zX{w3bEB!JtW*gM<8n<-6CNx0^&*dUrFz~2(CWsS}o3^w>52&SjZedWzCx%n2agVkI zwR&F@X>nFSOdqALbzba=xu<8|7Q;OwY|xy>c&}vP9#nt)i?b>XqdH+=m%p0zhfi?6 z^guPwMgD?Tj${IRF)2!8|NObF(?e;Z@Ue_qXmq@?#FZh-wY3SZ@?**E75jEN5dIKhIF6x9!oGuLAr1HZL970$eH9UT z7fx5Gy>!RcfiH!7-7xn8#J0Wgp?eJ$xmivbt$Sjjx%-(4T*&7p zTbX^Al0jH?cr{$&#SH~5W7}3|#kh`r&N$%rCm2v2LSn%uhAX&uLWXpLr z{BV{MDk3k9aheqn)t48aliyUYW_t}245w@1bPPwScm!kc+sYzl9i^&1Y4l#13=xqZ z`+ks#*;a4+xsscT*4tuSAJUF?1Q*vTWLEZlA{_+xKve);cG(?gO)7>Q#&1f29so5nq<9FrXot_)M zMLHDHMT@k~_hjEQf!ep=&A#!B%)cWZkglyRVXi&O{QzVxSoEsGa?&MZDxy1Mg_z;O z!2w%$MMK3EeIRPX=i5Vz8zUTAZk#Zj=oTWT3S2@0PVEt9auiGlCJ)Hp+FN7s9MHMb zTH#tH&~>sdlR}Zi%eaF;$rBgy{-1f>cqQf+xS7G ze04NJ`f`9uXp8=NyrXwa0y@%jez`KvwV|o%WQ%G9+zLKzdVwqoujCsl6zVDeA1o6Ma^mkp)iFD8Pp5j!LbkUFI0nTskKC2=j?w{N9@0cw#2OT zXx9TgBRN@`ds$lYw8<$hnM+yMG1VfuugxUpLtbG&vK|py%B;Yo&S8G#r{n6BZg5qZ zdN=Cq77+si3xSRQ#6PJZh2UPB;pK#R$6htkth_!FOnp+H(6bIWe2ix>#+PmIX0Uc7 z)iV?|sTS#xiApVbKwn>m0K(kok@J^iu|}H0FQ4r&5{9}kPT{Ht36CzSjwoJ-7r)gp z-7CZ?*7~|IT)S` zEd@7YpdAw0x|RAm*HI0ooo9Id(fo6WbFs)%H|EJ`t6sPAFh1%_t!2rk#lNs7z^-LsYAtn}v7Yb}Yo+uD^4-uc?`Sd^ zSh(ok$}f^q)slp!S(Xwmu}Ml}j_Qv4iC{*|kaj}_VV~+Oz^GwHeU~A~U1NAFp+nQY z5xky0x~7@rDTDyFp$i>?^O?}YOTB$7V%{(@Yhf&OhgKY!2s(GR^A5faZ3_LyiAq&= zcJ9T>hMJlXnq!Uy-&ODV!~~(_P`y>86rE2%F3HY&)kTV2$DA<(6jTFg$wbyyukCdRl6hmCqB6WVWS)kOMC zIxWVN@jOycdggOOS%bEw0(4>Z^(Q2P(G#Ju*E301T|O)(?2>Z0Fnv5A^^TGq3afX_ 
zt&irN;5FGnE^dE_3q0&g`4&$SrIU3DvlI8DN*vFgj-^DOQ}JQI{9I~_*SJ&gq0Z3l z?KmTWuAazLSvEFEm8WXN;Rw-oTcTTwm%~0wBErhr-7ZGhMIj|ox~tvHo@K&+8?vk*4m4CN<60PJZ8M@Syn;i{Bn9tj`O49o5)pdlWeb5 z*X^MJi8vAchn|Qs({b@RODMh=R%b&A#1T?; ziduI1vUjvzGhsQHTZbWV#IQ|sDNp51)k+>wkb4r#5ce6?XjseC2olj%X#FUl|m>)1ia2B7d-L}GuUm+@W`zXw;)JD zrUZ{YtvIsfLg#h8iq1f7$`BgSe(C+Mv;7em3s0dibj~La(RIg93gbdiubmSCUVGdN z+0OL*{BE3cvO$!_MASF$t?y!C$cE5bY4kq2Cl1ONM|w___9d@F6v3ubgmBHO)j^bT zTbxXMa0v6Sbo=920Pj2fqrtrgNmc&Aj?vM9BEu~QhQwg=p#;%vm z({6tyQ61c+Z;RZb*wL?WUT;UuOo4LMkKM^mo`e81hb|x*_3)7b@0f}{z10tr8+S+Z zvGj972x?xx`SP9_w%OX+mcE7Jq<9w}xBSmFa#n}Ly6e6mU-9E}*_tUA^~qOMIXgXB z=Grp;_O=$gj?np`T0#cbwS~}Yd6O}{g!T#E2DLH3vxW%Fw;C=+a6BBgkdMXjAT7uU zw^1i4OK3}?6?f%FZOTMIM7{%Tcj+~NI}sDCBvTzfy}TY^c`gaN^Op0W&+w&#umuV- zCMZl6V_`~^4T9vPH$PuXFC=Ifr!W~^HSAw%GLOnHAaW)*#zpWNeH&~Xe3GFYCn%W3 zZ8arw(PfUZ>q1#Ymi1_9SEO!j^zy9qIRCPv{LrHcr>s{NCENGn{}OtH5s=3annK91 z1=tPom*FPw1&@luUPu*QmN(fy*B`r4s$IJ({WWIYdUvVM$;)|7?EJj~zE|M{fKRxg zh2e4-3*?+|qDPR`B8Z5V%6X{@QJ+i7#&uK`3UBjkr`D)!D!bC{OJi=HQA-pK8=bpXr4v|Vr<&9B{zfth!GJx^8#(LlW0ln$ zx(Rs>zFN*tb$7z6e!n(WC+{N&P3|A>EDMmP!_KK3$Bkn9W~ga3Pceb9sG3wPwZWDL z;yF>L^)nhthtcDa9WOpF$+>mpZjNpuI~romOOpTndjW_CKo+!3Mh(6B9-qclP+S(P z**aWcbNd}b0NryTA_maH0f0$b5RX-6G${Rns|HY5ErwYRc4xDF3It`i>UgvP=VjSX z>!G7-d6}KBxx-Ql9VJNf{}zDG|*nrHaXe!{*x&j?U?UcOhfvev!am z2MaBqnCQirr<;#Fo3o~mWi5o1Cj{ViuIwr!*88*KR!jFm&q2xcmA+3=6hhYV`_{FU zdJvBYbVxA4gX=IB;Mkpd0GW+{P5}7me7kYWWTAOa+gE>*)sE&cmXXsUqsHL9YodM& z{Gm&r);&bEko&q6cIEG1LPnM3ub$^@TRaN0)x z+ooP(DRG{t5ZczgS2N+htvpgEx%rjEF+`Ct{1;)8O5t+C_sXLIx7?HkSiQ`I5TaTC ztW~gbd1JK9!9nUVgbPme7_#cf3q@7!j~5Wn7Fi4odtWbcpu6$ZZwO#wRa!+(;{Ee- zj7(^jGJvNWVf&`gWV%rwZRKEfVl060*CQiIisIBVMC~q{KFHCta$qUbrU~T`F|j6D zl?ZV4Sn6dH-BmmZ>OcB`)d|H*0z2BY_a1zXbTv!6q}cIYaGYm^b_Gf3HQg^PTnv^k ze<5|?bARdYD=Nj6m80Bdk9K&6cG z$j#U|El3lyZ*{n+)n7@D8BQb*Sp@)=;)}tbNdcxXi>xi#x_s0u5vw!kEtss77f>{X z&24=DVzSWreoxu2Md@-tu=5vR!rd))f7yo*ACBx_7w3eNO3ee%0Mq6AEs$caXh4r| zz2>ZXu{$k5tI#Zz;<^ERHSlO})qz*_he3MnT>ueRoO 
zDZcvIhOEa$AKfq!oT28FkgXg+>(NL1uIXMZ%CS$sml?N4rh{K=!eEep&f2wlr52oZ zBSC+t_4;@fmo#Q4fonk;^{-um-O2Xu=j+50UG_^5c1X{9tHj9bIFsCN^evDVv{)cr zE=(%5r!jz;q{dIG;=U%OVd`>%)+?VcG3ff}EPVX=GVgOmD;{XI+)~P z^i;2iH}~IE!VBxa#om~$Taj#|K?HOAW1IdJn}2^eozXb$6g2Qy_QtH{!l&=NCK_t= zH#}ql^NGs^-+K%}aQ*?Hp}*GuAqTV&B4YL%XnrBzk8eb)R_snF7(b~LOY85@(_s29 zWd9ie;OiOwE8_EigV<#tac2Jx;lIB-Lmn;l{sz2%u!hXvgZ9tJ|Fs5a!TQJJ|Ksic z14I8;z8ct}HT&akCf%tDChhUUy9Ni7bv`CrGtDN=@0jOShDM3>e}M;@LjQQv9}^7} zjPv@q6$k=LQxSCk$zYkod>b5!$tktJHa5bkUoE!94dwsiL;o>7K&HWxK&zNghuC(Y zR?)Kvj};J{=>R+D0F;_^CCiuqR(8yh^gtD2uYzqA+n=Q!7aJQ}KJR`>3YQ7KcNc;H zk#_LM1AdQ+N4NYb(8va8t$a->+JLSVX94t#4bR7O>y#Swh@xc|Xj3rx`03Mf!nOj0 z<}G9xB=2ZRqy3wEfA48M&}@V5&!G@|o=YL>^jWJMc&hd@eiZy>Z%--!$m|_{XbO06 z{xzDhf3f9XR~mrnIOq$C5wwcMHZ^!YR_?gRLbC!|+^inI&I!9u^lzIVe6IR29$I$& z@7(`@sYdmmfHL3SH{%QyME(i8&`l$j&j9OX^{tuV)<5w5A0lAc5QG1OO^C~F;r;8j zwXq8195N2c7$Rl}`mKz|z0OV+TC>a;u{s~J{VmKL^PsVDRQDe|7(N$yQ~~msc+n_) zzrWC?Uj@egLf4lSzy-HqU_eTV5+0VC!~XKSEfE~ZQoCs-&=7(guOd7PW?}>+clQr) z0pJLLKMr*5;WxBK@swN?x1W(mx4fa#t0yAbzaKJ%oO!%0iXu>T!R!29mJz{YQz4w1BG)Br+7XubL@^ceubAN!{P%d9r~!#28iXL$=W6e`Kcywa+Co`*dfBGIyH6kgx;AF2kd1HjoL z1P^}5kH#$1c4|KN1KXp~q+331OMRbK*Fg$)J?xtDjT6)^ALw4prE!+POTxS)1+-rK zhxZVqRAjkg3l5*cdgXR7dWX|od~Kgp27=+Fus} zY@DZN;#uAx3+HpYmv0z-!Zze3_$Sjd{qR(NGTcuVI*+r^lc72yBg<#nUKm%x=vh8- zfkx!%aEXo0!)-=&p;D6W-b@W7UxyeYTBQI{_m8;znt;NP_W2|f-4^Jkeb2fy7q+PG zCfbFTr^jX@+h%f~E))Iaa-iY=PLjYmsB+mhDX<*6;dZ2-j2tZs+#zIWx2Zl%S030H z!&V45_iw0#jk(iz_h0z}_%VM<@xMRw{}D4R|Nn~SK$B`WJ{dRv@<5Kze7o+NJhxt@ z8E_FTNqO`vMMr=(|EcExOUVFn9Q#JXqMs9Os1(@Gwv;!PSPmJM!xjzENrmWef$6d{ z>Ngnv)2>ye2~_#_*&|jxGjxFL9u|d!*4{iB8gU6x}?&vofRs_DFlt z&m8yfuMMJCzZ}-g`M9u&fRewd7yJ^JFz;qUZ)DKAOF`u?@q8o6<9KJWXYUJIfL zc7I*`Pel9aoc+que@8X=B(JL2=1Z*-Ys9PXn7BmIRwKnjLTCi57g(f`PDa43rOLg| zPT%}bkqvZp5xr72-{kS>(fWu`ev<8YrOVzXXv3A8NT*)d`}c`OJ!Ae?4g3!r3G9x4 z6~F-hhOR}1H-ldKxR#c?+B{l`SWGs+`lt*HD9gRI zWHgEQ)PV%$FPUTLoPb4twCH+ePk2<+$YmmRA>Rvh)<;^Xkq_aR|LG|J_JyUYN}d!h 
zcE0y110AkbOtI}ze)l^P#F3F)+AGTtlWtKbAd)k@7fb)5G#`S5&R(J7F%XkTm+?HK z*C*7;NU({iuKr#1@Ph81=<+7yTlpbKw8D3T_iAWa^HM~+#T%$zKtX6C6#z#wZvQQX z0Si+|a)_Lf!RVb4?m3ss_23VO;lJ`05h6JEzyI%l=WPEU_bYzV&x8#dlYiuKp3q8H zirn%lo?>GQlI%m${rmU-!pZ;0WM!YaKB%QAeR`19mS@zsL_1Mr5oJDBUbNbjLI*VF zzsFqc2fnys2TWfd+zb8UU?V!Ad!6}A+Q`voGCn&$wPtYzc;d#0gU&x2f0|i;CnZ3H zL4%@^+D9gs{6czzz$VcG*)baj2cLj|z}_a{={Pox^heLHJzJ%{@IPk}*J70}mw{gC z;x9PyWt$kpCeE$;35=?g#Chr4vzs6oSuy?xM8s?X>O;VJ!lT6Ha5f4FR2?#2*uj#B z-wT~*wIxbUyl^TG$NZ^m{&5zeMhr;KYfLoLzz^z|jy;T7#|G&@P|1ZP}zW02;^&PG^#o|k5oA4EDjfa z8p?g*QWyzMr>-vZnJ6AJ-mOvPh74yW^5d?0uI*WN$hN0>}K?pFS*oS}&;rI^6ysWd22y4I~!Gw0-*a`t_kQEDeyCP1mgdaMndk)YAR z&0Vy%;p)FS2ziSOT`B1gbMyppV{a%XY@-cEL=MPpo}mf3N3e9iDOSJ&z?m1;hBf=Y z&+`3LtCi8Ro^kD$O?>x|+0;@y;9%AA7Q!W1GZ^G7I(ati>Hj(<|LlAI7mEBRPUxNN z?bsmSzPV~rTMnH|+vQ1XiSPLUj-`aO>mHV$`@6gRXTH4m%&pI9I}K0&Mk2*Fh4y&Q z%yLT(pOA7@GqtD(r|YorKTSHU9%CUKSbu%NA8FHeKzt~J@69EJ|@>>5&^FW3wo!dil7D$XX5168X&w5oN%QR7mWIz=F59){S|Ce27 z2CPvEAh=cIs$#8Zl*fFzl~!W%dDR^^x7eiYD0;?_~#*T)z9OY!Jqek z)|$hWJn*IfC*)j?R!fk_hud7&qaq@P_=AA_7#Wb6$N*UcFg<=4*P|E_?UB$N?E`%3 zZSn$nwcqANFiFz>`h*QSg*h4&8h_4+DvbEykN!wNf!FK+Op#9>n}BkS*3~>0I|W;!L=mv7#?$L z3)m8?M55ITu;M>`*+=knZW5OSe)D%O-(QifSZn#A%I#!4G>?wD&C~&s6&5eUk?65}BAE$}>XJrb>0tlS&6_)*+^bCWVA7#I`3 z?5H4Yvn6JnR4t2{ewI!0)p}rADS^f$u$pcR>*43${Wh(B)wqCN9eeHy8&t}pNea`zk~1B%a5)7B5Cm(~*{FZ|!C*obqXSn?{iv$~qbP=;F)l8afL)4u3 z2@s+`?YZAIGhHRPiKv2zwut5=d0%huVBfK6d%S<;V@S}?hyp{x5B+0X{3&6Xfn5;ubyAz$o9%6kUCox9U*}6Z2Kkq+R z<*%7ntD%Ls0jK~slYT%P#C_lwdggkvJAizNN5U!QAk=$%IBxJi*8yF-{EcCzSYibw z#~eA8hU^?1%d zV@&LrzFM1V`!vIDgIcBVmyL8$icdW?-}IrZY=qdi9sH4Q>3>V9(3==o(=kq0>k?=NYo|F=U?E{wr%_dPbGW8_AHtkPHj;HP#A9g%+o3fnVlK?ebYv?*@C>=_fc&Ui6O^zQ z!TcjA0o@VOJFkrnaq5(I!WO$5vXils(Js?NWcRt;VN%<6&>L}bwEqwbu=+1kI|f}v z&w~sv&z^_fm(tJrCrR2|02wy=VXeQ^`t8pDX_mMEUIx`q zFw*OP)Rz9(;4B>&W+6WakHqAUiuE5Ne@5%aD}ijg{tpx4&k~0ievKbG6=>d;6Au2mq?2%s4>Z0s!!M0fD`d z;DTH8px12GQ4LZK%={f7w$QQlKpwXh?VF0b?&Kz1e{x#|ca$E!w(AQKVaM&%#qJye 
z;11)0#Ns4<{@mu;@aD>H0*TZB2CTvY(rJvb0G`NKP6`nRYN{toTS(gr7+9A)9z(|T z&^GDtax!EG2xdz;SJaNVwwm+G;1jxxEp+!4HL#|B_z$8^hl!vBS||GnY6FR{S@cV( ze!lr)XnW+@cEcJzG9v+De#$^pf-pYaDjO;bA*=J!Tkdd-v2mi6Fz43Y(Z|dw(-Vl8 zJ5KWApBFtPbEw{?BON2FI1VkFbg5)r+i;sZ{wnIwadcj=$f!_A;`tXHeb1`Hm4yz{ zF)vBfI?}XN(!$DXzjE#|swYLlYAwOT*($?{{gLT&>I)P%QoA5Xh%+!P<`B-~yj5=d z#fbsr1SEY^E;#k?jWW4<=dvcK`Mi#fskQr4W@}i3^G-dtVq)fePZg1yv3zwFle-Oy zbH|~pbM84cV1OsLtPgo!WLETkUznD$%Q$=1zJH?aP;Ku#r;{>Ix6y!4hU|(>nVVx1 zid&Y|kJ32JDatGu2q}{&Mjbh)oVFM0_V*{4?u-@$MRIC&v%)3Z_CBMF_%B|i$~ z67J8(9D4Ozmcgb#f*wAbvtU|G;N4_M2u@IxelGre#-4FZ){uSLf7<4feeQJGsQ|gP zA3Rvh?s|Gl|Hj;LyHMLVqxYk zpUiH}=uzO|)@n#h$MMLCaL1KKtG1vEg#u%4y9M)<;%Aj>`fg*ihmkIer#yYAR+-&e z+-gISv&IbN5IS8OuMi0@q=){hu(+MYny`Kya3lFwU5ojI3ahV@P8VS1)q z7M`bbT0gBqIwzvCxt6=Oyt0%LC-r0c*Y#aCIP!WYa!O9i%SukFgy*RRRv!Veg{*&t zpVF6a0K{K3aD_)uE%ebH(y$Hmg$%oU=z9x5F|5La+m)dGM335_dcR<=__JKA7w@sE zv$(N@$9C0R;<}5-e3{{azQu_D-k9qM*d}YY9IDDoK4mHDBf0ivu6tPTk4RXQpH7xP zN9ji=&z2w+dG=bx_r+)*X%Dob!eea2PqX6l^)E|0`5Z3!=nLqVF5~C*>JO)CCzLnr zEKP9Ck1p%J9^UE#I#GnwX$u1e3UKIZ9kOVtH#2Og(xo7X9kXA$uGnV6noiPg>ZOM` z4POl(wvuLS;=?Wf>4?08FC_O(rhRu!Y6DJ?%^#=dr0=g;y!n2&Lzrx4x@chYp!w{q z;CVJG_gf%4ozWbR&QtU{uOfx-&nDPFvyZt3cBDD8g|&=Kl;WPGAKSk<5l6Z6>fF~F zbbLPzBcA?rMWb?i4#}q&#a%>*=@RaKEfIOfntG~UoHASWDEoBdrB)QP|;I`m>AgW5wNy)7G%@ zNeAzOrsaF0^~{N%%KW?`!XxS)p{gaIGic!pa}MZij0GJvGIT1hXEky z7W{J+b{jOs>6!b;3R1B=x><)UOx}Fayf(b7Ub10nHG-L4V*Mqi#)w^@w)-2yTg_@Q<-!v?E*2f-4r_;hdIQ4o=urYH|^)yT^2jk)}d&yS{#^}S9UZ^20LA%=N^6&U6;DxP*8rHg%_MN)0c zqU=Kna87ggIpts6B~60XIjs)IfGcHWg3M`oOP{Y64YYx3^9pTBb%lPK7XMD0X6{?k zdzizeG2SDXy3Z&#t8y_LIz0FI9CjAE7JAep(fwn%lxW|4EJu>9EAFOrRobb2do7AxQgBTx5XZw}f8&O-=)1?O+`{=oO6iL>glme8#H*w$xfg??L2q@}&u#kSKpYT$Z|PWU&*>?@+HxsKO6 zn;ulA#%Jr-NT9lV-#}=0qxKLE8I_}s1G<9bwGUb$jv60lAh9j2t14wFB!sLbGHLLS znAtocvlRH3TM#01@m=;WIWxKtQ$7KpM`Q%iMFok{w*Z;`rCp=d3iV;q(w52(h zbdkUy$81m2x-CdK)pr@(aa2@rmB95-CHhK|xUs6kVm@m|&G9S`64(zb@O|{PF?`lK zC53Btw3i$&KbXcb*r`c1;B~uoJtwn_YZsy{cf0V_IlfShEz^YOQ+T4WDL1x_OW7pY 
z7|*k*xpWJb8qt*4&X*H68`i4bO@6U9nD=EaiFZui=iBP@BB-ZWvR=8s`B5xi*)Z)` zK9bt~+jlVvAZeG>xTJOPMYLXE)!v}t*^RPj*yV^2pbMwL(ZPcHNdPU^b=vOONtikn zTJai7GBam~Wv6CFVq3^KObgOC(K_|`o=w>kBFUIV*lrX>s=DEoeto+yJSg2^K9t@I zW}N^gRxtGTi?N>iY;{^ombU*PfPNZ!O>NGS6Nl~2meRgF$-VH6yU7~wd)|8+y$RB>%ck?{Sqy)n!xUCXSc|zZ`50m) ztrl$^K6Pa_+-SAl&>?QRe=Y+P-3`7{8?emOz>@qhMDny$8DaOS zOJRhbUH3fA-S-&5c$cpW5PisPATcftFaDfjogaspX(AoxeNkNyUZQPN)yK`>=lNyx z&5)kS9lzvD@$W6S#P3BZrs?f&PP_u*uSS!q zk(>=m3mziWsa>zd2qy!AnMrB!*PE$q#B&rkKk+6U z>F?SECnV4DC~)XZU+i)weU@9oq(+kNWxnAU7XG}U_SJGb_jPHbrti9-L+#Zaxco;R z91ryMEXq9)75Rrr)9?A%neV+v8rn9{2gL9VyYH2oocNk>PLUK`b;l)eKa^PTqG;+m zd)Be4!+YQ{6Dqf&?CE*3tUt0WQ|oI6V@oT&sGCk`l>P!nXFqd@u&6ImC|Aeq)Kn*> z$eYC%<_|QuYjsuUD#Ml%J4O(-MwL=Y*M{BLU9OVwD;+b!e8yi7`8Et&CG5NIvb|!) zV`z*)wYCxC%P-t}bWZkl{2mPxCBtMis&)7O1d|4A`J(KSUrJqO@KMt-^N&=8c&7-|% z@Jrem+rMj z>^MR6s?@|D*E46%vy#7fFH^sy`ivYo(zgE^aK$Uq4<38$FoUS!NNh(i0D{IBVu8R zzKi-|dY2#pIpU;mFmu8ngS%vw(w2416cKr6(7@s}>MlVO_RcM(K|~TnpF;n^Px@iX zr(5(Tc&;yinYv3qbN~JK+v~+3nCLUx&r?m*33PCLONJFJ<6Mxn&@nc6elfK6Sj7$Nu)l(@600EkbzBBDe zE*kk(Oa&`#qv=G$N%Vomg4ADq7xhJ*W!e{5WN{5a0tOvKZ%RqP8jB^_>zXJu?H7Xy z1`%rWn@+%=n+2x+D)L-kZm>tBm2@@wUdnN)?Y2RHgqb3u?+kcJ_hIKNBGJut@S6&N zM&u9y(9&fw9m1{#bTU8+(?{Gag=jR9=TZ{s0GvBhjFc7fzF9oUUf5V_S^-1Ap+M-e z01+b1bhf|=fdU4RSilJTT+@hAflCQOq~;^aD-mWE*btp3!bhh|1dDB+N=+;*BHx*7K`I)9 zU+wT9B~_GH`WtffnK>f01yH2img0ha#G-vzgvdKN@-RtrzqyeBI;CO}B~zWGDgew( z8B-vWz}C!dCEA%ISq$m<1^iBvHq&dpNzoy9k$7JTaUm6p2v#W%B&6huh$>NLb`>JUh(&UX z7A>;}nt>{=VE7Y0xz4OjHZE`*pnMD2Ow6~%pm690L zxjcN*64)h&TOgBe#9)ubzooKEjwCfV(YFH5z%lovp#No|98qV|tw`elpr!Ol0Nu<* zB!&?#`zn$n$pRy8k^(f}ci(+B=a~wwk*_2rPehbifCV%&t;qscwt1q{WzihF#xOuu zuD&ECPt;3x$zhPoLVaeBT$)jNJltAeJO4nPTt@JaaQ6)+-RiqTafh;H_J?M&BZ( zNCN0)E*8@ean6Qla~Uq-2Z8)bi!aZQZ5LvgIU$V(Ky5wJCBXNy!znyX@w~ zjX(ftW|CNY#DG{ggi5~z&?yl4R}P}tteq)fNs^j}NUw4b&9;+7v$VIdBsCAQ@Yg{! 
zQD^#M4SY*O0_c>;K{SgP(z3!;l1fDmqS>mBePWgKEJ@8r4x-D;%gw=q2esy?L?nPt zk$`A!G9XD(^ApfaPPC#|lGOY}ItMw>Qt3zlElH9jNs=T<4Uhm@k|arzBuSDQAOW-_ zNs=TRT4Z21>Nj_DyU?wQWV!WGx->?tO7-r5Gut_t9JENk}4%JVnK1% zjzlY(q3>%y&TgnJj6VF<$A-(#4eV)o<61-C+TUn|(ARGOUvp}XB`xW#du%#TVQBN_ z^9+|?5&jQueC2FTR_q@}VfW_wT{^RKF%u^)WLzunugN`|;-DI8=vZeNyTwlY#8 z`Q+w94V%*=N5+47(#t`gb;^i+aizn98M~7eufOZ}tJm*L@l*5_TQ0~vv{lCDs%bV> zjv3g&`*m@)OT^6qq#2TMBro>n!Nx7lNzxX5kG&@g#7_fLtMrf+%3(6T<8c7N5-(%a z(QT#Ajxf4&NIp2Vh$LEkc|KR!X~XzzquCadnCB<^{W8gtCD%$W^3zOZDgmDB#otDf z&qS^KX@{Hl%~FmvJE+QSWky)dqSnYsaTU{rp_0}p?x2#7q@G*LFfaCssb1o$~pt@8!Z??CHBmSl78b=myG;6bfIvey}T=%VM7rn4U!-kspE5!^qWFM zqG^<`ETZowYp|{sT9ACNHH={%BV346^`LAdXnF~ihr=t_T&MQI5+CzJ`aG1f-5{N5 zt}bVXl&?&!?Dex;!gLUQ!ri-%Gj!SJ%q%yrlLqO{Kd6RTX!#327fYIfPjuU$D>UTl zYZ1@u`G*fOmuS!u$qx3rKR-1{y>NkgpnVBS_MK?u;8PZ*$m3VmCZlsy4RpsvZC8RraV0@w4`ll;f7L){C(lGGOML32^ZNu$g1p*sOu;! zze}u+&B=A644$crv&%W=*TIFVBQ5=Wx9g z+4&qn81bjtrO{R)BVHJH_VGotWD_EbzW*fV_$8UXXVAlpHPJ5`ulS{UM=qiQ^{w|> zAmt$-cQG4H?fmw^M+RQkuNFN$(2*&?2Ap$>ixtW12hSXGBD+6|I9A<|i&DK9EF`I} zRy?0?xj%WmcT{Ucdg_rvrc-&0UGmb_ec4HA9bc4oUrtZ+Yu&oh{IQoMhgr(cvTt9N zNmPMv|4|{S6qqyP`oq)Nvz^JDC~jP9PlU_-)}H9p=U$1>P!2NKC%IwSDM(I+=Hwqs zZpgDNda|b@*}i{Yaxal--p?h*_4;P(hv$Fc)b{h|HLH5AHnecS>lE3YkX}eInG;%> z-&Je~0fXgwnULI;a)=LOloln=e(J;#duVK-(?oLYa za!%)u6Q}QUL70%7%{-WP{EU8ISk?sbYQfhyeOWXtc z)W|2h6FQv?E%_uoPL_(7r7rWUIb9%aC!{9Xa7x&|3Fx>U&pO{(L=UV5I)>qyrl?G< zXFcjG=K4fF-JjE`ipb*PD!eCKt! 
zX^uDWd17YbyJ7a!>+bhxcdSzCDha(vRkv$L3=`v1Sdzp~RzFImG-ia@X639(!L4Rj zOQ)iwcl^&u9};mlJa$!g7yF)@HTB_+ogSAwjE?3jz2b%+JDbfwm$V)29CSYvH+YH^ zq+gNmMc77@So~&>z*J4MG4n)MM#x(DPHr4XElHJirjsj#N$6f5Oa1ftH@pJ#ZkFY& zfrLbSOPuP95<{Y2Z_N^Ni&@|OT%X}T{1-i)mlBiT>*<@cGVM}GOLArur!)rdkSS6t z1YC!mXbhX3rhtwhls^>y9oxMTwd%nB@8sq*9`U30LHC1`c`8GpwA%NxOd}xgx(KGd zprR8;P%Q0d)T^XONQA75`%Hp&a1vUzM^ZXhlJ`4B4mED{()TtmtX;O5C~Oh9q2eMW z`7J*jnN|bL_KBB?#KSAH>q3GxzD3w+LoLO&O+|Ha5>P}R z(96*Rv|AL}T4_{JR&%p>< zTMY8eNV7m_3@)7cRiQjq@2LeXTULTIOSQkNQYA-K_$w>sxg!9)qno zJ8LBAX73%MULVcj`wf}xi&O6Bf7$=(MzU?};O)tXULSqtT9X`?Fqa|J$c1GF@yIPl&4nn^MUSd zHg-&d3v-hA#MTU@6qRzYn|DSF0N5=|>BTLjQIs zLFxdE*7@To{pQfqhrEc+k7#Q!R~Z}l`K28xo@tWV#C0uXp`Ar>OIY1tQ60U@0hbvc zYZ;0d-2Q^A;7HPX5SZS4Qwt*U(Xr}%f`3?FGstU=<|R3@lvMh%{yzO5swzdRbd$En)45tAuhdo&sfT@ja1&)V8TuDsD*w$?%1 zaCtMYdPH5PYP#^dM^RM6(yZzoMrVwj<&*FLB&7Gy9~OO?LJWN#{i*W-dbnPc%f=RB z5kYACv^>@QboKC_)hxzJmREz9`X)IpFUBrKn*aBt4sWv*nM8ALpyHZ19Lj~OrqzgBo$2BW0%KM7gWvLOsGq3m0cWE|GCB<}}t78a297#hS zIM%TW9d6`@aK1bJqUgzkk~%k^7{BmLYlNu@6|d&w`zb99cY?5f%Uu{1toOo!sAx@c z78Ka|B=Z+7P?<1nx7u3-NJ7h$wqaC6=$yWi;QR?m+sKm71eVGD3xpA$;73U=SAF{1l#yHFOz`4E@x%iCa(qcAAa3l`?t_J(DY35B@XhBkDnfDou_txJ`Ei` z(b8Hg__N#BLsJPOfn6{*CV-JkR@(#nLf<}+50 zbox_l zVze^)bVNeFK2MmS?`wX=9{-8(upah2#-qZB*}Pfqn3BoX&lGYV#`RO-$_>AWmAXq&>hu0c`U-;%0z;$*sNUr zol>~g!40!Ci|E|`_o1F^<=3$7pOd;Q?q^fK$oguYPrqvIjL-0fpRs25+~^f1=w%`~ zAGZsWmoFQLVY7i;leY@u{`hF5JlL{Sj&dI@HCEIf2z?xrEUZ8IOL(l633HYS%QUp{ zkXL=RE%6h*_Snfr2X7r?l(h6D-;%K!q zKO2g#9tpggs;=t0g*N(EP{LyctHEX2rR=QlRKJ~aIl$q9(!im85XQ509pMM8)TvbE zr5L*`I%elkxhri~B3)$2yk8w{cEG^>hr?kYvDoB96Cp%k%v`j6?zSuRUoBkL)${L} zSgKO!l{aLDDii7tSF59`+pa_2vR96P=z`#F_nZq}lOI zEH(rLb>QdL-jjtZJ_&j>@|H8lAX>v4B_~WRd>0w!tXC_z5T_D*WSaH9EZ5?wb8!Iu z+c;YZDkYp&MmxkS$mc{kKnD+UV)!c&P)j(ok7AK*i}E|OBaOjt+~aU8+k+&^;IClB zea4*Ma)BGStZ9#fOMazV-UD&Z&^A!3qd(2rpUgk&6JEEH5$GeDLn$ja#7oM#t(9i| z%o6mTlbllX&=;Z-jywL7Po2|6#2MdGJ3=jkoz_$7mWF{=>PkTzIfg5w&#tWK-i$tw z+}`h)r*NP4HxRIJ#Yu!m01U_ZfkMEXh*U>Q3id6rAQTHOarqBGFrtZoN1eI@5c!XR 
zr#qcGnm_zjxqHKnTk$xmB0Dvo5=r}s$VaFL5;}27*)Qx9`Mi%p_o@?TFOLqj>$zH( zR!rn8r>g;uOQ|+MRRBQwm8t*M2c?2`)!m02;mSsgf+sp5iFkg}Int8HYrU|UwYrvE zqDF%VJ0o?j*3mXu`y`J3idaR+1==dZ^5$~)0gOHiO;w5O219Q?-Wiy}sl0pTXI@MU zYgb^Ne~do5ymlGm^re(tf3(E#Gh2JAx145JQn7{KBsN?HMJg&pAD2I&&n#un@mU)O zY#%94k(7Hy8P7s*1Q|V^O=5wl#s%57j$bnLXk%*Cbo;AtLE3TFH+c~`_Hl07{9Oq+ zMg@pjp?mkYxC(LY-}dFZa2@oI=<~R;`lvKke7@nA`L&+WZSjmbfma`zdAK^so)?Jf z?QK{PK;#bXWrAqBeHw#jcgoea?dQ%Ly`eLQL|KxG&Mx-qAy1Q2LR=z_h*}B^BKcY} z-;=1Y0c8}ZSa#9rC$eH^=N>-K^=%dug>IQc4Db@nB1f;2wHr{M7*%sU%4i*xLkgo3 zLj;qx&aB<$>AvjFo*+miK|4iagAX|&ah&G>uLe1h5UMU`Qwu>00wn6dv`e9faQwgp z^%HKh5;9|!b_shQ2-FL}%Q+n<=UB#dZ%h)Nb><7{W|iTWL@TC-79EvlA1^_-m$qV{ zg*l>7o}QbFgYd5ZZDUDPZ6W89HA3J+V@Di(c6hRN_|?LU(fBcItV*Z;mMnBA%|CyF zZ~dQzsJPfLn);!$^L^$cZxZ<61#@*Ov()a+^^!;E7zcgc%Xsu@&Lf-v!)2S9@`NXllaA3(Pra8v=kFkMsY2N9gh$aK zLTSlEw9@L^3T8c9@<$H>&`pcjz3Uo9{-TbxUMalSnU3a|-J`E6OS?O$&I6^? zF@|*5g;&~V4+2*r^8Ck>9J7q&tfa3~IP;%_NQ4VV9qM#7 zTFr@>a(KGXd4A|BSqa{^RGm^>aY>f_vkxt{0yEo{C0tp2XUdWlIlqpoW93VAB%TXA zwrwT-&LQAeyxckdH*1%FWefPEbUzDMMR2E?5X;ZEB|S!@jRyZouYI*)1$|4A(1GsT zR$Ve%FZ9cBqj-F8K7DY9|MY@p|MW9!r`!E(FyhRKe|yELZD*o!qV&$jx+Ua_u<$8e zxL47Zb~gVNQMYOM?i~z8))|=9AeHx!`MKqPa^YxR{2Ldz(Q(qNTik3HZj-n8wj7@E<%Vy`b!w@aB#3 zZk}S*A8yRI$(j{DyvHFVsO5o_Jc^ND5|L+P608+_Udd~;=CPvHM%-vNz`SHzUa^t> z*Rty@IIlJ@01AkQSOX}_AFfSk2V}Kt$}#&%`Wd^2}e{>im>XWY#ZEB8toj)lq&u0`cEIeK8I!+Hh@g<&#le89ZI=Adn#j}qr7s8xPxC)etDJn6W z3^QzgZSe*cf!9j??SLYjE0}TO=A7QLo@*W8yXH%-USAX5`XfO|#&#_61AJY_MS{fv z9}0jb48SS410pZKQ}iU9>Bs<-BexQVy#`S3*P>F3%ArSoxp_q|C@veQbf zMu0u;II-l~!YEEmfD8~B9{S#g9)>k}1VnHH%xJzBC@de$nYke)7p%Ix6Y(&y!{GyB zuBk#fP^FjZm*R05+qD+4*rxT+w$c}W>c&ydnBIjaO_G(E?m z2c6#z*57eB7Ow<^CpWxrD;#S&c^p7L*D-{pA%K=Ly|!(l%0%%e?)P}eGdV2O@nO;Y zlsZ8yt9-pzQO)Aq(>x-Qq8MDxbZF>xU^+@#Iy;!4vi8ePvRv1{b#$DZ&!m6UjFoedl-OQp)0}mQeaJRwW2{XpUF^0TFOs%|1Rm)p@BC~um?kZY+iHz&S6HGBo3FpOC$c) zZO9bAKuxf(-J8P=JB^$RENXUOv%@|s8~`0DdBvRAyFEo9_k>!~cq$*@A$6iO{EKa* z7nJ@gg!*6%Ai~_Uz%)o!vEhApQsA^9Oz)95Vy*w@YG02cZD`#TDK~eP6$-y$r{t&9 
z0k8+pm4x4w0Eqm+K_Z#qZ1k|A1r(f00AI|uUEb79QRX03SIhu0SGVY~C7}1izzmBG z!WXbF!C4Xq!=CWG!#HBxNZc&^p|zpujP{KZtuBX1iGIy(2b1=BuqSPo zwcf5or0n^rx`J4kNC)!?W&pn}NidFd@2QSHD6BTHVVbI&nYf}D8DMpGTZj&&AlFps_WLI+9 z{krpXiZ&Sge?UFCWW`9L+GgXmJ-~CK(%ae-e#^w8T+70h{w~Y**&X=G=7@q%pPG)2Jx_ zz#-&+~0u3n-Wvm|owXli5^3ouBHZnfkF<9#G#)X@w_LeFlF`J-uk#<RjX8zsX?MKd$axCd0aZouIy3m&YQdSkTiwYPhw>Vc^!p4iq$uVrU9X z9*rMEn;mcng}57R|Ln(!Y3yRS$jvKy`iFz4c6rC|*~yDxB<^wW_D75>gB0sMHv0Xt_=E-cy~T+23&KlpAaqeong6b`Gc?b~#`Oc`&Bjlr#|a_jnr61JH=z zQ%xB2U!laE%Xz$fs8n^7=hkFx6{;R11j=K=`{w0!pUf>4DrcvzPm?V9DV5N2lQ{u4i(| zl(HBkN!1&`007s`qP(}DUvZqR&yaxPVv;yJg z6}nD@TZL}$NR(JKAg}|sJMRq?*00LN%#In(MYS6BwayD+ADtWWBs>Xk^bx|jI7+{_ z^6o{4lpegifwbO>QfJLRtryp|*3_cslE4@Bv=0ucf9d;uzvHidevb?LSL4Kh?M1Y+ z&=EM6%g1Jevoa5*F84?7`VW3MwJ?ojxY zVVLqU0~5A>Y8l@W9bGlIj74PYJO3yN18oF02pgAa2(khY3Z8JAMUkN0)iHf!mX9a_p(_#Lu?#^nMW__(QHW0K(X(T~^LQ;Z@*$!3?$^ z%R4Gq4FR_fOkRZ-rE^;{t|>34hb#uKNKBK=%~_{MD>Ov*m+;agq9imn+Y8Ww9YySr zz+^)KHsidcXm}9J-=7xXAqBu&Fx;rS(!%@upzRvRWJBV%Yjj1=FaW6?<=ua3kweR^ z;Ot!ct+aCMn@0KK@bbl;z{4+Gp9JFpo<9G}xYTT4Wzeb%MZa2f6{$KT zDWe;O*9HO9wND5psh&qBxQ7sL8VVePn6_rnCiAzlkk(}^i_v7DSMOS!S#NDAf?Z2ngFAQmiCO_ z1Yw5~Nt+imrVM!|PKIZ8Yv4d`mK+6U@cr$a$_c_}iOcKvr7%PP5ZfljWHe!~R!jA8 z14sPrObA1pt($-I0!;?365+EWde(yFS=rcTztmv;7NJ*X;jTmyK=pnNn{9$qx`fKM z^xz_oLagA^ob5#Z1~3bee3r;YFo`%YvpQUndU7bPcVjsWG5xo>ekKg|AnLI6OOFxH0g<$C1jYc+8BXYLX$ zG%5XU=a}HT^49D#c@LuunOY#+_6$ayw$ZaRe}Ux$ zih%=%MH}Gw^!>j)lgS!^-p+kcQVpl!A!FcN+ooo?!UB$8V==j5 zcGolPR@ky!`O)&RWCMp9nA` zu-7@ho6HY7ErNr?lsE0fYbz)kl@s6gVeK12#N%FxtYfbU?%ARhvu@bZkn+6@Q0&O3 z1E0C_QT+lKldhSE583#d9sbq*n@S_BC~ye_ce)uJdaznkaWgE+g8$N zfqS6_e=?{zpMh7)>V^aAGd$pyHVw)IsHX&m`5hoV_fn{~k7IhBF)C1@H+d(L0PhA9 zW{U}C1>eR92`DeP2Qi8(ZrtW-zAeYaoSzZy;sI30STogpdl46>fRPL_8p;GSr2mM` zjA&W}cNCS80cp=pXUYOmid~X|Vbd?S0Y(!y&VW2nYG>mR#0vIU6?Opz>O;R_fTj!Yp1`uX>EkRe= zLeMI+x3vAlk{USGQ9$>Ardu;3qRFF9LD-f_1zlBUfb#Jzm2fa~b6iOr??f_S(mN_d z^-Dl+%*GfX3)D{Uh{uCK94pat3Br%`-OmSA2x)b9lwcCe3^b~3Xa++lCs~qw9g@FB 
z!Bq=rt*i9kPlYEPS|izc3i&5rweC@t@R9?ax}|}~g^xO4hLwL;G6z6_(3ZPzIWH`m z;wO6u{fvAspP>Be8XrvG<4Us>cD|?rR9NMy!(q2Rn0#783l5&&b_EaMc%M%sFUiI_ zwH0*f)-m!8iK#H~J4F7ROaKY}n z2(_ge@FKJ^_n+82BL!K!XoXzAIety#N%PdkDUSh28mZAhz6F^e@WP280JJ?4Jx*Ji}U}DjKw(-Ux+Zx3s%eeg#Ibg){ zqxuFOtkqkY>Wvjv!*4_a?!W*jtF_YQ)0}8z-;IVeFvV|W!XjqZ@ZwZpG88nNe)3h_ z9*5}?6lj^Nfm{B5ZmW*=*3?Qy(Ni{mWS%S}IyD!7BK(cuPhuBzFoUNgQPzQw=)D(C zt1El8)zmv;Q=Xrd`tC5RcyQKSw8H^tc8(1aba1Zs(zjIIm6 z7+5cX=c1j>awmiCyhgsU2~OJ9Sr9#a@P{YW(MAtVHp*>3QU-wHdj^^D!y_{quwg+M z4E7;pZnYHLsJZ3$WC;*h3HtZh(=}|uMAr*f&R1KhA1?6JM!f>Up(LwLs7m%53=9?; z4Xc)Us9P>S=)J$cR*ApnWZ&cBLd zvpGBA0yir#5pQK;8Re300#~l47IT5vnF;C|H%dVaMYK-e<2R430m~MF&Km&* zrXDbi8!|7L8V@c2T;dJdcK8n{onv%^vRBs&PO z=P)d9?b);GZio;2+ z%j(y0t$aY+KdDs-)t*}}`zm{1_}aRI{|9fE_8mwZ0mYk|tFMba6N%0ixrKT0fKLhW zy@pDJK(V%j1)=|583IDGADDOuQsW&)5pc)A_InzT1;h*Eoc~vrv;9%KA+p@B#^UH> zg0PuR5S}>u_)6o3bgiL!PIrwrs+f}G`3dkD0L$-LfJ_It>4;mh-+lq8A5Q${M?q+K zSbWc7CWs^eGyi_ndz8x?gXOnvAowbKQx<8UQvkw}w&V^$C_{^K(guFKp8F4guPq8d;d5aQaCt3F3>VjN3A?9VPYT*R z9|Yz?&&zK^;lI{=K?lIaEqj0>#$t|lXVxzQwz@1sQbSz zrg@-T->a!<{(CO~CvBh_psW!)oEonwLDxoFT?O?gXo`3Vu*0GkKYzAEBWeu<23a52 zXX2HKz#V`d!+_zgSr6;>mtal&8%2P?l);DJPtzDPxN6i;zQ3m_RQ4KFt!mcifoV-Y zyRP#`qJ`fN#O0N}_@m?J&t1AoX=4jP^Msr=nyLtPeoz&GI|0k0om~Y=qkFT@n1i>l z*gF%>70v65#<(XiLioeKdRIW@D9;65?0AH6Nd73bxo{$v1!lb6Ou(9jtiamVe=3F{ za}7x}A_q&p=3zjRz|u#PFUWB42w=<1MxO+!L0c~aw;y9PiscKqZb|=}$2FD4SgGN; zeX#T`S3`5kI#Ml3>Y*HGvqD5{*M-jEjTWVQ!BpQmxb@#Xq;j}vhB@egrXJvY<%3|c zaf-h5;;AKu{40&VB>5RK_ELrTCOMdc8-DC&$dh`2_V9bCdJPEpw-p;M+Jk+1V?St<+_}iWdx%PcB3NfY z{ZMKKW&m3K!~hp>Z35VO5G+Hu2y>u*VCr*UN+YN@_W9t!i`Y! 
zjB^259yZb=KgSf}MR-fhn7xfeB5gw~$bVb@^K0j>gPpi%H3`wjk3lXB$xz`C_y z_80GkFT+EorvXpm9qFN&cPXX}=Y<&L|B`G9maiB)AEi@7pzW)hq6Q5!`+T@D=`h@> z0IL=Rl&r)6D;(%*J%ma&eZnh%8#3z{2<5ZMjIy3&Wtt;seyB7gAGaRiJ&dxZ*lDg)A+peg)JtOGtuFoO-cM!tt95d&ft55omX9YfJyE(juh z7xw_P3tysu>6E1CxQy6VfQkNcfb;O5))h?x^xha<+hxCsz9775{;Z_|ycfbF)WCSk zI*>3%12`P1VJx-Bo!?X)Ev=8D6ngB0o-aTyVnG51s986jTjuv&)VWGfQlx-FKS$FI z!SV{<+#E_bX1vHH`}o(MeORGR@eJ9Yl>@ta$J0QvIoy^byJJB4z8}E;&f&sxLn`Q( zP47^FMRiZHSK%qFtDsq>j_nBA!Av?{-V&>J{ucFartC1&oCG#I3Lk0-w{= zDVjfPD;90K`%C{!H{cPSKAHqQxTlj0Z@bgto;o{o_~_Kns5Z6@E&N42sZ@_IfKF*qo@7T5#X_ai2d)G z4{#`V7tj_R269*`Ac3+oKnG&ojE)2Z&X)Yu?NJMHCgVACSr5h6r$r`Ir)tJ!1(_Miu#TGF;%A?MTP zbaZ7CEx@Gx%=bF1cR;&)s2jj2M)eP~oLUQj$z`}_0j9IHkUnhvBG{e(G#Y7x`zYks z+zCd7fDF2)skmYcENL6dUn{m`gh((gp-FS25e!awAp2_#@t1wif#YSDeG5vXc>$;Y zI;CjF{jzk@AjMaKrr6Fm9dAa1o&$QE_PMJAV256>m;QG{L5Y)6&rG!_KktQ++|1VJ z7XC_j5meVJ1Z|ir2tttF26yDWoa)!6iFx>k^}ugnd^-vt;dB9b1|UATPW>#&DDuL= z?FRQ@K>kl6)JS1F0xL9C^_b)vo0I3}QZn#!Sa(FKwQ~}E(PMf~70YMI%H%^ueMzR( z_8WyrNXAqS9?&*s!Ywl+rSt0|1lbHO3r77$>p@@6vFlya_PfQeFuI$4=@rTW>es#r zoXW<2^<{mhttiI8o#s#iI%Eq)lq)zr)slMME>9j^DgH-Z{*#+kb2kbDvSN;+e5Pqy zYBfz0uq4;I&BOl=N1zc&0&t=+>5;TLl3>3vO#n37<8=iZdg`x}n>Hc}eK>C{d zy^00p*P!8HMpHnLcagnsc_^l8C;4=Z>4iS2anDe9rM5j6Zhq0^AFp`jg=HY4jN}$j zM`#43AtGK1=s#CAfE3#MjiLmNXV=|*sA*x=%n`!S4%GHPf)*1^9ivsBrnu4=+{^TS z@gxWZ$uIulp{WC36+Yk#o)a}J%^P2RNt=w+WgzU1R6yT@DlYGw?Nd$P9Xz)P2WzwLQNjC2`wqF!lKQ>E++s{z2Fa-t6Ctk-!PG_Zy8<8jtpGf5WqYPQD%! zqen9HsF1bdp7-z`D0ZDAPMX0K%s)x$ny0!eJI+*|i(EpYHUVxdu<)Lr*DIf~ZgOX;nxB>7FE@?)Q z)Bo$#1zbvVzn9q)n3Z4PSIL@YR!y%XTswnsHW5arx?u4?4Hj>B-+NUHh@mgBCtPuVMB7EdFizM2j#-ijR@k9K{)Lk*KdD4w%0g4L2jq>)7JvKMaF2M;N>jXgT7y zqb2*hYc`!Jy?0X@DZIx75^d@7XSAi(c3u{lW2^8Hu5&EJ{m%-ev0e;Y>Nqg(v9b50 zhWPwf%Kdjx@o=k+X8Lg~=kcGF)>>|HjO%JyfcNQgW6eRsgK8_DBCW{YW{pR4#oIv? 
zuKM37qyMw|YHYNF+lc@%q!HQs%qZFta12GDrNBlxjCYgB|F<|`Y%BoiIU(&p1zGz-|+hR)58yDQ9_hm`idv+_Io)Sf0i*cB0!u)CSp}wD+ zY2YiGTMqYL{J|g)zU9r&{k6Y$>pr}%BJ3n-!s7&w_Hzp z?G8=X977}(1}HrqD=UK@UdQtoh}L&FAGn~_8hl0b@ZSRrJ}P(DJTr6Y$S=+7E@X|2 zF>(uh0sAj5$bU;fKLvx~zq&(yWM61Gzfu}&NEJVK$O;5z?GN3C2I)LOhPAdR9&1ozGhuD&Tjt@_4@*c;Q*A7e=|p^+W!i2O7l2ccQ?*DRN_ zu*h!1&(!EqD;N9Su+hCCAk76Mf_vu$d)^X|Mtx&N{FE2T#0ZC%;`;8#IIw^mhyk}J zmPOvthzx$_Um3baE}9Fpf_uvo-Yvm}PhjeHtlnM=^IGn+h;ug-pQJWTc z9ijnrU>Ff70A`EAp7rTmOLj!RS1(%7SS5v|2#6;Jn_5f7V4K+iaRM}epr{&2@@-`N z9_9J>8t_Qp>+$+wIpP%F)Sf8WxG}~4-O5%0PSRJbZsNWmjvvRx_n|V?=G;N^VtA)- zOSLDDtWv?-nkZ;!<$C;ZLw}ZDrMTj~Divh0h6JG{G-ye!fu}WJNL)d@!8%4Sl++q{Ju*_I<~aag%R#&^%HH&0keNI%uvfKtNTfgXS;Gd|ef< z1%xT>XUup3jmc!ByZsiacXho)Ax zO+%N&aV2qF0tw&YhU@49OT;EnRZqr6SuJeUdlwNvfmWEYAR)SIEBNQ!6Rg0_oae`o6 zfO7%P5W32gUu8@1kc2AjaRvXsCXioadxBzid<|2Eg!o)ae-EX<8igOMk_JL2hEYla zUbp~yQ57{Vs)#6LBEHH7Th;g24OJQU*e&jH-va1439DF+?_{>%=mM#6u?;AJiqT3M zdWeN68y8#)MX_6XGp?r?*VE^wJb?-YTkUWnF<_4tEQVFiiBLF0T?kpA9(Q~Mui>R? zQ{d5jh+%kg!W3Q3$QQ&@<0f)a`&p_oHcGw`e;=`+od4c= zC@}F;j-Y{K8YNsDjVYa7ECI`ui>YFk`e~m)-AYv~N*On!M8$NRK{NzlX#vt6y>XMh zagW9YcRW5Ot(bs)?L@0P*{QhhR1RfeDs>^$pl(Kh$>NnDJ;&{rZ&}AtJ65%-rE_}Wwm(oR`+(RG_jgMRf04mti zm0|!jO5L#4ZY}B>lx&HmvY)FeM=2ZHN&UB#C&!H)qS#!SJ)SSV7FF3Jjv+~6V3*+z zOyfQyJ7UL$Je5}ygxkgkj|~tgR0upb1i((^=Xhm%0Hh>A;dtePaf3$$SiU_PFG8y~ zcrqsv+2DZrrBbyS&nBksOssfq>3Q7EWcPxU~rQre7Gf|~slP9z*RAqE1d zGpg89`jcfzrTj&1F$|Wwr;=5t+EL1C6jpHO@w72hVt;01!pO0erssj7pjOhtnXq;|#kwT%x+2RJ5@3dTjr zi|bAhg0>7GrUty+71K2mXH>jF7J_r#@KsCxpkgOODiP3nK z0M1IMib)a?j?;sOz!|vDwyAOX z#|NZK1x!rUT+TrZ-JTPQ0Uf#+#k0xCXvG5%e)f-}>~jPuJyJ}T|3NN`6B^)!eL2t& z35Q`~IN^$ZfjR^qJU*skU|j0TL8|GHRNR45cNBw0XC_5;5j8{z!3!=kORA+#Uam(!|#hEN_U7D$r?1FXyA$IZc!^wAnOSNP(4iLFl3 z+j<;zHC1XrY6=@Sh?rb4SkvR9K%by?p2CZ>k*M)O*}x^7f(j{6HRDHAP%)rmR1f-k zsOlb-aP9c06dtd;A4=?3j8~DNn^dvy_=t=U z$uSY%>x(E?)pKR`D&Z-`4(bcHR*E=~Di&vQWto=hxgAuw8}x$BRGveM#Ng5%G*-ECD~Xe|!W~XCPi_6<99?Nu#jzD5*1PoHWVR zR!IStJ7jbpe35T;}^+ 
z9Ow9ax3XAO#1(JtZ-&%dNKu_FWrOpD*%F*UBBw*FaTGJDe2FrP4bDf7RcIPY^wiJv zDE(AbG1N)MJslSV1q)33@k^A>aeXXY9#uY0VvKI_)ab?sx)l++6%o394dCrMG%G^> zWJQE-Ujwj5iY`>>LWM3==t6~Vx1^WbDd~1gdbQ$Ow_DQ771wWWC#6Fb@Frlojn!V? zc}};n+Gyvg4prz-g$`BdP=$7{!I4t@-J4YYH$p``RK>}h5X@zXAjHL9KesKX^7no} z=jW3r=dHQ(!_qB=%{5PIcAj3j)uU$48As2gydcxvkAC`j%C>#kpPab3CK3mwV_~DO!AC{A!H>w@ zp1XVR&@85CehfcZci8)h=EqJ$?WN%QHI9*6e$ZYYAKQEJmPL@}N1-aR9wT%}WV$5N z8)!Tau-oLok56|RvNex*`8{j+%SE$jBi{l$Boe)6YR2;r}!FZ zE(Je+r;m)M_fF9r?WHfrue}}~f24N&>3zlF90GqgHZsg1%i4tbAu-_aIwbyILKUF*aAZn(_$3c%*c#0>yA&k}sJ5D;B z2k&jImDbqzjt`E{*M19p_0_9r-aDvDql~xSTPxJJ#y>@K{TRLZEBZ1d?Ja+;_h>tcNFWkAKS#@bzQ#X3a+PD-4)b z(%$FM4#8j-P@ovS-@$upwf)+A|A`4|>~+ML-gOWN&BU zIu}?dP|U;wp1Vi14OBixg>Si@=2bNII$}(3zW0a;zUe|2Z^75U^0)ZlB5r+*WmbZ) z`MxmvDQuxYG2a8%(`x&56~5a9b)n-m7t)1}u`Z+w9k02NE_CQn2MoF1oxOCSLl-*! z6!_{whc0yJLWict8&jz13YM`UQ&+IOhPK}33N$sdu3*s>EHG#F&YD?Qu)O9%|NlcB zE(jU|lF||V2NUd#LZYkYnm~GBvwuV#DlCkzJW}sd{{D#&_|%iLbcfD^WK3(%!0*nE z2TW)N#_ylK%hBVd+UW5Db@VQvr#1Ah68~biauzwdT8MaYN0cS0fcW?a&4D)WnbW&= zQ;#PpWyU2_lhr8o>Hu=8--_(cVA%3%a~FqU4XN zwvptG*?rKn$&OvC>5wvm?~p(pniExz?CQNacU*Wq5!1p`)0tDbEgcq7ZI+d^@gzei za=gTC-Q3r8-Q3qlV0u$G_cdnvSU2}IGR>}=Hh+8CUpH;8X%-lr{?>(#*E&yhp+grs zK(i2_B;L_6qzfIo(DAbA>#e3WUFgt-4qfPYSCgf#S#K0={Yk&8E_CQZhc0yJLWibL zR@djNTOy!aBJgTygl^hg*Kwm;BA{DcG_p=ex4KBTx@fdxNVmG^wFQQ{g^43;GPV7> zx`l}&YiTuWM0E=jHKe{SbifOn-&$Cy3mv-9@p4)Hn|;c<(4h+*y6q6&+y+9o9l~oN z^WWPJ;dp{D!QuJ5gmli(<%Aw@L2QC2SKgGcz_ELSaz3jhd_1%egKZmZ@rL;4rihr5 z{s=NA`bm{ZWVF12ge!qZTylF_$ie7`1fRPoIWls*^v?42cx<<0+`f>g@HuB^pWNA&3^r=dRox7HY6qP! 
zWQDx{9aS=%5~y-@6<{57@WXIBQ~3+0&51mPrw5ng5Jn`lLQ?MHsI$vp^4aY`g9 z@)~8cc$PRJL63D@H$?SnGwYBm!JXIUH9?`*zd>$$t3E$9sRV28DN{mv(cY(4!N+BF|&uGpns zs=nv!Gh3>GRW%IBEv+@+NPM48AKoRQQTBMR%*D zK}9c$NP&0~uo@{;@85B`x_L_a^nl!;T^V`TyAKxCfDaa@>jx|eT9uJkus>Au0er=h zV6cr$VRMjmdY%7U|3v0-z)nbSf3Cf_cUL9Yk>?#>)BgEpEZ7<5U3b;|IdHghH};(m zg+FAyr~{kqM1+uIALm|E@?kNjs_S13GD0DM)M)i3?YO7*fy0WZ(p2q0Rfwlx&^`fC zaKrz=C6xS90$!ejZ|A1fMRKbVu|Dc5rtHWAkDCN>*5K(Jqby-!z%xO;=F7|B15%?B zkg5ixqnb}r-X}SLl%jW=d=|VWE6-7Mj#)8CLZ}B45Lv@l1H9$sg&u_gNJX=VZ(F!G zmD&|(ohwdTh24ygv-aE%-XD|M+`+x-B)jkGR3WmbirO}~#y+})vP!orSI+%8wc`2w z1OuwHI#t~t)YNN5jHzvlWtDTu2iX0St^F?_@YRp#gz zMpXc=Gr+_|dTX+Yk6_qGFaXZhJ54=gy)8;8fDe1XXW`wLL&2m9Os;3^i-9<h_i9g14j_p5=bS$N8g_6bFka3&c7!F*cCY( zb8Di>L1j{JD)xTr0J_er@99P$=xVbkFm=HwJlUg zK}GaqxC#?ahc~C=U^=^{b3h+0)EBij`FHonFfk^A_RRi6IaLa9{JTjAIAa+>kzlA% zlwDDmLwXJ>+N=7QH{oM<5lEb@kE;$=7c>WzYE76l>_LifcmxjG2_{nWGwV(Slj>w* zAmjuPPi7<_6@^Iq7EQ1Mr^!3vdTN_%TN+5}p(+3mTC3_pA`7dC>Mr6SnhAv0ejz|H zC6TClLUhE61MuRu1iUnXtW?Qv;r#n9R@0rt`E=wD@--Hp7<5L01*L1oIv>( z`xQZ#ZK99Z*y0%Hnrk^AqqOh>!b7N`tz!6GGCI>*4Gv#F?52nir6)9JmAA0O#w&`r z6*!&^ju%*wUf7(aP;ntuRXU|R-GnkQl{z3z*en32i$s;MTl*tB1g@!tj!`lb^;5Fy zXpW zUl5mr?+LSB9x23733zANbcJmnyGGv;vfc2vCSz|2oj2F|cNXavBNpVVV%o)_HR26( z;Df5dI|R%qDE%Df!4Z=~(i^Ud8$3LRlFbkYoM12bG?-1PgiuUk`q_`!l)}eiQ3L-!0(%EW$@Cr;qtQ21UVl)f1Zv43tBBd(;>44J zbCpxCDQ`B{J`vAqw<*29IT?ykOu64@GaX2Wi&qNRZoXbhesd79^0I5m6Q>d`jwTh) z&MG{Y&oB=m2`Q@Rg2eaEoNAYTMWS;tjAUe%%&fwyoo@sZWI`N&bV z{yjk;w6SWu)tkG->5USA2JCQOP*oKKS7^wUIw&bZ9^_Ki1lvC=Y&I#staq%46yN`Y z>zxS`yb3mavwFMuHbQwjfe9)9AC|s5kgEUx|6M6bL_)X-7e(gHUdhhZy<}JRcI_=x zvRC%X=33dCWY3$uM`UGh*EN2x`}uwU>W@nIobx)5=VLv)Xo;)NKY>2$fh8AB+cART z>jC2CK6HK7@%l%#-p)JV3o2`9cb89!BRH!s8AeWGqYt-hOg(oqJGO6O05UEJUt!M< z+Ll8MFP7LNmS@o4z}=iTyc{#6Z4vguw)0)R4cdMXUoDrhZ98b{X@g}8sqt*65%EBu zdZCwH9o>5r4IC~!XO_=SV9F9(ZQ@&dNcvM`pnzAKfT-wxkZSuirD=ut$pJv!Gi=8; z*;B4&HuD)i?-W?FK7ijsYY7Qg>{QN7S2x;g6&B&^jeXs{eHuRF`hG^-!W^qEy1Pfb z%e^q@;ev3*blRkUD)UAToXo+laNnbIHgR9ubFR$mRHBVrq5qFpTGMP z{idEWm|py 
z!wf$B!m46wY;pyiJ9EYDjScSJ4mjSf;IBdNc!BTj>RUE*GqA?Kw8VzBb$Ru7=`EWr z4S)kTgYKJa9vDB-PE9`xN_VTp4A}XcWviZNv-@6$G_Qw*`(6W#HsiW1HLWOY*EF+4@F0xzDuJ)TEo#Uv5=yn}P>qi(S0Ly#x^tBxY(p&A4D5NY|)-nDJ?; z_5Rgq?Na43G)ZW%=iesh)h-A5-;FvOi=5HQu!(p%gRX0FgZP5$b@lo-OXpWzDw{lT zGAR=?qqT19yH_XnU(-1 z%1w)6@B>kPiSVgA7pY~J#pf1&}lgPXpVK=J3&OuXsy~B z7BpN!P}_x2E+3WYj_Oj#g2xe1fP0uyKq2?B_C?bU~cQ$P==Fd*wL`0G;C?u4$Ot zF1bitx=8!qUn8#J(gBxej(zLyxL?s;Lvz*xn01$G_oPpR+a@q+&h$&bgZ%e@50V~Y z({)xB;k~tQcC3FTGzgxokI&x|8v-snpF`n>>4r5xt(PVVun}6X2~n z#SQqgpqGFyrhBZIN!mVG!LfgD>*$E(iKCA~$(MF6L!)niQZRDGeP^&v;~B;b2eOyc z^j+Xs)oniru1;^NnQM0Sta0_MYN_#VEEfx3Smmz~^1A5qz3?iZR+>8bUEw|3SXBcK zuX{rvrNoHOrUMq5v0ZAj@XC$9VcPTRFx}Vf`qcN@$Y^1YXu7XUfgG1yu_!l z#-p%pB<-ZK@45nWxwM6{!!jlpw{4fL9k^OfU3oW(h1;cPmwD^84KV%QvfOC~O#ROt zf!fynhGHAvxLN>^F1NVox@v5ET`DfIo>(LqrQti6Z?0fN(u}yf7xvYcZIKMy1qJZl z)%ER*8+Z*3oTRGP?DAGNwJHTxT(kf0$$X&1KRv*gnxQYZiwLV}x!jI0B*k1;xh@Q8 zBG!I&$`!rq@TaB;E+c99`1C^;HQ~(@|H&+W@N3+pBq=#uZS*ZkAGxG-+54~$6HnPD zf5hj%i%{6PTop(Ya4l&VJRxB+v-{L`NE2|wY}@Nj<`%h-g0rwvjW z#ZNpc^mb`6^>zB`yeAi}*IsMXN@BqBF_I}qCu5tGs=YsViR9`>8!`Jh&31UW@W1>I zF6nS(4CZ1fgiBUrk%B`f+b1`dkt8)#te$gUQ-o0cyMg^i(|#3M zEq)U2@_;X>4BqbpXy_rYfq(l!g!p;>;F=RDV_BmEUIg66u5n*B6q0P$j2Vtm6{ngw zm}1%u5#nyYl11GXeiWG?nPW?3^y*nXgWy#UYx*eWKKSf>LqY*UOL*-ST+L^h`E+KF z;NE(HNn=!xC#*}ovU0?ZfsUEToO?pU<{z1CQ>_gVF6&wPqN3(CqAbAoUqRB+lb918 z6Nt-jZc>}uG&bvi`(=`PQBGspKV7X^ro@TEGfZG!aQb|0(aj(PYU8$8Vwp7W@j;?|OjEbRlL>9}*I6F>Zl0$2 zE4=sz{(D2EQ;NOjgfdPA119qw1DPu6gWG14b%b`Zc{ii`g$x_7&Nr8Jyw@U!i+X?7 zQWKfJcw^Fep@;J645cEQ*sWa-@7{wJ)+#6SlaiQeCRK34t&Hss4qhr!{UstRHgQ9K zk7+&IUtXR*;{Py=mQ*9Hn-2Z^{l=|3?S$6Eybr$_@1RNLAf&}31&n64)KHH-PxEB#sWuY=SPH1~&q}OoJkVmN?IK2)|Iz|jZvL?< zsWXjwB5j}YNfG?rOZ(A$b&D{*@vlQHmG5(O`@TJU!>AB&dA}8Tb@E-m3a%o~gbX{C zQM+tA*F$!YkL>zhW0yo4bv0-eHm@DDZdu62K0sTLH+E3yO5;;b?lX<3?r2iP4#zL}OQf*3?L($g1lVW?3t6^kMqb>cQ_fhs6nVll<-HFOoS)x+#3~okU^fQQZ zj0<_Czp&8gxMtzIZ={qYU`LnLDOmHzJL;0a)8hB9r0+sj(c{G$#VKw}LF!cVO`fNZ zd%CYMjfaCF=y~_G)Vud$7wB^=pQS(3WC>~kr_v%PRx^12p%<60YlPUz*QR66hOKD+ 
z$#(%u*x^vNQmKz1)w6{=8V)E3tgqA}D&kp0&}8-3eivy`UP4GDh#0|AdJ9M?hX} zw_IPTrz*buT5?B2YA7j*5>fu8V9>+}?I&_79L})ZhjB^A2GwHd}$kATg136wABbIE-EI#(y2dnM(OQPf}!9j^{p&t9QS=!FdsIrVn01wIH zoWXVzUuMHOR$wv@c?4@1ZUp$Qm1u3{X=$6px^X(DbqoJB)3a~Ajil#$T=3;-X`A0I zd|3w)qt3^52}8r_;~Pa{4`FWu>ymHiKNZ5pnX=yL@ zwzVNhqbk-=)WW@|kDcS~TB2E+)5U(5oUF)ZxH37{Be>D;`dDifpVJXl3FQ``1cE8_ zHKfF@&X)Iid+RgjbnEV20C@YV{D@62P_vamyhLFve5{?zj0G_{teRrGk z81>lVmhz15{>REc{eq-*r;ZF^gjvmkkEhdUH%zGj-tgxOjn9$N ziJn}Q{g4c$%*%&P!{3X24MLn8dwOD1{wS3ieIF%Bc^3VN-~9J?x1Hg#k86jY2*{Qk z>(vgF#8(rHhQWB!OYw&J_`iar%`{7(GT!Ps4ZcgklOcK}@T2np;`wlFP~!SRo_no9 zs35w`-HYq``2g|#P^;?B+^_dt*@nz(Vs))nC^jm8tp0q1%5g-C2^c``UVLPuCr(Ph zysqj@G(C%e&+BT!l{*z7kVCnH(w6 zZ}`_W;FU!|*u`0j1PiJ_=ioAu51}G$6iQ61?orrlNWIg%gEVSzUx6}g}d#{=~7j~aOm&D59{)H&&Os_bp z4h}0ee2Rim=8PIqO@BVcV@~}2J5LbGBO+SH5x<(EB%s75MIAOZbNo6O#j0Q8e}55j zh5AO&itrEK(+-5*DU08jt#eFmf@K}9zibz(H48@FN$X>h3lDX^UH=wYJ%D8lq6Syu z`-pxR`ES+BzZ30tRja%lYbAAWe5RpUB^ zlNCx_3C}JeE3j5lCfRI7&N?U?RH*NUgyZhA-8`wgZAC=JobS%wQe7rBsu@dl`pQ4_ zk%vt)W|hZKG)szYHz*|_eh3*L{mw#m)RoKVQx707A82{+>~2+{!#9igqCMPIM&?kV z*+&jP^pa8mZFpYlRUy8S9g{i7%#{b><|$2#gDuL$_U;o2%yR4eo^_tk@7P_5e_O!t z&(rTVEbrx$6s1ACmP>x0+zw$I0{61w3;h{ml#bwwrQ5M}C$Zq3>&-cjYj?#X#@cc0 zB01Z2-ksHlMJV9AQC)pt8%s%X!%lt24OXid<#;$@CBbU5eB@fw>3oRQ%BjN57;M5zM+7EKC5L23aIF6^eyxBqegjg%4=n)ies{mEP_c{3yZ6D zsKMO3P)h!{GuI&JH+dwxp&+5>~2Km#ad$O ztu+m)J!=jL!;R5jZ&Mpr^BcdvinhITI_rpzNE;jO|2dy<`s-wSM()5?Wi-FtGj~)i z_Ec!lv3IUm9`eE0;p|T!B_d)gt55W!7T>p9!J-i&&i@|uNX(LzHlP2KA6`u5PT!7% zO6$J68f>xh|AN~1_C_V>=kjaz;&0Q{HZ(n!c&r2(!*!prOCWj4TPLZVGEFAmO_(Pe zQ(L*bw;7q;6UU+3>G{X|bdGXj!{EnyW{504g+0P=oa=;K5HTY0a`{Y0A6{foh zF-4Le%Mpc|Lu`|Ax~MOW40)I0`UKt9c>^?v+F2THL%ffZqZ5>nwbS{M1ZOTjJVZ6& z^B*rO_qmd}-@kIQ-B@VxO^v&R$HDAo|LKjr!cggo*Z!rAJeB0fuIw{c4nTXpN@(!? 
z{1(8(V`*mOrS$e2)*zr^cmaN5uS0~W$j zV;f7Id7+o9ky5=8T5oir-zm1|PssVTN}eQN)I$jscTf>WtOb9?`wQy&hUsS;h5>6H zY~`k@b>Lq7g{_ofC%}pMdwz(t{yob$eV3`bp8_nA_2g=^Ki?8`1)y|#!R>uASgjq= zcs*2X0tj>1pyhXwU7VEIjGPSKR11spLk{qpD|FMNpak-~r}(p#1hNN#N-TR0eadP4 ziJCVH7|7ioBMCC_4 zm{hPe8pR`!qVqsaABY`9$Scj%QsdMQ4_N>D-+tO{X6@NKV z=|_}H>ntl*!&bR)pc*_YYPOS(*|j>_^0RBlN}Mj|JP}4JuS(apj#+#;O(C9-c%cTI zycY{zhdFs^9;hYXNT22dvE5H(D)IwKd?DjMq3~+kxq6j9H_`T;uOppQG;-kVKyaw^ zJ+Dv-eQYFZR6;CA9Ddj=PZ=P}ri$W!$b_RJYhxx`8OZ{62G9eCYAN1iT0%JCBUg2z z!MYEfLPS_&;hW-@M^>`k~9IMN6U~mKSIHQB&S)KCl^Zt;=2M%SNGo!zW*`jmZU(d|-CQja7=Nan zI?LK4ookW{SH^o}f$41GLZ=FYmpjXo&;~zosLMkS%v8qzdTx;2$XrYJZC>(?Mg@En zpxT9>NB-ak!X^iB_D->fm8Q%j3LjSf6_(PGf79UOVY(=)7z{S!H78X1nDVLjAcIWM zn3qSYN|uoeJr8s>i_fZ${E}>o&t>o@#+!dGF0c#N%_>?%1=NDZtnTryQa8i;N%9Iw zb|Ye2k+Ri#eG$P^&(){}JG{QHY&`B=>#qyAeim!8CsD;V&(aRsMLTf3G|MUX??*wb~KJYDH zcYC%C6e2aBCA`c%lYHXh5KS-O$p5x7HN)h+?7Qx9Iuy(JD6U|T5TV$ijS&`%T!ZNX zV7U8Kn;SFiyFBzv>sVqP;vqK4Fu5<%%D z{YWuI*IE$FQ4cI>>QAh`%dV3iJnBG8ecvk0!)M;T(wh>vd?mwO%83r-Ae$g9t(p2M-@fI&OO)QFJJx zAr(8pfd6_+8w{d@>QMd!&z(9m9nbBm&HnLN%dn||N>x_LESV4r(nn~l&2&M9=C(CX z;IG*GX1{WcQRWZAey9@`#qYv|N`!w?;}~lC43lDWI&5H^EF^*Cy^5D6n#eGvh#r27 zUw<^Ln3w7@{j6l=&|26M0%*btpxFrVJO)9P{eiMBIvOmw{mvYYsw*lw-UQTtg=8^< zoQ3?)6uFijDU~xs7|FEuU+@4$GRn)y)u>dbhsN9BgTnN?TQZqw5n*$VNO1VG|ntJ%;eE_{r83EZUttW>g$F4Wq@nHAjOYyQ6iS2&uY* z2J7K$g`wh)#)mk=sAL8E1V0V1F00g!IdCF(IDr`qkB+LaZNBc}Vih*%+S@~|QMKau zDs4p9rV0@{(!=C(u=YUgNbQl}-p*4Xv%HHBiiGjIHflS?9J**-mB2Hzt2`^cD1qCo z9QT+LOrt7@rwW~RX6sb;Ge#$YIr{u_Tp{Gc!^Mo-Rwt(|*J9=KiOJY2>~6fzvSE?R z8$NjR&$tzI_ji%yP}m0-7n zO%A%)b|_*L7Tx~`rc)KU`}w%1bsrF6)_VQe232Z^R&i{4L%U9#oyBdDgr`IesdPYLojE!mSPb3A;hH57Gap!SxZr;SlKUG7Yx%LC zVo`~8{n!H0{%pQkz@Ci3o#DajvcT_ZWnE^@wq*KG+UzF(&S5`HwSS#``DM`5Qrj1z zkk<~YA*D6*9^%Oe7(6NJhwPGV34}E&%vDMUui{RQj>l$kO{Nw3+<0%$E1_;FXXw}x z{3b{3qJIIQYmUROLNaX3ab~CYXr>TOlzV;SHQTEc} z12eO9Zx_)l&*OjOG6FwteP6}HO~CFc`Nr(>_Y3a=i2gN7|IUp3j-hp(Ro9E5MmeO7 z0?k0?6tPD`H{n$#(L|lv(=vyG4dP 
z9}9?}^Y%2Kf3yZi=wlv6;>3IkH0`Henu`$?5f1S@eU=+Xv%?6>d@n(1%0gaQd6I9| zV<-eEgf%G3^K_@cs4INv&r4L!hO#HHLfPFFzDFj`@S~?~{Y%esrAJz)IuG+)7dGS5K7O6PM zq6e6>BIp@!Q$3Z7b0VHCJP5o`_0Wj%@hy)l%-TeyhhB&M$bvE$cZv3E zVp)f;()Q7d4`5vR_ojj0nZ=mZoLu*UNw6|Ilhv&KPf5d0@beG+$JfA!jmIBUPk>8ZsqOw7?W0%FB-#F8a=E4n>fi*5k(ZcJ+)c>DeI1aNlOQX327A7)(+MaXR`& zHHV{ma!Jl8^=|lxPvmJ=PYi1p-)gXDes-dXM=SDVV3xk%KStz$StTm$$r(<%EdW;{ zwjQsmwEiEewzD}Y6CdpWXBQGG4$VG*K+tpd*14S){qXB45M+2A@qmX_o-Kvs$BU8-QCiJLuVMa69I`ya8%KX*RKwd}oS=`Nm8#BD3IMKl1ZGuSGXtLi zI5i`eql%KmvOfDAlrp-`270Q3e?EPvSWzrO{Q-aKe>`m(sM?U=v}D`*XFA;%-yG~y z!mfc*{^ui!V%YJ+_?*2tg)1cKuV5h+4t)yNaM2x*Rggk(9yBtw5`^UGt2tK{RNPQe0e1-brkRynVx;&ZT+VO^F$4apNZu?3* zjWTrTVeZ)dSph-JNIejHw7EBJRa$)URNrYxW(XN}@-J5ZN-qRe4K57kA414csRr@@ z31`zbv6##7Q8j2#Y4x<_zQAMhUJv8hx7rwY1B3L?+St`c;NsuJ+Ev5#xhadqSE~P2 ze#;XhPk>tHUrAXg2yB<=QOfTJ=);9 zn@PmY@)I^sjL(|tz4=3n>NvwbelFV`Il;wD8#I__;n zQ5v}YF)t(@Y|Il#8$T`(!U>ZXD+46OqT~9mbLNzXV_Jr>&BwmIHe*^Koe_NG{H9c= zyLBH>b{Sb7E62UX*8MebxI;9rD!-R;6hakX4-$7mII7>`H6_wFFZvVaa_ZMV;uv+8 zn+1@xv}mziHKBqQ$69w>KN(19w4)MoFt(g3H=*W!_p;w&aH4wFq0gMJyMX~;p-9zq zO=mmHkh&H5(5KE-qZkX+Gr~vae5x0Jb#_-Cnm0ndzV0xpE(g4}t;sS{?zQJ;r*p2V z4@CD~rc!L;I1i&^ySw;WRfn4hs;#dTDNJy@F~XWQH$oH3da1YK>^5Z*FXKcMZKJu-`42frLDB{Wtn-CjcJ&<{~zXUP7GKW%B*TcFh83iGm|9 z;4T~HBFZx805C#h0_DIVnf=}4)Ho;fPzUq+p_NZ3OjO>@Hq*7TCB}& zxd^({>Uob%H#_Tz5`C!690F;in#BsI=#b`i2>s+7Z+T!D~;=FUNvnrdC5m- ze_2kAd?JvHsas1EL2>jtFwW3C#-kBp^-|QT2*0SEO9g)4Q~LGzv$xW zeISAB392l|*zVCb(xWKyoAm*?2^oAIN@#NZ%bT0Y@9WFDR{61B;EyH|OI+;9Q+Qxj z;+LtdqO#}2PG@1V^1lgUKN;v0CI4W#q`$R{qADN7xiZ2N3039bX;8p8dL?DqTJr>A z&;#F4K}9O%AwqBSJ`3BJCJ+%2PbF&g@)3*naS=8P%16URR)BvxKIbxH6QVtcab)YV znDY2}1AkIMp0A>ZC0RP>0oy5K^^Dey0;bCVEy1V!ovx2MxQ; zAuSkpT-=9Y*KRI=Q62y)bFE(ZsvxPPb}86R`+KYe;!ouSCP9G3Qg8QRvuCCADBdACFl%O;qTL1M$YdNM!4W6ml`h>+4Y~ zKD~5^uZXsSWl^tW%Ec}L8H;o*r(>JC_}jcHVMitQfEW!9i2L6t3t%IjtJ?>DM}+C(eeEk+BsLOnVUTr3L3(IX(ieY*+dPjDy#rLuX1QwOv7n)ZsT&A#9+9ka4)f zBo?8mu-kI&1B?w8^o~)uQ0|d(!Jph33a-YzUr$}WZyI7!BY)wKFRc${DK5lOn|f?U 
zelkEgJws)rI%9+@wgEgEo<2tf_u4i-avOl5MJ-aD$Ak92Q|XW24Hp0r_Pjb-iCh6E zIMr;Xn4+>y>JGFqBlt7gL*?~=m__BZ_x%H7KaE1yx~vjR;ymw@Db{I^NQ?Lx54Q@< zVv$*0SvbQnTaBJyH#R)Ot5dhOeZD z+g8|Rh{=|X@MIvbbF$i)uCnSMoHL2n4_0Rlr1xW zCco+In;=DehO`yDFOSE%&-Lt@jv0dns`4$LwQ%wFxo3Q)cnzkAk&gHA*ob9eahwVW z|IR{pG($~|(|;+g0Z1Hi&POrGMxVCz6QL*5BO_azqQdjKt>Ol)+TVwk%3r5}J42rM z8bV!VJ;lrMy42;L-)S=;| z)831cb&w*&qO0Bc-`w`;eU>*`CmVS4WH`2Uzu)<(a=IUq@3G?vi&BExMZal~Hl@u> zwQ>xrDyR3MVJp(W*rL!NO_6!*)gY%SnW_s9*ZsQT=UTToszx;|=A0+O?i=Z16r&!@-Kw6itH#P5rb=xzv}a*`@{bwIQmE>|r*Ql8d7rL( zGDWZGrCL7FsK}CpOMoH!r;DutBf^_SWUZMSVZUQge%L26{FUll=vsW*`%mu~88RW> zV47Vzsm!FyDADguV1PBtBmuK+6R)cPk|$U;J0>%%3RLEuzQB}5?%&nIG{87D<`|=d zwEROA_Wy+|^W5S^vknOE>3MAA7?pT?>%{;BmGv=%0HzG#z62%Pr?NZ0T_OiLAP&Qb z_4Ccr2(}Cv3TQ+7+m8aFmceUrQbOS)1J%{F3QZ z>ZDspPzCXK-gB205e<(pGxR=2KnuaM;@GO1Fi@X-X)lT{r|0<~eGW+JWfqhuZHC7B zhh(F~>oMLYwq4|MXNPch2*Ta))%R%aWv7uyRE`(tT$WsfJhcwEb6GhO>qSuk%T65K z`4j@`^5^a#fc~=gD-tFo`LhgIzdTu`jqXc(L5pXax;BW}_P+blsA<7QV{cmk|7CO) zDf&vx7$D+oT*`o;pgMd#HE9SaiNrmAhgyXv(E)=X++}wLyd1DPMdxOwfxGjBhiS`P zz8J-NaM+e3gKiCO^~2Diw=iNSlT61rXOCUGutZWJOdSJR1!Vo}>k7UJJ(#IbfPWw^ zZU?n8^tb<*VJ-xrpq69VD``&xVf`an7=Xd)8Ba=Jp7b4*4WaC zczyPdCk!Ke*Z2T?tC8utKTh}AAjQ*hzB{eMGt=_V*>Z#`P6|1_T+tTwI@?=jCZ5gq+ERNkGyJjI#QG6wP zphm0m8v;Mph>atg&bItB7uickJf*O11Bgy)FY+OOxadK*pckXLt(u;OQCD{~FiKc% zM$D2~OuY7d$6ZsYnl6qk_Qswd2V+8*?&B3u_uha+bCuMsCdum=;nA&qUDWRPWbCnI zFZ%IH2ZSD(eQI?p^`j&!d?587B(z8!6pJ9H5=x9O(Q=x27sh+%Uw&3N+s@~CGK8`m z1SuW*ec<*N=)3zg{6&MVwfMx3xt}<%|89jcSlj`wC%RKMh|({Jj5U``MUcVZel7r> zbRON$$_NwSybzS9dTFci?SJ?&s-Y-zBAnP!+1XD6HO-=R`Yl-&#Oi*F-uD7&?8d7* zOo^`$ZOYl1!o8gMQ4swBHMGu#(_%6XCM%(41v+kA)?g4*zc5#W*blQMoOT0wGD;AB zNabt(=BYv$)#*$BFnG`#K9&EX*Lc@NgapF@;&4Xy9$wXzFNch(IWfP>CRpPKQIAi` zuG%xu;Dx?%xB}fyS^iZ=nTbqPQ1$5SwR|=xR|vdG=C{$<952q=G&ZnuopaWDv}oUa z*~?zq&KCm%G5x*RhMk8-?`PjmVRu{Teb>w?%w~ttAViRup)~Z0A{IYe?EbC`yp0e{ zB*x?ku)h4<0g;OJ<$Da#;=2#ZfH+>a0dkXwuwl^hA)VT?-Z~__0n-%xWY(rS!&DwJ zVd(2!5yGs1Dmz#em9TR+u&e#^1^Hciuozd@f**L01A({86&ur@-buiI-LtMVgd 
zQLu+9Rus_N{WN)2e^IU!$8Er@Wse1oSj=@(1D0!M9TZcz9LBr9(iJdV&DA;itn!+E z0C6X0iSyPZk4?+viNtwYc6t{uckd>?hKC(yMph)a@eBwU88A+&4GlA)3!+hHBzbOK&n%H}gjtNYaS$&(&>VYStzI|UR8X6Q+ooq$j)U{f9P92LE!|pm$7^TmRtpTtf}k zbl?#mV=0=kEtSF2+&BqL{Qh3OKb7^wHpw5Kf!EqK%-TAW?tia{?Rd1|Ne5yn& zfWi!;pvn_b5spqB=Wbc?^UOM8^Ow3DM-E2dE zSwW*X>Sd23CR&j*C|yW8^S~b!9QqGQoZuGcD9FXvt}HP3D2U>kSij_PK?UI5vStj? zJc4dIR>f7rIMs-8RJ^lNB5`P&#)`7+wb}r>H^jB|*_+%)6K`XH)Q)&*(mBProPMzDp$g|qbsQ8z zx(t|?0Y8E59*F{{#QOJuU{4l;MkfbtfE7f+iU?zH7^X{4IlT^@IAK@N^50=Re%o+jPGg1$;OPbVPJnC8co80`nur zj0=VlbXxlnl(@9grV{;3NN$5q;MVMqK@;a^U%M+gPS zFo;Xwp`W9(JE8c}b3MY7F~|m#ZL7Ax$3)u3AntTYo>C0j zt>5cDXX0+xCZ;7isLKR*j4YKmPRFufGV6EjVkm6zs~BOc>^(cdiQLD5_w)yA4$mEF zQAsQ+DG5N(P^RkOWMdAlh+-OybHO)o+Yvj7xA2Ld`~SJqk@Jt6yJUk+Vh@LDlTe)b z!^Uf_Jy&Mbbp%>lookhaxPF!fK;g`7rFnVG@OPcwF;k$)!?Qy}<3HgBpsCDw&3-gg zVa&CUqb3`~zr4iL7DBitl3arT|FQcm)ep=SLv+3u`@suwrn5lpfxOcquM*h&xj<;u z)*2YvUjX!u$PNW2+|=N!XzHeev#;m=%cAK*Eq}y&T^*kc2e|S*vI-*Rfa)KCb5Lur zKt+UJ1?%8k1t1rR`q|l#wH6GTC`s7E^lQisB%6~$g7NOF8;?_AZ>>VTr@;b8=I55R z8C351I7!8}!lN}zLqBg{_WQOk9mguE!X01Lt~g7<&Z)*RsHLegI&R?V+sV$diq7Ri ziR55JeSekT*cxrgp%sq1RWkM|R40(79gh4=M9du)9;dU2uK^Z%JV+#PDA)n*S5Kh#oAD{8>#2`PXLdfzOL^$qYYUKl-5=+zb)g^m#_qi_BKGCTzEL;dmhb>xYfE( zC*~ndqdd(pbg)pz4h$DVZyHDcB|xKeoF4R=$E&ENXq&^s-zX1Octpui1O<2$==9!5 z!4cZ}EXT-Jq=IA8LDLR^#jpUE>2ilOM{tfQ)5lJq$6J1eB?4n&l@+WONGOg;Gf;Nu z&prTF^UvDOy7xX2qM#U^Wh0#R{oiH@!vK&)NnmsqwE{>i8=TJk8Nzvc5nFj=)-Jv3 zsEZN@{tDB=w?2V2kcB?usQVjj0f?eJSfnvcMH7Tq41G8WK2l^9^Z`zwbWWrsQXv|} zW!EhMTI>JYb@<}bpTJ`QFik=6d6{%&)AKBuKcKynh=WiVQ_YNRLw966>b(bkEC7vo z+-1F())cUt7wueBFP;HsJt50;P-yMZ!hWQFmBvCHBY7>zvh=`l6AM2M1T;o`b>30p zZ&AZV6EL`Ph-rl_ZCoYwr(n+#AgTKMR6W*P@rhQdLzMbzhdM{%&&Iy^ zA<28Wi27%+K!pEKn43-KaDhP?RrbQ9_gih8T|XYw8~MBF<8jhN+5fJLRl<is9NMP&DFCqv5$vzGWXaE{aJ`R^TQvkHEa! z@|olLMjqt?li;AoELaX1Nx!~WTdIfhnHqZS%Y;&9otK4vCqY}Wjop1@a>oyB4LCj? 
zU%-vPwcti}pW`ttWa_&It;kg&twHxV)HbkvlyKE5YJTgtkDN{5Au*yo!WV$A#o(;& zO5k%*&apOep_1G<-w>slO$K=W0k%1$bsspX)koXcMy>I1-8-FY_OTB<3#oW&Y z>XsGfB7-RNLkA%q;$Gd(5E z@82fK(QyMi2P0sB8TQXJ`Ncce0s}o44BnEnc@VOsfj&^g+w|zs$5TkUi6{+i7#AK_dWw0a2 zY)}dh*>)B3@3YDi$bLQu$IYrPjH@ZP#+dBICvPX))WM2mxBHvOiUIzR@vG3Med!sz z222I#_d^T!K-of0b`7-aNF-pe=v%odLr1tG##I`4&mkV;H3TuZ>uq~Vt5c!jhXtRT zd}0^ao&vNEIn{^74y>KL#D~c zy3rXq0467gBx>CUOJ>C?_9=s$cNh>Hv8TNC84u4o!3NRDcgcWZRAC8qD(vaKTOEeb zqiE|M*LOASeA4c;ao4~m^2`IRJ_krk$P=OEw0wF)1gs1k{wV82LoEiS?7eUf**j?LQWu@4^}cpm zA3obiwq>Se%HKI&;t8Rx>{wy27KwIfy-nHP0!HgIEm%06n6R)ZDB04Ie){`6m45^8 z%g;#y6tn;k8v1VjTHVDhv1us8-$B6!z%c~b=cTZtfF z0xZgrpEwL8?1iJ5G9j4=mdVj%(E=Tzy`Q&?(s{uS#u9ZZVgAbo@G3IBZT~~2PvpS- z1w*mZViquP9prCj`$)j%`IKodqy)#Z*Rsj}6h2%J6t6V3iDSUajI_WQw z+HG85DmUYcCI<)JD#%t7w*ZT4x*?18suUeaZuFfG);KOjj*^HPO(mzeuO?cY?ftR9 zHFRAz=)LwNy$1WSxO@?3SCZaG6IeR6FtW4>J4tX1Kmh1*ui#5?<#s_Fl`L_6FkV_( zVW+Q@lYm$Y;&Nr)xbAnoh?lWk55QF?RWyQdlJw&&s2ipP<|cIqzH(~l#ewYO)AoK1 zrBq=)oG-=3r3u!$%%_qHaWrbuI7WHtqZrd~sl8-lhSjf33oxNl26vm4SS=Nt>4{7| zq(`v|&15+Lc#+j>=HGbh;hyPpro)tUaFt6i6hpzomXm3#EE(=??BT9(m-bkDcj_#4 z>OVVKF?HrNQtrHD5>HCDUa}`vw_Y5M=;q& zd*@eO1z3dNzK}e|Pc@gDD--?(HwuCFJNZXNrGl%!UA#9%q+yn~=oO-&e!WP342?~t zdMX=3LB_!ivcoe$^A>JHN9y@*)!AIAP;T6N{)@#PZbr7SKI4UQpgR-0+G>3F_3jBa2y{Omv zI%feToIqYG;0V0*>wqZKXByjDhUib`pRayjhhP6yN{+j9dfGh*Ok20WBQM>tU-Ee= zKl?mB*u7$I5BZP0nxIs$Eq4m9)*4t7Z0`eV&8u4fHiUyQ)~k!Jp@p~!*2YenLSMywzK)yfVB z)ab9X1-AxK?5>IXC$gR>P|hc622K6W+TRkF8>$w?V2?RFv%HZQJWr`IRUC5xP4KK? 
z+zNHA?EW9m-6l!}^ZthtyN!oS!s#*6(l_xt0!^UR-hfj$mnb9&feUrwa^Dx#iO*F}TSiY5;Z2KBll--%3)ODM~rhp;{D5#}LtlAkH_Qr zwE9l-+OQb)+#;vQzZ3NWy*aJdcT!`S8oEWhN2~f00>5P6Cru-RY|78iiyuHc5V&Sl z3bVI7+HFDcRx*8{hZsE9B{v&!>a6)*$@~n;!$GmNU?E>VoZ9 zO-ma~J-27YYCJtXqjH9kO{x?oZJsq2y!YlVlQ~%6em<={OYskKt*r0)ed;X8-VA>> zbG~8-I1lF+`7(m64Da(B-J?>S{hmk}Kbig7`<2hsZ8H?T42k>Wa*i*BZ6;}%$Symj zH0vJBL(*MQ+bdDG8oV5Q0EeuI1FdZf$) z^2pQ27N6dVICw2iPlev#=vZ>L7yyEgKIs%<$ri|K&LZYX5ijX*C21iDv0484SvXEs znq5-M6serL%-qyqucnzR*=LB9+mB+p{;mcTZyX5eGS8P{SL{a~?jM2mN9j=gvocdc za-Itpj1GMqMo_cc^slwOfz##lz z)+Gww@#b1Va60TH*4&ryB=O+$unRKi3;I>^3lQVjwQ1!KL#J7Tp>@ud9rt{7`w-_1 z^|gJ1PIBXouH8w6va*Md6aM?!jX?;mDap(sp+NWIGP|IjrO!$tBCF~*yz+29CP$8( zwmF5xxX}b*M54bv@vY5>mU8T4xQa^<_K7p607-9QyEF>VBq;C3^s12-qBfGKpsmoh0T@OV@cp4XZ zHnrCe(m;g&V|crk4TF*4RpuL-RB{mO>Da00ZC``t0^HGpPj(bc zT|qzfXomF3K{?WI>8Jn9GDl1OLM4IDo0uyCqnhq+J25-6Pd}^TnUaGU$b(3P5#9XK2M3@pJl01?R_GdwZV|B7VAS-97i3OKBrS}DA zlktpc#q(JY9ESonI;Y(GPTXdHd1~)OrsH7VION-VSW#1_IJXIdL;Lng)69j ze=}4l&fUDMJ=gmQl5T88>{<$gN-NNb+6>>JeqkImyTWTHRC*YQI}7`K;Cy>?u(ooi z$X-XIUVf%5AvGQ69vaOn>8%uK$2LjWlJ(}Um;RfWxB}wMw8PS9?Z=n6Y{CQ(fe_TY zIau6XDz5F()9n>t18iRTOfCGK)wY`&G00}J3TFpa#u$tRbiTglAITIjZJ)j=TIa5< z)pBaCEQ*hJd{Pva$0(f(^9C^3+}1zW~@zI8njJUJvj z_<+f4{PqzArUf{EQ*c`}!e(%)RWjFHJ|>wHkkN^>Z;EcQmyR^ ztv)+EeDI4u!R~e*H@Qxu2kgf&{c&MPr^2+wRTAazyjq6hb_#aUV$yr?6c6K+-JwlNz|n`CYe( zwg6LHP6t*IcZtZsL|VKNG>V;Td>;y*g{udo z*_*8vVJcQmJg>TaLnIVr#(Vp#bsV>TZB6mB)?vbsZ1n3F1iULn6l0*ik3|MN@dU9; z;AAt1T+kh9!Awp$Ik8|h+h|CZClW*_SvJ-643)6YL}JoWP4Yt zo>w+YGMAN2**168Rj!nFX0stV^+|HQmvqk?C>~t9sRYPJhz2{=(mvRtGvISgCrt;h zaSgcO{yj1lmMp1x{L#wD#2#3N>#O^NXJP%%Lg)#q!M|oLa(Umov!mq3z6VUMZ-ZOl z$E8zxD%V6NYi3<)K~lcVr^jFSo*B*GGA-ym=vXs2X{=wOrg3hMekwPVe!~d0*0>KG zCu1KX}l+-d-^@sSg>J<#$@4=^cv?zQr z+kZRYfWIUyf6lbFsAlxY&TaDfJtT>?5>H8Y5!~QBq|8WOT$4|{$56;J(&O#avR21eKC_zu9E&2WX*@chj4f(Q988`*saFhlabHfFXM7#gEW3 zSnZKWJ@tLt_ec(u5U;)T%0T62kNhegz*0g3vX05QARIUzsXH zA2)2-R$qQ>$)EOM49!$#pVfOh8Z+>SwBHUYP3<7ys6;c?P?cu29tSf#VGmh4;e|($ 
z_E{A=gHKQa#{<{TlR10W_lC-PBX$L$S?GvYQI%ei3L7=DmYTJmQHJjTy(y=ku zfM!)MD?5?M*V|e=2?j0CN08xP&nr!0q8(Y#N0L|$Y@pogm29uALv70Avh{7C=4w69 zubH^I@|*lT#JHrtB1I}d#n9r0{ZP7;q5;^6rHNIkMQ%}kqdsbm=boU8(E5G;h-c!s zpOR^8R@uX%iI|~g6s?%J0EYAeD6VN}NY^AzOL}knQ&AI?T`NdVmz`o0i-}+fu!`|D zi)r}}hFqeh9h;fMpR92~_)Zb4<4;hO?1H``*6*kp!QQKmp6Gv;sO#%?Gg{IptD*AH z$ncl0y%4kR4}HFlt0XZE7Dd2OxkpP53}JZ&@2HVjl4u7eM`s1+WEhxC!b=$s9EPj=5LyVH5KLQW(2OuyN*q z6n06}BTix!n9nh+TP8w_i6%i6+-lS&?Z=frpF~~?j!Z}mK8h0@(XD01?(89g%gHEH zvd^%sR!`ny2Qkm11D`n8%5(9!z9;hqaNcM@U&#Hq#JH) z$Y|F-!b@A_yHp916QvZpVd-?Mz5M6tt$1eMDs=YYB~<9e3Gw6TyC+h7)3( zIzsvisu|OD@);X3af+}p{Z$)V&yD{`bDLker-k@SR3r%FA<`}rMc}F$mx*ex7+;OE z(0zJS`u~2suyqKYEq<-BGKaA3>#Fto(nM_)6<@qJ?JvPqnXs)gcg4y_RHjmCTgk0) ziXOyymTqKt1))J|7q&~aG(}F4#8KuilS^)J`r$5sbmAL~6Q3=++dgl0G|-wKIph%I z*Kk013xT%HVWQc2J-65m1?QpqnMoMzRUo)Sc5O6v@(hkGyX(!q-HGZ~J@fZQPa;efZYX6~Y3(#q|-}Sos`e zvNR&70|;jp89CKJTaG7J;^1|VVks406z6ZGYTz@fbSEE!xge~KjDzdMmr9Aj0gzTI znoEud(#@Vuw3KkVh%Vs6vjs-xp{hdp(DDa_W}8kbw6rpk@v!)=s4W9 z8bK$G*@QSo^>WmO^u$LHxQL4-RsWWsVQxmeoG*bitk3~z{r&}q---!KgZBwOJ`R1~&CgB&Z zZ51)QpivsMwY~$S!vnY+`e&=w8O!PV983bsz$5Ls_ApRXM1))jZzBd&;jiPd;S(p+ zWzN4pP$3I@tXN4!q@4BpAu?1BfH0m6s}a0Jv{?E_%hg>e{dkc0(g~K#4`WLvR-VQ< zP7VT@QA0=5#A#Mfw%G-ZOn6V}7-u2s3&CX4g(>7XWy0Jj93Oo!zW4jW5C6AX<&db( z5ieFjt&u`HBF99hYfk??&n5-B6)N_X2TWCBB;+)WCES z7l^oEKaRnymO-wh;d>>=$736zEiAn0{gf8_eJN_D)rnOX*>$PDCwUsBLB0x^6x8?{i?er_Z@ovfB)#$@hRya WmtAVx#4h~-e`q@_w&!kj4ErB4)@d~W literal 47320 zcmeFacU+Wdwlzv?+jbjU8^i$GDky@0sGt(epnxbz$)F%e28jX^+C(cVT7amiWXT{( zMuGtl5Xm{2$RJ8m^0#)OGtNxk`R1H^zWc|$w|+AXQdLjA?-O=dYwcYZHnK3^B(POr^ZMTnZEP&9MFj=T{`LX^3#$`?E4WwNVvt`f_p4d6u*^S3|NkL; zN0<={ONylQ-d)P4?sPXgoqjbqnfIwJvEM-Cg4sO1Rqoqj)~GY)74nJ(-uG4uI_tB( zKy-t$*SUgqR*`@1R` z?)7We?)%|~A8v0w8I-XHD9(jMPkhm}cLrecaW~E@}Hn`|2*ed%B|(H*ik)|c*?FTtgvqB7WBt5oE-MFp4X zGBJFDU-iMx49CGT?{!MuoCc{cii!@Ko5waZG!*pQl$MsJ*%%!DWBSa^$6x$9*i*04 zQ=goiw8gMwr&)c{VT_@vsrml((W_ahE$`ol+x65f-Eh=RS5MD_!|TS4`;mvE)cx(- 
zOWp(-%x-IGS%)Qxjg9q{_)Qe+hKs0vo8uv9vSY@E!;)Lw;+US@a`K@)3rlxf_8-$bn=;bzabYW5zU&embz;%0^bHIm6+>l)<~_^3ykt{FtX8`2?4%Q~?kKdqx|`@Y zXe?b;7AjkrXrhAmK6ox!Q^;SXE%$2Dv;u#k4S0Kp>-JQxql&FJBWGj_NL^VPipe#p zOE40dQ9lg}pgcJ-9#d>>Z7r<({AXe84ovi{Iddxb;vNS@N3ZwmnN>A+e7McO^1j@% z)YR0D+a|H4c`IB^IUk*TCxtytGxscC7E zYL5(VZ+A{BvYXL$py;lcN?gX!ZLBf>{Q2|sk00m3wt9A9Zh2z0Hs6*lTMkDZk=xRI z`q2%34THwaaA7mq{yTyy3MO?;j$JXvO^;KP%o`);af-fp_G}OBo!G>|cb+>Uv7fE% z?N#38UP(MX+_!s_Ki@>-4))YKY}L6-H}m*dub06f=2)#aZ{9>|XQVYWHXhD$8TXJ8 z$(o#SnlW=`VNsFXu3fv-Q!I}K3K(JtE^Ff9 z#lY8-m9$i8wkE0R>0Qs}??@PGc4)ghg2}b*d!N71AJ&>j0z2eLW#r+?2&J{QT~#qP z!V~3XWt)#?v&{JEr-I7L!?v9j3dQ#%Y~McrrRyeM%qt*p5Ld*&#~D{eM+j)8sb={c zZWy0CZ@D-REy?OfDskE{LlqO_qqfBopTn(M3OY-J-MKo#{9VdmT0+unyJ%#XV3>fN z*9(JiOl9Zq7<*t`5q_*_t&a2HVd=6ypbr#i`zq+#xhFnHQhIc4Y%xNfk z;lhQiixwR*opk=(7`DQ7tn|J!Oz5RQ|CBd3H;*6Pu7l}!`S@feuJD$)xVUnn@!>@C zMs@4fm+ML=U@Zp<_~KqPH|zND9xkIHp1IFwUz%**DB5USt<^U)bi>`w7i?4Qo4xJ=$shv_`7FB4|) zK7hHFs@Al=VxHDOB_=?G_LaSi03R;NioE@87-+N=exYtKXHs z)?e9A#LWBp^+4<(V+@><Gr)aoavmM3OSKNWck1mgTCU@Rq9@;;%bXZ78J`&KB?EvxSCMLcf_u%HNX%+y zb5_>DyP~1`g*Q5qn_bdFc3f*~ZRHgg*CcBs<++^9m2}zIaKCNq%Ow*1FE>RViMd@B zrxQ1S`oc0gty+`)%|MS8;PTrX_XBOQQ+Cz8jGSaa&(WqQXE6t`9-fp1wguhXDO*~B% zR|K5K$a}9t(f%4ELp^-bRg=?0Kh=is+`oVStr)-A0j!HzctZvTZ1yltPL6-}H5|k7 znSZA=P>u9kYsPweqc4z9QNh=BJ#Tmcjc8&3U%rNVMv){TpLb zTbG(>{DEPY+!eJQDYoViwFy5t;hFPrYuoHv?1`26H-+@10yEF_YsBlGPmI~)KJNmT z9xPZ@l2*Fi62=@%gB~ni)ljIc@1i<;X`T9@_wU~?rZBPXbG44^5=OKqoX%L=7gs+a zy$dVzw_3h-LwNQHws0xqG8~a+h0C0G{#^-2pG-5+aCA&lcw`{=F0Cu}QPP#gLQlh$ zBN*`~ikJKK;Jle`6%}2YpPzr>*y3CS33H`$O9J=I=8V$Ga*|qEw4Vn54 zojZ4Kch8JZYq+>r9k%bg$|1zH1b%4f{GsJ6EZ&hY68LcJ*d-Whcz9R}cYhfszW(yE zjmQ67nXgK&%=vv=+cG?l6S0MBiC*sIb#@07QUmbW2smc7oVtGfI_Bcz>nG=jRFqxV zSFEs5UBBoZuBa+W!#p)79FehH{qU#7xv8g{Dl04Z%gR1Vf(?0TX=TN`affL0z7@{L z_is!xs~_@zmwT1d#7lOO&)P{}%vQ~N|If{?S*6ixk38z!kB#+@Ah4@{@#4jUB-e@I zwhLVE5PEN#^RA)Gx+k%EDg3fXT;;7zCoYc+b}h4SatKT8xoh7PpKo2P5~0YwdgI22 z@=ivlMR-5r6fI#~uBfP3m^F{3V1D6h7B=N4)w(ii?PJ);eA1ULUW_-QB!N4kq%Ijg 
zU1UpHq(bn8rCZFSC&znTtCIfw^Jhf7H7~D?!5H`;*2N#B+p0clJ~;Zvhm6nKOD+Wd zw)UXv3S5)Sp+nWJMSf;iIhQOk8t1eBgZFTxA%a4jNsEiGlA~Xxu0Q4ING7DJ4GVGv z{;MLvNXajKG5c8h@AqUK%!=*%-b-VNti9s<w_GXoCGYCkqXx2s~l<1)K^yyP?U*9nNL(6KczpdCeIXU^mV zQTD@kB>KS;Y#ryW=zdqeQ$59##@q-ZR=_pf@b~wp_*lB_#$Gt$l7>`kA79@SoXOav ziN=+YC8iZ;%x2{($7yRJDA0`86UfWUq)xQ>pCYwyX!-gsOIJ5hYEprGKPKoR%d(JD>&`RkugY;8v7wockEpBo96y;;qJ zH;km-+DBnUC&JAe(;^ItZdVo0(?~KcZOU*OY!CMAEZ#Ah(CT>W)~&R6SH%*HD|x%D zG}xmOwl8D*-S@|rSAzKEcR4sXNHbua4+nj6pU25Kdv;G?`kRr_(I^=oUO1*}a-&&Z zVjY@_mJ_${#;7MMdaaS$Y|$icQvcYYUpd3kw$Wv@hw+O0GNQWM8}%>EIdMh)zS}R{b0GYk+8Bl zYR#(BnT?OPn*6F-*--bl#6J?P0A2b{7>di!9ZH9X4 z!{E^(5a}YuIIOE1WZR9iUWr?rY-pz6VCoe!*2Q{xd3C}|giCDOriR>*;pyoqE+KIk z9@@6OWEaEx-aUa1{~digJRjD`-EP0I%>02`p*?(Q+Na!Qvs+4X*7a)k!K~c~3kySv z5Nr~cab$dC@R6Fy(C*lbA$7mDMIMqAdFQVVwazJXZF5}VJa8R>Omtj-4sT4c)QlJ0 zKUnL@X#||X9VlWs2I?)cv?}bl$IpL27IYkI^$^D`W4P6iTr#gpY&3#nbx-lWKZTF+ zf8iU@^OQJZ*i%0ZJ}bdNyF!Oyk=nATr#VXpQK(NQf^}zjx}{=v(Q6Lfi(c!0x^G8u zXhLyn0-Jrq>>TTLaXrJp4G^dnJmwj2 zeYr%01pL~MRe$;A7j+n@H3#p6uiSINu;#%r-;BvK)gNIc6)Y_+uj>g2>+pYDfndYg z%142&Q~?|C_Vym|-}1|~5XZsJbvt%sm7JN3h=`Dpl~uw95mz%vC7B8LkS6+{f1m!N4v62>yzZ1hPugFRXjRz$Yk`PexYeD!AK0{pkJ0Z zrrWC=IBCX{`C`1slv}Tz_xEUiBEWa;h46zcIZ9X8?7f+cQlyjU@3Nc{ls!}I{C92 zS;34DSho(!35<=6198AVcfntJaWz^OOE5xCzRRrz7+`Rj3&7GrU)DX?CzU~xvw_PT zqI?92ifti(vg#-&CnpmcP<;^aBIMJTPw%)z4?elAHUl=H^1-oZlVdW@YIh&0dFPre zTe_4&ibb_;Z{7qd1WU0w_8jux9)r}-INR+9U)Ik^LJk`aSv^^QfwBI&<@*;4oQGR{ ztJ;<vT>M&T2_-|YsRczvK3rW|Hz&v6NQ-_x77(84 zzQRQn`)a9>?%qH%z;qrBDknwE>bN{|te)&h18!N?4?}A5=IQxPJB5NiISU82>3m$qb@ND7?i^MIhALGfbMw(2kQkU4#It@zO9*^dC}HI zK!5QC;Wj~eZ0Sa;LViLc7+zi;m&11A<+h(2?Yqhk?Wn{KLaJ%+W7ukd=oo3mr6TUa zf-Gw#c6Rm|GiKy(xV*4Y@D0$0BsJX+9)g)ArNR5?&9b8Q+Q1k}+Dn6NVa9jY`}^y$ z`wA`^7*Ee2H&&Bi^qT|MQC(&h3UJC0fhXm~v|`w5!WK=M2z}L&ry+%igKdx&O`b5N zyz{`VAnZ^w+g;Uh)!3Q?+0*B%T4YY1lg58e}Zc%t{?j2Ru2f#B|NniBxtI3q~9boHu$o!>+86OU3Fs)rB{ zRl}wRAs}H*J-!o`e-P^#j)+;*>iwS~)|(!;75Ulcn0k&}<(P$5R zA@wYmjJjltxR);v@atr17H=Po{*X24;^5@e73jXa40+z~$j4~iKfTRiPpoo5bW(w| 
zk8nuWu}|)pF=P#AR{5qoB6qUwFWSfEbOt`<%qf0Kp738JRHnJ`(&e|aSO?uZ zf>N;Za5}WO7v`_feruhgiKyH=D=Q0l_gWxtKr)ox<$0=v9lR@J<7Kd7#frw${Y959 zT~ahQj?BHXEbPMEWsx}F3@#);YeTt>ZQWGrz-$c5Mk7&K+`z7Gz z5*FUa*j0;>(F;wRK7B4uRWNWUDJ;lk`?_W_sIR31$u@T_G#ZO4km*OB+31PDfPmJruBevg) z4ZML1cJ?Jtg^HyM1>3c*j|_@tUEvp4E-eKB(X>vcuWr#=%2|;q3+X=pbCM(3q*~xv zb8~YF5bRkQcI+Dgryw0&Hj5vMuJ1UuVwJ z_C*r96v5ACO)ae}h|ySwU0ZRwq)bhtOTiC3%gM>1k2+Z{w`BJs! z4R6>Q8vF~Qa)DD#EGz*74Nkoo7XT|>zjaHk`zB1&5&$&07k^ue@h|U~94r6x>Qxb7 zxn?OTDT@J13IRKc8ArBB`MEYgLzAWsEP_o}RZ$JN2-}y)cdr1yUb=kwaw$M&R{-%8 zeCbJUp4H-sL{utN#^=&yGn`*3Rn?nsIy#Jyn>4nUhk1U&SzUDN?8H#LMJ_Ckp>l*` zj-|ErCZu$xk0(aD&JW%4@zGa~QLp?sG$b+sE5>+ynG)=$=V#9a1j@B`ExVWj;Ebx` zSvf8FlP%4|U*Lrd8{iJ4kPFQL${UQJKR004*~I8nu8td~Y4V7i@I!TCz_}Is1QV7V4QWmV1T8uDGgoA^_)T)WF1qzht zz56%P)YNN!II$X2C+)Rn2~B~NRPGGzT`WVoLYJ*F53go<9k~$@JRvgJ*!)ASCvb|6 zcU44KKe2r(zY#cCIaV_PF=L_jGIkc0*H-UaTY*EVr-8!~HmgfmXaT}2EFj=_30xBQ zy+Dt$lYmdj=vUUn>-BKXa?^cqpP%LR!#RsL>f)eaDFAb6ubs0-3Dk_qB3QG$_w))&Jzg^OtWYb^0a^)^u;$iGb@O2+o+%%dZ z5e4ettrOs|fqLw&j8dW0xrcKhi>C13UlAhhHSls%@r&we6$Hca8P9%ji<-6R?`-0? 
zSF*5tPe?w`Z~EcpE*F=~s~p>c3Viqqb>nNqIKaGV0*Dc{?evCO=AP+(No@Wf;$Sgf z7bupA-w28cS-tNn&+oryHyy)EU$5faep(g#6fC9Pp5?e?OZ_zY<+7zm6k7^>xbHf} z!x(pS%*9IuxK^*;13OYB9vigE6J%-1L+{O=p@w{FSu(9HEs_uE%`+m}Wv5-s1FNce zOTattNW+Y5yp`;tPt3irzPF81%~j9sz6H_xW?&$1TgI<7$5GQ?*`q1$PLVHhYq9zm2oAbIY4I zyW&d@&UsdX_}iwtMo3x_Ub=^40R~IGo_BkXQENPtwYB*Wb$rG}bL{(W&V%RMK70_6 z76lv!L!`!wU^-y&`}kHxT*j7F_^4~qCaw;rPStE%$jXzlD7p|yki2-GYX9Fp-Qf%G zEeA`Yi)F4h9N1war=z%Qbgqz;5V{*>;#c(K3e&bCM1LGatzJ^|wWsvU2w)%t4Q3YF7Z^Zxql zuK}snZJoVYlSxT;C7g~>VgZao(4<-a^Wa8MA%PizgW}XCZbPkiCp) z)21a@zS(V34@+@4*<4G@zR|WS>D;*=$z6hWy965aKFqTTaNMeivoq%x=*Am1J$C4X z4+7D;8A-$LJTV*-^<;Abb>-F6)C!7=RZgFN+*utbXm4+iX_lcJ)OHNpRv!dPqkS{^ zO|Wc4{1moV(uVOl^SA3-cu4%yIiLu~oVPoV*uWpxUe15{QW^P&hb%yXJ;gc4d<~>dAG$Kh}ww# zVfs()@p;S3DS$D|!U3tk{Z%62QFI|yfF<`TPyWlobRc6FdP)r4L;`~tbfq+LvBH2P zVl-3BMDHVg7O6krzgjYu||rtpGHEgxniR zZgP{OEM;)uOSzatBH*8Bq}1{NQXZJ^IQ_-45lM5XLme(8<>~djMN*_mc+HC-a4rt6 z^rjQ9ypJsgz~L3C5@){2ptmVwDZoymZwRqFu?CjDo+};koUYUIeh?~c|=9k8JmwiJ-2%tb~O%n9fq)j>?|!T z+_%r&yJnkWLCKy^vbD863}3ZKP&*p#g90Z;2%uJ3#*OuAx@KaoezSXEs9ApZOq86u z5i6t^>%_z~7I}o0EY$`O8fMqq6hq@9TV~iOY@t-XghFs2tIElZc7D)NL;w+3$HTK9 zTSn&K!KC3?OE9Lq;`{g{p1R$a`jhN`ocvQZ*&e4uvy_9AGts*3U{_YL3OF_Zc5+Jh z4=BJ8_}sY@jm@GnFag{yX5Vs$y#)JX^mRzvC>ovq(;RthDf7%>gV3-r69ngk;2b}G zJmT((Zb8eB3!U7L+FBl8>*S74m-)pZntx_GCtZpDQ$U!=X3y7ga&Qo61y>|Z3IVqt zrVqRl>(v~XxO?XgF$xhNgVV-`>TO}2$VCA8NqhV5T_q4x;`!7c9>0iFqq8&&x8gR- zO?+s@B7{uoQb(sD%LEH}#L?05`lL{-Ba$hWow$!mpxDZYlaUG4igC1O#YvwpZl3U$ z(4J`kwaZZfSTd!h4(q>jFMn;Azf-~Jeh%s(lqPSv(*eXNUg_!S1PLNFRv(@FtmN9t zJ<0(0LE!SOUw;5jFtIid*+FISzWIoi$xL@PrY8fn&}s^UXK5L9c?iuoCVV zx5S6ssS5#t*lR;((RB*zvXFn0W%NW4%Z@=gCp%mFM_F_z* z#5!D)@_rBz3Y4d^v2jb2UBYn*>L?F0AtEAo^vjnTaep!USP;cnE|u`?C$im~XDXju ze_1orjqk43i8`0@!D_HxiIk%akBoGJYAHvM95X(19*4Y)%q8IF(Pm9eO~&;}F$@NS z3u+i?vgV0L`J|5>J7$G5Mx1x=aa@i3G5r%KOpuO?I7FR2dzRkn&~^TRpNJeKmxxIV zcn{!Znk&0S%ZFSiNBt3$huplmIHRwCk0?jVMpIK#cs6cSgs_Cch3w0(2gHB0Gyd5( 
zIGEUBZ)HU|tnJGzeSQ6P$Q4G!SFaA($i=J~WMPo}Z_=H2qaVQE_oKHc1?rQ5cfDVB@0TBn*+3>N_^T&Li#ASb-JOcNE9i zue0bb_|+5S!7EQ#b2zMLVcD5;wE9*O_*-Rp`D=0-gfD^*kY*rtGFpT$KjI zY-#xb7i{t2nx9`3ys>)nWHM;FoafI~{ZAY}e(mEl7Pq%K4!-Z=%LE<0kQ*3PMXO!% z^h{P0vqj|S2(Dj$v6PY2sal|pp~x+8884awy>G*wQsNE-Qltm}$!QIlu@MA5%*Nam z+sDr2t)y^^J(Qe$5Yx%m+TJ%qmY4t-)GrAsB(fhz#;Zo}myiowym+x{0@OMT{y{LM zj1UNOLPJB1a9dSEJH|eqg=V7?*upB$uFg(gK|w*kndEF=LhM0`4x|o(PVR~;Y59;q zt+Buat00sYw%~sSu9sk0n1SxE7p1bSuO_HA(NGLpj2Nx-BP8A;Tpl7Z!vgPhuw4FJ z`MUA#=h?MOmRURwBwV{b$xI1i4eWh>iI3US8Lx_7GL>FohgQVsWNAmK#2vA*Nr3o_ zP~X~5-B`7PL+~hL9L>?A0Zq=IwftN>auf}#b^=b`4Ea!3 zKaufUx2l4Gp(6n~8IZIf7=!%`81M=ZRg{+#5ZaIu-{jOQ(KU4PGsnpn23KElJ{gFyxSLG(bmk&T)#Ruuj zf&~i*JiF&Sq(>{O-%O_F`ob1K2&uH_ z4J^N=5Gf`wqzMFcIPmt~7 zfk+gMyaWvz?)(y>FbMXP5z7t2r3j#R5lXQ%NrLMehZ|9LFs+W9P53Vx~)1~L!gcm!HbcBJdWJESi<4LswSp(8iS~TusG<3A>D)8t*FUcdAG^$-)2B~QL;~H@l%W9#JEdLiS6otx==Hpbv&>}g zFl{0PqRHCTCWK2;7>R52hJGrs8F5HaH8 zrnNz@UmwjF>5QW2nMkwupzk~H=TENR?dF_zG{cY*s8F!`V2E+;({Z2@#M)aXf+g|vVyp470{>N2fSd>q3?+SB3p>3A^#eh z_Y15$UfwVkk~|@`s>1K8)2}yy~>f>ZuF= zI0=qI87V8+C3v>trZ>ulSFc{Rva-^>@iUIfFW2&J?=xzdO8H;u#jC#W?p}Z5#0e*N zT0-x|4&9aKdv9Y@di)=dg#Z5o(+n`x@%R~f{9n?5$+j;Di%4V)2ks5S1Qm4%MJ0EF zutnX}w$+N!2(OJdK+J%+i=sS=X9>Ok_|cLq1c?W7v8G+fvb+IPW`21)yE0Zw1B{3w z!h?bxr!c_HSxg0g04oSXDH7BvD|>SpVK7R!h?+rG$i8$3u!g(h4vLW2T}O7z1?lVq z@jXC$+@#~hX~$UJdidxO6k==6pFgiwkG)F5vXGFFRmsD!Tx?(uks#z37ayX`4`TgL zfN6cR;9FSMxM|11N)=&;zW2Jx z7d<^mEi(uXLH_CjIFf+JQ^1sf5>i`$ZsDM${D$zK;n7j{R5`>vB-Yr5?7tp$gP$XQli9^(lB)VE0*pPs#WDv9)k4qX!bmCS(y2}!W>eDDqkLM6>{V`2av4#7}V zF(X!>T2?1duCrPOoEZ~POToG2;8fD-eFnLN9Mmm<@yme33<4bGf&NB8L4gqhzrZUZ zQYAWZ>%=`Gp(?XnU3)-}6DLG-j47|}V#R1j7cv1g7M>aZHdYzibYj)-zwacMgXEkf znzUU&E?2{ln#L(%Bsb}rM8jo}H6MF+p*BI-yrIU?y%BPvPSA4|q#;63jO{0PXKQI| z>|+So99wUlU{Xbnj=ac$K7IORWfim>D5_f@8))11_V!hyfEm=n`8RJ?MvBA#F5FUh zHpk=VO)FIa{N*>T#AH-HaMFK-IF}f$!QtVYDxg$`P!L1Bp7}Ty!~_L?@b!_Hrh?j9HN=TWQvE5bVo_H8-V?bMBCC(_@>05PUm*uYSAtRb<`b_t zn8KhC&^zHs7}V44V$2_ZlA;){E?&?5hMaL_r)L&?Ej=s6E7Dl)i`4oV}Qc%UY*62ej- 
z8g-e@PQ&+LteFq*X~^*4dt(e<-HE^T6X}o&lKmh+3$U9eJg~kU zlEyE)x9r66U_LfI814JHEee@386!Ya`AD+Bia{*$Xo=VD+s0bctMM1)ZRba3k6ij~ z*|I3;KD*w1eDf^SY1P+3EH557oFL&mk}%ZU9ES*-7e^Ds5gn5u1bG_RGqdK-_2EJV z1+K@8uxHzsz#reYw1fr(1n7Fc`+Kg&9P;KLTE6=hLoiL^JkSb<;2uk%QKUmmKo&v} z0$q^2ynpv@HL^S^d%#C9rP$J)HSxh`&YT$lQejdNt{@{VT?$pxeX>?iG~Nq?jblE1 zpT|npH@i-1krruUtp5-Nv?Tu}^$@0GKLY|sKG3D4KLE@-YySM|=NEsAsTUI!H3qp$ z=YSPvka$s?1DK2C>UH0iK_AOthd^zzwzswQK4dC}@CP|pBS30qB6uvDhx5XGbU#%# z`Btd4Q*z&bza(pFxc@IX_O(t(?&)D!`8_Bq^XDy8*f1aeT(&m#*MFF!H&TQN;jweZ zuUk)D%-Hilbn8Q}j6EBKzZSo}j?|5O>g|`Y8c4aW1At2AGIsw^%IBq>8>vI0vGiuCGjR!m-W7YRQSJcYzel!Fgc+aJV`^fc4 zTSKS< z5ds2%q$GJe*pSm|Ct=;P@BCYz&0V)oTl*e>qWiE{i^Osv;RB#8G9MqS>3pY~6F*rj zh5x*GQ31jnqIzKC4I5HULb{?%;Zy{$=t!;fBwRGNG%=+laf8Hym!Ds*_i_JK=%GUZ z>X+RMBZKf;0JG@Q`wWzKoI6}Jse%mF9C@wJwQEN+wy9ZJJ>=k+7!InZ!Vds^Jlx!p z05?dw1C%Wo46oB&ih~ZEVb*EN%OInNi_|*a435d{o+qc+5vW7>-NWg!Lz%22eq&+z z_q!A?_=ltL9Vq_4FAXhcHG@^A+5>j&Q%~ybhPu@#v?T-`QqnZLo=6I}dVma2VG|fd zB7o2SG*=O@)wX>!(ewy?#wuA4F=9JZN6LgT)Xki;IDn*^5IT?`e*$=%$r3!gP4$~`qUOUiLO!MY*bB4qc9KR|Ju6k z6@l}Kh`DwXZ2Zm9*Nu;jc{3QgoYpQ+k^-^<;X4# zAO>CM1fT{Ijsp)S>Jgj~RBJ|L7eF(3ei;T1KgwlAsG303p!T6Yu}$*w^6`0gwzd$e z`xbo*N?vB0o77PRiOeF|C<-b|K#vAyz5h*@pV&~0+?)9j@YS-|Yqt_-#RtegZBhhL zU`I^eKEGJyvTxr$(%n*36Rs8IQrwbKQpUJvyURe)5U~U(;QkJ?QW~+tMzK@a;Lii(s(#s?9Wa{w9J{K~ zLsYvy4x+M@^x(*>Oi)}4jL_c(#V9w_1HQgTet~z2;V4i-AHzGy(1Gi9$jfIH^xr71IgO?%8TX#P!tS~QcAH)0FwE$$$ z1t^7u38H(I72e5w84FAJet`(XWspU{{GbSr7kVAi2XcWn(}6OKuabrQaNjjxQXsid zd0X8Sh_?ouNe>C?ArgPNgkv~X2M6tjIkGC$kjNQ=)-lSZ5X(D)mK4g!aihkNl)gCpM*T7I4<2lQ35a_>dTWctJ)$ZQ1(`KH zR->YiQ>Rj}>X2EJWRgJHrcE@zDIvh%AsQvYgf|>fBFTt>X=_FNNO8ZnJBUjo$nMin zj{tE-bPr&Fa;U9kAN&8CfQon(WP7BVM@?yw{#}}t*yRVgIk!THJJ{PybPGF2I*3}r zJg_`z(Xa6mw}*CPUrvIsTx4AQ@@2xhw-dfliW6OqfTc_WcbaZq` zMZ05spnQg>7xHdcr>_3XmoG2O!3T2XIMj=saeW?IFE;I_=EW0rA8O+FAlIyXRJ=W^ zH*@qLbw(HhI%o{pJGE9o>FXN8cOkpfUr2%@`P&rAaLe9M^=D@_-hsvkFw8d$R{(gH zM>qry)AXwYQhYB0z275vxcl&{L88H;a&=*G!Z>oQL1^`;kbw#keXd_$ 
z2`h>+@J^KAGeRJUJa}mPFcl)`KUz+a=usw%_91iBya!hgw?%s01iid@F=~&bz-{Q= z;^+sl>j?ddFo-D>*aSk$vmfeU9J4u3AGAx+BM`v>nvlAZ(wl9iB% z4nRmAg-@F`Z(e5jx!>@C8_q(`sz#fNga?q!Q@)=DDBZ`~I|SCm+i(RkOyA((U_~V* zvpQ%>TY7tS49Bo1Y|*r!5t7KNBoui|Ba-nVcLr%MYuzgZj8JE=TRY+-Ql@DsmnONv zqadP#E?%?X4gSi~0?)dS3TU-aj_RR)|8+9}T-JpPgCOad;s)7{{r1~$6vr8!hDsP% zfQVmGwYHNSM7(AA+`MizY@t~nU|BF+c4aoaDvCBrFySt>qbPg(-v_7I4fhde)SK#1 zB^qzVJPdfro#~Bu6J{k3E&+3>Q(1JCg5ivo^TsiSzFkwTzhrC_{)Vrvy*~n9LfS zg#7!(y78LI%)FOpuJ7T5#$*eB0uoS;D2Y>rDqv`55_HMX7BoQq0kQzU9(~7HR7tD_ z8&F9&n+{_BK(%m71EHycTwis{-nQgyn1fdpD4 z?%<)_K)XVtO}X6pItv{Ct_%WX-TG{#H<|52CkU|*DSIG1Rz^V!%AdHA=V`4ybgu#s zk*t^$UK#KbWo+^!uhw>GVe=b;mZYQ3BrZ`&8Bd14fRjOyoi!nnBsPbN5+x8uRQUqN zhl)_XjWP$85;F^q+s9U}9tVuDQX2MdFcxyBBz%E}Gz30XDB4%Q9go3TT4LcCV4v;+ zb)hAYW>9Q^oT7{BYAr130T}6BA1qKsR%qFCbI$yFl!LRHyBMVyQ6q=r^6SBE(lQqxamigz`Rr!*6f^omt>GT zC0i`lEhAis3h-?(sQ`TI#d&J5`CiLxL_?cZ)~e4oP+h&)Uc~lpm*|)RC&RJ4?(eT- zL{uh=cNA6q@$iJpJJA70DbRN>rbZ=5-(H=m0+tHcFmR3a12EA2p$iF~l&_+soe1v{5R#RsHjr+cm4rSAtG3io60fDH zSULnRQ4Z@U2rrGJskq1^r|v|&DsyAKz9a;$j}%$PP{u=Tw_2JyvzVh0@5*wCx>u#N z`&XqQRKRjYB4;szZpX?pC4`MU0gDnu*j31*6HIG2j)NPZ)*R6OQ~>RCa);M-o7KtS zjYTqrGMJR^Gd)lQ6A}$z9Ku*xAm}5&E2GDeQd5f#sJew6P-2VSfXc_j+I={w5d3lh z-GC!San~+X7-d9LU}-rhix?1nX;6@wq>2qe+yIO~6yDnjTmWT?-5iv^@dn98ra^uU z+SI|<`6GrieE;G)At5BOq=H4NrU#yn9s09Mpl=lzUV$t%zi#+@VP z^(J;0g&vF27SuulAd}z^n_t~J41X1YAlV2N>Lh+39}LAwJ9eut7iF$UW-F1o+M$(YX6jl0C zFisfAKc&A+t^iUslgk)BJBz|m5r_Ab*QfVp){l <>AR-U&UECQBagJM@im&DAFMFPs>0|Jklr+^`Y6z2u311i<;&_=*! 
zmtlZ82&Za?i7)Ty`~ZaulSP&vvsG=#)B6y@pWjhjt#G zirc=#>*Z0oCo%3M7DQ%7XON^ejLml4st{U-V>*DhP?dHt<51| zclM0!w+aZB(B2?}IvGXiSp-y5B*0~I&-?}!vDHelo4L29059c z2$17f-QLB!?LMpUB3XmZD1(Dnj4r|L*@qX0?Jf@ zl#FAG!HiXqULF;@tYQLue6oNP{sDVVdHVeCx!z8O4O%}TZ~@s~e7_MU4eBgjyL8~1 zMCJk@BqcAHDbn9B5<@2$ZR~o!#1t(3(SuV!P&@AUNa zeD2*dcjkbAZ|n?aa%bu>ruN4279{rq$mW0u)+K~7VZp4L9tZ)bZJo3t^rC=3B9It1 zVooHl+5^RliZY4FA!f^3_v;vcPkBr*{U zT=$;=?sql(lVCfj{v4)k0HAIsTF;astmt;2W3u4~<`Q5ybiqIgnlMUdLqkGHGl`n| zDnBomJ**+JTQXstgZ{iR9`agFT{$D$=Zgr3u%4Vp8(_w(U(6%_f{$nOn_AB() zvEULf)HDF4$_E67moMFHcn#oIVLv4&^axOflwlN-u}HWC;`b3<-3K#|)31QgrAi1T z4<*?B!NdJ+U4(z55|Tu4Cayp~5EO<0oFN>7lo`M;Xx~hU>S=JzMQfdwi$e$6I*jKi z(nKKEaYMsg0nqsWtst%DT?Ksr9XR)5^V+{!``3LCQTGf(Ei?FZwqQ6a6G<>g>H$e;(MT9y zqzbJA4uxvo9i0dK?Sk!mGDnWwz;TK$-n4O}OxNbA3qtMSO@sSp2+OIhhY<*EeV6F( zupoVx17Bar^NSLz8BmnOQ=fFHGM)0{q|wn)zOTf>yD4PFf)lC&^>8p7Jl}D4nwL&c zFh&pGSFw^B&o;0^07A$pX}X~+wkTimw}?f({y$(9uIHgt=bu=GSQ93HgJQ8#6&>eL ztzR_6%x3xTyuuu#xvWX%+1X=^Z;*$Pr#vva1J@v<@;_o@ME&&pUb&00i{+ zqa*}8uNak77>rfFGBd1`@&R|a!EV{QHwl$`XhhxSRG3i$e33aNl^ zl7p%oR8*=9uTs@nYwlOw-PiJl)-}2_wy@2fJD1-s9Vn3zWKDp&fdhCcZ}P{{lb4BV}r zpam$9Dd&njjDF0(_xK-nY?-r%q!7RjTRwb<1)P8u!PN#Y|GK>H>WQz;jU_Y>!Yyo2 zskZ=u8?_AHw_3uk5gq3&}B;C&@It~vmCq)!JG)v%YhW(J-ld*oc#FUx6x#L zyd>tkP*)hBPy!E&H_Sol-$CkHOQ4!`8RCTX@Rbnq?MK#C4aFuz8v(!(!f{*}A=D2N zD4{2MMuL_y_Nh;$X~kow(T1MeN&mI_Gzo*L_p+pD+TAr8)j@{KB@HQoNDt`=G9gg~ z1Xed7wryK}US6K)eV*^s0X#7ezTOgiw%vU*D$e>Qh&|eS?d!lh8OX>Xpkai3Q^T70 z180D#H7NK0TKSd7N<3UgM@Lm2uo>BH#BtCgW%GUhIv^4{ct2AEi+;X_Krz3G`$+tP z!n220hu^rbAZQx;nnmdr-)UfJl|zuM+`>cJ+h0GsN0Z%NGnq;YvGjq!JVy zMj`3z0$`*FRP((oS$4f!@?#8TAJG)JTM8#I(!@-1|Nfvm_Nn@Uz0ief)!Xe_Dg549ge*}v&m0>RA4nBn&~Cy0s<`|?xkbf+^r<|G`!5;LQ-RCc z7E!xaqEN4JZ0p-$5kw^pm-48x=Zo+6Bz-oU{>z&}#Id6nE0;7X{Hhq-vPf!efMunW zd|76mvTI9&|+YgHJi|7fhmY-uKFE z<52>H9j44fZ}i zfHrT2H-~P}5KY#Gm~v^h=O`YAT;Lv#mH=9??I$MW&jqV(Kr5I)f$$Z@YrP~`XE0Ua z{Z9eD-DRR_*>XbPhalV+dB+W*?HvvRI8`S4e*P!hl>j7oRdPPCU?j$aFyLui5VQt& 
zPaF3gi{8{?7d<=w!Q!ELZ&tLNn)~X+jjkt&8;TRJr7UMS#LldKDqg+&t>gaI)a7i$ z>a6e0E*!gB{aVqg46PYg<%NZWusU({scNtdOUS!*Da;YmoF6MGbjm{c3r6AjsrSk( zCkdI`dD@>O>tz>U*1)*28PEUu=LdT>54D-~vb@?-rd;YRrgYbNM%JQ*)z1zbupMF> zQ{S&Sf0IyUZGDIO9Z_4Erl$Lj+y$&?+n}U1JZqwV|69kK7b9L}p3#|ovFq5H`=X<> zT==7jA*@^)vow1z)PuB1xbDhof7M957nw5uiAE?_kXf?N*c@3p+Xp$_ zBbu|V-#FgHq-&-uZ&kP+cm9?^jke6erO4jl42Tx3NCwUJ5e%E0@7zqem1%K(r>$TDgM< zZ$Lm|J4Om>qWh@h0(~O`HDpDwtccXPX9|zd3tqb_aB}bpYm=ok-*{5!nS>Bcp!n{o%X$!nS3{CQw6*uWrt7yu)h_ z_u2EmnspX$_J7D0;&cH!p&k-fo?`eO)O|ouu%VL=^$RR&ym)K}ppHuP(}3jS<0ROX zf|O8mPY;z%!nFo+_^)AOD;!l?L2yFoRj;H-pkMd zYNfC6{O^8v@;e_Kn#U4a2%Zd98q9mvW(WnS2$yQt0LEqPiL!+FErzOGs4Wa&PD<_p zApqsdYYIHPA5?Y)L^afinc9h<<6Q|h3}izFy(PSNKwcry*oP>MIwhk=Q6Pnh1nt=y zve6e%5CcFkC^mm3-j|3bljxLEFP1QiDgz_U&u|Il;XMjz51&ZY9L->vo6 zmB*4RBxDqj{sa^Th3>Gx0!Y<7_f{VSjT4H-yItrL;ER|H`XE8HI0PqhQm#7yWn}d2 z9cZw(h172l`yse~XL6vW9dQ^b8u8^H!j1!m$k*gxS4ch%0ywy#L37)-PbLwMiS0&b zxjRseuY}3~2=Ily)yz`E*RKOiVpFA%Qyu{XN?ip=wFY{#%H_4O)N&Jn_YYflEjO8m zAA9IWyuQ0i5AOynLRU4>P5>ao5Z6=d#<7KhCy?t@(3f4D2@SIEspAH>f?UKHtt1#+ z_}%<>`Geqi=}MXKFS;jynGwJhQT|$%8YdK=U|&nU-qE5$h#Ed3kB08;Fj|Wd9Efgo z^zAC>%v(@c_{ct-?J1D(Sf>3(iy~+~RnP*i3Ldy;v04rZSEwB*2)4t3F-b&AI_-1k z3{kPgY!c~mzun^3J4J&XP=!5gAMk|(rBGRgLsvsI8&K-&s5l4x9fBHHW~Y=y z6BLL?{|ZPMTu&GU)YP7uSpi!Ay!2WhUyErD<;j1cg9om5Fmu>fKA%&H5Ht<;T5^~8s3-8W)z(_Ns4 zQ4IbF-@mk08k)c=G>)R)-B3lggB4D+xW@JZXCo%ZgWUwx9aUmIkQgZ9jVg#)27w0{ zKRGjl@kQ-?)O?5N)N6&eXF5a==-Yf~s2#cYB0oP~_Ay%yfR!syI8}$^yDUDTC6QWvF|O<} zL~IveEIy(6aE;K@ODj=azG2QYbnf3k?c{^Oze=HX$Ds2AyB_sm=z6JJBesqXYu&t* zrGK}V(^D}wd6*m1;Q&LY8Df*{PuO)%A45IQVknw~={rA6Mwwk0Ay1P+T}?dBx7c1a zp*xdj*Me7}=90igFXP)y>P9@a^Ya&dLCX(n9Qw{jMLS*v8+v%f@0p_?#DWo;NkI=H zgzVL16m3_GnG>Xu9Qz3lqki?vS#QQZ=i=lur;@s7ek$2Be+l|nh|GDXd1erl-x6eP zxj2h?GqX3^OqirYK(iY1g$JtZa{=&2_aL7a)^_Rix^4yZu?y?^m0G%e>vCk0&qdI{mE9RD&r z{J$zY4|uNM_5Xjq?V`P`7Mi4_l892sXqPA{p-Bprl(cA(h?e&HR#Zjv~v>>j- z)h0_fY05@HDtIKuYA{CF&`@~m?Wae|tXj!wQ99r}zQO0??Z 
zZF9nO1G9gZzqmT@p$tOX$@6i);~1M@2z~5}cr8!6m}v;`*hq0QY^+EP5%<<7pb({tJ#5A9$5F?K*WDD`vF5S+afh#J;Po|3uRN zAB`hyU_3fjbE$-~-K5K8_#wsyF)FRIP z7CXV88KFK3T3Hy`y}I`7X+Ld46Z3z9`z;+JFD6mGrQ0lY(cNsZ(8*&fT`OO|6jNf= z?K66mN0HTJ$&;lcitR=JfHx#rI1poX9>kMx@S2$PGUiZ=Lp~T-rlg_q+~VFAtykxK zJm^RF#x3&=03eUgD}`ozU%Xg}BxxC%d)Ma^Zye2zjq*A@&%OeIVifJfs~ierbAK@l z#g}guL}5<$Wn4!I@BH??MrNIiWw9iES!Xrxf0Z>W$eIZgeiiLZA)+}xzm5KuNS~`) zMR=;iy`|;{S9~)ZD$m&by|b8uQ^ID6xXnNwS;Vyn`jc#a@K`e!%&G4-RGUNBu0BJ* z3BFC%m1jcVNEFs3utw)I$0}Rcm#}w7vZ4)L zv$GGH>+74MX|e20d_0qAeUNv0z4qpIKM=jfe2yyVM9c2eWsYI=aLFPAY2w1RLu|=) zNUUo)U+O;q&OdO+Z}{d;Kks#wQ=5jEgQh8x70KIpHi=I-^XV@W^XvplMo%UrDt$bC zw_0o5+Fa8)4^Fx;VMX-v&^VC$`a@;qHJly7StOPKi*Z?`=o61lSaS~ zCrqrEu9rvlyKeANC2A~OvGAYX1ROP!t$p2!{=&g24iywtLhj+y^Df>fS}soL13H!# z74-*Din;SwIgO=oYn`JQ{U8Ww8k1K?4*$~G>f^r`LZzQo_6hNo&h7ex)m8&KY*p_u z#BGk4gDm(?8cM|s5RkKR0JupsOp5U|6qMPO9e%I-xG$zxR`lZj45tvzrO^F{7-RE!Z$M==BQdt?TH zQyfY}?l?=2%aPTgJtwWx3lS}SC}#!&*(!rEO~)UR-Uu>asfzOfGBhpD1pY6AZfJvr z&-bRlR&&G%oyMgWF!bOmq29B@Fsxk#Ks94j*5(!TNMQ{p#*3p$#O!OYpW970Hjem$ zi)6KEU5C*XgQ>)jC$dSl) zW|RT}7YLjoEc@SlyqH5JfRboe7e{PC1Pj}z+ywX0ug?3#(uODw)}#V>K_ihE-2=vY zCJx4=db&z4j(B1mvelij86R=eQJBB|Fn8$qrZ^G5*t}CQ6$!dmc-WC!7_adXxW5aUKNm`p4LAcF2?hShY>2p4FR9+2(Azq+YbRQL#KW5eMr@*D1vIM$gKEjTSv#HR^q>ncZVnxKZe33(4IfOO1Vok6oIWW6J3* zCNn3g&pIEzBivv|AKS@2OdTG`jgh;)Z$-uBA*)7@J-%m0phr_ zt)IG_mcLZtaw5{@eVm5Nle5v$TI4R%g$ozH&dnWadnb0n&%j^HT~lX%udcSoaGk>o z+3+#TYtW!UC&sN)MAyXqJ}o9DCIY}2IxnObUWCb)E?#{1OZwZILS0Sz{*&oE2xkbW z>a26EcqP*U^22Oe60oPQUcK5p#N1b`|4);jFh;V$niLsqZf>rixkkarN9KW|!M-pY zk6Rk+wTzwVF?4u&wP$5zWiDg#wKSc#e-8ig?VHWsZUf7Ti}g2e-fVt(=5}z$dGnBW zr0ra+3cZHT-nC)F1_v4hPRK`3tg)DWeDq#yaZ8a=RysM&;1c@kZaJzP&3r);<8ef> zr5N^eneKF@v;P<41M_;PM|>KfWxh!H;nm%x7VG^LR&Q~BZl3HKpHFA64QTxro{w*M zTk|8Aca|TsDhWYGW6YQ!)B|vvgCrAvyqjR0$l;=K+~ZQ zMK^kO$b3JxqtdZ?z45ulZm2wda~@A#fO0NajVfu9-jNErv4; zww`m^nyNl}wOW*z7qedF@uNo;46(Jox5d(_0CW6`HEW*GI9i%B<5cYX11yX%BzcE| zm@(bEb^CGPI~Z@p>eUZ9hD@O@J=(^Np_Mmp-+s@8;rld=2h2Tn-Ehj3%&e>cEV^1h 
z54k991XqXYdv$PS{u7ppLQW_iLEGFh>?RrEr(YO-&WYWmQTO9xA}agP53P-LH~;j3 zByioZ;XSPzE)|0vgNC%@miF6ORv%OdQ46qtIoUZhX6k`4YO-q$-eJKA!_L3U+q;+- zmf`GcNHr#R|K-b`z}-}qxO0Vyt+$Ld(CAs*=8Zb;KiYwmCEHLHCHLv_^XBx(VyaTE zHQZNvgpyKAW)Nnsx)rs#Mr~iO;T<6LF6y#ScJm8y-v zf`ccAygUa7LA@f!lu_r3!5<7i(}u4;%sg}tHNaaIk0fH*#@5@I=~6@yHx3i;frbmZ z&PH~2%AA6+mmsr9tMk-zoc>ytAFc8EO}rlztU{gf!hu!V>EWN0EY17y5d8l25Nv%- z@4Xzh+}_@c^Z_g@m)*k-Z2dZ?H` zO1rqlQ`I_F2EHV7#pt6N%R8~-FPoJuS-!ljl+^zC`T;|xY#pp+?!gY`GT7IvK`H)OD_&jilW}->$VT0=7-lyTn)Z>yZ84_|Srjw+$!R@55NQda zrfTv?KMM6To=uJ7J1|GHVq90YYSX5eX*Nk%_%2?{Xc?nnOby!|FMfBNsb68%b|{lz zy%=430v_!3uvta_wS~tiKYTwTz}9z$b7%({TeCLbYV7Y>n? zXIl(cRUNnFzQmm~%sgZ=0hkta-aR=!Wx~;pt^0_zrdDIm% zwO@<#>WN?B^g+K^FM7!_t-3&H7i*ghlBCVXjf4t|Chh7osHPhW4< zV@iPS_fvMc^^=?(3VFL~eAziYJY}HY-nrYHCcN8R=fp-^acjZc>sI2J3<|;|s-{e4 z0-K!}*HzB9jZDTY>pdjDCcXKdkH*pl=&EJ*nQBROX!{% zGW$Y1TnRHTOzmph(7i*Cg5Q}z9tZ4R8Tl(3jKnX=-{Ks!zrY?GM>v38a6wnv4mvYx zL#5o$&jnJ~%tLf4vmI{$!faNp>HyFc7jpRU;dY%n8!cNlh~GGP?AXv5OFB-e$&XrJ z-U~Xfq2C?o)uEtYDNMRn_qNC)t9oJRw9=F=)4Me<*UZc;@VhNK(MS!AMCy;3Ox1I9 zb33#4Z2|`v&WUMk?|=H=tKdKKUC5fOVwq#+A@kI_`SsH?hV zZLICYl$18C*~x3yj+|<;(_Rwe`;zi}Cks#kEcHCk`$D1eU(7l>I?BESSL%y7^)2zT zTd4GuJGwM$dt=Ki;ip@D{nAe%H;=8In2^wdP@2G8>9rr8^pWnTtH9ia+5Osleg9Zq zvhDQge&RrK1uf@Ryf3$J#jQunb)33GnGG3~Zig1V2hVCkdU|_Yj1P#PJj7uIi#rxo zt}%heOT<>;;?UBr;TQ2h1WLM;s7(5vVQEceXk)qe_Ok3$qUO4fSp(y$mCwFh#RpvV z@zMnCi*QFznFiLbZ)MMv+Q%p^4)~A6aCjao>yx6^*nVl{^EW3i#_1u)e@(y{ziVs1 zQSi8zGk_wRUS2A!Ks)o^8aO}jn;!&Xk1_O@Ql-85u*&=cB-s*Byi4c|6O&?S;b@$H z_LcHdA;T3ik(yZOW8Z>7i=N znB!r?wk`Yo`PAR<)b_ZRD-Id5hghXL=aRH32o2U)rQqSzQCeEZ=&&f}M64kD`PZ#% zC`&$;mL`ECv^1r~IA4)&nE{f-<0`d)T%N(zTSq$^SoZ4L^+C6RCSlyhCd(H!JN*1Q zlmDK8QzEGlbU2W(@HduNwFNi#2;1^uI(`lTEk1C-)W!X$(_Z~avL*jKOXKDwJ8wkv z%GJs-B_15o6i!&Ty0fOw-)6@W;Lu&@Osl8(3v#3`PU3s&_||i4Kz$0MS9Dl;>g&%Jsdq4)!?r|+@#tp$&2 z5n*TCrfu8rFi7V@LR2a%DsZ2eabD>`<3DPR{h0?;#j+ngJ6-A$Vj9cv z5b|pOW7SGu_!IfJR`mEodTecfuaJqdkq|D6o&(?Am?jKTV_PB7ZTkU~Ti|PQ?)mHxoZDVxP!)tfv-y9>( 
z9^|wze|~+=VR-bD@NJ^i!Mf<>gh_>so;ETtY2?sxH+Y~RxpnBsHzOl$X$r9nUus}z zVBoKeS?Jnm*V%2OOVASpSmiZeQ#Cac<(Y7Hhp@j(n)sIE*vh8 z*M%Q+NgP?&X#Mxai{Y%J6P!eE+_2vjxWvqcG1GBAZ8Cj(Lw9n$i|2Y(CQq%fwVg~o zc6i9F9YFkLTp9D5(l}nvnOes^?_spRHlzLw2{HDDwu?AEt1a>LDFN063BkSmg6@<_ z{k1^m53l-Bz;NSz#R_nCXzbtJUzRg@ABVx~_82y|9m1?WC^_Of6SG9$Z#YXyv|c;u z&6)E&R&CYx^V&r}v2qDQ&nm{X{rmUt+qG{$Y2ri)Qj;Gr?*YgcM_gEV_$1KH4_%{W zoyYi8Jb1r7G+nl$q9V*Ly&Fua6tG>KTpUrDmDR21u(_Gqiz7huGP!TcX#`f()$O}% z*|H&v@9A(0yAew4e_BbxA|rQFnr3BX%{(_{uY!2YYZk7r{xoljfx%lMd{|i6eS(-e zwr5T)>TK`OgJ_U9=pge9!7I);`X4vlPiO3(8E zyC4o+-2Awr{?vQb{;DCJn$e(xu6@PblrK^RgUy`u9Dk#+iKWkC9FrPlEzUmDNgp|~ z;6VvHcq)JP@UVIN=uuBs*Sb%5knPiF8WcT!v=%Xcm9{AOZo_CF_w{Q*kCa-0X!g_w zL()ShWs33{jdR@7hrjPp2~hb)k6P39SoO`9uU{Xh+5|-_zKPv5IiTXzjeY>KYbWN9 z(%Sj9tgk7FgOWYmAvYp)!L4Cjk!hSGtLN9RZ?+rCvwgW3|8hgMOve!5ZyH$aJKHM?%g@t{w@+Xin{Er>m3oX(@g2~et2DF6)9!yG-_UKXIV7WFEyMd!5V2#!w&R%sj zUyK+I7!KLD%gbxP(4p;#Dy=1yyi%|alil6Ni$lD1f`Ve)Hs-GP_gG_gN=KFv3Nk=G z*rDAhox(RUb=tIK0&dZ#Py0xTXCaJ1GKs*>YF(pP`GO55=bxS7dBA7&VH*2|7B+W6RBNSf2#k&B`9J zC?*ho=p9k9wluwaNzWe>_yY+rfG(oYj=;(BVj!-RY=W=eA65 z`y$e(9-|Xj>ejh~m^PjJnb#J&I9~VU$&f3-O3;o$|G$gHRFIIYKxZgta9kqv5s*~w=n3XIR6h6 zO;OS^FZi<+OAaXnH>FR@)9D7(V0~3_@yd~MVH|OjnVo$Y_kj8Bg<(tn+KfP5Fv|HE zDMJetbHT!e-yzSk7uFOfOQGu?Ay1Z2D*nh$_sSp1@lMg4^zd%!iBqShLk`e8a9nv) z0E)lw?g2Gh$PXo}bRl7Zx&$S7OJ zCrL4axebg=i!*v}IqUUU`c!T2fPx?go+TR97>=b-7k_B??h-@{Nn=5;pdqtbh!MMW zAFucJ{(i%=K?*3#HR)5M1-(Nzd_Z&1H&U*snrJxRasIp#l68yj-47{z*w>{t=ued| z;<{0UU~}dT1_%X1TebG=IUCBv7v51Ij|>VhjnON!T2c0=Vg?R;mpf^Gil+O<1=q~A z4sZzsw@v zwu3-@`c9C;f}M!su1;OZ(Er{X=-iV&E1T7;_a7OtxP?TBdE!2pnsJ$~fBz+Eawb;I2+6%{@V$Y=p0D;_UIcElQ> zs|@H4WO?3a*3r256Do>&|Dg6s&Y|Ot4+~WJNlEMWHuWb`_gWwMytF@L)Jm zzT(;WwlhxYTyf~$uisOajLM?ryWW;%CnLEVks`-yYrD`{?6Y_80ApRZuiKDWFGfak zz;J`h7H`)t8VUu!$9XpD2!z8gyRex}G>=w119u?ZB6t9u;(vc1j2v%?Axb!jMIkJ zt}P+-jBQpY7@>I~A$=O~k9X@)Bp`pqAZB|t-nd4Su8-MfIo+{!%ybv=&i}y46+<#Oc0d<;uO-9ys;0513B;BX~ND(4CZ= zJU~&g1^<{4tXI|M>D}58+dI@7t=hJ=g`znMbfxC*Dl5AeV1Et+!_?{1Q>dk9qM{x> 
zemvuwW*Dl^lB^5H9K*T&@4p9QDcVUzX2Z5G%L!K^h4LR73f9&AbUdNGu~SbK^IgHg z@`B)CN|w{m(j8ovEgha;tE&xg7cFsg>=*z_?~0z_#XGR&Fk5)9n}u6P-MyaX_a{C= z)vn$GRSShT-uS@qi9RkjqLx_Rc90z-*O6l@Oak~zY9PT>X0wKK(W0E#xnsxF07!EV z`5Nr#=qRdQG3#9(oXE^P?$|6Ma8KBqWWiJnV~l@pnZ47wmcH86{fWplhpD+ z5v=jYe1D|6x<9yr%{jPFpFYtz>XzBreFX}Df5yf$l0MG3Z_?f89#y|dF0 z0EU>N%^pIcDCUrgK-*=uT1L`FHP?1P|3zZybltddIYqs;YuSDN&@8Gbym4Beo#Ia_ zRr_nxurH_vn8-<%mo_)5Qf&h{ilLe>)hr9@QtJ=MIMiGK_|Evl+mA>OZ@r%qPy$B} zUmwoe(b-x*E-o&v7G-@{IRx9vWy?|+)HLiNl$H!;*U=lC)itFHQp!-pm^t1Q@KEgo2o( zB$*-Q7j>m4Yg#au_axBe=FOX>jD<~LC#aMIunlM|dj0zK?DNmQ;)&2STqhM9|9k81 zUAw{%mnz=G>+!T7@n|r6`t$3!6~@AMqzjiwNLuS`K?~aElCA_PNM$kX)rUKGKeJ`_ zi@{l}nwme{Lv?@xwQ)1SkDxL9SLvln?W_synO&10WGIY6+-zXs2BOSGN)ZtBQ$fqG zALYIOtza`~J_1hLY-ab6;~!Ip2Pg{&fFh6yc86C{99q3(MNFy&EQ?)so{ktLe5%Y| zuNKuomGK!*0InLNNB$SLKCJT??f+ptb;$lmo1`fO8Ky;C}j^BDxXuSQ3 zqbF)AFwkjc$jf1Gs$nf7M2yOf7`Xh=(Fgp5hN`L$;i?E5C>&!2mGK3`^Kbd;$KlR> z#s^Xq%xZjcJ(ARZO4nn&k>v1wl`0kD_dv_+C?3m^KVXA`@l9vv=Rfw%BOp?GsIfe| zHkBIMSSjcsB6$jBqrWnQO)3TLFee{7Cd$ab7(Eu_Z9gU6kI3uK%7i((z3SyYYJ)l1 zgO!yt-@G|3m;(w#MN9LO%Bgav?JU=m*?4mcy!g2VHtskT|Dz|q9--!UV zi;4{iFpVWG5}`?0z&3js)WKrT9S)4Ap*)8z6X#+>U{!@c7N5so0mp<0DUb*L<7f9T z(`z>O>E1mNoJAwTY0;&f_nEwoH%2SfSN_N=gDp48ywAb4hyoNdc?E(F@LmhQq`gBL zdd=&coG{O(#tiAViZ$5n_EM-Xg`qL7f^w>O8PZ#1Rsf_}q~MQB`}a%X2)^Lkt@hL( z`lvZ|{a5S&H8r%QEFXuSE=pNo-4bNtA1MJDedQlzWSDG{YaH7?_RI8;m!B3ff9H8E zGbaT#g_^5%dvUb$z=bzgG{bp7y1Cl0Zq6NXS)CwI{P_BH$=ta;(nB*p6&2lQJXx6| z(0-P?jYmR?K7AQWifVEmG}DK6(5=uCc|wejest+!oFA;5VqPPH&q)U0CHD3{lnAKF5oogx0K@6=UITEqwOE|YCkvnQC?m!!rED4 zfy%tANn(&>+(`{K9FJEBgQJ%Y^-Ajlq@5HII{qICNl{D0(0hx;s#r^zyK1zY{mkD8 z2?BJB1SL>|?xw0;l$^HYjUW%@uX2T17fq?FAv_bo7Ag-ZV~m%*H04LcMtJwQwT(s0 zn{wd8s#^=*72o4TFwP@61xzlbgmk0N!Q0Dg9i(SI2CP=YT0XzFA5QiF^CsVtEo8cN z(?{pSa51g@%%&?7h`!@uoO*G1%Wgj50yScDiLj-rv^}ww+CEU}>#{??)0*WTao-&G z67a}f!w%91KS430IRh&h7x#d$CvzoGzqBuTbHJax`Hx?KBK?gXc{4ukUVppANs&z( zu-tPT-7f%sfaQ!J+Qk+M8wRM?4vg#K^G;D(sZOyQGAgXbT3&}hoov1kheICzQ%y{k 
z@}%CK^M43*Poi0IfJrMy0v*jagUHhIEyRRcs{oLCbq=I#7^V?ZSlXZ_BJDsB6L;^SiDt5}r}m zF*x=m=$jv-V^^{`*ds*ul&J%^>M2Zc4E?ruVuYia|{*LFA^ zt7;Hja)=oU+h09??29lGxFkrb@zdRVjPQdn{YLGr+*ELC5s(Vq8u~Uf&-#-=k5;Vb zX%Ps|E>J(y~?kY7Q+4+DJVmVlZj@ zIW(D23*eD1bb^;!H-`Y$JxllR6Yh; zxpAJbs5yW?s#pV%gCDd1?1Smj8oMHMTW>Y1k%sHM(#F6fwM~!MB#bRo@12^fcXK<9 z?P=RD6gHs($^ucH?{o5j!}p`BA;uPsh$ zZ((84f7q}NWO8`y6gBJZBN0>BZEXh)vV5F^KwI?o?RE^rXR)0~BvMjR!e7A$KVZ&O zCkEwRxOnlwhj6Ek2RpwFqV$n1YUF!Bt8?_>bm zyRf-PBphMhMMh>1E9E?|;A>6IZpuoDka)SoDd;zuJZy0_tPJxf9eoXLA9rv?3OgFl(>!~(m)A4YVnXGd!}w-{#WWtx z%g(u=*I(eXugJD0bwLHa#Gq1#7ni#NIn1=RwW%A-a}5hGb%R37*+I0oF+T8E55LF+ zQ8H+mAH3kbr9AxiW}gdHu4zx7b|%J4VLM>D&jb!d?*ok_q0ZdTh(I9%P#;VL!O#b? zkIf<<`K$FlTn4Aq8Wuk+JWMxXtn9gkr;7{h`#t3z&r_TV_5+S19Kx22*@8n9?$go` zRLRWgaL(bxq&=fBJAJaCy0F=a5jas%QDE?ncNv*> zkal*x z#lcnWS(k3rpL#-Sv?Q}|D0tc?#j1fk3o3|3%Bk=B1=xQVs_Q`omB#L}ecv`ccMO9@ z-bHPA4p=e-I-Hpj-t1My0R#S;m>u#-*b^AQ5l(DMN=Z>9$Q?}{F%x_p=v@ZiCA=Q+ zwci)tRra5&CWAfj;K9}XbDyqmJ7Ol)BU)g+>3-y4hL+ZyD&&kVEc_o8);$O_+WYkC{%dt-E9Q+f-O@Vht{Ji>x$21P+7Yc5^C7#oP zaJT_@0!v9G#-HMy3o#*)_vTwq_7sDbUj+|%_3G6THrwC6!c*#yhDo75)nol#RR(7U zVE;0?Gx6KghI#YDkolh{Cl5@0bQX1X39ibrl9EHOl6!c*g>&#ZaNs?Kbl`2{7=KxB z^AP*lv%680rmS&bY$J7$*{#r_by zXawM-PvM4p9+KT5-tF_?K;u0EFs=hI_7oAup)Xn}-89+o!eEz&U$jS(dRMqd(v(5+?U1~MuUWWX5wqha^c4uNAi@JNa zgugB{SUOQ#u=AA%4a$f<(}K_Rx^`_i^4q`})*a1|*D&9p0Y4yOuEp<4(@nyI2Y;b- z(Gie@b|Chu{p{Ti_)@SPO*S<2_BUDll|6TkUmU5anaZM`#WB!U@mu>yzCJVRcNPMQ zF|zj&+*(M0bOZ1keZ^?Ql97)r)0YYS!`%xHmtJ~A^dC27&!!;tFEa9Zc{3Ix>}Aj% zmX)7>f;aA7{s%{gNE@Dx^K^9`4Fjm8r1U*Bzx6z7ODbCnVpE&et+D!C%!q+j6;dg= z9aS(QqQ3G@x6shgC?dYj@Qstu#X@Upu*H5XD@((bxE7wexylfJXmOO=N$Ap{GZO3* z+RVy?lL(&;x-aT%UB+naAl{LSL+EACiq%p)EI5qqEk{_j04pr`^uc%cqscD zf2NU>_h8HBwZs>TbQDIWA7#0-nWVyii3p~W)MWSvj5MWh_IN%v4CZTD=Lw|4wTLWQKWCVkJqkJ3Yidl#Y2v<5o&;u(!_E}Y)M*-ZsO!3QHc*bv z^-&g7CSB7WM(FfMvzYH^IA4UhC^yr@(h8<1Zzo!(X;r=A%)cn0k+9apW^PW zybJJ)3)e%36buUTwuR--A0OvzWysmhTes|4662_N1g*-(&DK_rkRP!!sZpz7CWnPg z9Hmkr$VmQf5sP?gW1UQDa`Lm>++$+KWyBjo<_dfJfdjD6p1#^^*xWtPaK$*4vFNU` 
z_pq>#xfU8a4w=zDmoTNZGE4bg@qnbdGQJRxM8pkd@q)s_DF084UKwDe;yg)TzePcQ zeom;R26)1s-S>g5S)F}{+rNGLwn>pk@J-9+(5U-|j_lIpQ<@V?ey#8G<>j{se%E}Q2NPN|-xt3{i zM;GFC7K(n1>rf^v|0Ut!&DTv$DvcZO5Iv}F+;r4V%(UB3j0iq$g>58}T4@%B;VlTx zd;I)%8=n8m*uv8C0aCfF!~^-jWWrw0_h`w&0U)`bnAr0Fn(V;4Ix3M}R?T4?1P1f_ zX!{&VHb%`lb`Q+J0H!`1lQyV#fslDXdjAR+MA_3;*mG%N*$*5q0k()+*cQ5B$b<=f zSmsH@EJ=T(g#)K-RsQ1#z=}Me@MLLDzIgApDfd>NnD2VJlQ3stlR9_zt~O$XSndes zdtsT-*fN^H37Dr+aTbOFtdW(XyEe6M{ammxNM?+{p$(2<5NdV~Tc6gx`lXHd&nnCRY{01MDq zrLik*#y|Rizk8MJuLede8>@fmN1 zgTCY_R@{D8$bx$^=|%y3{v{DkBWwY4ug**CBL8OG~C28$S>NG9?0uTjUNywQF69 zJ$eJIA%a(omJ>MbGV;5Ch@^0vH}^cK$-*KcgvK)K^2`CYz31=oG=tdu!(-|1sh{E; z`b)b*{j@v>0AVViaVD-t*N<7PMKK75hMZKV|NO_Nmr`J;iG+gcpUjls1|;@rsy;yb zhcD!W4VKZbRaFUapnDOpN?7}s=ilgSctBw%-gd_sL~ch;PIp||YOt1T{vKIbf^?oyq1wFkno=q3`fGx(K;7bW4{e4dgb z9PG<+!}*(7tz{Il3+ZFDrsi^#Kue-YoNN~+t|g;cWI-y#H~f_MQ_xBf8Zk{SS@!Ae zc+?M}IO&l*Ak9+6EG>w2lKIdNL67gYOG##{zH@BVvXW;4$saJqZ zNfAcUqa$rvuNbk9MUVvBwp#RRdW5Ls&+Ow?b1BBg zX=|THm9oI|lyPK)AH;u437sWxi;H)V*hF(gD@o4l*Y9O|o0XoV{8Z}SUqaw_`~71m z?FemfNnGhXV9Keqdu@yi3?ATelVCh-?;|&+!-8wQ;H{B&#a0Iw?L-MA7Po*QRbLkF zpQN{{#hE(=s6{^-w{G5GZDW-+VRAeFBMgnv zC)Yyr#l0iZGS+AqFqbVAY^%32q&cBrp?dG>?AS%Wl`TeBMA||9mwY3!|pgWwVn?@YXvQtMYF|)EouYUbHQ_u**2&run=nZ3acd^Eh zJ~`cl9&}BTvS?!uwT^q8cCoj9I~!@I_&&md2yAXkKu89f(#6$gNICOqG=&?FU@Mxi z+H_J_;iD*JGn1z`IfBK$C!RH*HoSH5rQz#SIz-bEg%Z01_t+BZsLPnv5G?m`#r<`3 z6yPx3w`&IbjIqZ#b#mvSL)~eE+fLBZ4|!S49y7`5*r}8EjT<8v*4~MIVHNAv6;DHo z+HL-h&Lw$QWVt=4T?x;gb;TXE6S7P61VGCUIr|g*AcC{#A|*mhj=8&U%e3Z5*;BvW zw>N;%=%H`k+^Jv%e0rH;q%9y}eqrqXzyI#S6g1^sI1(zkCX zPM8rt%iM@$($h$~VF=U%E1=7Y0gq8z8^+_4Xbl4{yV|7PfI?fOz+`H&S!eZI=R-)) z_dv&fYl_lc+qOv_9Umh_iS-NHqS&|CGbW#ZmUe4$=|UbS9eV$IoE>Z35AUEnkIVf< ztC&Rcf2;WvPrnH~-V{5w;2lHQ&ipt8a3JTJaU$nJ=Q8`vxAtXMGEWR9>0*dbBL3Vh(;5*C1Kip zabs~m`iO>DMkqr*HC9!&`L>2(^_fhaq+3o@*pNLVFxl?GoHLOXf+8)FsbGkxUo23! 
z@22Uu8{jkXj*2Bk8i1lpj~=_2I5ean7OUbuSn#EcJYmVFpa~od4BQ1y$q21bd;?j$ zCcs`?vnJPkDm_H@sq{Pc6D}j#GR7M_Q;gTsLoP%nyzI{t5_X7LND-otxVyeQ1?v?( z3JWaNBBSLB*(wu#4JUJxqESIvi*gO#cH);HkaQ?`(`#zTyAkd7O9Rf{l**wuUY zwiiyL+A4doflv$dX;>mTI8_vP7_e*RcFMn{r1ur5A;Lq~zx*}LvsK*K5OAUiq$_S&<8>Dx#CW3`GadVHotTRSv`P~T!(f0MW|(W zR=lB`+c-KL_p+_eW1nXvjVL$ODSod=@v0@;prOTZot^M-g6?F0sgw)b8n+|zES$nv zBttj$h%jIhoHmjyOMAYIUm5aj#Y`G25IT!U+2ht0Nx%_ri;5(afW&jrn{9!|R}9&@ zbAvel$0%lVA*;8la?n9&$+<#BpphYgEm>4vz8|~C8K`59BKr&0$_rgZ^uu08YKGZT zT+ij|i#QG5?*k8Fac_$j4QZD` zGfO%%X{YEpC3)M@zbB*PHgcQij87x~KylMo7p=y^JZxd=vbYt|`-a5ZLIP6}Mh6%Z zaQnmi{?pfoBguE+5-#ZKdN!7ijCu>qOn_%JubqYNvWat=B`E{(yxL8MQFdN)~1HsuTN(%t8bJt-a+Cl1WRJ^rw&E%(E5ZLbl)+IEnzy z(UG%gzaa~5ic#Lxr8r*nsb4fsls^SMDNo-u<(*ur~Z*K@9#k@7eP&2X57KH&|_|TKm+im zExX%L?Ig>&-ek?eJvCm&tucZ<0#>_4E4edin?7{?D(^D)N{9 col > c.col_id] + + # This is a column that is entirely dollar signs + if all([ + all_dollars, + len(col_cells) > 1, + len(span_into_col) == 0, + all([c == 1 for c in colspans]), + col < max_col + ]): + next_col_cells = [c for c in table.cells if c.col_id == col + 1] + next_col_rows = [c.row_id for c in next_col_cells] + col_rows = [c.row_id for c in col_cells] + if len(next_col_cells) == len(col_cells) and next_col_rows == col_rows: + dollar_cols.append(col) + + + if len(dollar_cols) == 0: + continue + + dollar_cols = sorted(dollar_cols) + col_offset = 0 + for col in unique_cols: + col_cells = [c for c in table.cells if c.col_id == col] + if col_offset == 0 and col not in dollar_cols: + continue + + if col in dollar_cols: + col_offset += 1 + for cell in col_cells: + text_lines = cell.text_lines if cell.text_lines else [] + next_row_col = [c for c in table.cells if c.row_id == cell.row_id and c.col_id == col + 1] + + # Add dollar to start of the next column + next_text_lines = next_row_col[0].text_lines if next_row_col[0].text_lines else [] + next_row_col[0].text_lines = deepcopy(text_lines) + deepcopy(next_text_lines) + table.cells = [c for c in table.cells if 
c.cell_id != cell.cell_id] # Remove original cell + next_row_col[0].col_id -= col_offset + else: + for cell in col_cells: + cell.col_id -= col_offset + + def split_combined_rows(self, tables: List[TableResult]): for table in tables: if len(table.cells) == 0: From 7dfafefc8f1766a9bcf46407556a34a7fb00b688 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Tue, 11 Feb 2025 10:53:51 -0500 Subject: [PATCH 26/27] Update benchmarks --- README.md | 16 +++++++++++++--- data/images/table.png | Bin 0 -> 32520 bytes marker/processors/llm/llm_form.py | 15 +++++++-------- marker/processors/llm/llm_table.py | 12 ++++++------ poetry.lock | 27 +++++++++++++-------------- pyproject.toml | 4 ++-- 6 files changed, 41 insertions(+), 33 deletions(-) create mode 100644 data/images/table.png diff --git a/README.md b/README.md index b9ab54f0..c06670d3 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,16 @@ The above results are running single PDF pages serially. Marker is significantl See [below](#benchmarks) for detailed speed and accuracy benchmarks, and instructions on how to run your own benchmarks. +## Hybrid Mode + +For the highest accuracy, pass the `--use_llm` flag to use an LLM alongside marker. This will do things like merge tables across pages, format tables properly, and extract values from forms. It uses `gemini-flash-2.0`, which is cheap and fast. + +Here is a table benchmark comparing marker, gemini flash alone, and marker with use_llm: + + + +As you can see the use_llm mode offers higher accuracy than marker or gemini alone. + # Commercial usage I want marker to be as widely accessible as possible, while still funding my development/training costs. Research and personal usage is always okay, but there are some restrictions on commercial usage. @@ -63,10 +73,10 @@ There's a hosted API for marker available [here](https://www.datalab.to/): PDF is a tricky format, so marker will not always work perfectly. 
Here are some known limitations that are on the roadmap to address: - Marker will only convert block equations -- Tables are not always formatted 100% correctly -- Forms are not converted optimally - Very complex layouts, with nested tables and forms, may not work +Passing the `--use_llm` flag will format tables and forms properly, and merge tables across pages. + Note: Passing the `--use_llm` flag will mostly solve these issues. # Installation @@ -426,7 +436,7 @@ Marker can extract tables from PDFs using `marker.converters.table.TableConverte | Method | Avg score | Total tables | |------------------|-----------|--------------| | marker | 0.816 | 99 | -| marker w/use_llm | 0.887 | 54 | +| marker w/use_llm | 0.907 | 99 | | gemini | 0.829 | 99 | The `--use_llm` flag can significantly improve table recognition performance, as you can see. diff --git a/data/images/table.png b/data/images/table.png new file mode 100644 index 0000000000000000000000000000000000000000..8c6d81cb60f1f6bb193b819f8311771d6355eb58 GIT binary patch literal 32520 zcmeFa2UJt*);1b-BMJ%@ib}K7!3IcI5mAsX1PMh{q=Pgmp{W!F6_ln_K?6vOAfbe& zARM+?4&zD zheDxtUb%cx1BKc`Mxi#VY}*R&BrDzSK%qRUuUtHT&Dmh2koIM0FCm9(UxcFOwawdj zL@(}DV%o`a_{y0*=bx&}?YQwci76v)pY$f|voAX5&aj-5=Hs60F%>9(KqHdz`O>n$ zF8bY@%93^mO}VYjx~)0on`HQ1AK9eGSnY1u1-M_G-y>GzM?d;cH)sm`H5nMKq2FJeWz zzdYIh2JLS3ElT|K>eYv|jEyNbc6Lv$s0$Y0;es5`!Pk8aiNBHhbz!<6pQ&+7{*r-0 zk&WD>&a>MxQ>QBmw0*l=dPS|<&SqIO2`x|f$?kCKaL@7Z=htpnD&3MYl)BSceq5L(TclOp*{iDNL|VbrZ^qz@#XKI+wsiBj@kMl%Oc2 z^ZwxSQpXyD=h{+7XxLHwv?!Q_`otbjp*j_ghps!@XncK?+I z(-U_J8I9K!R3q|hN{u&SY#LHZ3|FVZbJOknzq^LA@sLSJxNbMp*Ly2)x?IHjV{^xg z*F5GNwxDh!8*@-I6_=l`am=sEKh}Nj`q@tJy%+o#IJ(*kZQO@3o!PC{7;B8nPOI_u zLZ#L=m;UHZ6WOKf0s_OSA5^5+odTujw&iqqETM&G)-(;C8Mx0j)a2N7)FdcJzHfIO zE|bfQ#g2)ugv=NAh#Pt=7%%JlmwM#1*^%z{hN6e6j%YkdFJ4`&NkQXllC@Ii?cMt& zJ06oNc?~f99dBf`uMvHAivB$4(V=)XT0&%`cx`o3&0#@5`{&JoyM3=rzP#C6$-Bn3 zI*a$*DLem(@d9i{_Y+2`iH{Pp+gz4Lb4pK^v+orpJJiiDvRP%RFY9wFM_g-3h-ea8 
zokNyk--#RNmKS=(r<_8@d9TLGNVDJlxXEZ`wkdG9=dxGx2a^;u-K5QrV$Zd+!+lke z<)g;qcDNGE>Ia$GddkW9`kPB>?@xHnclz1pkLGk3;FDPw zu$AKi*iz&6NXyJCQtT^_xnq(D z3&l~WKM)hlPW7)PXWS~EAdh>9`Hpp#`uV+iCc&4QKSqp$3+%j4%gJMf9mIuy*>f0w z+OFs8yzWgGm*CBZeDvu8+5NFIKNQMea98Jb`SUBn6;&7Y$6&wI6jR!<<^7lMThzY2 zvb;aWrSFw)HLrh0h*-0|)a|c9cb6y2%S*UP_`JRd-BzWH7fr#A8sBi&p6ok$cr`t9 zBt%n%*NVSbVRCV5nzcA~xqVhWBwi7Rk&y~=%pWhxH()+~_2sU90UxFB9LU<1Qi>>r zxrJ_*56Wk}o@Y}$8C-H~Ei|@RHCDzp&#mTA;+~KrK1%){+yc2)1{7?IPCbl~#`<#H z`x%#|FM<~&6|aoil%FbYwa)hmR+SO=a6U-NcN}J^_Oouywwhn16muD-fK864KJUsI zPS5{B30Quzg_hw7SJ2lu@}k>(mq})uZzTX$TopwBfA)>oy+!u2$C z4Or<}g`SL`=l!nnEbXgH`;>G~Ar|(%TysayINx-T=V_W#N(*dXtMlqqBz<5PrP*yi z@3x)GbXsBboR@1(@t9rZTa}sm6eiV=E?(5qTl=BFgZ0- zALAIVa;h zDST8slhXO6rs7pC-POfme7W5rTroRuXXeUt1?vnk8NzFf z@zc>Tc5xXD@A~;pGf_uxZMk1I+-habdAyLKXow%Npf8)_j&{&U>zz9ENFcmK@L1M4 z8PxMyp5YkJ)rCwG?-RGaJkbrLX=dZ++E(FrtT$vWbuYI{DnX?NhnY(y#TfAph>;yM zN1viUs(MTX&r3{khA=+qao1S2xp>6W@D*YC+hT{`^4NzvReaJiOCuk;EtF0QcXY3o z&^>#A@cQ*@XU_`tgTlqjc4jW0;IvKWuC`d<(+GNVKS_R?Q=ib2aTQ!^^X0i| zO!iNgF`*Q2#{!J&zO*vH;#fdo)@L^SWn*_2ULi#%T6Dk1 zorRgJljU4^f}Rf1!AwBWZlBxvNwGFq^2+SgLX|mD)@6$4nv>pftnTnFV?#`kvEA2P znH>fmT-Uj|{uq?YwSn-Fo33#^-J3o1VCz~p*BXx?zXxZtoZjQ9+7*Z!WwCNH^GP$G zO8tWvFZeUwp~LMW?cNp+UfXW`U03N=()e+%lCOTKUM@GLukTP(5~orgbgkfIf9>k& zdX(v;H`jx{Aaf$9e|1pR(?2kvR*;Jh!Xn4tqT=}T#?&MSrNK0B_5?ocSmiss11Uy? 
zcdOleHv3kLfzD@)bsjDYY!WTern67-o~52)5hb0cu@{?1e;Xf{ilva9x^MA5wA+?8 zG8)=};rHvSoReLh3{=9aheVnyIW4r`$jc;z$S+9u^aqtbCgJbtvl|Hb=&f1CkDLrG zF;H9H$#634sl3_Y(=SCBrDxRioZ4Ob z8+3&fE-z1o7e6t+_T47Tsxn&CO4qCvd_!puzj^__$Mn(MXluD#>Z};d1a?^ zWqS3xH!oZ_ZcgSQ_cC#93t{~=wK?Z7{m>#CU70|DTrv%BMq`KZy8TR1YU&AvYJ*Fc zuY}vT7~7=jeN_G~yc)PRADF8=NfBD(@NWtV@YRZ+U!97WGHgDb7I?~Zn{d(m&B0|e zaoxCct1%k4dwG1A4`Fk9^Loeg?fg4bS3PLTmQreWb56y)aGd4kT4gGj{w_H=encUY zX}I^q3{mLPHWcbU;?nqI6e;qo`b)Nvwsm1<`?Y=u76+@1YG$H1BGCV*Ke%wfM*l zQFZ#;^@%jRqINRl%L~)REq7_6?7xdoiu7Et%fFi+ZD{7PV#h*F(lItLtKp zD?5D@<g2dw zimS_@kc^bhD|;e8^GsKVIu3pa9b=T4KR9L-F1@%j?ksC@`VmzW$}Z7v5e2zwjEFul zpfEACWQ~SZZ&y==czkc0+jNy}p3Xam07w18f`a;Op&khjV8zc~TuGE&F)Ndfw%e39 zHCi6XRoqr>W+abW>JF5BmecTJwwEFkjxlFO>xGxPi%gxruUAEUon9RGIw0X7IxY-* zk>s*)N>$mV6Vu$pavQNcyH>I5$NP_mYZka_4etv&HpJ83nT+7Vhr~H=I{a8XoGkGoGKae`n}Ah;vrS*BdOZZ6NWlbldr2^KmoxN) zIFB7m%;nB(!|2ZK3S%|SdmSPWY}MNz-eJ?0Ym1w8g#2a-<6E&XGUe_+eWzEZdC|=< z5BKVu(wL^;=@;1JO3#C_s!9ud#X35_EtaWg^KJMhN~=xryjZYwW+pnT)}4s%Bdnj+ zsZS3^W$wxGP! 
z#NAP-kGj`=b-w~fej zkypU(eA-lLCAgkf-e6|utYeSid1b?w+w4}8^}Z@v8z)fisKiK#vxIl?wK!HKLVjXq zyj5eYH)?E(KRo=1A0O#ZVW!_TkL;adZ6{*J1&s&BJ=d^i`NUP<3;Ob!L7JQVGkNgE zlV8}%I|`R4dd&?mYoqi-Ee1)$OJD7HOHT=llh3u&+X;^=8h=P{;?em*3AYb7jt+5k zKZ_Ng>U`w(@u>Xu^z8@H=fmUq##MPMG81*vHydnUSeWbZT$?uvXuEl}Pl)S_6y&Rf z?BK#dyHB}k*)7QqHr5L_de#&DXf-p219?8LmRmz&(W^Tv{PsW z!AZ&s{%TSvxz}CDFP~J#z3_uc2mJf2AHrYQObY%65pa z>#vXINFuoY`ak=pXdHH^{+-d<3WbqF!eLtn6y_!RPGp?e^=ArP?q^1zmWjLm;f~RC zl^A32apl^`-xrB;6U^l!Lr^z7JN@l+X|z+j86Wxdop0M99k)Ka&=XubGgw!eYukN6 zSy?%EZFOlj;m?0)!H&k|vnvhCjAqr9AO)&LvymSM%N*IMLl=(u`m2cHg%BCyhRHs0 zyW45TkR$F$!{nqBcKrVJui=A(wS~Q57+-p!+}Dkn*zrO!845(-BtpTB&Jv$a!?i_p z1%L*AP~1vFZN^OqJ92pa)rfQW3bl~1fIjhB(|6t5Xkz)3{d=4K{H^FU-gf@OgSGlq zoxQHA6ew*l2SQeRHt$}4WxS|8gj1^I9A|#+Fc?Pv4#^P+VLPD;#RD=!ZKbmZA^Z*d`(VlXByIr3i4v@!Mi0Kge9 z&zNbX&CVMA)4wyMZ9M-cLc^q!e-zhxmgo29dTq7{f9t?T`{$La!Ig$scfbDqOB7wI z{)<{G_W$J?G>A_hZvOpSp7-~zDCXFz{rR^vjY3xjMHvu83tq23wjARnfBDas{(X6a z4tJLaGV15w+gTMU#wd-Q+fgmEl-mCGGbH8vzeDv>12&zzk^{x=M8E9Xb(`#xrYLdy zJIiYgimRQBo;+A)2=-YSe_!->CygsZYErL%IAPoMc>;AV>RR{J zEHRoz;@-%x`+|Ku57>nlTNV<@@uwW5i)|!Qc`> zzZdik1%39_(yVKqbMF#kYE!_F{^4WbX1mskl_N#d8m9pLC@X9TML?ecK-HYPpNe&H zj?i+soq+UNV)CwCTtt)9>U?)8lBTnQvppI}Q2Nt+FF!V?n}WqH*+k2}>wuUwJH^z7 z(4cKJP!l&Z`L8A6Rj-QsMMKXbunj?-bh0h47vx5a ztgvX$i6C`Z4T3cE!3s2r{{8rzGymG&58zEWC+Sdkw1lH>r+jtgbIc>FqJ_TBC#S#j z2x49}jga%5&H|n|dlSj({_9`{9k9a+=Y`MpRQz_ML5JkJMhov$$8a<;vY$Qigj1TB zwQ{;zYVR0$XXpI))V^=f#`qxaNm;b{Xq0o;W1~+GXmGQvdUwzN1gKgL@lco8 zte6gtP%FIf!Zw@2S?#kEAGehHvRUubC73fai1!4Yz|M{6wxV?YW#5bLBwT&9 zCql%6<+w`JJ_GD1;qJGS<=)v=t-g%1%W5_U@)r6cZ;6MmeTNnV;Lcm(z|D2O4OZc^ z`Y>L!KP}?Av2KS6%dk*G3`uo*j7ou!JOB%pQ8Ph$NKn zjZ+E_GobzW;Gb=GLgWZGd)=uD;Bs!2KkEd%N}zR!|8}d4`(nNBJ;+e}Apa?B%(UzK z_G<5k)%tgufR+qBWfMyeU6F-!swPSO%|wkHV`$i>e{-9cU=^9n&CQX_P0M28_4$Vv z)>c<61o?xc+rX*uay(c?!oe6h-*38F*3fE!*`{dW&bLK;!IYxga=Gm4NskU6de*S) z=Xm4hmnUy2l28Kwa$pRP0PNoIfj1U%hK!k5>6FvZLUh0fh%b7D~wMS*vz#Y{^ zh6G9XBgl@$==N~w4v29KM--mzSc4>|yR)ypg$EnAbwuvsz-kQEyf#9kK@dF1AN5zE62DSeNMMv9egEYS9F%<&66d 
zSdoIw*7tqC-3(j;B!7VRDV&AEz(O#8Fd?!;0{SIw@-;onHWk+)$%QmCD$l;(7c$Ip zJnoyJ!w-3LdN1Z%f4U;R42g82ONTqf2YmEt4_6F&Fbtz_`To<^$+}+R^V=t9jLiUIG;HDfT^=+r+2$a4h`f7^SnassOd*63(9c zc`MKd5Q*14tcGstEtl?vNv+J(*(Umo@P>2je^HLoX8F^Hzkb*EJ}I$Y5HE-P_6wW)8gP@1|;`A*{}oF{(Oc9#XP z`auumOT1v>V8uck97;is!$OzuCBJRU&;K=3ryA%NOrmMdMmkXy$HG5L_n`O#{ycOw z*X%d#GBQplUVrC6N+e|JeeYg{UJ5Ici07K{8pwL`! zi_V*W2~q2Iaa{!##*LESp+9!PoR-u1mmRivQzOs%nw-9sU(tH#=7j{WKkp^!^HAqW6A!;u_=O4M2HgOj#3+>)zC0zC*>_Wfon$EZ`+VM z8QL5#+Ot$rA5M9#U+>88>)gNo6`D{B>4#c3e~`BP`M$OrqJr-;4oP2n;ez*;Dfl3( zrI|rZi{=XMSoVA1C3H#J-i~S;-DqTOq3Mr!HBHUEkTs|4l4y4xU|OV%8(LhyKhkMk zm!LejYX+%u9mdgoKKhr;r@B=Ixc> z`Q`3sk}5VD0T2$p#4#0tqfI#Pa&Fxy{&Dky-X0D+nta&ZGen* zLR*pnI?Jv|sGo;KmMz>J+WoHfO*$KHaD);C3iuN62N!@jA_owjX?>EqyfGP4XyzRK z+`Dle`<cm)85dUjkRs=4L9V> zd2ZD!4tX}gLmW-*Q=O1B5OMMAjEQ8G9yXFa)E3F;ypoNN<253yut6&8E3>cmO@B0Q zXqySOg`&4O{yQf{PY|~RZLk3JN!3joI-QCJZ6cePbt-HA=Z!B_kC>P$@30ohb}X;|i`+NOkJN5S766V3Bz>%wUDLxW z9@ub+uGkM6U^b3j|AQRT+jkCl;5{MqD*kmELMfoP8rkwhD*K`61hKjlnjLDcZBWbI z1w1|6>bVd#gCqmmTLY{OB>tSRX)n+rBK({QpqixBFumU}pE(mc1{K1|df<#f9Kwh2 z-q_Hr0kba|#usy@7cCAs%jApe{k?T_Fc}Q#J<#`jppH0*uqa)jjpJA3zVs#FT}2+5%En(uk`!os zFdhnjpr0##U0H!dO`tA(CxdQp~ zaUhA~8BWaG_+H?g>2o3Y93TY^0s1#Lba;8X ziWKkddoSeQ8=bjHWjE5pYTYY&6TQO(F+)^$IoUxqT7nQyX|q=qXoEh*PCs_LM;;MW z_AZ5wyvWzS!hHa`B08D39~&@-S4cD;r=fbYg1TATpv~44 zp!D@G|8}%BhcPk6HQ7M#LZcP*ZCo>`Z<7Lt<26&*o!r?*%xto=LC#1obYtJl^6kwG zFR*AzpPU8U`EP(WlyF?WP|6j7cU-5MLos^FK%VSRZMCp~I~zq>Wj|)UIt-^@$`2EQ zH@+UoDP4k4&g+Kc>GGNSAMfS%Z8E?LvfF<<8E4g+-3dpdK5`xMy$3Wmgs?r)6Rhg{ z>a>Y(IVV;C0jujG8>xn6jL!QT=W|CgNhRP%*oaQ!oS}G#g;Tld2X0r-y*DaV3=9|2 zKM=6&!)l7!RD`nHldW zZp?fDasy#@kNHm9z}zUgBJfeZ07e&gX*CRQoFmjK55|aCXfNMSRF2fPSb%`n#W|s9 z303)In~iw+a$bK-fM22wV5>$WKQmnG_8`PwIbJtlq+b2^*vyU4urUOX1WLM=aC$h< z>W|7aT@b-xgd!RN)YZji3+MH`a8@S}_np3Tby=omaqGXWgFo^KlA-bDR`BsvRE{8S z7JyPlaKo?K1>6<~6FVVG+i4Qxa_6UcN?MnJXn1kQ^w7CFhlQhnr5Pkqrv4-XfHm>VGSOo>*=xvtcs`AylmqQi%@d<)KwPHZPab| zDHG6-FRhM`A-D%SHW(=DeXj^B><;EuWvEM3Wt8*X@i=Ekq%`)P9)LD813z%1fL@Ui 
z=D$w;TJ_q3Y^UdHzh@Us!*^if)rtZqVWV;?uNH@OJPr774Pax=XY)rEJU|biINW3K z3;K|&=G|+I;a>Ee z&`GDUwp?W@qn*SpOIlYND1V03zO>8Z93(I&Uk7mzwls;OaL?Fwwb zXSpx(yyUe>t~GU6y)nqt_qxswF@yAieq|4pU;`B%0=#Dm`aQ*8Zccz0)d`)qCgILK z8zy^Dx5WU8mJ7$b6mnRWfZ4Clk92q2jYBu+E|QRf5Mwwof8)HmR*y(}g$qtl}=F?G*jn`dmUr_zDSfDf2 zK${W)QwFsvHa=sJ;*T}q4F>>tGzCuMPwV{A3mrx7-I;bvlyQ7)-^LpX>6%khx_)Uw zq&4w^SgYmfl`~XQCAh<;;~U3lYIvYl_}DR}FmH@ap~#N?r;f5a|GM7^c+fKv(($}o z&f(;ZXn0KKsV08Xo~z66fo9?hR4O0sG~F`#Wa;IJQrqZ|NtI{`pVQyZ%1M*BsTvg^ z(4twE&8MUQsz7!$!;KY(L8xaxJ(fmEF`cX%+CH*Wkl8C?GyAI|bMyv_rYia6B1Nqf z1En_xlVE48f5gk@LITH!hKhResO)Yk_ZoI_Q58?R3DYe3>Q zhAa@u*+!T(QgZ<=VFbaS0pVRCz2`JtLNZR^FUk&tIE7bhhwy*5<~*&>k!NS)_*1XbD2O0U$q{;(C0DqknE(pH{Dmvb0sh$zPAMH>6{;SE zf-pJKi9>vaXRmDREuefka_Px7DCHUgf~g++6(r?FkRO(DaN5$#y~3W4(I=Y{sM{lO z$d1i2lge(oP!G=Bsxg+QsIMutG7Di;ZqR+INLHdC)L6)5OMr>T4lLrmbAZ0;R%TKU zIUPWFi-WW}%k;-#OMrAcu^meW@>F0t1bl)M@E-UReqh-{e-SLgq}%o$zgqJb%TE4a zQhgVCbc1DAHm=B2hjK$e@e=*7*%|@6uPf3-d z9mU0eRWBl^9HP`99oh0j)l8ip)oU_{+!!>ahAu;d>i|9y^CG0|zT);qQ$xQ*jfDl+Ta%mNH0=>}nxcu&RKN_ih<8`l14WjNw3( zRxyGQZq)Pj`J^q7a7w_7#~Xg3q3ZZy+l3)XKk_YB)(}s}8y38Km2s*V&B zHW)2%8gD0vIXs}YH`rih(d`I%d8TVmxSrjW0f3bTCqdlIp$jy~Y#%PFXBm+hx3%iC z_D9*Pv?ct4!pB_)7+jUv6A!6=DZX%y4pIC%FZ71bCQsBV370wc`UN8_2czM%oK^E9 z3REK{P6w$7k<$W%M$XPhN1i>ZY|6Am!dL4y;Bh`;)t3a?dPB+gSO#VR?@2ga#KuK1*&>zbMw0m_4S}7`Co$VSC(l}N` z%Zv~+bTT8Mdx65$CkKTKx1}Takm48jpw=1jvxNMYgv| zZ+u?+YTc1a31J@~DQxR~cbGfC2&j+M{Csy{DZ=w%{Kiy8>q6*4Tvr^*CYCm|82LNQqdd=34zD;R71@eYJ%^ z8%B7DKEP%00|sQd+_zr6F+{6~5W-Tj97sMt5xS@Fw?>E*6C)q0hm`pa_eYR~-9K)k z9nf;z*h9nB_X@z=1;|v7%?vjQ8(dZvia-90UHnu5=to?qLoXPh4eYxGf^mRjb^`4t0Nce&el+muAL%H-@?nG0 z^?6|x(D|&9LQ9~7p9GEbE~8;^`{=aZhg)~jtt?USgw!aIq%?JY+`{<8unA3Mg$&$J zV;;i(s|~=VCJiFVrPGIXCf@aVz=sM(X`pfHk`zEKLh(YwT#1-RexcRTl43IHc!$9K zr;mFuTDkvwyz7^RFuCD3R$7)QiF}uNpRv;BQm@i*aM#uAD z7Q+8x7Eo&lvK|W#h#>=zg-h{E^xZft0T@0-5@v*UsYd}<4q|rYy&6TW5UpVbhU@H( zSgH|;8~DfgT{1U5dYv$A$dl($^z_PjIdvIv|CPAyny9aDPUhAMGBxE<+XXsD{vLnOSi>4nWIJK#Bs;!iY@d 
zER^nY0W5+gkgJ4!6Z}kdVM_eCJoY^UOy0TmM01^Bwl!H|j3(Ui-Nxbfy5pdb2?Dy$ zjWin#*5X`4u^WE_`pTfn{tb;5X+%S4D7;V&#N@j_l{q8t%ze$>v40ML&UmOEUPxxN(dH^bnyPA+%-(f?eK) zJRLp4G9r#{OZcg6ePj-z+B@_l)iujMm#*jWiH6l1L5%neNzC6gH*Q^Io(J~B#`z^R zVjh@j5qRkmMvsYykjS!mM?^#bKLZ%O|K7(#BmzGHG*GNSL&sucP6;4{-DPzQvda?_ z)3Iw(>xU9Hc^haC8dUA6wuY7@I~G)a0+3q*L`tL&b!!lMHh$eiq`w3M6a3h$bI<=3 zq6R=Fx8A*G_;wff$&rbQ~uBxVp_Ypv!N@ zP|0gyZw(4veSQnB;`$)uY)3E;M6v~<5A;vh;k3WP@hD}ihU^kfW9$e##5nT&NtU}ftkqMu1?7M*IRNeE!6Q}`2ZH5hYS)tF**8C#Vbf;cSI~_w1q$q zgytC6FjcDOE;0I^t|_Qu=2yl&xhQv)CWRDp5S-Qr!DmPIdr*yo&?(AuPBL`{lR2FpZ^K~7l@Hyx8`$G^gx+iwO4mhdJTcpMRSMW|rrZK4mVTB_Suo{@7RRfeboou8HmMg?eWmAO67W7oj z8Gt}&Z9AX;3)IpHQjso~Haq5NkeKopEKiYKgKtxfQ=ZWLoopLL;CUE0SGHNG-!T^0 z@SU?whE0+ARM%VoJ4_iVg|0a({!Tma@oPiBA;G@z9Tl3wdI5sHhm6yRa~y8WaH9ek z^ziYLktuGuP_C39laY|ttuua1NS`=RMGZQ095g6!wna!Ir3jS?`WrWYi~(N(88^2M z@ck7j1SG*g};~H)hupH08(X%7aY$!rA9TSFrmoIPoE6 zDkVR}X(ZKJhsuEqxGNyL?eNuf0~sjR-hLm=-mp=j_pTPYr8bC6f*av7WZVWfARgs9 z^lmc9j<+4=rucdIQ^`!La9Ds80^~6a99$9(rT~j^D-OfJK0!swfq$pfSTEfLaYTf{ z0U)LUNRL9sMAk*Gt^ZQf0=W#vChk}#?KB}i5E}p61Er=eOL>AA<<16zoID3sPERP(ISt5nFy?E!E)8e(1XG}DtCh>I)rQLeJX(DB*V*dI^4=AKhx}~z1qevR)0pb?c^D; z+Rase?Bxl+a7F!$<{wRe{J6YJgn9R;oy;Q2ntxPW-Tw8Pz>hzw+D>I;EfjUQnZ%5{ z*u+wdKN=LSjq5duH+?ec^f}F)(6JJWy;GA@W`xMvvQJndatQ7FZMrYwCGB5J-B^G3 z?E1SbzB>{17+(-ZNyEW}A%yn#yMF8Mmb^!V+cHgU_-R#S_gB&-VS_c=VAQ)F2nk}KEH2=uBY*ZP{0@xkiZdpBN;XlCxOme z=YjhIoQM(xozpgSpEZSnk(lM8V@@01Gm)Rj4d5lxLO>uuL`%FlH_8?;2x_ht{H145 zF5Yc64)=jP>F_t`0jxvKFb4sYX%i1t%;oGhytlXc@fiNrEn9aa6?ykk5btLKBA9y7A*t24mRy!J&0iHyUv>;FfG^*=ZH{};FWpPl^APX2!=OZbnu{>NPZW3K>_mLm*VybD~fxO0Z2$=zPPxI1>+4Q%oh9^B`T>p+ClZ@i9^VP6_8>YNgULQp=`Lk-%{na*8MG8%_Mu!h; zUtY4wt&((RzL@%AuH!D7Qyoc6{xhA~+D_~>k+mx3o!R^ z)WlYMCX}>+RMF+tRh`C@+(j zZ%`jke^XbZX=5f6PhF*01#-#C7;I_;WwEpI&j&R0UHyQnGJx&n$@Se&OUV&_@)tgaLrR+4Q7^7zo_+Y^{PzxEh?Os>^Cp6&J1PjA=%z!^IP1ik$#}In4hE^k4!}q~}lkCX@iPV3q7spr^2w=vt&P zBQw%QFNsK{t#^FFE79Wgjv2VYMqI=LD+K|rarwNN>1^ucI!J0(_rsC!g?a}n&$I# 
zgIjWJxVT;5^1gv=YWyrMe16AHAk!3gVS(J_cy{n@aF^WWN#OsABai=xv*{>WH(vy` z1RcDs4#L2E$SAX~_|3Qn0xlXZRHC~q{H;;*CklX~l7YP>ZdE1H^kljop(rB*w|wb& zZ$dxE=tGrgK(T-Qr#A~$Aky3J2@gXUDrR8ziD&zD>!@;Si(er;9zx_guJI@qhAR6a zPZgM)4dK-+IUZ@bCqXrq({T9l05UJQ^=KJ*&|96W&>pP!*or!bIYNWB!Zq6zuav;p z69yB{r|?vxrdF(Alg4Iuo9~CG=*J_ocuXlbAA`^qwu#pwv@+P!=j>;yLq=SZcUKy9* zVbpXB*1qrCb^w!8MrmnLmw60(gHAOeT7-sp#UWj{U-xN|QNV0~rN=CGXvhT!z*x__ zdZxPvP0VC-`rg9?tnAfcHFf05k5&=raTWy!n2TKvmolKgH1hMF0~q_41{T6In94AW zh)(HZVRDy+k%lFV+e8RK88xf*i89P+hpbq2?0xU!^EPhXY(_A?I1nGXgx!R4mYWXM zj$o?_51WaWUG8Mez%3&qJ@ClXIzJ?oSQD zi7MWL%5sONeGr}f=K)yh8oq>C(>+{uOv2A3R7?A@{WW{jIC*UDFq&XP985TyXckA4 z%qTDR(etsx;%Jw@K1~6$w$7okh6Q*M2M01Sk^3frH4R!$o$#Ul*jV^I;%+!y|MC77*$EG6zU<-&AK}mG zUpnB?ASat$0ZxM^DFjUoR*t`Jpr3Q~cmA;jQYIvB=VqNT) z;(+7kbBOPNN&u$qd*JaarTbopu+lkhc_)|T zF?-o+FoJuS;+W!djlUnIcKm4{wGHt4g!U%B+~Y1QoQ3HMp1v(4M{*SyFq>!g-2IdZ z)zMc`cH2?ySDHBJmV3KRr)ZSPNHV$)x>aszp?u6Et|wmgnO4Qk$~)PgVU61KTGx~mvqFhtIes9nItw@|WuTj7 zL7o)VLU_=EPD}G#S)5IAUjsUKz_f~lRa<%x_slN!rL52n?>;H=nP4@AH$?1EPVu=` zaL)&~aA+qKikgqFHs%y*VU!!S3|&_qaxEY9y-2R&VLBo5c$i6lw6*auQP(ODTt@fW zZc{DHUaZ7ZsW{1O=F50E>hrpS{d9pay4RmZ6y?RLLAv*5R803+?Q&=hL7(WST{a1f z{cis(PBn8OtsiOG6QEuw8#~Csl-H0XW8<+K;WMz|XP5Nq4ZCi-VCc$%

y@xlRi}OS5)`&=Q{m~HOhnsDCUPUdcAX@w zNN#gHm3&LxNRt516#jdRON5D-~k@1jQ{A&?HQQ4_9RWt^;EKj1)_GJ zchJJ7NP&VMKBW4W?BN_h<#b5$P78P{;g zN9Vb=+nHiq-A57GGvd?@-DZ*JZp@=LM%HMz`lL-d82f53nuNR9zAzNum-vl#*1=ET4t^3k^>Np zbQ4{IxFTk&So7`R<9@>CB!31e&ErgFV@k#5IH|O-E!*HoUcGiX#kBEg>fVY#iJ_#k zCx`3p2h4q-${zt|otMH0uU-bf8r* zsq9VZ7SAMF|3Tsf=HiyfG|Yh+EMZNZR<6-dD@o{}v}t|&kn0&IzR%{&nEu;u?N2!T z3d-&tIC#Wy5ATGzu@v(tOIpnD0kutX$|zH|BsR6{SNy*CFWwZj+WTXHkywGtEBqnh zuD6s($W;#{S~{G2L);-!ZEAbbFTNo-8#Tn&_*Arohf_Op%5Wv@93eF2^z%{AAbNSR zbH~k%ukB9DzwM))!_4RC>T*FIIC2eZ)OwuBG5)CM@)GGs^iKe{~CAu=S;y zW@guq^DyDGzQ`VQ$xZ2Jt|t4p!=_PKdsx{B8>6GI=mvZ_tze0=2s~Ur=X-D53 z%rPxf#GVjIp^v0vX^%eO<;)ytB zE`l>%)kEFTMmmiPo-uRRhCaRJyuf#EcvCO(#6EOZ-C&PkT7)B$V!Ndl zT5LBsj1@M+7xIs1^rhYEuJ7S{=BSw4Ct!uWVj?&2VGEPpW?3#7|g4C8s2AQEneaSM;^{fLf;*4{=-4U@}|($FaD*g7CN7=f<34^`eS`<+>I zP@pM!%`pzXGnd!_?rO_yO39pinl^t3GKdS|5T-f~yuI=6`w^F6ehh7!`p`_o z@NKJ}L)b$VdM+3f;ifN}gL6@e7DE!UU@PI-BsmknYy#1bw@aU?=d`L)UvwWL{xHw6 zJma)^IS2eP%tU|4GctxeyOGI|;J*d~aRd0Jo~F0d9x{`-ulMQN(m_Y7XC-MZrH;vX z?TlfJpyRJsC7G9HRcYT2R@aEgRB>1*#9sMGW19cd&+I8@>fp0sgiAGsZ#@4YBJoVq z+tcbO=PU$XZdYj0c^!;LD4I2`N-&YA``P5i2f+!=JzmaOBjawrtT55=b{ z-x}`AmSjsMG)dgLKrtLnw@A~0QCOpdY62Sb>6&17)$;4c25nq?%*%mQDhkys#?;0*Nv%fQK%-&CU3ZwSZ?@>mOryxAc*gS>M zzdGv7^B8zFE?a+o!TID5RKuIRY0DVM2@&bQ@stYJyGh~~(JtFZ8`5Jnz58}DlVJi* zAl?%a?Xq~u=E<<3 z9691tSkRJpx*#9liBPoKSl?+KC0Hd#Yo5^+QA|?S^T{fCKWEJCur>#ePTWJe$DPHM zxoTjI;@MjzZC0GE{oe8P{W)cenA!%+cIJACm+JOGx}~$yj$Mz=P|Vt|BtEt%!!Yix z6Qq4Q=a{0eM;4!*=JG;AC5J*q#f4hZR9d9x9dEyqR{GsLy;CFK>B3r?bUpH5nE@C& zQZRPO?~hT?PjjklJ5z<@_u1vf!PeNnHQhI(QWW* zCCEhieo2n`BsM08<32iVZ8X0)03|?qtsSoNk)u%}$crxO#hNwfj)v>XH*%VDTuaEF zfVsY3+*`~m>gnd7!nY_^9Szleg$VJwrGHAyd^3oCqir9p*RJu=fPGQY^JNDkic7YN z;4Qp;$n^&wAv=DcGfMk|#9gX*{i=Hwm|rX*6lK;O;3Cbnd}S;4ujM z3&;Z`mIgoHC0Ll_uJ9|(4iNn9X+}WG`uX4z47L?rP*D;XcbJoXl+)q!Xv42p4ujb>U3JpWf~*Z$PR6@??xf>;F9 z;0uU40z#x*sOwXnM3B5n<%-+4Td-p!h{mwbx_sumjH!WH*nF3hwguK2L z<)#KF(A>GOpLq>u#Pk{X!mG0tJac_-OTwGIS7qGnGmB!nO6YbEXz5?#&%FM$+IEq@ 
zNOP_jxGi+V=!-sFxov}beSXy&0rd-ku5Fk1VA82XRj(D%J| zXBG-dFo%U*!3?|*(hJI2H3Cz3RvEN6x9&yT@!-Vc2T1P=^{PXyeO}Sy*2(1in7foF zOw4?ah$?JK6NF>e6w18mSVazNLS|KMxxoX>BhmA%N5k66Tuw_m7_vw;;FPFI7$QX{ zEyS|v%C6}CR#|e$H3Ycx8yxgI?ZDW?{ZWQTa?B*8Op^_fHw#n=xMK8yx1#ap}!A+z@Qrk&0wc6w% z+N=Qyo-2abX{B(udXj7YgBI>nwrRb;XC<|{75PDUze3h=8r0x;PbO?`$W5@6HVN_m-4UpJVmy~cK3f%EbJtUh}um7_UDQ2 z!!uIWg`SU;C9dSb#J?lbdXQYS+sP|d7Z?3yLS-*#xVja=5)7)}Lq{5P^_mOxqfOy!$Th z>wTSgxCQ7i)6;bj8sBf|~SlZ)p>lA%c)}vTS+jJ^+7^`Hp436TWN!$;rfaHph2(oX4dzMm=P;XnP$OC1zI)p}QJH47o8tK?k>n%fF7nhXJ&ihHH z7VeiSeQLIT&%j4@PxS-Cr%8qHzU(3Liaa>zeT@j%Q)P|d#0diHB(RF$AH+I3Vc1L* z%_n6t$AOYp?fp}S@YO~~ zf6=m}wM3fEU_;WZqjFpu0kcNdBByz`S}hVXkpc{FEKj;uIT w`0jK4hc2-N=p9kXVdbI;h>E=Af2{f>o0PtllDB+nYa?nqbVtN?C1Zc?zm|a|MF0Q* literal 0 HcmV?d00001 diff --git a/marker/processors/llm/llm_form.py b/marker/processors/llm/llm_form.py index 8fb4a32a..a47bad3c 100644 --- a/marker/processors/llm/llm_form.py +++ b/marker/processors/llm/llm_form.py @@ -13,13 +13,14 @@ class LLMFormProcessor(BaseLLMProcessor): form_rewriting_prompt = """You are a text correction expert specializing in accurately reproducing text from images. You will receive an image of a text block and an html representation of the form in the image. Your task is to correct any errors in the html representation, and format it properly. -Values and labels should appear in html tables, with the labels on the left side, and values on the right. The headers should be "Labels" and "Values". Other text in the form can appear between the tables. Only use the tags `table, p, span, i, b, th, td, tr, and div`. Do not omit any text from the form - make sure everything is included in the html representation. It should be as faithful to the original form as possible. +Values and labels should appear in html tables, with the labels on the left side, and values on the right. Other text in the form can appear between the tables. Only use the tags `table, p, span, i, b, th, td, tr, and div`. 
Do not omit any text from the form - make sure everything is included in the html representation. It should be as faithful to the original form as possible. **Instructions:** 1. Carefully examine the provided form block image. 2. Analyze the html representation of the form. -3. If the html representation is largely correct, or you cannot read the image properly, then write "No corrections needed." -4. If the html representation contains errors, generate the corrected html representation. -5. Output only either the corrected html representation or "No corrections needed." +3. Compare the html representation to the image. +4. If the html representation is correct, or you cannot read the image properly, then write "No corrections needed." +5. If the html representation contains errors, generate the corrected html representation. +6. Output only either the corrected html representation or "No corrections needed." **Example:** Input: ```html @@ -37,12 +38,9 @@ class LLMFormProcessor(BaseLLMProcessor): ``` Output: +Comparison: The html representation has the labels in the first row and the values in the second row. It should be corrected to have the labels on the left side and the values on the right side. ```html - - - - @@ -95,4 +93,5 @@ def process_rewriting(self, document: Document, page: PageGroup, block: Block): block.html = corrected_html class FormSchema(BaseModel): + comparison: str corrected_html: str \ No newline at end of file diff --git a/marker/processors/llm/llm_table.py b/marker/processors/llm/llm_table.py index a6f0718b..1ec1f8cd 100644 --- a/marker/processors/llm/llm_table.py +++ b/marker/processors/llm/llm_table.py @@ -34,21 +34,21 @@ class LLMTableProcessor(BaseLLMProcessor): "The prompt to use for rewriting text.", "Default is a string containing the Gemini rewriting prompt." ] = """You are a text correction expert specializing in accurately reproducing text from images. 
-You will receive an image of a text block and an html representation of the table in the image. +You will receive an image and an html representation of the table in the image. Your task is to correct any errors in the html representation. The html representation should be as faithful to the original table as possible. Some guidelines: - Make sure to reproduce the original values as faithfully as possible. -- If you see any math in a table cell, fence it with the tag. Block math should be fenced with . +- If you see any math in a table cell, fence it with the <math> tag. Block math should be fenced with <math display="block">. - Replace any images with a description, like "Image: [description]". - Only use the tags th, td, tr, br, span, i, b, math, and table. Only use the attributes display, style, colspan, and rowspan if necessary. You can use br to break up text lines in cells. -- If you see a dollar sign ($), or a percent sign (%) associated with a number, combine it with the number it is associated with in a single column versus splitting it into multiple columns. +- Make sure the columns and rows match the image faithfully, and are easily readable and interpretable by a human. **Instructions:** 1. Carefully examine the provided text block image. 2. Analyze the html representation of the table. 3. Write a comparison of the image and the html representation. -4. If the html representation is largely correct, or you cannot read the image properly, then write "No corrections needed." If the html representation contains errors, generate the corrected html representation. Output only either the corrected html representation or "No corrections needed." +4. If the html representation is completely correct, or you cannot read the image properly, then write "No corrections needed." If the html representation has errors, generate the corrected html representation. Output only either the corrected html representation or "No corrections needed."
**Example:** Input: ```html @@ -238,5 +238,5 @@ def parse_html_table(self, html_text: str, block: Block, page: PageGroup) -> Lis return cells class TableSchema(BaseModel): - description: str - correct_html: str + comparison: str + corrected_html: str diff --git a/poetry.lock b/poetry.lock index 8322e04f..0234f789 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "aiohappyeyeballs" -version = "2.4.4" +version = "2.4.6" description = "Happy Eyeballs for asyncio" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"}, - {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"}, + {file = "aiohappyeyeballs-2.4.6-py3-none-any.whl", hash = "sha256:147ec992cf873d74f5062644332c539fcd42956dc69453fe5204195e560517e1"}, + {file = "aiohappyeyeballs-2.4.6.tar.gz", hash = "sha256:9b05052f9042985d32ecbe4b59a77ae19c006a78f1344d7fdad69d28ded3d0b0"}, ] [[package]] @@ -1092,13 +1092,12 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] name = "google-genai" -version = "1.0.0" +version = "1.1.0" description = "GenAI Python SDK" optional = false python-versions = ">=3.9" files = [ - {file = "google_genai-1.0.0-py3-none-any.whl", hash = "sha256:e9c3abd48f46ecb2b0a51efa7f65c6830b50f9784df603a91019b43918a7531f"}, - {file = "google_genai-1.0.0.tar.gz", hash = "sha256:15712abb808f891a14eafc9edf21b8cf92ea952f627dd0e2e939657efd234acd"}, + {file = "google_genai-1.1.0-py3-none-any.whl", hash = "sha256:c48ac44612ad6aadc0bf96b12fa4314756baa16382c890fff793bcb53e9a9cc8"}, ] [package.dependencies] @@ -2267,13 +2266,13 @@ dill = ">=0.3.8" [[package]] name = "narwhals" -version = "1.25.2" +version = "1.26.0" description = "Extremely lightweight compatibility layer between dataframe libraries" optional = false python-versions = ">=3.8" files 
= [ - {file = "narwhals-1.25.2-py3-none-any.whl", hash = "sha256:e645f7fc1f8c0a3563a6cdcd0191586cdf88470ad90f0818abba7ceb6c181b00"}, - {file = "narwhals-1.25.2.tar.gz", hash = "sha256:37594746fc06fe4a588967a34a2974b1f3a7ad6ff1571b6e31ac5e58c9591000"}, + {file = "narwhals-1.26.0-py3-none-any.whl", hash = "sha256:4af8bbdea9e45638bb9a981568a8dfa880e40eb7dcf740d19fd32aea79223c6f"}, + {file = "narwhals-1.26.0.tar.gz", hash = "sha256:b9d7605bf1d97a9d87783a69748c39150964e2a1ab0e5a6fef3e59e56772639e"}, ] [package.extras] @@ -4556,13 +4555,13 @@ snowflake = ["snowflake-connector-python (>=3.3.0)", "snowflake-snowpark-python[ [[package]] name = "surya-ocr" -version = "0.10.3" +version = "0.11.0" description = "OCR, layout, reading order, and table recognition in 90+ languages" optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "surya_ocr-0.10.3-py3-none-any.whl", hash = "sha256:9831e6aca929f60374385cf40ce79a7a70eefab4f8508fe6948bf49a33487937"}, - {file = "surya_ocr-0.10.3.tar.gz", hash = "sha256:c78b3db6daaf324fd7c976e8ac100a15827cb070339744d76f3bedca00e7aad9"}, + {file = "surya_ocr-0.11.0-py3-none-any.whl", hash = "sha256:2314a04d6aa2f362eefb14145b9d1b2c5b6568fb287ff8205cc0d580b9a304a3"}, + {file = "surya_ocr-0.11.0.tar.gz", hash = "sha256:c13475981929ad1a50e0151085815bbff183f9f328d2efba9b77c119e9ca754a"}, ] [package.dependencies] @@ -5451,4 +5450,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "0ab5205db01e1abea947536074593b29b16347a16ca5e9489c024a2c3a05df8f" +content-hash = "d98a730ed15cb2a34a91a60062f5d6faa7eec256b2c42e79d868e5f0c9874c94" diff --git a/pyproject.toml b/pyproject.toml index 36cdba0a..9d4cedf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "marker-pdf" -version = "1.3.5" +version = "1.4.0" description = "Convert PDF to markdown with high speed and accuracy." 
authors = ["Vik Paruchuri "] readme = "README.md" @@ -26,7 +26,7 @@ torch = "^2.5.1" tqdm = "^4.66.1" ftfy = "^6.1.1" rapidfuzz = "^3.8.1" -surya-ocr = "~0.10.2" +surya-ocr = "~0.11.0" regex = "^2024.4.28" pdftext = "~0.5.1" markdownify = "^0.13.1" From 264ed4131bb2455a9a9901a302c7e5a1e61e72cb Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Tue, 11 Feb 2025 11:00:15 -0500 Subject: [PATCH 27/27] Update README --- README.md | 64 +++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index c06670d3..f1a52aa9 100644 --- a/README.md +++ b/README.md @@ -10,26 +10,6 @@ Marker converts PDFs and images to markdown, JSON, and HTML quickly and accurate - Optionally boost accuracy with an LLM - Works on GPU, CPU, or MPS -## How it works - -Marker is a pipeline of deep learning models: - -- Extract text, OCR if necessary (heuristics, [surya](https://github.com/VikParuchuri/surya)) -- Detect page layout and find reading order ([surya](https://github.com/VikParuchuri/surya)) -- Clean and format each block (heuristics, [texify](https://github.com/VikParuchuri/texify), [surya](https://github.com/VikParuchuri/surya)) -- Optionally use an LLM to improve quality -- Combine blocks and postprocess complete text - -It only uses models where necessary, which improves speed and accuracy. 
- -## Examples - -| PDF | File type | Markdown | JSON | -|-----|-----------|------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------| -| [Think Python](https://greenteapress.com/thinkpython/thinkpython.pdf) | Textbook | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/markdown/thinkpython/thinkpython.md) | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/json/thinkpython.json) | -| [Switch Transformers](https://arxiv.org/pdf/2101.03961.pdf) | arXiv paper | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/markdown/switch_transformers/switch_trans.md) | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/json/switch_trans.json) | -| [Multi-column CNN](https://arxiv.org/pdf/1804.07821.pdf) | arXiv paper | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/markdown/multicolcnn/multicolcnn.md) | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/json/multicolcnn.json) | - ## Performance @@ -48,7 +28,15 @@ Here is a table benchmark comparing marker, gemini flash alone, and marker with -As you can see the use_llm mode offers higher accuracy than marker or gemini alone. +As you can see, the use_llm mode offers higher accuracy than marker or gemini alone. 
+ +## Examples + +| PDF | File type | Markdown | JSON | +|-----|-----------|------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------| +| [Think Python](https://greenteapress.com/thinkpython/thinkpython.pdf) | Textbook | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/markdown/thinkpython/thinkpython.md) | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/json/thinkpython.json) | +| [Switch Transformers](https://arxiv.org/pdf/2101.03961.pdf) | arXiv paper | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/markdown/switch_transformers/switch_trans.md) | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/json/switch_trans.json) | +| [Multi-column CNN](https://arxiv.org/pdf/1804.07821.pdf) | arXiv paper | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/markdown/multicolcnn/multicolcnn.md) | [View](https://github.com/VikParuchuri/marker/blob/master/data/examples/json/multicolcnn.json) | # Commercial usage @@ -68,17 +56,6 @@ There's a hosted API for marker available [here](https://www.datalab.to/): [Discord](https://discord.gg//KuZwXNGnfH) is where we discuss future development. -# Limitations - -PDF is a tricky format, so marker will not always work perfectly. Here are some known limitations that are on the roadmap to address: - -- Marker will only convert block equations -- Very complex layouts, with nested tables and forms, may not work - -Passing the `--use_llm` flag will format tables and forms properly, and merge tables across pages. - -Note: Passing the `--use_llm` flag will mostly solve these issues. - # Installation You'll need python 3.10+ and PyTorch. You may need to install the CPU version of torch first if you're not using a Mac or a GPU machine. 
See [here](https://pytorch.org/get-started/locally/) for more details. @@ -94,7 +71,7 @@ pip install marker-pdf First, some configuration: - Your torch device will be automatically detected, but you can override this. For example, `TORCH_DEVICE=cuda`. -- Some PDFs, even digital ones, have bad text in them. Set the `force_ocr` flag on the CLI or via configuration to ensure your PDF runs through OCR, or the `strip_existing_ocr` to keep all digital text, and only strip out any existing OCR text. +- Some PDFs, even digital ones, have bad text in them. Set the `force_ocr` flag to ensure your PDF runs through OCR, or the `strip_existing_ocr` flag to keep all digital text, and strip out any existing OCR text. ## Interactive App @@ -479,6 +456,27 @@ Options: - `--use_llm` uses an llm with marker to improve accuracy. - `--use_gemini` also benchmarks gemini 2.0 flash. +# How it works + +Marker is a pipeline of deep learning models: + +- Extract text, OCR if necessary (heuristics, [surya](https://github.com/VikParuchuri/surya)) +- Detect page layout and find reading order ([surya](https://github.com/VikParuchuri/surya)) +- Clean and format each block (heuristics, [texify](https://github.com/VikParuchuri/texify), [surya](https://github.com/VikParuchuri/surya)) +- Optionally use an LLM to improve quality +- Combine blocks and postprocess complete text + +It only uses models where necessary, which improves speed and accuracy. + +# Limitations + +PDF is a tricky format, so marker will not always work perfectly. Here are some known limitations that are on the roadmap to address: + +- Marker will only convert block equations +- Very complex layouts, with nested tables and forms, may not work + +Note: Passing the `--use_llm` flag will mostly solve these issues. + # Thanks This work would not have been possible without amazing open source models and datasets, including (but not limited to):
LabelsValues
Label 1 Value 1