From 82b97caeb9b15a17b9e39e3e8d8cb419d64e717e Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Wed, 15 Jan 2025 12:12:53 -0500 Subject: [PATCH] Fix whitespace inside tags --- marker/renderers/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/marker/renderers/__init__.py b/marker/renderers/__init__.py index 5a53432c..e45744e7 100644 --- a/marker/renderers/__init__.py +++ b/marker/renderers/__init__.py @@ -47,7 +47,11 @@ def merge_consecutive_tags(html, tag): return html def replace_whitespace(match): - return match.group(1) + whitespace = match.group(1) + if len(whitespace) == 0: + return "" + else: + return " " pattern = fr'(\s*)<{tag}>' @@ -57,9 +61,6 @@ def replace_whitespace(match): break html = new_merged - # Replace consecutive whitespace - html = re.sub(r'\s+', ' ', html) - return html def generate_page_stats(self, document: Document, document_output):