From 986c2ab0a35b180f746ebd24a73a03dd7eb47829 Mon Sep 17 00:00:00 2001
From: Akhilesh Sharma <sharmaninenine@gmail.com>
Date: Thu, 3 Oct 2024 17:28:09 -0700
Subject: [PATCH] Fix OCR result assignment and hold ocr_lock in LLM table path

---
 services/task/src/process.py | 43 ++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/services/task/src/process.py b/services/task/src/process.py
index 97aed1ec3..7c90e2fed 100644
--- a/services/task/src/process.py
+++ b/services/task/src/process.py
@@ -52,26 +52,27 @@ def process_segment_ocr(
 
     if segment.segment_type == SegmentType.Table:
         if LLM__BASE_URL:
-            def llm_task():
-                detail, response = process_llm(
-                    segment_temp_file, table_to_html)
-                return detail, response, extract_html_from_response(response)
-
-            def ocr_task():
-                return ppocr(ocr, segment_temp_file)
-
-            with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-                llm_future = executor.submit(llm_task)
-                ocr_future = executor.submit(ocr_task)
-
-                detail, response, html = llm_future.result()
-                ocr_results = ocr_future.result()
-
-            segment.html = html
-            process_info.detail = detail
-            process_info.input_tokens = response.usage.prompt_tokens
-            process_info.output_tokens = response.usage.completion_tokens
-            segment.ocr = ocr_results
+            with ocr_lock:
+                def llm_task():
+                    detail, response = process_llm(
+                        segment_temp_file, table_to_html)
+                    return detail, response, extract_html_from_response(response)
+
+                def ocr_task():
+                    return ppocr(ocr, segment_temp_file)
+
+                with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+                    llm_future = executor.submit(llm_task)
+                    ocr_future = executor.submit(ocr_task)
+
+                    detail, response, html = llm_future.result()
+                    ocr_results = ocr_future.result()
+
+                segment.html = html
+                process_info.detail = detail
+                process_info.input_tokens = response.usage.prompt_tokens
+                process_info.output_tokens = response.usage.completion_tokens
+                segment.ocr = ocr_results
         else:
             with table_engine_lock:
                 table_ocr_results = ppstructure_table(
@@ -82,7 +83,7 @@ def ocr_task():
     else:
         with ocr_lock:
             ocr_results = ppocr(ocr, segment_temp_file)
-            segment.ocr = ocr_results.results
+            segment.ocr = ocr_results
             process_info.model_name = "paddleocr"
 
     process_info.avg_ocr_confidence = segment.calculate_avg_ocr_confidence()