From 7eeab9e3762374e65a5a435f80881f8d4c380354 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Thu, 9 Jan 2025 22:31:59 +0800
Subject: [PATCH] fix code block display bug

---
 requirements.txt                         |  2 +-
 shared_utils/advanced_markdown_format.py | 30 +++++++++++++++++++-
 tests/test_markdown.py                   | 35 ++++++++++++++++++++++--
 3 files changed, 62 insertions(+), 5 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 39b523fb9..7708fef06 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ scipdf_parser>=0.52
 spacy==3.7.4
 anthropic>=0.18.1
 python-markdown-math
-pymdown-extensions
+pymdown-extensions>=10.14
 websocket-client
 beautifulsoup4
 prompt_toolkit
diff --git a/shared_utils/advanced_markdown_format.py b/shared_utils/advanced_markdown_format.py
index dd17c18b6..46981091a 100644
--- a/shared_utils/advanced_markdown_format.py
+++ b/shared_utils/advanced_markdown_format.py
@@ -385,6 +385,24 @@ def markdown_convertion(txt):
         )
 
 
+def code_block_title_replace_format(match):
+    # re.sub callback: turn a "```lang:file" header into a "```lang {title="file"}" header.
+    lang, filename = match.group(1, 2)
+    return '```{} {{title="{}"}}\n'.format(lang, filename)
+
+
+def get_last_backticks_indent(text):
+    """Return the leading-whitespace width of the last line of ``text`` that contains a fence."""
+    # Scan bottom-up so the first hit is the last fence marker in the text.
+    for row in reversed(text.splitlines()):
+        if '```' not in row:
+            continue
+        # Indent width = characters removed by stripping leading whitespace.
+        return len(row) - len(row.lstrip())
+    return 0  # no fence marker anywhere in the text
+
+
+@lru_cache(maxsize=16)  # memoize results with an LRU cache of at most 16 entries
 def close_up_code_segment_during_stream(gpt_reply):
     """
     在gpt输出代码的中途（输出了前面的```，但还没输出完后面的```），补上后面的```
@@ -398,6 +416,12 @@ def close_up_code_segment_during_stream(gpt_reply):
     """
     if "```" not in gpt_reply:
         return gpt_reply
+
+    # Rewrite a fence header such as ```python:warp.py into ```python {title="warp.py"}.
+    # The extension class is letters only, so a match can never run past the header line.
+    pattern = re.compile(r"```([a-z]{1,12}):([^:\n]{1,35}\.([a-zA-Z]{1,3}))\n")
+    gpt_reply = pattern.sub(code_block_title_replace_format, gpt_reply)
+
     if gpt_reply.endswith("```"):
         return gpt_reply
 
@@ -405,7 +429,11 @@ def close_up_code_segment_during_stream(gpt_reply):
     segments = gpt_reply.split("```")
     n_mark = len(segments) - 1
     if n_mark % 2 == 1:
-        return gpt_reply + "\n```"  # 输出代码片段中！
+        try:
+            num_padding = get_last_backticks_indent(gpt_reply)
+        except Exception:  # best-effort: fall back to an unindented closing fence
+            num_padding = 0
+        return gpt_reply + "\n" + " " * num_padding + "```"  # mid-stream inside a code block: close it
     else:
         return gpt_reply
 
diff --git a/tests/test_markdown.py b/tests/test_markdown.py
index 68bbe2e5b..b372154ce 100644
--- a/tests/test_markdown.py
+++ b/tests/test_markdown.py
@@ -20,7 +20,7 @@
 md = """
 Following code including wrapper
 
-```mermaid
+```python:wrapper.py
 graph TD
     A[Enter Chart Definition] --> B(Preview)
     B --> C{decide}
@@ -41,6 +41,33 @@
 
 </details>
 
+"""
+
+md ="""
+
+在这种场景中，您希望机器 B 能够通过轮询机制来间接地“请求”机器 A，而实际上机器 A 只能主动向机器 B 发出请求。这是一种典型的客户端-服务器轮询模式。下面是如何实现这种机制的详细步骤：
+
+### 机器 B 的实现
+
+1. **安装 FastAPI 和必要的依赖库**：
+   ```bash
+   pip install fastapi uvicorn
+   ```
+
+2. **创建 FastAPI 服务**：
+   ```python
+   from fastapi import FastAPI
+   from fastapi.responses import JSONResponse
+   from uuid import uuid4
+   from threading import Lock
+   import time
+
+   app = FastAPI()
+
+   # 字典用于存储请求和状态
+   requests = {}
+   process_lock = Lock()
+
 """
 def validate_path():
     import os, sys
@@ -53,10 +80,12 @@ def validate_path():
 
 validate_path()  # validate path so you can run from base directory
 from toolbox import markdown_convertion
-from shared_utils.advanced_markdown_format import markdown_convertion_for_file
+# markdown_convertion_for_file is no longer exercised by this script, so its import is dropped.
+from shared_utils.advanced_markdown_format import close_up_code_segment_during_stream
 # with open("gpt_log/default_user/shared/2024-04-22-01-27-43.zip.extract/translated_markdown.md", "r", encoding="utf-8") as f:
     # md = f.read()
-html = markdown_convertion_for_file(md)
+md = close_up_code_segment_during_stream(md)
+html = markdown_convertion(md)
 # print(html)
 with open("test.html", "w", encoding="utf-8") as f:
     f.write(html)