From 7eeab9e3762374e65a5a435f80881f8d4c380354 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 9 Jan 2025 22:31:59 +0800 Subject: [PATCH] fix code block display bug --- requirements.txt | 2 +- shared_utils/advanced_markdown_format.py | 30 +++++++++++++++++++- tests/test_markdown.py | 35 ++++++++++++++++++++++-- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 39b523fb9..7708fef06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ scipdf_parser>=0.52 spacy==3.7.4 anthropic>=0.18.1 python-markdown-math -pymdown-extensions +pymdown-extensions>=10.14 websocket-client beautifulsoup4 prompt_toolkit diff --git a/shared_utils/advanced_markdown_format.py b/shared_utils/advanced_markdown_format.py index dd17c18b6..46981091a 100644 --- a/shared_utils/advanced_markdown_format.py +++ b/shared_utils/advanced_markdown_format.py @@ -385,6 +385,24 @@ def markdown_convertion(txt): ) +def code_block_title_replace_format(match): + lang = match.group(1) + filename = match.group(2) + return f"```{lang} {{title=\"{filename}\"}}\n" + + +def get_last_backticks_indent(text): + # 从后向前查找最后一个 ``` + lines = text.splitlines() + for line in reversed(lines): + if '```' in line: + # 计算前面的空格数量 + indent = len(line) - len(line.lstrip()) + return indent + return 0 # 如果没找到返回0 + + +@lru_cache(maxsize=16) # 使用lru缓存 def close_up_code_segment_during_stream(gpt_reply): """ 在gpt输出代码的中途(输出了前面的```,但还没输出完后面的```),补上后面的``` @@ -398,6 +416,12 @@ def close_up_code_segment_during_stream(gpt_reply): """ if "```" not in gpt_reply: return gpt_reply + + # replace [```python:warp.py] to [```python {title="warp.py"}] + pattern = re.compile(r"```([a-z]{1,12}):([^:\n]{1,35}\.([a-zA-Z^:\n]{1,3}))\n") + if pattern.search(gpt_reply): + gpt_reply = pattern.sub(code_block_title_replace_format, gpt_reply) + if gpt_reply.endswith("```"): return gpt_reply @@ -405,7 +429,11 @@ def close_up_code_segment_during_stream(gpt_reply): segments = gpt_reply.split("```") n_mark = len(segments) - 1 if n_mark % 2 == 1: - return gpt_reply + "\n```" # 输出代码片段中! + try: + num_padding = get_last_backticks_indent(gpt_reply) + except: + num_padding = 0 + return gpt_reply + "\n" + " "*num_padding + "```" # 输出代码片段中! else: return gpt_reply diff --git a/tests/test_markdown.py b/tests/test_markdown.py index 68bbe2e5b..b372154ce 100644 --- a/tests/test_markdown.py +++ b/tests/test_markdown.py @@ -20,7 +20,7 @@ md = """ Following code including wrapper -```mermaid +```python:wrapper.py graph TD A[Enter Chart Definition] --> B(Preview) B --> C{decide} @@ -41,6 +41,33 @@ +""" + +md =""" + +在这种场景中,您希望机器 B 能够通过轮询机制来间接地“请求”机器 A,而实际上机器 A 只能主动向机器 B 发出请求。这是一种典型的客户端-服务器轮询模式。下面是如何实现这种机制的详细步骤: + +### 机器 B 的实现 + +1. **安装 FastAPI 和必要的依赖库**: + ```bash + pip install fastapi uvicorn + ``` + +2. **创建 FastAPI 服务**: + ```python + from fastapi import FastAPI + from fastapi.responses import JSONResponse + from uuid import uuid4 + from threading import Lock + import time + + app = FastAPI() + + # 字典用于存储请求和状态 + requests = {} + process_lock = Lock() + """ def validate_path(): import os, sys @@ -53,10 +80,12 @@ def validate_path(): validate_path() # validate path so you can run from base directory from toolbox import markdown_convertion -from shared_utils.advanced_markdown_format import markdown_convertion_for_file +# from shared_utils.advanced_markdown_format import markdown_convertion_for_file +from shared_utils.advanced_markdown_format import close_up_code_segment_during_stream # with open("gpt_log/default_user/shared/2024-04-22-01-27-43.zip.extract/translated_markdown.md", "r", encoding="utf-8") as f: # md = f.read() -html = markdown_convertion_for_file(md) +md = close_up_code_segment_during_stream(md) +html = markdown_convertion(md) # print(html) with open("test.html", "w", encoding="utf-8") as f: f.write(html)