From 9138726e7077f84ceee68b9dcd2b4008f5e04429 Mon Sep 17 00:00:00 2001
From: baishihao
Date: Thu, 23 Jan 2025 14:07:28 +0800
Subject: [PATCH] fix openai stream & update chat template

---
 lightllm/server/api_models.py   | 3 +++
 lightllm/server/api_server.py   | 3 ++-
 lightllm/server/build_prompt.py | 7 ++++++-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/lightllm/server/api_models.py b/lightllm/server/api_models.py
index f28c01ba9..0fefc11e9 100644
--- a/lightllm/server/api_models.py
+++ b/lightllm/server/api_models.py
@@ -24,7 +24,10 @@ class ChatCompletionRequest(BaseModel):
     # Additional parameters supported by LightLLM
     do_sample: Optional[bool] = False
     top_k: Optional[int] = -1
+    repetition_penalty: Optional[float] = 1.0
     ignore_eos: Optional[bool] = False
+    role_settings: Optional[Dict[str, str]] = None
+    character_settings: Optional[List[Dict[str, str]]] = None
 
 
 class UsageInfo(BaseModel):
diff --git a/lightllm/server/api_server.py b/lightllm/server/api_server.py
index e49a72d4e..e286c1e8b 100755
--- a/lightllm/server/api_server.py
+++ b/lightllm/server/api_server.py
@@ -206,6 +206,7 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
         do_sample=request.do_sample,
         presence_penalty=request.presence_penalty,
         frequency_penalty=request.frequency_penalty,
+        repetition_penalty=request.repetition_penalty,
         temperature=request.temperature,
         top_p=request.top_p,
         top_k=request.top_k,
@@ -283,7 +284,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
                 model=request.model,
                 choices=[stream_choice],
             )
-            yield ("data: " + stream_resp.json(ensure_ascii=False) + "\n\n").encode("utf-8")
+            yield ("data: " + json.dumps(stream_resp.dict(), ensure_ascii=False) + "\n\n").encode("utf-8")
 
     background_tasks = BackgroundTasks()
     return StreamingResponse(stream_results(), media_type="text/event-stream", background=background_tasks)
diff --git a/lightllm/server/build_prompt.py b/lightllm/server/build_prompt.py
index e3f057f00..4f8d1f9f4 100644
--- a/lightllm/server/build_prompt.py
+++ b/lightllm/server/build_prompt.py
@@ -11,5 +11,10 @@ def init_tokenizer(args):
 async def build_prompt(request) -> str:
     global tokenizer
     messages = request.messages
-    input_str = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    kwargs = {"conversation": messages}
+    if request.character_settings:
+        kwargs["character_settings"] = request.character_settings
+    if request.role_settings:
+        kwargs["role_setting"] = request.role_settings
+    input_str = tokenizer.apply_chat_template(**kwargs, tokenize=False, add_generation_prompt=True)
     return input_str
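
Note for reviewers: a minimal client-side sketch that exercises the new request fields and the fixed SSE stream. Only the field names repetition_penalty, role_settings and character_settings come from this patch; the host, port, route, model name, stream flag and message content are illustrative assumptions.

# Client sketch (assumptions: server on localhost:8000, OpenAI-style
# /v1/chat/completions route, a "stream" field on the request; only the
# new field names below are taken from this patch).
import requests

payload = {
    "model": "your-model",  # placeholder model name
    "messages": [{"role": "user", "content": "Introduce yourself."}],
    "stream": True,
    "repetition_penalty": 1.1,  # new sampling parameter
    "role_settings": {"user": "Alice", "assistant": "Bob"},  # forwarded to the chat template
    "character_settings": [{"name": "Bob", "persona": "a laconic pirate"}],
}

with requests.post("http://localhost:8000/v1/chat/completions", json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        # The server emits each event as b"data: <json>\n\n".
        if line.startswith(b"data: "):
            print(line[len(b"data: "):].decode("utf-8"))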
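
The streaming change replaces stream_resp.json(ensure_ascii=False) with an explicit json.dumps(stream_resp.dict(), ensure_ascii=False). A small sketch of what the new form guarantees, assuming a pydantic v1-style BaseModel; the Chunk class is illustrative, not the ChatCompletionStreamResponse from the patch. Serializing the plain dict with the stdlib encoder keeps non-ASCII content unescaped and does not rely on BaseModel.json() forwarding ensure_ascii.

# Sketch of the serialization change (Chunk stands in for the real
# ChatCompletionStreamResponse objects).
import json

from pydantic import BaseModel


class Chunk(BaseModel):
    content: str


chunk = Chunk(content="你好")

# dict -> json.dumps: ensure_ascii is always honoured by the stdlib encoder,
# so non-ASCII text reaches the SSE client unescaped.
sse_event = ("data: " + json.dumps(chunk.dict(), ensure_ascii=False) + "\n\n").encode("utf-8")
print(sse_event)  # b'data: {"content": "\xe4\xbd\xa0\xe5\xa5\xbd"}\n\n'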
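
In build_prompt, the new entries are passed to tokenizer.apply_chat_template as extra keyword arguments; Hugging Face tokenizers expose such extra kwargs to the Jinja chat template, so a custom template can read character_settings and role_setting. A rough sketch of that mechanism, using gpt2 as a stand-in tokenizer and a made-up template, neither of which is part of this repository:

# Sketch only: "gpt2" is a stand-in tokenizer and the template is made up to
# show that kwargs passed to apply_chat_template are visible to the template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.chat_template = (
    "{% if role_setting %}[assistant plays {{ role_setting['assistant'] }}]\n{% endif %}"
    "{% for m in messages %}{{ m['role'] }}: {{ m['content'] }}\n{% endfor %}"
    "{% if add_generation_prompt %}assistant:{% endif %}"
)

prompt = tokenizer.apply_chat_template(
    conversation=[{"role": "user", "content": "Hi"}],
    role_setting={"assistant": "Bob the pirate"},  # reaches the template as a variable
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)
# [assistant plays Bob the pirate]
# user: Hi
# assistant: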