From 0458590a776616112191752f267cf5acd7791298 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Wed, 29 Jan 2025 21:30:54 +0800
Subject: [PATCH] support qwen2.5-max!

---
 request_llms/bridge_all.py  | 32 ++++++++++++++++++++++++++++----
 request_llms/com_qwenapi.py |  7 +------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index 2d6d3f50f..612f62ff9 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -812,7 +812,8 @@ def decode(self, *args, **kwargs):
     except:
         logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
-if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
+qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus"]
+if any(item in qwen_models for item in AVAIL_LLM_MODELS):
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
         from .bridge_qwen import predict as qwen_ui
@@ -822,7 +823,7 @@ def decode(self, *args, **kwargs):
                 "fn_without_ui": qwen_noui,
                 "can_multi_thread": True,
                 "endpoint": None,
-                "max_token": 6144,
+                "max_token": 100000,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             },
@@ -831,7 +832,7 @@ def decode(self, *args, **kwargs):
                 "fn_without_ui": qwen_noui,
                 "can_multi_thread": True,
                 "endpoint": None,
-                "max_token": 30720,
+                "max_token": 129024,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             },
@@ -840,7 +841,25 @@ def decode(self, *args, **kwargs):
                 "fn_without_ui": qwen_noui,
                 "can_multi_thread": True,
                 "endpoint": None,
-                "max_token": 28672,
+                "max_token": 30720,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-max-latest": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 30720,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-max-2025-01-25": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 30720,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             }
@@ -1362,6 +1381,11 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot,
 
     inputs = apply_gpt_academic_string_mask(inputs, mode="show_llm")
 
+    if llm_kwargs['llm_model'] not in model_info:
+        from toolbox import update_ui
+        chatbot.append([inputs, f"很抱歉,模型 '{llm_kwargs['llm_model']}' 暂不支持\n(1) 检查config中的AVAIL_LLM_MODELS选项\n(2) 检查request_llms/bridge_all.py中的模型路由"])
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
     method = model_info[llm_kwargs['llm_model']]["fn_with_ui"] # 如果这里报错,检查config中的AVAIL_LLM_MODELS选项
 
     if additional_fn: # 根据基础功能区 ModelOverride 参数调整模型类型
diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py
index 8e0377817..70872e16d 100644
--- a/request_llms/com_qwenapi.py
+++ b/request_llms/com_qwenapi.py
@@ -24,18 +24,13 @@ def validate_key():
 
     def generate(self, inputs, llm_kwargs, history, system_prompt):
         # import _thread as thread
         from dashscope import Generation
-        QWEN_MODEL = {
-            'qwen-turbo': Generation.Models.qwen_turbo,
-            'qwen-plus': Generation.Models.qwen_plus,
-            'qwen-max': Generation.Models.qwen_max,
-        }[llm_kwargs['llm_model']]
         top_p = llm_kwargs.get('top_p', 0.8)
         if top_p == 0: top_p += 1e-5
         if top_p == 1: top_p -= 1e-5
         self.result_buf = ""
         responses = Generation.call(
-            model=QWEN_MODEL,
+            model=llm_kwargs['llm_model'],
             messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
             top_p=top_p,
             temperature=llm_kwargs.get('temperature', 1.0),
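
Usage note: a minimal sketch of how the new models could be enabled after applying this patch, assuming the usual gpt_academic settings names AVAIL_LLM_MODELS and DASHSCOPE_API_KEY in config.py / config_private.py (the key value below is a placeholder, and the exact list of other models depends on the existing config):

    # config_private.py (sketch, not part of the patch)
    # Expose the new Qwen online models in the model dropdown; keep whatever
    # entries are already enabled in the local config.
    AVAIL_LLM_MODELS = [
        "gpt-3.5-turbo",            # example of an already-enabled model
        "qwen-max-latest",
        "qwen-max-2025-01-25",
        "qwen-max",
        "qwen-turbo",
        "qwen-plus",
    ]
    # Assumed setting name for the DashScope key used by the Qwen online models
    # (read through get_conf in request_llms/com_qwenapi.py).
    DASHSCOPE_API_KEY = "sk-..."    # placeholder, replace with a real key

With this in place, the routing added in bridge_all.py picks up any of the qwen_models entries present in AVAIL_LLM_MODELS, and com_qwenapi.py now passes the model name straight through to dashscope's Generation.call instead of mapping it through the removed QWEN_MODEL table.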