From 0458590a776616112191752f267cf5acd7791298 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Wed, 29 Jan 2025 21:30:54 +0800
Subject: [PATCH] support qwen2.5-max!

---
 request_llms/bridge_all.py  | 32 ++++++++++++++++++++++++++++----
 request_llms/com_qwenapi.py |  7 +------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index 2d6d3f50f..612f62ff9 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -812,7 +812,8 @@ def decode(self, *args, **kwargs):
     except:
         logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
-if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
+qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus"]
+if any(item in qwen_models for item in AVAIL_LLM_MODELS):
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
         from .bridge_qwen import predict as qwen_ui
@@ -822,7 +823,7 @@ def decode(self, *args, **kwargs):
                 "fn_without_ui": qwen_noui,
                 "can_multi_thread": True,
                 "endpoint": None,
-                "max_token": 6144,
+                "max_token": 100000,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             },
@@ -831,7 +832,7 @@ def decode(self, *args, **kwargs):
                 "fn_without_ui": qwen_noui,
                 "can_multi_thread": True,
                 "endpoint": None,
-                "max_token": 30720,
+                "max_token": 129024,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             },
@@ -840,7 +841,25 @@ def decode(self, *args, **kwargs):
                 "fn_without_ui": qwen_noui,
                 "can_multi_thread": True,
                 "endpoint": None,
-                "max_token": 28672,
+                "max_token": 30720,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-max-latest": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 30720,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "qwen-max-2025-01-25": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 30720,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             }
@@ -1362,6 +1381,11 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot,
 
     inputs = apply_gpt_academic_string_mask(inputs, mode="show_llm")
 
+    if llm_kwargs['llm_model'] not in model_info:
+        from toolbox import update_ui
+        chatbot.append([inputs, f"很抱歉,模型 '{llm_kwargs['llm_model']}' 暂不支持\n(1) 检查config中的AVAIL_LLM_MODELS选项\n(2) 检查request_llms/bridge_all.py中的模型路由"])
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
     method = model_info[llm_kwargs['llm_model']]["fn_with_ui"] # 如果这里报错,检查config中的AVAIL_LLM_MODELS选项
 
     if additional_fn: # 根据基础功能区 ModelOverride 参数调整模型类型
diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py
index 8e0377817..70872e16d 100644
--- a/request_llms/com_qwenapi.py
+++ b/request_llms/com_qwenapi.py
@@ -24,18 +24,13 @@ def validate_key():
 
     def generate(self, inputs, llm_kwargs, history, system_prompt):
         # import _thread as thread
         from dashscope import Generation
-        QWEN_MODEL = {
-            'qwen-turbo': Generation.Models.qwen_turbo,
-            'qwen-plus': Generation.Models.qwen_plus,
-            'qwen-max': Generation.Models.qwen_max,
-        }[llm_kwargs['llm_model']]
         top_p = llm_kwargs.get('top_p', 0.8)
         if top_p == 0: top_p += 1e-5
         if top_p == 1: top_p -= 1e-5
         self.result_buf = ""
         responses = Generation.call(
-            model=QWEN_MODEL,
+            model=llm_kwargs['llm_model'],
             messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
             top_p=top_p,
             temperature=llm_kwargs.get('temperature', 1.0),
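
Usage note: a minimal sketch of how the new models could be enabled after applying this patch, assuming the usual gpt_academic settings names AVAIL_LLM_MODELS and DASHSCOPE_API_KEY in config.py / config_private.py (the key value below is a placeholder, and the exact list of other models depends on the existing config):

    # config_private.py (sketch, not part of the patch)
    # Expose the new Qwen online models in the model dropdown; keep whatever
    # entries are already enabled in the local config.
    AVAIL_LLM_MODELS = [
        "gpt-3.5-turbo",            # example of an already-enabled model
        "qwen-max-latest",
        "qwen-max-2025-01-25",
        "qwen-max",
        "qwen-turbo",
        "qwen-plus",
    ]
    # Assumed setting name for the DashScope key used by the Qwen online models
    # (read through get_conf in request_llms/com_qwenapi.py).
    DASHSCOPE_API_KEY = "sk-..."    # placeholder, replace with a real key

With this in place, the routing added in bridge_all.py picks up any of the qwen_models entries present in AVAIL_LLM_MODELS, and com_qwenapi.py now passes the model name straight through to dashscope's Generation.call instead of mapping it through the removed QWEN_MODEL table.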