Skip to content

Commit

Permalink
V4 (xd2333#70)
Browse files Browse the repository at this point in the history
* 启动器优化

* 启动器优化

* fix bug in CTranslateProblem.比日文长

* 代理检测重试3次

* 禁止修改pre_jp

* 细节修改

* add yapsy

* Create Baseplugin.py

* 微调cache读取逻辑

* rename baseplugin

* fix find_ifword_text error

* add update_json_with_transList

* Update PluginManager.py

* GPT35/GPT4:add rewriteModelName

* 统一gpt_dic参数

* Update V3.py

* 精简配置文件

* fix bug, clean code

* 重构翻译前端

* 插件系统-初版

* 细节完善

* 修改输入输出目录,支持遍历子文件夹

* 细节调整

* 去掉chatgpt模拟网页

* 增加依赖文件、细节调整

* 更新插件框架

* 更新插件框架2

* 细节完善

* colorlog

* Update Runner.py

* sleep 5min to 1min in try again later

* 精简配置文件

* GPT字典:全称与简称都有gpt字典时,句子里只有全称时简称不会再被代入请求

* Update file_subtitle_srt.py

* fix bug

* GPT35/GPT4:修复使用多家中转时偶尔令牌无效的问题

* openai key测试添加提示

* 跳过空文件

* 打印插件列表

* 整理插件

* 调整插件

* Sakura:限制线程数

* fix bug

* bug fix

* 缓存逻辑调整

* 缓存调整2

* support gpt-4-0125

* 细节调整

* bug fix+细节调整

* 增加插件text_common_skipNoJP

* 几处细节调整

* fix循环引用

* 细节调整

* 保存日志到文件

* 普通字典:处理转义字符

* 细节优化

* Update Utils.py

---------

Co-authored-by: cx2333 <99069487+cx2333-gal@users.noreply.github.com>
  • Loading branch information
xd2333 and xd2333 authored Feb 1, 2024
1 parent a3fbf2e commit f70cd41
Show file tree
Hide file tree
Showing 57 changed files with 4,449 additions and 2,651 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__/
.vscode
useful_tools/GalTransl_DumpInjector/config.ini
useful_tools/GalTransl_DumpInjector/VNTextPatch/names.xml
*.log
4 changes: 2 additions & 2 deletions Dict/00通用字典_符号_译后.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
? ?
! !
~ ~
· ・
- —
& &
・ ·
ー —

//一些对于符号的fix
post_jp 「 [and] !" 1^" 「
Expand Down
12 changes: 8 additions & 4 deletions GalTransl/Backend/BingGPT4Translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from GalTransl.Cache import get_transCache_from_json, save_transCache_to_json
from GalTransl.CSentense import CTransList, CSentense
from GalTransl.Dictionary import CGptDict
from GalTransl.Utils import extract_code_blocks
from GalTransl.Backend.Prompts import (
NewBing_CONF_PROMPT,
NewBing_FORCE_PROMPT,
Expand Down Expand Up @@ -207,6 +208,10 @@ async def translate(self, trans_list: CTransList, gptdict="", proofread=False):
LOGGER.info(result_text)
else:
print("")
if "```json" in result_text:
lang_list, code_list = extract_code_blocks(result_text)
if len(lang_list) > 0 and len(code_list) > 0:
result_text = code_list[0]
result_text = result_text[result_text.find('{"id') :]
# 修复丢冒号
result_text = (
Expand All @@ -233,7 +238,6 @@ async def translate(self, trans_list: CTransList, gptdict="", proofread=False):
)
break
else:
LOGGER.warning("NB输出格式异常")
continue
error_flag = False
# 本行输出不正常
Expand Down Expand Up @@ -346,8 +350,8 @@ async def batch_translate(
cache_file_path,
trans_list: CTransList,
num_pre_request: int,
chatgpt_dict: CGptDict = None,
retry_failed: bool = False,
gpt_dic: CGptDict = None,
proofread: bool = False,
retran_key: str = "",
) -> CTransList:
Expand Down Expand Up @@ -386,8 +390,8 @@ async def batch_translate(
trans_list_split = trans_list_unhit[i : i + num_pre_request]

# 生成dic prompt
if chatgpt_dict:
dic_prompt = chatgpt_dict.gen_prompt(trans_list_split)
if gpt_dic:
dic_prompt = gpt_dic.gen_prompt(trans_list_split)
else:
dic_prompt = ""

Expand Down
59 changes: 26 additions & 33 deletions GalTransl/Backend/GPT3Translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from GalTransl.Dictionary import CGptDict
from GalTransl.Cache import get_transCache_from_json, save_transCache_to_json
from GalTransl.Backend.revChatGPT.typings import APIConnectionError
from GalTransl.StringUtils import extract_code_blocks
from GalTransl.Utils import extract_code_blocks
from httpx import ProtocolError
from GalTransl import LOGGER, LANG_SUPPORTED
from GalTransl.Backend.Prompts import (
Expand All @@ -40,20 +40,21 @@ def __init__(
self.eng_type = eng_type
self.last_file_name = ""
self.retry_count = 0
# 源语言
if val := config.getKey("sourceLanguage"):
# 语言设置
if val := config.getKey("language"):
sp = val.split("2")
self.source_lang = sp[0]
self.target_lang = sp[1]
elif val := config.getKey("sourceLanguage"): # 兼容旧版本配置
self.source_lang = val
self.target_lang = config.getKey("targetLanguage")
else:
self.source_lang = "ja"
self.target_lang = "zh-cn"
if self.source_lang not in LANG_SUPPORTED.keys():
raise ValueError("错误的源语言代码:" + self.source_lang)
else:
self.source_lang = LANG_SUPPORTED[self.source_lang]
# 目标语言
if val := config.getKey("targetLanguage"):
self.target_lang = val
else:
self.target_lang = "zh-cn"
if self.target_lang not in LANG_SUPPORTED.keys():
raise ValueError("错误的目标语言代码:" + self.target_lang)
else:
Expand Down Expand Up @@ -98,7 +99,7 @@ def __init__(
if val := config.getKey("gpt.translStyle"):
self.transl_style = val
else:
self.transl_style = "normal"
self.transl_style = "auto"
self._current_style = ""

if self.target_lang == "Simplified Chinese":
Expand All @@ -116,14 +117,17 @@ def init(self) -> bool:
pass

def init_chatbot(self, eng_type, config):
eng_name = config.getBackendConfigSection("GPT35").get("rewriteModelName", "")

if eng_type == "gpt35-0613":
from GalTransl.Backend.revChatGPT.V3 import Chatbot as ChatbotV3

self.token = self.tokenProvider.getToken(True, False)
eng_name = "gpt-3.5-turbo-0613" if eng_name == "" else eng_name
# This is a workaround; we may replace this solution with a custom OpenAI API wrapper.
self.chatbot = ChatbotV3(
api_key=self.token.token,
engine="gpt-3.5-turbo-0613",
engine=eng_name,
system_prompt=GPT35_0613_SYSTEM_PROMPT,
api_address=self.token.domain + "/v1/chat/completions",
timeout=30,
Expand All @@ -137,10 +141,11 @@ def init_chatbot(self, eng_type, config):
from GalTransl.Backend.revChatGPT.V3 import Chatbot as ChatbotV3

self.token = self.tokenProvider.getToken(True, False)
eng_name = "gpt-3.5-turbo-1106" if eng_name == "" else eng_name
# This is a workaround; we may replace this solution with a custom OpenAI API wrapper.
self.chatbot = ChatbotV3(
api_key=self.token.token,
engine="gpt-3.5-turbo-1106",
engine=eng_name,
system_prompt=GPT35_1106_SYSTEM_PROMPT,
api_address=self.token.domain + "/v1/chat/completions",
timeout=30,
Expand All @@ -150,21 +155,6 @@ def init_chatbot(self, eng_type, config):
self.chatbot.update_proxy(
self.proxyProvider.getProxy().addr if self.proxyProvider else None # type: ignore
)
elif eng_type == "unoffapi":
from GalTransl.Backend.revChatGPT.V1 import AsyncChatbot as ChatbotV1

gpt_config = {
"access_token": choice(
config.getBackendConfigSection("ChatGPT")["access_tokens"]
)["access_token"],
"proxy": self.proxyProvider.getProxy().addr
if self.proxyProvider
else "",
}
if gpt_config["proxy"] == "":
del gpt_config["proxy"]
self.chatbot = ChatbotV1(config=gpt_config)
self.chatbot.clear_conversations()

if self.transl_style == "auto":
self._set_gpt_style("precise")
Expand Down Expand Up @@ -201,6 +191,9 @@ async def asyncTranslate(self, content: CTransList, gptdict="") -> CTransList:
if self.eng_type != "unoffapi":
self.token = self.tokenProvider.getToken(True, False)
self.chatbot.set_api_key(self.token.token)
self.chatbot.set_api_addr(
f"{self.token.domain}/v1/chat/completions"
)
LOGGER.info(f"-> 翻译输入:\n{gptdict}\n{input_json}\n")
if self.streamOutputMode:
LOGGER.info("-> 输出:\n")
Expand Down Expand Up @@ -231,8 +224,8 @@ async def asyncTranslate(self, content: CTransList, gptdict="") -> CTransList:
self.token = self.tokenProvider.getToken(True, False)
self.chatbot.set_api_key(self.token.token)
elif "try again later" in str_ex or "too many requests" in str_ex:
LOGGER.warning("-> 请求受限,5分钟后继续尝试")
await asyncio.sleep(300)
LOGGER.warning("-> 请求受限,1分钟后继续尝试")
await asyncio.sleep(60)
continue
elif "expired" in str_ex:
LOGGER.error("-> access_token过期,请更换")
Expand Down Expand Up @@ -262,11 +255,11 @@ async def asyncTranslate(self, content: CTransList, gptdict="") -> CTransList:
result_json = json.loads(result_text) # 尝试解析json
if len(result_json) != len(input_list): # 输出行数错误
LOGGER.error("-> 错误的输出行数:\n" + result_text + "\n")
error_message="输出行数错误"
error_message = "输出行数错误"
error_flag = True
except:
LOGGER.error("-> 非json:\n" + result_text + "\n")
error_message="输出非json"
error_message = "输出非json"
error_flag = True

if not error_flag:
Expand Down Expand Up @@ -466,7 +459,7 @@ async def batch_translate(
trans_list: CTransList,
num_pre_req: int,
retry_failed: bool = False,
gptdict: CGptDict = None,
gpt_dic: CGptDict = None,
proofread: bool = False,
retran_key: str = "",
) -> CTransList:
Expand Down Expand Up @@ -501,8 +494,8 @@ async def batch_translate(
trans_list_split = trans_list_unhit[i : i + num_pre_req]

dic_prompt = ""
if gptdict != None:
dic_prompt = gptdict.gen_prompt(trans_list_split)
if gpt_dic != None:
dic_prompt = gpt_dic.gen_prompt(trans_list_split)
num, trans_result = await self.asyncTranslate(trans_list_split, dic_prompt)
trans_result_list += trans_result
i += num if num > 0 else 0
Expand Down
67 changes: 30 additions & 37 deletions GalTransl/Backend/GPT4Translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from GalTransl.CSentense import CSentense, CTransList
from GalTransl.Cache import get_transCache_from_json, save_transCache_to_json
from GalTransl.Dictionary import CGptDict
from GalTransl.StringUtils import extract_code_blocks
from GalTransl.Utils import extract_code_blocks
from GalTransl.Backend.Prompts import (
GPT4_CONF_PROMPT,
GPT4_TRANS_PROMPT,
Expand Down Expand Up @@ -58,20 +58,21 @@ def __init__(
self.record_confidence = val
else:
self.record_confidence = False
# 源语言
if val := config.getKey("sourceLanguage"):
# 语言设置
if val := config.getKey("language"):
sp = val.split("2")
self.source_lang = sp[0]
self.target_lang = sp[1]
elif val := config.getKey("sourceLanguage"): # 兼容旧版本配置
self.source_lang = val
self.target_lang = config.getKey("targetLanguage")
else:
self.source_lang = "ja"
self.target_lang = "zh-cn"
if self.source_lang not in LANG_SUPPORTED.keys():
raise ValueError("错误的源语言代码:" + self.source_lang)
else:
self.source_lang = LANG_SUPPORTED[self.source_lang]
# 目标语言
if val := config.getKey("targetLanguage"):
self.target_lang = val
else:
self.target_lang = "zh-cn"
if self.target_lang not in LANG_SUPPORTED.keys():
raise ValueError("错误的目标语言代码:" + self.target_lang)
else:
Expand Down Expand Up @@ -105,7 +106,7 @@ def __init__(
if val := config.getKey("gpt.translStyle"):
self.transl_style = val
else:
self.transl_style = "normal"
self.transl_style = "auto"
self._current_style = ""

self.init_chatbot(eng_type=eng_type, config=config) # 模型选择
Expand All @@ -123,16 +124,18 @@ def __init__(
pass

def init_chatbot(self, eng_type, config):
eng_name = config.getBackendConfigSection("GPT4").get("rewriteModelName", "")
if eng_type == "gpt4":
from GalTransl.Backend.revChatGPT.V3 import Chatbot as ChatbotV3

self.token = self.tokenProvider.getToken(False, True)
eng_name = "gpt-4" if eng_name == "" else eng_name
self.chatbot = ChatbotV3(
api_key=self.token.token,
temperature=0.4,
frequency_penalty=0.2,
system_prompt=GPT4_SYSTEM_PROMPT,
engine="gpt-4",
engine=eng_name,
api_address=self.token.domain + "/v1/chat/completions",
timeout=30,
)
Expand All @@ -145,14 +148,14 @@ def init_chatbot(self, eng_type, config):
from GalTransl.Backend.revChatGPT.V3 import Chatbot as ChatbotV3

self.token = self.tokenProvider.getToken(False, True)

eng_name = "gpt-4-0125-preview" if eng_name == "" else eng_name
system_prompt = GPT4Turbo_SYSTEM_PROMPT
self.chatbot = ChatbotV3(
api_key=self.token.token,
temperature=0.4,
frequency_penalty=0.2,
system_prompt=system_prompt,
engine="gpt-4-1106-preview",
engine=eng_name,
api_address=self.token.domain + "/v1/chat/completions",
timeout=30,
# response_format="json",
Expand All @@ -162,22 +165,6 @@ def init_chatbot(self, eng_type, config):
self.chatbot.update_proxy(
self.proxyProvider.getProxy().addr if self.proxyProvider else None
)
elif eng_type == "unoffapi":
from GalTransl.Backend.revChatGPT.V1 import Chatbot as ChatbotV1

gpt_config = {
"model": "gpt-4",
"paid": True,
"access_token": choice(
config.getBackendConfigSection("ChatGPT")["access_tokens"]
)["access_token"],
"proxy": self.proxyProvider.getProxy().addr if self.proxies else None,
}
if gpt_config["proxy"] == "":
del gpt_config["proxy"]
self.chatbot = ChatbotV1(config=gpt_config)
self.chatbot.trans_prompt = GPT4_TRANS_PROMPT
self.chatbot.clear_conversations()

async def translate(self, trans_list: CTransList, gptdict="", proofread=False):
input_list = []
Expand Down Expand Up @@ -232,6 +219,9 @@ async def translate(self, trans_list: CTransList, gptdict="", proofread=False):
if self.eng_type != "unoffapi":
self.token = self.tokenProvider.getToken(False, True)
self.chatbot.set_api_key(self.token.token)
self.chatbot.set_api_addr(
f"{self.token.domain}/v1/chat/completions"
)
# LOGGER.info("->输入:\n" + prompt_req + "\n")
LOGGER.info(
f"->{'翻译输入' if not proofread else '校对输入'}{gptdict}\n{input_json}\n"
Expand Down Expand Up @@ -259,6 +249,8 @@ async def translate(self, trans_list: CTransList, gptdict="", proofread=False):
print("")
except asyncio.CancelledError:
raise
except RuntimeError:
raise
except Exception as ex:
str_ex = str(ex).lower()
LOGGER.error(f"-> {str_ex}")
Expand All @@ -268,8 +260,8 @@ async def translate(self, trans_list: CTransList, gptdict="", proofread=False):
self.token = self.tokenProvider.getToken(False, True)
self.chatbot.set_api_key(self.token.token)
elif "try again later" in str_ex or "too many requests" in str_ex:
LOGGER.warning("-> 请求受限,5分钟后继续尝试")
await asyncio.sleep(300)
LOGGER.warning("-> 请求受限,1分钟后继续尝试")
await asyncio.sleep(60)
continue
elif "expired" in str_ex:
LOGGER.error("-> access_token过期,请更换")
Expand All @@ -284,7 +276,12 @@ async def translate(self, trans_list: CTransList, gptdict="", proofread=False):
await asyncio.sleep(5)
continue

result_text = resp[resp.find('{"id') :]
result_text = resp
if "```json" in result_text:
lang_list, code_list = extract_code_blocks(result_text)
if len(lang_list) > 0 and len(code_list) > 0:
result_text = code_list[0]
result_text = result_text[result_text.find('{"id') :]

result_text = (
result_text.replace(", doub:", ', "doub":')
Expand Down Expand Up @@ -396,7 +393,7 @@ async def batch_translate(
trans_list: CTransList,
num_pre_request: int,
retry_failed: bool = False,
chatgpt_dict: CGptDict = None,
gpt_dic: CGptDict = None,
proofread: bool = False,
retran_key: str = "",
) -> CTransList:
Expand Down Expand Up @@ -438,11 +435,7 @@ async def batch_translate(
else trans_list_unhit[i:]
)

dic_prompt = (
chatgpt_dict.gen_prompt(trans_list_split)
if chatgpt_dict != None
else ""
)
dic_prompt = gpt_dic.gen_prompt(trans_list_split) if gpt_dic != None else ""

num, trans_result = await self.translate(
trans_list_split, dic_prompt, proofread=proofread
Expand Down
Loading

0 comments on commit f70cd41

Please sign in to comment.