From 7871efce5114a19401fc97aaf7ce3a2941bf3296 Mon Sep 17 00:00:00 2001 From: HUANYU XU <147359634+Huanshere@users.noreply.github.com> Date: Thu, 12 Sep 2024 18:26:37 +0800 Subject: [PATCH] fix bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gpt有时会在翻译结果中加入\n导致对齐错误,加入了replace --- core/translate_once.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/core/translate_once.py b/core/translate_once.py index 82781d45..f2a06cab 100644 --- a/core/translate_once.py +++ b/core/translate_once.py @@ -21,7 +21,10 @@ def retry_translation(prompt, model, step_name): ## Step 1: Faithful to the Original Text prompt1 = get_prompt_faithfulness(lines, shared_prompt) faith_result = retry_translation(prompt1, step4_2_translate_direct_model, 'faithfulness') - + # ! Replace '\n' with ' ' in faith_result[i]["Direct Translation"], sometimes gpt will add '\n' + for i in faith_result: + faith_result[i]["Direct Translation"] = faith_result[i]["Direct Translation"].replace('\n', ' ') + for i in faith_result: print(f'📄 Original Subtitle: {faith_result[i]["Original Subtitle"]}') print(f'📚 Direct Translation: {faith_result[i]["Direct Translation"]}') @@ -34,11 +37,14 @@ def retry_translation(prompt, model, step_name): print(f'📄 Original Subtitle: {express_result[i]["Original Subtitle"]}') print(f'🧠 Free Translation: {express_result[i]["Free Translation"]}') - translate_result = "\n".join([express_result[i]["Free Translation"].strip() for i in express_result]) + # ! Replace '\n' with ' ', sometimes gpt will add '\n' in the result and will cause the length of the original text and the translated text to be different + translate_result = "\n".join([express_result[i]["Free Translation"].replace('\n', ' ').strip() for i in express_result]) if len(lines.split('\n')) != len(translate_result.split('\n')): - print(f'❌ Translation of block {index} failed') - print(f'✅ Translation of block {index} completed') + print(f'❌ Translation of block {index} failed, Length Mismatch, Please check `output\gpt_log\translate_expressiveness.json`, expected {len(lines.split("\n"))} lines, but got {len(translate_result.split("\n"))} lines.') + raise ValueError(f'Original ···{lines}···,\nbut got ···{translate_result}···') + else: + print(f'✅ Translation of block {index} completed') return translate_result, lines