From 584ae89c4559a36f7666e82a6ec524123e159ac5 Mon Sep 17 00:00:00 2001 From: Xiaodong Wang Date: Sun, 9 Apr 2023 23:23:13 +0800 Subject: [PATCH] support Chinese & refine --- visual_chatgpt.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/visual_chatgpt.py b/visual_chatgpt.py index cf794853..be7a8a4d 100644 --- a/visual_chatgpt.py +++ b/visual_chatgpt.py @@ -1075,7 +1075,7 @@ def run_text(self, text, state): f"Current Memory: {self.agent.memory.buffer}") return state, state - def run_image(self, image, state, txt): + def run_image(self, image, state, txt, lang): image_filename = os.path.join('image', f"{str(uuid.uuid4())[:8]}.png") print("======>Auto Resize Image...") img = Image.open(image.name) @@ -1088,7 +1088,15 @@ def run_image(self, image, state, txt): img = img.convert('RGB') img.save(image_filename, "PNG") print(f"Resize image form {width}x{height} to {width_new}x{height_new}") - state = state + [(f"![](/file={image_filename})*{image_filename}*", '')] + description = self.models['ImageCaptioning'].inference(image_filename) + if lang == 'Chinese': + Human_prompt = f'\nHuman: 提供一张名为 {image_filename}的图片。它的描述是: {description}。 这些信息帮助你理解这个图像,但是你应该使用工具来完成下面的任务,而不是直接从我的描述中想象。 如果你明白了, 说 \"收到\". \n' + AI_prompt = "收到。 " + else: + Human_prompt = f'\nHuman: provide a figure named {image_filename}. The description is: {description}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n' + AI_prompt = "Received. " + self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt + state = state + [(f"![](/file={image_filename})*{image_filename}*", AI_prompt)] print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n" f"Current Memory: {self.agent.memory.buffer}") return state, state, f'{txt} {image_filename} ' @@ -1116,7 +1124,7 @@ def run_image(self, image, state, txt): lang.change(bot.init_agent, [lang], [input_raws, lang, txt, clear]) txt.submit(bot.run_text, [txt, state], [chatbot, state]) txt.submit(lambda: "", None, txt) - btn.upload(bot.run_image, [btn, state, txt], [chatbot, state, txt]) + btn.upload(bot.run_image, [btn, state, txt, lang], [chatbot, state, txt]) clear.click(bot.memory.clear) clear.click(lambda: [], None, chatbot) clear.click(lambda: [], None, state)