Support Chinese & refine the image-upload prompt in run_image

changzy00 · Apr 9, 2023 · 584ae89
1 parent 43c440a
commit 584ae89
Showing 1 changed file with 11 additions and 3 deletions.
diff --git a/visual_chatgpt.py b/visual_chatgpt.py
@@ -1075,7 +1075,7 @@ def run_text(self, text, state):
               f"Current Memory: {self.agent.memory.buffer}")
         return state, state
 
-    def run_image(self, image, state, txt):
+    def run_image(self, image, state, txt, lang):
         image_filename = os.path.join('image', f"{str(uuid.uuid4())[:8]}.png")
         print("======>Auto Resize Image...")
         img = Image.open(image.name)
@@ -1088,7 +1088,15 @@ def run_image(self, image, state, txt):
         img = img.convert('RGB')
         img.save(image_filename, "PNG")
         print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
-        state = state + [(f"![](/file={image_filename})*{image_filename}*", '')]
+        description = self.models['ImageCaptioning'].inference(image_filename)
+        if lang == 'Chinese':
+            Human_prompt = f'\nHuman: 提供一张名为 {image_filename}的图片。它的描述是: {description}。 这些信息帮助你理解这个图像，但是你应该使用工具来完成下面的任务，而不是直接从我的描述中想象。 如果你明白了, 说 \"收到\". \n'
+            AI_prompt = "收到。  "
+        else:
+            Human_prompt = f'\nHuman: provide a figure named {image_filename}. The description is: {description}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
+            AI_prompt = "Received.  "
+        self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
+        state = state + [(f"![](/file={image_filename})*{image_filename}*", AI_prompt)]
         print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
               f"Current Memory: {self.agent.memory.buffer}")
         return state, state, f'{txt} {image_filename} '
@@ -1116,7 +1124,7 @@ def run_image(self, image, state, txt):
         lang.change(bot.init_agent, [lang], [input_raws, lang, txt, clear])
         txt.submit(bot.run_text, [txt, state], [chatbot, state])
         txt.submit(lambda: "", None, txt)
-        btn.upload(bot.run_image, [btn, state, txt], [chatbot, state, txt])
+        btn.upload(bot.run_image, [btn, state, txt, lang], [chatbot, state, txt])
         clear.click(bot.memory.clear)
         clear.click(lambda: [], None, chatbot)
         clear.click(lambda: [], None, state)