From 584ae89c4559a36f7666e82a6ec524123e159ac5 Mon Sep 17 00:00:00 2001
From: Xiaodong Wang <v-xiaodwang@microsoft.com>
Date: Sun, 9 Apr 2023 23:23:13 +0800
Subject: [PATCH] Support Chinese prompts in run_image and seed agent memory with the image caption

---
 visual_chatgpt.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/visual_chatgpt.py b/visual_chatgpt.py
index cf794853..be7a8a4d 100644
--- a/visual_chatgpt.py
+++ b/visual_chatgpt.py
@@ -1075,7 +1075,7 @@ def run_text(self, text, state):
               f"Current Memory: {self.agent.memory.buffer}")
         return state, state
 
-    def run_image(self, image, state, txt):
+    def run_image(self, image, state, txt, lang):
         image_filename = os.path.join('image', f"{str(uuid.uuid4())[:8]}.png")
         print("======>Auto Resize Image...")
         img = Image.open(image.name)
@@ -1088,7 +1088,15 @@ def run_image(self, image, state, txt):
         img = img.convert('RGB')
         img.save(image_filename, "PNG")
         print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
-        state = state + [(f"![](/file={image_filename})*{image_filename}*", '')]
+        description = self.models['ImageCaptioning'].inference(image_filename)
+        if lang == 'Chinese':
+            Human_prompt = f'\nHuman: 提供一张名为 {image_filename}的图片。它的描述是: {description}。 这些信息帮助你理解这个图像，但是你应该使用工具来完成下面的任务，而不是直接从我的描述中想象。 如果你明白了, 说 \"收到\". \n'
+            AI_prompt = "收到。  "
+        else:
+            Human_prompt = f'\nHuman: provide a figure named {image_filename}. The description is: {description}. This information helps you to understand this image, but you should use tools to finish following tasks, rather than directly imagine from my description. If you understand, say \"Received\". \n'
+            AI_prompt = "Received.  "
+        self.agent.memory.buffer = self.agent.memory.buffer + Human_prompt + 'AI: ' + AI_prompt
+        state = state + [(f"![](/file={image_filename})*{image_filename}*", AI_prompt)]
         print(f"\nProcessed run_image, Input image: {image_filename}\nCurrent state: {state}\n"
               f"Current Memory: {self.agent.memory.buffer}")
         return state, state, f'{txt} {image_filename} '
@@ -1116,7 +1124,7 @@ def run_image(self, image, state, txt):
         lang.change(bot.init_agent, [lang], [input_raws, lang, txt, clear])
         txt.submit(bot.run_text, [txt, state], [chatbot, state])
         txt.submit(lambda: "", None, txt)
-        btn.upload(bot.run_image, [btn, state, txt], [chatbot, state, txt])
+        btn.upload(bot.run_image, [btn, state, txt, lang], [chatbot, state, txt])
         clear.click(bot.memory.clear)
         clear.click(lambda: [], None, chatbot)
         clear.click(lambda: [], None, state)