From 2052dce8e0a531df0b44ed531d49ab84cef53659 Mon Sep 17 00:00:00 2001
From: Marco Neves <marcotneves@gmail.com>
Date: Sun, 3 Dec 2023 22:56:25 -0500
Subject: [PATCH] final tweaks

---
 maestro/llms/llm.py | 96 ++++++++++++++++++++++++++-------------------
 1 file changed, 56 insertions(+), 40 deletions(-)

diff --git a/maestro/llms/llm.py b/maestro/llms/llm.py
index 47a8d3d..ca1f33a 100644
--- a/maestro/llms/llm.py
+++ b/maestro/llms/llm.py
@@ -1,63 +1,69 @@
 import base64
+
 import cv2
 import numpy as np
 import requests
 
-# Constants
-META_PROMPT = "For any labels or markings on an image that you reference in your response, please enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be numbers or letters and typically correspond to specific segments or parts of the image."
-OPENAI_API_URL = "https://api.openai.com/v1/chat/completions"
+META_PROMPT = (
+    "For any labels or markings on an image that you reference in your response, please "
+    "enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for "
+    "example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be "
+    "numbers or letters and typically correspond to specific segments or parts of the image."
+)
+
+API_URL = "https://api.openai.com/v1/chat/completions"
+
 
-def encode_image_to_base64(image: np.ndarray, format: str = '.jpg') -> str:
+def encode_image_to_base64(image: np.ndarray) -> str:
     """
-    Encodes an image into a base64-encoded string.
+    Encodes an image into a base64-encoded string in JPEG format.
 
     Parameters:
-        image (np.ndarray): The image to be encoded.
-        format (str): The format to use for encoding ('.jpg' or '.png').
+        image (np.ndarray): The image to be encoded. This should be a numpy array as
+            typically used in OpenCV.
 
     Returns:
-        str: A base64-encoded string representing the image.
+        str: A base64-encoded string representing the image in JPEG format.
     """
-    success, buffer = cv2.imencode(format, image)
+    success, buffer = cv2.imencode('.jpg', image)
     if not success:
-        raise ValueError(f"Could not encode image to {format} format.")
-    return base64.b64encode(buffer).decode('utf-8')
+        raise ValueError("Could not encode image to JPEG format.")
 
-def compose_headers(api_key: str) -> dict:
-    """
-    Composes the headers needed for an API request.
+    encoded_image = base64.b64encode(buffer).decode('utf-8')
+    return encoded_image
 
-    Parameters:
-        api_key (str): The API key for authenticating requests.
 
-    Returns:
-        dict: A dictionary of headers.
-    """
+def compose_headers(api_key: str) -> dict:
     return {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {api_key}"
     }
 
-def compose_openai_payload(image_base64: str, prompt: str) -> dict:
-    """
-    Composes the payload for a request to the OpenAI API.
 
-    Parameters:
-        image_base64 (str): The base64-encoded string of the image.
-        prompt (str): The textual prompt to accompany the image.
-
-    Returns:
-        dict: A dictionary representing the payload for the API request.
-    """
+def compose_payload(image: np.ndarray, prompt: str) -> dict:
+    base64_image = encode_image_to_base64(image)
     return {
         "model": "gpt-4-vision-preview",
         "messages": [
-            {"role": "system", "content": [META_PROMPT]},
+            {
+                "role": "system",
+                "content": [
+                    META_PROMPT
+                ]
+            },
             {
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": prompt},
-                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}
+                    {
+                        "type": "text",
+                        "text": prompt
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}"
+                        }
+                    }
                 ]
             }
         ],
@@ -84,20 +90,30 @@ def prompt_image_local(image: np.ndarray, prompt: str, server_url: str, custom_p
 
 def prompt_image(api_key: str, image: np.ndarray, prompt: str) -> str:
     """
-    Sends an image and a textual prompt to the OpenAI API.
+    Sends an image and a textual prompt to the OpenAI API and returns the API's textual
+    response.
+
+    This function integrates an image with a user-defined prompt to generate a response
+    using OpenAI's API.
 
     Parameters:
-        api_key (str): The API key.
+        api_key (str): The API key for authenticating requests to the OpenAI API.
         image (np.ndarray): The image to be sent to the API.
-        prompt (str): The textual prompt to accompany the image.
+            This should be a numpy array as typically used in OpenCV.
+        prompt (str): The textual prompt to accompany the image in the API request.
 
     Returns:
-        str: The response from the OpenAI API.
+        str: The textual response from the OpenAI API based on the input image and
+            prompt.
+
+    Raises:
+        ValueError: If there is an error in encoding the image or if the API response
+            contains an error.
     """
-    image_base64 = encode_image_to_base64(image)
-    payload = compose_openai_payload(image_base64, prompt)
-    headers = compose_headers(api_key)
-    response = requests.post(OPENAI_API_URL, headers=headers, json=payload).json()
+    headers = compose_headers(api_key=api_key)
+    payload = compose_payload(image=image, prompt=prompt)
+    response = requests.post(url=API_URL, headers=headers, json=payload).json()
+
     if 'error' in response:
         raise ValueError(response['error']['message'])
     return response['choices'][0]['message']['content']