Commit: Update docs
johann-petrak committed Jan 23, 2025
1 parent f6a38ec commit 8f75e04
Showing 6 changed files with 296 additions and 67 deletions.
69 changes: 28 additions & 41 deletions docs/llms_wrapper/config.html
@@ -129,7 +129,7 @@ <h2 class="section-title" id="header-functions">Functions</h2>
if not 'llm' in llm:
    raise ValueError(f"Error: Missing 'llm' field in llm config")
llm = llm["llm"]
if not re.match(r"^[a-zA-Z0-9]+/\S+$", llm):
if not re.match(r"^[a-zA-Z0-9]+/.+$", llm):
    raise ValueError(f"Error: 'llm' field must be in the format 'provider/model' in line: {llm}")
for provider, provider_config in config['providers'].items():
    # provider name must be one of the supported providers by litellm
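
This one-line change relaxes the model-name check: the old \S+ rejected model names containing whitespace, while .+ accepts any non-empty remainder after the provider prefix. A minimal sketch of the difference (the model names are invented for illustration):

import re

NEW = r"^[a-zA-Z0-9]+/.+$"   # new pattern: any non-empty tail after "provider/"
OLD = r"^[a-zA-Z0-9]+/\S+$"  # old pattern: no whitespace allowed in the tail

print(bool(re.match(NEW, "openai/gpt-4o")))          # True under both patterns
print(bool(re.match(NEW, "ollama/my local model")))  # True: "." matches the spaces
print(bool(re.match(OLD, "ollama/my local model")))  # False: "\S+" cannot cross a space
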
@@ -228,56 +228,43 @@ <h2 id="returns">Returns</h2>
"""
for i, llm in enumerate(config["llms"]):
    if isinstance(llm, str):
        provider, model = llm.split(":")
        provider, model = llm.split("/")
        if provider in config.get("providers", {}):
            provider_config = config["providers"][provider]
            llm = {
                "llm": llm,
                "api_key": provider_config.get("api_key", os.getenv(f"{provider.upper()}_API_KEY"))
            }
            # copy over user, password, user_env, and password_env if they exist
            if "user" in provider_config:
                llm["user"] = provider_config["user"]
            if "password" in provider_config:
                llm["password"] = provider_config["password"]
            if "user_env" in provider_config:
                llm["user"] = os.getenv(provider_config["user_env"])
            if "password_env" in provider_config:
                llm["password"] = os.getenv(provider_config["password_env"])
            llm = {}
            llm.update(config["providers"][provider])
        else:
            llm = {
                "llm": llm,
                "api_key": os.getenv(f"{provider.upper()}_API_KEY")
            }
    else:
        provider, model = llm["llm"].split("/", 1)
        if "api_key" not in llm:
            if "api_key_env" in llm:
                llm["api_key"] = os.getenv(llm["api_key_env"])
            else:
                if provider in config.get("providers", {}):
                    provider_config = config["providers"][provider]
                    llm["api_key"] = provider_config.get("api_key", os.getenv(f"{provider.upper()}_API_KEY"))
                else:
                    llm["api_key"] = os.getenv(f"{provider.upper()}_API_KEY")
        if not llm.get("password") and "password_env" in llm:
            llm["password"] = os.getenv(llm["password_env"])
        if not llm.get("user") and "user_env" in llm:
            llm["user"] = os.getenv(llm["user_env"])
        if not llm.get("api_key") and "api_key_env" in llm:
            llm["api_key"] = os.getenv(llm["api_key_env"])
    provider_config = config.get("providers", {}).get(provider, {})
    for key in provider_config:
        if key not in llm:
            llm[key] = provider_config[key]
    if "api_key" not in llm and "api_key_env" not in llm and os.environ.get(f"{provider.upper()}_API_KEY"):
        llm["api_key_env"] = f"{provider.upper()}_API_KEY"
    config["llms"][i] = llm
    if "api_url" in llm:
        if "api_key" in llm and llm["api_key"]:
            llm["api_url"] = llm["api_url"].replace("${api_key}", llm["api_key"])
        if "user" in llm:
            llm["api_url"] = llm["api_url"].replace("${user}", llm["user"])
        if "password" in llm:
            llm["api_url"] = llm["api_url"].replace("${password}", llm["password"])
        if "model" in llm:
            llm["api_url"] = llm["api_url"].replace("${model}", llm["model"])
        # get the user, password and api_key for substitution
        user = llm.get("user")
        if user is None and "user_env" in llm:
            user = os.environ.get(llm["user_env"])
        password = llm.get("password")
        if password is None and "password_env" in llm:
            password = os.environ.get(llm["password_env"])
        api_key = llm.get("api_key")
        if api_key is None and "api_key_env" in llm:
            api_key = os.environ.get(llm["api_key_env"])
        if api_key is not None:
            llm["api_url"] = llm["api_url"].replace("${api_key}", api_key)
        if user is not None:
            llm["api_url"] = llm["api_url"].replace("${user}", user)
        if password is not None:
            llm["api_url"] = llm["api_url"].replace("${password}", password)
        llm["api_url"] = llm["api_url"].replace("${model}", model)
    # if there is no alias defined, set the alias to the model name
    if not "alias" in llm:
    if "alias" not in llm:
        llm["alias"] = llm["llm"]
# make sure all the aliases are unique
aliases = set()
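The hunk above interleaves the old and new bodies of the config-resolution function (its name is hidden by the collapsed context). To make the new flow concrete, here is a sketch of a hypothetical input and the steps applied to it; the provider and model names are invented, only the dict shape follows what the code reads:

import os

config = {
    "llms": [
        "openai/gpt-4o",                       # string form: split into provider/model on "/"
        {"llm": "myprov/my-model",             # dict form
         "api_url": "https://example.com/${model}?key=${api_key}"},
    ],
    "providers": {
        "myprov": {"api_key_env": "MYPROV_API_KEY"},  # provider defaults merged into the llm dict
    },
}

provider, model = config["llms"][0].split("/")  # "openai", "gpt-4o"
# A missing api_key falls back to an environment variable such as OPENAI_API_KEY,
# and ${api_key}, ${user}, ${password} and ${model} are substituted into api_url.
api_key = os.environ.get(f"{provider.upper()}_API_KEY")
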
6 changes: 6 additions & 0 deletions docs/llms_wrapper/index.html
@@ -53,6 +53,11 @@ <h2 class="section-title" id="header-submodules">Sub-modules</h2>
<div class="desc"><p>Module for the llms_wrapper_test command to perform a simple test to check
if one or more LLMs are working.</p></div>
</dd>
<dt><code class="name"><a title="llms_wrapper.logging" href="logging.html">llms_wrapper.logging</a></code></dt>
<dd>
<div class="desc"><p>Module to handle logging. This module is used to set up the logging for the entire package. This uses the Python
logging module, but with a more …</p></div>
</dd>
<dt><code class="name"><a title="llms_wrapper.utils" href="utils.html">llms_wrapper.utils</a></code></dt>
<dd>
<div class="desc"><p>Module for various utility functions.</p></div>
@@ -80,6 +85,7 @@ <h2 class="section-title" id="header-submodules">Sub-modules</h2>
<li><code><a title="llms_wrapper.config" href="config.html">llms_wrapper.config</a></code></li>
<li><code><a title="llms_wrapper.llms" href="llms.html">llms_wrapper.llms</a></code></li>
<li><code><a title="llms_wrapper.llms_wrapper_test" href="llms_wrapper_test.html">llms_wrapper.llms_wrapper_test</a></code></li>
<li><code><a title="llms_wrapper.logging" href="logging.html">llms_wrapper.logging</a></code></li>
<li><code><a title="llms_wrapper.utils" href="utils.html">llms_wrapper.utils</a></code></li>
<li><code><a title="llms_wrapper.version" href="version.html">llms_wrapper.version</a></code></li>
</ul>
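The new llms_wrapper.logging module is only summarized in this diff, so its own helpers are not visible here. As a stand-in, the sketch below uses nothing but the standard library to attach a handler to the package logger, which any logging-based setup module would cooperate with:

import logging

# Purely stdlib; the actual helper API of llms_wrapper.logging is not shown in this excerpt.
logger = logging.getLogger("llms_wrapper")
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s"))
logger.addHandler(handler)
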
111 changes: 95 additions & 16 deletions docs/llms_wrapper/llms.html
@@ -74,7 +74,8 @@ <h2 class="section-title" id="header-classes">Classes</h2>
        if alias in self.llms:
            raise ValueError(f"Error: Duplicate LLM alias {alias} in configuration")
        self.llms[alias] = llm
        self.llms[alias]["cost"] = 0
        self.llms[alias]["_cost"] = 0
        self.llms[alias]["_elapsed_time"] = 0

    def list_models(self) -> List[Dict]:
        """
@@ -100,17 +101,29 @@ <h2 class="section-title" id="header-classes">Classes</h2>
        """
        return self.llms[item]

    def elapsed(self, llmalias: Union[str, List[str], None] = None):
        """
        Return the elapsed time so far for the given llm alias, the given list of llm aliases,
        or all llms if llmalias is None. Elapsed time is only accumulated for invocations of
        the query method with return_cost=True.
        """
        if llmalias is None:
            return sum([llm["_elapsed_time"] for llm in self.llms.values()])
        if isinstance(llmalias, str):
            return self.llms[llmalias]["_elapsed_time"]
        return sum([self.llms[alias]["_elapsed_time"] for alias in llmalias])

    def cost(self, llmalias: Union[str, List[str], None] = None):
        """
        Return the cost accumulated so far for the given llm alias, the given list of llm aliases,
        or all llms if llmalias is None. Costs are only accumulated for invocations of
        the query method with return_cost=True.
        """
        if llmalias is None:
            return sum([llm["cost"] for llm in self.llms.values()])
            return sum([llm["_cost"] for llm in self.llms.values()])
        if isinstance(llmalias, str):
            return self.llms[llmalias]["cost"]
        return sum([self.llms[alias]["cost"] for alias in llmalias])
            return self.llms[llmalias]["_cost"]
        return sum([self.llms[alias]["_cost"] for alias in llmalias])

    def cost_per_token(self, llmalias: str) -> Tuple[float, float]:
        """
@@ -120,12 +133,23 @@ <h2 class="section-title" id="header-classes">Classes</h2>
        """
        return litellm.cost_per_token(self.llms[llmalias]["llm"], prompt_tokens=1, completion_tokens=1)

    def max_prompt_tokens(self, llmalias: str) -> int:
    def max_output_tokens(self, llmalias: str) -> int:
        """
        Return the maximum number of output tokens the model can generate.
        """
        return litellm.get_max_tokens(self.llms[llmalias]["llm"])

    def max_input_tokens(self, llmalias: str) -> Optional[int]:
        """
        Return the maximum number of tokens possible in the prompt or None if not known.
        """
        try:
            info = get_model_info(self.llms[llmalias]["llm"])
            return info["max_input_tokens"]
        except Exception:
            # the model is not mapped yet, return None to indicate we do not know
            return None

    def set_model_attributes(
            self, llmalias: str,
            input_cost_per_token: float,
@@ -229,7 +253,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
            llm,
            [
                "llm", "alias", "api_key", "api_url", "user", "password",
                "api_key_env", "user_env", "password_env"])
                "api_key_env", "user_env", "password_env", "_cost", "_elapsed_time"])
        error = None
        if llm.get("api_key"):
            completion_kwargs["api_key"] = llm["api_key"]
@@ -242,11 +266,15 @@ <h2 class="section-title" id="header-classes">Classes</h2>
        if debug:
            logger.debug(f"Calling completion with {completion_kwargs}")
        try:
            start = time.time()
            response = completion(
                model=llm["llm"],
                messages=messages,
                **completion_kwargs)
            elapsed = time.time() - start
            logger.debug(f"Full Response: {response}")
            llm["_elapsed_time"] += elapsed
            ret["elapsed_time"] = elapsed
            if return_response:
                ret["response"] = response
                ret["kwargs"] = completion_kwargs
@@ -256,7 +284,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
                model=llm["llm"],
                messages=messages,
            )
            llm["cost"] += ret["cost"]
            llm["_cost"] += ret["cost"]
            usage = response['usage']
            logger.debug(f"Usage: {usage}")
            ret["n_completion_tokens"] = usage.completion_tokens
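
With the timing added above, every successful call now accumulates into llm["_elapsed_time"] and reports ret["elapsed_time"]. A usage sketch follows; the LLMS constructor argument, the alias "gpt4", and the call signatures of make_messages() and query() are assumptions read off this diff, not confirmed API:

from llms_wrapper.llms import LLMS

llms = LLMS(config)                                   # config: a resolved dict as sketched earlier
messages = llms.make_messages("What is 2+2?")         # make_messages is listed in this class
ret = llms.query("gpt4", messages, return_cost=True)  # parameter names assumed

print(ret["elapsed_time"])                            # wall-clock seconds for this call
print(ret["cost"])                                    # present because return_cost=True
print(llms.cost("gpt4"), llms.elapsed("gpt4"))        # per-alias accumulators
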
@@ -292,10 +320,10 @@ <h3>Methods</h3>
    the query method with return_cost=True.
    """
    if llmalias is None:
        return sum([llm["cost"] for llm in self.llms.values()])
        return sum([llm["_cost"] for llm in self.llms.values()])
    if isinstance(llmalias, str):
        return self.llms[llmalias]["cost"]
    return sum([self.llms[alias]["cost"] for alias in llmalias])</code></pre>
        return self.llms[llmalias]["_cost"]
    return sum([self.llms[alias]["_cost"] for alias in llmalias])</code></pre>
</details>
<div class="desc"><p>Return the cost accumulated so far for the given llm alias given list of llm aliases
or all llms if llmalias is None. Costs are only accumulated for invocations of
@@ -321,6 +349,30 @@ <h3>Methods</h3>
This may be wrong or cost may get calculated in a different way, e.g. depending on
cache, response time etc.</p></div>
</dd>
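A concrete reading of cost_per_token(), whose result the note above already hedges as approximate (the alias and token counts are made up):

# llms: an LLMS instance as constructed in the earlier sketch
in_cost, out_cost = llms.cost_per_token("gpt4")  # per-token prices from litellm's pricing table
estimate = 500 * in_cost + 200 * out_cost        # rough cost of a 500-in/200-out exchange
print(f"approx ${estimate:.6f} per call")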
<dt id="llms_wrapper.llms.LLMS.elapsed"><code class="name flex">
<span>def <span class="ident">elapsed</span></span>(<span>self, llmalias: str | List[str] | None = None)</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def elapsed(self, llmalias: Union[str, List[str], None] = None):
&#34;&#34;&#34;
Return the elapsed time so far for the given llm alias given list of llm aliases
or all llms if llmalias is None. Elapsed time is only accumulated for invocations of
the query method with return_cost=True.
&#34;&#34;&#34;
if llmalias is None:
return sum([llm[&#34;_elapsed_time&#34;] for llm in self.llms.values()])
if isinstance(llmalias, str):
return self.llms[llmalias][&#34;_elapsed_time&#34;]
return sum([self.llms[alias][&#34;_elapsed_time&#34;] for alias in llmalias])</code></pre>
</details>
<div class="desc"><p>Return the elapsed time so far for the given llm alias given list of llm aliases
or all llms if llmalias is None. Elapsed time is only accumulated for invocations of
the query method with return_cost=True.</p></div>
</dd>
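elapsed() deliberately mirrors the three call shapes of cost(); a short sketch with invented aliases:

# llms: an LLMS instance as constructed in the earlier sketch
llms.elapsed("gpt4")             # seconds accumulated for one alias
llms.elapsed(["gpt4", "small"])  # summed over a list of aliases
llms.elapsed()                   # summed over every configured llm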
<dt id="llms_wrapper.llms.LLMS.get"><code class="name flex">
<span>def <span class="ident">get</span></span>(<span>self, alias: str) ‑> Dict | None</span>
</code></dt>
@@ -431,15 +483,36 @@ <h2 id="args">Args</h2>
<h2 id="returns">Returns</h2>
<p>A list of message dictionaries</p></div>
</dd>
<dt id="llms_wrapper.llms.LLMS.max_prompt_tokens"><code class="name flex">
<span>def <span class="ident">max_prompt_tokens</span></span>(<span>self, llmalias: str) ‑> int</span>
<dt id="llms_wrapper.llms.LLMS.max_input_tokens"><code class="name flex">
<span>def <span class="ident">max_input_tokens</span></span>(<span>self, llmalias: str) ‑> int | None</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def max_input_tokens(self, llmalias: str) -&gt; Optional[int]:
&#34;&#34;&#34;
Return the maximum number of tokens possible in the prompt or None if not known.
&#34;&#34;&#34;
try:
info = get_model_info(self.llms[llmalias][&#34;llm&#34;])
return info[&#34;max_input_tokens&#34;]
except:
# the model is not mapped yet, return None to indicate we do not know
return None</code></pre>
</details>
<div class="desc"><p>Return the maximum number of tokens possible in the prompt or None if not known.</p></div>
</dd>
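Because max_input_tokens() returns None for models litellm has no metadata for, callers should guard before comparing; a sketch with an invented alias and token count:

# llms: an LLMS instance as constructed in the earlier sketch
prompt_tokens = 12000                  # e.g. counted with a tokenizer beforehand
limit = llms.max_input_tokens("gpt4")
if limit is not None and prompt_tokens > limit:
    raise ValueError(f"prompt exceeds the model's {limit}-token input window")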
<dt id="llms_wrapper.llms.LLMS.max_output_tokens"><code class="name flex">
<span>def <span class="ident">max_output_tokens</span></span>(<span>self, llmalias: str) ‑> int</span>
</code></dt>
<dd>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def max_prompt_tokens(self, llmalias: str) -&gt; int:
<pre><code class="python">def max_output_tokens(self, llmalias: str) -&gt; int:
&#34;&#34;&#34;
Return the maximum number of prompt tokens that can be sent to the model.
&#34;&#34;&#34;
@@ -492,7 +565,7 @@ <h2 id="returns">Returns</h2>
        llm,
        [
            "llm", "alias", "api_key", "api_url", "user", "password",
            "api_key_env", "user_env", "password_env"])
            "api_key_env", "user_env", "password_env", "_cost", "_elapsed_time"])
    error = None
    if llm.get("api_key"):
        completion_kwargs["api_key"] = llm["api_key"]
@@ -505,11 +578,15 @@ <h2 id="returns">Returns</h2>
    if debug:
        logger.debug(f"Calling completion with {completion_kwargs}")
    try:
        start = time.time()
        response = completion(
            model=llm["llm"],
            messages=messages,
            **completion_kwargs)
        elapsed = time.time() - start
        logger.debug(f"Full Response: {response}")
        llm["_elapsed_time"] += elapsed
        ret["elapsed_time"] = elapsed
        if return_response:
            ret["response"] = response
            ret["kwargs"] = completion_kwargs
@@ -519,7 +596,7 @@ <h2 id="returns">Returns</h2>
            model=llm["llm"],
            messages=messages,
        )
        llm["cost"] += ret["cost"]
        llm["_cost"] += ret["cost"]
        usage = response['usage']
        logger.debug(f"Usage: {usage}")
        ret["n_completion_tokens"] = usage.completion_tokens
@@ -614,11 +691,13 @@ <h4><code><a title="llms_wrapper.llms.LLMS" href="#llms_wrapper.llms.LLMS">LLMS</a></code></h4>
<ul class="">
<li><code><a title="llms_wrapper.llms.LLMS.cost" href="#llms_wrapper.llms.LLMS.cost">cost</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.cost_per_token" href="#llms_wrapper.llms.LLMS.cost_per_token">cost_per_token</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.elapsed" href="#llms_wrapper.llms.LLMS.elapsed">elapsed</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.get" href="#llms_wrapper.llms.LLMS.get">get</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.list_aliases" href="#llms_wrapper.llms.LLMS.list_aliases">list_aliases</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.list_models" href="#llms_wrapper.llms.LLMS.list_models">list_models</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.make_messages" href="#llms_wrapper.llms.LLMS.make_messages">make_messages</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.max_prompt_tokens" href="#llms_wrapper.llms.LLMS.max_prompt_tokens">max_prompt_tokens</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.max_input_tokens" href="#llms_wrapper.llms.LLMS.max_input_tokens">max_input_tokens</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.max_output_tokens" href="#llms_wrapper.llms.LLMS.max_output_tokens">max_output_tokens</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.query" href="#llms_wrapper.llms.LLMS.query">query</a></code></li>
<li><code><a title="llms_wrapper.llms.LLMS.set_model_attributes" href="#llms_wrapper.llms.LLMS.set_model_attributes">set_model_attributes</a></code></li>
</ul>