Performance Metrics #38

Merged: 56 commits, Jan 10, 2025
Commits (56)
5a2e0be
adds timeit functionality to chat and query functions
jeisenman23 Nov 14, 2024
86e2a51
reformatting time metrics
jeisenman23 Nov 14, 2024
93a7d97
trying to fix lint
jeisenman23 Nov 15, 2024
4e4dc4b
fixing lint
jeisenman23 Nov 15, 2024
fab34d9
reducing complexity
jeisenman23 Nov 15, 2024
785ec4d
removing white spaces
jeisenman23 Nov 15, 2024
b0429a4
fixing lint
jeisenman23 Nov 15, 2024
a41b030
fixing return statement
jeisenman23 Nov 15, 2024
dba79f8
fixing docs
jeisenman23 Nov 15, 2024
dc4579a
removing timeit as optional argument
jeisenman23 Nov 21, 2024
bfdd13f
changing performance metrics according to stream mode:
jeisenman23 Nov 21, 2024
96be266
update test
jeisenman23 Nov 21, 2024
6d37186
change test to fit performance metrics
jeisenman23 Dec 10, 2024
4aa6fb2
change test to fit performance metrics
jeisenman23 Dec 10, 2024
0ac1ca9
adding back query into chat
jeisenman23 Dec 10, 2024
f8c3d1a
removing await
jeisenman23 Dec 10, 2024
4018c59
fixing test
jeisenman23 Dec 20, 2024
4ca2970
fixing test
jeisenman23 Dec 20, 2024
34c58c0
removing whitespace
jeisenman23 Dec 20, 2024
abd7000
fixing space
jeisenman23 Dec 20, 2024
d94403c
finicky flake8 error fix
jeisenman23 Dec 20, 2024
63070ef
fixing elm tests
jeisenman23 Dec 20, 2024
20fb602
ensuring test cases
jeisenman23 Dec 20, 2024
15c417a
reversing - statement
jeisenman23 Dec 20, 2024
0186fdf
removing whitespace
jeisenman23 Dec 20, 2024
d4a9bf0
removing whitespace
jeisenman23 Dec 20, 2024
fc88d7a
fixing line issue
jeisenman23 Dec 27, 2024
878e4f0
fixing osti bug
jeisenman23 Jan 2, 2025
b496352
adding spaces for engineer query
jeisenman23 Jan 2, 2025
991d3e3
adding spaces for chat function
jeisenman23 Jan 2, 2025
af86612
remove trailing whitespaces
jeisenman23 Jan 2, 2025
ae37420
Merge branch 'main' into time
jeisenman23 Jan 2, 2025
3232c02
fixing OSTI bug
jeisenman23 Jan 6, 2025
5334535
removing comments for flake
jeisenman23 Jan 6, 2025
8f4cdcc
making line shorter
jeisenman23 Jan 6, 2025
8d15faa
line too long
jeisenman23 Jan 6, 2025
8348af3
adding blank line
jeisenman23 Jan 6, 2025
667fe05
rerun of actions
jeisenman23 Jan 6, 2025
80be899
changing first
jeisenman23 Jan 6, 2025
564f5d4
attempting to fix osti
jeisenman23 Jan 6, 2025
bc196f1
attempt to fix OSTI in multiple envs
jeisenman23 Jan 6, 2025
022f734
removing test and fixing test
jeisenman23 Jan 6, 2025
371dcc2
inputting local change that works
jeisenman23 Jan 6, 2025
6549134
fixing lint
jeisenman23 Jan 6, 2025
ab7449c
debug statement
jeisenman23 Jan 6, 2025
96c3b87
attempt to fix escape sequence
jeisenman23 Jan 6, 2025
24b41c9
attempting to fix str
jeisenman23 Jan 6, 2025
7f55760
fixing escape
jeisenman23 Jan 6, 2025
4eac5dc
getting get pages to work
jeisenman23 Jan 7, 2025
2ed61b9
clean code
jeisenman23 Jan 7, 2025
7819a13
fixing linter
jeisenman23 Jan 7, 2025
80c92b6
fixing linter
jeisenman23 Jan 7, 2025
0e6b9f9
fixing linter
jeisenman23 Jan 7, 2025
18a4465
fixing over indent
jeisenman23 Jan 7, 2025
ac3e3c5
cleaned up docstrings and removed unnecessary debug kwarg from wizard…
grantbuster Jan 9, 2025
28680a0
Merge pull request #44 from NREL/gb/nodebugkw
jeisenman23 Jan 10, 2025
Files changed
3 changes: 2 additions & 1 deletion .gitignore
@@ -122,6 +122,7 @@ examples/research_hub/pdfs/
examples/research_hub/embed/
examples/research_hub/txt/
examples/research_hub/meta.csv

*ignore*.py

# pixi environments
@@ -130,4 +131,4 @@ examples/research_hub/meta.csv
pixi*

# Scratch
*scratch*/
*scratch*/
75 changes: 39 additions & 36 deletions elm/web/osti.py
@@ -2,6 +2,7 @@
"""
Utilities for retrieving data from OSTI.
"""
import re
import copy
import requests
import json
@@ -28,7 +29,9 @@ def __init__(self, record):

@staticmethod
def strip_nested_brackets(text):
"""Remove text between brackets/parentheses for cleaning OSTI text"""
"""
Remove text between brackets/parentheses for cleaning OSTI text
"""
ret = ''
skip1c = 0
skip2c = 0
@@ -183,54 +186,54 @@ def __init__(self, url, n_pages=1):
super().__init__(records)

def _get_first(self):
"""Get the first page of OSTI records

Returns
-------
list
"""
"""Get the first page of OSTI records"""
self._response = self._session.get(self.url)

if not self._response.ok:
msg = ('OSTI API Request got error {}: "{}"'
.format(self._response.status_code,
self._response.reason))
msg = f'''OSTI API Request got error {self._response.status_code}:
"{self._response.reason}"'''
raise RuntimeError(msg)
first_page = self._response.json()

try:
raw_text = self._response.text
if raw_text.endswith('}\r\n]'):
raw_text = raw_text[:-1]
first_page = json.loads(raw_text)
except (json.JSONDecodeError, UnicodeError) as e:
logger.error(f"JSON decode error: {str(e)}\nRaw text: {raw_text[:500]}...")
raise

self._n_pages = 1
if 'last' in self._response.links:
url = self._response.links['last']['url']
self._n_pages = int(url.split('page=')[-1])

logger.debug('Found approximately {} records.'
.format(self._n_pages * len(first_page)))

logger.debug(f'Found approximately {self._n_pages * len(first_page)} records.')
return first_page

def _get_pages(self, n_pages):
"""Get response pages up to n_pages from OSTI.

Parameters
----------
n_pages : int
Number of pages to retrieve

Returns
-------
next_pages : list
This function will return a generator of next pages, each of which
is a list of OSTI records
"""
if n_pages > 1:
for page in range(2, self._n_pages + 1):
if page <= n_pages:
next_page = self._session.get(self.url,
params={'page': page})
next_page = next_page.json()
yield next_page
else:
break
"""Get response pages up to n_pages from OSTI.

Parameters
----------
n_pages : int
Number of pages to retrieve

Returns
-------
next_pages : list
This function will return a generator of next pages, each of which
is a list of OSTI records
"""
if n_pages > 1:
for page in range(2, self._n_pages + 1):
if page <= n_pages:
next_page = self._session.get(self.url,
params={'page': page})
next_page = next_page.json()
yield next_page
else:
break

def _get_all(self, n_pages):
"""Get all pages of records up to n_pages.
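
For reference, the retrieval flow in _get_first and _get_pages above reduces to a short pattern: fetch page one, read the total page count from the "last" relation of the Link response header, then request the remaining pages with a "page" query parameter. A minimal standalone sketch of that pattern (the function name and error text here are illustrative, not part of this PR):

import requests

def fetch_pages(url, n_pages=1):
    """Yield one list of records per page, up to n_pages."""
    session = requests.Session()
    response = session.get(url)
    if not response.ok:
        raise RuntimeError(
            f'Request got error {response.status_code}: "{response.reason}"')
    yield response.json()

    # The API advertises the final page via the "last" Link relation,
    # mirroring the parsing done in _get_first above.
    total_pages = 1
    if 'last' in response.links:
        total_pages = int(response.links['last']['url'].split('page=')[-1])

    # Remaining pages are requested with a "page" query parameter,
    # mirroring _get_pages above.
    for page in range(2, min(n_pages, total_pages) + 1):
        yield session.get(url, params={'page': page}).json()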
55 changes: 38 additions & 17 deletions elm/wizard.py
@@ -3,6 +3,7 @@
ELM energy wizard
"""
from abc import ABC, abstractmethod
from time import perf_counter
import copy
import os
import json
@@ -61,8 +62,12 @@ def query_vector_db(self, query, limit=100):
ranked strings/scores outputs.
"""

def engineer_query(self, query, token_budget=None, new_info_threshold=0.7,
convo=False):
def engineer_query(self,
query,
token_budget=None,
new_info_threshold=0.7,
convo=False
):
"""Engineer a query for GPT using the corpus of information

Parameters
@@ -79,6 +84,7 @@ def engineer_query(self, query, token_budget=None, new_info_threshold=0.7,
Flag to perform semantic search with full conversation history
(True) or just the single query (False). Call EnergyWizard.clear()
to reset the chat history.

Returns
-------
message : str
@@ -87,6 +93,11 @@ def engineer_query(self, query, token_budget=None, new_info_threshold=0.7,
references : list
The list of references (strs) used in the engineered prompt is
returned here
vector_query_time : float
Time in seconds spent querying the vector database
used_index : list
Indices of the documents used in building the engineered query
"""

self.messages.append({"role": "user", "content": query})
@@ -99,9 +110,10 @@ def engineer_query(self, query, token_budget=None, new_info_threshold=0.7,
query = '\n\n'.join(query)

token_budget = token_budget or self.token_budget

start_time = perf_counter()
strings, _, idx = self.query_vector_db(query)

end_time = perf_counter()
vector_query_time = end_time - start_time
message = copy.deepcopy(self.MODEL_INSTRUCTION)
question = f"\n\nQuestion: {query}"
used_index = []
@@ -125,8 +137,7 @@ def engineer_query(self, query, token_budget=None, new_info_threshold=0.7,
message = message + question
used_index = np.array(used_index)
references = self.make_ref_list(used_index)

return message, references, used_index
return message, references, used_index, vector_query_time

@abstractmethod
def make_ref_list(self, idx):
@@ -144,15 +155,17 @@ def make_ref_list(self, idx):
["{ref_title} ({ref_url})"]
"""

def chat(self, query,
def chat(self,
query,
debug=True,
stream=True,
temperature=0,
convo=False,
token_budget=None,
new_info_threshold=0.7,
print_references=False,
return_chat_obj=False):
return_chat_obj=False
):
"""Answers a query by doing a semantic search of relevant text with
embeddings and then sending engineered query to the LLM.

@@ -195,12 +208,15 @@ def chat(self, query,
references : list
If debug is True, the list of references (strs) used in the
engineered prompt is returned here
performance : dict
Dictionary with keys 'total_chat_time',
'chat_completion_time' and 'vectordb_query_time'.
"""

start_chat_time = perf_counter()
out = self.engineer_query(query, token_budget=token_budget,
new_info_threshold=new_info_threshold,
convo=convo)
query, references, _ = out
query, references, _, vector_query_time = out

messages = [{"role": "system", "content": self.MODEL_ROLE},
{"role": "user", "content": query}]
@@ -209,20 +225,20 @@ def chat(self, query,
messages=messages,
temperature=temperature,
stream=stream)
start_completion_time = perf_counter()

response = self._client.chat.completions.create(**kwargs)

if return_chat_obj:
return response, query, references

if stream:
for chunk in response:
chunk_msg = chunk.choices[0].delta.content or ""
response_message += chunk_msg
print(chunk_msg, end='')

else:
response_message = response.choices[0].message.content
finish_completion_time = perf_counter()
chat_completion_time = finish_completion_time - start_completion_time

self.messages.append({'role': 'assistant',
'content': response_message})
@@ -234,11 +250,16 @@ def chat(self, query,
response_message += ref_msg
if stream:
print(ref_msg)

end_time = perf_counter()
total_chat_time = end_time - start_chat_time
performance = {
"total_chat_time": total_chat_time,
"chat_completion_time": chat_completion_time,
"vectordb_query_time": vector_query_time
}
if debug:
return response_message, query, references
else:
return response_message
return response_message, query, references, performance
return response_message, query, performance


class EnergyWizard(EnergyWizardBase):
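
With these changes, engineer_query returns four values and chat returns the performance dict in both debug and non-debug modes. A hedged usage sketch, assuming an EnergyWizard instance named wizard built as in the tests (the question text is illustrative):

# Unpack the new four-value return of engineer_query.
message, references, used_index, vector_query_time = wizard.engineer_query(
    'What time is it?')

# In debug mode, chat now returns four values as well.
response_message, query, references, performance = wizard.chat(
    'What time is it?', debug=True, stream=False)

# The performance dict carries the three timings documented above.
print(performance['total_chat_time'])
print(performance['chat_completion_time'])
print(performance['vectordb_query_time'])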
9 changes: 4 additions & 5 deletions tests/test_wizard.py
@@ -70,7 +70,6 @@ def test_chunk_and_embed(mocker):

Note that embedding api is mocked here and not actually tested.
"""

corpus = make_corpus(mocker)
wizard = EnergyWizard(pd.DataFrame(corpus), token_budget=1000,
ref_col='ref')
@@ -81,12 +80,12 @@ def test_chunk_and_embed(mocker):
question = 'What time is it?'
out = wizard.chat(question, debug=True, stream=False,
print_references=True)
msg, query, ref = out

assert msg.startswith('hello!')
response_message, query, references, performance = out
assert response_message.startswith('hello!')
assert query.startswith(EnergyWizard.MODEL_INSTRUCTION)
assert query.endswith(question)
assert 'source0' in ref
assert 'source0' in references
assert isinstance(performance, dict)


def test_convo_query(mocker):
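
If tighter coverage is wanted, the test could also pin the exact timing keys. A possible follow-up assertion (a sketch, not part of this PR):

assert set(performance) == {'total_chat_time',
                            'chat_completion_time',
                            'vectordb_query_time'}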