Add mocked test & benchmark for LLM agents pipeline (#1424)
* Add a test for the LLM agents pipeline that doesn't communicate with OpenAI or SerpAPI.
* Add a benchmark for the LLM agents pipeline.

Includes changes from PR #1421
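
Both additions run fully offline: the OpenAI and SerpAPI calls are replaced with asyncio-sleeping mocks whose simulated latencies come from new fixtures in `tests/benchmarks/conftest.py`. As a sketch (the variable names come from the diff below; the values here are arbitrary), the latencies can be shrunk for quick local iteration by setting the corresponding environment variables before the fixtures resolve:

```python
import os

# Must run before pytest resolves the fixtures, e.g. via the shell or a local conftest.
os.environ["MOCK_OPENAI_REQUEST_TIME"] = "0.01"   # default: 1.265 s
os.environ["MOCK_SERPAPI_REQUEST_TIME"] = "0.01"  # default: 1.7 s
```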

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.

Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #1424
dagardner-nv authored Dec 14, 2023
1 parent fb35a5e commit fbc68de
Showing 4 changed files with 308 additions and 30 deletions.
36 changes: 29 additions & 7 deletions tests/benchmarks/conftest.py
@@ -77,15 +77,38 @@ def pytest_benchmark_update_json(config, benchmarks, output_json):
         bench['stats']['median_throughput_bytes'] = (byte_count * repeat) / bench['stats']['median']
 
 
+@pytest.fixture(name="mock_openai_request_time")
+def mock_openai_request_time_fixture():
+    return float(os.environ.get("MOCK_OPENAI_REQUEST_TIME", 1.265))
+
+
+@pytest.fixture(name="mock_nemollm_request_time")
+def mock_nemollm_request_time_fixture():
+    return float(os.environ.get("MOCK_NEMOLLM_REQUEST_TIME", 0.412))
+
+
+@pytest.fixture(name="mock_web_scraper_request_time")
+def mock_web_scraper_request_time_fixture():
+    return float(os.environ.get("MOCK_WEB_SCRAPER_REQUEST_TIME", 0.5))
+
+
+@pytest.fixture(name="mock_feedparser_request_time")
+def mock_feedparser_request_time_fixture():
+    return float(os.environ.get("MOCK_FEEDPARSER_REQUEST_TIME", 0.5))
+
+
+@pytest.fixture(name="mock_serpapi_request_time")
+def mock_serpapi_request_time_fixture():
+    return float(os.environ.get("MOCK_SERPAPI_REQUEST_TIME", 1.7))
+
+
 @pytest.mark.usefixtures("openai")
 @pytest.fixture(name="mock_chat_completion")
-@pytest.mark.usefixtures()
-def mock_chat_completion_fixture(mock_chat_completion: mock.MagicMock):
-    sleep_time = float(os.environ.get("MOCK_OPENAI_REQUEST_TIME", 1.265))
+def mock_chat_completion_fixture(mock_chat_completion: mock.MagicMock, mock_openai_request_time: float):
 
     async def sleep_first(*args, **kwargs):
         # Sleep time is based on average request time
-        await asyncio.sleep(sleep_time)
+        await asyncio.sleep(mock_openai_request_time)
         return mock.DEFAULT
 
     mock_chat_completion.acreate.side_effect = sleep_first
@@ -95,13 +118,12 @@ async def sleep_first(*args, **kwargs):
 
 @pytest.mark.usefixtures("nemollm")
 @pytest.fixture(name="mock_nemollm")
-def mock_nemollm_fixture(mock_nemollm: mock.MagicMock):
-    sleep_time = float(os.environ.get("MOCK_NEMOLLM_REQUEST_TIME", 0.412))
+def mock_nemollm_fixture(mock_nemollm: mock.MagicMock, mock_nemollm_request_time: float):
 
     # The generate function is a blocking call that returns a future when return_type="async"
     async def sleep_first(fut: asyncio.Future, value: typing.Any = mock.DEFAULT):
         # Sleep time is based on average request time
-        await asyncio.sleep(sleep_time)
+        await asyncio.sleep(mock_nemollm_request_time)
         fut.set_result(value)
 
     def create_future(*args, **kwargs) -> asyncio.Future:
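
The updated `mock_nemollm` fixture mimics an API whose blocking `generate()` call hands back a future when invoked with `return_type="async"`: the mock schedules a coroutine that sleeps for the simulated request time, then resolves the future. A self-contained sketch of that pattern, independent of Morpheus (names and timings here are illustrative):

```python
import asyncio
import typing
from unittest import mock


async def main():

    async def sleep_first(fut: asyncio.Future, value: typing.Any):
        # Simulate the average request time before resolving the future
        await asyncio.sleep(0.05)
        fut.set_result(value)

    def create_future(*args, **kwargs) -> asyncio.Future:
        # A blocking call that immediately returns a future, similar to
        # nemollm's generate(..., return_type="async")
        loop = asyncio.get_running_loop()
        fut = loop.create_future()
        loop.create_task(sleep_first(fut, "generated text"))
        return fut

    mock_generate = mock.MagicMock(side_effect=create_future)
    result = await mock_generate("prompt", return_type="async")
    print(result)  # generated text


asyncio.run(main())
```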
174 changes: 174 additions & 0 deletions tests/benchmarks/test_bench_agents_simple_pipeline.py
@@ -0,0 +1,174 @@
# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import collections.abc
import os
import typing
from unittest import mock

import langchain
import pytest
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain.agents import load_tools
from langchain.agents.tools import Tool
from langchain.utilities import serpapi

import cudf

from morpheus.config import Config
from morpheus.llm import LLMEngine
from morpheus.llm.nodes.extracter_node import ExtracterNode
from morpheus.llm.nodes.langchain_agent_node import LangChainAgentNode
from morpheus.llm.task_handlers.simple_task_handler import SimpleTaskHandler
from morpheus.messages import ControlMessage
from morpheus.pipeline.linear_pipeline import LinearPipeline
from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage
from morpheus.stages.llm.llm_engine_stage import LLMEngineStage
from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage
from morpheus.stages.preprocess.deserialize_stage import DeserializeStage


def _build_agent_executor(model_name: str):

    llm = langchain.OpenAI(model=model_name, temperature=0, cache=False)

    # Explicitly construct the serpapi tool, loading it via load_tools makes it too difficult to mock
    tools = [
        Tool(
            name="Search",
            description="",
            func=serpapi.SerpAPIWrapper().run,
            coroutine=serpapi.SerpAPIWrapper().arun,
        )
    ]
    tools.extend(load_tools(["llm-math"], llm=llm))

    agent_executor = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

    return agent_executor


def _build_engine(model_name: str):

    engine = LLMEngine()

    engine.add_node("extracter", node=ExtracterNode())

    engine.add_node("agent",
                    inputs=[("/extracter")],
                    node=LangChainAgentNode(agent_executor=_build_agent_executor(model_name=model_name)))

    engine.add_task_handler(inputs=["/agent"], handler=SimpleTaskHandler())

    return engine


def _run_pipeline(config: Config, source_dfs: list[cudf.DataFrame], model_name: str = "test_model"):
    completion_task = {"task_type": "completion", "task_dict": {"input_keys": ["questions"]}}

    pipe = LinearPipeline(config)

    pipe.set_source(InMemorySourceStage(config, dataframes=source_dfs))

    pipe.add_stage(
        DeserializeStage(config, message_type=ControlMessage, task_type="llm_engine", task_payload=completion_task))

    pipe.add_stage(LLMEngineStage(config, engine=_build_engine(model_name=model_name)))

    pipe.add_stage(InMemorySinkStage(config))

    pipe.run()


@pytest.mark.usefixtures("openai", "restore_environ")
@pytest.mark.use_python
@pytest.mark.benchmark
@mock.patch("langchain.utilities.serpapi.SerpAPIWrapper.aresults")
@mock.patch("langchain.OpenAI._agenerate", autospec=True)  # autospec is needed as langchain will inspect the function
def test_agents_simple_pipe(mock_openai_agenerate: mock.AsyncMock,
                            mock_serpapi_aresults: mock.AsyncMock,
                            mock_openai_request_time: float,
                            mock_serpapi_request_time: float,
                            benchmark: collections.abc.Callable[[collections.abc.Callable], typing.Any],
                            config: Config):
    os.environ.update({'OPENAI_API_KEY': 'test_api_key', 'SERPAPI_API_KEY': 'test_api_key'})

    from langchain.schema import Generation
    from langchain.schema import LLMResult

    assert serpapi.SerpAPIWrapper().aresults is mock_serpapi_aresults

    model_name = "test_model"

    mock_responses = [
        LLMResult(generations=[[
            Generation(text="I should use a search engine to find information about unittests.\n"
                       "Action: Search\nAction Input: \"unittests\"",
                       generation_info={
                           'finish_reason': 'stop', 'logprobs': None
                       })
        ]],
                  llm_output={
                      'token_usage': {}, 'model_name': model_name
                  }),
        LLMResult(generations=[[
            Generation(text="I now know the final answer.\nFinal Answer: 3.99.",
                       generation_info={
                           'finish_reason': 'stop', 'logprobs': None
                       })
        ]],
                  llm_output={
                      'token_usage': {}, 'model_name': model_name
                  })
    ]

    async def _mock_openai_agenerate(self, *args, **kwargs):  # pylint: disable=unused-argument
        nonlocal mock_responses
        call_count = getattr(self, '_unittest_call_count', 0)
        response = mock_responses[call_count % 2]

        # The OpenAI object will raise a ValueError if we attempt to set the attribute directly or use setattr
        self.__dict__['_unittest_call_count'] = call_count + 1
        await asyncio.sleep(mock_openai_request_time)
        return response

    mock_openai_agenerate.side_effect = _mock_openai_agenerate

    async def _mock_serpapi_aresults(*args, **kwargs):  # pylint: disable=unused-argument
        await asyncio.sleep(mock_serpapi_request_time)
        return {
            'answer_box': {
                'answer': '25 years', 'link': 'http://unit.test', 'people_also_search_for': []
            },
            'inline_people_also_search_for': [],
            'knowledge_graph': {},
            'organic_results': [],
            'pagination': {},
            'related_questions': [],
            'related_searches': [],
            'search_information': {},
            'search_metadata': {},
            'search_parameters': {},
            'serpapi_pagination': None
        }

    mock_serpapi_aresults.side_effect = _mock_serpapi_aresults

    source_df = cudf.DataFrame(
        {"questions": ["Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?"]})

    benchmark(_run_pipeline, config, source_dfs=[source_df], model_name=model_name)
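
The mocked `_agenerate` walks the agent through a two-step ReAct loop: the first canned `LLMResult` triggers the Search tool, the second ends the run with a final answer. The call counter lives in the object's `__dict__` because the pydantic-based `OpenAI` class rejects plain attribute assignment. A stripped-down sketch of the same cycling-side-effect technique, with no Morpheus or langchain dependency (the response strings are illustrative):

```python
import asyncio
from unittest import mock


async def main():
    responses = ["Action: Search", "Final Answer: 3.99"]
    call_count = 0

    async def cycling(*args, **kwargs):
        # Return the next canned response on each call, wrapping around
        nonlocal call_count
        response = responses[call_count % len(responses)]
        call_count += 1
        await asyncio.sleep(0.01)  # stand-in for the simulated request time
        return response

    agenerate = mock.AsyncMock(side_effect=cycling)
    print(await agenerate())  # Action: Search
    print(await agenerate())  # Final Answer: 3.99


asyncio.run(main())
```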
6 changes: 2 additions & 4 deletions tests/benchmarks/test_bench_vdb_upload_pipeline.py
@@ -98,6 +98,8 @@ def _run_pipeline(config: Config,
 @mock.patch('requests.Session')
 def test_vdb_upload_pipe(mock_requests_session: mock.MagicMock,
                          mock_feedparser_http_get: mock.MagicMock,
+                         mock_web_scraper_request_time: float,
+                         mock_feedparser_request_time: float,
                          benchmark: collections.abc.Callable[[collections.abc.Callable], typing.Any],
                          config: Config,
                          milvus_server_uri: str,
@@ -106,8 +108,6 @@ def test_vdb_upload_pipe(mock_requests_session: mock.MagicMock,
     with open(os.path.join(TEST_DIRS.tests_data_dir, 'service/cisa_web_responses.json'), encoding='utf-8') as fh:
         web_responses = json.load(fh)
 
-    mock_web_scraper_request_time = float(os.environ.get("MOCK_WEB_SCRAPER_REQUEST_TIME", 0.5))
-
     def mock_get_fn(url: str):
         mock_response = mock.MagicMock()
         mock_response.ok = True
@@ -119,8 +119,6 @@ def mock_get_fn(url: str):
     mock_requests_session.return_value = mock_requests_session
     mock_requests_session.get.side_effect = mock_get_fn
 
-    mock_feedparser_request_time = float(os.environ.get("MOCK_FEEDPARSER_REQUEST_TIME", 0.5))
-
     def mock_feedparser_http_get_fn(*args, **kwargs):  # pylint: disable=unused-argument
         time.sleep(mock_feedparser_request_time)
         # The RSS Parser expects a bytes string
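
A detail worth noting in this file: `mock_requests_session.return_value = mock_requests_session` makes the patched `requests.Session()` constructor return the mock itself, so code that instantiates a session and calls `.get(...)` hits the mocked method. A minimal illustration (the URL is a placeholder):

```python
from unittest import mock

import requests

with mock.patch('requests.Session') as mock_session:
    # Session() now returns the mock itself, so Session().get(...) is mocked too
    mock_session.return_value = mock_session
    mock_session.get.return_value = mock.MagicMock(ok=True, text='hello')

    response = requests.Session().get('http://unit.test')
    assert response.ok and response.text == 'hello'
```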