diff --git a/docs/docs/reference/gen_notebooks/feedback_prod.md b/docs/docs/reference/gen_notebooks/feedback_prod.md
index 9a55d833fe9f..88b2e3421f27 100644
--- a/docs/docs/reference/gen_notebooks/feedback_prod.md
+++ b/docs/docs/reference/gen_notebooks/feedback_prod.md
@@ -18,22 +18,24 @@ title: Log Feedback from Production
 
 It is often hard to automatically evaluate a generated LLM response, so depending on your risk tolerance, you can gather direct user feedback to find areas to improve.
 
-In this tutorial, we'll use a custom RAG chatbot as an example app with which the users can interact and which allows us to collect user feedback.
+In this tutorial, we'll use a custom chatbot as an example app from which to collect user feedback.
 We'll use Streamlit to build the interface and we'll capture the LLM interactions and feedback in Weave.
 
 ## Setup
 
 ```python
-!pip install weave openai streamlit
+!pip install weave openai streamlit wandb
+!pip install set-env-colab-kaggle-dotenv -q # for env var
 ```
 
-First, create a file called `secrets.toml` and add an OpenAI key so it works with [st.secrets](https://docs.streamlit.io/develop/api-reference/connections/st.secrets). You can [sign up](https://platform.openai.com/signup) on the OpenAI platform to get your own API key.
-
 
 ```python
-# secrets.toml
-OPENAI_API_KEY = "your OpenAI key"
+# Add a .env file with your OpenAI and WandB API keys
+from set_env import set_env
+
+_ = set_env("OPENAI_API_KEY")
+_ = set_env("WANDB_API_KEY")
 ```
 
 Next, create a file called `chatbot.py` with the following contents:
 
@@ -42,86 +44,151 @@ Next, create a file called `chatbot.py` with the following contents:
 
 ```python
 # chatbot.py
 
+import openai
 import streamlit as st
-from openai import OpenAI
+import wandb
+from set_env import set_env
 
 import weave
 
-st.title("Add feedback")
-
+_ = set_env("OPENAI_API_KEY")
+_ = set_env("WANDB_API_KEY")
 # highlight-next-line
-@weave.op
-def chat_response(prompt):
-    stream = client.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {"role": "user", "content": prompt},
-            *[
-                {"role": m["role"], "content": m["content"]}
-                for m in st.session_state.messages
-            ],
-        ],
-        stream=True,
-    )
-    response = st.write_stream(stream)
-    return {"response": response}
-
+wandb.login()
 
-client = OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
 # highlight-next-line
 weave_client = weave.init("feedback-example")
+oai_client = openai.OpenAI()
 
 
-def display_chat_messages():
-    for message in st.session_state.messages:
+def init_states():
+    """Set up session_state keys if they don't exist yet."""
+    if "messages" not in st.session_state:
+        st.session_state["messages"] = []
+    if "calls" not in st.session_state:
+        st.session_state["calls"] = []
+    if "session_id" not in st.session_state:
+        st.session_state["session_id"] = "123abc"
+
+
+# highlight-next-line
+@weave.op
+def chat_response(full_history):
+    """
+    Calls the OpenAI API in streaming mode given the entire conversation history so far.
+    full_history is a list of dicts: [{"role":"user"|"assistant","content":...}, ...]
+ """ + stream = oai_client.chat.completions.create( + model="gpt-4", messages=full_history, stream=True + ) + response_text = st.write_stream(stream) + return {"response": response_text} + + +def render_feedback_buttons(call_idx): + """Renders thumbs up/down and text feedback for the call.""" + col1, col2, col3 = st.columns([1, 1, 4]) + + # Thumbs up button + with col1: + if st.button("👍", key=f"thumbs_up_{call_idx}"): + st.session_state.calls[call_idx].feedback.add_reaction("👍") + st.success("Thanks for the feedback!") + + # Thumbs down button + with col2: + if st.button("👎", key=f"thumbs_down_{call_idx}"): + st.session_state.calls[call_idx].feedback.add_reaction("👎") + st.success("Thanks for the feedback!") + + # Text feedback + with col3: + feedback_text = st.text_input("Feedback", key=f"feedback_input_{call_idx}") + if st.button("Submit Feedback", key=f"submit_feedback_{call_idx}"): + if feedback_text: + st.session_state.calls[call_idx].feedback.add_note(feedback_text) + st.success("Feedback submitted!") + + +def display_old_messages(): + """Displays the conversation stored in st.session_state.messages with feedback buttons""" + for idx, message in enumerate(st.session_state.messages): with st.chat_message(message["role"]): st.markdown(message["content"]) + # If it's an assistant message, show feedback form + if message["role"] == "assistant": + # Figure out index of this assistant message in st.session_state.calls + assistant_idx = ( + len( + [ + m + for m in st.session_state.messages[: idx + 1] + if m["role"] == "assistant" + ] + ) + - 1 + ) + # Render thumbs up/down & text feedback + if assistant_idx < len(st.session_state.calls): + render_feedback_buttons(assistant_idx) -def get_and_process_prompt(): - if prompt := st.chat_input("What is up?"): - st.session_state.messages.append({"role": "user", "content": prompt}) +def display_chat_prompt(): + """Displays the chat prompt input box.""" + if prompt := st.chat_input("Ask me anything!"): + # Immediately render new user message with st.chat_message("user"): st.markdown(prompt) + # Save user message in session + st.session_state.messages.append({"role": "user", "content": prompt}) + + # Prepare chat history for the API + full_history = [ + {"role": msg["role"], "content": msg["content"]} + for msg in st.session_state.messages + ] + with st.chat_message("assistant"): - # highlight-next-line + # Attach Weave attributes for tracking of conversation instances with weave.attributes( {"session": st.session_state["session_id"], "env": "prod"} ): - # This could also be weave model.predict.call if you're using a weave.Model subclass - result, call = chat_response.call( - prompt - ) # call the function with `.call`, this returns a tuple with a new Call object - # highlight-next-line - st.button( - ":thumbsup:", - on_click=lambda: call.feedback.add_reaction("👍"), - key="up", - ) - # highlight-next-line - st.button( - ":thumbsdown:", - on_click=lambda: call.feedback.add_reaction("👎"), - key="down", - ) + # Call the OpenAI API (stream) + result, call = chat_response.call(full_history) + + # Store the assistant message st.session_state.messages.append( {"role": "assistant", "content": result["response"]} ) + # Store the weave call object to link feedback to the specific response + st.session_state.calls.append(call) + + # Render feedback buttons for the new message + new_assistant_idx = ( + len( + [ + m + for m in st.session_state.messages + if m["role"] == "assistant" + ] + ) + - 1 + ) -def init_chat_history(): - if "messages" not in 
st.session_state: - st.session_state.messages = st.session_state.messages = [] + # Render feedback buttons + if new_assistant_idx < len(st.session_state.calls): + render_feedback_buttons(new_assistant_idx) def main(): - st.session_state["session_id"] = "123abc" - init_chat_history() - display_chat_messages() - get_and_process_prompt() + st.title("Chatbot with immediate feedback forms") + init_states() + display_old_messages() + display_chat_prompt() if __name__ == "__main__": diff --git a/docs/notebooks/feedback_prod.ipynb b/docs/notebooks/feedback_prod.ipynb index 7cdcaf716348..2d514cf85f96 100644 --- a/docs/notebooks/feedback_prod.ipynb +++ b/docs/notebooks/feedback_prod.ipynb @@ -24,7 +24,7 @@ "\n", "It is often hard to automatically evaluate a generated LLM response so, depending on your risk tolerance, you can gather direct user feedback to find areas to improve.\n", "\n", - "In this tutorial, we'll use a custom RAG chatbot as an example app with which the users can interact and which allows us to collect user feedback.\n", + "In this tutorial, we'll use a custom chatbot as an example app from which to collect user feedback.\n", "We'll use Streamlit to build the interface and we'll capture the LLM interactions and feedback in Weave." ] }, @@ -45,37 +45,21 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install weave openai streamlit" + "!pip install weave openai streamlit wandb\n", + "!pip install set-env-colab-kaggle-dotenv -q # for env var" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "# Temporary workaround to fix bug in openai:\n", - "# TypeError: Client.__init__() got an unexpected keyword argument 'proxies'\n", - "# See https://community.openai.com/t/error-with-openai-1-56-0-client-init-got-an-unexpected-keyword-argument-proxies/1040332/15\n", - "!pip install \"httpx<0.28\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, create a file called `secrets.toml` and add an OpenAI key so it works with [st.secrets](https://docs.streamlit.io/develop/api-reference/connections/st.secrets). You can [sign up](https://platform.openai.com/signup) on the OpenAI platform to get your own API key. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 0, "metadata": {}, "outputs": [], "source": [ - "# secrets.toml\n", - "OPENAI_API_KEY = \"your OpenAI key\"" + "# Add a .env file with your OpenAI and WandB API keys\n", + "from set_env import set_env\n", + "\n", + "_ = set_env(\"OPENAI_API_KEY\")\n", + "_ = set_env(\"WANDB_API_KEY\")" ] }, { @@ -93,86 +77,151 @@ "source": [ "# chatbot.py\n", "\n", + "import openai\n", "import streamlit as st\n", - "from openai import OpenAI\n", + "import wandb\n", + "from set_env import set_env\n", "\n", "import weave\n", "\n", - "st.title(\"Add feedback\")\n", + "_ = set_env(\"OPENAI_API_KEY\")\n", + "_ = set_env(\"WANDB_API_KEY\")\n", + "\n", + "# highlight-next-line\n", + "wandb.login()\n", + "\n", + "# highlight-next-line\n", + "weave_client = weave.init(\"feedback-example\")\n", + "oai_client = openai.OpenAI()\n", + "\n", + "\n", + "def init_states():\n", + " \"\"\"Set up session_state keys if they don't exist yet.\"\"\"\n", + " if \"messages\" not in st.session_state:\n", + " st.session_state[\"messages\"] = []\n", + " if \"calls\" not in st.session_state:\n", + " st.session_state[\"calls\"] = []\n", + " if \"session_id\" not in st.session_state:\n", + " st.session_state[\"session_id\"] = \"123abc\"\n", "\n", "\n", "# highlight-next-line\n", "@weave.op\n", - "def chat_response(prompt):\n", - " stream = client.chat.completions.create(\n", - " model=\"gpt-4o\",\n", - " messages=[\n", - " {\"role\": \"user\", \"content\": prompt},\n", - " *[\n", - " {\"role\": m[\"role\"], \"content\": m[\"content\"]}\n", - " for m in st.session_state.messages\n", - " ],\n", - " ],\n", - " stream=True,\n", + "def chat_response(full_history):\n", + " \"\"\"\n", + " Calls the OpenAI API in streaming mode given the entire conversation history so far.\n", + " full_history is a list of dicts: [{\"role\":\"user\"|\"assistant\",\"content\":...}, ...]\n", + " \"\"\"\n", + " stream = oai_client.chat.completions.create(\n", + " model=\"gpt-4\", messages=full_history, stream=True\n", " )\n", - " response = st.write_stream(stream)\n", - " return {\"response\": response}\n", + " response_text = st.write_stream(stream)\n", + " return {\"response\": response_text}\n", "\n", "\n", - "client = OpenAI(api_key=st.secrets[\"OPENAI_API_KEY\"])\n", - "# highlight-next-line\n", - "weave_client = weave.init(\"feedback-example\")\n", + "def render_feedback_buttons(call_idx):\n", + " \"\"\"Renders thumbs up/down and text feedback for the call.\"\"\"\n", + " col1, col2, col3 = st.columns([1, 1, 4])\n", + "\n", + " # Thumbs up button\n", + " with col1:\n", + " if st.button(\"👍\", key=f\"thumbs_up_{call_idx}\"):\n", + " st.session_state.calls[call_idx].feedback.add_reaction(\"👍\")\n", + " st.success(\"Thanks for the feedback!\")\n", + "\n", + " # Thumbs down button\n", + " with col2:\n", + " if st.button(\"👎\", key=f\"thumbs_down_{call_idx}\"):\n", + " st.session_state.calls[call_idx].feedback.add_reaction(\"👎\")\n", + " st.success(\"Thanks for the feedback!\")\n", "\n", + " # Text feedback\n", + " with col3:\n", + " feedback_text = st.text_input(\"Feedback\", key=f\"feedback_input_{call_idx}\")\n", + " if st.button(\"Submit Feedback\", key=f\"submit_feedback_{call_idx}\"):\n", + " if feedback_text:\n", + " st.session_state.calls[call_idx].feedback.add_note(feedback_text)\n", + " st.success(\"Feedback submitted!\")\n", "\n", - "def display_chat_messages():\n", - " for message in st.session_state.messages:\n", + "\n", + "def display_old_messages():\n", + " 
\"\"\"Displays the conversation stored in st.session_state.messages with feedback buttons\"\"\"\n", + " for idx, message in enumerate(st.session_state.messages):\n", " with st.chat_message(message[\"role\"]):\n", " st.markdown(message[\"content\"])\n", "\n", + " # If it's an assistant message, show feedback form\n", + " if message[\"role\"] == \"assistant\":\n", + " # Figure out index of this assistant message in st.session_state.calls\n", + " assistant_idx = (\n", + " len(\n", + " [\n", + " m\n", + " for m in st.session_state.messages[: idx + 1]\n", + " if m[\"role\"] == \"assistant\"\n", + " ]\n", + " )\n", + " - 1\n", + " )\n", + " # Render thumbs up/down & text feedback\n", + " if assistant_idx < len(st.session_state.calls):\n", + " render_feedback_buttons(assistant_idx)\n", "\n", - "def get_and_process_prompt():\n", - " if prompt := st.chat_input(\"What is up?\"):\n", - " st.session_state.messages.append({\"role\": \"user\", \"content\": prompt})\n", "\n", + "def display_chat_prompt():\n", + " \"\"\"Displays the chat prompt input box.\"\"\"\n", + " if prompt := st.chat_input(\"Ask me anything!\"):\n", + " # Immediately render new user message\n", " with st.chat_message(\"user\"):\n", " st.markdown(prompt)\n", "\n", + " # Save user message in session\n", + " st.session_state.messages.append({\"role\": \"user\", \"content\": prompt})\n", + "\n", + " # Prepare chat history for the API\n", + " full_history = [\n", + " {\"role\": msg[\"role\"], \"content\": msg[\"content\"]}\n", + " for msg in st.session_state.messages\n", + " ]\n", + "\n", " with st.chat_message(\"assistant\"):\n", - " # highlight-next-line\n", + " # Attach Weave attributes for tracking of conversation instances\n", " with weave.attributes(\n", " {\"session\": st.session_state[\"session_id\"], \"env\": \"prod\"}\n", " ):\n", - " # This could also be weave model.predict.call if you're using a weave.Model subclass\n", - " result, call = chat_response.call(\n", - " prompt\n", - " ) # call the function with `.call`, this returns a tuple with a new Call object\n", - " # highlight-next-line\n", - " st.button(\n", - " \":thumbsup:\",\n", - " on_click=lambda: call.feedback.add_reaction(\"👍\"),\n", - " key=\"up\",\n", - " )\n", - " # highlight-next-line\n", - " st.button(\n", - " \":thumbsdown:\",\n", - " on_click=lambda: call.feedback.add_reaction(\"👎\"),\n", - " key=\"down\",\n", - " )\n", + " # Call the OpenAI API (stream)\n", + " result, call = chat_response.call(full_history)\n", + "\n", + " # Store the assistant message\n", " st.session_state.messages.append(\n", " {\"role\": \"assistant\", \"content\": result[\"response\"]}\n", " )\n", "\n", + " # Store the weave call object to link feedback to the specific response\n", + " st.session_state.calls.append(call)\n", "\n", - "def init_chat_history():\n", - " if \"messages\" not in st.session_state:\n", - " st.session_state.messages = st.session_state.messages = []\n", + " # Render feedback buttons for the new message\n", + " new_assistant_idx = (\n", + " len(\n", + " [\n", + " m\n", + " for m in st.session_state.messages\n", + " if m[\"role\"] == \"assistant\"\n", + " ]\n", + " )\n", + " - 1\n", + " )\n", + "\n", + " # Render feedback buttons\n", + " if new_assistant_idx < len(st.session_state.calls):\n", + " render_feedback_buttons(new_assistant_idx)\n", "\n", "\n", "def main():\n", - " st.session_state[\"session_id\"] = \"123abc\"\n", - " init_chat_history()\n", - " display_chat_messages()\n", - " get_and_process_prompt()\n", + " st.title(\"Chatbot with immediate feedback 
forms\")\n", + " init_states()\n", + " display_old_messages()\n", + " display_chat_prompt()\n", "\n", "\n", "if __name__ == \"__main__\":\n", @@ -199,21 +248,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'weave'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mweave\u001b[39;00m\n\u001b[1;32m 3\u001b[0m weave\u001b[38;5;241m.\u001b[39minit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfeedback-example\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# highlight-next-line\u001b[39;00m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'weave'" - ] - } - ], + "outputs": [], "source": [ "import weave\n", "\n", @@ -306,7 +343,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.7" + "version": "3.11.11" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index eb5636029b2a..b1a06e90635d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,11 +102,7 @@ test = [ # Integration Tests "pytest-recording>=0.13.2", - # "vcrpy>=6.0.1", - # https://github.com/kevin1024/vcrpy/pull/889 - # This resolves test issues until a new pypi release can be made. Once that release - # is made, we can remove this and revert to the vcrpy>=6.0.1 dependency. - "vcrpy @ git+https://github.com/kevin1024/vcrpy.git@48d0a2e453f6635af343000cdaf9794a781e807e", + "vcrpy>=7.0.0", # serving tests "flask", @@ -116,7 +112,6 @@ test = [ "httpx", ] - [project.scripts] weave = "weave.trace.cli:cli" @@ -147,9 +142,6 @@ exclude = [ "weave/clear_cache.py", ] -[tool.hatch.metadata] -allow-direct-references = true - [tool.pytest.ini_options] filterwarnings = [ # treat warnings as errors @@ -237,7 +229,7 @@ module = "weave_query.*" ignore_errors = true [tool.bumpversion] -current_version = "0.51.26-dev0" +current_version = "0.51.28-dev0" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml index fea579fb73e6..7eeaba4e971d 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml @@ -19,7 +19,7 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: '{"id":"chat-8bfccc9544b64c70b47605a647b69b8a","object":"chat.completion","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! + string: '{"id":"chat-47f4caab87cf4741abf6b20e7cb7fd7f","object":"chat.completion","created":1735929357,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! It''s nice to meet you. 
Is there something I can help you with or would you like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}' headers: @@ -34,9 +34,9 @@ interactions: Content-Type: - application/json Date: - - Mon, 23 Dec 2024 22:21:45 GMT + - Fri, 03 Jan 2025 18:35:58 GMT Nvcf-Reqid: - - 704f40c5-4d25-46fb-8d76-66364bc9e156 + - 83a0a3b7-ea17-401d-8ba4-20559fcb9516 Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml index eab2bd71a7c6..3de9618d656e 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml @@ -20,104 +20,104 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: 'data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} + string: 'data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" 
It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} - data: 
{"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" 
you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: 
{"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} - data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + data: {"id":"chat-ebf3fd9b28674719b66b92ebe257b8a8","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} data: [DONE] @@ -134,9 +134,9 @@ interactions: Content-Type: - text/event-stream; charset=utf-8 Date: - - Mon, 23 Dec 2024 22:21:46 GMT + - Fri, 03 Jan 2025 18:35:58 GMT Nvcf-Reqid: - - 5d8ef74f-0537-47a5-b0dc-0735d916776f + - 63c6afa3-695d-46d0-b00a-367c31300aab Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml index 0f3dc21c319e..7c674fdde12c 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml @@ -19,7 +19,7 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: '{"id":"chat-55c0d02e9caa471694b571312c012a34","object":"chat.completion","created":1734992504,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! + string: '{"id":"chat-3532f1c8e7184b08af496723ae649ddd","object":"chat.completion","created":1735929357,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! It''s nice to meet you. 
Is there something I can help you with or would you like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}' headers: @@ -34,9 +34,9 @@ interactions: Content-Type: - application/json Date: - - Mon, 23 Dec 2024 22:21:44 GMT + - Fri, 03 Jan 2025 18:35:57 GMT Nvcf-Reqid: - - ea89199e-9f54-4c8d-8895-b1fd9034b86e + - b0e6ba91-bc54-4ab7-a4ef-2b1a3f66e52b Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml index 9d6d68b489f6..d208f373c998 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml @@ -20,104 +20,104 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: 'data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} + string: 'data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" 
It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} - data: 
{"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" 
you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + data: 
{"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} - data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + data: {"id":"chat-5e392da91d234788a10970aa0dc478e1","object":"chat.completion.chunk","created":1735929358,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} data: [DONE] @@ -134,9 +134,9 @@ interactions: Content-Type: - text/event-stream; charset=utf-8 Date: - - Mon, 23 Dec 2024 22:21:45 GMT + - Fri, 03 Jan 2025 18:35:58 GMT Nvcf-Reqid: - - ca6cf115-a5f7-447c-95e7-eafced589f5d + - 8ca0c36f-6917-4286-a189-00b543b433b1 Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml index aa03ceaa8d58..50760ca2372a 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml @@ -26,8 +26,8 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: '{"id":"chat-c827eb3e9dad425dbde170c946ff7cf7","object":"chat.completion","created":1734992507,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-3b35163e6a5a4961aa22b93581a0c5b2","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": - \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 
183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' + string: '{"id":"chat-47a53c8ab28845b39a0f27712da52bbd","object":"chat.completion","created":1735929359,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-f6a1750c001246a0a5edeeea8be28e6b","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 254}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' headers: Access-Control-Allow-Credentials: - 'true' @@ -40,9 +40,9 @@ interactions: Content-Type: - application/json Date: - - Mon, 23 Dec 2024 22:21:48 GMT + - Fri, 03 Jan 2025 18:36:00 GMT Nvcf-Reqid: - - 497f5f82-973e-4224-8593-97664a04b39c + - 3ae2e86b-10f8-4faf-9b8b-483de1b3efd9 Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml index 9043999a5f7f..6b8b9ff8937a 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml @@ -26,8 +26,8 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: '{"id":"chat-5e1b429e3b704443992df3e6a1f95021","object":"chat.completion","created":1734992509,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-b16ddfa73cb94d9b95d6572615e71589","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": - \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' + string: '{"id":"chat-1710d5df51b9466eb1f57b3e8758a57a","object":"chat.completion","created":1735929361,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-475f88d43b494e2b984e1d3019511bf7","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 254}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' headers: Access-Control-Allow-Credentials: - 'true' @@ -40,9 +40,9 @@ interactions: Content-Type: - application/json Date: - - Mon, 23 Dec 2024 22:21:49 GMT + - Fri, 03 Jan 2025 18:36:01 GMT Nvcf-Reqid: - - 60deff33-b1fe-46d1-abc4-35978ee01613 + - 6175baf3-2617-4501-b6bd-446dc0bc3f89 Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml 
b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml index e58ed33910b8..0323e1541d6c 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml @@ -26,14 +26,14 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: 'data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} + string: 'data: {"id":"chat-b3afc3517360412dac604a174b1ed0ce","object":"chat.completion.chunk","created":1735929363,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} - data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-df3daa09595c462297a7253930e4d915","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": - \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + data: {"id":"chat-b3afc3517360412dac604a174b1ed0ce","object":"chat.completion.chunk","created":1735929363,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-110e54e033b14f4da0cc49b633141568","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 254}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} - data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + data: {"id":"chat-b3afc3517360412dac604a174b1ed0ce","object":"chat.completion.chunk","created":1735929363,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} data: [DONE] @@ -50,9 +50,9 @@ interactions: Content-Type: - text/event-stream; charset=utf-8 Date: - - Mon, 23 Dec 2024 22:21:51 GMT + - Fri, 03 Jan 2025 18:36:03 GMT Nvcf-Reqid: - - 43d997a5-3e0a-485d-9567-e16813f1b183 + - c302f2c3-bcf2-452b-89e9-ac7cffaa7f60 Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml index c8bd72e70e11..11f96d3c3ad2 100644 --- 
a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml @@ -26,14 +26,14 @@ interactions: uri: https://integrate.api.nvidia.com/v1/chat/completions response: body: - string: 'data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} + string: 'data: {"id":"chat-5ebfe9e3bc2a47c48a4b9171ae79e9be","object":"chat.completion.chunk","created":1735929362,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} - data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-d0d074128af94984bfc40534381b5860","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": - \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + data: {"id":"chat-5ebfe9e3bc2a47c48a4b9171ae79e9be","object":"chat.completion.chunk","created":1735929362,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-1729f8ff1aef46cd97bb293117894dba","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 254}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} - data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + data: {"id":"chat-5ebfe9e3bc2a47c48a4b9171ae79e9be","object":"chat.completion.chunk","created":1735929362,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} data: [DONE] @@ -50,9 +50,9 @@ interactions: Content-Type: - text/event-stream; charset=utf-8 Date: - - Mon, 23 Dec 2024 22:21:50 GMT + - Fri, 03 Jan 2025 18:36:02 GMT Nvcf-Reqid: - - 2746750e-86b1-4959-bd8b-1da038003578 + - 54484272-cdbc-48a7-99e5-c5a458aa3d60 Nvcf-Status: - fulfilled Server: diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py index e46ebac8e89c..5a6b40f5f341 100644 --- a/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py +++ b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py @@ -40,7 +40,6 @@ def test_chatnvidia_quickstart(client: weave.trace.weave_client.WeaveClient) -> output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" usage = 
call.summary["usage"][output["model"]] # type: ignore assert usage["requests"] == 1 @@ -88,7 +87,6 @@ async def test_chatnvidia_async_quickstart( output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" usage = call.summary["usage"][output["model"]] assert usage["requests"] == 1 @@ -140,7 +138,6 @@ def test_chatnvidia_stream_quickstart( output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" print(call.summary["usage"][output["model"]]) usage = call.summary["usage"][output["model"]] @@ -192,7 +189,6 @@ async def test_chatnvidia_async_stream_quickstart( output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" print(call.summary["usage"][output["model"]]) usage = call.summary["usage"][output["model"]] @@ -273,7 +269,6 @@ def test_chatnvidia_tool_call(client: weave.trace.weave_client.WeaveClient) -> N output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" usage = call.summary["usage"][output["model"]] assert usage["requests"] == 1 @@ -361,7 +356,6 @@ async def test_chatnvidia_tool_call_async( output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" usage = call.summary["usage"][output["model"]] assert usage["requests"] == 1 @@ -453,7 +447,6 @@ def test_chatnvidia_tool_call_stream( output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" usage = call.summary["usage"][output["model"]] assert usage["requests"] == 1 @@ -546,7 +539,6 @@ async def test_chatnvidia_tool_call_async_stream( output = call.output assert output["model"] == model - assert output["object"] == "chat.completion" usage = call.summary["usage"][output["model"]] assert usage["requests"] == 1 diff --git a/weave-js/src/common/components/FileBrowser.tsx b/weave-js/src/common/components/FileBrowser.tsx index 978e9d4b89eb..82bb4d485986 100644 --- a/weave-js/src/common/components/FileBrowser.tsx +++ b/weave-js/src/common/components/FileBrowser.tsx @@ -2,7 +2,7 @@ import Tooltip from '@mui/material/Tooltip'; import * as _ from 'lodash'; import numeral from 'numeral'; import Prism from 'prismjs'; -import React, {FC, useCallback, useEffect, useRef, useState} from 'react'; +import React, {FC, memo, useCallback, useEffect, useRef, useState} from 'react'; import TimeAgo from 'react-timeago'; import {Header, Icon, Pagination, Segment, Table} from 'semantic-ui-react'; @@ -608,103 +608,107 @@ interface CodePreviewProps { language?: string; } -const CodePreview: FC = ({useLoadFile, file, language}) => { - const [data, setDataVal] = useState(''); - const [error, setErrorVal] = useState(undefined); - const ref = useRef(null); - const setData = useCallback( - (d: string) => { - // Automatically reformat JSON - let lines = d.split('\n'); - if ( - (file.name.endsWith('.json') && lines.length === 1) || - (lines.length === 2 && lines[1] === '') - ) { - try { - const parsed = JSON.parse(lines[0]); - lines = JSON.stringify(parsed, undefined, 2).split('\n'); - } catch { - // ok +const CodePreview: FC = memo( + ({useLoadFile, file, language}) => { + const [data, setDataVal] = useState(''); + const [error, setErrorVal] = useState(undefined); + const ref = useRef(null); + const setData = useCallback( + (d: string) => { + // Automatically reformat JSON + let lines = d.split('\n'); + if ( + (file.name.endsWith('.json') && lines.length === 1) || + (lines.length === 2 && lines[1] === '') + ) { + try { + 
const parsed = JSON.parse(lines[0]); + lines = JSON.stringify(parsed, undefined, 2).split('\n'); + } catch { + // ok + } } - } - // Truncate long lines - const truncated = lines - .map(line => { - if (line.length > 1000) { - return line.slice(0, 1000) + ' (line truncated to 1000 characters)'; - } else { - return line; - } - }) - .join('\n'); + // Truncate long lines + const truncated = lines + .map(line => { + if (line.length > 1000) { + return ( + line.slice(0, 1000) + ' (line truncated to 1000 characters)' + ); + } else { + return line; + } + }) + .join('\n'); + + setDataVal(truncated); + }, + [setDataVal, file.name] + ); + const setError = useCallback( + (errorString?: string) => + setErrorVal(errorString || 'Error loading file'), + [setErrorVal] + ); - setDataVal(truncated); - }, - [setDataVal, file.name] - ); - const setError = useCallback( - (errorString?: string) => setErrorVal(errorString || 'Error loading file'), - [setErrorVal] - ); + // We don't pass a fallback to allow dev mode zero byte files to render + const loading = useLoadFile(file, { + onSuccess: setData, + onFailure: setError, + }); - // We don't pass a fallback to allow dev mode zero byte files to render - const loading = useLoadFile(file, { - onSuccess: setData, - onFailure: setError, - }); - useEffect(() => { - if (ref.current != null) { - Prism.highlightElement(ref.current); + useEffect(() => { + if (ref.current != null) { + Prism.highlightElement(ref.current); + } + }); + + if (error != null) { + return {error}; } - }); - if (error != null) { - return {error}; - } - if (loading) { - return ; - } - // HACKING TO DISPLAY VOC - // if (file.name.endsWith('.xml')) { - // const parser = new DOMParser(); - // const xmlDoc = parser.parseFromString(data, 'text/xml'); - // const anno = xmlDoc.getElementsByTagName('annotation')[0]; - // if (anno != null) { - // for (let i = 0; i < anno.childNodes.length; i++) { - // const node = anno.childNodes[i]; - // if (node.nodeType !== Node.TEXT_NODE && node.nodeName === 'filename') { - // const filename = node.childNodes[0].textContent; - // console.log('FILE NAME', filename); - // // const imageFile = (node as any).getElementsByTagName('filename')[0]; - // // console.log('IMAGE FILE', imageFile); - // } - // console.log(node); - // } - // // anno.childNodes[] - // // console.log('VOC!'); - // } - // } - return ( -
-
;
+    }
+
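+    // Render the file contents below: files under 1MB get a Prism-highlighted code block, while larger files fall back to a plain div.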
+    return (
+      
- - {data} - -
-
- ); -}; + {file.sizeBytes / 1024 < 1024 ? ( + // When the file is under 1MB, we use the normal code viewer with highlighting +
+            
+              {data}
+            
+          
+ ) : ( + // Use a div here because Prism seems to have global access and can apply highlighting + // whenever it wants, which we don't want here. +
+ {data} +
+ )} + + ); + }, + (prevProps, nextProps) => { + return prevProps.file.url === nextProps.file.url; + } +); interface MarkdownPreviewProps { useLoadFile: UseLoadFile; diff --git a/weave-js/src/common/components/elements/ModifiedDropdown.tsx b/weave-js/src/common/components/elements/ModifiedDropdown.tsx index 02b69a98644c..943d86813d1b 100644 --- a/weave-js/src/common/components/elements/ModifiedDropdown.tsx +++ b/weave-js/src/common/components/elements/ModifiedDropdown.tsx @@ -498,9 +498,16 @@ const ModifiedDropdown: FC = React.memo( }} onChange={(e, {value: val}) => { setSearchQuery(''); - const valCount = _.isArray(val) ? val.length : 0; + const valIsArray = Array.isArray(val); + const valCount = valIsArray ? val.length : 0; + + // HACK: In a multi-select, a click on the limit message will append the limiter to the value; make sure to no-op this. A better solution would be to render the limit message as an interactable element, but refactoring this is a much larger task. + const valIsLimit = valIsArray + ? val.includes(ITEM_LIMIT_VALUE) + : val === ITEM_LIMIT_VALUE; + if (valCount < itemCount() || !atItemLimit()) { - if (onChange && val !== ITEM_LIMIT_VALUE) { + if (onChange && !valIsLimit) { onChange(e, {value: val}); } } diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/CompareEvaluationsPage.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/CompareEvaluationsPage.tsx index 478c48875463..54eef453726d 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/CompareEvaluationsPage.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/CompareEvaluationsPage.tsx @@ -4,6 +4,7 @@ import {Box} from '@material-ui/core'; import {Alert} from '@mui/material'; +import {WaveLoader} from '@wandb/weave/components/Loaders/WaveLoader'; import {Tailwind} from '@wandb/weave/components/Tailwind'; import {maybePluralizeWord} from '@wandb/weave/core/util/string'; import React, {FC, useCallback, useContext, useMemo, useState} from 'react'; @@ -179,8 +180,11 @@ const CompareEvaluationsPageInner: React.FC<{ }> = props => { const {state, setSelectedMetrics} = useCompareEvaluationsState(); const showExampleFilter = - Object.keys(state.data.evaluationCalls).length === 2; - const showExamples = Object.keys(state.data.resultRows).length > 0; + Object.keys(state.summary.evaluationCalls).length === 2; + const showExamples = + Object.keys(state.loadableComparisonResults.result?.resultRows ?? {}) + .length > 0; + const resultsLoading = state.loadableComparisonResults.loading; return ( - {showExamples ? ( + {resultsLoading ? ( + + + + ) : showExamples ? ( <> {showExampleFilter && } diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/compositeMetricsUtil.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/compositeMetricsUtil.ts index a29fb74ffe94..7d4be894867b 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/compositeMetricsUtil.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/compositeMetricsUtil.ts @@ -1,7 +1,7 @@ /** * This file contains a few utilities for working with the - * `MetricDefinitionMap`s in the `EvaluationComparisonData` object.
The - * `EvaluationComparisonData` state is a normalized representation of the data, + * `MetricDefinitionMap`s in the `EvaluationComparisonSummary` object. The + * `EvaluationComparisonSummary` state is a normalized representation of the data, * which is good for not duplicating data, but does present some challenges when * trying to build the final rendering of the data. As an application-specific * consideration, when comparing evaluations, metrics can be represented by the @@ -13,7 +13,7 @@ */ import _ from 'lodash'; -import {EvaluationComparisonData, MetricDefinition} from './ecpTypes'; +import {EvaluationComparisonSummary, MetricDefinition} from './ecpTypes'; import {MetricType} from './ecpTypes'; import {getScoreKeyNameFromScorerRef} from './ecpUtil'; import {flattenedDimensionPath} from './ecpUtil'; @@ -69,12 +69,12 @@ export type CompositeSummaryMetricGroupForKeyPath = { }; /** - * Builds a `CompositeScoreMetrics` object from the `EvaluationComparisonData`. + * Builds a `CompositeScoreMetrics` object from the `EvaluationComparisonSummary`. * This is the primary utility for converting the normalized data into a form * that is more useful for rendering the data. */ export const buildCompositeMetricsMap = ( - data: EvaluationComparisonData, + summaryData: EvaluationComparisonSummary, mType: MetricType, selectedMetrics: Record | undefined = undefined ): CompositeScoreMetrics => { @@ -83,9 +83,9 @@ export const buildCompositeMetricsMap = ( // Get the metric definition map based on the metric type let metricDefinitionMap; if (mType === 'score') { - metricDefinitionMap = data.scoreMetrics; + metricDefinitionMap = summaryData.scoreMetrics; } else if (mType === 'summary') { - metricDefinitionMap = data.summaryMetrics; + metricDefinitionMap = summaryData.summaryMetrics; } else { throw new Error(`Invalid metric type: ${mType}`); } @@ -128,9 +128,10 @@ export const buildCompositeMetricsMap = ( }; } - const evals = Object.values(data.evaluationCalls) + const evals = Object.values(summaryData.evaluationCalls) .filter(evaluationCall => { - const evaluation = data.evaluations[evaluationCall.evaluationRef]; + const evaluation = + summaryData.evaluations[evaluationCall.evaluationRef]; return ( metric.scorerOpOrObjRef == null || evaluation.scorerRefs.includes(metric.scorerOpOrObjRef) diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpState.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpState.ts index e5c1b03d60a4..669cbe15cd4c 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpState.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpState.ts @@ -7,9 +7,15 @@ import {useMemo} from 'react'; -import {useEvaluationComparisonData} from '../wfReactInterface/tsDataModelHooksEvaluationComparison'; +import { + useEvaluationComparisonResults, + useEvaluationComparisonSummary, +} from '../wfReactInterface/tsDataModelHooksEvaluationComparison'; import {Loadable} from '../wfReactInterface/wfDataModelHooksInterface'; -import {EvaluationComparisonData} from './ecpTypes'; +import { + EvaluationComparisonResults, + EvaluationComparisonSummary, +} from './ecpTypes'; import {getMetricIds} from './ecpUtil'; /** @@ -17,7 +23,9 @@ import {getMetricIds} from './ecpUtil'; */ export type EvaluationComparisonState = { // The normalized data for the evaluations - data: EvaluationComparisonData; + summary: EvaluationComparisonSummary; 
+ // The results of the evaluations + loadableComparisonResults: Loadable; // The dimensions to compare & filter results comparisonDimensions?: ComparisonDimensionsType; // The current digest which is in view @@ -50,18 +58,28 @@ export const useEvaluationComparisonState = ( const orderedCallIds = useMemo(() => { return getCallIdsOrderedForQuery(evaluationCallIds); }, [evaluationCallIds]); - const data = useEvaluationComparisonData(entity, project, orderedCallIds); + const summaryData = useEvaluationComparisonSummary( + entity, + project, + orderedCallIds + ); + const resultsData = useEvaluationComparisonResults( + entity, + project, + orderedCallIds, + summaryData.result + ); const value = useMemo(() => { - if (data.result == null || data.loading) { + if (summaryData.result == null || summaryData.loading) { return {loading: true, result: null}; } const scorerDimensions = Object.keys( - getMetricIds(data.result, 'score', 'scorer') + getMetricIds(summaryData.result, 'score', 'scorer') ); const derivedDimensions = Object.keys( - getMetricIds(data.result, 'score', 'derived') + getMetricIds(summaryData.result, 'score', 'derived') ); let newComparisonDimensions = comparisonDimensions; @@ -93,7 +111,8 @@ export const useEvaluationComparisonState = ( return { loading: false, result: { - data: data.result, + summary: summaryData.result, + loadableComparisonResults: resultsData, comparisonDimensions: newComparisonDimensions, selectedInputDigest, selectedMetrics, @@ -101,9 +120,10 @@ export const useEvaluationComparisonState = ( }, }; }, [ - data.result, - data.loading, + summaryData.result, + summaryData.loading, comparisonDimensions, + resultsData, selectedInputDigest, selectedMetrics, evaluationCallIds, @@ -132,8 +152,8 @@ const getCallIdsOrderedForQuery = (callIds: string[]) => { */ export const getOrderedModelRefs = (state: EvaluationComparisonState) => { const baselineCallId = getBaselineCallId(state); - const baselineRef = state.data.evaluationCalls[baselineCallId].modelRef; - const refs = Object.keys(state.data.models); + const baselineRef = state.summary.evaluationCalls[baselineCallId].modelRef; + const refs = Object.keys(state.summary.models); // Make sure the baseline model is first moveItemToFront(refs, baselineRef); return refs; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpTypes.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpTypes.ts index b4642fae240d..df40b28a92df 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpTypes.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpTypes.ts @@ -7,7 +7,7 @@ */ import {TraceCallSchema} from '../wfReactInterface/traceServerClientTypes'; -export type EvaluationComparisonData = { +export type EvaluationComparisonSummary = { // Entity and Project are constant across all calls entity: string; project: string; @@ -23,6 +23,20 @@ export type EvaluationComparisonData = { [callId: string]: EvaluationCall; }; + // Models are the Weave Objects used to define the model logic and properties. + models: { + [modelRef: string]: ModelObj; + }; + + // ScoreMetrics define the metrics that are associated on each individual prediction + scoreMetrics: MetricDefinitionMap; + + // SummaryMetrics define the metrics that are associated with the evaluation as a whole + // often aggregated from the scoreMetrics. 
+ summaryMetrics: MetricDefinitionMap; +}; + +export type EvaluationComparisonResults = { // Inputs are the intersection of all inputs used in the evaluations. // Note, we are able to "merge" the same input digest even if it is // used in different evaluations. @@ -30,11 +44,6 @@ export type EvaluationComparisonData = { [rowDigest: string]: DatasetRow; }; - // Models are the Weave Objects used to define the model logic and properties. - models: { - [modelRef: string]: ModelObj; - }; - // ResultRows are the actual results of running the evaluation against // the inputs. resultRows: { @@ -54,15 +63,7 @@ export type EvaluationComparisonData = { }; }; }; - - // ScoreMetrics define the metrics that are associated on each individual prediction - scoreMetrics: MetricDefinitionMap; - - // SummaryMetrics define the metrics that are associated with the evaluation as a whole - // often aggregated from the scoreMetrics. - summaryMetrics: MetricDefinitionMap; }; - /** * The EvaluationObj is the primary object that defines the evaluation itself. */ @@ -84,6 +85,7 @@ export type EvaluationCall = { name: string; color: string; summaryMetrics: MetricResultMap; + traceId: string; }; /** diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpUtil.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpUtil.ts index 601116db34d1..b29743dcd40a 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpUtil.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/ecpUtil.ts @@ -1,5 +1,5 @@ /** - * This file contains a handful of utilities for working with the `EvaluationComparisonData` destructure. + * This file contains a handful of utilities for working with the `EvaluationComparisonSummary` destructure. * These are mostly convenience functions for extracting and resolving metrics from the data, but also * include some helper functions for working with the `MetricDefinition` objects and constructing * strings correctly. @@ -8,7 +8,7 @@ import {parseRef, WeaveObjectRef} from '../../../../../../react'; import { EvaluationCall, - EvaluationComparisonData, + EvaluationComparisonSummary, MetricDefinition, MetricDefinitionMap, MetricResult, @@ -79,11 +79,12 @@ export const resolveSummaryMetricValueForEvaluateCall = ( }; export const getMetricIds = ( - data: EvaluationComparisonData, + summaryData: EvaluationComparisonSummary, type: MetricType, source: SourceType ): MetricDefinitionMap => { - const metrics = type === 'score' ? data.scoreMetrics : data.summaryMetrics; + const metrics = + type === 'score' ? 
summaryData.scoreMetrics : summaryData.summaryMetrics; return Object.fromEntries( Object.entries(metrics).filter(([k, v]) => v.source === source) ); diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/ComparisonDefinitionSection.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/ComparisonDefinitionSection.tsx index 2704a66cbea6..c32cda7fb50c 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/ComparisonDefinitionSection.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/ComparisonDefinitionSection.tsx @@ -44,9 +44,9 @@ export const ComparisonDefinitionSection: React.FC<{ return callIds.map(callId => ({ key: 'evaluations', value: callId, - label: props.state.data.evaluationCalls[callId]?.name ?? callId, + label: props.state.summary.evaluationCalls[callId]?.name ?? callId, })); - }, [callIds, props.state.data.evaluationCalls]); + }, [callIds, props.state.summary.evaluationCalls]); const onSetBaseline = (value: string | null) => { if (!value) { @@ -130,8 +130,8 @@ const AddEvaluationButton: React.FC<{ // Calls query for just evaluations const evaluationsFilter = useEvaluationsFilter( - props.state.data.entity, - props.state.data.project + props.state.summary.entity, + props.state.summary.project ); const page = useMemo( () => ({ @@ -144,8 +144,8 @@ const AddEvaluationButton: React.FC<{ // Don't query for output here, re-queried in tsDataModelHooksEvaluationComparison.ts const columns = useMemo(() => ['inputs', 'display_name'], []); const calls = useCallsForQuery( - props.state.data.entity, - props.state.data.project, + props.state.summary.entity, + props.state.summary.project, evaluationsFilter, DEFAULT_FILTER_CALLS, page, diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx index 5dcf835e378a..1894398e553f 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ComparisonDefinitionSection/EvaluationDefinition.tsx @@ -21,11 +21,11 @@ export const EvaluationCallLink: React.FC<{ callId: string; state: EvaluationComparisonState; }> = props => { - const evaluationCall = props.state.data.evaluationCalls?.[props.callId]; + const evaluationCall = props.state.summary.evaluationCalls?.[props.callId]; if (!evaluationCall) { return null; } - const {entity, project} = props.state.data; + const {entity, project} = props.state.summary; return ( = props => { const {useObjectVersion} = useWFHooks(); - const evaluationCall = props.state.data.evaluationCalls[props.callId]; - const modelObj = props.state.data.models[evaluationCall.modelRef]; + const evaluationCall = props.state.summary.evaluationCalls[props.callId]; + const modelObj = props.state.summary.models[evaluationCall.modelRef]; const objRef = useMemo( () => parseRef(modelObj.ref) as WeaveObjectRef, [modelObj.ref] @@ -95,9 +95,9 @@ export const EvaluationDatasetLink: 
React.FC<{ callId: string; state: EvaluationComparisonState; }> = props => { - const evaluationCall = props.state.data.evaluationCalls[props.callId]; + const evaluationCall = props.state.summary.evaluationCalls[props.callId]; const evaluationObj = - props.state.data.evaluations[evaluationCall.evaluationRef]; + props.state.summary.evaluations[evaluationCall.evaluationRef]; const parsed = parseRef(evaluationObj.datasetRef); if (!parsed) { return null; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx index 398f65ecd457..4af9900f2dfc 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleCompareSection/ExampleCompareSection.tsx @@ -229,15 +229,15 @@ export const ExampleCompareSection: React.FC<{ }>({}); const onScorerClick = usePeekCall( - props.state.data.entity, - props.state.data.project + props.state.summary.entity, + props.state.summary.project ); const {ref1, ref2} = useLinkHorizontalScroll(); const compositeScoreMetrics = useMemo( - () => buildCompositeMetricsMap(props.state.data, 'score'), - [props.state.data] + () => buildCompositeMetricsMap(props.state.summary, 'score'), + [props.state.summary] ); if (target == null) { @@ -261,7 +261,7 @@ export const ExampleCompareSection: React.FC<{ const numEvals = numTrials.length; // Get derived scores, then filter out any not in the selected metrics const derivedScores = Object.values( - getMetricIds(props.state.data, 'score', 'derived') + getMetricIds(props.state.summary, 'score', 'derived') ).filter( score => props.state.selectedMetrics?.[flattenedDimensionPath(score)] ); @@ -483,7 +483,7 @@ export const ExampleCompareSection: React.FC<{ trialPredict?.op_name ?? 
'' )?.artifactName; const trialCallId = trialPredict?.id; - const evaluationCall = props.state.data.evaluationCalls[currEvalCallId]; + const evaluationCall = props.state.summary.evaluationCalls[currEvalCallId]; if (trialEntity && trialProject && trialOpName && trialCallId) { return ( { const leafDims = useMemo(() => getOrderedCallIds(state), [state]); const compositeMetricsMap = useMemo( - () => buildCompositeMetricsMap(state.data, 'score'), - [state.data] + () => buildCompositeMetricsMap(state.summary, 'score'), + [state.summary] ); const flattenedRows = useMemo(() => { const rows: FlattenedRow[] = []; - Object.entries(state.data.resultRows).forEach( - ([rowDigest, rowCollection]) => { - Object.values(rowCollection.evaluations).forEach(modelCollection => { - Object.values(modelCollection.predictAndScores).forEach( - predictAndScoreRes => { - const datasetRow = - state.data.inputs[predictAndScoreRes.rowDigest]; - if (datasetRow != null) { - const output = predictAndScoreRes._rawPredictTraceData?.output; - rows.push({ - id: predictAndScoreRes.callId, - evaluationCallId: predictAndScoreRes.evaluationCallId, - inputDigest: datasetRow.digest, - inputRef: predictAndScoreRes.exampleRef, - input: flattenObjectPreservingWeaveTypes({ - input: datasetRow.val, - }), - output: flattenObjectPreservingWeaveTypes({output}), - scores: Object.fromEntries( - [...Object.entries(state.data.scoreMetrics)].map( - ([scoreKey, scoreVal]) => { - return [ - scoreKey, - resolveScoreMetricValueForPASCall( - scoreVal, - predictAndScoreRes - ), - ]; - } - ) - ), - path: [ - rowDigest, - predictAndScoreRes.evaluationCallId, - predictAndScoreRes.callId, - ], - predictAndScore: predictAndScoreRes, - }); - } + Object.entries( + state.loadableComparisonResults.result?.resultRows ?? {} + ).forEach(([rowDigest, rowCollection]) => { + Object.values(rowCollection.evaluations).forEach(modelCollection => { + Object.values(modelCollection.predictAndScores).forEach( + predictAndScoreRes => { + const datasetRow = + state.loadableComparisonResults.result?.inputs[ + predictAndScoreRes.rowDigest + ]; + if (datasetRow != null) { + const output = predictAndScoreRes._rawPredictTraceData?.output; + rows.push({ + id: predictAndScoreRes.callId, + evaluationCallId: predictAndScoreRes.evaluationCallId, + inputDigest: datasetRow.digest, + inputRef: predictAndScoreRes.exampleRef, + input: flattenObjectPreservingWeaveTypes({ + input: datasetRow.val, + }), + output: flattenObjectPreservingWeaveTypes({output}), + scores: Object.fromEntries( + [...Object.entries(state.summary.scoreMetrics)].map( + ([scoreKey, scoreVal]) => { + return [ + scoreKey, + resolveScoreMetricValueForPASCall( + scoreVal, + predictAndScoreRes + ), + ]; + } + ) + ), + path: [ + rowDigest, + predictAndScoreRes.evaluationCallId, + predictAndScoreRes.callId, + ], + predictAndScore: predictAndScoreRes, + }); } - ); - }); - } - ); + } + ); + }); + }); return rows; - }, [state.data.resultRows, state.data.inputs, state.data.scoreMetrics]); + }, [ + state.loadableComparisonResults.result?.resultRows, + state.loadableComparisonResults.result?.inputs, + state.summary.scoreMetrics, + ]); const pivotedRows = useMemo(() => { // Ok, so in this step we are going to pivot - diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleFilterSection/ExampleFilterSection.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleFilterSection/ExampleFilterSection.tsx index 
1146c8ea9604..4ba9349b83d4 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleFilterSection/ExampleFilterSection.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/ExampleFilterSection/ExampleFilterSection.tsx @@ -99,12 +99,12 @@ const SingleDimensionFilter: React.FC<{ dimensionIndex: number; }> = props => { const compositeMetricsMap = useMemo(() => { - return buildCompositeMetricsMap(props.state.data, 'score'); - }, [props.state.data]); + return buildCompositeMetricsMap(props.state.summary, 'score'); + }, [props.state.summary]); const {setComparisonDimensions} = useCompareEvaluationsState(); const baselineCallId = getBaselineCallId(props.state); - const compareCallId = Object.keys(props.state.data.evaluationCalls).find( + const compareCallId = Object.keys(props.state.summary.evaluationCalls).find( callId => callId !== baselineCallId )!; @@ -112,14 +112,14 @@ const SingleDimensionFilter: React.FC<{ props.state.comparisonDimensions?.[props.dimensionIndex]; const targetDimension = targetComparisonDimension - ? props.state.data.scoreMetrics[targetComparisonDimension.metricId] + ? props.state.summary.scoreMetrics[targetComparisonDimension.metricId] : undefined; const xIsPercentage = targetDimension?.scoreType === 'binary'; const yIsPercentage = targetDimension?.scoreType === 'binary'; - const xColor = props.state.data.evaluationCalls[baselineCallId].color; - const yColor = props.state.data.evaluationCalls[compareCallId].color; + const xColor = props.state.summary.evaluationCalls[baselineCallId].color; + const yColor = props.state.summary.evaluationCalls[compareCallId].color; const {filteredRows} = useFilteredAggregateRows(props.state); const filteredDigest = useMemo(() => { @@ -141,7 +141,9 @@ const SingleDimensionFilter: React.FC<{ ); if (baselineTargetDimension != null && compareTargetDimension != null) { - Object.entries(props.state.data.resultRows).forEach(([digest, row]) => { + Object.entries( + props.state.loadableComparisonResults.result?.resultRows ?? {} + ).forEach(([digest, row]) => { const xVals: number[] = []; const yVals: number[] = []; Object.values( @@ -230,7 +232,7 @@ const SingleDimensionFilter: React.FC<{ compareCallId, compositeMetricsMap, filteredDigest, - props.state.data.resultRows, + props.state.loadableComparisonResults.result?.resultRows, targetDimension, ]); @@ -281,15 +283,15 @@ const SingleDimensionFilter: React.FC<{ yIsPercentage={yIsPercentage} xTitle={ 'Baseline: ' + - props.state.data.evaluationCalls[baselineCallId].name + + props.state.summary.evaluationCalls[baselineCallId].name + ' ' + - props.state.data.evaluationCalls[baselineCallId].callId.slice(-4) + props.state.summary.evaluationCalls[baselineCallId].callId.slice(-4) } yTitle={ 'Challenger: ' + - props.state.data.evaluationCalls[compareCallId].name + + props.state.summary.evaluationCalls[compareCallId].name + ' ' + - props.state.data.evaluationCalls[compareCallId].callId.slice(-4) + props.state.summary.evaluationCalls[compareCallId].callId.slice(-4) } /> @@ -303,11 +305,11 @@ const DimensionPicker: React.FC<{ props.state.comparisonDimensions?.[props.dimensionIndex]; const currDimension = targetComparisonDimension - ? props.state.data.scoreMetrics[targetComparisonDimension.metricId] + ? 
props.state.summary.scoreMetrics[targetComparisonDimension.metricId] : undefined; const {setComparisonDimensions} = useCompareEvaluationsState(); - const dimensionMap = props.state.data.scoreMetrics; + const dimensionMap = props.state.summary.scoreMetrics; return ( Object.values(props.state.data.evaluations).map(e => e.datasetRef), + () => Object.values(props.state.summary.evaluations).map(e => e.datasetRef), [props.state] ); const evalCallIds = useMemo( @@ -89,7 +89,7 @@ export const ScorecardSection: React.FC<{ const modelProps = useMemo(() => { const propsRes: {[prop: string]: {[ref: string]: any}} = {}; modelRefs.forEach(ref => { - const model = props.state.data.models[ref]; + const model = props.state.summary.models[ref]; Object.keys(model.properties).forEach(prop => { if (!propsRes[prop]) { propsRes[prop] = {}; @@ -100,7 +100,7 @@ export const ScorecardSection: React.FC<{ // Make sure predict op is last modelRefs.forEach(ref => { - const model = props.state.data.models[ref]; + const model = props.state.summary.models[ref]; if (!propsRes.predict) { propsRes.predict = {}; } @@ -108,7 +108,7 @@ export const ScorecardSection: React.FC<{ }); return propsRes; - }, [modelRefs, props.state.data.models]); + }, [modelRefs, props.state.summary.models]); const propsWithDifferences = useMemo(() => { return Object.keys(modelProps).filter(prop => { const values = Object.values(modelProps[prop]); @@ -119,15 +119,15 @@ export const ScorecardSection: React.FC<{ const compositeSummaryMetrics = useMemo(() => { return buildCompositeMetricsMap( - props.state.data, + props.state.summary, 'summary', props.state.selectedMetrics ); }, [props.state]); const onCallClick = usePeekCall( - props.state.data.entity, - props.state.data.project + props.state.summary.entity, + props.state.summary.project ); const datasetVariation = Array.from(new Set(datasetRefs)).length > 1; @@ -295,7 +295,7 @@ export const ScorecardSection: React.FC<{ {evalCallIds.map((evalCallId, mNdx) => { const model = - props.state.data.evaluationCalls[evalCallId].modelRef; + props.state.summary.evaluationCalls[evalCallId].modelRef; const parsed = parseRefMaybe( modelProps[prop][model] ) as WeaveObjectRef; @@ -560,7 +560,7 @@ const resolveSummaryMetricResult = ( const baseline = baselineDimension ? 
resolveSummaryMetricResultForEvaluateCall( baselineDimension, - state.data.evaluationCalls[evalCallId] + state.summary.evaluationCalls[evalCallId] ) : undefined; return baseline; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/SummaryPlotsSection/SummaryPlotsSection.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/SummaryPlotsSection/SummaryPlotsSection.tsx index 02c456df8507..2b1250080f39 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/SummaryPlotsSection/SummaryPlotsSection.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/CompareEvaluationsPage/sections/SummaryPlotsSection/SummaryPlotsSection.tsx @@ -430,7 +430,7 @@ const usePlotDataFromMetrics = ( state: EvaluationComparisonState ): {radarData: RadarPlotData; allMetricNames: Set} => { const compositeMetrics = useMemo(() => { - return buildCompositeMetricsMap(state.data, 'summary'); + return buildCompositeMetricsMap(state.summary, 'summary'); }, [state]); const callIds = useMemo(() => { return getOrderedCallIds(state); @@ -450,7 +450,7 @@ const usePlotDataFromMetrics = ( } const val = resolveSummaryMetricValueForEvaluateCall( metricDimension, - state.data.evaluationCalls[callId] + state.summary.evaluationCalls[callId] ); if (typeof val === 'boolean') { return val ? 1 : 0; @@ -471,7 +471,7 @@ const usePlotDataFromMetrics = ( }); const radarData = Object.fromEntries( callIds.map(callId => { - const evalCall = state.data.evaluationCalls[callId]; + const evalCall = state.summary.evaluationCalls[callId]; return [ evalCall.callId, { @@ -491,5 +491,5 @@ const usePlotDataFromMetrics = ( ); const allMetricNames = new Set(metrics.map(m => m.metricLabel)); return {radarData, allMetricNames}; - }, [callIds, compositeMetrics, state.data.evaluationCalls]); + }, [callIds, compositeMetrics, state.summary.evaluationCalls]); }; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/TabUseCall.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/TabUseCall.tsx index 51f268011c01..3041533f98c8 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/TabUseCall.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/TabUseCall.tsx @@ -34,8 +34,8 @@ os.environ["WF_TRACE_SERVER_URL"] = "http://127.0.0.1:6345" const codeFeedbackPython = `call.feedback.add("correctness", {"value": 4})`; const codeFetchJS = `import * as weave from 'weave'; - const client = await weave.init("${entity}/${project}"); - const call = await client.getCall("${callId}")`; +const client = await weave.init("${entity}/${project}"); +const call = await client.getCall("${callId}")`; const codeReactionJS = `await call.feedback.addReaction('👍')`; const codeNoteJS = `await call.feedback.addNote('This is delightful!')`; const codeFeedbackJS = `await call.feedback.add({correctness: {value: 4}})`; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooksEvaluationComparison.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooksEvaluationComparison.ts index ba8941a24c5e..c321accd487e 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooksEvaluationComparison.ts +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/wfReactInterface/tsDataModelHooksEvaluationComparison.ts 
@@ -70,6 +70,7 @@ */ import {sum} from 'lodash'; +import _ from 'lodash'; import {useEffect, useMemo, useRef, useState} from 'react'; import {WB_RUN_COLORS} from '../../../../../../common/css/color.styles'; @@ -77,7 +78,8 @@ import {useDeepMemo} from '../../../../../../hookUtils'; import {parseRef, WeaveObjectRef} from '../../../../../../react'; import {PREDICT_AND_SCORE_OP_NAME_POST_PYDANTIC} from '../common/heuristics'; import { - EvaluationComparisonData, + EvaluationComparisonResults, + EvaluationComparisonSummary, MetricDefinition, } from '../CompareEvaluationsPage/ecpTypes'; import { @@ -98,20 +100,20 @@ import {TraceCallSchema} from './traceServerClientTypes'; * Primary react hook for fetching evaluation comparison data. This could be * moved into the Trace Server hooks at some point, hence the location of the file. */ -export const useEvaluationComparisonData = ( +export const useEvaluationComparisonSummary = ( entity: string, project: string, evaluationCallIds: string[] -): Loadable<EvaluationComparisonData> => { +): Loadable<EvaluationComparisonSummary> => { const getTraceServerClient = useGetTraceServerClientContext(); - const [data, setData] = useState<EvaluationComparisonData | null>(null); + const [data, setData] = useState<EvaluationComparisonSummary | null>(null); const evaluationCallIdsMemo = useDeepMemo(evaluationCallIds); const evaluationCallIdsRef = useRef(evaluationCallIdsMemo); useEffect(() => { setData(null); let mounted = true; - fetchEvaluationComparisonData( + fetchEvaluationSummaryData( getTraceServerClient(), entity, project, @@ -139,25 +141,73 @@ }; /** - * This function is responsible for building the data structure used to describe - * the comparison of evaluations. It is a complex function that fetches data from - * the trace server and builds a normalized data structure. + * Primary react hook for fetching evaluation comparison results. This could be + * moved into the Trace Server hooks at some point, hence the location of the file.
*/ -const fetchEvaluationComparisonData = async ( +export const useEvaluationComparisonResults = ( + entity: string, + project: string, + evaluationCallIds: string[], + summaryData: EvaluationComparisonSummary | null +): Loadable => { + const getTraceServerClient = useGetTraceServerClientContext(); + const [data, setData] = useState(null); + const evaluationCallIdsMemo = useDeepMemo(evaluationCallIds); + const evaluationCallIdsRef = useRef(evaluationCallIdsMemo); + + useEffect(() => { + setData(null); + let mounted = true; + if (summaryData == null) { + return; + } + fetchEvaluationComparisonResults( + getTraceServerClient(), + entity, + project, + evaluationCallIdsMemo, + summaryData + ).then(dataRes => { + if (mounted) { + evaluationCallIdsRef.current = evaluationCallIdsMemo; + setData(dataRes); + } + }); + return () => { + mounted = false; + }; + }, [ + entity, + evaluationCallIdsMemo, + project, + getTraceServerClient, + summaryData, + ]); + + return useMemo(() => { + if ( + data == null || + evaluationCallIdsRef.current !== evaluationCallIdsMemo + ) { + return {loading: true, result: null}; + } + return {loading: false, result: data}; + }, [data, evaluationCallIdsMemo]); +}; + +const fetchEvaluationSummaryData = async ( traceServerClient: TraceServerClient, // TODO: Bad that this is leaking into user-land entity: string, project: string, evaluationCallIds: string[] -): Promise => { +): Promise => { const projectId = projectIdFromParts({entity, project}); - const result: EvaluationComparisonData = { + const result: EvaluationComparisonSummary = { entity, project, evaluationCalls: {}, evaluations: {}, - inputs: {}, models: {}, - resultRows: {}, scoreMetrics: {}, summaryMetrics: {}, }; @@ -175,37 +225,6 @@ const fetchEvaluationComparisonData = async ( filter: {call_ids: evaluationCallIds}, }); - // Kick off the trace query to get the actual trace data - // Note: we split this into 2 steps to ensure we only get level 2 children - // of the evaluations. This avoids massive overhead of fetching gigantic traces - // for every evaluation. - const evalTraceIds = evalRes.calls.map(call => call.trace_id); - // First, get all the children of the evaluations (predictAndScoreCalls + summary) - const evalTraceResProm = traceServerClient - .callsStreamQuery({ - project_id: projectId, - filter: {trace_ids: evalTraceIds, parent_ids: evaluationCallIds}, - }) - .then(predictAndScoreCallRes => { - // Then, get all the children of those calls (predictions + scores) - const predictAndScoreIds = predictAndScoreCallRes.calls.map( - call => call.id - ); - return traceServerClient - .callsStreamQuery({ - project_id: projectId, - filter: {trace_ids: evalTraceIds, parent_ids: predictAndScoreIds}, - }) - .then(predictionsAndScoresCallsRes => { - return { - calls: [ - ...predictAndScoreCallRes.calls, - ...predictionsAndScoresCallsRes.calls, - ], - }; - }); - }); - const evaluationCallCache: {[callId: string]: EvaluationEvaluateCallSchema} = Object.fromEntries( evalRes.calls.map(call => [call.id, call as EvaluationEvaluateCallSchema]) @@ -220,6 +239,7 @@ const fetchEvaluationComparisonData = async ( evaluationRef: call.inputs.self, modelRef: call.inputs.model, summaryMetrics: {}, // These cannot be filled out yet since we don't know the IDs yet + traceId: call.trace_id, }, ]) ); @@ -415,9 +435,71 @@ const fetchEvaluationComparisonData = async ( }) ); + return result; +}; + +/** + * This function is responsible for building the data structure used to describe + * the comparison of evaluations. 
It is a complex function that fetches data from + * the trace server and builds a normalized data structure. + */ +const fetchEvaluationComparisonResults = async ( + traceServerClient: TraceServerClient, // TODO: Bad that this is leaking into user-land + entity: string, + project: string, + evaluationCallIds: string[], + summaryData: EvaluationComparisonSummary +): Promise<EvaluationComparisonResults> => { + const projectId = projectIdFromParts({entity, project}); + const result: EvaluationComparisonResults = { + inputs: {}, + resultRows: {}, + }; + + // Kick off the trace query to get the actual trace data + // Note: we split this into 2 steps to ensure we only get level 2 children + // of the evaluations. This avoids massive overhead of fetching gigantic traces + // for every evaluation. + const evalTraceIds = Object.values(summaryData.evaluationCalls).map( + call => call.traceId + ); + // First, get all the children of the evaluations (predictAndScoreCalls + summary) + const evalTraceResProm = traceServerClient + .callsStreamQuery({ + project_id: projectId, + filter: {trace_ids: evalTraceIds, parent_ids: evaluationCallIds}, + }) + .then(predictAndScoreCallRes => { + // Then, get all the children of those calls (predictions + scores) + const predictAndScoreIds = predictAndScoreCallRes.calls.map( + call => call.id + ); + + return Promise.all( + _.chunk(predictAndScoreIds, 500).map(chunk => { + return traceServerClient + .callsStreamQuery({ + project_id: projectId, + filter: {trace_ids: evalTraceIds, parent_ids: chunk}, + }) + .then(predictionsAndScoresCallsRes => { + return predictionsAndScoresCallsRes.calls; + }); + }) + ).then(predictionsAndScoresCallsResMany => { + return { + calls: [ + ...predictAndScoreCallRes.calls, + ...predictionsAndScoresCallsResMany.flat(), + ], + }; + }); + }); + // 3.5 Populate the inputs // We only need 1 since we are going to effectively do an inner join on the rowDigest - const datasetRef = Object.values(result.evaluations)[0].datasetRef as string; + const datasetRef = Object.values(summaryData.evaluations)[0] + .datasetRef as string; const datasetObjRes = await traceServerClient.readBatch({refs: [datasetRef]}); const rowsRef = datasetObjRes.vals[0].rows; const parsedRowsRef = parseRef(rowsRef) as WeaveObjectRef; @@ -440,7 +522,7 @@ const fetchEvaluationComparisonData = async ( // Create a set of all of the scorer refs const scorerRefs = new Set( - Object.values(result.evaluations).flatMap( + Object.values(summaryData.evaluations).flatMap( evaluation => evaluation.scorerRefs ) ); @@ -464,14 +546,14 @@ const fetchEvaluationComparisonData = async ( // Fill in the autosummary source calls summaryOps.forEach(summarizedOp => { const evalCallId = summarizedOp.parent_id!; - const evalCall = result.evaluationCalls[evalCallId]; + const evalCall = summaryData.evaluationCalls[evalCallId]; if (evalCall == null) { return; } Object.entries(evalCall.summaryMetrics).forEach( ([metricId, metricResult]) => { if ( - result.summaryMetrics[metricId].source === 'scorer' || + summaryData.summaryMetrics[metricId].source === 'scorer' || // Special case that the model latency is also a summary metric calc metricDefinitionId(modelLatencyMetricDimension) === metricId ) { @@ -481,6 +563,10 @@ const fetchEvaluationComparisonData = async ( ); }); + const modelRefs = Object.values(summaryData.evaluationCalls).map( + evalCall => evalCall.modelRef + ); + // Next, we need to build the predictions object evalTraceRes.calls.forEach(traceCall => { // We are looking for 2 types of calls: @@ -599,7 +685,7 @@ const
fetchEvaluationComparisonData = async ( scorerOpOrObjRef: scorerRef, }; const metricId = metricDefinitionId(metricDimension); - result.scoreMetrics[metricId] = metricDimension; + summaryData.scoreMetrics[metricId] = metricDimension; predictAndScoreFinal.scoreMetrics[metricId] = { sourceCallId: traceCall.id, value: scoreVal, @@ -612,7 +698,7 @@ const fetchEvaluationComparisonData = async ( scorerOpOrObjRef: scorerRef, }; const metricId = metricDefinitionId(metricDimension); - result.scoreMetrics[metricId] = metricDimension; + summaryData.scoreMetrics[metricId] = metricDimension; predictAndScoreFinal.scoreMetrics[metricId] = { sourceCallId: traceCall.id, @@ -648,7 +734,7 @@ const fetchEvaluationComparisonData = async ( if (isSummaryChild && isProbablyBoundScoreCall && isSummaryOp) { // Now fill in the source of the eval score const evalCallId = maybeParentSummaryOp!.parent_id!; - const evalCall = result.evaluationCalls[evalCallId]; + const evalCall = summaryData.evaluationCalls[evalCallId]; if (evalCall == null) { return; } @@ -668,7 +754,7 @@ const fetchEvaluationComparisonData = async ( Object.entries(result.resultRows).filter(([digest, row]) => { return ( Object.values(row.evaluations).length === - Object.values(result.evaluationCalls).length + Object.values(summaryData.evaluationCalls).length ); }) ); diff --git a/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py index 0d376db51d91..5f78d63e4b41 100644 --- a/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py +++ b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py @@ -1,7 +1,6 @@ from __future__ import annotations import importlib -import time from typing import Any, Callable import_failed = False @@ -40,8 +39,6 @@ def postprocess_output_to_openai_format(output: Any) -> dict: Need to post process the output reported to weave to send it on openai format so that Weave front end renders chat view. This only affects what is sent to weave. 
""" - from openai.types.chat import ChatCompletion - if isinstance(output, ChatResult): # its ChatResult message = output.llm_output enhanced_usage = message.get("token_usage", {}) @@ -52,9 +49,8 @@ def postprocess_output_to_openai_format(output: Any) -> dict: "prompt_tokens", 0 ) - returnable = ChatCompletion( - id="None", - choices=[ + returnable = { + "choices": [ { "index": 0, "message": { @@ -66,15 +62,14 @@ def postprocess_output_to_openai_format(output: Any) -> dict: "finish_reason": message.get("finish_reason", ""), } ], - created=int(time.time()), - model=message.get("model_name", ""), - object="chat.completion", - tool_calls=message.get("tool_calls", []), - system_fingerprint=None, - usage=enhanced_usage, - ) + "model": message.get("model_name", ""), + "tool_calls": message.get("tool_calls", []), + "usage": enhanced_usage, + } - return returnable.model_dump(exclude_unset=True, exclude_none=True) + returnable.update(output.model_dump(exclude_unset=True, exclude_none=True)) + + return returnable elif isinstance(output, ChatGenerationChunk): # its ChatGenerationChunk orig_message = output.message @@ -87,9 +82,8 @@ def postprocess_output_to_openai_format(output: Any) -> dict: "input_tokens", 0 ) - returnable = ChatCompletion( - id="None", - choices=[ + returnable = { + "choices": [ { "index": 0, "message": { @@ -103,17 +97,17 @@ def postprocess_output_to_openai_format(output: Any) -> dict: ), } ], - created=int(time.time()), - model=getattr(orig_message, "response_metadata", {}).get( + "model": getattr(orig_message, "response_metadata", {}).get( "model_name", None ), - tool_calls=openai_message.get("tool_calls", []), - object="chat.completion", - system_fingerprint=None, - usage=enhanced_usage, - ) + "tool_calls": openai_message.get("tool_calls", []), + "usage": enhanced_usage, + } + + returnable.update(output.model_dump(exclude_unset=True, exclude_none=True)) + + return returnable - return returnable.model_dump(exclude_unset=True, exclude_none=True) return output @@ -142,11 +136,14 @@ def postprocess_inputs_to_openai_format( "max_tokens": chat_nvidia_obj.max_tokens, "temperature": chat_nvidia_obj.temperature, "top_p": chat_nvidia_obj.top_p, - "object": "ChatNVIDIA._generate", "n": n, "stream": stream, } + weave_report.update( + chat_nvidia_obj.model_dump(exclude_unset=True, exclude_none=True) + ) + return ProcessedInputs( original_args=original_args, original_kwargs=original_kwargs, diff --git a/weave/integrations/vertexai/vertexai_sdk.py b/weave/integrations/vertexai/vertexai_sdk.py index a64620cbe5fb..03f06ee72c47 100644 --- a/weave/integrations/vertexai/vertexai_sdk.py +++ b/weave/integrations/vertexai/vertexai_sdk.py @@ -89,7 +89,7 @@ def vertexai_on_finish( def vertexai_wrapper_sync(settings: OpSettings) -> Callable[[Callable], Callable]: def wrapper(fn: Callable) -> Callable: - op_kwargs = settings.model_copy() + op_kwargs = settings.model_dump() if not op_kwargs.get("postprocess_inputs"): op_kwargs["postprocess_inputs"] = vertexai_postprocess_inputs @@ -114,7 +114,7 @@ async def _async_wrapper(*args: Any, **kwargs: Any) -> Any: return _async_wrapper - op_kwargs = settings.model_copy() + op_kwargs = settings.model_dump() if not op_kwargs.get("postprocess_inputs"): op_kwargs["postprocess_inputs"] = vertexai_postprocess_inputs diff --git a/weave/trace_server/llm_completion.py b/weave/trace_server/llm_completion.py index e8ac86ffb33a..3c23d6e14b33 100644 --- a/weave/trace_server/llm_completion.py +++ b/weave/trace_server/llm_completion.py @@ -33,53 +33,20 @@ def 
lite_llm_completion( # This allows us to drop params that are not supported by the LLM provider litellm.drop_params = True - if supports_n_times(inputs.model) or inputs.n == 1: - try: - res = litellm.completion( - **inputs.model_dump(exclude_none=True), - api_key=api_key, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_region_name=aws_region_name, - ) - return tsi.CompletionsCreateRes(response=res.model_dump()) - except Exception as e: - error_message = str(e) - error_message = error_message.replace("litellm.", "") - return tsi.CompletionsCreateRes(response={"error": error_message}) - - # o1 models with n > 1 - results = [] try: - # get n results - for i in range(inputs.n or 1): - results.append( - litellm.completion( - **inputs.model_dump(exclude_none=True), - api_key=api_key, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_region_name=aws_region_name, - ) - ) + res = litellm.completion( + **inputs.model_dump(exclude_none=True), + api_key=api_key, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_region_name=aws_region_name, + ) + return tsi.CompletionsCreateRes(response=res.model_dump()) except Exception as e: error_message = str(e) error_message = error_message.replace("litellm.", "") return tsi.CompletionsCreateRes(response={"error": error_message}) - final_result = results[0] - for idx, result in enumerate(results): - if idx != 0: - # append choices - final_result.choices.append(result.choices[0]) - - # sum usage - final_result.usage = sum_dict_leaves( - [result.usage.model_dump() for result in results] - ) - - return tsi.CompletionsCreateRes(response=final_result.model_dump()) - def get_bedrock_credentials( model_name: str, @@ -122,25 +89,3 @@ def get_bedrock_credentials( ) return aws_access_key_id, aws_secret_access_key, aws_region_name - - -NO_N_TIMES_MODEL_NAMES = ("o1-mini", "o1-preview", "o1") - - -# if the model name contains any of these strings, we don't support n > 1 -def supports_n_times(model_name: str) -> bool: - return not any(x in model_name for x in NO_N_TIMES_MODEL_NAMES) - - -# copied from weave/trace/weave_client.py -def sum_dict_leaves(dicts: list[dict]) -> dict: - # dicts is a list of dictionaries, that may or may not - # have nested dictionaries. Sum all the leaves that match - result: dict = {} - for d in dicts: - for k, v in d.items(): - if isinstance(v, dict): - result[k] = sum_dict_leaves([result.get(k, {}), v]) - elif v is not None: - result[k] = result.get(k, 0) + v - return result diff --git a/weave/version.py b/weave/version.py index 15fdd261727d..62f71e3db053 100644 --- a/weave/version.py +++ b/weave/version.py @@ -44,4 +44,4 @@ """ -VERSION = "0.51.26-dev0" +VERSION = "0.51.28-dev0"