diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_model.png b/docs/docs/guides/integrations/imgs/chatnvidia_model.png
new file mode 100644
index 000000000000..4d0782b7719b
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/chatnvidia_model.png differ
diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_trace.png b/docs/docs/guides/integrations/imgs/chatnvidia_trace.png
new file mode 100644
index 000000000000..e573ab26089d
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/chatnvidia_trace.png differ
diff --git a/docs/docs/guides/integrations/imgs/nvidia_pokedex.png b/docs/docs/guides/integrations/imgs/nvidia_pokedex.png
new file mode 100644
index 000000000000..51ab369fa8ca
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/nvidia_pokedex.png differ
diff --git a/docs/docs/guides/integrations/index.md b/docs/docs/guides/integrations/index.md
index f83d579b21c2..78504dc154b4 100644
--- a/docs/docs/guides/integrations/index.md
+++ b/docs/docs/guides/integrations/index.md
@@ -20,6 +20,8 @@ LLM providers are the vendors that offer access to large language models for gen
 - **[Groq](/guides/integrations/groq)**
 - **[Open Router](/guides/integrations/openrouter)**
 - **[LiteLLM](/guides/integrations/litellm)**
+- **[NVIDIA NIM](/guides/integrations/nvidia_nim)**
+
 **[Local Models](/guides/integrations/local_models)**: For when you're running models on your own infrastructure.
diff --git a/docs/docs/guides/integrations/nvidia_nim.md b/docs/docs/guides/integrations/nvidia_nim.md
new file mode 100644
index 000000000000..01007ee7e6a6
--- /dev/null
+++ b/docs/docs/guides/integrations/nvidia_nim.md
@@ -0,0 +1,176 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# NVIDIA NIM
+
+Weave automatically tracks and logs LLM calls made via the [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) library after `weave.init()` is called.
+
+## Tracing
+
+It's important to store traces of LLM applications in a central database, both during development and in production. You'll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application.
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+    Weave can automatically capture traces for the [ChatNVIDIA Python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).
+
+    Start capturing by calling `weave.init()` with a project name of your choice.
+
+    ```python
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+    import weave
+
+    client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.8, max_tokens=64, top_p=1)
+    # highlight-next-line
+    weave.init('emoji-bot')
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are AGI. You will be provided with a message, and your task is to respond using emojis only."
+        }
+    ]
+
+    response = client.invoke(messages)
+    ```
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+  </TabItem>
+</Tabs>
+
+![chatnvidia_trace.png](imgs/chatnvidia_trace.png)
+
+## Track your own ops
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+Wrapping a function with `@weave.op` starts capturing inputs, outputs, and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git.
+
+Simply create a function decorated with [`@weave.op`](/guides/tracking/ops) that calls into the [ChatNVIDIA Python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).
+
+In the example below, we have two functions wrapped with `@weave.op`. This helps us see how intermediate steps, like the retrieval step in a RAG app, affect how our app behaves.
+
+    ```python
+    # highlight-next-line
+    import weave
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+    import requests, random
+
+    PROMPT = """Emulate the Pokedex from early Pokémon episodes. State the name of the Pokemon and then describe it.
+    Your tone is informative yet sassy, blending factual details with a touch of dry humor. Be concise, no more than 3 sentences. """
+    POKEMON = ['pikachu', 'charmander', 'squirtle', 'bulbasaur', 'jigglypuff', 'meowth', 'eevee']
+    client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.7, max_tokens=100, top_p=1)
+
+    # highlight-next-line
+    @weave.op
+    def get_pokemon_data(pokemon_name):
+        # highlight-next-line
+        # This is a step within your application, like the retrieval step within a RAG app
+        url = f"https://pokeapi.co/api/v2/pokemon/{pokemon_name}"
+        response = requests.get(url)
+        if response.status_code == 200:
+            data = response.json()
+            name = data["name"]
+            types = [t["type"]["name"] for t in data["types"]]
+            species_url = data["species"]["url"]
+            species_response = requests.get(species_url)
+            evolved_from = "Unknown"
+            if species_response.status_code == 200:
+                species_data = species_response.json()
+                if species_data["evolves_from_species"]:
+                    evolved_from = species_data["evolves_from_species"]["name"]
+            return {"name": name, "types": types, "evolved_from": evolved_from}
+        else:
+            return None
+
+    # highlight-next-line
+    @weave.op
+    def pokedex(name: str, prompt: str) -> str:
+        # highlight-next-line
+        # This is your root op that calls out to other ops
+        # highlight-next-line
+        data = get_pokemon_data(name)
+        if not data:
+            return "Error: Unable to fetch data"
+
+        messages = [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": str(data)}
+        ]
+
+        response = client.invoke(messages)
+        return response.content
+
+    # highlight-next-line
+    weave.init('pokedex-nvidia')
+    # Get data for a specific Pokémon
+    pokemon_data = pokedex(random.choice(POKEMON), PROMPT)
+    ```
+
+Navigate to Weave and click `get_pokemon_data` in the UI to see the inputs & outputs of that step.
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+  </TabItem>
+</Tabs>
+
+![nvidia_pokedex.png](imgs/nvidia_pokedex.png)
+
+## Create a `Model` for easier experimentation
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+    Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app, like your system prompt or the model you're using. This helps you organize and compare different iterations of your app.
+
+    In addition to versioning code and capturing inputs/outputs, [`Model`](/guides/core-types/models)s capture structured parameters that control your application's behavior, making it easy to find what parameters worked best. You can also use Weave Models with `serve` and [`Evaluation`](/guides/core-types/evaluations)s.
+
+    In the example below, you can experiment with `model` and `system_message`. Every time you change one of these, you'll get a new _version_ of `GrammarCorrectorModel`.
+
+    ```python
+    import weave
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+    weave.init('grammar-nvidia')
+
+    class GrammarCorrectorModel(weave.Model):  # Change to `weave.Model`
+        system_message: str
+
+        @weave.op()
+        def predict(self, user_input):  # Change to `predict`
+            client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0, max_tokens=100, top_p=1)
+
+            messages = [
+                {
+                    "role": "system",
+                    "content": self.system_message
+                },
+                {
+                    "role": "user",
+                    "content": user_input
+                }
+            ]
+
+            response = client.invoke(messages)
+            return response.content
+
+    corrector = GrammarCorrectorModel(
+        system_message = "You are a grammar checker, correct the following user input.")
+    result = corrector.predict("That was so easy, it was a piece of pie!")
+    print(result)
+    ```
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+  </TabItem>
+</Tabs>
+
+![chatnvidia_model.png](imgs/chatnvidia_model.png)
+
+## Usage Info
+
+The ChatNVIDIA integration supports `invoke`, `stream`, and their async variants, as well as tool use.
+Because ChatNVIDIA is meant to be used with many different types of models, it does not have function calling support.
diff --git a/docs/sidebars.ts b/docs/sidebars.ts
index fa2f7f80d32e..b132e5ac6476 100644
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -101,6 +101,7 @@ const sidebars: SidebarsConfig = {
         "guides/integrations/groq",
         "guides/integrations/openrouter",
         "guides/integrations/litellm",
+        "guides/integrations/nvidia_nim",
       ],
     },
     "guides/integrations/local_models",
diff --git a/noxfile.py b/noxfile.py
index dff1305f21fd..f70339598275 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -8,6 +8,7 @@
     "cohere",
     "dspy",
     "langchain",
+    "langchain_nvidia_ai_endpoints",
     "litellm",
     "notdiamond",
     "google_ai_studio",
@@ -40,6 +41,7 @@ def lint(session):
         "google_ai_studio",
         "groq",
         "instructor",
+        "langchain_nvidia_ai_endpoints",
         "langchain",
         "litellm",
         "llamaindex",
@@ -73,6 +75,10 @@ def tests(session, shard):
     if shard == "google_ai_studio":
         env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")
 
+    # Add the NVIDIA_API_KEY environment variable for the "langchain_nvidia_ai_endpoints" shard
+    if shard == "langchain_nvidia_ai_endpoints":
+        env["NVIDIA_API_KEY"] = session.env.get("NVIDIA_API_KEY")
+
     # we are doing some integration test in test_llm_integrations.py that requires
     # setting some environment variables for the LLM providers
     if shard == "scorers_tests":
diff --git a/pyproject.toml b/pyproject.toml
index 660fab30960f..eb5636029b2a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,10 @@ langchain = [
     "pysqlite3",
     "opentelemetry-exporter-otlp",
 ]
+langchain_nvidia_ai_endpoints = [
+    "langchain-core>=0.2.1",
+    "langchain-nvidia-ai-endpoints",
+]
 litellm = ["litellm>=1.36.1"]
 llamaindex = ["llama-index>=0.10.35"]
 mistral0 = ["mistralai>=0.1.8,<1.0.0"]
@@ -98,7 +102,11 @@ test = [
     # Integration Tests
     "pytest-recording>=0.13.2",
-    "vcrpy>=6.0.1",
+    # "vcrpy>=6.0.1",
+    # https://github.com/kevin1024/vcrpy/pull/889
+    # This resolves test issues until a new PyPI release can be made. Once that release
+    # is made, we can remove this and revert to the vcrpy>=6.0.1 dependency.
+ "vcrpy @ git+https://github.com/kevin1024/vcrpy.git@48d0a2e453f6635af343000cdaf9794a781e807e", # serving tests "flask", @@ -108,6 +116,7 @@ test = [ "httpx", ] + [project.scripts] weave = "weave.trace.cli:cli" @@ -138,6 +147,8 @@ exclude = [ "weave/clear_cache.py", ] +[tool.hatch.metadata] +allow-direct-references = true [tool.pytest.ini_options] filterwarnings = [ diff --git a/tests/integrations/__init__.py b/tests/integrations/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/__init__.py b/tests/integrations/langchain_nvidia_ai_endpoints/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml new file mode 100644 index 000000000000..fea579fb73e6 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml @@ -0,0 +1,50 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '161' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: '{"id":"chat-8bfccc9544b64c70b47605a647b69b8a","object":"chat.completion","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! + It''s nice to meet you. 
Is there something I can help you with or would you + like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '445' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:45 GMT + Nvcf-Reqid: + - 704f40c5-4d25-46fb-8d76-66364bc9e156 + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml new file mode 100644 index 000000000000..eab2bd71a7c6 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml @@ -0,0 +1,152 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '203' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} + + + data: 
{"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}} + + + data: 
{"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + 
Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:46 GMT + Nvcf-Reqid: + - 5d8ef74f-0537-47a5-b0dc-0735d916776f + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml new file mode 100644 index 000000000000..0f3dc21c319e --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml @@ -0,0 +1,50 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '161' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: '{"id":"chat-55c0d02e9caa471694b571312c012a34","object":"chat.completion","created":1734992504,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! + It''s nice to meet you. Is there something I can help you with or would you + like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '445' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:44 GMT + Nvcf-Reqid: + - ea89199e-9f54-4c8d-8895-b1fd9034b86e + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml new file mode 100644 index 000000000000..9d6d68b489f6 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml @@ -0,0 +1,152 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '203' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: 
{"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}} + + + data: 
{"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}} + + + data: 
{"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:45 GMT + Nvcf-Reqid: + - ca6cf115-a5f7-447c-95e7-eafced589f5d + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml new file mode 100644 index 000000000000..aa03ceaa8d58 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml @@ -0,0 +1,56 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false, "tools": + [{"type": "function", "function": {"name": "cricket_player_names", "description": + "store the name of players", "parameters": {"type": "object", "properties": + {"name": {"type": "string", "description": "The name of the player"}, "team:": + {"type": "string", "description": "The team of the player"}, "highest_score": + {"type": "number", "description": "The highest score of the player"}}, "required": + ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '670' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + 
response: + body: + string: '{"id":"chat-c827eb3e9dad425dbde170c946ff7cf7","object":"chat.completion","created":1734992507,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-3b35163e6a5a4961aa22b93581a0c5b2","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '580' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:48 GMT + Nvcf-Reqid: + - 497f5f82-973e-4224-8593-97664a04b39c + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml new file mode 100644 index 000000000000..9043999a5f7f --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml @@ -0,0 +1,56 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false, "tools": + [{"type": "function", "function": {"name": "cricket_player_names", "description": + "store the name of players", "parameters": {"type": "object", "properties": + {"name": {"type": "string", "description": "The name of the player"}, "team:": + {"type": "string", "description": "The team of the player"}, "highest_score": + {"type": "number", "description": "The highest score of the player"}}, "required": + ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '670' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: '{"id":"chat-5e1b429e3b704443992df3e6a1f95021","object":"chat.completion","created":1734992509,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-b16ddfa73cb94d9b95d6572615e71589","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '580' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:49 GMT + Nvcf-Reqid: + - 60deff33-b1fe-46d1-abc4-35978ee01613 + Nvcf-Status: + - 
fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml new file mode 100644 index 000000000000..e58ed33910b8 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml @@ -0,0 +1,68 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}, "tools": [{"type": "function", "function": {"name": + "cricket_player_names", "description": "store the name of players", "parameters": + {"type": "object", "properties": {"name": {"type": "string", "description": + "The name of the player"}, "team:": {"type": "string", "description": "The team + of the player"}, "highest_score": {"type": "number", "description": "The highest + score of the player"}}, "required": ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '712' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} + + + data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-df3daa09595c462297a7253930e4d915","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:51 GMT + Nvcf-Reqid: + - 43d997a5-3e0a-485d-9567-e16813f1b183 + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git 
a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml new file mode 100644 index 000000000000..c8bd72e70e11 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml @@ -0,0 +1,68 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}, "tools": [{"type": "function", "function": {"name": + "cricket_player_names", "description": "store the name of players", "parameters": + {"type": "object", "properties": {"name": {"type": "string", "description": + "The name of the player"}, "team:": {"type": "string", "description": "The team + of the player"}, "highest_score": {"type": "number", "description": "The highest + score of the player"}}, "required": ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '712' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} + + + data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-d0d074128af94984bfc40534381b5860","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:50 GMT + Nvcf-Reqid: + - 2746750e-86b1-4959-bd8b-1da038003578 + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py new file mode 100644 index 000000000000..e46ebac8e89c --- /dev/null +++ 
b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py @@ -0,0 +1,567 @@ +import os + +import pytest +from langchain_core.messages import AIMessageChunk +from langchain_nvidia_ai_endpoints import ChatNVIDIA + +import weave +from weave.integrations.integration_utilities import op_name_from_ref + +model = "meta/llama-3.1-8b-instruct" + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], + allowed_hosts=["api.wandb.ai", "localhost"], +) +def test_chatnvidia_quickstart(client: weave.trace.weave_client.WeaveClient) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + + response = nvidia_client.invoke("Hello!") + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at # type: ignore + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] # type: ignore + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def test_chatnvidia_async_quickstart( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + + response = await nvidia_client.ainvoke("Hello!") + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode 
+@pytest.mark.vcr( + filter_headers=["authorization"], + allowed_hosts=["api.wandb.ai", "localhost"], +) +def test_chatnvidia_stream_quickstart( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + + response = nvidia_client.stream("Hello!") + answer = AIMessageChunk(content="") + for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + print(call.summary["usage"][output["model"]]) + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def test_chatnvidia_async_stream_quickstart( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + response = nvidia_client.astream("Hello!") + answer = AIMessageChunk(content="") + async for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + print(call.summary["usage"][output["model"]]) + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +def test_chatnvidia_tool_call(client: weave.trace.weave_client.WeaveClient) -> None: + 
api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = nvidia_client.invoke(messages) + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def test_chatnvidia_tool_call_async( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = await nvidia_client.ainvoke(messages) + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + 
op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +def test_chatnvidia_tool_call_stream( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = nvidia_client.stream(messages) + + answer = AIMessageChunk(content="") + for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.tool_calls is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def 
test_chatnvidia_tool_call_async_stream( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = nvidia_client.astream(messages) + + answer = AIMessageChunk(content="") + async for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.tool_calls is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 diff --git a/weave-js/src/components/DropdownMenu/DropdownMenu.tsx b/weave-js/src/components/DropdownMenu/DropdownMenu.tsx index 64a93f42006b..56d12fa1432e 100644 --- a/weave-js/src/components/DropdownMenu/DropdownMenu.tsx +++ b/weave-js/src/components/DropdownMenu/DropdownMenu.tsx @@ -111,7 +111,10 @@ export const Separator = ({ }: RadixDropdownMenu.DropdownMenuSeparatorProps) => ( diff --git a/weave-js/src/components/Tag/Tag.tsx b/weave-js/src/components/Tag/Tag.tsx index 85ec9d8a6e2e..b7ebe22123d7 100644 --- a/weave-js/src/components/Tag/Tag.tsx +++ b/weave-js/src/components/Tag/Tag.tsx @@ -1,5 +1,12 @@ import classNames from 'classnames'; -import React, {FC, ReactElement, useMemo, useRef} from 'react'; +import React, { + FC, + ReactElement, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; import {twMerge} from 'tailwind-merge'; import {Icon, IconName} from '../Icon'; @@ -103,11 +110,18 @@ export const RemovableTag: FC = ({ Wrapper = Tailwind, }) => { const labelRef = useRef(null); - const isTooltipEnabled = isTagLabelTruncated(labelRef); + const [isTruncated, setIsTruncated] = useState(false); + + useEffect(() => { + if (labelRef.current) { + setIsTruncated(isTagLabelTruncated(labelRef)); + } + }, [label]); + const classes = useTagClasses({color, isInteractive: true, label}); const nakedTag = ( - +
@@ -132,6 +146,5 @@ export const RemovableTag: FC = ({ if (Wrapper) { return {nakedTag}; } - return nakedTag; };
diff --git a/weave/integrations/langchain_nvidia_ai_endpoints/__init__.py b/weave/integrations/langchain_nvidia_ai_endpoints/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py
new file mode 100644
index 000000000000..0d376db51d91
--- /dev/null
+++ b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+import importlib
+import time
+from typing import Any, Callable
+
+import_failed = False
+
+try:
+    from langchain_core.messages import AIMessageChunk, convert_to_openai_messages
+    from langchain_core.outputs import ChatGenerationChunk, ChatResult
+except ImportError:
+    import_failed = True
+
+import weave
+from weave.trace.autopatch import IntegrationSettings, OpSettings
+from weave.trace.op import Op, ProcessedInputs
+from weave.trace.op_extensions.accumulator import add_accumulator
+from weave.trace.patcher import MultiPatcher, NoOpPatcher, SymbolPatcher
+
+_lc_nvidia_patcher: MultiPatcher | None = None
+
+
+# NVIDIA-specific accumulator that merges the chunks of a streaming interaction.
+def nvidia_accumulator(acc: Any | None, value: Any) -> Any:
+    if acc is None:
+        acc = ChatGenerationChunk(message=AIMessageChunk(content=""))
+    acc = acc + value
+
+    # The __add__ impl for streaming chunks does not carry usage forward, so take
+    # it from the latest chunk; the final chunk holds the real usage, making the
+    # accumulated value eventually consistent.
+    acc.message.usage_metadata = value.message.usage_metadata
+
+    return acc
+
+
+# Post-processor to transform output into OpenAI's ChatCompletion format; it must
+# handle both stream and non-stream outputs.
+def postprocess_output_to_openai_format(output: Any) -> dict:
+    """
+    Post-process the output reported to Weave into OpenAI's ChatCompletion format
+    so that the Weave front end renders the chat view. This only affects what is
+    sent to Weave.
+ """ + from openai.types.chat import ChatCompletion + + if isinstance(output, ChatResult): # its ChatResult + message = output.llm_output + enhanced_usage = message.get("token_usage", {}) + enhanced_usage["output_tokens"] = message.get("token_usage").get( + "completion_tokens", 0 + ) + enhanced_usage["input_tokens"] = message.get("token_usage").get( + "prompt_tokens", 0 + ) + + returnable = ChatCompletion( + id="None", + choices=[ + { + "index": 0, + "message": { + "content": message.get("content", ""), + "role": message.get("role", ""), + "tool_calls": message.get("tool_calls", []), + }, + "logprobs": None, + "finish_reason": message.get("finish_reason", ""), + } + ], + created=int(time.time()), + model=message.get("model_name", ""), + object="chat.completion", + tool_calls=message.get("tool_calls", []), + system_fingerprint=None, + usage=enhanced_usage, + ) + + return returnable.model_dump(exclude_unset=True, exclude_none=True) + + elif isinstance(output, ChatGenerationChunk): # its ChatGenerationChunk + orig_message = output.message + openai_message = convert_to_openai_messages(output.message) + enhanced_usage = getattr(orig_message, "usage_metadata", {}) + enhanced_usage["completion_tokens"] = orig_message.usage_metadata.get( + "output_tokens", 0 + ) + enhanced_usage["prompt_tokens"] = orig_message.usage_metadata.get( + "input_tokens", 0 + ) + + returnable = ChatCompletion( + id="None", + choices=[ + { + "index": 0, + "message": { + "content": orig_message.content, + "role": getattr(orig_message, "role", "assistant"), + "tool_calls": openai_message.get("tool_calls", []), + }, + "logprobs": None, + "finish_reason": getattr(orig_message, "response_metadata", {}).get( + "finish_reason", None + ), + } + ], + created=int(time.time()), + model=getattr(orig_message, "response_metadata", {}).get( + "model_name", None + ), + tool_calls=openai_message.get("tool_calls", []), + object="chat.completion", + system_fingerprint=None, + usage=enhanced_usage, + ) + + return returnable.model_dump(exclude_unset=True, exclude_none=True) + return output + + +def postprocess_inputs_to_openai_format( + func: Op, args: tuple, kwargs: dict +) -> ProcessedInputs: + """ + Need to process the input reported to weave to send it on openai format so that Weave front end renders + chat view. This only affects what is sent to weave. 
+ """ + original_args = args + original_kwargs = kwargs + + chat_nvidia_obj = args[0] + messages_array = args[1] + messages_array = convert_to_openai_messages(messages_array) + n = len(messages_array) + + stream = False + if "stream" in func.name: + stream = True + + weave_report = { + "model": chat_nvidia_obj.model, + "messages": messages_array, + "max_tokens": chat_nvidia_obj.max_tokens, + "temperature": chat_nvidia_obj.temperature, + "top_p": chat_nvidia_obj.top_p, + "object": "ChatNVIDIA._generate", + "n": n, + "stream": stream, + } + + return ProcessedInputs( + original_args=original_args, + original_kwargs=original_kwargs, + args=original_args, + kwargs=original_kwargs, + inputs=weave_report, + ) + + +def should_use_accumulator(inputs: dict) -> bool: + return isinstance(inputs, dict) and bool(inputs.get("stream")) + + +def nvidia_ai_endpoints_wrapper(settings: OpSettings) -> Callable[[Callable], Callable]: + def wrapper(fn: Callable) -> Callable: + op_kwargs = settings.model_dump() + op = weave.op(fn, **op_kwargs) + op._set_on_input_handler(postprocess_inputs_to_openai_format) + return add_accumulator( + op, + make_accumulator=lambda inputs: nvidia_accumulator, + should_accumulate=should_use_accumulator, + on_finish_post_processor=postprocess_output_to_openai_format, + ) + + return wrapper + + +def get_nvidia_ai_patcher( + settings: IntegrationSettings | None = None, +) -> MultiPatcher | NoOpPatcher: + if settings is None: + settings = IntegrationSettings() + + if not settings.enabled: + return NoOpPatcher() + + global _lc_nvidia_patcher + if _lc_nvidia_patcher is not None: + return _lc_nvidia_patcher + + base = settings.op_settings + + generate_settings: OpSettings = base.model_copy( + update={ + "name": base.name or "langchain_nvidia_ai_endpoints.ChatNVIDIA._generate", + } + ) + stream_settings: OpSettings = base.model_copy( + update={ + "name": base.name or "langchain_nvidia_ai_endpoints.ChatNVIDIA._stream", + } + ) + + _lc_nvidia_patcher = MultiPatcher( + [ + # Patch invoke method + SymbolPatcher( + lambda: importlib.import_module("langchain_nvidia_ai_endpoints"), + "ChatNVIDIA._generate", + nvidia_ai_endpoints_wrapper(generate_settings), + ), + # Patch stream method + SymbolPatcher( + lambda: importlib.import_module("langchain_nvidia_ai_endpoints"), + "ChatNVIDIA._stream", + nvidia_ai_endpoints_wrapper(stream_settings), + ), + ] + ) + + return _lc_nvidia_patcher diff --git a/weave/trace/autopatch.py b/weave/trace/autopatch.py index c1c47d375127..bc77752957c2 100644 --- a/weave/trace/autopatch.py +++ b/weave/trace/autopatch.py @@ -46,6 +46,7 @@ class AutopatchSettings(BaseModel): notdiamond: IntegrationSettings = Field(default_factory=IntegrationSettings) openai: IntegrationSettings = Field(default_factory=IntegrationSettings) vertexai: IntegrationSettings = Field(default_factory=IntegrationSettings) + chatnvidia: IntegrationSettings = Field(default_factory=IntegrationSettings) @validate_call @@ -60,6 +61,9 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: from weave.integrations.groq.groq_sdk import get_groq_patcher from weave.integrations.instructor.instructor_sdk import get_instructor_patcher from weave.integrations.langchain.langchain import langchain_patcher + from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import ( + get_nvidia_ai_patcher, + ) from weave.integrations.litellm.litellm import get_litellm_patcher from weave.integrations.llamaindex.llamaindex import llamaindex_patcher from weave.integrations.mistral import 
get_mistral_patcher @@ -82,6 +86,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: get_google_genai_patcher(settings.google_ai_studio).attempt_patch() get_notdiamond_patcher(settings.notdiamond).attempt_patch() get_vertexai_patcher(settings.vertexai).attempt_patch() + get_nvidia_ai_patcher(settings.chatnvidia).attempt_patch() llamaindex_patcher.attempt_patch() langchain_patcher.attempt_patch() @@ -98,6 +103,9 @@ def reset_autopatch() -> None: from weave.integrations.groq.groq_sdk import get_groq_patcher from weave.integrations.instructor.instructor_sdk import get_instructor_patcher from weave.integrations.langchain.langchain import langchain_patcher + from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import ( + get_nvidia_ai_patcher, + ) from weave.integrations.litellm.litellm import get_litellm_patcher from weave.integrations.llamaindex.llamaindex import llamaindex_patcher from weave.integrations.mistral import get_mistral_patcher @@ -117,6 +125,7 @@ def reset_autopatch() -> None: get_google_genai_patcher().undo_patch() get_notdiamond_patcher().undo_patch() get_vertexai_patcher().undo_patch() + get_nvidia_ai_patcher().undo_patch() llamaindex_patcher.undo_patch() langchain_patcher.undo_patch() diff --git a/weave/trace/sanitize.py b/weave/trace/sanitize.py index 5125dbfb09dc..24e1fea5dd95 100644 --- a/weave/trace/sanitize.py +++ b/weave/trace/sanitize.py @@ -1,6 +1,11 @@ +# always use lowercase keys for the redact keys REDACT_KEYS = ( "api_key", "auth_headers", - "Authorization", + "authorization", ) REDACTED_VALUE = "REDACTED" + + +def should_redact(key: str) -> bool: + return key.lower() in REDACT_KEYS diff --git a/weave/trace/serialize.py b/weave/trace/serialize.py index 5e0c6006aa50..ae8afa0e12e8 100644 --- a/weave/trace/serialize.py +++ b/weave/trace/serialize.py @@ -8,7 +8,7 @@ from weave.trace import custom_objs from weave.trace.object_record import ObjectRecord from weave.trace.refs import ObjectRef, TableRef, parse_uri -from weave.trace.sanitize import REDACT_KEYS, REDACTED_VALUE +from weave.trace.sanitize import REDACTED_VALUE, should_redact from weave.trace_server.interface.builtin_object_classes.builtin_object_registry import ( BUILTIN_OBJECT_REGISTRY, ) @@ -148,7 +148,7 @@ def dictify( elif isinstance(obj, dict): dict_result = {} for k, v in obj.items(): - if k in REDACT_KEYS: + if should_redact(k): dict_result[k] = REDACTED_VALUE else: dict_result[k] = dictify(v, maxdepth, depth + 1, seen) @@ -160,7 +160,7 @@ def dictify( if isinstance(as_dict, dict): to_dict_result = {} for k, v in as_dict.items(): - if k in REDACT_KEYS: + if should_redact(k): to_dict_result[k] = REDACTED_VALUE elif maxdepth == 0 or depth < maxdepth: to_dict_result[k] = dictify(v, maxdepth, depth + 1) @@ -187,7 +187,7 @@ def dictify( for attr in dir(obj): if attr.startswith("_"): continue - if attr in REDACT_KEYS: + if should_redact(attr): result[attr] = REDACTED_VALUE continue try: diff --git a/weave/trace/weave_client.py b/weave/trace/weave_client.py index 1d5d54b9b23c..87a445383b57 100644 --- a/weave/trace/weave_client.py +++ b/weave/trace/weave_client.py @@ -38,7 +38,7 @@ parse_op_uri, parse_uri, ) -from weave.trace.sanitize import REDACT_KEYS, REDACTED_VALUE +from weave.trace.sanitize import REDACTED_VALUE, should_redact from weave.trace.serialize import from_json, isinstance_namedtuple, to_json from weave.trace.serializer import get_serializer_for_obj from weave.trace.settings import client_parallelism @@ -1648,7 +1648,7 @@ def 
redact_sensitive_keys(obj: Any) -> Any: if isinstance(obj, dict): dict_res = {} for k, v in obj.items(): - if k in REDACT_KEYS: + if should_redact(k): dict_res[k] = REDACTED_VALUE else: dict_res[k] = redact_sensitive_keys(v) diff --git a/weave/trace_server/async_batch_processor.py b/weave/trace_server/async_batch_processor.py index 03071607aee0..a8a183d94bfe 100644 --- a/weave/trace_server/async_batch_processor.py +++ b/weave/trace_server/async_batch_processor.py @@ -6,7 +6,6 @@ from typing import Callable, Generic, TypeVar from weave.trace.context.tests_context import get_raise_on_captured_errors -from weave.trace_server import requests T = TypeVar("T") logger = logging.getLogger(__name__) @@ -61,14 +60,10 @@ def _process_batches(self) -> None: if current_batch: try: self.processor_fn(current_batch) - except requests.HTTPError as e: - if e.response.status_code == 413: - # 413: payload too large, don't raise just log - if get_raise_on_captured_errors(): - raise - logger.exception(f"Error processing batch: {e}") - else: - raise e + except Exception as e: + if get_raise_on_captured_errors(): + raise + logger.exception(f"Error processing batch: {e}") if self.stop_event.is_set() and self.queue.empty(): break
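For reference, a minimal sketch (not part of the patch) of how the `nvidia_accumulator` added earlier in this diff is expected to behave for a streamed call. It assumes a recent `langchain_core` where `AIMessageChunk` accepts `usage_metadata`; the chunk contents below are made up for illustration:

```python
from langchain_core.messages import AIMessageChunk
from langchain_core.outputs import ChatGenerationChunk

from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import (
    nvidia_accumulator,
)

# Two fake streamed chunks; only the final chunk carries usage metadata.
chunks = [
    ChatGenerationChunk(message=AIMessageChunk(content="Hello")),
    ChatGenerationChunk(
        message=AIMessageChunk(
            content=" world",
            usage_metadata={"input_tokens": 12, "output_tokens": 24, "total_tokens": 36},
        )
    ),
]

acc = None
for chunk in chunks:
    acc = nvidia_accumulator(acc, chunk)

print(acc.message.content)         # "Hello world"
print(acc.message.usage_metadata)  # usage copied from the final chunk
```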
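The new `chatnvidia` field on `AutopatchSettings` lets the patcher be toggled like the other integrations. A sketch, assuming `IntegrationSettings` defaults to `enabled=True` (as `get_nvidia_ai_patcher` implies) and calling the `autopatch` entry point shown in this diff directly rather than going through `weave.init`:

```python
from weave.trace.autopatch import (
    AutopatchSettings,
    IntegrationSettings,
    autopatch,
    reset_autopatch,
)

# Patch every supported integration except ChatNVIDIA.
autopatch(AutopatchSettings(chatnvidia=IntegrationSettings(enabled=False)))

# ... and undo all applied patches again when done.
reset_autopatch()
```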
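Finally, the `sanitize.py` change above makes redaction key matching case-insensitive. A minimal sketch of the intended behavior, mirroring the updated call sites in `serialize.dictify` and `weave_client.redact_sensitive_keys`; the `redact` helper here is hypothetical and for illustration only:

```python
from weave.trace.sanitize import REDACTED_VALUE, should_redact

def redact(obj: dict) -> dict:
    # Hypothetical helper: any key whose lowercase form is in REDACT_KEYS is masked.
    return {k: REDACTED_VALUE if should_redact(k) else v for k, v in obj.items()}

print(redact({"Authorization": "Bearer abc", "API_KEY": "sk-123", "model": "mixtral"}))
# {'Authorization': 'REDACTED', 'API_KEY': 'REDACTED', 'model': 'mixtral'}
```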