diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_model.png b/docs/docs/guides/integrations/imgs/chatnvidia_model.png
new file mode 100644
index 000000000000..4d0782b7719b
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/chatnvidia_model.png differ
diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_trace.png b/docs/docs/guides/integrations/imgs/chatnvidia_trace.png
new file mode 100644
index 000000000000..e573ab26089d
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/chatnvidia_trace.png differ
diff --git a/docs/docs/guides/integrations/imgs/nvidia_pokedex.png b/docs/docs/guides/integrations/imgs/nvidia_pokedex.png
new file mode 100644
index 000000000000..51ab369fa8ca
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/nvidia_pokedex.png differ
diff --git a/docs/docs/guides/integrations/index.md b/docs/docs/guides/integrations/index.md
index f83d579b21c2..78504dc154b4 100644
--- a/docs/docs/guides/integrations/index.md
+++ b/docs/docs/guides/integrations/index.md
@@ -20,6 +20,8 @@ LLM providers are the vendors that offer access to large language models for gen
- **[Groq](/guides/integrations/groq)**
- **[Open Router](/guides/integrations/openrouter)**
- **[LiteLLM](/guides/integrations/litellm)**
+- **[NVIDIA NIM](/guides/integrations/nvidia_nim)**
+
**[Local Models](/guides/integrations/local_models)**: For when you're running models on your own infrastructure.
diff --git a/docs/docs/guides/integrations/nvidia_nim.md b/docs/docs/guides/integrations/nvidia_nim.md
new file mode 100644
index 000000000000..01007ee7e6a6
--- /dev/null
+++ b/docs/docs/guides/integrations/nvidia_nim.md
@@ -0,0 +1,176 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# NVIDIA NIM
+
+Weave automatically tracks and logs LLM calls made via the [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) library after `weave.init()` is called.
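+
+You'll need the `langchain-nvidia-ai-endpoints` package installed, and your NVIDIA API key available in the `NVIDIA_API_KEY` environment variable.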
+
+## Tracing
+
+It’s important to store traces of LLM applications in a central database, both during development and in production. You’ll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application.
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+
+    Weave can automatically capture traces for the [ChatNVIDIA Python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).
+
+    Start capturing by calling `weave.init()` with a project name of your choice.
+
+    ```python
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+    import weave
+
+    client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.8, max_tokens=64, top_p=1)
+    # highlight-next-line
+    weave.init('emoji-bot')
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are AGI. You will be provided with a message, and your task is to respond using emojis only."
+        }
+    ]
+
+    response = client.invoke(messages)
+    ```
+
+    ![chatnvidia_trace](imgs/chatnvidia_trace.png)
+
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+
+  </TabItem>
+</Tabs>
+
+## Track your own ops
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+
+    Wrapping a function with `@weave.op` starts capturing inputs, outputs, and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git.
+
+    Simply create a function decorated with [`@weave.op`](/guides/tracking/ops) that calls into the [ChatNVIDIA Python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).
+
+    In the example below, we have two functions wrapped with `@weave.op`. This helps us see how intermediate steps, like the retrieval step in a RAG app, affect how our app behaves.
+
+    ```python
+    # highlight-next-line
+    import weave
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+    import requests, random
+
+    PROMPT = """Emulate the Pokedex from early Pokémon episodes. State the name of the Pokemon and then describe it.
+    Your tone is informative yet sassy, blending factual details with a touch of dry humor. Be concise, no more than 3 sentences."""
+    POKEMON = ['pikachu', 'charmander', 'squirtle', 'bulbasaur', 'jigglypuff', 'meowth', 'eevee']
+    client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.7, max_tokens=100, top_p=1)
+
+    # highlight-next-line
+    @weave.op
+    def get_pokemon_data(pokemon_name):
+        # highlight-next-line
+        # This is a step within your application, like the retrieval step within a RAG app
+        url = f"https://pokeapi.co/api/v2/pokemon/{pokemon_name}"
+        response = requests.get(url)
+        if response.status_code == 200:
+            data = response.json()
+            name = data["name"]
+            types = [t["type"]["name"] for t in data["types"]]
+            species_url = data["species"]["url"]
+            species_response = requests.get(species_url)
+            evolved_from = "Unknown"
+            if species_response.status_code == 200:
+                species_data = species_response.json()
+                if species_data["evolves_from_species"]:
+                    evolved_from = species_data["evolves_from_species"]["name"]
+            return {"name": name, "types": types, "evolved_from": evolved_from}
+        else:
+            return None
+
+    # highlight-next-line
+    @weave.op
+    def pokedex(name: str, prompt: str) -> str:
+        # highlight-next-line
+        # This is your root op that calls out to other ops
+        # highlight-next-line
+        data = get_pokemon_data(name)
+        if not data:
+            return "Error: Unable to fetch data"
+
+        messages = [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": str(data)},
+        ]
+
+        response = client.invoke(messages)
+        return response.content
+
+    # highlight-next-line
+    weave.init('pokedex-nvidia')
+    # Get data for a specific Pokémon
+    pokemon_data = pokedex(random.choice(POKEMON), PROMPT)
+    ```
+
+    Navigate to Weave and click `get_pokemon_data` in the UI to see the inputs and outputs of that step.
+
+    ![nvidia_pokedex](imgs/nvidia_pokedex.png)
+
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+
+  </TabItem>
+</Tabs>
+
+## Create a `Model` for easier experimentation
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+
+    Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app, like your system prompt or the model you're using. This helps organize and compare different iterations of your app.
+
+    In addition to versioning code and capturing inputs/outputs, [`Model`](/guides/core-types/models)s capture structured parameters that control your application's behavior, making it easy to find what parameters worked best. You can also use Weave Models with `serve` and [`Evaluation`](/guides/core-types/evaluations)s.
+
+    In the example below, you can experiment with `model` and `system_message`. Every time you change one of these, you'll get a new _version_ of `GrammarCorrectorModel`.
+
+    ```python
+    import weave
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+    weave.init('grammar-nvidia')
+
+    class GrammarCorrectorModel(weave.Model):  # Change to `weave.Model`
+        system_message: str
+
+        @weave.op()
+        def predict(self, user_input):  # Change to `predict`
+            client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0, max_tokens=100, top_p=1)
+
+            messages = [
+                {
+                    "role": "system",
+                    "content": self.system_message
+                },
+                {
+                    "role": "user",
+                    "content": user_input
+                }
+            ]
+
+            response = client.invoke(messages)
+            return response.content
+
+
+    corrector = GrammarCorrectorModel(
+        system_message="You are a grammar checker, correct the following user input."
+    )
+    result = corrector.predict("That was so easy, it was a piece of pie!")
+    print(result)
+    ```
+
+    ![chatnvidia_model](imgs/chatnvidia_model.png)
+
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+
+  </TabItem>
+</Tabs>
+
+## Usage Info
+
+The ChatNVIDIA integration supports `invoke`, `stream`, and their async variants (`ainvoke`, `astream`). It also supports tool calling via `bind_tools`.
+As ChatNVIDIA is meant to be used with many types of models, it does not support the older function-calling API.
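+
+Below is a minimal sketch of those variants (the project name and model are reused from the quickstart above, and `NVIDIA_API_KEY` is assumed to be set in your environment):
+
+```python
+import asyncio
+
+import weave
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+weave.init('emoji-bot')
+client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.8, max_tokens=64, top_p=1)
+
+# Streaming responses are captured as a single traced call.
+for chunk in client.stream("Hello!"):
+    print(chunk.content, end="")
+
+# The async variants are traced the same way.
+async def main() -> None:
+    response = await client.ainvoke("Hello!")
+    print(response.content)
+
+asyncio.run(main())
+```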
diff --git a/docs/sidebars.ts b/docs/sidebars.ts
index fa2f7f80d32e..b132e5ac6476 100644
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -101,6 +101,7 @@ const sidebars: SidebarsConfig = {
"guides/integrations/groq",
"guides/integrations/openrouter",
"guides/integrations/litellm",
+ "guides/integrations/nvidia_nim",
],
},
"guides/integrations/local_models",
diff --git a/noxfile.py b/noxfile.py
index dff1305f21fd..f70339598275 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -8,6 +8,7 @@
"cohere",
"dspy",
"langchain",
+ "langchain_nvidia_ai_endpoints",
"litellm",
"notdiamond",
"google_ai_studio",
@@ -40,6 +41,7 @@ def lint(session):
"google_ai_studio",
"groq",
"instructor",
+ "langchain_nvidia_ai_endpoints",
"langchain",
"litellm",
"llamaindex",
@@ -73,6 +75,10 @@ def tests(session, shard):
    if shard == "google_ai_studio":
        env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")
+    # Add the NVIDIA_API_KEY environment variable for the "langchain_nvidia_ai_endpoints" shard
+    if shard == "langchain_nvidia_ai_endpoints":
+        env["NVIDIA_API_KEY"] = session.env.get("NVIDIA_API_KEY")
+
    # we are doing some integration test in test_llm_integrations.py that requires
    # setting some environment variables for the LLM providers
    if shard == "scorers_tests":
diff --git a/pyproject.toml b/pyproject.toml
index 660fab30960f..eb5636029b2a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,10 @@ langchain = [
"pysqlite3",
"opentelemetry-exporter-otlp",
]
+langchain_nvidia_ai_endpoints = [
+ "langchain-core>=0.2.1",
+ "langchain-nvidia-ai-endpoints",
+]
litellm = ["litellm>=1.36.1"]
llamaindex = ["llama-index>=0.10.35"]
mistral0 = ["mistralai>=0.1.8,<1.0.0"]
@@ -98,7 +102,11 @@ test = [
    # Integration Tests
    "pytest-recording>=0.13.2",
-    "vcrpy>=6.0.1",
+    # "vcrpy>=6.0.1",
+    # https://github.com/kevin1024/vcrpy/pull/889
+    # This resolves test issues until a new pypi release can be made. Once that release
+    # is made, we can remove this and revert to the vcrpy>=6.0.1 dependency.
+    "vcrpy @ git+https://github.com/kevin1024/vcrpy.git@48d0a2e453f6635af343000cdaf9794a781e807e",
    # serving tests
    "flask",
@@ -108,6 +116,7 @@ test = [
"httpx",
]
+
[project.scripts]
weave = "weave.trace.cli:cli"
@@ -138,6 +147,8 @@ exclude = [
"weave/clear_cache.py",
]
+[tool.hatch.metadata]
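+# Required so hatchling accepts the direct git reference to vcrpy in the test extras above.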
+allow-direct-references = true
[tool.pytest.ini_options]
filterwarnings = [
diff --git a/tests/integrations/__init__.py b/tests/integrations/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/__init__.py b/tests/integrations/langchain_nvidia_ai_endpoints/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml
new file mode 100644
index 000000000000..fea579fb73e6
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml
@@ -0,0 +1,50 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '161'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: '{"id":"chat-8bfccc9544b64c70b47605a647b69b8a","object":"chat.completion","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello!
+ It''s nice to meet you. Is there something I can help you with or would you
+ like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}'
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '445'
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 23 Dec 2024 22:21:45 GMT
+ Nvcf-Reqid:
+ - 704f40c5-4d25-46fb-8d76-66364bc9e156
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml
new file mode 100644
index 000000000000..eab2bd71a7c6
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml
@@ -0,0 +1,152 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options":
+ {"include_usage": true}}'
+ headers:
+ Accept:
+ - text/event-stream
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '203'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: 'data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}}
+
+
+ data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}}
+
+
+ data: [DONE]
+
+
+ '
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Type:
+ - text/event-stream; charset=utf-8
+ Date:
+ - Mon, 23 Dec 2024 22:21:46 GMT
+ Nvcf-Reqid:
+ - 5d8ef74f-0537-47a5-b0dc-0735d916776f
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Transfer-Encoding:
+ - chunked
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml
new file mode 100644
index 000000000000..0f3dc21c319e
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml
@@ -0,0 +1,50 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '161'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: '{"id":"chat-55c0d02e9caa471694b571312c012a34","object":"chat.completion","created":1734992504,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello!
+ It''s nice to meet you. Is there something I can help you with or would you
+ like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}'
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '445'
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 23 Dec 2024 22:21:44 GMT
+ Nvcf-Reqid:
+ - ea89199e-9f54-4c8d-8895-b1fd9034b86e
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml
new file mode 100644
index 000000000000..9d6d68b489f6
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml
@@ -0,0 +1,152 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options":
+ {"include_usage": true}}'
+ headers:
+ Accept:
+ - text/event-stream
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '203'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: 'data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"
+ chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}}
+
+
+ data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}}
+
+
+ data: [DONE]
+
+
+ '
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Type:
+ - text/event-stream; charset=utf-8
+ Date:
+ - Mon, 23 Dec 2024 22:21:45 GMT
+ Nvcf-Reqid:
+ - ca6cf115-a5f7-447c-95e7-eafced589f5d
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Transfer-Encoding:
+ - chunked
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml
new file mode 100644
index 000000000000..aa03ceaa8d58
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml
@@ -0,0 +1,56 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Can you name a cricket player
+ along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false, "tools":
+ [{"type": "function", "function": {"name": "cricket_player_names", "description":
+ "store the name of players", "parameters": {"type": "object", "properties":
+ {"name": {"type": "string", "description": "The name of the player"}, "team:":
+ {"type": "string", "description": "The team of the player"}, "highest_score":
+ {"type": "number", "description": "The highest score of the player"}}, "required":
+ ["name", "team", "highest_score"]}}}]}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '670'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: '{"id":"chat-c827eb3e9dad425dbde170c946ff7cf7","object":"chat.completion","created":1734992507,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-3b35163e6a5a4961aa22b93581a0c5b2","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\":
+ \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}'
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '580'
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 23 Dec 2024 22:21:48 GMT
+ Nvcf-Reqid:
+ - 497f5f82-973e-4224-8593-97664a04b39c
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml
new file mode 100644
index 000000000000..9043999a5f7f
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml
@@ -0,0 +1,56 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Can you name a cricket player
+ along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false, "tools":
+ [{"type": "function", "function": {"name": "cricket_player_names", "description":
+ "store the name of players", "parameters": {"type": "object", "properties":
+ {"name": {"type": "string", "description": "The name of the player"}, "team:":
+ {"type": "string", "description": "The team of the player"}, "highest_score":
+ {"type": "number", "description": "The highest score of the player"}}, "required":
+ ["name", "team", "highest_score"]}}}]}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '670'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: '{"id":"chat-5e1b429e3b704443992df3e6a1f95021","object":"chat.completion","created":1734992509,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-b16ddfa73cb94d9b95d6572615e71589","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\":
+ \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}'
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '580'
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 23 Dec 2024 22:21:49 GMT
+ Nvcf-Reqid:
+ - 60deff33-b1fe-46d1-abc4-35978ee01613
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml
new file mode 100644
index 000000000000..e58ed33910b8
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml
@@ -0,0 +1,68 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Can you name a cricket player
+ along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options":
+ {"include_usage": true}, "tools": [{"type": "function", "function": {"name":
+ "cricket_player_names", "description": "store the name of players", "parameters":
+ {"type": "object", "properties": {"name": {"type": "string", "description":
+ "The name of the player"}, "team:": {"type": "string", "description": "The team
+ of the player"}, "highest_score": {"type": "number", "description": "The highest
+ score of the player"}}, "required": ["name", "team", "highest_score"]}}}]}'
+ headers:
+ Accept:
+ - text/event-stream
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '712'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: 'data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}}
+
+
+ data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-df3daa09595c462297a7253930e4d915","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\":
+ \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}}
+
+
+ data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}}
+
+
+ data: [DONE]
+
+
+ '
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Type:
+ - text/event-stream; charset=utf-8
+ Date:
+ - Mon, 23 Dec 2024 22:21:51 GMT
+ Nvcf-Reqid:
+ - 43d997a5-3e0a-485d-9567-e16813f1b183
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Transfer-Encoding:
+ - chunked
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml
new file mode 100644
index 000000000000..c8bd72e70e11
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml
@@ -0,0 +1,68 @@
+interactions:
+- request:
+ body: '{"messages": [{"role": "user", "content": "Can you name a cricket player
+ along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct",
+ "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options":
+ {"include_usage": true}, "tools": [{"type": "function", "function": {"name":
+ "cricket_player_names", "description": "store the name of players", "parameters":
+ {"type": "object", "properties": {"name": {"type": "string", "description":
+ "The name of the player"}, "team:": {"type": "string", "description": "The team
+ of the player"}, "highest_score": {"type": "number", "description": "The highest
+ score of the player"}}, "required": ["name", "team", "highest_score"]}}}]}'
+ headers:
+ Accept:
+ - text/event-stream
+ Accept-Encoding:
+ - gzip, deflate, zstd
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '712'
+ Content-Type:
+ - application/json
+ User-Agent:
+ - langchain-nvidia-ai-endpoints
+ method: POST
+ uri: https://integrate.api.nvidia.com/v1/chat/completions
+ response:
+ body:
+ string: 'data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}}
+
+
+ data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-d0d074128af94984bfc40534381b5860","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\":
+ \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}}
+
+
+ data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}}
+
+
+ data: [DONE]
+
+
+ '
+ headers:
+ Access-Control-Allow-Credentials:
+ - 'true'
+ Access-Control-Expose-Headers:
+ - nvcf-reqid
+ Connection:
+ - keep-alive
+ Content-Type:
+ - text/event-stream; charset=utf-8
+ Date:
+ - Mon, 23 Dec 2024 22:21:50 GMT
+ Nvcf-Reqid:
+ - 2746750e-86b1-4959-bd8b-1da038003578
+ Nvcf-Status:
+ - fulfilled
+ Server:
+ - uvicorn
+ Transfer-Encoding:
+ - chunked
+ Vary:
+ - Origin
+ - origin, access-control-request-method, access-control-request-headers
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py
new file mode 100644
index 000000000000..e46ebac8e89c
--- /dev/null
+++ b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py
@@ -0,0 +1,567 @@
+import os
+
+import pytest
+from langchain_core.messages import AIMessageChunk
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+import weave
+from weave.integrations.integration_utilities import op_name_from_ref
+
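+# Model that all cassettes in this directory were recorded against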
+model = "meta/llama-3.1-8b-instruct"
+
+
+@pytest.mark.skip_clickhouse_client  # TODO: VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode
+@pytest.mark.vcr(
+    filter_headers=["authorization"],
+    allowed_hosts=["api.wandb.ai", "localhost"],
+)
+def test_chatnvidia_quickstart(client: weave.trace.weave_client.WeaveClient) -> None:
+    api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+    nvidia_client = ChatNVIDIA(
+        api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+    )
+
+    response = nvidia_client.invoke("Hello!")
+
+    calls = list(client.calls())
+    # 2 calls are expected because the langchain integration records one as well
+    assert len(calls) == 2
+    call = calls[1]
+
+    assert response.content is not None
+
+    assert (
+        op_name_from_ref(call.op_name)
+        == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate"
+    )
+    assert call.started_at is not None
+    assert call.started_at < call.ended_at  # type: ignore
+
+    output = call.output
+    assert output["model"] == model
+    assert output["object"] == "chat.completion"
+
+    usage = call.summary["usage"][output["model"]]  # type: ignore
+    assert usage["requests"] == 1
+    assert usage["completion_tokens"] == 24
+    assert usage["prompt_tokens"] == 12
+    assert usage["total_tokens"] == 36
+
+    inputs = call.inputs
+    assert inputs["model"] == model
+    assert inputs["messages"] == [{"role": "user", "content": "Hello!"}]
+    assert inputs["max_tokens"] == 64
+    assert inputs["temperature"] == 0.0
+    assert inputs["top_p"] == 1
+
+
+@pytest.mark.skip_clickhouse_client  # TODO: VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode
+@pytest.mark.vcr(
+    filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"]
+)
+@pytest.mark.asyncio
+async def test_chatnvidia_async_quickstart(
+    client: weave.trace.weave_client.WeaveClient,
+) -> None:
+    api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+    nvidia_client = ChatNVIDIA(
+        api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+    )
+
+    response = await nvidia_client.ainvoke("Hello!")
+
+    calls = list(client.calls())
+    # 2 calls are expected because the langchain integration records one as well
+    assert len(calls) == 2
+    call = calls[1]
+
+    assert response.content is not None
+
+    assert (
+        op_name_from_ref(call.op_name)
+        == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate"
+    )
+    assert call.started_at is not None
+    assert call.started_at < call.ended_at
+
+    output = call.output
+    assert output["model"] == model
+    assert output["object"] == "chat.completion"
+
+    usage = call.summary["usage"][output["model"]]
+    assert usage["requests"] == 1
+    assert usage["completion_tokens"] == 24
+    assert usage["prompt_tokens"] == 12
+    assert usage["total_tokens"] == 36
+
+    inputs = call.inputs
+    assert inputs["model"] == model
+    assert inputs["messages"] == [{"role": "user", "content": "Hello!"}]
+    assert inputs["max_tokens"] == 64
+    assert inputs["temperature"] == 0.0
+    assert inputs["top_p"] == 1
+
+
+@pytest.mark.skip_clickhouse_client  # TODO: VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode
+@pytest.mark.vcr(
+    filter_headers=["authorization"],
+    allowed_hosts=["api.wandb.ai", "localhost"],
+)
+def test_chatnvidia_stream_quickstart(
+    client: weave.trace.weave_client.WeaveClient,
+) -> None:
+    api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+    nvidia_client = ChatNVIDIA(
+        api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+    )
+
+    response = nvidia_client.stream("Hello!")
+    answer = AIMessageChunk(content="")
+    for chunk in response:
+        answer += chunk
+        answer.usage_metadata = chunk.usage_metadata
+
+    calls = list(client.calls())
+    # 2 calls are expected because the langchain integration records one as well
+    assert len(calls) == 2
+    call = calls[1]
+
+    assert answer.content is not None
+
+    assert (
+        op_name_from_ref(call.op_name)
+        == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream"
+    )
+    assert call.started_at is not None
+    assert call.started_at < call.ended_at
+
+    output = call.output
+    assert output["model"] == model
+    assert output["object"] == "chat.completion"
+
+    usage = call.summary["usage"][output["model"]]
+    assert usage["requests"] == 1
+    assert usage["completion_tokens"] == 24
+    assert usage["prompt_tokens"] == 12
+    assert usage["total_tokens"] == 36
+
+    inputs = call.inputs
+    assert inputs["model"] == model
+    assert inputs["messages"] == [{"role": "user", "content": "Hello!"}]
+    assert inputs["max_tokens"] == 64
+    assert inputs["temperature"] == 0.0
+    assert inputs["top_p"] == 1
+
+
+@pytest.mark.skip_clickhouse_client  # TODO: VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode
+@pytest.mark.vcr(
+    filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"]
+)
+@pytest.mark.asyncio
+async def test_chatnvidia_async_stream_quickstart(
+    client: weave.trace.weave_client.WeaveClient,
+) -> None:
+    api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+    nvidia_client = ChatNVIDIA(
+        api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+    )
+    response = nvidia_client.astream("Hello!")
+    answer = AIMessageChunk(content="")
+    async for chunk in response:
+        answer += chunk
+        answer.usage_metadata = chunk.usage_metadata
+
+    calls = list(client.calls())
+    # 2 calls are expected because the langchain integration records one as well
+    assert len(calls) == 2
+    call = calls[1]
+
+    assert answer.content is not None
+
+    assert (
+        op_name_from_ref(call.op_name)
+        == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream"
+    )
+    assert call.started_at is not None
+    assert call.started_at < call.ended_at
+
+    output = call.output
+    assert output["model"] == model
+    assert output["object"] == "chat.completion"
+
+    usage = call.summary["usage"][output["model"]]
+    assert usage["requests"] == 1
+    assert usage["completion_tokens"] == 24
+    assert usage["prompt_tokens"] == 12
+    assert usage["total_tokens"] == 36
+
+    inputs = call.inputs
+    assert inputs["model"] == model
+    assert inputs["messages"] == [{"role": "user", "content": "Hello!"}]
+    assert inputs["max_tokens"] == 64
+    assert inputs["temperature"] == 0.0
+    assert inputs["top_p"] == 1
+
+
+@pytest.mark.skip_clickhouse_client  # TODO: VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode
+@pytest.mark.vcr(
+    filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"]
+)
+def test_chatnvidia_tool_call(client: weave.trace.weave_client.WeaveClient) -> None:
+    api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+    function_list = [
+        {
+            "type": "function",
+            "function": {
+                "name": "cricket_player_names",  # Function Name
+                "description": "store the name of players",  # Meta information of function
+                "parameters": {  # parameters
+                    "type": "object",
+                    "properties": {
+                        "name": {
+                            "type": "string",
+                            "description": "The name of the player",
+                        },
+                        "team:": {
+                            "type": "string",
+                            "description": "The team of the player",
+                        },
+                        "highest_score": {
+                            "type": "number",
+                            "description": "The highest score of the player",
+                        },
+                    },
+                    "required": ["name", "team", "highest_score"],
+                },
+            },
+        }
+    ]
+
+    nvidia_client = ChatNVIDIA(
+        api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+    ).bind_tools(function_list)
+
+    messages = [
+        {
+            "role": "user",
+            "content": "Can you name a cricket player along with team name and highest score?",
+        }
+    ]
+
+    response = nvidia_client.invoke(messages)
+
+    calls = list(client.calls())
+    # 2 calls are expected because the langchain integration records one as well
+    assert len(calls) == 2
+    call = calls[1]
+
+    assert response.content is not None
+
+    assert (
+        op_name_from_ref(call.op_name)
+        == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate"
+    )
+    assert call.started_at is not None
+    assert call.started_at < call.ended_at
+
+    output = call.output
+    assert output["model"] == model
+    assert output["object"] == "chat.completion"
+
+    usage = call.summary["usage"][output["model"]]
+    assert usage["requests"] == 1
+    assert usage["completion_tokens"] == 30
+    assert usage["prompt_tokens"] == 318
+    assert usage["total_tokens"] == 348
+
+    inputs = call.inputs
+    assert inputs["model"] == model
+    assert inputs["messages"] == [
+        {
+            "role": "user",
+            "content": "Can you name a cricket player along with team name and highest score?",
+        }
+    ]
+    assert inputs["max_tokens"] == 64
+    assert inputs["temperature"] == 0.0
+    assert inputs["top_p"] == 1
+
+
+@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode
+@pytest.mark.vcr(
+ filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"]
+)
+@pytest.mark.asyncio
+async def test_chatnvidia_tool_call_async(
+ client: weave.trace.weave_client.WeaveClient,
+) -> None:
+ api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+ function_list = [
+ {
+ "type": "function",
+ "function": {
+ "name": "cricket_player_names", # Function Name
+ "description": "store the name of players", # Meta information of function
+ "parameters": { # parameters
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the player",
+ },
+ "team:": {
+ "type": "string",
+ "description": "The team of the player",
+ },
+ "highest_score": {
+ "type": "number",
+ "description": "The highest score of the player",
+ },
+ },
+ "required": ["name", "team", "highest_score"],
+ },
+ },
+ }
+ ]
+
+ nvidia_client = ChatNVIDIA(
+ api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+ ).bind_tools(function_list)
+
+ messages = [
+ {
+ "role": "user",
+ "content": "Can you name a cricket player along with team name and highest score?",
+ }
+ ]
+
+ response = await nvidia_client.ainvoke(messages)
+
+ calls = list(client.calls())
+ # expect 2 calls: Weave's langchain integration records its own call alongside ChatNVIDIA's
+ assert len(calls) == 2
+ call = calls[1]
+
+ assert response.content is not None
+
+ assert (
+ op_name_from_ref(call.op_name)
+ == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate"
+ )
+ assert call.started_at is not None
+ assert call.started_at < call.ended_at
+
+ output = call.output
+ assert output["model"] == model
+ assert output["object"] == "chat.completion"
+
+ usage = call.summary["usage"][output["model"]]
+ assert usage["requests"] == 1
+ assert usage["completion_tokens"] == 30
+ assert usage["prompt_tokens"] == 318
+ assert usage["total_tokens"] == 348
+
+ inputs = call.inputs
+ assert inputs["model"] == model
+ assert inputs["messages"] == [
+ {
+ "role": "user",
+ "content": "Can you name a cricket player along with team name and highest score?",
+ }
+ ]
+ assert inputs["max_tokens"] == 64
+ assert inputs["temperature"] == 0.0
+ assert inputs["top_p"] == 1
+
+
+@pytest.mark.skip_clickhouse_client # TODO: VCR recording does not seem to allow requests to the ClickHouse DB in non-recording mode
+@pytest.mark.vcr(
+ filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"]
+)
+def test_chatnvidia_tool_call_stream(
+ client: weave.trace.weave_client.WeaveClient,
+) -> None:
+ api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+ function_list = [
+ {
+ "type": "function",
+ "function": {
+ "name": "cricket_player_names", # Function Name
+ "description": "store the name of players", # Meta information of function
+ "parameters": { # parameters
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the player",
+ },
+ "team:": {
+ "type": "string",
+ "description": "The team of the player",
+ },
+ "highest_score": {
+ "type": "number",
+ "description": "The highest score of the player",
+ },
+ },
+ "required": ["name", "team", "highest_score"],
+ },
+ },
+ }
+ ]
+
+ nvidia_client = ChatNVIDIA(
+ api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+ ).bind_tools(function_list)
+
+ messages = [
+ {
+ "role": "user",
+ "content": "Can you name a cricket player along with team name and highest score?",
+ }
+ ]
+
+ response = nvidia_client.stream(messages)
+
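+ # accumulate streamed chunks; only the final chunk carries complete usage metadata, so overwrite each time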
+ answer = AIMessageChunk(content="")
+ for chunk in response:
+ answer += chunk
+ answer.usage_metadata = chunk.usage_metadata
+
+ calls = list(client.calls())
+ # expect 2 calls: Weave's langchain integration records its own call alongside ChatNVIDIA's
+ assert len(calls) == 2
+ call = calls[1]
+
+ assert answer.tool_calls is not None
+
+ assert (
+ op_name_from_ref(call.op_name)
+ == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream"
+ )
+ assert call.started_at is not None
+ assert call.started_at < call.ended_at
+
+ output = call.output
+ assert output["model"] == model
+ assert output["object"] == "chat.completion"
+
+ usage = call.summary["usage"][output["model"]]
+ assert usage["requests"] == 1
+ assert usage["completion_tokens"] == 30
+ assert usage["prompt_tokens"] == 318
+ assert usage["total_tokens"] == 348
+
+ inputs = call.inputs
+ assert inputs["model"] == model
+ assert inputs["messages"] == [
+ {
+ "role": "user",
+ "content": "Can you name a cricket player along with team name and highest score?",
+ }
+ ]
+ assert inputs["max_tokens"] == 64
+ assert inputs["temperature"] == 0.0
+ assert inputs["top_p"] == 1
+
+
+@pytest.mark.skip_clickhouse_client # TODO: VCR recording does not seem to allow requests to the ClickHouse DB in non-recording mode
+@pytest.mark.vcr(
+ filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"]
+)
+@pytest.mark.asyncio
+async def test_chatnvidia_tool_call_async_stream(
+ client: weave.trace.weave_client.WeaveClient,
+) -> None:
+ api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY")
+
+ function_list = [
+ {
+ "type": "function",
+ "function": {
+ "name": "cricket_player_names", # Function Name
+ "description": "store the name of players", # Meta information of function
+ "parameters": { # parameters
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the player",
+ },
+ "team:": {
+ "type": "string",
+ "description": "The team of the player",
+ },
+ "highest_score": {
+ "type": "number",
+ "description": "The highest score of the player",
+ },
+ },
+ "required": ["name", "team", "highest_score"],
+ },
+ },
+ }
+ ]
+
+ nvidia_client = ChatNVIDIA(
+ api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1
+ ).bind_tools(function_list)
+
+ messages = [
+ {
+ "role": "user",
+ "content": "Can you name a cricket player along with team name and highest score?",
+ }
+ ]
+
+ response = nvidia_client.astream(messages)
+
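+ # accumulate streamed chunks; only the final chunk carries complete usage metadata, so overwrite each time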
+ answer = AIMessageChunk(content="")
+ async for chunk in response:
+ answer += chunk
+ answer.usage_metadata = chunk.usage_metadata
+
+ calls = list(client.calls())
+ # expect 2 calls: Weave's langchain integration records its own call alongside ChatNVIDIA's
+ assert len(calls) == 2
+ call = calls[1]
+
+ assert answer.tool_calls is not None
+
+ assert (
+ op_name_from_ref(call.op_name)
+ == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream"
+ )
+ assert call.started_at is not None
+ assert call.started_at < call.ended_at
+
+ output = call.output
+ assert output["model"] == model
+ assert output["object"] == "chat.completion"
+
+ usage = call.summary["usage"][output["model"]]
+ assert usage["requests"] == 1
+ assert usage["completion_tokens"] == 30
+ assert usage["prompt_tokens"] == 318
+ assert usage["total_tokens"] == 348
+
+ inputs = call.inputs
+ assert inputs["model"] == model
+ assert inputs["messages"] == [
+ {
+ "role": "user",
+ "content": "Can you name a cricket player along with team name and highest score?",
+ }
+ ]
+ assert inputs["max_tokens"] == 64
+ assert inputs["temperature"] == 0.0
+ assert inputs["top_p"] == 1
diff --git a/weave/integrations/langchain_nvidia_ai_endpoints/__init__.py b/weave/integrations/langchain_nvidia_ai_endpoints/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py
new file mode 100644
index 000000000000..0d376db51d91
--- /dev/null
+++ b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+import importlib
+import time
+from typing import Any, Callable
+
+import_failed = False
+
+try:
+ from langchain_core.messages import AIMessageChunk, convert_to_openai_messages
+ from langchain_core.outputs import ChatGenerationChunk, ChatResult
+except ImportError:
+ import_failed = True
+
+import weave
+from weave.trace.autopatch import IntegrationSettings, OpSettings
+from weave.trace.op import Op, ProcessedInputs
+from weave.trace.op_extensions.accumulator import add_accumulator
+from weave.trace.patcher import MultiPatcher, NoOpPatcher, SymbolPatcher
+
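+# Module-level cache so repeated get_nvidia_ai_patcher() calls reuse one patcher instance.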
+_lc_nvidia_patcher: MultiPatcher | None = None
+
+
+# NVIDIA-specific accumulator that merges streamed chunks into a single response
+def nvidia_accumulator(acc: Any | None, value: Any) -> Any:
+ if acc is None:
+ acc = ChatGenerationChunk(message=AIMessageChunk(content=""))
+ acc = acc + value
+
+ # Overwrite usage_metadata on every chunk: the streaming response's __add__ impl
+ # sums usage incorrectly, and the final chunk carries the authoritative totals,
+ # so taking the latest value is eventually consistent.
+ acc.message.usage_metadata = value.message.usage_metadata
+
+ return acc
+
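+# Rough sketch of the intended accumulation (illustrative values, not from a real
+# response): given two streamed chunks with contents "Hel" and "lo", where only
+# the final chunk carries usage_metadata:
+#   acc = nvidia_accumulator(None, chunk_1)  # acc.message.content == "Hel"
+#   acc = nvidia_accumulator(acc, chunk_2)   # content == "Hello", usage from chunk_2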
+
+# Post-processor that transforms outputs into OpenAI's ChatCompletion format, handling both streaming and non-streaming results
+def postprocess_output_to_openai_format(output: Any) -> Any:
+ """
+ Transform the output logged to Weave into OpenAI's ChatCompletion format so that
+ the Weave frontend renders the chat view. This only affects what is sent to Weave;
+ unrecognized output types are passed through unchanged.
+ """
+ from openai.types.chat import ChatCompletion
+
+ if isinstance(output, ChatResult): # non-streaming response
+ message = output.llm_output or {}
+ # expose token counts under both OpenAI-style and LangChain-style names
+ enhanced_usage = message.get("token_usage", {})
+ enhanced_usage["output_tokens"] = enhanced_usage.get("completion_tokens", 0)
+ enhanced_usage["input_tokens"] = enhanced_usage.get("prompt_tokens", 0)
+
+ returnable = ChatCompletion(
+ id="None",
+ choices=[
+ {
+ "index": 0,
+ "message": {
+ "content": message.get("content", ""),
+ "role": message.get("role", ""),
+ "tool_calls": message.get("tool_calls", []),
+ },
+ "logprobs": None,
+ "finish_reason": message.get("finish_reason", ""),
+ }
+ ],
+ created=int(time.time()),
+ model=message.get("model_name", ""),
+ object="chat.completion",
+ tool_calls=message.get("tool_calls", []),
+ system_fingerprint=None,
+ usage=enhanced_usage,
+ )
+
+ return returnable.model_dump(exclude_unset=True, exclude_none=True)
+
+ elif isinstance(output, ChatGenerationChunk): # accumulated streaming response
+ orig_message = output.message
+ openai_message = convert_to_openai_messages(output.message)
+ # expose token counts under both OpenAI-style and LangChain-style names
+ enhanced_usage = getattr(orig_message, "usage_metadata", None) or {}
+ enhanced_usage["completion_tokens"] = enhanced_usage.get("output_tokens", 0)
+ enhanced_usage["prompt_tokens"] = enhanced_usage.get("input_tokens", 0)
+
+ returnable = ChatCompletion(
+ id="None",
+ choices=[
+ {
+ "index": 0,
+ "message": {
+ "content": orig_message.content,
+ "role": getattr(orig_message, "role", "assistant"),
+ "tool_calls": openai_message.get("tool_calls", []),
+ },
+ "logprobs": None,
+ "finish_reason": getattr(orig_message, "response_metadata", {}).get(
+ "finish_reason", None
+ ),
+ }
+ ],
+ created=int(time.time()),
+ model=getattr(orig_message, "response_metadata", {}).get(
+ "model_name", None
+ ),
+ tool_calls=openai_message.get("tool_calls", []),
+ object="chat.completion",
+ system_fingerprint=None,
+ usage=enhanced_usage,
+ )
+
+ return returnable.model_dump(exclude_unset=True, exclude_none=True)
+ return output
+
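+# For reference, the logged payload is roughly shaped like this (values are
+# illustrative; unset/None fields are dropped by model_dump):
+#   {"id": "None", "object": "chat.completion", "model": "...",
+#    "choices": [{"index": 0, "message": {...}, "finish_reason": "stop"}],
+#    "usage": {"prompt_tokens": ..., "completion_tokens": ..., ...}}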
+
+def postprocess_inputs_to_openai_format(
+ func: Op, args: tuple, kwargs: dict
+) -> ProcessedInputs:
+ """
+ Need to process the input reported to weave to send it on openai format so that Weave front end renders
+ chat view. This only affects what is sent to weave.
+ """
+ original_args = args
+ original_kwargs = kwargs
+
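+ # the patched symbol is an unbound method: args[0] is the ChatNVIDIA instance, args[1] the messages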
+ chat_nvidia_obj = args[0]
+ messages_array = args[1]
+ messages_array = convert_to_openai_messages(messages_array)
+ n = len(messages_array)
+
+ stream = "stream" in func.name
+
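+ # what Weave records as the call's inputs, mirroring OpenAI chat.completions arguments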
+ weave_report = {
+ "model": chat_nvidia_obj.model,
+ "messages": messages_array,
+ "max_tokens": chat_nvidia_obj.max_tokens,
+ "temperature": chat_nvidia_obj.temperature,
+ "top_p": chat_nvidia_obj.top_p,
+ "object": "ChatNVIDIA._generate",
+ "n": n,
+ "stream": stream,
+ }
+
+ return ProcessedInputs(
+ original_args=original_args,
+ original_kwargs=original_kwargs,
+ args=original_args,
+ kwargs=original_kwargs,
+ inputs=weave_report,
+ )
+
+
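+# Decide from the processed inputs (the weave_report above) whether the call is
+# streaming and therefore needs the accumulator.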
+def should_use_accumulator(inputs: dict) -> bool:
+ return isinstance(inputs, dict) and bool(inputs.get("stream"))
+
+
+def nvidia_ai_endpoints_wrapper(settings: OpSettings) -> Callable[[Callable], Callable]:
+ def wrapper(fn: Callable) -> Callable:
+ op_kwargs = settings.model_dump()
+ op = weave.op(fn, **op_kwargs)
+ op._set_on_input_handler(postprocess_inputs_to_openai_format)
+ return add_accumulator(
+ op,
+ make_accumulator=lambda inputs: nvidia_accumulator,
+ should_accumulate=should_use_accumulator,
+ on_finish_post_processor=postprocess_output_to_openai_format,
+ )
+
+ return wrapper
+
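+# Hypothetical manual use (the patcher below applies this automatically):
+#   wrapped = nvidia_ai_endpoints_wrapper(OpSettings())(ChatNVIDIA._generate)
+# Calls through the wrapped symbol are traced, accumulated when streaming, and
+# re-shaped into OpenAI format before being logged.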
+
+def get_nvidia_ai_patcher(
+ settings: IntegrationSettings | None = None,
+) -> MultiPatcher | NoOpPatcher:
+ if settings is None:
+ settings = IntegrationSettings()
+
+ if not settings.enabled:
+ return NoOpPatcher()
+
+ global _lc_nvidia_patcher
+ if _lc_nvidia_patcher is not None:
+ return _lc_nvidia_patcher
+
+ base = settings.op_settings
+
+ generate_settings: OpSettings = base.model_copy(
+ update={
+ "name": base.name or "langchain_nvidia_ai_endpoints.ChatNVIDIA._generate",
+ }
+ )
+ stream_settings: OpSettings = base.model_copy(
+ update={
+ "name": base.name or "langchain_nvidia_ai_endpoints.ChatNVIDIA._stream",
+ }
+ )
+
+ _lc_nvidia_patcher = MultiPatcher(
+ [
+ # Patch the generate path (used by invoke/ainvoke)
+ SymbolPatcher(
+ lambda: importlib.import_module("langchain_nvidia_ai_endpoints"),
+ "ChatNVIDIA._generate",
+ nvidia_ai_endpoints_wrapper(generate_settings),
+ ),
+ # Patch the streaming path (used by stream/astream)
+ SymbolPatcher(
+ lambda: importlib.import_module("langchain_nvidia_ai_endpoints"),
+ "ChatNVIDIA._stream",
+ nvidia_ai_endpoints_wrapper(stream_settings),
+ ),
+ ]
+ )
+
+ return _lc_nvidia_patcher
diff --git a/weave/trace/autopatch.py b/weave/trace/autopatch.py
index c1c47d375127..bc77752957c2 100644
--- a/weave/trace/autopatch.py
+++ b/weave/trace/autopatch.py
@@ -46,6 +46,7 @@ class AutopatchSettings(BaseModel):
notdiamond: IntegrationSettings = Field(default_factory=IntegrationSettings)
openai: IntegrationSettings = Field(default_factory=IntegrationSettings)
vertexai: IntegrationSettings = Field(default_factory=IntegrationSettings)
+ chatnvidia: IntegrationSettings = Field(default_factory=IntegrationSettings)
@validate_call
@@ -60,6 +61,9 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None:
from weave.integrations.groq.groq_sdk import get_groq_patcher
from weave.integrations.instructor.instructor_sdk import get_instructor_patcher
from weave.integrations.langchain.langchain import langchain_patcher
+ from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import (
+ get_nvidia_ai_patcher,
+ )
from weave.integrations.litellm.litellm import get_litellm_patcher
from weave.integrations.llamaindex.llamaindex import llamaindex_patcher
from weave.integrations.mistral import get_mistral_patcher
@@ -82,6 +86,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None:
get_google_genai_patcher(settings.google_ai_studio).attempt_patch()
get_notdiamond_patcher(settings.notdiamond).attempt_patch()
get_vertexai_patcher(settings.vertexai).attempt_patch()
+ get_nvidia_ai_patcher(settings.chatnvidia).attempt_patch()
llamaindex_patcher.attempt_patch()
langchain_patcher.attempt_patch()
@@ -98,6 +103,9 @@ def reset_autopatch() -> None:
from weave.integrations.groq.groq_sdk import get_groq_patcher
from weave.integrations.instructor.instructor_sdk import get_instructor_patcher
from weave.integrations.langchain.langchain import langchain_patcher
+ from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import (
+ get_nvidia_ai_patcher,
+ )
from weave.integrations.litellm.litellm import get_litellm_patcher
from weave.integrations.llamaindex.llamaindex import llamaindex_patcher
from weave.integrations.mistral import get_mistral_patcher
@@ -117,6 +125,7 @@ def reset_autopatch() -> None:
get_google_genai_patcher().undo_patch()
get_notdiamond_patcher().undo_patch()
get_vertexai_patcher().undo_patch()
+ get_nvidia_ai_patcher().undo_patch()
llamaindex_patcher.undo_patch()
langchain_patcher.undo_patch()