diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_model.png b/docs/docs/guides/integrations/imgs/chatnvidia_model.png
new file mode 100644
index 000000000000..4d0782b7719b
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/chatnvidia_model.png differ
diff --git a/docs/docs/guides/integrations/imgs/chatnvidia_trace.png b/docs/docs/guides/integrations/imgs/chatnvidia_trace.png
new file mode 100644
index 000000000000..e573ab26089d
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/chatnvidia_trace.png differ
diff --git a/docs/docs/guides/integrations/imgs/nvidia_pokedex.png b/docs/docs/guides/integrations/imgs/nvidia_pokedex.png
new file mode 100644
index 000000000000..51ab369fa8ca
Binary files /dev/null and b/docs/docs/guides/integrations/imgs/nvidia_pokedex.png differ
diff --git a/docs/docs/guides/integrations/index.md b/docs/docs/guides/integrations/index.md
index f83d579b21c2..78504dc154b4 100644
--- a/docs/docs/guides/integrations/index.md
+++ b/docs/docs/guides/integrations/index.md
@@ -20,6 +20,8 @@ LLM providers are the vendors that offer access to large language models for gen
 - **[Groq](/guides/integrations/groq)**
 - **[Open Router](/guides/integrations/openrouter)**
 - **[LiteLLM](/guides/integrations/litellm)**
+- **[NVIDIA NIM](/guides/integrations/nvidia_nim)**
+
 **[Local Models](/guides/integrations/local_models)**: For when you're running models on your own infrastructure.
diff --git a/docs/docs/guides/integrations/nvidia_nim.md b/docs/docs/guides/integrations/nvidia_nim.md
new file mode 100644
index 000000000000..01007ee7e6a6
--- /dev/null
+++ b/docs/docs/guides/integrations/nvidia_nim.md
@@ -0,0 +1,176 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# NVIDIA NIM
+
+Weave automatically tracks and logs LLM calls made via the [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) library after `weave.init()` is called.
+
+## Tracing
+
+It's important to store traces of LLM applications in a central database, both during development and in production. You'll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application.
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+    Weave can automatically capture traces for the [ChatNVIDIA Python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).
+
+    Start capturing by calling `weave.init()` with a project name of your choice.
+
+    ```python
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+    import weave
+
+    client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.8, max_tokens=64, top_p=1)
+    # highlight-next-line
+    weave.init('emoji-bot')
+
+    messages = [
+        {
+            "role": "system",
+            "content": "You are AGI. You will be provided with a message, and your task is to respond using emojis only."
+        }
+    ]
+
+    response = client.invoke(messages)
+    ```
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+  </TabItem>
+</Tabs>
+
+![chatnvidia_trace.png](imgs/chatnvidia_trace.png)
+
+## Track your own ops
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+Wrapping a function with `@weave.op` starts capturing inputs, outputs, and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git.
+
+Simply create a function decorated with [`@weave.op`](/guides/tracking/ops) that calls into the [ChatNVIDIA Python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).
+
+In the example below, we have two functions wrapped with `@weave.op`. This helps us see how intermediate steps, like the retrieval step in a RAG app, affect how our app behaves.
+
+    ```python
+    # highlight-next-line
+    import weave
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+    import requests, random
+
+    PROMPT = """Emulate the Pokedex from early Pokémon episodes. State the name of the Pokemon and then describe it.
+    Your tone is informative yet sassy, blending factual details with a touch of dry humor. Be concise, no more than 3 sentences. """
+    POKEMON = ['pikachu', 'charmander', 'squirtle', 'bulbasaur', 'jigglypuff', 'meowth', 'eevee']
+    client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.7, max_tokens=100, top_p=1)
+
+    # highlight-next-line
+    @weave.op
+    def get_pokemon_data(pokemon_name):
+        # highlight-next-line
+        # This is a step within your application, like the retrieval step within a RAG app
+        url = f"https://pokeapi.co/api/v2/pokemon/{pokemon_name}"
+        response = requests.get(url)
+        if response.status_code == 200:
+            data = response.json()
+            name = data["name"]
+            types = [t["type"]["name"] for t in data["types"]]
+            species_url = data["species"]["url"]
+            species_response = requests.get(species_url)
+            evolved_from = "Unknown"
+            if species_response.status_code == 200:
+                species_data = species_response.json()
+                if species_data["evolves_from_species"]:
+                    evolved_from = species_data["evolves_from_species"]["name"]
+            return {"name": name, "types": types, "evolved_from": evolved_from}
+        else:
+            return None
+
+    # highlight-next-line
+    @weave.op
+    def pokedex(name: str, prompt: str) -> str:
+        # highlight-next-line
+        # This is your root op that calls out to other ops
+        # highlight-next-line
+        data = get_pokemon_data(name)
+        if not data:
+            return "Error: Unable to fetch data"
+
+        messages = [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": str(data)}
+        ]
+
+        response = client.invoke(messages)
+        return response.content
+
+    # highlight-next-line
+    weave.init('pokedex-nvidia')
+    # Get data for a specific Pokémon
+    pokemon_data = pokedex(random.choice(POKEMON), PROMPT)
+    ```
+
+Navigate to Weave and click `get_pokemon_data` in the UI to see the inputs & outputs of that step.
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+  </TabItem>
+</Tabs>
+
+![nvidia_pokedex.png](imgs/nvidia_pokedex.png)
+
+## Create a `Model` for easier experimentation
+
+<Tabs groupId="programming-language">
+  <TabItem value="python" label="Python" default>
+    Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app, like your system prompt or the model you're using. This helps you organize and compare different iterations of your app.
+
+    In addition to versioning code and capturing inputs/outputs, [`Model`](/guides/core-types/models)s capture structured parameters that control your application's behavior, making it easy to find what parameters worked best. You can also use Weave Models with `serve` and [`Evaluation`](/guides/core-types/evaluations)s.
+
+    In the example below, you can experiment with `model` and `system_message`. Every time you change one of these, you'll get a new _version_ of `GrammarCorrectorModel`.
+
+    ```python
+    import weave
+    from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+    weave.init('grammar-nvidia')
+
+    class GrammarCorrectorModel(weave.Model):  # Change to `weave.Model`
+        system_message: str
+
+        @weave.op()
+        def predict(self, user_input):  # Change to `predict`
+            client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0, max_tokens=100, top_p=1)
+
+            messages = [
+                {
+                    "role": "system",
+                    "content": self.system_message
+                },
+                {
+                    "role": "user",
+                    "content": user_input
+                }
+            ]
+
+            response = client.invoke(messages)
+            return response.content
+
+    corrector = GrammarCorrectorModel(
+        system_message = "You are a grammar checker, correct the following user input.")
+    result = corrector.predict("That was so easy, it was a piece of pie!")
+    print(result)
+    ```
+  </TabItem>
+  <TabItem value="typescript" label="TypeScript">
+    ```plaintext
+    This feature is not available in TypeScript yet since this library is only in Python.
+    ```
+  </TabItem>
+</Tabs>
+
+![chatnvidia_model.png](imgs/chatnvidia_model.png)
+
+## Usage Info
+
+The ChatNVIDIA integration supports `invoke`, `stream`, and their async variants, as well as tool use.
+Because ChatNVIDIA is meant to be used with many different types of models, it does not have function calling support.
diff --git a/docs/sidebars.ts b/docs/sidebars.ts
index fa2f7f80d32e..b132e5ac6476 100644
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -101,6 +101,7 @@ const sidebars: SidebarsConfig = {
         "guides/integrations/groq",
         "guides/integrations/openrouter",
         "guides/integrations/litellm",
+        "guides/integrations/nvidia_nim",
       ],
     },
     "guides/integrations/local_models",
diff --git a/noxfile.py b/noxfile.py
index dff1305f21fd..f70339598275 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -8,6 +8,7 @@
     "cohere",
     "dspy",
     "langchain",
+    "langchain_nvidia_ai_endpoints",
     "litellm",
     "notdiamond",
     "google_ai_studio",
@@ -40,6 +41,7 @@ def lint(session):
         "google_ai_studio",
         "groq",
         "instructor",
+        "langchain_nvidia_ai_endpoints",
         "langchain",
         "litellm",
         "llamaindex",
@@ -73,6 +75,10 @@ def tests(session, shard):
     if shard == "google_ai_studio":
         env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")
 
+    # Add the NVIDIA_API_KEY environment variable for the "langchain_nvidia_ai_endpoints" shard
+    if shard == "langchain_nvidia_ai_endpoints":
+        env["NVIDIA_API_KEY"] = session.env.get("NVIDIA_API_KEY")
+
     # we are doing some integration test in test_llm_integrations.py that requires
     # setting some environment variables for the LLM providers
     if shard == "scorers_tests":
diff --git a/pyproject.toml b/pyproject.toml
index 660fab30960f..eb5636029b2a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,10 @@ langchain = [
     "pysqlite3",
     "opentelemetry-exporter-otlp",
 ]
+langchain_nvidia_ai_endpoints = [
+    "langchain-core>=0.2.1",
+    "langchain-nvidia-ai-endpoints",
+]
 litellm = ["litellm>=1.36.1"]
 llamaindex = ["llama-index>=0.10.35"]
 mistral0 = ["mistralai>=0.1.8,<1.0.0"]
@@ -98,7 +102,11 @@ test = [
     # Integration Tests
     "pytest-recording>=0.13.2",
-    "vcrpy>=6.0.1",
+    # "vcrpy>=6.0.1",
+    # https://github.com/kevin1024/vcrpy/pull/889
+    # This resolves test issues until a new PyPI release can be made. Once that release
+    # is made, we can remove this and revert to the vcrpy>=6.0.1 dependency.
+ "vcrpy @ git+https://github.com/kevin1024/vcrpy.git@48d0a2e453f6635af343000cdaf9794a781e807e", # serving tests "flask", @@ -108,6 +116,7 @@ test = [ "httpx", ] + [project.scripts] weave = "weave.trace.cli:cli" @@ -138,6 +147,8 @@ exclude = [ "weave/clear_cache.py", ] +[tool.hatch.metadata] +allow-direct-references = true [tool.pytest.ini_options] filterwarnings = [ diff --git a/tests/integrations/__init__.py b/tests/integrations/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/__init__.py b/tests/integrations/langchain_nvidia_ai_endpoints/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml new file mode 100644 index 000000000000..fea579fb73e6 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_quickstart.yaml @@ -0,0 +1,50 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '161' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: '{"id":"chat-8bfccc9544b64c70b47605a647b69b8a","object":"chat.completion","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! + It''s nice to meet you. 
Is there something I can help you with or would you + like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '445' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:45 GMT + Nvcf-Reqid: + - 704f40c5-4d25-46fb-8d76-66364bc9e156 + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml new file mode 100644 index 000000000000..eab2bd71a7c6 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_async_stream_quickstart.yaml @@ -0,0 +1,152 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '203' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} + + + data: 
{"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}} + + + data: 
{"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-dbf248718ebc4c52a1a04269c275c78e","object":"chat.completion.chunk","created":1734992506,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + 
Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:46 GMT + Nvcf-Reqid: + - 5d8ef74f-0537-47a5-b0dc-0735d916776f + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml new file mode 100644 index 000000000000..0f3dc21c319e --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_quickstart.yaml @@ -0,0 +1,50 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '161' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: '{"id":"chat-55c0d02e9caa471694b571312c012a34","object":"chat.completion","created":1734992504,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello! + It''s nice to meet you. Is there something I can help you with or would you + like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '445' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:44 GMT + Nvcf-Reqid: + - ea89199e-9f54-4c8d-8895-b1fd9034b86e + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml new file mode 100644 index 000000000000..9d6d68b489f6 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_stream_quickstart.yaml @@ -0,0 +1,152 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '203' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: 
{"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":12,"completion_tokens":0}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":13,"completion_tokens":1}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"!"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":14,"completion_tokens":2}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + It"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":15,"completion_tokens":3}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"''s"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":16,"completion_tokens":4}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + nice"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":17,"completion_tokens":5}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":18,"completion_tokens":6}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + meet"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":19,"completion_tokens":7}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":20,"completion_tokens":8}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"."},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":21,"completion_tokens":9}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + Is"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":22,"completion_tokens":10}} + + + data: 
{"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":23,"completion_tokens":11}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + something"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":24,"completion_tokens":12}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":25,"completion_tokens":13}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":26,"completion_tokens":14}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":27,"completion_tokens":15}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":28,"completion_tokens":16}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":29,"completion_tokens":17}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":30,"completion_tokens":18}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + would"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":31,"completion_tokens":19}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":32,"completion_tokens":20}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":33,"completion_tokens":21}} + + + data: 
{"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":34,"completion_tokens":22}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":35,"completion_tokens":23}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":"?"},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":""},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: {"id":"chat-595e0ba8ceb643f89b82d8b3e95cdd67","object":"chat.completion.chunk","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:45 GMT + Nvcf-Reqid: + - ca6cf115-a5f7-447c-95e7-eafced589f5d + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml new file mode 100644 index 000000000000..aa03ceaa8d58 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call.yaml @@ -0,0 +1,56 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false, "tools": + [{"type": "function", "function": {"name": "cricket_player_names", "description": + "store the name of players", "parameters": {"type": "object", "properties": + {"name": {"type": "string", "description": "The name of the player"}, "team:": + {"type": "string", "description": "The team of the player"}, "highest_score": + {"type": "number", "description": "The highest score of the player"}}, "required": + ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '670' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + 
response: + body: + string: '{"id":"chat-c827eb3e9dad425dbde170c946ff7cf7","object":"chat.completion","created":1734992507,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-3b35163e6a5a4961aa22b93581a0c5b2","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '580' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:48 GMT + Nvcf-Reqid: + - 497f5f82-973e-4224-8593-97664a04b39c + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml new file mode 100644 index 000000000000..9043999a5f7f --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async.yaml @@ -0,0 +1,56 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false, "tools": + [{"type": "function", "function": {"name": "cricket_player_names", "description": + "store the name of players", "parameters": {"type": "object", "properties": + {"name": {"type": "string", "description": "The name of the player"}, "team:": + {"type": "string", "description": "The team of the player"}, "highest_score": + {"type": "number", "description": "The highest score of the player"}}, "required": + ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '670' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: '{"id":"chat-5e1b429e3b704443992df3e6a1f95021","object":"chat.completion","created":1734992509,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":null,"tool_calls":[{"id":"chatcmpl-tool-b16ddfa73cb94d9b95d6572615e71589","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"}}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30},"prompt_logprobs":null}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Length: + - '580' + Content-Type: + - application/json + Date: + - Mon, 23 Dec 2024 22:21:49 GMT + Nvcf-Reqid: + - 60deff33-b1fe-46d1-abc4-35978ee01613 + Nvcf-Status: + - 
fulfilled + Server: + - uvicorn + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml new file mode 100644 index 000000000000..e58ed33910b8 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_async_stream.yaml @@ -0,0 +1,68 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}, "tools": [{"type": "function", "function": {"name": + "cricket_player_names", "description": "store the name of players", "parameters": + {"type": "object", "properties": {"name": {"type": "string", "description": + "The name of the player"}, "team:": {"type": "string", "description": "The team + of the player"}, "highest_score": {"type": "number", "description": "The highest + score of the player"}}, "required": ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '712' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} + + + data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-df3daa09595c462297a7253930e4d915","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: {"id":"chat-43c44c15b2274e9e94fec57c04543f80","object":"chat.completion.chunk","created":1734992511,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:51 GMT + Nvcf-Reqid: + - 43d997a5-3e0a-485d-9567-e16813f1b183 + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git 
a/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml new file mode 100644 index 000000000000..c8bd72e70e11 --- /dev/null +++ b/tests/integrations/langchain_nvidia_ai_endpoints/cassettes/langchain_nv_ai_endpoints_test/test_chatnvidia_tool_call_stream.yaml @@ -0,0 +1,68 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Can you name a cricket player + along with team name and highest score?"}], "model": "meta/llama-3.1-8b-instruct", + "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": true, "stream_options": + {"include_usage": true}, "tools": [{"type": "function", "function": {"name": + "cricket_player_names", "description": "store the name of players", "parameters": + {"type": "object", "properties": {"name": {"type": "string", "description": + "The name of the player"}, "team:": {"type": "string", "description": "The team + of the player"}, "highest_score": {"type": "number", "description": "The highest + score of the player"}}, "required": ["name", "team", "highest_score"]}}}]}' + headers: + Accept: + - text/event-stream + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '712' + Content-Type: + - application/json + User-Agent: + - langchain-nvidia-ai-endpoints + method: POST + uri: https://integrate.api.nvidia.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":318,"completion_tokens":0}} + + + data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"delta":{"role":null,"content":null,"tool_calls":[{"id":"chatcmpl-tool-d0d074128af94984bfc40534381b5860","type":"function","function":{"name":"cricket_player_names","arguments":"{\"name\": + \"Virat Kohli\", \"team:\": \"India\", \"highest_score\": 183}"},"index":0}]},"logprobs":null,"finish_reason":"tool_calls","stop_reason":null}],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: {"id":"chat-4c0f7aead39b4e8f916c259cf9941b5e","object":"chat.completion.chunk","created":1734992510,"model":"meta/llama-3.1-8b-instruct","choices":[],"usage":{"prompt_tokens":318,"total_tokens":348,"completion_tokens":30}} + + + data: [DONE] + + + ' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - nvcf-reqid + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 23 Dec 2024 22:21:50 GMT + Nvcf-Reqid: + - 2746750e-86b1-4959-bd8b-1da038003578 + Nvcf-Status: + - fulfilled + Server: + - uvicorn + Transfer-Encoding: + - chunked + Vary: + - Origin + - origin, access-control-request-method, access-control-request-headers + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py new file mode 100644 index 000000000000..e46ebac8e89c --- /dev/null +++ 
b/tests/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints_test.py @@ -0,0 +1,567 @@ +import os + +import pytest +from langchain_core.messages import AIMessageChunk +from langchain_nvidia_ai_endpoints import ChatNVIDIA + +import weave +from weave.integrations.integration_utilities import op_name_from_ref + +model = "meta/llama-3.1-8b-instruct" + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], + allowed_hosts=["api.wandb.ai", "localhost"], +) +def test_chatnvidia_quickstart(client: weave.trace.weave_client.WeaveClient) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + + response = nvidia_client.invoke("Hello!") + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at # type: ignore + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] # type: ignore + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def test_chatnvidia_async_quickstart( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + + response = await nvidia_client.ainvoke("Hello!") + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode 
+@pytest.mark.vcr( + filter_headers=["authorization"], + allowed_hosts=["api.wandb.ai", "localhost"], +) +def test_chatnvidia_stream_quickstart( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + + response = nvidia_client.stream("Hello!") + answer = AIMessageChunk(content="") + for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + print(call.summary["usage"][output["model"]]) + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def test_chatnvidia_async_stream_quickstart( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ) + response = nvidia_client.astream("Hello!") + answer = AIMessageChunk(content="") + async for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + print(call.summary["usage"][output["model"]]) + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 24 + assert usage["prompt_tokens"] == 12 + assert usage["total_tokens"] == 36 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [{"role": "user", "content": "Hello!"}] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +def test_chatnvidia_tool_call(client: weave.trace.weave_client.WeaveClient) -> None: + 
api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = nvidia_client.invoke(messages) + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def test_chatnvidia_tool_call_async( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = await nvidia_client.ainvoke(messages) + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert response.content is not None + + assert ( + 
op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-generate" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +def test_chatnvidia_tool_call_stream( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = nvidia_client.stream(messages) + + answer = AIMessageChunk(content="") + for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.tool_calls is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 + + +@pytest.mark.skip_clickhouse_client # TODO:VCR recording does not seem to allow us to make requests to the clickhouse db in non-recording mode +@pytest.mark.vcr( + filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"] +) +@pytest.mark.asyncio +async def 
test_chatnvidia_tool_call_async_stream( + client: weave.trace.weave_client.WeaveClient, +) -> None: + api_key = os.environ.get("NVIDIA_API_KEY", "DUMMY_API_KEY") + + function_list = [ + { + "type": "function", + "function": { + "name": "cricket_player_names", # Function Name + "description": "store the name of players", # Meta information of function + "parameters": { # parameters + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the player", + }, + "team:": { + "type": "string", + "description": "The team of the player", + }, + "highest_score": { + "type": "number", + "description": "The highest score of the player", + }, + }, + "required": ["name", "team", "highest_score"], + }, + }, + } + ] + + nvidia_client = ChatNVIDIA( + api_key=api_key, model=model, temperature=0.0, max_tokens=64, top_p=1 + ).bind_tools(function_list) + + messages = [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + + response = nvidia_client.astream(messages) + + answer = AIMessageChunk(content="") + async for chunk in response: + answer += chunk + answer.usage_metadata = chunk.usage_metadata + + calls = list(client.calls()) + # need to make 2 because of the langchain integration getting a call in there + assert len(calls) == 2 + call = calls[1] + + assert answer.tool_calls is not None + + assert ( + op_name_from_ref(call.op_name) + == "langchain_nvidia_ai_endpoints.ChatNVIDIA-stream" + ) + assert call.started_at is not None + assert call.started_at < call.ended_at + + output = call.output + assert output["model"] == model + assert output["object"] == "chat.completion" + + usage = call.summary["usage"][output["model"]] + assert usage["requests"] == 1 + assert usage["completion_tokens"] == 30 + assert usage["prompt_tokens"] == 318 + assert usage["total_tokens"] == 348 + + inputs = call.inputs + assert inputs["model"] == model + assert inputs["messages"] == [ + { + "role": "user", + "content": "Can you name a cricket player along with team name and highest score?", + } + ] + assert inputs["max_tokens"] == 64 + assert inputs["temperature"] == 0.0 + assert inputs["top_p"] == 1 diff --git a/weave-js/src/components/DropdownMenu/DropdownMenu.tsx b/weave-js/src/components/DropdownMenu/DropdownMenu.tsx index 64a93f42006b..56d12fa1432e 100644 --- a/weave-js/src/components/DropdownMenu/DropdownMenu.tsx +++ b/weave-js/src/components/DropdownMenu/DropdownMenu.tsx @@ -111,7 +111,10 @@ export const Separator = ({ }: RadixDropdownMenu.DropdownMenuSeparatorProps) => ( diff --git a/weave-js/src/components/Tag/Tag.tsx b/weave-js/src/components/Tag/Tag.tsx index 85ec9d8a6e2e..b7ebe22123d7 100644 --- a/weave-js/src/components/Tag/Tag.tsx +++ b/weave-js/src/components/Tag/Tag.tsx @@ -1,5 +1,12 @@ import classNames from 'classnames'; -import React, {FC, ReactElement, useMemo, useRef} from 'react'; +import React, { + FC, + ReactElement, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; import {twMerge} from 'tailwind-merge'; import {Icon, IconName} from '../Icon'; @@ -103,11 +110,18 @@ export const RemovableTag: FC = ({ Wrapper = Tailwind, }) => { const labelRef = useRef(null); - const isTooltipEnabled = isTagLabelTruncated(labelRef); + const [isTruncated, setIsTruncated] = useState(false); + + useEffect(() => { + if (labelRef.current) { + setIsTruncated(isTagLabelTruncated(labelRef)); + } + }, [label]); + const classes = useTagClasses({color, isInteractive: true, label}); const nakedTag = ( - +
@@ -132,6 +146,5 @@ export const RemovableTag: FC = ({ if (Wrapper) { return {nakedTag}; } - return nakedTag; };
diff --git a/weave/integrations/langchain_nvidia_ai_endpoints/__init__.py b/weave/integrations/langchain_nvidia_ai_endpoints/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py
new file mode 100644
index 000000000000..0d376db51d91
--- /dev/null
+++ b/weave/integrations/langchain_nvidia_ai_endpoints/langchain_nv_ai_endpoints.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+import importlib
+import time
+from typing import Any, Callable
+
+import_failed = False
+
+try:
+    from langchain_core.messages import AIMessageChunk, convert_to_openai_messages
+    from langchain_core.outputs import ChatGenerationChunk, ChatResult
+except ImportError:
+    import_failed = True
+
+import weave
+from weave.trace.autopatch import IntegrationSettings, OpSettings
+from weave.trace.op import Op, ProcessedInputs
+from weave.trace.op_extensions.accumulator import add_accumulator
+from weave.trace.patcher import MultiPatcher, NoOpPatcher, SymbolPatcher
+
+_lc_nvidia_patcher: MultiPatcher | None = None
+
+
+# NVIDIA-specific accumulator that merges the chunks of a streaming interaction.
+def nvidia_accumulator(acc: Any | None, value: Any) -> Any:
+    if acc is None:
+        acc = ChatGenerationChunk(message=AIMessageChunk(content=""))
+    acc = acc + value
+
+    # The __add__ impl for streaming chunks does not carry usage forward, so take
+    # it from the latest chunk; the final chunk holds the real usage, making the
+    # accumulated value eventually consistent.
+    acc.message.usage_metadata = value.message.usage_metadata
+
+    return acc
+
+
+# Post-processor to transform output into OpenAI's ChatCompletion format; it must
+# handle both stream and non-stream outputs.
+def postprocess_output_to_openai_format(output: Any) -> dict:
+    """
+    Post-process the output reported to Weave into OpenAI's ChatCompletion format
+    so that the Weave front end renders the chat view. This only affects what is
+    sent to Weave.
+ """ + from openai.types.chat import ChatCompletion + + if isinstance(output, ChatResult): # its ChatResult + message = output.llm_output + enhanced_usage = message.get("token_usage", {}) + enhanced_usage["output_tokens"] = message.get("token_usage").get( + "completion_tokens", 0 + ) + enhanced_usage["input_tokens"] = message.get("token_usage").get( + "prompt_tokens", 0 + ) + + returnable = ChatCompletion( + id="None", + choices=[ + { + "index": 0, + "message": { + "content": message.get("content", ""), + "role": message.get("role", ""), + "tool_calls": message.get("tool_calls", []), + }, + "logprobs": None, + "finish_reason": message.get("finish_reason", ""), + } + ], + created=int(time.time()), + model=message.get("model_name", ""), + object="chat.completion", + tool_calls=message.get("tool_calls", []), + system_fingerprint=None, + usage=enhanced_usage, + ) + + return returnable.model_dump(exclude_unset=True, exclude_none=True) + + elif isinstance(output, ChatGenerationChunk): # its ChatGenerationChunk + orig_message = output.message + openai_message = convert_to_openai_messages(output.message) + enhanced_usage = getattr(orig_message, "usage_metadata", {}) + enhanced_usage["completion_tokens"] = orig_message.usage_metadata.get( + "output_tokens", 0 + ) + enhanced_usage["prompt_tokens"] = orig_message.usage_metadata.get( + "input_tokens", 0 + ) + + returnable = ChatCompletion( + id="None", + choices=[ + { + "index": 0, + "message": { + "content": orig_message.content, + "role": getattr(orig_message, "role", "assistant"), + "tool_calls": openai_message.get("tool_calls", []), + }, + "logprobs": None, + "finish_reason": getattr(orig_message, "response_metadata", {}).get( + "finish_reason", None + ), + } + ], + created=int(time.time()), + model=getattr(orig_message, "response_metadata", {}).get( + "model_name", None + ), + tool_calls=openai_message.get("tool_calls", []), + object="chat.completion", + system_fingerprint=None, + usage=enhanced_usage, + ) + + return returnable.model_dump(exclude_unset=True, exclude_none=True) + return output + + +def postprocess_inputs_to_openai_format( + func: Op, args: tuple, kwargs: dict +) -> ProcessedInputs: + """ + Need to process the input reported to weave to send it on openai format so that Weave front end renders + chat view. This only affects what is sent to weave. 
+ """ + original_args = args + original_kwargs = kwargs + + chat_nvidia_obj = args[0] + messages_array = args[1] + messages_array = convert_to_openai_messages(messages_array) + n = len(messages_array) + + stream = False + if "stream" in func.name: + stream = True + + weave_report = { + "model": chat_nvidia_obj.model, + "messages": messages_array, + "max_tokens": chat_nvidia_obj.max_tokens, + "temperature": chat_nvidia_obj.temperature, + "top_p": chat_nvidia_obj.top_p, + "object": "ChatNVIDIA._generate", + "n": n, + "stream": stream, + } + + return ProcessedInputs( + original_args=original_args, + original_kwargs=original_kwargs, + args=original_args, + kwargs=original_kwargs, + inputs=weave_report, + ) + + +def should_use_accumulator(inputs: dict) -> bool: + return isinstance(inputs, dict) and bool(inputs.get("stream")) + + +def nvidia_ai_endpoints_wrapper(settings: OpSettings) -> Callable[[Callable], Callable]: + def wrapper(fn: Callable) -> Callable: + op_kwargs = settings.model_dump() + op = weave.op(fn, **op_kwargs) + op._set_on_input_handler(postprocess_inputs_to_openai_format) + return add_accumulator( + op, + make_accumulator=lambda inputs: nvidia_accumulator, + should_accumulate=should_use_accumulator, + on_finish_post_processor=postprocess_output_to_openai_format, + ) + + return wrapper + + +def get_nvidia_ai_patcher( + settings: IntegrationSettings | None = None, +) -> MultiPatcher | NoOpPatcher: + if settings is None: + settings = IntegrationSettings() + + if not settings.enabled: + return NoOpPatcher() + + global _lc_nvidia_patcher + if _lc_nvidia_patcher is not None: + return _lc_nvidia_patcher + + base = settings.op_settings + + generate_settings: OpSettings = base.model_copy( + update={ + "name": base.name or "langchain_nvidia_ai_endpoints.ChatNVIDIA._generate", + } + ) + stream_settings: OpSettings = base.model_copy( + update={ + "name": base.name or "langchain_nvidia_ai_endpoints.ChatNVIDIA._stream", + } + ) + + _lc_nvidia_patcher = MultiPatcher( + [ + # Patch invoke method + SymbolPatcher( + lambda: importlib.import_module("langchain_nvidia_ai_endpoints"), + "ChatNVIDIA._generate", + nvidia_ai_endpoints_wrapper(generate_settings), + ), + # Patch stream method + SymbolPatcher( + lambda: importlib.import_module("langchain_nvidia_ai_endpoints"), + "ChatNVIDIA._stream", + nvidia_ai_endpoints_wrapper(stream_settings), + ), + ] + ) + + return _lc_nvidia_patcher diff --git a/weave/trace/autopatch.py b/weave/trace/autopatch.py index c1c47d375127..bc77752957c2 100644 --- a/weave/trace/autopatch.py +++ b/weave/trace/autopatch.py @@ -46,6 +46,7 @@ class AutopatchSettings(BaseModel): notdiamond: IntegrationSettings = Field(default_factory=IntegrationSettings) openai: IntegrationSettings = Field(default_factory=IntegrationSettings) vertexai: IntegrationSettings = Field(default_factory=IntegrationSettings) + chatnvidia: IntegrationSettings = Field(default_factory=IntegrationSettings) @validate_call @@ -60,6 +61,9 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: from weave.integrations.groq.groq_sdk import get_groq_patcher from weave.integrations.instructor.instructor_sdk import get_instructor_patcher from weave.integrations.langchain.langchain import langchain_patcher + from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import ( + get_nvidia_ai_patcher, + ) from weave.integrations.litellm.litellm import get_litellm_patcher from weave.integrations.llamaindex.llamaindex import llamaindex_patcher from weave.integrations.mistral import 
get_mistral_patcher @@ -82,6 +86,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: get_google_genai_patcher(settings.google_ai_studio).attempt_patch() get_notdiamond_patcher(settings.notdiamond).attempt_patch() get_vertexai_patcher(settings.vertexai).attempt_patch() + get_nvidia_ai_patcher(settings.chatnvidia).attempt_patch() llamaindex_patcher.attempt_patch() langchain_patcher.attempt_patch() @@ -98,6 +103,9 @@ def reset_autopatch() -> None: from weave.integrations.groq.groq_sdk import get_groq_patcher from weave.integrations.instructor.instructor_sdk import get_instructor_patcher from weave.integrations.langchain.langchain import langchain_patcher + from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import ( + get_nvidia_ai_patcher, + ) from weave.integrations.litellm.litellm import get_litellm_patcher from weave.integrations.llamaindex.llamaindex import llamaindex_patcher from weave.integrations.mistral import get_mistral_patcher @@ -117,6 +125,7 @@ def reset_autopatch() -> None: get_google_genai_patcher().undo_patch() get_notdiamond_patcher().undo_patch() get_vertexai_patcher().undo_patch() + get_nvidia_ai_patcher().undo_patch() llamaindex_patcher.undo_patch() langchain_patcher.undo_patch() diff --git a/weave/trace/sanitize.py b/weave/trace/sanitize.py index 5125dbfb09dc..24e1fea5dd95 100644 --- a/weave/trace/sanitize.py +++ b/weave/trace/sanitize.py @@ -1,6 +1,11 @@ +# always use lowercase keys for the redact keys REDACT_KEYS = ( "api_key", "auth_headers", - "Authorization", + "authorization", ) REDACTED_VALUE = "REDACTED" + + +def should_redact(key: str) -> bool: + return key.lower() in REDACT_KEYS diff --git a/weave/trace/serialize.py b/weave/trace/serialize.py index 5e0c6006aa50..ae8afa0e12e8 100644 --- a/weave/trace/serialize.py +++ b/weave/trace/serialize.py @@ -8,7 +8,7 @@ from weave.trace import custom_objs from weave.trace.object_record import ObjectRecord from weave.trace.refs import ObjectRef, TableRef, parse_uri -from weave.trace.sanitize import REDACT_KEYS, REDACTED_VALUE +from weave.trace.sanitize import REDACTED_VALUE, should_redact from weave.trace_server.interface.builtin_object_classes.builtin_object_registry import ( BUILTIN_OBJECT_REGISTRY, ) @@ -148,7 +148,7 @@ def dictify( elif isinstance(obj, dict): dict_result = {} for k, v in obj.items(): - if k in REDACT_KEYS: + if should_redact(k): dict_result[k] = REDACTED_VALUE else: dict_result[k] = dictify(v, maxdepth, depth + 1, seen) @@ -160,7 +160,7 @@ def dictify( if isinstance(as_dict, dict): to_dict_result = {} for k, v in as_dict.items(): - if k in REDACT_KEYS: + if should_redact(k): to_dict_result[k] = REDACTED_VALUE elif maxdepth == 0 or depth < maxdepth: to_dict_result[k] = dictify(v, maxdepth, depth + 1) @@ -187,7 +187,7 @@ def dictify( for attr in dir(obj): if attr.startswith("_"): continue - if attr in REDACT_KEYS: + if should_redact(attr): result[attr] = REDACTED_VALUE continue try: diff --git a/weave/trace/weave_client.py b/weave/trace/weave_client.py index 1d5d54b9b23c..87a445383b57 100644 --- a/weave/trace/weave_client.py +++ b/weave/trace/weave_client.py @@ -38,7 +38,7 @@ parse_op_uri, parse_uri, ) -from weave.trace.sanitize import REDACT_KEYS, REDACTED_VALUE +from weave.trace.sanitize import REDACTED_VALUE, should_redact from weave.trace.serialize import from_json, isinstance_namedtuple, to_json from weave.trace.serializer import get_serializer_for_obj from weave.trace.settings import client_parallelism @@ -1648,7 +1648,7 @@ def 
redact_sensitive_keys(obj: Any) -> Any: if isinstance(obj, dict): dict_res = {} for k, v in obj.items(): - if k in REDACT_KEYS: + if should_redact(k): dict_res[k] = REDACTED_VALUE else: dict_res[k] = redact_sensitive_keys(v) diff --git a/weave/trace_server/async_batch_processor.py b/weave/trace_server/async_batch_processor.py index 03071607aee0..a8a183d94bfe 100644 --- a/weave/trace_server/async_batch_processor.py +++ b/weave/trace_server/async_batch_processor.py @@ -6,7 +6,6 @@ from typing import Callable, Generic, TypeVar from weave.trace.context.tests_context import get_raise_on_captured_errors -from weave.trace_server import requests T = TypeVar("T") logger = logging.getLogger(__name__) @@ -61,14 +60,10 @@ def _process_batches(self) -> None: if current_batch: try: self.processor_fn(current_batch) - except requests.HTTPError as e: - if e.response.status_code == 413: - # 413: payload too large, don't raise just log - if get_raise_on_captured_errors(): - raise - logger.exception(f"Error processing batch: {e}") - else: - raise e + except Exception as e: + if get_raise_on_captured_errors(): + raise + logger.exception(f"Error processing batch: {e}") if self.stop_event.is_set() and self.queue.empty(): break
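For reference, a minimal sketch (not part of the patch) of how the `nvidia_accumulator` added earlier in this diff is expected to behave for a streamed call. It assumes a recent `langchain_core` where `AIMessageChunk` accepts `usage_metadata`; the chunk contents below are made up for illustration:

```python
from langchain_core.messages import AIMessageChunk
from langchain_core.outputs import ChatGenerationChunk

from weave.integrations.langchain_nvidia_ai_endpoints.langchain_nv_ai_endpoints import (
    nvidia_accumulator,
)

# Two fake streamed chunks; only the final chunk carries usage metadata.
chunks = [
    ChatGenerationChunk(message=AIMessageChunk(content="Hello")),
    ChatGenerationChunk(
        message=AIMessageChunk(
            content=" world",
            usage_metadata={"input_tokens": 12, "output_tokens": 24, "total_tokens": 36},
        )
    ),
]

acc = None
for chunk in chunks:
    acc = nvidia_accumulator(acc, chunk)

print(acc.message.content)         # "Hello world"
print(acc.message.usage_metadata)  # usage copied from the final chunk
```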
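The new `chatnvidia` field on `AutopatchSettings` lets the patcher be toggled like the other integrations. A sketch, assuming `IntegrationSettings` defaults to `enabled=True` (as `get_nvidia_ai_patcher` implies) and calling the `autopatch` entry point shown in this diff directly rather than going through `weave.init`:

```python
from weave.trace.autopatch import (
    AutopatchSettings,
    IntegrationSettings,
    autopatch,
    reset_autopatch,
)

# Patch every supported integration except ChatNVIDIA.
autopatch(AutopatchSettings(chatnvidia=IntegrationSettings(enabled=False)))

# ... and undo all applied patches again when done.
reset_autopatch()
```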
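Finally, the `sanitize.py` change above makes redaction key matching case-insensitive. A minimal sketch of the intended behavior, mirroring the updated call sites in `serialize.dictify` and `weave_client.redact_sensitive_keys`; the `redact` helper here is hypothetical and for illustration only:

```python
from weave.trace.sanitize import REDACTED_VALUE, should_redact

def redact(obj: dict) -> dict:
    # Hypothetical helper: any key whose lowercase form is in REDACT_KEYS is masked.
    return {k: REDACTED_VALUE if should_redact(k) else v for k, v in obj.items()}

print(redact({"Authorization": "Bearer abc", "API_KEY": "sk-123", "model": "mixtral"}))
# {'Authorization': 'REDACTED', 'API_KEY': 'REDACTED', 'model': 'mixtral'}
```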