From dc65385ac6f61f5dbe70f60cf34044e2acc2fb46 Mon Sep 17 00:00:00 2001 From: Brace Sproul Date: Mon, 5 Aug 2024 14:59:03 -0700 Subject: [PATCH 1/6] scripts[minor],docs[minor]: Add template for KV store. Updated in memory & fs store docs (#6379) * scripts[minor]: Add template for KV store * cr and add in memory store * add yield keys section * added file system store --- .../integrations/stores/file_system.ipynb | 276 ++++++++++++++++++ .../docs/integrations/stores/file_system.mdx | 26 -- .../docs/integrations/stores/in_memory.ipynb | 238 +++++++++++++++ .../docs/integrations/stores/in_memory.mdx | 13 - libs/langchain-scripts/src/cli/docs/index.ts | 7 + .../src/cli/docs/kv_store.ts | 166 +++++++++++ .../src/cli/docs/templates/kv_store.ipynb | 269 +++++++++++++++++ 7 files changed, 956 insertions(+), 39 deletions(-) create mode 100644 docs/core_docs/docs/integrations/stores/file_system.ipynb delete mode 100644 docs/core_docs/docs/integrations/stores/file_system.mdx create mode 100644 docs/core_docs/docs/integrations/stores/in_memory.ipynb delete mode 100644 docs/core_docs/docs/integrations/stores/in_memory.mdx create mode 100644 libs/langchain-scripts/src/cli/docs/kv_store.ts create mode 100644 libs/langchain-scripts/src/cli/docs/templates/kv_store.ipynb diff --git a/docs/core_docs/docs/integrations/stores/file_system.ipynb b/docs/core_docs/docs/integrations/stores/file_system.ipynb new file mode 100644 index 000000000000..f7da9b71cdee --- /dev/null +++ b/docs/core_docs/docs/integrations/stores/file_system.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: File System Store\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LocalFileStore\n", + "\n", + "```{=mdx}\n", + "\n", + ":::tip Compatibility\n", + "\n", + "Only available on Node.js.\n", + "\n", + ":::\n", + "\n", + 
"```\n", + "\n", + "This will help you get started with [LocalFileStore](/docs/concepts/#key-value-stores). For detailed documentation of all LocalFileStore features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_storage_file_system.LocalFileStore.html).\n", + "\n", + "## Overview\n", + "\n", + "The `LocalFileStore` is a wrapper around the `fs` module for storing data as key-value pairs.\n", + "Each key value pair has its own file nested inside the directory passed to the `.fromPath` method.\n", + "The file name is the key and inside contains the value of the key.\n", + "\n", + "```{=mdx}\n", + "\n", + ":::info\n", + "\n", + "The path passed to the `.fromPath` must be a directory, not a file.\n", + "\n", + ":::\n", + "\n", + "```\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | [PY support](https://python.langchain.com/v0.2/docs/integrations/stores/file_system/) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: |\n", + "| [LocalFileStore](https://api.js.langchain.com/classes/langchain_storage_file_system.LocalFileStore.html) | [langchain](https://api.js.langchain.com/modules/langchain_storage_file_system.html) | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/langchain?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/langchain?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "### Installation\n", + "\n", + "The LangChain `LocalFileStore` integration lives in the `langchain` package:\n", + "\n", + "```{=mdx}\n", + "\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " langchain\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our byte store:" + ] + 
}, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import { LocalFileStore } from \"langchain/storage/file_system\"\n", + "\n", + "const kvStore = await LocalFileStore.fromPath(\"./messages\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define an encoder and decoder for converting the data to `Uint8Array` and back:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "const encoder = new TextEncoder();\n", + "const decoder = new TextDecoder();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "You can set data under keys like this using the `mset` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 'value1', 'value2' ]\n" + ] + } + ], + "source": [ + "await kvStore.mset(\n", + " [\n", + " [\"key1\", encoder.encode(\"value1\")],\n", + " [\"key2\", encoder.encode(\"value2\")],\n", + " ]\n", + ")\n", + "\n", + "const results = await kvStore.mget(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")\n", + "console.log(results.map((v) => decoder.decode(v)));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And you can delete data using the `mdelete` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ undefined, undefined ]\n" + ] + } + ], + "source": [ + "await kvStore.mdelete(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")\n", + "\n", + "await kvStore.mget(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Yielding values\n", + "\n", + "If you want to get back all the keys you can call the 
`yieldKeys` method. Optionally, you can pass a key prefix to only get back keys which match that prefix." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 'message:id:key1', 'message:id:key2' ]\n" + ] + } + ], + "source": [ + "import { LocalFileStore } from \"langchain/storage/file_system\"\n", + "\n", + "const kvStoreForYield = await LocalFileStore.fromPath(\"./messages\");\n", + "\n", + "const encoderForYield = new TextEncoder();\n", + "\n", + "// Add some data to the store\n", + "await kvStoreForYield.mset(\n", + " [\n", + " [\"message:id:key1\", encoderForYield.encode(\"value1\")],\n", + " [\"message:id:key2\", encoderForYield.encode(\"value2\")],\n", + " ]\n", + ")\n", + "\n", + "const yieldedKeys = [];\n", + "for await (const key of kvStoreForYield.yieldKeys(\"message:id:\")) {\n", + " yieldedKeys.push(key);\n", + "}\n", + "\n", + "console.log(yieldedKeys);" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import fs from \"fs\";\n", + "\n", + "// Cleanup\n", + "await fs.promises.rm(\"./messages\", { recursive: true, force: true });" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all LocalFileStore features and configurations, head to the [API reference](https://api.js.langchain.com/classes/langchain_storage_file_system.LocalFileStore.html)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/docs/core_docs/docs/integrations/stores/file_system.mdx b/docs/core_docs/docs/integrations/stores/file_system.mdx deleted file mode 100644 index dbe25b1df18e..000000000000 --- a/docs/core_docs/docs/integrations/stores/file_system.mdx +++ /dev/null @@ -1,26 +0,0 @@ ---- -sidebar_class_name: node-only ---- - -# File System Store - -:::tip Compatibility -Only available on Node.js. -::: - -This example demonstrates how to setup chat history storage using the `LocalFileStore` KV store integration. - -## Usage - -:::info -The path passed to the `.fromPath` must be a directory, not a file. -::: - -The `LocalFileStore` is a wrapper around the `fs` module for storing data as key-value pairs. -Each key value pair has its own file nested inside the directory passed to the `.fromPath` method. -The file name is the key and inside contains the value of the key. - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/stores/file_system_storage.ts"; - -{Example} diff --git a/docs/core_docs/docs/integrations/stores/in_memory.ipynb b/docs/core_docs/docs/integrations/stores/in_memory.ipynb new file mode 100644 index 000000000000..beef57053818 --- /dev/null +++ b/docs/core_docs/docs/integrations/stores/in_memory.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: InMemory Store\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# InMemoryStore\n", + "\n", + "This will help you get started with [InMemoryStore](/docs/concepts/#key-value-stores). For detailed documentation of all InMemoryStore features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_core_stores.InMemoryStore.html).\n", + "\n", + "The `InMemoryStore` allows for a generic type to be assigned to the values in the store. 
We'll assign type `BaseMessage` as the type of our values, keeping with the theme of a chat history store.\n", + "\n", + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | [PY support](https://python.langchain.com/v0.2/docs/integrations/stores/in_memory/) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: |\n", + "| [InMemoryStore](https://api.js.langchain.com/classes/langchain_core_stores.InMemoryStore.html) | [@langchain/core](https://api.js.langchain.com/modules/langchain_core_stores.html) | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/core?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/core?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "### Installation\n", + "\n", + "The LangChain InMemoryStore integration lives in the `@langchain/core` package:\n", + "\n", + "```{=mdx}\n", + "\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/core\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our byte store:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import { InMemoryStore } from \"@langchain/core/stores\"\n", + "import { BaseMessage } from \"@langchain/core/messages\";\n", + "\n", + "const kvStore = new InMemoryStore();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "You can set data under keys like this using the `mset` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " HumanMessage {\n", + " 
\"content\": \"value1\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"content\": \"value2\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "import { AIMessage, HumanMessage } from \"@langchain/core/messages\";\n", + "\n", + "await kvStore.mset(\n", + " [\n", + " [\"key1\", new HumanMessage(\"value1\")],\n", + " [\"key2\", new AIMessage(\"value2\")],\n", + " ]\n", + ")\n", + "\n", + "await kvStore.mget(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And you can delete data using the `mdelete` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ undefined, undefined ]\n" + ] + } + ], + "source": [ + "await kvStore.mdelete(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")\n", + "\n", + "await kvStore.mget(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Yielding values\n", + "\n", + "If you want to get back all the keys you can call the `yieldKeys` method. Optionally, you can pass a key prefix to only get back keys which match that prefix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 'message:id:key1', 'message:id:key2' ]\n" + ] + } + ], + "source": [ + "import { InMemoryStore } from \"@langchain/core/stores\"\n", + "import { AIMessage, BaseMessage, HumanMessage } from \"@langchain/core/messages\";\n", + "\n", + "const kvStoreForYield = new InMemoryStore();\n", + "\n", + "// Add some data to the store\n", + "await kvStoreForYield.mset(\n", + " [\n", + " [\"message:id:key1\", new HumanMessage(\"value1\")],\n", + " [\"message:id:key2\", new AIMessage(\"value2\")],\n", + " ]\n", + ")\n", + "\n", + "const yieldedKeys = [];\n", + "for await (const key of kvStoreForYield.yieldKeys(\"message:id:\")) {\n", + " yieldedKeys.push(key);\n", + "}\n", + "\n", + "console.log(yieldedKeys);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all InMemoryStore features and configurations, head to the [API reference](https://api.js.langchain.com/classes/langchain_core_stores.InMemoryStore.html)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/core_docs/docs/integrations/stores/in_memory.mdx b/docs/core_docs/docs/integrations/stores/in_memory.mdx deleted file mode 100644 index 10c3e80bbec7..000000000000 --- a/docs/core_docs/docs/integrations/stores/in_memory.mdx +++ /dev/null @@ -1,13 +0,0 @@ -# In Memory Store - -This example demonstrates how to setup chat history storage using the `InMemoryStore` KV store integration. 
- -## Usage - -The `InMemoryStore` allows for a generic type to be assigned to the values in the store. -We'll assign type `BaseMessage` as the type of our values, keeping with the theme of a chat history store. - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/stores/in_memory_storage.ts"; - -{Example} diff --git a/libs/langchain-scripts/src/cli/docs/index.ts b/libs/langchain-scripts/src/cli/docs/index.ts index 71e47e471de4..87e6a92dcc49 100644 --- a/libs/langchain-scripts/src/cli/docs/index.ts +++ b/libs/langchain-scripts/src/cli/docs/index.ts @@ -8,6 +8,7 @@ import { fillLLMIntegrationDocTemplate } from "./llms.js"; import { fillRetrieverIntegrationDocTemplate } from "./retrievers.js"; import { fillEmbeddingsIntegrationDocTemplate } from "./embeddings.js"; import { fillToolkitIntegrationDocTemplate } from "./toolkits.js"; +import { fillKVStoreIntegrationDocTemplate } from "./kv_store.js"; type CLIInput = { type: string; @@ -21,6 +22,7 @@ const ALLOWED_TYPES = [ "embeddings", "doc_loader", "toolkit", + "kv_store", ]; async function main() { @@ -73,6 +75,11 @@ async function main() { className, }); break; + case "kv_store": + await fillKVStoreIntegrationDocTemplate({ + className, + }); + break; default: console.error( `Invalid type: '${type}'.\nMust be one of:\n - ${ALLOWED_TYPES.join( diff --git a/libs/langchain-scripts/src/cli/docs/kv_store.ts b/libs/langchain-scripts/src/cli/docs/kv_store.ts new file mode 100644 index 000000000000..6b43262dff51 --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/kv_store.ts @@ -0,0 +1,166 @@ +import * as path from "node:path"; +import * as fs from "node:fs"; +import { + boldText, + getUserInput, + greenText, + redBackground, +} from "../utils/get-input.js"; +import { fetchURLStatus } from "../utils/fetch-url-status.js"; +import { + SIDEBAR_LABEL_PLACEHOLDER, + MODULE_NAME_PLACEHOLDER, + PACKAGE_NAME_PLACEHOLDER, + FULL_IMPORT_PATH_PLACEHOLDER, + ENV_VAR_NAME_PLACEHOLDER, + 
PYTHON_DOC_URL_PLACEHOLDER, + API_REF_MODULE_PLACEHOLDER, + API_REF_PACKAGE_PLACEHOLDER, + LOCAL_PLACEHOLDER, + PY_SUPPORT_PLACEHOLDER, +} from "../constants.js"; + +const TEMPLATE_PATH = path.resolve("./src/cli/docs/templates/kv_store.ipynb"); +const INTEGRATIONS_DOCS_PATH = path.resolve( + "../../docs/core_docs/docs/integrations/stores" +); + +type ExtraFields = { + pySupport: boolean; + local: boolean; + envVarName: string; + fullImportPath: string; + packageName: string; +}; + +async function promptExtraFields(fields: { + envVarGuess: string; +}): Promise { + const hasPySupport = await getUserInput( + "Does this integration have Python support? (y/n) ", + undefined, + true + ); + const hasLocalSupport = await getUserInput( + "Does this integration support running locally? (y/n) ", + undefined, + true + ); + const importPath = await getUserInput( + "What is the full import path of the integration? (e.g @langchain/community/llms/togetherai) ", + undefined, + true + ); + + let packageName = ""; + if (importPath.startsWith("langchain/")) { + packageName = "langchain"; + } else { + packageName = importPath.split("/").slice(0, 2).join("/"); + } + + const verifyPackageName = await getUserInput( + `Is ${packageName} the correct package name? (y/n) `, + undefined, + true + ); + if (verifyPackageName.toLowerCase() === "n") { + packageName = await getUserInput( + "Please enter the full package name (e.g @langchain/community) ", + undefined, + true + ); + } + + const isEnvGuessCorrect = await getUserInput( + `Is the environment variable for the API key named ${fields.envVarGuess}? 
(y/n) `, + undefined, + true + ); + let envVarName = fields.envVarGuess; + if (isEnvGuessCorrect.toLowerCase() === "n") { + envVarName = await getUserInput( + "Please enter the correct environment variable name ", + undefined, + true + ); + } + + return { + pySupport: hasPySupport.toLowerCase() === "y", + local: hasLocalSupport.toLowerCase() === "y", + envVarName, + fullImportPath: importPath, + packageName, + }; +} + +export async function fillKVStoreIntegrationDocTemplate(fields: { + className: string; +}) { + // Sidebar labels should match this format "XYZ Store" + let sidebarLabel = ""; + if (fields.className.endsWith("KVStore")) { + sidebarLabel = fields.className.replace("KVStore", " Store"); + } else if (fields.className.endsWith("ByteStore")) { + sidebarLabel = fields.className.replace("ByteStore", " Store"); + } else { + sidebarLabel = fields.className.replace("Store", " Store"); + } + const pyDocUrl = `https://python.langchain.com/v0.2/docs/integrations/stores/${sidebarLabel.toLowerCase()}/`; + let envVarName = `${sidebarLabel.toUpperCase()}_API_KEY`; + const extraFields = await promptExtraFields({ + envVarGuess: envVarName, + }); + envVarName = extraFields.envVarName; + const importPathEnding = extraFields.fullImportPath.split("/").pop() ?? ""; + const apiRefModuleUrl = `https://api.js.langchain.com/classes/${extraFields.fullImportPath + .replace("@", "") + .replaceAll("/", "_") + .replaceAll("-", "_")}.${fields.className}.html`; + const apiRefPackageUrl = apiRefModuleUrl + .replace("/classes/", "/modules/") + .replace(`.${fields.className}.html`, ".html"); + + const apiRefUrlSuccesses = await Promise.all([ + fetchURLStatus(apiRefModuleUrl), + fetchURLStatus(apiRefPackageUrl), + ]); + if (apiRefUrlSuccesses.find((s) => !s)) { + console.warn( + "API ref URLs invalid. Please manually ensure they are correct." 
+ ); + } + + const docTemplate = (await fs.promises.readFile(TEMPLATE_PATH, "utf-8")) + .replaceAll(SIDEBAR_LABEL_PLACEHOLDER, sidebarLabel) + .replaceAll(MODULE_NAME_PLACEHOLDER, fields.className) + .replaceAll(PACKAGE_NAME_PLACEHOLDER, extraFields.packageName) + .replaceAll(FULL_IMPORT_PATH_PLACEHOLDER, extraFields.fullImportPath) + .replaceAll(ENV_VAR_NAME_PLACEHOLDER, envVarName) + .replaceAll(PYTHON_DOC_URL_PLACEHOLDER, pyDocUrl) + .replaceAll(API_REF_MODULE_PLACEHOLDER, apiRefModuleUrl) + .replaceAll(API_REF_PACKAGE_PLACEHOLDER, apiRefPackageUrl) + .replaceAll(LOCAL_PLACEHOLDER, extraFields?.local ? "✅" : "❌") + .replaceAll(PY_SUPPORT_PLACEHOLDER, extraFields?.pySupport ? "✅" : "❌"); + + const docPath = path.join( + INTEGRATIONS_DOCS_PATH, + `${importPathEnding}.ipynb` + ); + await fs.promises.writeFile(docPath, docTemplate); + const prettyDocPath = docPath.split("docs/core_docs/")[1]; + + const updatePythonDocUrlText = ` ${redBackground( + "- Update the Python documentation URL with the proper URL." + )}`; + const successText = `\nSuccessfully created new document loader integration doc at ${prettyDocPath}.`; + + console.log( + `${greenText(successText)}\n +${boldText("Next steps:")} +${extraFields?.pySupport ? updatePythonDocUrlText : ""} + - Run all code cells in the generated doc to record the outputs. 
+ - Add extra sections on integration specific features.\n` + ); +} diff --git a/libs/langchain-scripts/src/cli/docs/templates/kv_store.ipynb b/libs/langchain-scripts/src/cli/docs/templates/kv_store.ipynb new file mode 100644 index 000000000000..b54beedc0710 --- /dev/null +++ b/libs/langchain-scripts/src/cli/docs/templates/kv_store.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: __sidebar_label__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# __module_name__\n", + "\n", + "- TODO: Make sure API reference link is correct.\n", + "\n", + "This will help you get started with [__module_name__](/docs/concepts/#key-value-stores). For detailed documentation of all __module_name__ features and configurations head to the [API reference](__api_ref_module__).\n", + "\n", + "- TODO: Add any other relevant links, like information about models, prices, context windows, etc. 
See https://js.langchain.com/v0.2/docs/integrations/stores/in_memory/ for an example.\n", + "\n", + "## Overview\n", + "\n", + "- TODO: (Optional) A short introduction to the underlying technology/API.\n", + "\n", + "### Integration details\n", + "\n", + "- TODO: Fill in table features.\n", + "- TODO: Remove PY support link if not relevant, otherwise ensure link is correct.\n", + "- TODO: Make sure API reference links are correct.\n", + "\n", + "| Class | Package | Local | [PY support](__python_doc_url__) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: |\n", + "| [__module_name__](__api_ref_module__) | [__package_name__](__api_ref_package__) | __local__ | __py_support__ | ![NPM - Downloads](https://img.shields.io/npm/dm/__package_name__?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/__package_name__?style=flat-square&label=%20&) |\n", + "\n", + "## Setup\n", + "\n", + "- TODO: Update with relevant info.\n", + "\n", + "To create a __sidebar_label__ byte store, you'll need to create a/an __sidebar_label__ account, get an API key, and install the `__package_name__` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "- TODO: Update with relevant info, or omit if the service does not require any credentials.\n", + "\n", + "Head to (TODO: link) to sign up to __sidebar_label__ and generate an API key. 
Once you've done this set the `__env_var_name__` environment variable:\n", + "\n", + "```typescript\n", + "process.env.__env_var_name__=\"__your_api_key__\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The LangChain __module_name__ integration lives in the `__package_name__` package:\n", + "\n", + "```{=mdx}\n", + "\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " __package_name__\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our byte store:\n", + "\n", + "- TODO: Update model instantiation with relevant params." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "import { __module_name__ } from \"__full_import_path__\"\n", + "\n", + "const kvStore = new __module_name__({\n", + " // params...\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define an encoder and decoder for converting the data to `Uint8Array` and back:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "const encoder = new TextEncoder();\n", + "const decoder = new TextDecoder();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "- TODO: Run cells so output can be seen.\n", + "\n", + "You can set data under keys like this using the `mset` method:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "await kvStore.mset(\n", + " [\n", + " [\"key1\", encoder.encode(\"value1\")],\n", + " [\"key2\", 
encoder.encode(\"value2\")],\n", + " ]\n", + ")\n", + "\n", + "const results = await kvStore.mget(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")\n", + "console.log(results.map((v) => decoder.decode(v)));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And you can delete data using the `mdelete` method:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "await kvStore.mdelete(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")\n", + "\n", + "await kvStore.mget(\n", + " [\n", + " \"key1\",\n", + " \"key2\",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Yielding values\n", + "\n", + "If you want to get back all the keys you can call the `yieldKeys` method. Optionally, you can pass a key prefix to only get back keys which match that prefix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "typescript" + } + }, + "outputs": [], + "source": [ + "import { __module_name__ } from \"__full_import_path__\"\n", + "\n", + "const kvStoreForYield = new __module_name__({\n", + " ...\n", + "});\n", + "\n", + "const encoderForYield = new TextEncoder();\n", + "\n", + "// Add some data to the store\n", + "await kvStoreForYield.mset(\n", + " [\n", + " [\"message:id:key1\", encoderForYield.encode(\"value1\")],\n", + " [\"message:id:key2\", encoderForYield.encode(\"value2\")],\n", + " ]\n", + ")\n", + "\n", + "const yieldedKeys = [];\n", + "for await (const key of kvStoreForYield.yieldKeys(\"message:id:\")) {\n", + " yieldedKeys.push(key);\n", + "}\n", + "\n", + "console.log(yieldedKeys);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TODO: Any functionality specific to this key-value store provider\n", + "\n", + "E.g. extra initialization. Delete if not relevant." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all __module_name__ features and configurations, head to the [API reference](__api_ref_module__)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From ae5a2b0baf49035fb8ec045ee37bb3ccd591ad60 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 5 Aug 2024 16:41:22 -0700 Subject: [PATCH 2/6] docs[patch]: Update tavily retriever docs (#6400) * Update tavily retriever docs * Fix typo --- .../docs/integrations/retrievers/tavily.ipynb | 274 ++++++++++++++++++ .../docs/integrations/retrievers/tavily.mdx | 22 -- 2 files changed, 274 insertions(+), 22 deletions(-) create mode 100644 docs/core_docs/docs/integrations/retrievers/tavily.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/tavily.mdx diff --git a/docs/core_docs/docs/integrations/retrievers/tavily.ipynb b/docs/core_docs/docs/integrations/retrievers/tavily.ipynb new file mode 100644 index 000000000000..2915c6ec4ddb --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/tavily.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Tavily Search API\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# TavilySearchAPIRetriever\n", + "\n", + "[Tavily's Search API](https://tavily.com) is a search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed.\n", + "\n", + "## Overview\n", + "\n", + "This will help you getting started with the Tavily Search API [retriever](/docs/concepts/#retrievers). 
For detailed documentation of all `TavilySearchAPIRetriever` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_retrievers_tavily_search_api.TavilySearchAPIRetriever.html).\n", + "\n", + "### Integration details\n", + "\n", + "| Retriever | Source | Package |\n", + "| :--- | :--- | :---: |\n", + "[`TavilySearchAPIRetriever`](https://api.js.langchain.com/classes/langchain_community_retrievers_tavily_search_api.TavilySearchAPIRetriever.html) | Information on the web. | [`@langchain/community`](https://npmjs.com/@langchain/community/) |\n", + "\n", + "## Setup\n", + "\n", + "You will need to populate a `TAVILY_API_KEY` environment variable with your Tavily API key or pass it into the constructor as `apiKey`.\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "This retriever lives in the `@langchain/community` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our retriever:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { TavilySearchAPIRetriever } from \"@langchain/community/retrievers/tavily_search_api\";\n", + "\n", + "const retriever = new TavilySearchAPIRetriever({\n", + " k: 3,\n", + "});" + ] + 
}, + { + "cell_type": "markdown", + "id": "c9da7fc7", + "metadata": {}, + "source": [ + "For a full list of allowed arguments, see [the official documentation](https://docs.tavily.com/docs/tavily-api/rest_api#parameters). You can pass any param to the SDK via a `kwargs` object." + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: \"{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.78, 'lon': -122.42, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1722900266, 'localtime': '2024-08-05 16:24'}, 'current': {'last_updated_epoch': 1722899700, 'last_updated': '2024-08-05 16:15', 'temp_c': 16.8, 'temp_f': 62.2, 'is_day': 1, 'condition': {'text': 'Partly Cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 13.2, 'wind_kph': 21.2, 'wind_degree': 261, 'wind_dir': 'W', 'pressure_mb': 1014.0, 'pressure_in': 29.94, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 74, 'cloud': 60, 'feelslike_c': 16.8, 'feelslike_f': 62.2, 'windchill_c': 16.8, 'windchill_f': 62.2, 'heatindex_c': 16.8, 'heatindex_f': 62.2, 'dewpoint_c': 12.3, 'dewpoint_f': 54.1, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 5.0, 'gust_mph': 17.3, 'gust_kph': 27.8}}\",\n", + " metadata: {\n", + " title: 'Weather in San Francisco',\n", + " source: 'https://www.weatherapi.com/',\n", + " score: 0.9947009,\n", + " images: []\n", + " },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Current Weather for Popular Cities . 
San Francisco, CA 56 ° F Mostly Cloudy; Manhattan, NY warning 85 ° F Fair; Schiller Park, IL (60176) 71 ° F Mostly Cloudy; Boston, MA warning 84 ° F Partly ...',\n", + " metadata: {\n", + " title: 'San Francisco, CA Hourly Weather Forecast | Weather Underground',\n", + " source: 'https://www.wunderground.com/hourly/us/ca/san-francisco/date/2024-08-02',\n", + " score: 0.9859904,\n", + " images: []\n", + " },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'San Francisco CA 37.77°N 122.41°W (Elev. 131 ft) Last Update: 2:42 pm PDT Aug 4, 2024. Forecast Valid: 5pm PDT Aug 4, 2024-6pm PDT Aug 11, 2024 . Forecast Discussion . Additional Resources. Radar & Satellite Image. Hourly Weather Forecast. ... Severe Weather ; Current Outlook Maps ; Drought ; Fire Weather ; Fronts/Precipitation Maps ; Current ...',\n", + " metadata: {\n", + " title: 'National Weather Service',\n", + " source: 'https://forecast.weather.gov/zipcity.php?inputstring=San+Francisco,CA',\n", + " score: 0.98141783,\n", + " images: []\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const query = \"what is the current weather in SF?\";\n", + "\n", + "await retriever.invoke(query);" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, `TavilySearchAPIRetriever` can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "We will need a LLM or chat model:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "25b647a3-f8f2-4541-a289-7a241e43f9df", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o-mini\",\n", + 
" temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => doc.pageContent).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: retriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The current weather in San Francisco is partly cloudy with a temperature of 16.8°C (62.2°F). The wind is coming from the west at 13.2 mph (21.2 kph), and the humidity is at 74%. 
There is no precipitation, and visibility is 10 km (6 miles).\n" + ] + } + ], + "source": [ + "await ragChain.invoke(query);" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all `TavilySearchAPIRetriever` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_retrievers_tavily_search_api.TavilySearchAPIRetriever.html)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/tavily.mdx b/docs/core_docs/docs/integrations/retrievers/tavily.mdx deleted file mode 100644 index a5d73465ce3d..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/tavily.mdx +++ /dev/null @@ -1,22 +0,0 @@ -# Tavily Search API - -[Tavily's Search API](https://tavily.com) is a search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed. - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/community -``` - -You will need to populate a `TAVILY_API_KEY` environment variable with your Tavily API key or pass it into the constructor as `apiKey`. - -For a full list of allowed arguments, see [the official documentation](https://app.tavily.com/documentation/api). You can also pass any param to the SDK via a `kwargs` object. 
- -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/tavily.ts"; - -{Example} From 7293f858d9069faeb7a229abc65d0f2c9865ab1a Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 5 Aug 2024 16:44:15 -0700 Subject: [PATCH 3/6] docs[minor]: Update self-query retriever docs (#6398) * Update Chroma self-query retriever docs * Test * Update all self-query retriever docs * Update deps * Fix * Fix notebook * Fix type issues --- docs/core_docs/.gitignore | 24 + docs/core_docs/docs/how_to/streaming.ipynb | 6 +- .../retrievers/bedrock-knowledge-bases.ipynb | 539 +++++++++--------- .../docs/integrations/retrievers/exa.ipynb | 2 +- .../retrievers/kendra-retriever.ipynb | 4 +- .../retrievers/self_query/chroma.ipynb | 411 +++++++++++++ .../retrievers/self_query/chroma.mdx | 48 -- .../retrievers/self_query/hnswlib.ipynb | 405 +++++++++++++ .../retrievers/self_query/hnswlib.mdx | 42 -- .../retrievers/self_query/memory.ipynb | 405 +++++++++++++ .../retrievers/self_query/memory.mdx | 42 -- .../retrievers/self_query/pinecone.ipynb | 423 ++++++++++++++ .../retrievers/self_query/pinecone.mdx | 48 -- .../retrievers/self_query/qdrant.ipynb | 418 ++++++++++++++ .../retrievers/self_query/qdrant.mdx | 53 -- .../retrievers/self_query/supabase.ipynb | 415 ++++++++++++++ .../retrievers/self_query/supabase.mdx | 46 -- .../retrievers/self_query/vectara.ipynb | 417 ++++++++++++++ .../retrievers/self_query/vectara.mdx | 47 -- .../retrievers/self_query/weaviate.ipynb | 426 ++++++++++++++ .../retrievers/self_query/weaviate.mdx | 53 -- docs/core_docs/package.json | 2 +- docs/core_docs/scripts/quarto-build.js | 2 +- examples/package.json | 2 +- langchain/package.json | 2 +- libs/langchain-community/langchain.config.js | 3 +- libs/langchain-community/package.json | 9 +- .../src/cli/docs/templates/retrievers.ipynb | 2 +- yarn.lock | 165 ++---- 29 files changed, 3663 insertions(+), 798 deletions(-) create mode 100644 
docs/core_docs/docs/integrations/retrievers/self_query/chroma.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/chroma.mdx create mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.mdx create mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/memory.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/memory.mdx create mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/pinecone.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/pinecone.mdx create mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/qdrant.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/qdrant.mdx create mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/supabase.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/supabase.mdx create mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/vectara.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/vectara.mdx create mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/weaviate.ipynb delete mode 100644 docs/core_docs/docs/integrations/retrievers/self_query/weaviate.mdx diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore index e1e4c5360d03..b4db51d639de 100644 --- a/docs/core_docs/.gitignore +++ b/docs/core_docs/.gitignore @@ -214,8 +214,16 @@ docs/integrations/text_embedding/togetherai.md docs/integrations/text_embedding/togetherai.mdx docs/integrations/text_embedding/openai.md docs/integrations/text_embedding/openai.mdx +docs/integrations/text_embedding/mistralai.md +docs/integrations/text_embedding/mistralai.mdx +docs/integrations/text_embedding/fireworks.md +docs/integrations/text_embedding/fireworks.mdx +docs/integrations/text_embedding/cohere.md 
+docs/integrations/text_embedding/cohere.mdx docs/integrations/text_embedding/azure_openai.md docs/integrations/text_embedding/azure_openai.mdx +docs/integrations/retrievers/kendra-retriever.md +docs/integrations/retrievers/kendra-retriever.mdx docs/integrations/retrievers/exa.md docs/integrations/retrievers/exa.mdx docs/integrations/retrievers/bedrock-knowledge-bases.md @@ -258,6 +266,22 @@ docs/integrations/chat/azure.md docs/integrations/chat/azure.mdx docs/integrations/chat/anthropic.md docs/integrations/chat/anthropic.mdx +docs/integrations/retrievers/self_query/weaviate.md +docs/integrations/retrievers/self_query/weaviate.mdx +docs/integrations/retrievers/self_query/vectara.md +docs/integrations/retrievers/self_query/vectara.mdx +docs/integrations/retrievers/self_query/supabase.md +docs/integrations/retrievers/self_query/supabase.mdx +docs/integrations/retrievers/self_query/qdrant.md +docs/integrations/retrievers/self_query/qdrant.mdx +docs/integrations/retrievers/self_query/pinecone.md +docs/integrations/retrievers/self_query/pinecone.mdx +docs/integrations/retrievers/self_query/memory.md +docs/integrations/retrievers/self_query/memory.mdx +docs/integrations/retrievers/self_query/hnswlib.md +docs/integrations/retrievers/self_query/hnswlib.mdx +docs/integrations/retrievers/self_query/chroma.md +docs/integrations/retrievers/self_query/chroma.mdx docs/integrations/document_loaders/web_loaders/web_puppeteer.md docs/integrations/document_loaders/web_loaders/web_puppeteer.mdx docs/integrations/document_loaders/web_loaders/web_cheerio.md diff --git a/docs/core_docs/docs/how_to/streaming.ipynb b/docs/core_docs/docs/how_to/streaming.ipynb index aada24ae09fa..d83b0a29f741 100644 --- a/docs/core_docs/docs/how_to/streaming.ipynb +++ b/docs/core_docs/docs/how_to/streaming.ipynb @@ -76,7 +76,7 @@ "metadata": {}, "outputs": [], "source": [ - "// @ls-docs-hide-cell\n", + "// @lc-docs-hide-cell\n", "import { ChatOpenAI } from \"@langchain/openai\";\n", "\n", "const model = 
new ChatOpenAI({\n", @@ -2058,9 +2058,9 @@ ], "metadata": { "kernelspec": { - "display_name": "TypeScript", + "display_name": "Deno", "language": "typescript", - "name": "tslab" + "name": "deno" }, "language_info": { "codemirror_mode": { diff --git a/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.ipynb b/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.ipynb index fbf57c6eb66a..a7721b6b1b58 100644 --- a/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.ipynb +++ b/docs/core_docs/docs/integrations/retrievers/bedrock-knowledge-bases.ipynb @@ -1,273 +1,272 @@ { - "cells": [ - { - "cell_type": "raw", - "id": "afaf8039", - "metadata": { - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "---\n", - "sidebar_label: Knowledge Bases for Amazon Bedrock\n", - "---" - ] + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } }, - { - "cell_type": "markdown", - "id": "e49f1e0d", - "metadata": {}, - "source": [ - "# Knowledge Bases for Amazon Bedrock\n", - "\n", - "## Overview\n", - "\n", - "This will help you getting started with the [AmazonKnowledgeBaseRetriever](/docs/concepts/#retrievers). 
For detailed documentation of all AmazonKnowledgeBaseRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html).\n", - "\n", - "Knowledge Bases for Amazon Bedrock is a fully managed support for end-to-end RAG workflow provided by Amazon Web Services (AWS).\n", - "It provides an entire ingestion workflow of converting your documents into embeddings (vector) and storing the embeddings in a specialized vector database.\n", - "Knowledge Bases for Amazon Bedrock supports popular databases for vector storage, including vector engine for Amazon OpenSearch Serverless, Pinecone, Redis Enterprise Cloud, Amazon Aurora (coming soon), and MongoDB (coming soon).\n", - "\n", - "### Integration details\n", - "\n", - "| Retriever | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/docs/integrations/retrievers/bedrock/) |\n", - "| :--- | :--- | :---: | :---: | :---: |\n", - "[AmazonKnowledgeBaseRetriever](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html) | 🟠 (see details below) | ✅ | @langchain/aws | ✅ |\n", - "\n", - "> AWS Knowledge Base Retriever can be 'self hosted' in the sense you can run it on your own AWS infrastructure. However it is not possible to run on another cloud provider or on-premises.\n", - "\n", - "## Setup\n", - "\n", - "In order to use the AmazonKnowledgeBaseRetriever, you need to have an AWS account, where you can manage your indexes and documents. 
Once you've setup your account, set the following environment variables:\n", - "\n", - "```bash\n", - "process.env.AWS_KNOWLEDGE_BASE_ID=your-knowledge-base-id\n", - "process.env.AWS_ACCESS_KEY_ID=your-access-key-id\n", - "process.env.AWS_SECRET_ACCESS_KEY=your-secret-access-key\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "72ee0c4b-9764-423a-9dbf-95129e185210", - "metadata": {}, - "source": [ - "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de", - "metadata": {}, - "outputs": [], - "source": [ - "// process.env.LANGSMITH_API_KEY = \"\";\n", - "// process.env.LANGSMITH_TRACING = \"true\";" - ] - }, - { - "cell_type": "markdown", - "id": "0730d6a1-c893-4840-9817-5e5251676d5d", - "metadata": {}, - "source": [ - "### Installation\n", - "\n", - "This retriever lives in the `@langchain/aws` package:\n", - "\n", - "```{=mdx}\n", - "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", - "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", - "\n", - "\n", - "\n", - "\n", - " @langchain/aws\n", - "\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "a38cde65-254d-4219-a441-068766c0d4b5", - "metadata": {}, - "source": [ - "## Instantiation\n", - "\n", - "Now we can instantiate our retriever:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", - "metadata": {}, - "outputs": [], - "source": [ - "import { AmazonKnowledgeBaseRetriever } from \"@langchain/aws\";\n", - "\n", - "const retriever = new AmazonKnowledgeBaseRetriever({\n", - " topK: 10,\n", - " knowledgeBaseId: process.env.AWS_KNOWLEDGE_BASE_ID,\n", - " region: \"us-east-2\",\n", - " clientOptions: {\n", - " credentials: {\n", - " accessKeyId: process.env.AWS_ACCESS_KEY_ID,\n", - " 
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,\n", - " },\n", - " },\n", - "});" - ] - }, - { - "cell_type": "markdown", - "id": "5c5f2839-4020-424e-9fc9-07777eede442", - "metadata": {}, - "source": [ - "## Usage" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", - "metadata": {}, - "outputs": [], - "source": [ - "const query = \"...\"\n", - "\n", - "await retriever.invoke(query);" - ] - }, - { - "cell_type": "markdown", - "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", - "metadata": {}, - "source": [ - "## Use within a chain\n", - "\n", - "Like other retrievers, AmazonKnowledgeBaseRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", - "\n", - "We will need a LLM or chat model:\n", - "\n", - "```{=mdx}\n", - "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", - "\n", - "\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25b647a3-f8f2-4541-a289-7a241e43f9df", - "metadata": {}, - "outputs": [], - "source": [ - "// @ls-docs-hide-cell\n", - "\n", - "import { ChatOpenAI } from \"@langchain/openai\";\n", - "\n", - "const llm = new ChatOpenAI({\n", - " model: \"gpt-4o-mini\",\n", - " temperature: 0,\n", - "});" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", - "metadata": {}, - "outputs": [], - "source": [ - "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", - "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", - "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", - "\n", - "import type { Document } from \"@langchain/core/documents\";\n", - "\n", - "const prompt = ChatPromptTemplate.fromTemplate(`\n", - "Answer the question based only on the context provided.\n", - "\n", - "Context: {context}\n", - "\n", - "Question: {question}`);\n", - "\n", - "const formatDocs = (docs: Document[]) => 
{\n", - " return docs.map((doc) => doc.pageContent).join(\"\\n\\n\");\n", - "}\n", - "\n", - "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", - "const ragChain = RunnableSequence.from([\n", - " {\n", - " context: retriever.pipe(formatDocs),\n", - " question: new RunnablePassthrough(),\n", - " },\n", - " prompt,\n", - " llm,\n", - " new StringOutputParser(),\n", - "]);" - ] - }, - { - "cell_type": "markdown", - "id": "22b1d6f8", - "metadata": {}, - "source": [ - "```{=mdx}\n", - "\n", - ":::tip\n", - "\n", - "See [our RAG tutorial](docs/tutorials/rag) for more information and examples on `RunnableSequence`'s like the one above.\n", - "\n", - ":::\n", - "\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", - "metadata": {}, - "outputs": [], - "source": [ - "await ragChain.invoke(\"...\")" - ] - }, - { - "cell_type": "markdown", - "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", - "metadata": {}, - "source": [ - "## API reference\n", - "\n", - "For detailed documentation of all AmazonKnowledgeBaseRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html)." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "TypeScript", - "language": "typescript", - "name": "tslab" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "typescript", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - } + "source": [ + "---\n", + "sidebar_label: Knowledge Bases for Amazon Bedrock\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Knowledge Bases for Amazon Bedrock\n", + "\n", + "## Overview\n", + "\n", + "This will help you getting started with the [AmazonKnowledgeBaseRetriever](/docs/concepts/#retrievers). 
For detailed documentation of all AmazonKnowledgeBaseRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html).\n", + "\n", + "Knowledge Bases for Amazon Bedrock is a fully managed, end-to-end RAG offering provided by Amazon Web Services (AWS).\n", + "It provides an entire ingestion workflow of converting your documents into embeddings (vector) and storing the embeddings in a specialized vector database.\n", + "Knowledge Bases for Amazon Bedrock supports popular databases for vector storage, including vector engine for Amazon OpenSearch Serverless, Pinecone, Redis Enterprise Cloud, Amazon Aurora (coming soon), and MongoDB (coming soon).\n", + "\n", + "### Integration details\n", + "\n", + "| Retriever | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/docs/integrations/retrievers/bedrock/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[AmazonKnowledgeBaseRetriever](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html) | 🟠 (see details below) | ✅ | @langchain/aws | ✅ |\n", + "\n", + "> AWS Knowledge Base Retriever can be 'self hosted' in the sense you can run it on your own AWS infrastructure. However, it is not possible to run on another cloud provider or on-premises.\n", + "\n", + "## Setup\n", + "\n", + "In order to use the AmazonKnowledgeBaseRetriever, you need to have an AWS account, where you can manage your indexes and documents. 
Once you've setup your account, set the following environment variables:\n", + "\n", + "```bash\n", + "process.env.AWS_KNOWLEDGE_BASE_ID=your-knowledge-base-id\n", + "process.env.AWS_ACCESS_KEY_ID=your-access-key-id\n", + "process.env.AWS_SECRET_ACCESS_KEY=your-secret-access-key\n", + "```" + ] }, - "nbformat": 4, - "nbformat_minor": 5 - } - \ No newline at end of file + { + "cell_type": "markdown", + "id": "72ee0c4b-9764-423a-9dbf-95129e185210", + "metadata": {}, + "source": [ + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de", + "metadata": {}, + "outputs": [], + "source": [ + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";" + ] + }, + { + "cell_type": "markdown", + "id": "0730d6a1-c893-4840-9817-5e5251676d5d", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "This retriever lives in the `@langchain/aws` package:\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/aws\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our retriever:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { AmazonKnowledgeBaseRetriever } from \"@langchain/aws\";\n", + "\n", + "const retriever = new AmazonKnowledgeBaseRetriever({\n", + " topK: 10,\n", + " knowledgeBaseId: process.env.AWS_KNOWLEDGE_BASE_ID,\n", + " region: \"us-east-2\",\n", + " clientOptions: {\n", + " 
credentials: {\n", + " accessKeyId: process.env.AWS_ACCESS_KEY_ID,\n", + " secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,\n", + " },\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [], + "source": [ + "const query = \"...\"\n", + "\n", + "await retriever.invoke(query);" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, AmazonKnowledgeBaseRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "We will need a LLM or chat model:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25b647a3-f8f2-4541-a289-7a241e43f9df", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o-mini\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: 
{question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => doc.pageContent).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: retriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "markdown", + "id": "22b1d6f8", + "metadata": {}, + "source": [ + "```{=mdx}\n", + "\n", + ":::tip\n", + "\n", + "See [our RAG tutorial](/docs/tutorials/rag) for more information and examples on `RunnableSequence`'s like the one above.\n", + "\n", + ":::\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [], + "source": [ + "await ragChain.invoke(\"...\")" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all AmazonKnowledgeBaseRetriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_aws.AmazonKnowledgeBaseRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "typescript", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/exa.ipynb b/docs/core_docs/docs/integrations/retrievers/exa.ipynb index 795f5fdc7d12..b73706c05d67 100644 --- a/docs/core_docs/docs/integrations/retrievers/exa.ipynb +++ b/docs/core_docs/docs/integrations/retrievers/exa.ipynb @@ -257,7 +257,7 @@ "metadata": {}, "outputs": [], "source": [ - "// @ls-docs-hide-cell\n", + "// @lc-docs-hide-cell\n", "\n", "import { ChatOpenAI } from \"@langchain/openai\";\n", "\n", diff --git a/docs/core_docs/docs/integrations/retrievers/kendra-retriever.ipynb b/docs/core_docs/docs/integrations/retrievers/kendra-retriever.ipynb index ad8c5da4751f..30dc60171f32 100644 --- a/docs/core_docs/docs/integrations/retrievers/kendra-retriever.ipynb +++ b/docs/core_docs/docs/integrations/retrievers/kendra-retriever.ipynb @@ -124,7 +124,7 @@ "source": [ "## Use within a chain\n", "\n", - "Like other retrievers, __module_name__ can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "Like other retrievers, the `AWSKendraRetriever` can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", "\n", "We will need a LLM or chat model:\n", "\n", @@ -142,7 +142,7 @@ "metadata": {}, "outputs": [], "source": [ - "// @ls-docs-hide-cell\n", + "// @lc-docs-hide-cell\n", "\n", "import { ChatOpenAI } from \"@langchain/openai\";\n", "\n", diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/chroma.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/chroma.ipynb new file mode 100644 index 
000000000000..8fb4f47b7e5f --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/chroma.ipynb @@ -0,0 +1,411 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Chroma\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Chroma\n", + "\n", + "This guide will help you get started with a self-query retriever backed by a [Chroma vector store](/docs/integrations/vectorstores/chroma). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/v0.2/docs/integrations/retrievers/self_query/chroma_self_query/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`Chroma`](https://api.js.langchain.com/classes/langchain_community_vectorstores_chroma.Chroma.html) | ✅ | ✅ | [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "Set up a Chroma instance as documented [here](/docs/integrations/vectorstores/chroma).\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your 
[LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The vector store lives in the `@langchain/community` package. You'll also need to install the `langchain` package to import the main `SelfQueryRetriever` class.\n", + "\n", + "For this example, we'll also use OpenAI embeddings, so you'll need to install the `@langchain/openai` package and [obtain an API key](https://platform.openai.com):\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community langchain @langchain/openai\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your Chroma vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { Chroma } from \"@langchain/community/vectorstores/chroma\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "const embeddings = new OpenAIEmbeddings();\n", + "const vectorStore = await Chroma.fromDocuments(docs, embeddings, {\n", + " collectionName: \"movie-collection\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o-mini\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { ChromaTranslator } from \"@langchain/community/structured_query/chroma\";\n", + "\n", + "const selfQueryRetriever = SelfQueryRetriever.fromLLM({\n", + " llm,\n", + " vectorStore,\n", + " /** A short summary of what the document 
contents represent. */\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo,\n", + " /**\n", + " * We need to create a basic translator that translates the queries into a\n", + " * filter format that the vector store can understand. We provide a basic\n", + " * translator here, but you can create your own translator by extending BaseTranslator\n", + " * abstract class. Note that the vector store needs to support filtering on the metadata\n", + " * attributes you want to query on.\n", + " */\n", + " structuredQueryTranslator: new ChromaTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { director: 'Satoshi Kon', rating: 8.6, year: 2006 },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " director: 'Andrei Tarkovsky',\n", + " genre: 'science fiction',\n", + " rating: 9.9,\n", + " year: 1979\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, Chroma self-query 
retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we format the retrieved documents differently to include that information." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The movies rated higher than 8.5 are:\n", + "\n", + "1. The movie directed by Andrei Tarkovsky, which has a rating of 9.9. \n", + "2. 
The movie directed by Satoshi Kon, which has a rating of 8.6.\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\")" + ] + }, + { + "cell_type": "markdown", + "id": "124ffade", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query. The filter syntax is the same as the backing Chroma vector store:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9216e4a4", + "metadata": {}, + "outputs": [], + "source": [ + "const selfQueryRetrieverWithDefaultParams = SelfQueryRetriever.fromLLM({\n", + " llm,\n", + " vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo,\n", + " structuredQueryTranslator: new ChromaTranslator(),\n", + " searchParams: {\n", + " filter: {\n", + " rating: {\n", + " $gt: 8.5,\n", + " }\n", + " },\n", + " mergeFiltersOperator: \"and\",\n", + " }\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Chroma self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/chroma.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/chroma.mdx deleted file mode 100644 index af541a920d65..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/chroma.mdx +++ /dev/null @@ -1,48 +0,0 @@ -# Chroma Self Query Retriever - -This example shows how to use a self query retriever with a Chroma vector store. - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/community -``` - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/chroma_self_query.ts"; - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to create a basic translator that translates the queries into a - * filter format that the vector store can understand. We provide a basic translator - * translator here, but you can create your own translator by extending BaseTranslator - * abstract class. Note that the vector store needs to support filtering on the metadata - * attributes you want to query on. 
- */ - structuredQueryTranslator: new ChromaTranslator(), - searchParams: { - filter: { - rating: { - $gt: 8.5, - }, - }, - mergeFiltersOperator: "and", - }, -}); -``` - -See [the official docs](https://docs.trychroma.com/usage-guide#using-where-filters) for a full list of filters. diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.ipynb new file mode 100644 index 000000000000..0b0d16400815 --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.ipynb @@ -0,0 +1,405 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: HNSWLib\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# HNSWLib\n", + "\n", + "This guide will help you getting started with such a retriever backed by a [HNSWLib vector store](/docs/integrations/vectorstores/hnswlib). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. 
This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | Py support |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`HNSWLib`](https://api.js.langchain.com/classes/langchain_community_vectorstores_hnswlib.HNSWLib.html) | ✅ | ❌ | [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) | ❌ |\n", + "\n", + "## Setup\n", + "\n", + "Set up a HNSWLib instance as documented [here](/docs/integrations/vectorstores/hnswlib).\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The vector store lives in the `@langchain/community` package. 
You'll also need to install the `langchain` package to import the main `SelfQueryRetriever` class.\n", + "\n", + "For this example, we'll also use OpenAI embeddings, so you'll need to install the `@langchain/openai` package and [obtain an API key](https://platform.openai.com):\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community langchain @langchain/openai\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your HNSWLib vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { HNSWLib } from \"@langchain/community/vectorstores/hnswlib\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "const embeddings = new OpenAIEmbeddings();\n", + "const vectorStore = await HNSWLib.fromDocuments(docs, embeddings);" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { FunctionalTranslator } from \"@langchain/core/structured_query\";\n", + "\n", + "const selfQueryRetriever = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " /** A short summary of what the document contents represent. 
*/\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " /**\n", + " * We need to create a basic translator that translates the queries into a\n", + " * filter format that the vector store can understand. We provide a basic\n", + " * translator here, but you can create your own translator by extending BaseTranslator\n", + " * abstract class. Note that the vector store needs to support filtering on the metadata\n", + " * attributes you want to query on.\n", + " */\n", + " structuredQueryTranslator: new FunctionalTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { year: 2006, director: 'Satoshi Kon', rating: 8.6 },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " year: 1979,\n", + " director: 'Andrei Tarkovsky',\n", + " genre: 'science fiction',\n", + " rating: 9.9\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, HNSWLib self-query 
retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we format the retrieved documents differently to include that information." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The movies rated higher than 8.5 are:\n", + "\n", + "1. The movie directed by Satoshi Kon in 2006, which has a rating of 8.6.\n", + "2. 
The movie directed by Andrei Tarkovsky in 1979, which has a rating of 9.9.\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\")" + ] + }, + { + "cell_type": "markdown", + "id": "30e62cac", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query. The filter syntax is a predicate function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65ea413d", + "metadata": {}, + "outputs": [], + "source": [ + "const selfQueryRetrieverWithDefaults = SelfQueryRetriever.fromLLM({\n", + " llm,\n", + " vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo,\n", + " structuredQueryTranslator: new FunctionalTranslator(),\n", + " searchParams: {\n", + " filter: (doc: Document) => doc.metadata && doc.metadata.rating > 8.5,\n", + " mergeFiltersOperator: \"and\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all HNSWLib self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.mdx deleted file mode 100644 index 1e672afd7628..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/hnswlib.mdx +++ /dev/null @@ -1,42 +0,0 @@ -# HNSWLib Self Query Retriever - -This example shows how to use a self query retriever with an HNSWLib vector store. - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/community -``` - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/hnswlib_self_query.ts"; - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to use a translator that translates the queries into a - * filter format that the vector store can understand. We provide a basic translator - * translator here, but you can create your own translator by extending BaseTranslator - * abstract class. Note that the vector store needs to support filtering on the metadata - * attributes you want to query on. 
- */ - structuredQueryTranslator: new FunctionalTranslator(), - searchParams: { - filter: (doc: Document) => doc.metadata && doc.metadata.rating > 8.5, - mergeFiltersOperator: "and", - }, -}); -``` diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/memory.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/memory.ipynb new file mode 100644 index 000000000000..3612e4bf45e6 --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/memory.ipynb @@ -0,0 +1,405 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: In-memory\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# In-memory\n", + "\n", + "This guide will help you getting started with such a retriever backed by an [in-memory vector store](/docs/integrations/vectorstores/memory). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. 
This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/v0.2/docs/integrations/vectorstores/docarray_in_memory/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`MemoryVectorStore`](https://api.js.langchain.com/classes/langchain_vectorstores_memory.MemoryVectorStore.html) | ✅ | ❌ | [`langchain`](https://www.npmjs.com/package/langchain) | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "Set up an in-memory vector store instance as documented [here](/docs/integrations/vectorstores/memory).\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The `MemoryVectorStore` lives in the main `langchain` package. 
You'll also need to install the `langchain` package to import the main `SelfQueryRetriever` class.\n", + "\n", + "For this example, we'll also use OpenAI embeddings, so you'll need to install the `@langchain/openai` package and [obtain an API key](https://platform.openai.com):\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community langchain @langchain/openai\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your in-memory vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "const embeddings = new OpenAIEmbeddings();\n", + "const vectorStore = await MemoryVectorStore.fromDocuments(docs, embeddings);" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { FunctionalTranslator } from \"@langchain/core/structured_query\";\n", + "\n", + "const selfQueryRetriever = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " /** A short summary of what the document contents represent. 
*/\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " /**\n", + " * We need to create a basic translator that translates the queries into a\n", + " * filter format that the vector store can understand. We provide a basic translator\n", + " * translator here, but you can create your own translator by extending BaseTranslator\n", + " * abstract class. Note that the vector store needs to support filtering on the metadata\n", + " * attributes you want to query on.\n", + " */\n", + " structuredQueryTranslator: new FunctionalTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { year: 2006, director: 'Satoshi Kon', rating: 8.6 },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " year: 1979,\n", + " director: 'Andrei Tarkovsky',\n", + " genre: 'science fiction',\n", + " rating: 9.9\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, in-memory self-query 
retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we format the retrieved documents differently to include that information." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The movies rated higher than 8.5 are:\n", + "\n", + "1. The movie directed by Satoshi Kon in 2006, which has a rating of 8.6.\n", + "2. 
The movie directed by Andrei Tarkovsky in 1979, which has a rating of 9.9.\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\")" + ] + }, + { + "cell_type": "markdown", + "id": "516d8472", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query. The filter syntax is a predicate function:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cb9ddd99", + "metadata": {}, + "outputs": [], + "source": [ + "const selfQueryRetrieverWithDefaultParams = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new FunctionalTranslator(),\n", + " searchParams: {\n", + " filter: (doc: Document) => doc.metadata && doc.metadata.rating > 8.5,\n", + " mergeFiltersOperator: \"and\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all in-memory self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/memory.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/memory.mdx deleted file mode 100644 index bf92e44d0f90..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/memory.mdx +++ /dev/null @@ -1,42 +0,0 @@ -# Memory Vector Store Self Query Retriever - -This example shows how to use a self query retriever with a basic, in-memory vector store. - -## Usage - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/memory_self_query.ts"; - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai -``` - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to use a translator that translates the queries into a - * filter format that the vector store can understand. We provide a basic translator - * translator here, but you can create your own translator by extending BaseTranslator - * abstract class. Note that the vector store needs to support filtering on the metadata - * attributes you want to query on. 
- */ - structuredQueryTranslator: new FunctionalTranslator(), - searchParams: { - filter: (doc: Document) => doc.metadata && doc.metadata.rating > 8.5, - mergeFiltersOperator: "and", - }, -}); -``` diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/pinecone.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/pinecone.ipynb new file mode 100644 index 000000000000..277a194fd034 --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/pinecone.ipynb @@ -0,0 +1,423 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Pinecone\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Pinecone\n", + "\n", + "This guide will help you getting started with such a retriever backed by a [Pinecone vector store](/docs/integrations/vectorstores/pinecone). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. 
This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/v0.2/docs/integrations/retrievers/self_query/pinecone/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`PineconeStore`](https://api.js.langchain.com/classes/langchain_pinecone.PineconeStore.html) | ❌ | ✅ | [`@langchain/pinecone`](https://www.npmjs.com/package/@langchain/pinecone) | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "Set up a Pinecone instance as documented [here](/docs/integrations/vectorstores/pinecone). Set the following environment variables:\n", + "\n", + "```ts\n", + "process.env.PINECONE_API_KEY = \"YOUR_API_KEY\";\n", + "process.env.PINECONE_ENVIRONMENT = \"YOUR_ENVIRONMENT\";\n", + "process.env.PINECONE_INDEX = \"YOUR_INDEX\";\n", + "```\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The vector store lives in the `@langchain/pinecone` package. 
You'll also need to install the `langchain` package to import the main `SelfQueryRetriever` class.\n", + "\n", + "The official Pinecone SDK (`@pinecone-database/pinecone`) is automatically installed as a dependency of `@langchain/pinecone`, but you may wish to install it independently as well.\n", + "\n", + "For this example, we'll also use OpenAI embeddings, so you'll need to install the `@langchain/openai` package and [obtain an API key](https://platform.openai.com):\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/pinecone langchain @langchain/openai @pinecone-database/pinecone\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your Pinecone vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { Pinecone } from \"@pinecone-database/pinecone\";\n", + "\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { PineconeStore } from \"@langchain/pinecone\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "\n", + "const pinecone = new Pinecone();\n", + "\n", + "const pineconeIndex = pinecone.Index(process.env.PINECONE_INDEX!);\n", + "\n", + "const embeddings = new OpenAIEmbeddings();\n", + "const vectorStore = await PineconeStore.fromDocuments(docs, embeddings, {\n", + " pineconeIndex: pineconeIndex,\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { PineconeTranslator } from \"@langchain/pinecone\";\n", + "\n", + "const selfQueryRetriever = 
SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " /** A short summary of what the document contents represent. */\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " /**\n", + " * We need to create a basic translator that translates the queries into a\n", + " * filter format that the vector store can understand. We provide a basic translator\n", + " * translator here, but you can create your own translator by extending BaseTranslator\n", + " * abstract class. Note that the vector store needs to support filtering on the metadata\n", + " * attributes you want to query on.\n", + " */\n", + " structuredQueryTranslator: new PineconeTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { director: 'Satoshi Kon', rating: 8.6, year: 2006 },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " director: 'Andrei Tarkovsky',\n", + " genre: 'science fiction',\n", + " rating: 9.9,\n", + " year: 1979\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": 
"dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, Pinecone self-query retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we format the retrieved documents differently to include that information." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The movies rated higher than 8.5 are the ones directed by Satoshi Kon (rating: 8.6) and Andrei Tarkovsky (rating: 9.9).\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\")" + ] + }, + { + "cell_type": 
"markdown", + "id": "c52ef888", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query. The filter syntax is the same as the backing Pinecone vector store:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f6103afe", + "metadata": {}, + "outputs": [], + "source": [ + "const selfQueryRetrieverWithDefaultParams = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new PineconeTranslator(),\n", + " searchParams: {\n", + " filter: {\n", + " rating: {\n", + " $gt: 8.5,\n", + " },\n", + " },\n", + " mergeFiltersOperator: \"and\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Pinecone self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/pinecone.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/pinecone.mdx deleted file mode 100644 index da7adc8d271d..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/pinecone.mdx +++ /dev/null @@ -1,48 +0,0 @@ -# Pinecone Self Query Retriever - -This example shows how to use a self query retriever with a Pinecone vector store. - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/pinecone -``` - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/pinecone_self_query.ts"; - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to create a basic translator that translates the queries into a - * filter format that the vector store can understand. We provide a basic translator - * translator here, but you can create your own translator by extending BaseTranslator - * abstract class. Note that the vector store needs to support filtering on the metadata - * attributes you want to query on. 
- */ - structuredQueryTranslator: new PineconeTranslator(), - searchParams: { - filter: { - rating: { - $gt: 8.5, - }, - }, - mergeFiltersOperator: "and", - }, -}); -``` - -See the [official docs](https://docs.pinecone.io/docs/metadata-filtering) for more on how to construct metadata filters. diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/qdrant.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/qdrant.ipynb new file mode 100644 index 000000000000..154e9bd6150a --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/qdrant.ipynb @@ -0,0 +1,418 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Qdrant\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Qdrant\n", + "\n", + "This guide will help you getting started with such a retriever backed by a [Qdrant vector store](/docs/integrations/vectorstores/qdrant). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. 
This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/v0.2/docs/integrations/retrievers/self_query/qdrant_self_query/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`QdrantVectorStore`](https://api.js.langchain.com/classes/langchain_qdrant.QdrantVectorStore.html) | ✅ | ✅ | [`@langchain/qdrant`](https://www.npmjs.com/package/@langchain/qdrant) | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "Set up a Qdrant instance as documented [here](/docs/integrations/vectorstores/qdrant). Set the following environment variables:\n", + "\n", + "```ts\n", + "process.env.QDRANT_URL = \"YOUR_QDRANT_URL_HERE\" // for example, http://localhost:6333\n", + "```\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The vector store lives in the `@langchain/qdrant` package. 
You'll also need to install the `langchain` and `@langchain/community` packages to import the main `SelfQueryRetriever` classes.\n", + "\n", + "For this example, we'll also use OpenAI embeddings, so you'll need to install the `@langchain/openai` package and [obtain an API key](https://platform.openai.com):\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/qdrant langchain @langchain/community @langchain/openai\n", + "\n", + "```\n", + "\n", + "The official Qdrant SDK (`@qdrant/js-client-rest`) is automatically installed as a dependency of `@langchain/qdrant`, but you may wish to install it independently as well." + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your Qdrant vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { QdrantVectorStore } from \"@langchain/qdrant\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "import { QdrantClient } from \"@qdrant/js-client-rest\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "\n", + "const client = new QdrantClient({ url: process.env.QDRANT_URL });\n", + "\n", + "const embeddings = new OpenAIEmbeddings();\n", + "const vectorStore = await QdrantVectorStore.fromDocuments(docs, embeddings, {\n", + " client,\n", + " collectionName: \"movie-collection\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { QdrantTranslator } from \"@langchain/community/structured_query/qdrant\";\n", + "\n", + "const selfQueryRetriever = 
SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " /** A short summary of what the document contents represent. */\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new QdrantTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { director: 'Satoshi Kon', rating: 8.6, year: 2006 },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " director: 'Andrei Tarkovsky',\n", + " genre: 'science fiction',\n", + " rating: 9.9,\n", + " year: 1979\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, Qdrant self-query retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we format the retrieved documents differently to include that information." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The movies rated higher than 8.5 are the ones directed by Satoshi Kon (rating: 8.6) and Andrei Tarkovsky (rating: 9.9).\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\")" + ] + }, + { + "cell_type": "markdown", + "id": "c52ef888", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query. 
The filter syntax is the same as the backing Qdrant vector store:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f6103afe", + "metadata": {}, + "outputs": [], + "source": [ + "const selfQueryRetrieverWithDefaultParams = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new QdrantTranslator(),\n", + " searchParams: {\n", + " filter: {\n", + " must: [\n", + " {\n", + " key: \"metadata.rating\",\n", + " range: {\n", + " gt: 8.5,\n", + " },\n", + " },\n", + " ],\n", + " },\n", + " mergeFiltersOperator: \"and\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Qdrant self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/qdrant.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/qdrant.mdx deleted file mode 100644 index b5b2ab559946..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/qdrant.mdx +++ /dev/null @@ -1,53 +0,0 @@ -# Qdrant Self Query Retriever - -This example shows how to use a self query retriever with a Qdrant vector store. 
- -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/community @langchain/qdrant @qdrant/js-client-rest -``` - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/qdrant_self_query.ts"; - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to create a basic translator that translates the queries into a - * filter format that the vector store can understand. We provide a basic translator here. - * You can create your own translator by extending BaseTranslator - * abstract class. Note that the vector store needs to support filtering on the metadata - * attributes you want to query on. - */ - structuredQueryTranslator: new QdrantTranslator(), - searchParams: { - filter: { - must: [ - { - key: "metadata.rating", - range: { - gt: 8.5, - }, - }, - ], - }, - mergeFiltersOperator: "and", - }, -}); -``` - -See the [official docs](https://qdrant.tech/documentation/concepts/filtering/) for more on how to construct metadata filters. 
diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/supabase.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/supabase.ipynb new file mode 100644 index 000000000000..b249902a7c66 --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/supabase.ipynb @@ -0,0 +1,415 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Supabase\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Supabase\n", + "\n", + "This guide will help you get started with such a retriever backed by a [Supabase vector store](/docs/integrations/vectorstores/supabase). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. 
This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/v0.2/docs/integrations/retrievers/self_query/supabase_self_query/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`SupabaseVectorStore`](https://api.js.langchain.com/classes/langchain_community_vectorstores_supabase.SupabaseVectorStore.html) | ✅ | ✅ | [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "Set up a Supabase instance as documented [here](/docs/integrations/vectorstores/supabase). Set the following environment variables:\n", + "\n", + "```ts\n", + "process.env.SUPABASE_PRIVATE_KEY = \"YOUR_SUPABASE_PRIVATE_KEY\";\n", + "process.env.SUPABASE_URL = \"YOUR_SUPABASE_URL\";\n", + "```\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The vector store lives in the `@langchain/community` package, which requires the official Supabase SDK as a peer dependency. 
You'll also need to install the `langchain` package to import the main `SelfQueryRetriever` class.\n", + "\n", + "For this example, we'll also use OpenAI embeddings, so you'll need to install the `@langchain/openai` package and [obtain an API key](https://platform.openai.com):\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community langchain @langchain/openai @supabase/supabase-js\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your Supabase vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { SupabaseVectorStore } from \"@langchain/community/vectorstores/supabase\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "import { createClient } from \"@supabase/supabase-js\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "\n", + "const client = createClient(\n", + " process.env.SUPABASE_URL,\n", + " process.env.SUPABASE_PRIVATE_KEY\n", + ");\n", + "\n", + "const embeddings = new OpenAIEmbeddings();\n", + "const vectorStore = await SupabaseVectorStore.fromDocuments(docs, embeddings, {\n", + " client,\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { SupabaseTranslator } from \"@langchain/community/structured_query/supabase\";\n", + "\n", + "const selfQueryRetriever = 
SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " /** A short summary of what the document contents represent. */\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new SupabaseTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { year: 2006, rating: 8.6, director: 'Satoshi Kon' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " year: 1979,\n", + " genre: 'science fiction',\n", + " rating: 9.9,\n", + " director: 'Andrei Tarkovsky'\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, Supabase self-query retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we format the retrieved documents differently to include that information." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The movies rated higher than 8.5 are:\n", + "\n", + "1. The movie directed by Satoshi Kon in 2006, which has a rating of 8.6.\n", + "2. The movie directed by Andrei Tarkovsky in 1979, which has a rating of 9.9.\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\");" + ] + }, + { + "cell_type": "markdown", + "id": "c52ef888", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query. 
The filter syntax is a function that returns a [Supabase filter](https://supabase.com/docs/reference/javascript/filter):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6103afe", + "metadata": {}, + "outputs": [], + "source": [ + "import type { SupabaseFilter } from \"@langchain/community/vectorstores/supabase\";\n", + "\n", + "const selfQueryRetrieverWithDefaultParams = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new SupabaseTranslator(),\n", + " searchParams: {\n", + " filter: (rpc: SupabaseFilter) => rpc.filter(\"metadata->>type\", \"eq\", \"movie\"),\n", + " mergeFiltersOperator: \"and\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Supabase self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/supabase.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/supabase.mdx deleted file mode 100644 index b18717179160..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/supabase.mdx +++ /dev/null @@ -1,46 +0,0 @@ -# Supabase Self Query Retriever - -This example shows how to use a self query retriever with a [Supabase](https://supabase.com) vector store. - -If you haven't already set up Supabase, please [follow the instructions here](/docs/integrations/vectorstores/supabase). - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/community -``` - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/supabase_self_query.ts"; - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to create a basic translator that translates the queries into a - * filter format that the vector store can understand. We provide a basic translator - * translator here, but you can create your own translator by extending BaseTranslator - * abstract class. Note that the vector store needs to support filtering on the metadata - * attributes you want to query on. 
- */ - structuredQueryTranslator: new SupabaseTranslator(), - searchParams: { - filter: (rpc: SupabaseFilter) => rpc.filter("metadata->>type", "eq", "movie"),, - mergeFiltersOperator: "and", - } -}); -``` - -See the [official docs](https://postgrest.org/en/stable/references/api/tables_views.html?highlight=operators#json-columns) for more on how to construct metadata filters. diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/vectara.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/vectara.ipynb new file mode 100644 index 000000000000..0d45945e7e5f --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/vectara.ipynb @@ -0,0 +1,417 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Vectara\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Vectara\n", + "\n", + "This guide will help you get started with such a retriever backed by a [Vectara vector store](/docs/integrations/vectorstores/vectara). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. 
This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/v0.2/docs/integrations/retrievers/self_query/vectara_self_query/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`VectaraStore`](https://api.js.langchain.com/classes/langchain_community_vectorstores_vectara.VectaraStore.html) | ❌ | ✅ | [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "Set up a Vectara instance as documented [here](/docs/integrations/vectorstores/vectara). Set the following environment variables:\n", + "\n", + "```typescript\n", + "process.env.VECTARA_CUSTOMER_ID = \"your_customer_id\";\n", + "process.env.VECTARA_CORPUS_ID = \"your_corpus_id\";\n", + "process.env.VECTARA_API_KEY = \"your-vectara-api-key\";\n", + "```\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The vector store lives in the `@langchain/community` package. 
You'll also need to install the `langchain` package to import the main `SelfQueryRetriever` class.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/community langchain\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your Vectara vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { VectaraStore } from \"@langchain/community/vectorstores/vectara\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "// Vectara provides embeddings\n", + "import { FakeEmbeddings } from \"@langchain/core/utils/testing\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "// Vectara provides embeddings\n", + "const embeddings = new FakeEmbeddings();\n", + "const vectorStore = await VectaraStore.fromDocuments(docs, embeddings, {\n", + " customerId: Number(process.env.VECTARA_CUSTOMER_ID),\n", + " corpusId: Number(process.env.VECTARA_CORPUS_ID),\n", + " apiKey: String(process.env.VECTARA_API_KEY),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { VectaraTranslator } from 
\"@langchain/community/structured_query/vectara\";\n", + "\n", + "const selfQueryRetriever = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " /** A short summary of what the document contents represent. */\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new VectaraTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { year: 2006, rating: 8.6, director: 'Satoshi Kon' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " year: 1979,\n", + " genre: 'science fiction',\n", + " rating: 9.9,\n", + " director: 'Andrei Tarkovsky'\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within a chain\n", + "\n", + "Like other retrievers, Vectara self-query retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we 
format the retrieved documents differently to include that information." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The movies rated higher than 8.5 are:\n", + "\n", + "1. The movie directed by Satoshi Kon in 2006, which has a rating of 8.6.\n", + "2. 
The movie directed by Andrei Tarkovsky in 1979, which has a rating of 9.9.\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\");" + ] + }, + { + "cell_type": "markdown", + "id": "c52ef888", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query.\n", + "\n", + "See the [official docs](https://docs.vectara.com/) for more on how to construct metadata filters." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f6103afe", + "metadata": {}, + "outputs": [], + "source": [ + "const selfQueryRetrieverWithDefaultParams = SelfQueryRetriever.fromLLM({\n", + " llm,\n", + " vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo,\n", + " /**\n", + " * We need to use a translator that translates the queries into a\n", + " * filter format that the vector store can understand. LangChain provides one here.\n", + " */\n", + " structuredQueryTranslator: new VectaraTranslator(),\n", + " searchParams: {\n", + " filter: {\n", + " filter: \"( doc.genre = 'science fiction' ) and ( doc.rating > 8.5 )\",\n", + " },\n", + " mergeFiltersOperator: \"and\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Vectara self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/vectara.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/vectara.mdx deleted file mode 100644 index 4722aa491ca3..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/vectara.mdx +++ /dev/null @@ -1,47 +0,0 @@ -# Vectara Self Query Retriever - -This example shows how to use a self query retriever with a [Vectara](https://vectara.com/) vector store. - -If you haven't already set up Vectara, please [follow the instructions here](/docs/integrations/vectorstores/vectara.mdx). - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/community -``` - -This example shows how to intialize a `SelfQueryRetriever` with a vector store: - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/vectara_self_query.ts"; - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to use a translator that translates the queries into a - * filter format that the vector store can understand. LangChain provides one here. 
- */ - structuredQueryTranslator: new VectaraTranslator()(), - searchParams: { - filter: { - filter: "( doc.genre = 'science fiction' ) and ( doc.rating > 8.5 )", - }, - mergeFiltersOperator: "and", - }, -}); -``` - -See the [official docs](https://docs.vectara.com/) for more on how to construct metadata filters. diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/weaviate.ipynb b/docs/core_docs/docs/integrations/retrievers/self_query/weaviate.ipynb new file mode 100644 index 000000000000..09f3f4ff1d4e --- /dev/null +++ b/docs/core_docs/docs/integrations/retrievers/self_query/weaviate.ipynb @@ -0,0 +1,426 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Weaviate\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# Weaviate\n", + "\n", + "This guide will help you getting started with such a retriever backed by a [Weaviate vector store](/docs/integrations/vectorstores/weaviate). For detailed documentation of all features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html).\n", + "\n", + "## Overview\n", + "\n", + "A [self-query retriever](/docs/how_to/self_query/) retrieves documents by dynamically generating metadata filters based on some input query. 
This allows the retriever to account for underlying document metadata in addition to pure semantic similarity when fetching results.\n", + "\n", + "It uses a module called a `Translator` that generates a filter based on information about metadata fields and the query language that a given vector store supports.\n", + "\n", + "### Integration details\n", + "\n", + "| Backing vector store | Self-host | Cloud offering | Package | [Py support](https://python.langchain.com/v0.2/docs/integrations/retrievers/self_query/weaviate_self_query/) |\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "[`WeaviateVectorStore`](https://api.js.langchain.com/classes/langchain_weaviate.WeaviateStore.html) | ✅ | ✅ | [`@langchain/weaviate`](https://www.npmjs.com/package/@langchain/weaviate) | ✅ |\n", + "\n", + "## Setup\n", + "\n", + "Set up a Weaviate instance as documented [here](/docs/integrations/vectorstores/weaviate). Set the following environment variables if relevant:\n", + "\n", + "```ts\n", + "process.env.WEAVIATE_SCHEME = \"https\";\n", + "// Include port if relevant, e.g. \"localhost:8080\"\n", + "process.env.WEAVIATE_HOST = \"YOUR_WEAVIATE_HOST\";\n", + "process.env.WEAVIATE_URL = \"YOUR_WEAVIATE_URL\";\n", + "```\n", + "\n", + "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGSMITH_API_KEY = \"\";\n", + "// process.env.LANGSMITH_TRACING = \"true\";\n", + "```\n", + "\n", + "### Installation\n", + "\n", + "The vector store lives in the `@langchain/weaviate` package. 
You'll also need to install the `langchain` package to import the main `SelfQueryRetriever` class.\n", + "\n", + "The official Weaviate SDK (`weaviate-ts-client`) is automatically installed as a dependency of `@langchain/weaviate`, but you may wish to install it independently as well.\n", + "\n", + "For this example, we'll also use OpenAI embeddings, so you'll need to install the `@langchain/openai` package and [obtain an API key](https://platform.openai.com):\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/weaviate langchain @langchain/openai weaviate-ts-client\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "First, initialize your Weaviate vector store with some documents that contain metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e7fd15a5", + "metadata": {}, + "outputs": [], + "source": [ + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { WeaviateStore } from \"@langchain/weaviate\";\n", + "import { Document } from \"@langchain/core/documents\";\n", + "import type { AttributeInfo } from \"langchain/chains/query_constructor\";\n", + "\n", + "import weaviate from \"weaviate-ts-client\";\n", + "\n", + "/**\n", + " * First, we create a bunch of documents. You can load your own documents here instead.\n", + " * Each document has a pageContent and a metadata field. 
Make sure your metadata matches the AttributeInfo below.\n", + " */\n", + "const docs = [\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n", + " metadata: { year: 1993, rating: 7.7, genre: \"science fiction\" },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n", + " metadata: { year: 2010, director: \"Christopher Nolan\", rating: 8.2 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n", + " metadata: { year: 2006, director: \"Satoshi Kon\", rating: 8.6 },\n", + " }),\n", + " new Document({\n", + " pageContent:\n", + " \"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n", + " metadata: { year: 2019, director: \"Greta Gerwig\", rating: 8.3 },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Toys come alive and have a blast doing so\",\n", + " metadata: { year: 1995, genre: \"animated\" },\n", + " }),\n", + " new Document({\n", + " pageContent: \"Three men walk into the Zone, three men walk out of the Zone\",\n", + " metadata: {\n", + " year: 1979,\n", + " director: \"Andrei Tarkovsky\",\n", + " genre: \"science fiction\",\n", + " rating: 9.9,\n", + " },\n", + " }),\n", + "];\n", + "\n", + "/**\n", + " * Next, we define the attributes we want to be able to query on.\n", + " * in this case, we want to be able to query on the genre, year, director, rating, and length of the movie.\n", + " * We also provide a description of each attribute and the type of the attribute.\n", + " * This is used to generate the query prompts.\n", + " */\n", + "const attributeInfo: AttributeInfo[] = [\n", + " {\n", + " name: \"genre\",\n", + " description: \"The genre of the movie\",\n", + " type: \"string or array of strings\",\n", + " 
},\n", + " {\n", + " name: \"year\",\n", + " description: \"The year the movie was released\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"director\",\n", + " description: \"The director of the movie\",\n", + " type: \"string\",\n", + " },\n", + " {\n", + " name: \"rating\",\n", + " description: \"The rating of the movie (1-10)\",\n", + " type: \"number\",\n", + " },\n", + " {\n", + " name: \"length\",\n", + " description: \"The length of the movie in minutes\",\n", + " type: \"number\",\n", + " },\n", + "];\n", + "\n", + "/**\n", + " * Next, we instantiate a vector store. This is where we store the embeddings of the documents.\n", + " * We also need to provide an embeddings object. This is used to embed the documents.\n", + " */\n", + "const client = (weaviate as any).client({\n", + " scheme: process.env.WEAVIATE_SCHEME || \"https\",\n", + " host: process.env.WEAVIATE_HOST || \"localhost\",\n", + " apiKey: process.env.WEAVIATE_API_KEY\n", + " ? // eslint-disable-next-line @typescript-eslint/no-explicit-any\n", + " new (weaviate as any).ApiKey(process.env.WEAVIATE_API_KEY)\n", + " : undefined,\n", + "});\n", + "\n", + "const embeddings = new OpenAIEmbeddings();\n", + "const vectorStore = await WeaviateStore.fromDocuments(docs, embeddings, {\n", + " client,\n", + " indexName: \"Test\",\n", + " textKey: \"text\",\n", + " metadataKeys: [\"year\", \"director\", \"rating\", \"genre\"],\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "f321c7bd", + "metadata": {}, + "source": [ + "Now we can instantiate our retriever:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "821ea7a0", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + 
"});" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "70cc8e65-2a02-408a-bbc6-8ef649057d82", + "metadata": {}, + "outputs": [], + "source": [ + "import { SelfQueryRetriever } from \"langchain/retrievers/self_query\";\n", + "import { WeaviateTranslator } from \"@langchain/weaviate\";\n", + "\n", + "const selfQueryRetriever = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " /** A short summary of what the document contents represent. */\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new WeaviateTranslator(),\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Now, ask a question that requires some knowledge of the document's metadata to answer. You can see that the retriever will generate the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea',\n", + " metadata: { director: 'Satoshi Kon', genre: null, rating: 8.6, year: 2006 },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Three men walk into the Zone, three men walk out of the Zone',\n", + " metadata: {\n", + " director: 'Andrei Tarkovsky',\n", + " genre: 'science fiction',\n", + " rating: 9.9,\n", + " year: 1979\n", + " },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "await selfQueryRetriever.invoke(\n", + " \"Which movies are rated higher than 8.5?\"\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## 
Use within a chain\n", + "\n", + "Like other retrievers, Weaviate self-query retrievers can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n", + "\n", + "Note that because their returned answers can heavily depend on document metadata, we format the retrieved documents differently to include that information." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", + "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", + "\n", + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromTemplate(`\n", + "Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}`);\n", + "\n", + "const formatDocs = (docs: Document[]) => {\n", + " return docs.map((doc) => JSON.stringify(doc)).join(\"\\n\\n\");\n", + "}\n", + "\n", + "// See https://js.langchain.com/v0.2/docs/tutorials/rag\n", + "const ragChain = RunnableSequence.from([\n", + " {\n", + " context: selfQueryRetriever.pipe(formatDocs),\n", + " question: new RunnablePassthrough(),\n", + " },\n", + " prompt,\n", + " llm,\n", + " new StringOutputParser(),\n", + "]);" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d47c37dd-5c11-416c-a3b6-bec413cd70e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Both movies are rated higher than 8.5. 
The first movie directed by Satoshi Kon has a rating of 8.6, and the second movie directed by Andrei Tarkovsky has a rating of 9.9.\n" + ] + } + ], + "source": [ + "await ragChain.invoke(\"Which movies are rated higher than 8.5?\");" + ] + }, + { + "cell_type": "markdown", + "id": "c52ef888", + "metadata": {}, + "source": [ + "## Default search params\n", + "\n", + "You can also pass a `searchParams` field into the above method that provides default filters applied in addition to any generated query." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f6103afe", + "metadata": {}, + "outputs": [], + "source": [ + "const selfQueryRetrieverWithDefaultParams = SelfQueryRetriever.fromLLM({\n", + " llm: llm,\n", + " vectorStore: vectorStore,\n", + " documentContents: \"Brief summary of a movie\",\n", + " attributeInfo: attributeInfo,\n", + " structuredQueryTranslator: new WeaviateTranslator(),\n", + " searchParams: {\n", + " filter: {\n", + " where: {\n", + " operator: \"Equal\",\n", + " path: [\"type\"],\n", + " valueText: \"movie\",\n", + " },\n", + " },\n", + " mergeFiltersOperator: \"or\",\n", + " },\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all Weaviate self-query retriever features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_retrievers_self_query.SelfQueryRetriever.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/retrievers/self_query/weaviate.mdx b/docs/core_docs/docs/integrations/retrievers/self_query/weaviate.mdx deleted file mode 100644 index 625d96d9e649..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/self_query/weaviate.mdx +++ /dev/null @@ -1,53 +0,0 @@ -# Weaviate Self Query Retriever - -This example shows how to use a self query retriever with a [Weaviate](https://weaviate.io/) vector store. - -If you haven't already set up Weaviate, please [follow the instructions here](/docs/integrations/vectorstores/weaviate). - -## Usage - -This example shows how to intialize a `SelfQueryRetriever` with a vector store: - -Weaviate has their own standalone integration package with LangChain, accessible via [`@langchain/weaviate`](https://www.npmjs.com/package/@langchain/weaviate) on NPM! - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/weaviate @langchain/openai -``` - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/retrievers/weaviate_self_query.ts"; - -{Example} - -You can also initialize the retriever with default search parameters that apply in -addition to the generated query: - -```typescript -const selfQueryRetriever = SelfQueryRetriever.fromLLM({ - llm, - vectorStore, - documentContents, - attributeInfo, - /** - * We need to use a translator that translates the queries into a - * filter format that the vector store can understand. LangChain provides one here. 
- */ - structuredQueryTranslator: new WeaviateTranslator(), - searchParams: { - filter: { - where: { - operator: "Equal", - path: ["type"], - valueText: "movie", - }, - }, - mergeFiltersOperator: "or", - }, -}); -``` - -See the [official docs](https://weaviate.io/developers/weaviate/api/graphql/filters) for more on how to construct metadata filters. diff --git a/docs/core_docs/package.json b/docs/core_docs/package.json index ec24faff8686..86526b9483af 100644 --- a/docs/core_docs/package.json +++ b/docs/core_docs/package.json @@ -37,7 +37,7 @@ "@docusaurus/remark-plugin-npm2yarn": "2.4.3", "@docusaurus/theme-mermaid": "2.4.3", "@mdx-js/react": "^1.6.22", - "@supabase/supabase-js": "^2.39.7", + "@supabase/supabase-js": "^2.45.0", "clsx": "^1.2.1", "cookie": "^0.6.0", "isomorphic-dompurify": "^2.9.0", diff --git a/docs/core_docs/scripts/quarto-build.js b/docs/core_docs/scripts/quarto-build.js index 1caac722ea03..2afe49f64a19 100644 --- a/docs/core_docs/scripts/quarto-build.js +++ b/docs/core_docs/scripts/quarto-build.js @@ -3,7 +3,7 @@ const { glob } = require("glob"); const { execSync } = require("node:child_process"); const IGNORED_CELL_REGEX = - /^```\s?\w*?[\s\S]\/\/ ?@lc-docs-hide-cell[\s\S]*?^```/gm; + /^``` *\w*?[\s\S]\/\/ ?@lc-docs-hide-cell[\s\S]*?^```/gm; const LC_TS_IGNORE_REGEX = /\/\/ ?@lc-ts-ignore\n/g; async function main() { diff --git a/examples/package.json b/examples/package.json index f0ce7dfa2f8a..9f5d844e3dc6 100644 --- a/examples/package.json +++ b/examples/package.json @@ -69,7 +69,7 @@ "@qdrant/js-client-rest": "^1.9.0", "@raycast/api": "^1.55.2", "@rockset/client": "^0.9.1", - "@supabase/supabase-js": "^2.10.0", + "@supabase/supabase-js": "^2.45.0", "@tensorflow/tfjs-backend-cpu": "^4.4.0", "@upstash/redis": "^1.32.0", "@upstash/vector": "^1.1.1", diff --git a/langchain/package.json b/langchain/package.json index bfdb7424697d..01b0332bcbc3 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -625,7 +625,7 @@ 
"@mendable/firecrawl-js": "^0.0.13", "@notionhq/client": "^2.2.10", "@pinecone-database/pinecone": "^1.1.0", - "@supabase/supabase-js": "^2.10.0", + "@supabase/supabase-js": "^2.45.0", "@swc/core": "^1.3.90", "@swc/jest": "^0.2.29", "@tsconfig/recommended": "^1.0.2", diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js index 17edcc49ebf6..3709f58812e7 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -27,7 +27,8 @@ export const config = { "pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js", "web-auth-library/google", "notion-to-md/build/utils/notion.js", - "@getzep/zep-cloud/api" + "@getzep/zep-cloud/api", + "@supabase/postgrest-js", ], entrypoints: { load: "load/index", diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 6acda4bd7138..1e47ec65bb95 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -99,8 +99,7 @@ "@smithy/signature-v4": "^2.0.10", "@smithy/util-utf8": "^2.0.0", "@spider-cloud/spider-client": "^0.0.21", - "@supabase/postgrest-js": "^1.1.1", - "@supabase/supabase-js": "^2.10.0", + "@supabase/supabase-js": "^2.45.0", "@swc/core": "^1.3.90", "@swc/jest": "^0.2.29", "@tensorflow-models/universal-sentence-encoder": "^1.3.3", @@ -259,8 +258,7 @@ "@smithy/signature-v4": "^2.0.10", "@smithy/util-utf8": "^2.0.0", "@spider-cloud/spider-client": "^0.0.21", - "@supabase/postgrest-js": "^1.1.1", - "@supabase/supabase-js": "^2.10.0", + "@supabase/supabase-js": "^2.45.0", "@tensorflow-models/universal-sentence-encoder": "*", "@tensorflow/tfjs-converter": "*", "@tensorflow/tfjs-core": "*", @@ -472,9 +470,6 @@ "@spider-cloud/spider-client": { "optional": true }, - "@supabase/postgrest-js": { - "optional": true - }, "@supabase/supabase-js": { "optional": true }, diff --git a/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb 
b/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb index 3ea8a62d5d21..744886a5f582 100644 --- a/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb +++ b/libs/langchain-scripts/src/cli/docs/templates/retrievers.ipynb @@ -141,7 +141,7 @@ "metadata": {}, "outputs": [], "source": [ - "// @ls-docs-hide-cell\n", + "// @lc-docs-hide-cell\n", "\n", "import { ChatOpenAI } from \"@langchain/openai\";\n", "\n", diff --git a/yarn.lock b/yarn.lock index 658bc58c7d5e..c96e97605695 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11130,8 +11130,7 @@ __metadata: "@smithy/signature-v4": ^2.0.10 "@smithy/util-utf8": ^2.0.0 "@spider-cloud/spider-client": ^0.0.21 - "@supabase/postgrest-js": ^1.1.1 - "@supabase/supabase-js": ^2.10.0 + "@supabase/supabase-js": ^2.45.0 "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29 "@tensorflow-models/universal-sentence-encoder": ^1.3.3 @@ -11298,8 +11297,7 @@ __metadata: "@smithy/signature-v4": ^2.0.10 "@smithy/util-utf8": ^2.0.0 "@spider-cloud/spider-client": ^0.0.21 - "@supabase/postgrest-js": ^1.1.1 - "@supabase/supabase-js": ^2.10.0 + "@supabase/supabase-js": ^2.45.0 "@tensorflow-models/universal-sentence-encoder": "*" "@tensorflow/tfjs-converter": "*" "@tensorflow/tfjs-core": "*" @@ -11465,8 +11463,6 @@ __metadata: optional: true "@spider-cloud/spider-client": optional: true - "@supabase/postgrest-js": - optional: true "@supabase/supabase-js": optional: true "@tensorflow-models/universal-sentence-encoder": @@ -17187,39 +17183,21 @@ __metadata: languageName: node linkType: hard -"@supabase/functions-js@npm:2.1.5": - version: 2.1.5 - resolution: "@supabase/functions-js@npm:2.1.5" +"@supabase/auth-js@npm:2.64.4": + version: 2.64.4 + resolution: "@supabase/auth-js@npm:2.64.4" dependencies: "@supabase/node-fetch": ^2.6.14 - checksum: f2ab8636af8d982270b61631a5120369ca10db101b4298da71be892e5d91a8ddaddcf7f51079ad0fe24731a15892b21bd7dbe41b997da9d4b90e4326d09632c8 - languageName: node - linkType: hard - -"@supabase/functions-js@npm:^2.1.0": 
- version: 2.1.0 - resolution: "@supabase/functions-js@npm:2.1.0" - dependencies: - cross-fetch: ^3.1.5 - checksum: 231addfa49c1b109ddcc219dd8ed6b83e87cf2516cf7cf2742330176baa794d99a78694ff7f748fffa920f37a8078352822611d84ff43c8bc293c4d4486134fd + checksum: 05d7f40e67a89ba561c47ce7f398f1979d19a89b6a9e90c25fcd591b2b0fa161d9f1edf50777eeeb256468a1e479557c2d07d966d7b312e188ffca9758823213 languageName: node linkType: hard -"@supabase/gotrue-js@npm:2.62.2": - version: 2.62.2 - resolution: "@supabase/gotrue-js@npm:2.62.2" +"@supabase/functions-js@npm:2.4.1": + version: 2.4.1 + resolution: "@supabase/functions-js@npm:2.4.1" dependencies: "@supabase/node-fetch": ^2.6.14 - checksum: d77f5075c56dc8529b137f8aba3e4f5ee7da30c21f519202fd63d2b71aef459b6b61daee0a7e199eaa31ab14206336efa0141baafebfe3f1fb94c0e1f15dfbf5 - languageName: node - linkType: hard - -"@supabase/gotrue-js@npm:^2.12.0": - version: 2.13.0 - resolution: "@supabase/gotrue-js@npm:2.13.0" - dependencies: - cross-fetch: ^3.1.5 - checksum: 52aa7566833a24d2e3c84d0665664ad3936bacf63671d55fb5f7e01614e7564b3cc8ce815f83e8ec585e37110422f6274ccfbbb942fc93c56b28178018637395 + checksum: 786a1499b26b928f39b9af68b44bfc0a016c525d85c6b8bd2f6cd712e53db3ab5099ed4e7b1038ae8527dcee16a3aa8f80618a7227d322052810d378746d7154 languageName: node linkType: hard @@ -17232,89 +17210,47 @@ __metadata: languageName: node linkType: hard -"@supabase/postgrest-js@npm:1.9.2": - version: 1.9.2 - resolution: "@supabase/postgrest-js@npm:1.9.2" +"@supabase/postgrest-js@npm:1.15.8": + version: 1.15.8 + resolution: "@supabase/postgrest-js@npm:1.15.8" dependencies: "@supabase/node-fetch": ^2.6.14 - checksum: 9aefbdfc1c0d8a00b932b0939dbcbb5ec392b1324ad1b63b5e0486c6f9882a9c2292c80d3f803a0338938097372f08b3bcbdc3c4699d5bef13791ddc35d53b86 - languageName: node - linkType: hard - -"@supabase/postgrest-js@npm:^1.1.1": - version: 1.4.1 - resolution: "@supabase/postgrest-js@npm:1.4.1" - dependencies: - cross-fetch: ^3.1.5 - checksum: 
0a4ad721a5985b92bd7d3703dc6a97c9c57200ff7bdc13c75134d235f7d795e700a421c7bdb1298190ec0b24fb6923c182d5ec7d34d091e9c179813ed8091546 + checksum: b2081f76356f35f76e9e6f2e1b4dd1528c96404f4feeebf1ebb8068fb726e182c06f77cbd151c00d751ad9d69bcab4596dd68bf5c90ee7ace2ac22695bed3268 languageName: node linkType: hard -"@supabase/realtime-js@npm:2.9.3": - version: 2.9.3 - resolution: "@supabase/realtime-js@npm:2.9.3" +"@supabase/realtime-js@npm:2.10.2": + version: 2.10.2 + resolution: "@supabase/realtime-js@npm:2.10.2" dependencies: "@supabase/node-fetch": ^2.6.14 "@types/phoenix": ^1.5.4 "@types/ws": ^8.5.10 ws: ^8.14.2 - checksum: 180a5084b94a4e324fc04041182bf8819c3c2545a731c276a56f9647f78078180b0460b68a0d6c568d29b2fa4aace0545bb71dcb89b547ec85781032dff74e71 + checksum: a98342e6a8437c2c14a86728abd12f21d853f167fb22ede46ef680cc0ef990bf7c8a8403c448ab335d884cfe8c3b2f6c4e3363dcb777511ee3abb6270d65a954 languageName: node linkType: hard -"@supabase/realtime-js@npm:^2.4.0": +"@supabase/storage-js@npm:2.6.0": version: 2.6.0 - resolution: "@supabase/realtime-js@npm:2.6.0" - dependencies: - "@types/phoenix": ^1.5.4 - websocket: ^1.0.34 - checksum: 3a3b696a0203fcc78c9be301f169315a131c8fd5efb88d470d2f48be24cacf2f8a4f37081a85b853b78c46e008b94875a6e8a3473fceb6a191965016e50c9e24 - languageName: node - linkType: hard - -"@supabase/storage-js@npm:2.5.5": - version: 2.5.5 - resolution: "@supabase/storage-js@npm:2.5.5" + resolution: "@supabase/storage-js@npm:2.6.0" dependencies: "@supabase/node-fetch": ^2.6.14 - checksum: 4470499113c15e1124d99048eef0097c7ba431d728e351519ee26948775171d6c6bb41156f8ffb3860009b82b93809af01c9d075ece6000f783f59ce9fd00ee8 + checksum: ade54ef113922c43f4b46080b5ee637fad07117e4e16383ce0d040981a550a6f3fc0edc4f20dd8677cb28bb46b4071be130df86944ce7a9abe68bb86b955a832 languageName: node linkType: hard -"@supabase/storage-js@npm:^2.3.1": - version: 2.3.1 - resolution: "@supabase/storage-js@npm:2.3.1" +"@supabase/supabase-js@npm:^2.45.0": + version: 2.45.0 + resolution: 
"@supabase/supabase-js@npm:2.45.0" dependencies: - cross-fetch: ^3.1.5 - checksum: b502bc0735f06df091b40839e37f7a795563c1a6be918bbd7a2dab45299d625bde825dd24e396c8dca08eaa3bf247c5159dd5018277cc9fb90310c62cc88148c - languageName: node - linkType: hard - -"@supabase/supabase-js@npm:^2.10.0": - version: 2.10.0 - resolution: "@supabase/supabase-js@npm:2.10.0" - dependencies: - "@supabase/functions-js": ^2.1.0 - "@supabase/gotrue-js": ^2.12.0 - "@supabase/postgrest-js": ^1.1.1 - "@supabase/realtime-js": ^2.4.0 - "@supabase/storage-js": ^2.3.1 - cross-fetch: ^3.1.5 - checksum: 60da311e9aae2f9cce0716baf9820cf2030b2ee3ba0df6f95de60023b770d034b2f5044e4492c50c02f62379b1ac9979ca65e136873fc6c9d8bd76e758a62b33 - languageName: node - linkType: hard - -"@supabase/supabase-js@npm:^2.39.7": - version: 2.39.7 - resolution: "@supabase/supabase-js@npm:2.39.7" - dependencies: - "@supabase/functions-js": 2.1.5 - "@supabase/gotrue-js": 2.62.2 + "@supabase/auth-js": 2.64.4 + "@supabase/functions-js": 2.4.1 "@supabase/node-fetch": 2.6.15 - "@supabase/postgrest-js": 1.9.2 - "@supabase/realtime-js": 2.9.3 - "@supabase/storage-js": 2.5.5 - checksum: f2cc7c7841762d3161f53b19e82b0eb860d72510729114ed45a38aff0427c5897e4a176092b370b68fcd621b385de4d3b90fc43fe39d129d09c04b6da5a407d7 + "@supabase/postgrest-js": 1.15.8 + "@supabase/realtime-js": 2.10.2 + "@supabase/storage-js": 2.6.0 + checksum: 33794e741ae2197e7c31e4c03e195cf3dbf477def48e86a37ccb34adc207289d6ed19a91952178989f1f70ce611a32644ad95b0eebd7964102d144cc95abf7c2 languageName: node linkType: hard @@ -21453,7 +21389,7 @@ __metadata: languageName: node linkType: hard -"bufferutil@npm:4.0.7, bufferutil@npm:^4.0.1": +"bufferutil@npm:4.0.7": version: 4.0.7 resolution: "bufferutil@npm:4.0.7" dependencies: @@ -22848,7 +22784,7 @@ __metadata: "@langchain/scripts": "workspace:*" "@mdx-js/react": ^1.6.22 "@microsoft/fetch-event-source": ^2.0.1 - "@supabase/supabase-js": ^2.39.7 + "@supabase/supabase-js": ^2.45.0 "@swc/core": ^1.3.62 "@types/cookie": ^0 
clsx: ^1.2.1 @@ -23803,7 +23739,7 @@ __metadata: languageName: node linkType: hard -"debug@npm:2.6.9, debug@npm:^2.2.0, debug@npm:^2.6.0": +"debug@npm:2.6.9, debug@npm:^2.6.0": version: 2.6.9 resolution: "debug@npm:2.6.9" dependencies: @@ -26195,7 +26131,7 @@ __metadata: "@qdrant/js-client-rest": ^1.9.0 "@raycast/api": ^1.55.2 "@rockset/client": ^0.9.1 - "@supabase/supabase-js": ^2.10.0 + "@supabase/supabase-js": ^2.45.0 "@tensorflow/tfjs-backend-cpu": ^4.4.0 "@tsconfig/recommended": ^1.0.2 "@types/js-yaml": ^4 @@ -31209,7 +31145,7 @@ __metadata: "@mendable/firecrawl-js": ^0.0.13 "@notionhq/client": ^2.2.10 "@pinecone-database/pinecone": ^1.1.0 - "@supabase/supabase-js": ^2.10.0 + "@supabase/supabase-js": ^2.45.0 "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29 "@tsconfig/recommended": ^1.0.2 @@ -40719,16 +40655,6 @@ __metadata: languageName: node linkType: hard -"utf-8-validate@npm:^5.0.2": - version: 5.0.10 - resolution: "utf-8-validate@npm:5.0.10" - dependencies: - node-gyp: latest - node-gyp-build: ^4.3.0 - checksum: 5579350a023c66a2326752b6c8804cc7b39dcd251bb088241da38db994b8d78352e388dcc24ad398ab98385ba3c5ffcadb6b5b14b2637e43f767869055e46ba6 - languageName: node - linkType: hard - "util-deprecate@npm:^1.0.1, util-deprecate@npm:^1.0.2, util-deprecate@npm:~1.0.1": version: 1.0.2 resolution: "util-deprecate@npm:1.0.2" @@ -41250,20 +41176,6 @@ __metadata: languageName: node linkType: hard -"websocket@npm:^1.0.34": - version: 1.0.34 - resolution: "websocket@npm:1.0.34" - dependencies: - bufferutil: ^4.0.1 - debug: ^2.2.0 - es5-ext: ^0.10.50 - typedarray-to-buffer: ^3.1.5 - utf-8-validate: ^5.0.2 - yaeti: ^0.0.6 - checksum: 8a0ce6d79cc1334bb6ea0d607f0092f3d32700b4dd19e4d5540f2a85f3b50e1f8110da0e4716737056584dde70bbebcb40bbd94bbb437d7468c71abfbfa077d8 - languageName: node - linkType: hard - "whatwg-encoding@npm:^2.0.0": version: 2.0.0 resolution: "whatwg-encoding@npm:2.0.0" @@ -41790,13 +41702,6 @@ __metadata: languageName: node linkType: hard -"yaeti@npm:^0.0.6": - 
version: 0.0.6 - resolution: "yaeti@npm:0.0.6" - checksum: 6db12c152f7c363b80071086a3ebf5032e03332604eeda988872be50d6c8469e1f13316175544fa320f72edad696c2d83843ad0ff370659045c1a68bcecfcfea - languageName: node - linkType: hard - "yallist@npm:4.0.0, yallist@npm:^4.0.0": version: 4.0.0 resolution: "yallist@npm:4.0.0" From 687485e5cdeb0d3a2cae8c6eae11be73344adaeb Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 5 Aug 2024 17:14:59 -0700 Subject: [PATCH 4/6] Add redirect from vector store retriever page (#6401) --- .../docs/integrations/retrievers/tavily.ipynb | 2 +- .../docs/integrations/retrievers/vectorstore.mdx | 12 ------------ docs/core_docs/vercel.json | 4 ++++ 3 files changed, 5 insertions(+), 13 deletions(-) delete mode 100644 docs/core_docs/docs/integrations/retrievers/vectorstore.mdx diff --git a/docs/core_docs/docs/integrations/retrievers/tavily.ipynb b/docs/core_docs/docs/integrations/retrievers/tavily.ipynb index 2915c6ec4ddb..05a354c3645f 100644 --- a/docs/core_docs/docs/integrations/retrievers/tavily.ipynb +++ b/docs/core_docs/docs/integrations/retrievers/tavily.ipynb @@ -35,7 +35,7 @@ "\n", "## Setup\n", "\n", - "You will need to populate a `TAVILY_API_KEY` environment variable with your Tavily API key or pass it into the constructor as `apiKey`.\n", + "You will need to populate a `TAVILY_API_KEY` environment variable with your Tavily API key or pass it into the constructor as `apiKey`. 
Obtain a key by signing up [on their website](https://tavily.com/).\n", "\n", "If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", "\n", diff --git a/docs/core_docs/docs/integrations/retrievers/vectorstore.mdx b/docs/core_docs/docs/integrations/retrievers/vectorstore.mdx deleted file mode 100644 index f4d09c3afab9..000000000000 --- a/docs/core_docs/docs/integrations/retrievers/vectorstore.mdx +++ /dev/null @@ -1,12 +0,0 @@ ---- -hide_table_of_contents: true ---- - -# Vector Store - -Once you've created a [Vector Store](/docs/concepts#vectorstores), the way to use it as a Retriever is very simple: - -```typescript -vectorStore = ... -retriever = vectorStore.asRetriever() -``` diff --git a/docs/core_docs/vercel.json b/docs/core_docs/vercel.json index fb7b99a58327..d911e4625f63 100644 --- a/docs/core_docs/vercel.json +++ b/docs/core_docs/vercel.json @@ -64,6 +64,10 @@ { "source": "/v0.2/docs/integrations/chat/chrome_ai(/?)", "destination": "/v0.2/docs/integrations/llms/chrome_ai/" + }, + { + "source": "/v0.2/docs/integrations/retrievers/vectorstore(/?)", + "destination": "/v0.2/docs/how_to/vectorstore_retriever/" } ] } From db6c02beb49e2a380fe2faf26be9dc108485ffea Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 5 Aug 2024 18:00:55 -0700 Subject: [PATCH 5/6] docs[patch]: Simplify Anthropic docs (#6403) * Update Anthropic docs * Simplify --- .../docs/integrations/chat/anthropic.ipynb | 643 ++---------------- libs/langchain-anthropic/src/chat_models.ts | 5 +- 2 files changed, 71 insertions(+), 577 deletions(-) diff --git a/docs/core_docs/docs/integrations/chat/anthropic.ipynb b/docs/core_docs/docs/integrations/chat/anthropic.ipynb index 3aea237d0a3a..b3fce19f740f 100644 --- a/docs/core_docs/docs/integrations/chat/anthropic.ipynb +++ b/docs/core_docs/docs/integrations/chat/anthropic.ipynb @@ -21,7 +21,9 @@ "source": [ "# ChatAnthropic\n", "\n", - "This 
will help you getting started with ChatAnthropic [chat models](/docs/concepts/#chat-models). For detailed documentation of all ChatAnthropic features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_anthropic.ChatAnthropic.html).\n", + "[Anthropic](https://www.anthropic.com/) is an AI safety and research company. They are the creator of Claude.\n", + "\n", + "This will help you getting started with Anthropic [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatAnthropic` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_anthropic.ChatAnthropic.html).\n", "\n", "## Overview\n", "### Integration details\n", @@ -56,7 +58,7 @@ "\n", "### Installation\n", "\n", - "The LangChain ChatAnthropic integration lives in the `@langchain/anthropic` package:\n", + "The LangChain `ChatAnthropic` integration lives in the `@langchain/anthropic` package:\n", "\n", "```{=mdx}\n", "\n", @@ -84,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae", "metadata": {}, "outputs": [], @@ -97,7 +99,7 @@ " maxTokens: undefined,\n", " maxRetries: 2,\n", " // other params...\n", - "})" + "});" ] }, { @@ -110,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "62e0dbc3", "metadata": { "tags": [] @@ -121,10 +123,10 @@ "output_type": "stream", "text": [ "AIMessage {\n", - " \"id\": \"msg_01M9yt3aSqKJKM1RnZF4f44Q\",\n", + " \"id\": \"msg_013WBXXiggy6gMbAUY6NpsuU\",\n", " \"content\": \"Voici la traduction en français :\\n\\nJ'adore la programmation.\",\n", " \"additional_kwargs\": {\n", - " \"id\": \"msg_01M9yt3aSqKJKM1RnZF4f44Q\",\n", + " \"id\": \"msg_013WBXXiggy6gMbAUY6NpsuU\",\n", " \"type\": \"message\",\n", " \"role\": \"assistant\",\n", " \"model\": \"claude-3-haiku-20240307\",\n", @@ -136,7 +138,7 @@ " }\n", " },\n", " \"response_metadata\": {\n", - " 
\"id\": \"msg_01M9yt3aSqKJKM1RnZF4f44Q\",\n", + " \"id\": \"msg_013WBXXiggy6gMbAUY6NpsuU\",\n", " \"model\": \"claude-3-haiku-20240307\",\n", " \"stop_reason\": \"end_turn\",\n", " \"stop_sequence\": null,\n", @@ -171,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "d86145b3-bfef-46e8-b227-4dda5c9c2705", "metadata": {}, "outputs": [ @@ -201,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b", "metadata": {}, "outputs": [ @@ -210,10 +212,10 @@ "output_type": "stream", "text": [ "AIMessage {\n", - " \"id\": \"msg_012gUKUG65teaois31W3bfGF\",\n", + " \"id\": \"msg_01Ca52fpd1mcGRhH4spzAWr4\",\n", " \"content\": \"Ich liebe das Programmieren.\",\n", " \"additional_kwargs\": {\n", - " \"id\": \"msg_012gUKUG65teaois31W3bfGF\",\n", + " \"id\": \"msg_01Ca52fpd1mcGRhH4spzAWr4\",\n", " \"type\": \"message\",\n", " \"role\": \"assistant\",\n", " \"model\": \"claude-3-haiku-20240307\",\n", @@ -225,7 +227,7 @@ " }\n", " },\n", " \"response_metadata\": {\n", - " \"id\": \"msg_012gUKUG65teaois31W3bfGF\",\n", + " \"id\": \"msg_01Ca52fpd1mcGRhH4spzAWr4\",\n", " \"model\": \"claude-3-haiku-20240307\",\n", " \"stop_reason\": \"end_turn\",\n", " \"stop_sequence\": null,\n", @@ -272,270 +274,18 @@ }, { "cell_type": "markdown", - "id": "d1ee55bc-ffc8-4cfa-801c-993953a08cfd", - "metadata": {}, - "source": [ - "## Multimodal inputs\n", - "\n", - "Claude-3 models support image multimodal inputs. The passed input must be a base64 encoded image with the\n", - "filetype as a prefix (e.g. `data:image/png;base64,{YOUR_BASE64_ENCODED_DATA}`).\n", - "Here's an example:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "1cb65e95", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "AIMessage {\n", - " \"id\": \"msg_01AuGpm6xbacTwoUFdNiCnzu\",\n", - " \"content\": \"The image shows a hot dog. 
It consists of a cylindrical bread roll or bun that has been sliced lengthwise, revealing the bright red hot dog sausage filling inside. The hot dog sausage appears to be made from seasoned and smoked meat. This classic fast food item is a popular snack or meal, commonly enjoyed at sporting events, cookouts, and casual eateries.\",\n", - " \"additional_kwargs\": {\n", - " \"id\": \"msg_01AuGpm6xbacTwoUFdNiCnzu\",\n", - " \"type\": \"message\",\n", - " \"role\": \"assistant\",\n", - " \"model\": \"claude-3-sonnet-20240229\",\n", - " \"stop_reason\": \"end_turn\",\n", - " \"stop_sequence\": null,\n", - " \"usage\": {\n", - " \"input_tokens\": 276,\n", - " \"output_tokens\": 88\n", - " }\n", - " },\n", - " \"response_metadata\": {\n", - " \"id\": \"msg_01AuGpm6xbacTwoUFdNiCnzu\",\n", - " \"model\": \"claude-3-sonnet-20240229\",\n", - " \"stop_reason\": \"end_turn\",\n", - " \"stop_sequence\": null,\n", - " \"usage\": {\n", - " \"input_tokens\": 276,\n", - " \"output_tokens\": 88\n", - " },\n", - " \"type\": \"message\",\n", - " \"role\": \"assistant\"\n", - " },\n", - " \"tool_calls\": [],\n", - " \"invalid_tool_calls\": [],\n", - " \"usage_metadata\": {\n", - " \"input_tokens\": 276,\n", - " \"output_tokens\": 88,\n", - " \"total_tokens\": 364\n", - " }\n", - "}\n" - ] - } - ], - "source": [ - "import fs from \"fs/promises\";\n", - "\n", - "import { ChatAnthropic } from \"@langchain/anthropic\";\n", - "import { HumanMessage } from \"@langchain/core/messages\";\n", - "\n", - "const imageData2 = await fs.readFile(\"../../../../../examples/hotdog.jpg\");\n", - "const llm2 = new ChatAnthropic({\n", - " model: \"claude-3-sonnet-20240229\",\n", - "});\n", - "const message2 = new HumanMessage({\n", - " content: [\n", - " {\n", - " type: \"text\",\n", - " text: \"What's in this image?\",\n", - " },\n", - " {\n", - " type: \"image_url\",\n", - " image_url: {\n", - " url: `data:image/jpeg;base64,${imageData2.toString(\"base64\")}`,\n", - " },\n", - " },\n", - " ],\n", - 
"});\n", - "\n", - "await llm2.invoke([message2]);" - ] - }, - { - "cell_type": "markdown", - "id": "5c14fbc0", - "metadata": {}, - "source": [ - "See [the official docs](https://docs.anthropic.com/claude/docs/vision#what-image-file-types-does-claude-support)\n", - "for a complete list of supported file types." - ] - }, - { - "cell_type": "markdown", - "id": "9bce78a1", - "metadata": {}, - "source": [ - "## Agents\n", - "\n", - "Anthropic models that support tool calling can be used in the Tool Calling agent. Here's an example:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "0648b504", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[\n", - " {\n", - " index: 0,\n", - " type: 'text',\n", - " text: '\\n\\nThe current weather in San Francisco, CA is 28°C.'\n", - " }\n", - "]\n" - ] - } - ], - "source": [ - "import { z } from \"zod\";\n", - "\n", - "import { ChatAnthropic } from \"@langchain/anthropic\";\n", - "import { tool } from \"@langchain/core/tools\";\n", - "import { AgentExecutor, createToolCallingAgent } from \"langchain/agents\";\n", - "\n", - "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", - "\n", - "const llm3 = new ChatAnthropic({\n", - " model: \"claude-3-sonnet-20240229\",\n", - " temperature: 0,\n", - "});\n", - "\n", - "// Prompt template must have \"input\" and \"agent_scratchpad input variables\"\n", - "const prompt3 = ChatPromptTemplate.fromMessages([\n", - " [\"system\", \"You are a helpful assistant\"],\n", - " [\"placeholder\", \"{chat_history}\"],\n", - " [\"human\", \"{input}\"],\n", - " [\"placeholder\", \"{agent_scratchpad}\"],\n", - "]);\n", - "\n", - "const currentWeatherTool3 = tool(async () => \"28 °C\", {\n", - " name: \"get_current_weather\",\n", - " description: \"Get the current weather in a given location\",\n", - " schema: z.object({\n", - " location: z.string().describe(\"The city and state, e.g. 
San Francisco, CA\"),\n", - " }),\n", - "});\n", - "\n", - "const agent3 = createToolCallingAgent({\n", - " llm: llm3,\n", - " tools: [currentWeatherTool3],\n", - " prompt: prompt3,\n", - "});\n", - "\n", - "const agentExecutor3 = new AgentExecutor({\n", - " agent: agent3,\n", - " tools: [currentWeatherTool3],\n", - "});\n", - "\n", - "const input3 = \"What's the weather like in SF?\";\n", - "const result3 = await agentExecutor3.invoke({ input: input3 });\n", - "\n", - "console.log(result3.output);" - ] - }, - { - "cell_type": "markdown", - "id": "d452d4b6", - "metadata": {}, - "source": [ - "## Custom headers\n", - "\n", - "You can pass custom headers in your requests like this:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "41943f0a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "AIMessage {\n", - " \"id\": \"msg_013Ft3kN62gNtiMWRqg6xxt8\",\n", - " \"content\": \"The sky appears blue due to a phenomenon called Rayleigh scattering. Here's a brief explanation:\\n\\n1) Sunlight is made up of different wavelengths of light, including the visible spectrum that we see as colors.\\n\\n2) As sunlight passes through the Earth's atmosphere, the different wavelengths of light interact with the gas molecules in the air.\\n\\n3) The shorter wavelengths of light, such as the blue and violet colors, get scattered more easily by the tiny gas molecules. 
This is because the wavelengths are similar in size to the molecules.\\n\\n4) The longer wavelengths of light, such as red and orange, get scattered much less by the gas molecules and travel more directly through the atmosphere.\\n\\n5) The blue wavelengths that are scattered in different directions become scattered across the entire sky, making the sky appear blue to our eyes.\\n\\n6) During sunrise and sunset, the sun's rays travel through more atmosphere before reaching our eyes, causing the blue light to get scattered away and allowing more of the red/orange wavelengths to pass through, giving those colors in the sky.\\n\\nSo in essence, the abundant scattering of blue light by the gas molecules in the atmosphere is what causes the sky to appear blue during the daytime.\",\n", - " \"additional_kwargs\": {\n", - " \"id\": \"msg_013Ft3kN62gNtiMWRqg6xxt8\",\n", - " \"type\": \"message\",\n", - " \"role\": \"assistant\",\n", - " \"model\": \"claude-3-sonnet-20240229\",\n", - " \"stop_reason\": \"end_turn\",\n", - " \"stop_sequence\": null,\n", - " \"usage\": {\n", - " \"input_tokens\": 13,\n", - " \"output_tokens\": 272\n", - " }\n", - " },\n", - " \"response_metadata\": {\n", - " \"id\": \"msg_013Ft3kN62gNtiMWRqg6xxt8\",\n", - " \"model\": \"claude-3-sonnet-20240229\",\n", - " \"stop_reason\": \"end_turn\",\n", - " \"stop_sequence\": null,\n", - " \"usage\": {\n", - " \"input_tokens\": 13,\n", - " \"output_tokens\": 272\n", - " },\n", - " \"type\": \"message\",\n", - " \"role\": \"assistant\"\n", - " },\n", - " \"tool_calls\": [],\n", - " \"invalid_tool_calls\": [],\n", - " \"usage_metadata\": {\n", - " \"input_tokens\": 13,\n", - " \"output_tokens\": 272,\n", - " \"total_tokens\": 285\n", - " }\n", - "}\n" - ] - } - ], - "source": [ - "import { ChatAnthropic } from \"@langchain/anthropic\";\n", - "\n", - "const llm4 = new ChatAnthropic({\n", - " model: \"claude-3-sonnet-20240229\",\n", - " maxTokens: 1024,\n", - " clientOptions: {\n", - " defaultHeaders: {\n", - 
" \"X-Api-Key\": process.env.ANTHROPIC_API_KEY,\n", - " },\n", - " },\n", - "});\n", - "\n", - "const res4 = await llm4.invoke(\"Why is the sky blue?\");\n", - "\n", - "console.log(res4);" - ] - }, - { - "cell_type": "markdown", - "id": "985c4b4b", + "id": "8dac39db", "metadata": {}, "source": [ - "## Tools\n", + "## Content blocks\n", "\n", - "The Anthropic API supports tool calling, along with multi-tool calling. The following examples demonstrate how to call tools:\n", - "\n", - "### Single Tool" + "One key difference to note between Anthropic models and most others is that the contents of a single Anthropic AI message can either be a single string or a **list of content blocks**. For example when an Anthropic model [calls a tool](/docs/how_to/tool_calling), the tool invocation is part of the message content (as well as being exposed in the standardized `AIMessage.tool_calls` field):" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "2ce56548", + "execution_count": 5, + "id": "f5994de0", "metadata": {}, "outputs": [ { @@ -543,15 +293,15 @@ "output_type": "stream", "text": [ "AIMessage {\n", - " \"id\": \"msg_01XPUHrR4sNCqPr1i9zcsAsg\",\n", + " \"id\": \"msg_01DZGs9DyuashaYxJ4WWpWUP\",\n", " \"content\": [\n", " {\n", " \"type\": \"text\",\n", - " \"text\": \"Okay, let me use the calculator tool to find the answer:\"\n", + " \"text\": \"Here is the calculation for 2 + 2:\"\n", " },\n", " {\n", " \"type\": \"tool_use\",\n", - " \"id\": \"toolu_01MhUVuUedc1drBKLarhedFZ\",\n", + " \"id\": \"toolu_01SQXBamkBr6K6NdHE7GWwF8\",\n", " \"name\": \"calculator\",\n", " \"input\": {\n", " \"number1\": 2,\n", @@ -561,7 +311,7 @@ " }\n", " ],\n", " \"additional_kwargs\": {\n", - " \"id\": \"msg_01XPUHrR4sNCqPr1i9zcsAsg\",\n", + " \"id\": \"msg_01DZGs9DyuashaYxJ4WWpWUP\",\n", " \"type\": \"message\",\n", " \"role\": \"assistant\",\n", " \"model\": \"claude-3-haiku-20240307\",\n", @@ -569,17 +319,17 @@ " \"stop_sequence\": null,\n", " \"usage\": {\n", " 
\"input_tokens\": 449,\n", - " \"output_tokens\": 101\n", + " \"output_tokens\": 100\n", " }\n", " },\n", " \"response_metadata\": {\n", - " \"id\": \"msg_01XPUHrR4sNCqPr1i9zcsAsg\",\n", + " \"id\": \"msg_01DZGs9DyuashaYxJ4WWpWUP\",\n", " \"model\": \"claude-3-haiku-20240307\",\n", " \"stop_reason\": \"tool_use\",\n", " \"stop_sequence\": null,\n", " \"usage\": {\n", " \"input_tokens\": 449,\n", - " \"output_tokens\": 101\n", + " \"output_tokens\": 100\n", " },\n", " \"type\": \"message\",\n", " \"role\": \"assistant\"\n", @@ -592,15 +342,15 @@ " \"number2\": 2,\n", " \"operation\": \"add\"\n", " },\n", - " \"id\": \"toolu_01MhUVuUedc1drBKLarhedFZ\",\n", + " \"id\": \"toolu_01SQXBamkBr6K6NdHE7GWwF8\",\n", " \"type\": \"tool_call\"\n", " }\n", " ],\n", " \"invalid_tool_calls\": [],\n", " \"usage_metadata\": {\n", " \"input_tokens\": 449,\n", - " \"output_tokens\": 101,\n", - " \"total_tokens\": 550\n", + " \"output_tokens\": 100,\n", + " \"total_tokens\": 549\n", " }\n", "}\n" ] @@ -612,7 +362,7 @@ "import { z } from \"zod\";\n", "import { zodToJsonSchema } from \"zod-to-json-schema\";\n", "\n", - "const calculatorSchema5 = z.object({\n", + "const calculatorSchema = z.object({\n", " operation: z\n", " .enum([\"add\", \"subtract\", \"multiply\", \"divide\"])\n", " .describe(\"The type of operation to execute.\"),\n", @@ -620,20 +370,17 @@ " number2: z.number().describe(\"The second number to operate on.\"),\n", "});\n", "\n", - "const tool5 = {\n", + "const calculatorTool = {\n", " name: \"calculator\",\n", " description: \"A simple calculator tool\",\n", - " input_schema: zodToJsonSchema(calculatorSchema5),\n", + " input_schema: zodToJsonSchema(calculatorSchema),\n", "};\n", "\n", - "const llm5 = new ChatAnthropic({\n", - " apiKey: process.env.ANTHROPIC_API_KEY,\n", + "const toolCallingLlm = new ChatAnthropic({\n", " model: \"claude-3-haiku-20240307\",\n", - "}).bind({\n", - " tools: [tool5],\n", - "});\n", + "}).bindTools([calculatorTool]);\n", "\n", - "const 
prompt5 = ChatPromptTemplate.fromMessages([\n", + "const toolPrompt = ChatPromptTemplate.fromMessages([\n", " [\n", " \"system\",\n", " \"You are a helpful assistant who always needs to use a calculator.\",\n", @@ -642,46 +389,27 @@ "]);\n", "\n", "// Chain your prompt and model together\n", - "const chain5 = prompt5.pipe(llm5);\n", + "const toolCallChain = toolPrompt.pipe(toolCallingLlm);\n", "\n", - "const response5 = await chain5.invoke({\n", + "await toolCallChain.invoke({\n", " input: \"What is 2 + 2?\",\n", - "});\n", - "console.log(response5);" + "});" ] }, { "cell_type": "markdown", - "id": "6e91f97b", + "id": "d452d4b6", "metadata": {}, "source": [ - "### Forced tool calling\n", - "\n", - "In this example we'll provide the model with two tools:\n", - "\n", - "- `calculator`\n", - "- `get_weather`\n", - "\n", - "Then, when we call `bindTools`, we'll force the model to use the `get_weather` tool by passing the `tool_choice` arg like this:\n", - "\n", - "```typescript\n", - ".bindTools({\n", - " tools,\n", - " tool_choice: {\n", - " type: \"tool\",\n", - " name: \"get_weather\",\n", - " }\n", - "});\n", - "```\n", + "## Custom headers\n", "\n", - "Finally, we'll invoke the model, but instead of asking about the weather, we'll ask it to do some math.\n", - "Since we explicitly forced the model to use the `get_weather` tool, it will ignore the input and return the weather information (in this case it returned ``, which is expected.)" + "You can pass custom headers in your requests like this:" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "8d6e4828", + "execution_count": 6, + "id": "41943f0a", "metadata": {}, "outputs": [ { @@ -689,58 +417,38 @@ "output_type": "stream", "text": [ "AIMessage {\n", - " \"id\": \"msg_018G4mEZu8KNKtaQxZQ3o8YB\",\n", - " \"content\": [\n", - " {\n", - " \"type\": \"tool_use\",\n", - " \"id\": \"toolu_01DS9RwsFKdhHNYmhwPJHdHa\",\n", - " \"name\": \"get_weather\",\n", - " \"input\": {\n", - " \"city\": \"\",\n", - " 
\"state\": \"\"\n", - " }\n", - " }\n", - " ],\n", + " \"id\": \"msg_019z4nWpShzsrbSHTWXWQh6z\",\n", + " \"content\": \"The sky appears blue due to a phenomenon called Rayleigh scattering. Here's a brief explanation:\\n\\n1) Sunlight is made up of different wavelengths of visible light, including all the colors of the rainbow.\\n\\n2) As sunlight passes through the atmosphere, the gases (mostly nitrogen and oxygen) cause the shorter wavelengths of light, such as violet and blue, to be scattered more easily than the longer wavelengths like red and orange.\\n\\n3) This scattering of the shorter blue wavelengths occurs in all directions by the gas molecules in the atmosphere.\\n\\n4) Our eyes are more sensitive to the scattered blue light than the scattered violet light, so we perceive the sky as having a blue color.\\n\\n5) The scattering is more pronounced for light traveling over longer distances through the atmosphere. This is why the sky appears even darker blue when looking towards the horizon.\\n\\nSo in essence, the selective scattering of the shorter blue wavelengths of sunlight by the gases in the atmosphere is what causes the sky to appear blue to our eyes during the daytime.\",\n", " \"additional_kwargs\": {\n", - " \"id\": \"msg_018G4mEZu8KNKtaQxZQ3o8YB\",\n", + " \"id\": \"msg_019z4nWpShzsrbSHTWXWQh6z\",\n", " \"type\": \"message\",\n", " \"role\": \"assistant\",\n", - " \"model\": \"claude-3-haiku-20240307\",\n", - " \"stop_reason\": \"tool_use\",\n", + " \"model\": \"claude-3-sonnet-20240229\",\n", + " \"stop_reason\": \"end_turn\",\n", " \"stop_sequence\": null,\n", " \"usage\": {\n", - " \"input_tokens\": 672,\n", - " \"output_tokens\": 51\n", + " \"input_tokens\": 13,\n", + " \"output_tokens\": 236\n", " }\n", " },\n", " \"response_metadata\": {\n", - " \"id\": \"msg_018G4mEZu8KNKtaQxZQ3o8YB\",\n", - " \"model\": \"claude-3-haiku-20240307\",\n", - " \"stop_reason\": \"tool_use\",\n", + " \"id\": \"msg_019z4nWpShzsrbSHTWXWQh6z\",\n", + " \"model\": 
\"claude-3-sonnet-20240229\",\n", + " \"stop_reason\": \"end_turn\",\n", " \"stop_sequence\": null,\n", " \"usage\": {\n", - " \"input_tokens\": 672,\n", - " \"output_tokens\": 51\n", + " \"input_tokens\": 13,\n", + " \"output_tokens\": 236\n", " },\n", " \"type\": \"message\",\n", " \"role\": \"assistant\"\n", " },\n", - " \"tool_calls\": [\n", - " {\n", - " \"name\": \"get_weather\",\n", - " \"args\": {\n", - " \"city\": \"\",\n", - " \"state\": \"\"\n", - " },\n", - " \"id\": \"toolu_01DS9RwsFKdhHNYmhwPJHdHa\",\n", - " \"type\": \"tool_call\"\n", - " }\n", - " ],\n", + " \"tool_calls\": [],\n", " \"invalid_tool_calls\": [],\n", " \"usage_metadata\": {\n", - " \"input_tokens\": 672,\n", - " \"output_tokens\": 51,\n", - " \"total_tokens\": 723\n", + " \"input_tokens\": 13,\n", + " \"output_tokens\": 236,\n", + " \"total_tokens\": 249\n", " }\n", "}\n" ] @@ -748,233 +456,18 @@ ], "source": [ "import { ChatAnthropic } from \"@langchain/anthropic\";\n", - "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", - "import { z } from \"zod\";\n", - "import { zodToJsonSchema } from \"zod-to-json-schema\";\n", - "\n", - "const calculatorSchema6 = z.object({\n", - " operation: z\n", - " .enum([\"add\", \"subtract\", \"multiply\", \"divide\"])\n", - " .describe(\"The type of operation to execute.\"),\n", - " number1: z.number().describe(\"The first number to operate on.\"),\n", - " number2: z.number().describe(\"The second number to operate on.\"),\n", - "});\n", - "\n", - "const weatherSchema6 = z.object({\n", - " city: z.string().describe(\"The city to get the weather from\"),\n", - " state: z.string().optional().describe(\"The state to get the weather from\"),\n", - "});\n", "\n", - "const tools6 = [\n", - " {\n", - " name: \"calculator\",\n", - " description: \"A simple calculator tool\",\n", - " input_schema: zodToJsonSchema(calculatorSchema6),\n", - " },\n", - " {\n", - " name: \"get_weather\",\n", - " description:\n", - " \"Get the weather of a specific 
location and return the temperature in Celsius.\",\n", - " input_schema: zodToJsonSchema(weatherSchema6),\n", - " },\n", - "];\n", - "\n", - "const llm6 = new ChatAnthropic({\n", - " apiKey: process.env.ANTHROPIC_API_KEY,\n", - " model: \"claude-3-haiku-20240307\",\n", - "}).bind({\n", - " tools: tools6,\n", - " tool_choice: {\n", - " type: \"tool\",\n", - " name: \"get_weather\",\n", + "const llmWithCustomHeaders = new ChatAnthropic({\n", + " model: \"claude-3-sonnet-20240229\",\n", + " maxTokens: 1024,\n", + " clientOptions: {\n", + " defaultHeaders: {\n", + " \"X-Api-Key\": process.env.ANTHROPIC_API_KEY,\n", + " },\n", " },\n", "});\n", "\n", - "const prompt6 = ChatPromptTemplate.fromMessages([\n", - " [\n", - " \"system\",\n", - " \"You are a helpful assistant who always needs to use a calculator.\",\n", - " ],\n", - " [\"human\", \"{input}\"],\n", - "]);\n", - "\n", - "// Chain your prompt and model together\n", - "const chain6 = prompt6.pipe(llm6);\n", - "\n", - "const response6 = await chain6.invoke({\n", - " input: \"What is the sum of 2725 and 273639\",\n", - "});\n", - "\n", - "console.log(response6);" - ] - }, - { - "cell_type": "markdown", - "id": "1aa777bc", - "metadata": {}, - "source": [ - "The `tool_choice` argument has three possible values:\n", - "\n", - "- `{ type: \"tool\", name: \"tool_name\" }` | `string` - Forces the model to use the specified tool. If passing a single string, it will be treated as the tool name.\n", - "- `\"any\"` - Allows the model to choose the tool, but still forcing it to choose at least one.\n", - "- `\"auto\"` - The default value. Allows the model to select any tool, or none." 
- ] - }, - { - "cell_type": "markdown", - "id": "15253085", - "metadata": {}, - "source": [ - "### `withStructuredOutput`" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "5e466d35", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ operation: 'add', number1: 2, number2: 2 }\n" - ] - } - ], - "source": [ - "import { ChatAnthropic } from \"@langchain/anthropic\";\n", - "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", - "import { z } from \"zod\";\n", - "\n", - "const calculatorSchema7 = z\n", - " .object({\n", - " operation: z\n", - " .enum([\"add\", \"subtract\", \"multiply\", \"divide\"])\n", - " .describe(\"The type of operation to execute.\"),\n", - " number1: z.number().describe(\"The first number to operate on.\"),\n", - " number2: z.number().describe(\"The second number to operate on.\"),\n", - " })\n", - " .describe(\"A simple calculator tool\");\n", - "\n", - "const llm7 = new ChatAnthropic({\n", - " apiKey: process.env.ANTHROPIC_API_KEY,\n", - " model: \"claude-3-haiku-20240307\",\n", - "});\n", - "\n", - "// Pass the schema and tool name to the withStructuredOutput method\n", - "const modelWithTool7 = llm7.withStructuredOutput(calculatorSchema7);\n", - "\n", - "const prompt7 = ChatPromptTemplate.fromMessages([\n", - " [\n", - " \"system\",\n", - " \"You are a helpful assistant who always needs to use a calculator.\",\n", - " ],\n", - " [\"human\", \"{input}\"],\n", - "]);\n", - "\n", - "// Chain your prompt and model together\n", - "const chain7 = prompt7.pipe(modelWithTool7);\n", - "\n", - "const response7 = await chain7.invoke({\n", - " input: \"What is 2 + 2?\",\n", - "});\n", - "console.log(response7);" - ] - }, - { - "cell_type": "markdown", - "id": "4973b265", - "metadata": {}, - "source": [ - "You can supply a \"name\" field to give the LLM additional context around what you are trying to generate. 
You can also pass `includeRaw` to get the raw message back from the model too." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "951c5352", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " raw: AIMessage {\n", - " \"id\": \"msg_01TrkHbEkioCYNHQhqxw5unu\",\n", - " \"content\": [\n", - " {\n", - " \"type\": \"tool_use\",\n", - " \"id\": \"toolu_01XMrGHXeSVTfSw1oKFZokzG\",\n", - " \"name\": \"calculator\",\n", - " \"input\": {\n", - " \"number1\": 2,\n", - " \"number2\": 2,\n", - " \"operation\": \"add\"\n", - " }\n", - " }\n", - " ],\n", - " \"additional_kwargs\": {\n", - " \"id\": \"msg_01TrkHbEkioCYNHQhqxw5unu\",\n", - " \"type\": \"message\",\n", - " \"role\": \"assistant\",\n", - " \"model\": \"claude-3-haiku-20240307\",\n", - " \"stop_reason\": \"tool_use\",\n", - " \"stop_sequence\": null,\n", - " \"usage\": {\n", - " \"input_tokens\": 552,\n", - " \"output_tokens\": 69\n", - " }\n", - " },\n", - " \"response_metadata\": {\n", - " \"id\": \"msg_01TrkHbEkioCYNHQhqxw5unu\",\n", - " \"model\": \"claude-3-haiku-20240307\",\n", - " \"stop_reason\": \"tool_use\",\n", - " \"stop_sequence\": null,\n", - " \"usage\": {\n", - " \"input_tokens\": 552,\n", - " \"output_tokens\": 69\n", - " },\n", - " \"type\": \"message\",\n", - " \"role\": \"assistant\"\n", - " },\n", - " \"tool_calls\": [\n", - " {\n", - " \"name\": \"calculator\",\n", - " \"args\": {\n", - " \"number1\": 2,\n", - " \"number2\": 2,\n", - " \"operation\": \"add\"\n", - " },\n", - " \"id\": \"toolu_01XMrGHXeSVTfSw1oKFZokzG\",\n", - " \"type\": \"tool_call\"\n", - " }\n", - " ],\n", - " \"invalid_tool_calls\": [],\n", - " \"usage_metadata\": {\n", - " \"input_tokens\": 552,\n", - " \"output_tokens\": 69,\n", - " \"total_tokens\": 621\n", - " }\n", - " },\n", - " parsed: { operation: 'add', number1: 2, number2: 2 }\n", - "}\n" - ] - } - ], - "source": [ - "const includeRawModel7 = llm7.withStructuredOutput(calculatorSchema7, 
{\n", - " name: \"calculator\",\n", - " includeRaw: true,\n", - "});\n", - "const includeRawChain7 = prompt7.pipe(includeRawModel7);\n", - "\n", - "const includeRawResponse7 = await includeRawChain7.invoke({\n", - " input: \"What is 2 + 2?\",\n", - "});\n", - "\n", - "console.log(includeRawResponse7);" + "await llmWithCustomHeaders.invoke(\"Why is the sky blue?\");" ] }, { diff --git a/libs/langchain-anthropic/src/chat_models.ts b/libs/langchain-anthropic/src/chat_models.ts index e4b6f37ef856..e3521e7dc5e2 100644 --- a/libs/langchain-anthropic/src/chat_models.ts +++ b/libs/langchain-anthropic/src/chat_models.ts @@ -182,8 +182,9 @@ function extractToken(chunk: AIMessageChunk): string | undefined { /** * Wrapper around Anthropic large language models. * - * To use you should have the `@anthropic-ai/sdk` package installed, with the - * `ANTHROPIC_API_KEY` environment variable set. + * To use this package, you should have an Anthropic API key set as an + * environment variable named `ANTHROPIC_API_KEY` or passed + * into the constructor. * * @remarks * Any parameters that are valid to be passed to {@link From 85fb3dd942d609cc8cb211d3db1c9c29bc3651e8 Mon Sep 17 00:00:00 2001 From: Brace Sproul Date: Mon, 5 Aug 2024 18:06:05 -0700 Subject: [PATCH 6/6] ci[patch]: Run aws package in daily standard tests (#6404) --- .github/workflows/standard-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/standard-tests.yml b/.github/workflows/standard-tests.yml index fdffbbe619d4..641b5ba197cc 100644 --- a/.github/workflows/standard-tests.yml +++ b/.github/workflows/standard-tests.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - package: [anthropic, cohere, google-genai, groq, mistralai] + package: [anthropic, cohere, google-genai, groq, mistralai, aws] steps: - uses: actions/checkout@v4 - name: Use Node.js 18.x