Skip to content

Commit

Permalink
Tigris vectorstore (#1423)
Browse files Browse the repository at this point in the history
* Add Tigris Vector Store

* review feedback

* Skip integration test, small docs update

* Remove unused example

* Revert build artifact change

* Remove build artifact

---------

Co-authored-by: Ovais Tariq <ot@tigrisdata.com>
Co-authored-by: Ovais Tariq <ovaistariq@gmail.com>
  • Loading branch information
3 people authored May 26, 2023
1 parent da6320a commit dc948fc
Show file tree
Hide file tree
Showing 14 changed files with 459 additions and 11 deletions.
45 changes: 45 additions & 0 deletions docs/docs/modules/indexes/vector_stores/integrations/tigris.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
sidebar_class_name: node-only
---

import CodeBlock from "@theme/CodeBlock";

# Tigris

Tigris makes it easy to build AI applications with vector embeddings.
It is a fully managed cloud-native database that allows you to store and
index documents and vector embeddings for fast and scalable vector search.

:::tip Compatibility
Only available on Node.js.
:::

## Setup

### 1. Install the Tigris SDK

Install the SDK as follows:

```bash npm2yarn
npm install -S @tigrisdata/vector
```

### 2. Fetch Tigris API credentials

You can sign up for a free Tigris account [here](https://console.preview.tigrisdata.cloud/signup).

Once you have signed up for the Tigris account, create a new project called `vectordemo`.
Next, make a note of the `clientId` and `clientSecret`, which you can get from the
Application Keys section of the project.

## Index docs

import FromDocs from "@examples/indexes/vector_stores/tigris/fromDocs.ts";

<CodeBlock language="typescript">{FromDocs}</CodeBlock>

## Query docs

import Search from "@examples/indexes/vector_stores/tigris/search.ts";

<CodeBlock language="typescript">{Search}</CodeBlock>
4 changes: 4 additions & 0 deletions examples/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,9 @@ MYSCALE_PORT=ADD_YOURS_HERE
MYSCALE_USERNAME=ADD_YOURS_HERE
MYSCALE_PASSWORD=ADD_YOURS_HERE
REDIS_URL=ADD_YOURS_HERE
TIGRIS_URI=ADD_YOURS_HERE
TIGRIS_PROJECT=ADD_YOURS_HERE
TIGRIS_CLIENT_ID=ADD_YOURS_HERE
TIGRIS_CLIENT_SECRET=ADD_YOURS_HERE
NOTION_INTEGRATION_TOKEN=ADD_YOURS_HERE
FIGMA_ACCESS_TOKEN=ADD_YOURS_HERE
1 change: 1 addition & 0 deletions examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"@prisma/client": "^4.11.0",
"@supabase/supabase-js": "^2.10.0",
"@tensorflow/tfjs-backend-cpu": "^4.4.0",
"@tigrisdata/vector": "^1.1.0",
"@upstash/redis": "^1.20.6",
"@zilliz/milvus2-sdk-node": "^2.2.7",
"axios": "^0.26.0",
Expand Down
36 changes: 36 additions & 0 deletions examples/src/indexes/vector_stores/tigris/fromDocs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { VectorDocumentStore } from "@tigrisdata/vector";
import { Document } from "langchain/document";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { TigrisVectorStore } from "langchain/vectorstores/tigris";

const index = new VectorDocumentStore({
connection: {
serverUrl: "api.preview.tigrisdata.cloud",
projectName: process.env.TIGRIS_PROJECT,
clientId: process.env.TIGRIS_CLIENT_ID,
clientSecret: process.env.TIGRIS_CLIENT_SECRET,
},
indexName: "examples_index",
numDimensions: 1536, // match the OpenAI embedding size
});

const docs = [
new Document({
metadata: { foo: "bar" },
pageContent: "tigris is a cloud-native vector db",
}),
new Document({
metadata: { foo: "bar" },
pageContent: "the quick brown fox jumped over the lazy dog",
}),
new Document({
metadata: { baz: "qux" },
pageContent: "lorem ipsum dolor sit amet",
}),
new Document({
metadata: { baz: "qux" },
pageContent: "tigris is a river",
}),
];

await TigrisVectorStore.fromDocuments(docs, new OpenAIEmbeddings(), { index });
33 changes: 33 additions & 0 deletions examples/src/indexes/vector_stores/tigris/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { VectorDocumentStore } from "@tigrisdata/vector";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { TigrisVectorStore } from "langchain/vectorstores/tigris";

const index = new VectorDocumentStore({
connection: {
serverUrl: "api.preview.tigrisdata.cloud",
projectName: process.env.TIGRIS_PROJECT,
clientId: process.env.TIGRIS_CLIENT_ID,
clientSecret: process.env.TIGRIS_CLIENT_SECRET,
},
indexName: "examples_index",
numDimensions: 1536, // match the OpenAI embedding size
});

const vectorStore = await TigrisVectorStore.fromExistingIndex(
new OpenAIEmbeddings(),
{ index }
);

/* Search the vector DB independently with metadata filters */
const results = await vectorStore.similaritySearch("tigris", 1, {
"metadata.foo": "bar",
});
console.log(JSON.stringify(results, null, 2));
/*
[
Document {
pageContent: 'tigris is a cloud-native vector db',
metadata: { foo: 'bar' }
}
]
*/
4 changes: 4 additions & 0 deletions langchain/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,9 @@ FIGMA_ACCESS_TOKEN=ADD_YOURS_HERE
REDIS_URL=ADD_YOURS_HERE
UPSTASH_REDIS_REST_URL=https://ADD_YOURS_HERE.upstash.io
UPSTASH_REDIS_REST_TOKEN=ADD_YOURS_HERE
TIGRIS_URI=ADD_YOURS_HERE
TIGRIS_PROJECT=ADD_YOURS_HERE
TIGRIS_CLIENT_ID=ADD_YOURS_HERE
TIGRIS_CLIENT_SECRET=ADD_YOURS_HERE
NOTION_INTEGRATION_TOKEN=ADD_YOURS_HERE
FIGMA_ACCESS_TOKEN=ADD_YOURS_HERE
3 changes: 3 additions & 0 deletions langchain/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ vectorstores/myscale.d.ts
vectorstores/redis.cjs
vectorstores/redis.js
vectorstores/redis.d.ts
vectorstores/tigris.cjs
vectorstores/tigris.js
vectorstores/tigris.d.ts
text_splitter.cjs
text_splitter.js
text_splitter.d.ts
Expand Down
13 changes: 13 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@
"vectorstores/redis.cjs",
"vectorstores/redis.js",
"vectorstores/redis.d.ts",
"vectorstores/tigris.cjs",
"vectorstores/tigris.js",
"vectorstores/tigris.d.ts",
"text_splitter.cjs",
"text_splitter.js",
"text_splitter.d.ts",
Expand Down Expand Up @@ -386,6 +389,7 @@
"@tensorflow/tfjs-backend-cpu": "^4.4.0",
"@tensorflow/tfjs-converter": "^4.4.0",
"@tensorflow/tfjs-core": "^4.4.0",
"@tigrisdata/vector": "^1.1.0",
"@tsconfig/recommended": "^1.0.2",
"@types/d3-dsv": "^2",
"@types/flat": "^5.0.2",
Expand Down Expand Up @@ -453,6 +457,7 @@
"@tensorflow-models/universal-sentence-encoder": "*",
"@tensorflow/tfjs-converter": "*",
"@tensorflow/tfjs-core": "*",
"@tigrisdata/vector": "^1.1.0",
"@upstash/redis": "^1.20.6",
"@zilliz/milvus2-sdk-node": ">=2.2.7",
"apify-client": "^2.7.1",
Expand Down Expand Up @@ -523,6 +528,9 @@
"@tensorflow/tfjs-core": {
"optional": true
},
"@tigrisdata/vector": {
"optional": true
},
"@upstash/redis": {
"optional": true
},
Expand Down Expand Up @@ -879,6 +887,11 @@
"import": "./vectorstores/redis.js",
"require": "./vectorstores/redis.cjs"
},
"./vectorstores/tigris": {
"types": "./vectorstores/tigris.d.ts",
"import": "./vectorstores/tigris.js",
"require": "./vectorstores/tigris.cjs"
},
"./text_splitter": {
"types": "./text_splitter.d.ts",
"import": "./text_splitter.js",
Expand Down
2 changes: 2 additions & 0 deletions langchain/scripts/create-entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ const entrypoints = {
"vectorstores/prisma": "vectorstores/prisma",
"vectorstores/myscale": "vectorstores/myscale",
"vectorstores/redis": "vectorstores/redis",
"vectorstores/tigris": "vectorstores/tigris",
// text_splitter
text_splitter: "text_splitter",
// memory
Expand Down Expand Up @@ -186,6 +187,7 @@ const requiresOptionalDependency = [
"vectorstores/milvus",
"vectorstores/myscale",
"vectorstores/redis",
"vectorstores/tigris",
"document_loaders/web/apify_dataset",
"document_loaders/web/cheerio",
"document_loaders/web/puppeteer",
Expand Down
75 changes: 75 additions & 0 deletions langchain/src/vectorstores/tests/tigris.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* eslint-disable no-process-env */
/* eslint-disable @typescript-eslint/no-non-null-assertion */
import { beforeEach, describe, expect, test } from "@jest/globals";
import { faker } from "@faker-js/faker";
import { VectorDocumentStore } from "@tigrisdata/vector";
import * as uuid from "uuid";
import { Document } from "../../document.js";
import { OpenAIEmbeddings } from "../../embeddings/openai.js";
import { TigrisVectorStore } from "../tigris.js";

describe.skip("TigrisVectorStore", () => {
let tigrisStore: TigrisVectorStore;

beforeEach(async () => {
const client = new VectorDocumentStore({
connection: {
serverUrl: process.env.TIGRIS_URI,
projectName: process.env.TIGRIS_PROJECT,
clientId: process.env.TIGRIS_CLIENT_ID,
clientSecret: process.env.TIGRIS_CLIENT_SECRET,
},
indexName: "integration_test_index",
numDimensions: 1536,
});

const embeddings = new OpenAIEmbeddings();
tigrisStore = new TigrisVectorStore(embeddings, { index: client });
});

test("user-provided ids", async () => {
const documentId = uuid.v4();
const pageContent = faker.lorem.sentence(5);

await tigrisStore.addDocuments(
[{ pageContent, metadata: {} }],
[documentId]
);

const results = await tigrisStore.similaritySearch(pageContent, 1);

expect(results).toEqual([new Document({ metadata: {}, pageContent })]);
});

test("auto-generated ids", async () => {
const pageContent = faker.lorem.sentence(5);

await tigrisStore.addDocuments([{ pageContent, metadata: { foo: "bar" } }]);

const results = await tigrisStore.similaritySearch(pageContent, 1);

expect(results).toEqual([
new Document({ metadata: { foo: "bar" }, pageContent }),
]);
});

test("metadata filtering", async () => {
const pageContent = faker.lorem.sentence(5);
const id = uuid.v4();

await tigrisStore.addDocuments([
{ pageContent, metadata: { foo: "bar" } },
{ pageContent, metadata: { foo: id } },
{ pageContent, metadata: { foo: "qux" } },
]);

// If the filter wasn't working, we'd get all 3 documents back
const results = await tigrisStore.similaritySearch(pageContent, 3, {
"metadata.foo": id,
});

expect(results).toEqual([
new Document({ metadata: { foo: id }, pageContent }),
]);
});
});
76 changes: 76 additions & 0 deletions langchain/src/vectorstores/tests/tigris.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { jest, test, expect } from "@jest/globals";
import { FakeEmbeddings } from "../../embeddings/fake.js";

import { TigrisVectorStore } from "../tigris.js";

// Checks that documents added with caller-supplied ids are forwarded to the
// index together with those ids, their embeddings, and their metadata.
test("TigrisVectorStore with external ids", async () => {
  // Stand-in for the Tigris index: records writes and returns no search hits.
  const mockIndex = {
    addDocumentsWithVectors: jest.fn(),
    similaritySearchVectorWithScore: jest
      .fn()
      .mockImplementation(async () => []),
  };

  const vectorStore = new TigrisVectorStore(new FakeEmbeddings(), {
    index: mockIndex as any,
  });
  expect(vectorStore).toBeDefined();

  const inputDoc = {
    pageContent: "hello",
    metadata: {
      a: 1,
      b: { nested: [1, { a: 4 }] },
    },
  };
  await vectorStore.addDocuments([inputDoc], ["id1"]);

  // Exactly one write, carrying the id, the embedding vector, and the
  // document content with its nested metadata intact.
  const expectedWrite = {
    ids: ["id1"],
    embeddings: [[0.1, 0.2, 0.3, 0.4]],
    documents: [
      {
        content: "hello",
        metadata: {
          a: 1,
          b: { nested: [1, { a: 4 }] },
        },
      },
    ],
  };
  expect(mockIndex.addDocumentsWithVectors).toHaveBeenCalledTimes(1);
  expect(mockIndex.addDocumentsWithVectors).toHaveBeenCalledWith(expectedWrite);

  // The mocked search yields nothing, so the store should return no documents.
  const hits = await vectorStore.similaritySearch("hello", 1);
  expect(hits).toHaveLength(0);
});

// Checks that documents can be added without passing ids and that the index
// is still written to exactly once.
test("TigrisVectorStore with generated ids", async () => {
  // Stand-in for the Tigris index: records writes and returns no search hits.
  const mockIndex = {
    addDocumentsWithVectors: jest.fn(),
    similaritySearchVectorWithScore: jest
      .fn()
      .mockImplementation(async () => []),
  };

  const vectorStore = new TigrisVectorStore(new FakeEmbeddings(), {
    index: mockIndex as any,
  });
  expect(vectorStore).toBeDefined();

  // No ids argument is supplied here.
  const inputDoc = { pageContent: "hello", metadata: { a: 1 } };
  await vectorStore.addDocuments([inputDoc]);

  expect(mockIndex.addDocumentsWithVectors).toHaveBeenCalledTimes(1);

  // The mocked search yields nothing, so the store should return no documents.
  const hits = await vectorStore.similaritySearch("hello", 1);
  expect(hits).toHaveLength(0);
});
Loading

1 comment on commit dc948fc

@vercel
Copy link

@vercel vercel bot commented on dc948fc May 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.