From 542bf80af983957599303d2bfed5b25d8778dc75 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 10 Jul 2023 09:26:21 -0700 Subject: [PATCH] Allow ids passed into SupabaseVectorStore for upsertion, adds tests, updates docs (#1915) * Allow ids passed into SupabaseVectorStore for upsertion, adds tests, updates docs * Change bad variable name --- .../vector_stores/integrations/pinecone.md | 1 + .../indexes/vector_stores/chroma/delete.ts | 1 + .../elasticsearch/elasticsearch.ts | 1 + .../vector_stores/supabase_deletion.ts | 1 + ...base_with_query_builder_metadata_filter.ts | 1 + .../indexes/vector_stores/weaviate_delete.ts | 1 + langchain/src/vectorstores/supabase.ts | 24 +++++++--- .../src/vectorstores/tests/chroma.int.test.ts | 32 ++++++++++++++ .../tests/elasticsearch.int.test.ts | 42 +++++++++++++----- .../vectorstores/tests/pinecone.int.test.ts | 13 +++++- .../vectorstores/tests/supabase.int.test.ts | 37 ++++++++++++++++ .../vectorstores/tests/weaviate.int.test.ts | 44 ++++++++++++++++--- 12 files changed, 175 insertions(+), 23 deletions(-) diff --git a/docs/docs/modules/indexes/vector_stores/integrations/pinecone.md b/docs/docs/modules/indexes/vector_stores/integrations/pinecone.md index d3a3e841d8ff..cfcbbb4d0240 100644 --- a/docs/docs/modules/indexes/vector_stores/integrations/pinecone.md +++ b/docs/docs/modules/indexes/vector_stores/integrations/pinecone.md @@ -154,6 +154,7 @@ const docs = [ }), ]; +// Also takes an additional {ids: []} parameter for upsertion const ids = await pineconeStore.addDocuments(docs); const results = await pineconeStore.similaritySearch(pageContent, 2, { diff --git a/examples/src/indexes/vector_stores/chroma/delete.ts b/examples/src/indexes/vector_stores/chroma/delete.ts index f89d4e75dbee..2db950d34c62 100644 --- a/examples/src/indexes/vector_stores/chroma/delete.ts +++ b/examples/src/indexes/vector_stores/chroma/delete.ts @@ -44,6 +44,7 @@ const documents = [ }, ]; +// Also supports an additional {ids: []} parameter for upsertion 
const ids = await vectorStore.addDocuments(documents); const response = await vectorStore.similaritySearch("scared", 2); diff --git a/examples/src/indexes/vector_stores/elasticsearch/elasticsearch.ts b/examples/src/indexes/vector_stores/elasticsearch/elasticsearch.ts index 7fd98baca237..e0fcb0df9085 100644 --- a/examples/src/indexes/vector_stores/elasticsearch/elasticsearch.ts +++ b/examples/src/indexes/vector_stores/elasticsearch/elasticsearch.ts @@ -57,6 +57,7 @@ export async function run() { // await ElasticVectorSearch.fromDocuments(docs, embeddings, clientArgs); const vectorStore = new ElasticVectorSearch(embeddings, clientArgs); + // Also supports an additional {ids: []} parameter for upsertion const ids = await vectorStore.addDocuments(docs); /* Search the vector DB independently with meta filters */ diff --git a/examples/src/indexes/vector_stores/supabase_deletion.ts b/examples/src/indexes/vector_stores/supabase_deletion.ts index 59fc81b69048..e479c7e7e249 100644 --- a/examples/src/indexes/vector_stores/supabase_deletion.ts +++ b/examples/src/indexes/vector_stores/supabase_deletion.ts @@ -26,6 +26,7 @@ export const run = async () => { { pageContent: "hello", metadata: { b: 1, c: 9, stuff: "wrong" } }, ]; + // Also takes an additional {ids: []} parameter for upsertion const ids = await store.addDocuments(docs); const resultA = await store.similaritySearch("hello", 2); diff --git a/examples/src/indexes/vector_stores/supabase_with_query_builder_metadata_filter.ts b/examples/src/indexes/vector_stores/supabase_with_query_builder_metadata_filter.ts index 3b645dbc6d0c..4dea13091487 100644 --- a/examples/src/indexes/vector_stores/supabase_with_query_builder_metadata_filter.ts +++ b/examples/src/indexes/vector_stores/supabase_with_query_builder_metadata_filter.ts @@ -42,6 +42,7 @@ export const run = async () => { { pageContent: "what's this", metadata: { b: 4, c: 6, stuff: "right" } }, ]; + // Also supports an additional {ids: []} parameter for upsertion await 
store.addDocuments(docs); const funcFilterA: SupabaseFilterRPCCall = (rpc) => diff --git a/examples/src/indexes/vector_stores/weaviate_delete.ts b/examples/src/indexes/vector_stores/weaviate_delete.ts index dd5ffd8f6c92..b942a4ea14c6 100644 --- a/examples/src/indexes/vector_stores/weaviate_delete.ts +++ b/examples/src/indexes/vector_stores/weaviate_delete.ts @@ -22,6 +22,7 @@ export async function run() { const docs = [{ pageContent: "see ya!", metadata: { foo: "bar" } }]; + // Also supports an additional {ids: []} parameter for upsertion const ids = await store.addDocuments(docs); // Search the index without any filters diff --git a/langchain/src/vectorstores/supabase.ts b/langchain/src/vectorstores/supabase.ts index 3fc9e64dfd31..a6cbab413ae2 100644 --- a/langchain/src/vectorstores/supabase.ts +++ b/langchain/src/vectorstores/supabase.ts @@ -50,15 +50,20 @@ export class SupabaseVectorStore extends VectorStore { this.filter = args.filter; } - async addDocuments(documents: Document[]) { + async addDocuments(documents: Document[], options?: { ids?: string[] }) { const texts = documents.map(({ pageContent }) => pageContent); return this.addVectors( await this.embeddings.embedDocuments(texts), - documents + documents, + options ); } - async addVectors(vectors: number[][], documents: Document[]) { + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ) { const rows = vectors.map((embedding, idx) => ({ content: documents[idx].pageContent, embedding, @@ -68,9 +73,14 @@ export class SupabaseVectorStore extends VectorStore { // upsert returns 500/502/504 (yes really any of them) if given too many rows/characters // ~2000 trips it, but my data is probably smaller than average pageContent and metadata const chunkSize = 500; - let ids: string[] = []; + let returnedIds: string[] = []; for (let i = 0; i < rows.length; i += chunkSize) { - const chunk = rows.slice(i, i + chunkSize); + const chunk = rows.slice(i, i + 
chunkSize).map((row, offsetInChunk) => { + if (options?.ids) { + return { id: options.ids[i + offsetInChunk], ...row }; + } + return row; + }); const res = await this.client.from(this.tableName).upsert(chunk).select(); if (res.error) { @@ -79,10 +89,10 @@ export class SupabaseVectorStore extends VectorStore { ); } if (res.data) { - ids = ids.concat(res.data.map((row) => row.id)); + returnedIds = returnedIds.concat(res.data.map((row) => row.id)); } } - return ids; + return returnedIds; } async delete(params: { ids: string[] }): Promise { diff --git a/langchain/src/vectorstores/tests/chroma.int.test.ts b/langchain/src/vectorstores/tests/chroma.int.test.ts index 45635934db8f..7f5c6c0c54d7 100644 --- a/langchain/src/vectorstores/tests/chroma.int.test.ts +++ b/langchain/src/vectorstores/tests/chroma.int.test.ts @@ -51,6 +51,38 @@ describe("Chroma", () => { ]); }); + test.skip("upsert", async () => { + const pageContent = faker.lorem.sentence(5); + const id = uuid.v4(); + + const ids = await chromaStore.addDocuments([ + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: id } }, + ]); + + const results = await chromaStore.similaritySearch(pageContent, 4, { + foo: id, + }); + + expect(results.length).toEqual(2); + + const ids2 = await chromaStore.addDocuments( + [ + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: id } }, + ], + { ids } + ); + + expect(ids).toEqual(ids2); + + const newResults = await chromaStore.similaritySearch(pageContent, 4, { + foo: id, + }); + + expect(newResults.length).toEqual(2); + }); + test.skip("delete by ids", async () => { const pageContent = faker.lorem.sentence(5); const id = uuid.v4(); diff --git a/langchain/src/vectorstores/tests/elasticsearch.int.test.ts b/langchain/src/vectorstores/tests/elasticsearch.int.test.ts index 979b7bd64469..291f0ea25918 100644 --- a/langchain/src/vectorstores/tests/elasticsearch.int.test.ts +++ b/langchain/src/vectorstores/tests/elasticsearch.int.test.ts @@ -36,31 +36,53 @@ 
test("ElasticVectorSearch integration", async () => { expect(store).toBeDefined(); + const createdAt = new Date().getTime(); + const ids = await store.addDocuments([ - { pageContent: "hello", metadata: { a: 2 } }, - { pageContent: "car", metadata: { a: 1 } }, - { pageContent: "adjective", metadata: { a: 1 } }, - { pageContent: "hi", metadata: { a: 1 } }, + { pageContent: "hello", metadata: { a: createdAt + 1 } }, + { pageContent: "car", metadata: { a: createdAt } }, + { pageContent: "adjective", metadata: { a: createdAt } }, + { pageContent: "hi", metadata: { a: createdAt } }, ]); const results1 = await store.similaritySearch("hello!", 1); expect(results1).toHaveLength(1); expect(results1).toEqual([ - new Document({ metadata: { a: 2 }, pageContent: "hello" }), + new Document({ metadata: { a: createdAt + 1 }, pageContent: "hello" }), ]); - const results2 = await store.similaritySearchWithScore("testing!", 3, { - a: 1, + const results2 = await store.similaritySearchWithScore("testing!", 6, { + a: createdAt, }); expect(results2).toHaveLength(3); + const ids2 = await store.addDocuments( + [ + { pageContent: "hello upserted", metadata: { a: createdAt + 1 } }, + { pageContent: "car upserted", metadata: { a: createdAt } }, + { pageContent: "adjective upserted", metadata: { a: createdAt } }, + { pageContent: "hi upserted", metadata: { a: createdAt } }, + ], + { ids } + ); + + expect(ids).toEqual(ids2); + + const results3 = await store.similaritySearchWithScore("testing!", 6, { + a: createdAt, + }); + + expect(results3).toHaveLength(3); + + console.log(`Upserted:`, results3); + await store.delete({ ids: ids.slice(2) }); - const results3 = await store.similaritySearchWithScore("hello!", 1, { - a: 1, + const results4 = await store.similaritySearchWithScore("testing!", 3, { + a: createdAt, }); - expect(results3).toHaveLength(1); + expect(results4).toHaveLength(1); }); diff --git a/langchain/src/vectorstores/tests/pinecone.int.test.ts 
b/langchain/src/vectorstores/tests/pinecone.int.test.ts index 5004cff60415..9fbed3b0ef93 100644 --- a/langchain/src/vectorstores/tests/pinecone.int.test.ts +++ b/langchain/src/vectorstores/tests/pinecone.int.test.ts @@ -33,9 +33,20 @@ describe("PineconeStore", () => { [documentId] ); - const results = await pineconeStore.similaritySearch(pageContent, 1); + const results = await pineconeStore.similaritySearch(pageContent, 2); expect(results).toEqual([new Document({ metadata: {}, pageContent })]); + + await pineconeStore.addDocuments( + [{ pageContent: `${pageContent} upserted`, metadata: {} }], + [documentId] + ); + + const results2 = await pineconeStore.similaritySearch(pageContent, 2); + + expect(results2).toEqual([ + new Document({ metadata: {}, pageContent: `${pageContent} upserted` }), + ]); }); test("auto-generated ids", async () => { diff --git a/langchain/src/vectorstores/tests/supabase.int.test.ts b/langchain/src/vectorstores/tests/supabase.int.test.ts index 1878f6320e67..f57a4e3d9021 100644 --- a/langchain/src/vectorstores/tests/supabase.int.test.ts +++ b/langchain/src/vectorstores/tests/supabase.int.test.ts @@ -165,6 +165,43 @@ test("Search a SupabaseVectorStore with a functional metadata filter", async () ]); }); +test("Upsert on a SupabaseVectorStore", async () => { + const client = createClient( + process.env.SUPABASE_VECTOR_STORE_URL!, + process.env.SUPABASE_VECTOR_STORE_PRIVATE_KEY! 
+ ); + + const embeddings = new OpenAIEmbeddings(); + + const store = new SupabaseVectorStore(embeddings, { + client, + tableName: "documents", + }); + + expect(store).toBeDefined(); + + const createdAt = new Date().getTime(); + + const ids = await store.addDocuments([ + { pageContent: "hello 0", metadata: { created_at: createdAt } }, + ]); + + const results = await store.similaritySearch("hello", 2, { + created_at: createdAt, + }); + expect(results).toHaveLength(1); + const ids2 = await store.addDocuments( + [{ pageContent: "hello 1", metadata: { created_at: createdAt } }], + { ids } + ); + expect(ids).toEqual(ids2); + const results2 = await store.similaritySearch("hello", 2, { + created_at: createdAt, + }); + expect(results2).toHaveLength(1); + expect(results2[0].pageContent).toEqual("hello 1"); +}); + test("Delete on a SupabaseVectorStore", async () => { const client = createClient( process.env.SUPABASE_VECTOR_STORE_URL!, diff --git a/langchain/src/vectorstores/tests/weaviate.int.test.ts b/langchain/src/vectorstores/tests/weaviate.int.test.ts index 8b10eab9b31a..11a0c494789e 100644 --- a/langchain/src/vectorstores/tests/weaviate.int.test.ts +++ b/langchain/src/vectorstores/tests/weaviate.int.test.ts @@ -90,7 +90,7 @@ test.skip("WeaviateStore", async () => { ]); }); -test.skip("WeaviateStore delete", async () => { +test.skip("WeaviateStore upsert + delete", async () => { // Something wrong with the weaviate-ts-client types, so we need to disable // eslint-disable-next-line @typescript-eslint/no-explicit-any const client = (weaviate as any).client({ @@ -131,7 +131,7 @@ test.skip("WeaviateStore delete", async () => { }, ]); - const results = await store.similaritySearch("hello world", 2, { + const results = await store.similaritySearch("hello world", 4, { where: { operator: "Equal", path: ["deletionTest"], @@ -149,9 +149,23 @@ test.skip("WeaviateStore delete", async () => { }), ]); - await store.delete({ ids: ids.slice(0, 1) }); + const ids2 = await 
store.addDocuments( + [ + { + pageContent: "hello world upserted", + metadata: { deletionTest: (createdAt + 1).toString() }, + }, + { + pageContent: "hello world upserted", + metadata: { deletionTest: (createdAt + 1).toString() }, + }, + ], + { ids } + ); - const results2 = await store.similaritySearch("hello world", 1, { + expect(ids2).toEqual(ids); + + const results2 = await store.similaritySearch("hello world", 4, { where: { operator: "Equal", path: ["deletionTest"], @@ -160,7 +174,27 @@ test.skip("WeaviateStore delete", async () => { }); expect(results2).toEqual([ new Document({ - pageContent: "hello world", + pageContent: "hello world upserted", + metadata: { deletionTest: (createdAt + 1).toString() }, + }), + new Document({ + pageContent: "hello world upserted", + metadata: { deletionTest: (createdAt + 1).toString() }, + }), + ]); + + await store.delete({ ids: ids.slice(0, 1) }); + + const results3 = await store.similaritySearch("hello world", 1, { + where: { + operator: "Equal", + path: ["deletionTest"], + valueText: (createdAt + 1).toString(), + }, + }); + expect(results3).toEqual([ + new Document({ + pageContent: "hello world upserted", metadata: { deletionTest: (createdAt + 1).toString() }, }), ]);