From bd9ca1a49b124c6bb0f076c1af55abdbc6c8cc1c Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Mon, 20 Jan 2025 21:07:45 -0800 Subject: [PATCH 1/5] Prevent cache misses from triggering model start callback runs twice --- .../src/language_models/chat_models.ts | 119 ++++++++++++------ langchain-core/src/language_models/llms.ts | 99 +++++++++------ .../language_models/tests/chat_models.test.ts | 48 +++++++ .../src/language_models/tests/llms.test.ts | 30 +++++ 4 files changed, 220 insertions(+), 76 deletions(-) diff --git a/langchain-core/src/language_models/chat_models.ts b/langchain-core/src/language_models/chat_models.ts index 9ca563a38a04..96260010f94b 100644 --- a/langchain-core/src/language_models/chat_models.ts +++ b/langchain-core/src/language_models/chat_models.ts @@ -48,6 +48,7 @@ import { concat } from "../utils/stream.js"; import { RunnablePassthrough } from "../runnables/passthrough.js"; import { isZodSchema } from "../utils/types/is_zod_schema.js"; import { callbackHandlerPrefersStreaming } from "../callbacks/base.js"; +import { isAIMessage, isBaseMessage } from "../../messages.js"; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type ToolChoice = string | Record | "auto" | "any"; @@ -343,41 +344,52 @@ export abstract class BaseChatModel< async _generateUncached( messages: BaseMessageLike[][], parsedOptions: this["ParsedCallOptions"], - handledOptions: RunnableConfig + handledOptions: RunnableConfig, + existingRunManagers?: CallbackManagerForLLMRun[] ): Promise { const baseMessages = messages.map((messageList) => messageList.map(coerceMessageLikeToMessage) ); - const inheritableMetadata = { - ...handledOptions.metadata, - ...this.getLsParams(parsedOptions), - }; - // create callback manager and start run - const callbackManager_ = await CallbackManager.configure( - handledOptions.callbacks, - this.callbacks, - handledOptions.tags, - this.tags, - inheritableMetadata, - this.metadata, - { verbose: this.verbose } - ); - const extra = { - options: parsedOptions, - invocation_params: this?.invocationParams(parsedOptions), - batch_size: 1, - }; - const runManagers = await callbackManager_?.handleChatModelStart( - this.toJSON(), - baseMessages, - handledOptions.runId, - undefined, - extra, - undefined, - undefined, - handledOptions.runName - ); + let runManagers; + if (existingRunManagers !== undefined && existingRunManagers.length > 0) { + if (existingRunManagers.length !== baseMessages.length) { + throw new Error( + "Received invalid number of existing run managers for chat model call. Please contact us for help." 
+ ); + } + runManagers = existingRunManagers; + } else { + const inheritableMetadata = { + ...handledOptions.metadata, + ...this.getLsParams(parsedOptions), + }; + // create callback manager and start run + const callbackManager_ = await CallbackManager.configure( + handledOptions.callbacks, + this.callbacks, + handledOptions.tags, + this.tags, + inheritableMetadata, + this.metadata, + { verbose: this.verbose } + ); + const extra = { + options: parsedOptions, + invocation_params: this?.invocationParams(parsedOptions), + batch_size: 1, + }; + runManagers = await callbackManager_?.handleChatModelStart( + this.toJSON(), + baseMessages, + handledOptions.runId, + undefined, + extra, + undefined, + undefined, + handledOptions.runName + ); + } const generations: ChatGeneration[][] = []; const llmOutputs: LLMResult["llmOutput"][] = []; // Even if stream is not explicitly called, check if model is implicitly @@ -511,7 +523,12 @@ export abstract class BaseChatModel< // eslint-disable-next-line @typescript-eslint/no-explicit-any parsedOptions: any; handledOptions: RunnableConfig; - }): Promise { + }): Promise< + LLMResult & { + missingPromptIndices: number[]; + existingRunManagers?: CallbackManagerForLLMRun[]; + } + > { const baseMessages = messages.map((messageList) => messageList.map(coerceMessageLikeToMessage) ); @@ -580,7 +597,24 @@ export abstract class BaseChatModel< cachedResults.map(async ({ result: promiseResult, runManager }, i) => { if (promiseResult.status === "fulfilled") { const result = promiseResult.value as Generation[]; - generations[i] = result; + generations[i] = result.map((result) => { + if ( + "message" in result && + isBaseMessage(result.message) && + isAIMessage(result.message) + ) { + result.message.usage_metadata = { + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + }; + } + result.generationInfo = { + ...result.generationInfo, + tokenUsage: {}, + }; + return result; + }); if (result.length) { await runManager?.handleLLMNewToken(result[0].text); } @@ -598,6 +632,7 @@ export abstract class BaseChatModel< const output = { generations, missingPromptIndices, + existingRunManagers: runManagers, }; // This defines RUN_KEY as a non-enumerable property on the output object @@ -650,20 +685,24 @@ export abstract class BaseChatModel< callOptions as CallOptions ); - const { generations, missingPromptIndices } = await this._generateCached({ - messages: baseMessages, - cache, - llmStringKey, - parsedOptions: callOptions, - handledOptions: runnableConfig, - }); + const { generations, missingPromptIndices, existingRunManagers } = + await this._generateCached({ + messages: baseMessages, + cache, + llmStringKey, + parsedOptions: callOptions, + handledOptions: runnableConfig, + }); let llmOutput = {}; if (missingPromptIndices.length > 0) { const results = await this._generateUncached( missingPromptIndices.map((i) => baseMessages[i]), callOptions, - runnableConfig + runnableConfig, + existingRunManagers !== undefined + ? 
missingPromptIndices.map((i) => existingRunManagers?.[i]) + : undefined ); await Promise.all( results.generations.map(async (generation, index) => { diff --git a/langchain-core/src/language_models/llms.ts b/langchain-core/src/language_models/llms.ts index ce75a52479be..2256866b2f0d 100644 --- a/langchain-core/src/language_models/llms.ts +++ b/langchain-core/src/language_models/llms.ts @@ -240,32 +240,43 @@ export abstract class BaseLLM< async _generateUncached( prompts: string[], parsedOptions: this["ParsedCallOptions"], - handledOptions: BaseCallbackConfig + handledOptions: BaseCallbackConfig, + existingRunManagers?: CallbackManagerForLLMRun[] ): Promise { - const callbackManager_ = await CallbackManager.configure( - handledOptions.callbacks, - this.callbacks, - handledOptions.tags, - this.tags, - handledOptions.metadata, - this.metadata, - { verbose: this.verbose } - ); - const extra = { - options: parsedOptions, - invocation_params: this?.invocationParams(parsedOptions), - batch_size: prompts.length, - }; - const runManagers = await callbackManager_?.handleLLMStart( - this.toJSON(), - prompts, - handledOptions.runId, - undefined, - extra, - undefined, - undefined, - handledOptions?.runName - ); + let runManagers; + if (existingRunManagers !== undefined && existingRunManagers.length > 0) { + if (existingRunManagers.length !== prompts.length) { + throw new Error( + "Received invalid number of existing run managers for LLM call. Please contact us for help." + ); + } + runManagers = existingRunManagers; + } else { + const callbackManager_ = await CallbackManager.configure( + handledOptions.callbacks, + this.callbacks, + handledOptions.tags, + this.tags, + handledOptions.metadata, + this.metadata, + { verbose: this.verbose } + ); + const extra = { + options: parsedOptions, + invocation_params: this?.invocationParams(parsedOptions), + batch_size: prompts.length, + }; + runManagers = await callbackManager_?.handleLLMStart( + this.toJSON(), + prompts, + handledOptions.runId, + undefined, + extra, + undefined, + undefined, + handledOptions?.runName + ); + } // Even if stream is not explicitly called, check if model is implicitly // called from streamEvents() or streamLog() to get all streamed events. 
// Bail out if _streamResponseChunks not overridden @@ -346,7 +357,12 @@ export abstract class BaseLLM< parsedOptions: any; handledOptions: RunnableConfig; runId?: string; - }): Promise { + }): Promise< + LLMResult & { + missingPromptIndices: number[]; + existingRunManagers?: CallbackManagerForLLMRun[]; + } + > { const callbackManager_ = await CallbackManager.configure( handledOptions.callbacks, this.callbacks, @@ -401,7 +417,13 @@ export abstract class BaseLLM< cachedResults.map(async ({ result: promiseResult, runManager }, i) => { if (promiseResult.status === "fulfilled") { const result = promiseResult.value as Generation[]; - generations[i] = result; + generations[i] = result.map((result) => { + result.generationInfo = { + ...result.generationInfo, + tokenUsage: {}, + }; + return result; + }); if (result.length) { await runManager?.handleLLMNewToken(result[0].text); } @@ -419,6 +441,7 @@ export abstract class BaseLLM< const output = { generations, missingPromptIndices, + existingRunManagers: runManagers, }; // This defines RUN_KEY as a non-enumerable property on the output object @@ -465,21 +488,25 @@ export abstract class BaseLLM< const llmStringKey = this._getSerializedCacheKeyParametersForCall( callOptions as CallOptions ); - const { generations, missingPromptIndices } = await this._generateCached({ - prompts, - cache, - llmStringKey, - parsedOptions: callOptions, - handledOptions: runnableConfig, - runId: runnableConfig.runId, - }); + const { generations, missingPromptIndices, existingRunManagers } = + await this._generateCached({ + prompts, + cache, + llmStringKey, + parsedOptions: callOptions, + handledOptions: runnableConfig, + runId: runnableConfig.runId, + }); let llmOutput = {}; if (missingPromptIndices.length > 0) { const results = await this._generateUncached( missingPromptIndices.map((i) => prompts[i]), callOptions, - runnableConfig + runnableConfig, + existingRunManagers !== undefined + ? 
missingPromptIndices.map((i) => existingRunManagers?.[i]) + : undefined ); await Promise.all( results.generations.map(async (generation, index) => { diff --git a/langchain-core/src/language_models/tests/chat_models.test.ts b/langchain-core/src/language_models/tests/chat_models.test.ts index f335d5edc40f..8598d7aa6cd3 100644 --- a/langchain-core/src/language_models/tests/chat_models.test.ts +++ b/langchain-core/src/language_models/tests/chat_models.test.ts @@ -287,6 +287,54 @@ test("Test ChatModel can cache complex messages", async () => { expect(cachedMsg.content).toEqual(JSON.stringify(contentToCache, null, 2)); }); +test("Test ChatModel with cache does not start multiple chat model runs", async () => { + const model = new FakeChatModel({ + cache: true, + }); + if (!model.cache) { + throw new Error("Cache not enabled"); + } + + const contentToCache = [ + { + type: "text", + text: "Hello there again!", + }, + ]; + const humanMessage = new HumanMessage({ + content: contentToCache, + }); + + const prompt = getBufferString([humanMessage]); + const llmKey = model._getSerializedCacheKeyParametersForCall({}); + + const value = await model.cache.lookup(prompt, llmKey); + expect(value).toBeNull(); + + // Invoke model to trigger cache update + const eventStream = model.streamEvents([humanMessage], { version: "v2" }); + + expect(await model.cache.lookup(prompt, llmKey)).toBeDefined(); + + const events = []; + for await (const event of eventStream) { + events.push(event); + } + expect(events.length).toEqual(2); + expect(events[0].event).toEqual("on_chat_model_start"); + expect(events[1].event).toEqual("on_chat_model_end"); + + const eventStream2 = model.streamEvents([humanMessage], { version: "v2" }); + + const events2 = []; + for await (const event of eventStream2) { + events2.push(event); + } + expect(events2.length).toEqual(2); + expect(events2[0].event).toEqual("on_chat_model_start"); + expect(events2[1].event).toEqual("on_chat_model_end"); +}); + test("Test ChatModel can emit a custom event", async () => { const model = new FakeListChatModel({ responses: ["hi"], diff --git a/langchain-core/src/language_models/tests/llms.test.ts b/langchain-core/src/language_models/tests/llms.test.ts index 54b56e42f7d8..f1cf453bfc75 100644 --- a/langchain-core/src/language_models/tests/llms.test.ts +++ b/langchain-core/src/language_models/tests/llms.test.ts @@ -42,6 +42,36 @@ test("Test FakeLLM uses callbacks with a cache", async () => { expect(response2).toEqual(acc); }); +test("Test LLM with cache does not start multiple LLM runs", async () => { + const model = new FakeLLM({ + cache: true, + }); + if (!model.cache) { + throw new Error("Cache not enabled"); + } + + // Invoke model to trigger cache update + const eventStream = model.streamEvents("Hello there!", { version: "v2" }); + + const events = []; + for await (const event of eventStream) { + events.push(event); + } + expect(events.length).toEqual(2); + expect(events[0].event).toEqual("on_llm_start"); + expect(events[1].event).toEqual("on_llm_end"); + + const eventStream2 = model.streamEvents("Hello there!", { version: "v2" }); + + const events2 = []; + for await (const event of eventStream2) { + events2.push(event); + } + expect(events2.length).toEqual(2); + expect(events2[0].event).toEqual("on_llm_start"); + expect(events2[1].event).toEqual("on_llm_end"); +}); + test("Test FakeStreamingLLM works when streaming through a prompt", async () => { const prompt = HumanMessagePromptTemplate.fromTemplate("hello there {name}"); const model = new 
FakeStreamingLLM({}); From f7f7944b9e5da512dfccd5860a6200011f6187cb Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Mon, 20 Jan 2025 21:17:37 -0800 Subject: [PATCH 2/5] Fix build --- langchain-core/src/language_models/chat_models.ts | 5 +++-- langchain-core/src/language_models/llms.ts | 2 +- libs/langchain-openai/src/tests/chat_models.int.test.ts | 6 ++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/langchain-core/src/language_models/chat_models.ts b/langchain-core/src/language_models/chat_models.ts index 96260010f94b..860333a20fe7 100644 --- a/langchain-core/src/language_models/chat_models.ts +++ b/langchain-core/src/language_models/chat_models.ts @@ -9,6 +9,8 @@ import { coerceMessageLikeToMessage, AIMessageChunk, isAIMessageChunk, + isBaseMessage, + isAIMessage, } from "../messages/index.js"; import type { BasePromptValueInterface } from "../prompt_values.js"; import { @@ -48,7 +50,6 @@ import { concat } from "../utils/stream.js"; import { RunnablePassthrough } from "../runnables/passthrough.js"; import { isZodSchema } from "../utils/types/is_zod_schema.js"; import { callbackHandlerPrefersStreaming } from "../callbacks/base.js"; -import { isAIMessage, isBaseMessage } from "../../messages.js"; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type ToolChoice = string | Record | "auto" | "any"; @@ -351,7 +352,7 @@ export abstract class BaseChatModel< messageList.map(coerceMessageLikeToMessage) ); - let runManagers; + let runManagers: CallbackManagerForLLMRun[] | undefined; if (existingRunManagers !== undefined && existingRunManagers.length > 0) { if (existingRunManagers.length !== baseMessages.length) { throw new Error( diff --git a/langchain-core/src/language_models/llms.ts b/langchain-core/src/language_models/llms.ts index 2256866b2f0d..4e4c7a3b7ec3 100644 --- a/langchain-core/src/language_models/llms.ts +++ b/langchain-core/src/language_models/llms.ts @@ -243,7 +243,7 @@ export abstract class BaseLLM< handledOptions: BaseCallbackConfig, existingRunManagers?: CallbackManagerForLLMRun[] ): Promise { - let runManagers; + let runManagers: CallbackManagerForLLMRun[] | undefined; if (existingRunManagers !== undefined && existingRunManagers.length > 0) { if (existingRunManagers.length !== prompts.length) { throw new Error( diff --git a/libs/langchain-openai/src/tests/chat_models.int.test.ts b/libs/langchain-openai/src/tests/chat_models.int.test.ts index a49c014d5d8a..a88e24cad56b 100644 --- a/libs/langchain-openai/src/tests/chat_models.int.test.ts +++ b/libs/langchain-openai/src/tests/chat_models.int.test.ts @@ -579,6 +579,12 @@ test("ChatOpenAI can cache generations", async () => { expect(lookupSpy).toHaveBeenCalledTimes(2); expect(updateSpy).toHaveBeenCalledTimes(2); + const res2 = await chat.generate([[message], [message]]); + expect(res2.generations.length).toBe(2); + + expect(lookupSpy).toHaveBeenCalledTimes(4); + expect(updateSpy).toHaveBeenCalledTimes(2); + lookupSpy.mockRestore(); updateSpy.mockRestore(); }); From cf829087fd828af832cca392df0154ced58db9c1 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Mon, 20 Jan 2025 21:19:01 -0800 Subject: [PATCH 3/5] Fix lint --- langchain-core/src/language_models/chat_models.ts | 2 ++ langchain-core/src/language_models/llms.ts | 1 + 2 files changed, 3 insertions(+) diff --git a/langchain-core/src/language_models/chat_models.ts b/langchain-core/src/language_models/chat_models.ts index 860333a20fe7..d76e1a1f3302 100644 --- a/langchain-core/src/language_models/chat_models.ts +++ 
b/langchain-core/src/language_models/chat_models.ts @@ -604,12 +604,14 @@ export abstract class BaseChatModel< isBaseMessage(result.message) && isAIMessage(result.message) ) { + // eslint-disable-next-line no-param-reassign result.message.usage_metadata = { input_tokens: 0, output_tokens: 0, total_tokens: 0, }; } + // eslint-disable-next-line no-param-reassign result.generationInfo = { ...result.generationInfo, tokenUsage: {}, diff --git a/langchain-core/src/language_models/llms.ts b/langchain-core/src/language_models/llms.ts index 4e4c7a3b7ec3..cb0da5a30830 100644 --- a/langchain-core/src/language_models/llms.ts +++ b/langchain-core/src/language_models/llms.ts @@ -418,6 +418,7 @@ export abstract class BaseLLM< if (promiseResult.status === "fulfilled") { const result = promiseResult.value as Generation[]; generations[i] = result.map((result) => { + // eslint-disable-next-line no-param-reassign result.generationInfo = { ...result.generationInfo, tokenUsage: {}, From fab9ae4c3e8458167e3f58ceee35bd4e2972ddfc Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Mon, 20 Jan 2025 21:29:02 -0800 Subject: [PATCH 4/5] Update check --- langchain-core/src/language_models/chat_models.ts | 10 ++++------ langchain-core/src/language_models/llms.ts | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/langchain-core/src/language_models/chat_models.ts b/langchain-core/src/language_models/chat_models.ts index d76e1a1f3302..5e49a18fafeb 100644 --- a/langchain-core/src/language_models/chat_models.ts +++ b/langchain-core/src/language_models/chat_models.ts @@ -353,12 +353,10 @@ export abstract class BaseChatModel< ); let runManagers: CallbackManagerForLLMRun[] | undefined; - if (existingRunManagers !== undefined && existingRunManagers.length > 0) { - if (existingRunManagers.length !== baseMessages.length) { - throw new Error( - "Received invalid number of existing run managers for chat model call. Please contact us for help." - ); - } + if ( + existingRunManagers !== undefined && + existingRunManagers.length === baseMessages.length + ) { runManagers = existingRunManagers; } else { const inheritableMetadata = { diff --git a/langchain-core/src/language_models/llms.ts b/langchain-core/src/language_models/llms.ts index cb0da5a30830..60c4c3038c54 100644 --- a/langchain-core/src/language_models/llms.ts +++ b/langchain-core/src/language_models/llms.ts @@ -244,12 +244,10 @@ export abstract class BaseLLM< existingRunManagers?: CallbackManagerForLLMRun[] ): Promise { let runManagers: CallbackManagerForLLMRun[] | undefined; - if (existingRunManagers !== undefined && existingRunManagers.length > 0) { - if (existingRunManagers.length !== prompts.length) { - throw new Error( - "Received invalid number of existing run managers for LLM call. Please contact us for help." 
- ); - } + if ( + existingRunManagers !== undefined && + existingRunManagers.length === prompts.length + ) { runManagers = existingRunManagers; } else { const callbackManager_ = await CallbackManager.configure( From 02c44de1218b58d47bd8ef497bda8d115cb5f612 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Mon, 20 Jan 2025 21:30:22 -0800 Subject: [PATCH 5/5] Naming --- .../src/language_models/chat_models.ts | 18 +++++++++--------- langchain-core/src/language_models/llms.ts | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/langchain-core/src/language_models/chat_models.ts b/langchain-core/src/language_models/chat_models.ts index 5e49a18fafeb..36feee110abe 100644 --- a/langchain-core/src/language_models/chat_models.ts +++ b/langchain-core/src/language_models/chat_models.ts @@ -346,7 +346,7 @@ export abstract class BaseChatModel< messages: BaseMessageLike[][], parsedOptions: this["ParsedCallOptions"], handledOptions: RunnableConfig, - existingRunManagers?: CallbackManagerForLLMRun[] + startedRunManagers?: CallbackManagerForLLMRun[] ): Promise { const baseMessages = messages.map((messageList) => messageList.map(coerceMessageLikeToMessage) @@ -354,10 +354,10 @@ export abstract class BaseChatModel< let runManagers: CallbackManagerForLLMRun[] | undefined; if ( - existingRunManagers !== undefined && - existingRunManagers.length === baseMessages.length + startedRunManagers !== undefined && + startedRunManagers.length === baseMessages.length ) { - runManagers = existingRunManagers; + runManagers = startedRunManagers; } else { const inheritableMetadata = { ...handledOptions.metadata, @@ -525,7 +525,7 @@ export abstract class BaseChatModel< }): Promise< LLMResult & { missingPromptIndices: number[]; - existingRunManagers?: CallbackManagerForLLMRun[]; + startedRunManagers?: CallbackManagerForLLMRun[]; } > { const baseMessages = messages.map((messageList) => @@ -633,7 +633,7 @@ export abstract class BaseChatModel< const output = { generations, missingPromptIndices, - existingRunManagers: runManagers, + startedRunManagers: runManagers, }; // This defines RUN_KEY as a non-enumerable property on the output object @@ -686,7 +686,7 @@ export abstract class BaseChatModel< callOptions as CallOptions ); - const { generations, missingPromptIndices, existingRunManagers } = + const { generations, missingPromptIndices, startedRunManagers } = await this._generateCached({ messages: baseMessages, cache, @@ -701,8 +701,8 @@ export abstract class BaseChatModel< missingPromptIndices.map((i) => baseMessages[i]), callOptions, runnableConfig, - existingRunManagers !== undefined - ? missingPromptIndices.map((i) => existingRunManagers?.[i]) + startedRunManagers !== undefined + ? 
missingPromptIndices.map((i) => startedRunManagers?.[i]) : undefined ); await Promise.all( diff --git a/langchain-core/src/language_models/llms.ts b/langchain-core/src/language_models/llms.ts index 60c4c3038c54..63e18cb9a0b3 100644 --- a/langchain-core/src/language_models/llms.ts +++ b/langchain-core/src/language_models/llms.ts @@ -241,14 +241,14 @@ export abstract class BaseLLM< prompts: string[], parsedOptions: this["ParsedCallOptions"], handledOptions: BaseCallbackConfig, - existingRunManagers?: CallbackManagerForLLMRun[] + startedRunManagers?: CallbackManagerForLLMRun[] ): Promise { let runManagers: CallbackManagerForLLMRun[] | undefined; if ( - existingRunManagers !== undefined && - existingRunManagers.length === prompts.length + startedRunManagers !== undefined && + startedRunManagers.length === prompts.length ) { - runManagers = existingRunManagers; + runManagers = startedRunManagers; } else { const callbackManager_ = await CallbackManager.configure( handledOptions.callbacks, @@ -358,7 +358,7 @@ export abstract class BaseLLM< }): Promise< LLMResult & { missingPromptIndices: number[]; - existingRunManagers?: CallbackManagerForLLMRun[]; + startedRunManagers?: CallbackManagerForLLMRun[]; } > { const callbackManager_ = await CallbackManager.configure( @@ -440,7 +440,7 @@ export abstract class BaseLLM< const output = { generations, missingPromptIndices, - existingRunManagers: runManagers, + startedRunManagers: runManagers, }; // This defines RUN_KEY as a non-enumerable property on the output object @@ -487,7 +487,7 @@ export abstract class BaseLLM< const llmStringKey = this._getSerializedCacheKeyParametersForCall( callOptions as CallOptions ); - const { generations, missingPromptIndices, existingRunManagers } = + const { generations, missingPromptIndices, startedRunManagers } = await this._generateCached({ prompts, cache, @@ -503,8 +503,8 @@ export abstract class BaseLLM< missingPromptIndices.map((i) => prompts[i]), callOptions, runnableConfig, - existingRunManagers !== undefined - ? missingPromptIndices.map((i) => existingRunManagers?.[i]) + startedRunManagers !== undefined + ? missingPromptIndices.map((i) => startedRunManagers?.[i]) : undefined ); await Promise.all(
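
Taken together, the five patches converge on one hand-off: `_generateCached` becomes the only place that starts LLM/chat-model runs, and it returns the run managers it created (`startedRunManagers`) so that `_generateUncached` can reuse them for the cache-miss prompts instead of configuring a second callback manager and firing `handleLLMStart`/`handleChatModelStart` again. Below is a minimal sketch of that flow for orientation only — `RunManager`, `generate`, `startRun`, and `callModel` are simplified stand-ins invented for the example, not the real `CallbackManagerForLLMRun` or langchain-core signatures:

```typescript
// Simplified stand-in for CallbackManagerForLLMRun: the only thing the sketch
// needs is a way to close out a run that was already started.
interface RunManager {
  handleLLMEnd(output: string): Promise<void>;
}

// Sketch of the cached/uncached hand-off. Every prompt gets exactly one
// "start" callback, whether it is served from the cache or from the model.
async function generate(
  prompts: string[],
  cache: Map<string, string>,
  // startRun stands in for handleLLMStart/handleChatModelStart: it fires the
  // "start" callback once per prompt and hands back that prompt's run manager.
  startRun: (prompt: string) => Promise<RunManager>,
  callModel: (prompt: string) => Promise<string>
): Promise<string[]> {
  // Cached path: the only place runs are started, one per prompt.
  const startedRunManagers = await Promise.all(prompts.map((p) => startRun(p)));
  const generations: (string | undefined)[] = prompts.map((p) => cache.get(p));
  const missingPromptIndices = generations.flatMap((g, i) =>
    g === undefined ? [i] : []
  );

  // Cache hits never reach the model; their runs still end normally.
  await Promise.all(
    generations.map(async (cached, i) => {
      if (cached !== undefined) {
        await startedRunManagers[i].handleLLMEnd(cached);
      }
    })
  );

  // Cache misses go to the model, reusing the already-started run managers
  // (the real code first checks that the counts line up) rather than
  // starting a second run for the same prompts.
  await Promise.all(
    missingPromptIndices.map(async (promptIndex) => {
      const text = await callModel(prompts[promptIndex]);
      generations[promptIndex] = text;
      cache.set(prompts[promptIndex], text);
      await startedRunManagers[promptIndex].handleLLMEnd(text);
    })
  );

  return generations as string[];
}
```

The later patches only adjust details around this hand-off: patch 2 moves the `isAIMessage`/`isBaseMessage` import to the correct path and pins the `runManagers` type, patch 3 silences the `no-param-reassign` lint, patch 4 swaps the hard error for a silent length check so a mismatched manager count falls back to starting a fresh run, and patch 5 renames `existingRunManagers` to `startedRunManagers`. Patch 1 also zeroes `usage_metadata`/`tokenUsage` on cached chat generations, presumably so a cache hit is not re-reported as fresh token consumption.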