diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts index ccf958ca98a5d..fe279907bb416 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts @@ -6,7 +6,7 @@ */ import { i18n } from '@kbn/i18n'; -import { TokenCount as TokenCountType, type Message } from './types'; +import { type Message } from './types'; export enum StreamingChatResponseEventType { ChatCompletionChunk = 'chatCompletionChunk', @@ -16,7 +16,6 @@ export enum StreamingChatResponseEventType { MessageAdd = 'messageAdd', ChatCompletionError = 'chatCompletionError', BufferFlush = 'bufferFlush', - TokenCount = 'tokenCount', } type StreamingChatResponseEventBase< @@ -54,7 +53,6 @@ export type ConversationCreateEvent = StreamingChatResponseEventBase< id: string; title: string; last_updated: string; - token_count?: TokenCountType; }; } >; @@ -66,7 +64,6 @@ export type ConversationUpdateEvent = StreamingChatResponseEventBase< id: string; title: string; last_updated: string; - token_count?: TokenCountType; }; } >; @@ -95,17 +92,6 @@ export type BufferFlushEvent = StreamingChatResponseEventBase< } >; -export type TokenCountEvent = StreamingChatResponseEventBase< - StreamingChatResponseEventType.TokenCount, - { - tokens: { - completion: number; - prompt: number; - total: number; - }; - } ->; - export type StreamingChatResponseEvent = | ChatCompletionChunkEvent | ChatCompletionMessageEvent @@ -113,7 +99,6 @@ export type StreamingChatResponseEvent = | ConversationUpdateEvent | MessageAddEvent | ChatCompletionErrorEvent - | TokenCountEvent | BufferFlushEvent; export type StreamingChatResponseEventWithoutError = Exclude< @@ -121,7 +106,7 @@ export type StreamingChatResponseEventWithoutError = Exclude< ChatCompletionErrorEvent >; -export type ChatEvent = ChatCompletionChunkEvent | TokenCountEvent | ChatCompletionMessageEvent; +export type ChatEvent = ChatCompletionChunkEvent | ChatCompletionMessageEvent; export type MessageOrChatEvent = ChatEvent | MessageAddEvent; export enum ChatCompletionErrorCode { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts index 0157a6a2b0aae..2280ce3f53baa 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts @@ -18,7 +18,6 @@ export { export type { ChatCompletionChunkEvent, ChatCompletionMessageEvent, - TokenCountEvent, ConversationCreateEvent, ConversationUpdateEvent, MessageAddEvent, diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts index e0f353b252564..2c004a2360b06 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts @@ -45,12 +45,6 @@ export interface Message { }; } -export interface TokenCount { - prompt: number; - completion: number; - total: number; -} - export interface Conversation { '@timestamp': string; user?: { @@ -61,7 +55,6 @@ export interface Conversation { id: string; title: string; last_updated: string; - token_count?: TokenCount; }; systemMessage?: string; messages: 
Message[]; @@ -71,8 +64,8 @@ export interface Conversation { public: boolean; } -export type ConversationRequestBase = Omit & { - conversation: { title: string; token_count?: TokenCount; id?: string }; +type ConversationRequestBase = Omit & { + conversation: { title: string; id?: string }; }; export type ConversationCreateRequest = ConversationRequestBase; diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts index 173331f80d776..6263ae0f9b63d 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts @@ -16,7 +16,6 @@ import { withLatestFrom, filter, } from 'rxjs'; -import { withoutTokenCountEvents } from './without_token_count_events'; import { type ChatCompletionChunkEvent, ChatEvent, @@ -69,15 +68,12 @@ export function emitWithConcatenatedMessage( return (source$) => { const shared = source$.pipe(shareReplay()); - const withoutTokenCount$ = shared.pipe(filterChunkEvents()); - const response$ = concat( shared, shared.pipe( - withoutTokenCountEvents(), concatenateChatCompletionChunks(), last(), - withLatestFrom(withoutTokenCount$), + withLatestFrom(shared.pipe(filterChunkEvents())), mergeMap(([message, chunkEvent]) => { return mergeWithEditedMessage(message, chunkEvent, callback); }) diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/without_token_count_events.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/without_token_count_events.ts deleted file mode 100644 index 137b1140fbdcd..0000000000000 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/without_token_count_events.ts +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */
-
-import { filter, OperatorFunction } from 'rxjs';
-import {
-  StreamingChatResponseEvent,
-  StreamingChatResponseEventType,
-  TokenCountEvent,
-} from '../conversation_complete';
-
-export function withoutTokenCountEvents<T extends StreamingChatResponseEvent>(): OperatorFunction<
-  T,
-  Exclude<T, TokenCountEvent>
-> {
-  return filter(
-    (event): event is Exclude<T, TokenCountEvent> =>
-      event.type !== StreamingChatResponseEventType.TokenCount
-  );
-}
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts
index e737d534f3367..ffecb7f67b3c3 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts
@@ -65,28 +65,6 @@ export const chatFeedbackEventSchema: EventTypeOpts<ChatFeedback> = {
             description: 'The timestamp of the last message in the conversation.',
           },
         },
-        token_count: {
-          properties: {
-            completion: {
-              type: 'long',
-              _meta: {
-                description: 'The number of tokens in the completion.',
-              },
-            },
-            prompt: {
-              type: 'long',
-              _meta: {
-                description: 'The number of tokens in the prompt.',
-              },
-            },
-            total: {
-              type: 'long',
-              _meta: {
-                description: 'The total number of tokens in the conversation.',
-              },
-            },
-          },
-        },
       },
     },
     labels: {
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts
index 23e7a70cecb31..ec4d14eed6c4f 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts
@@ -13,7 +13,6 @@ import { Readable } from 'stream';
 import { AssistantScope } from '@kbn/ai-assistant-common';
 import { aiAssistantSimulatedFunctionCalling } from '../..';
 import { createFunctionResponseMessage } from '../../../common/utils/create_function_response_message';
-import { withoutTokenCountEvents } from '../../../common/utils/without_token_count_events';
 import { LangTracer } from '../../service/client/instrumentation/lang_tracer';
 import { flushBuffer } from '../../service/util/flush_buffer';
 import { observableIntoOpenAIStream } from '../../service/util/observable_into_openai_stream';
@@ -203,16 +202,14 @@ const chatRecallRoute = createObservabilityAIAssistantServerRoute({
       recallAndScore({
         analytics: (await resources.plugins.core.start()).analytics,
         chat: (name, params) =>
-          client
-            .chat(name, {
-              ...params,
-              stream: true,
-              connectorId,
-              simulateFunctionCalling,
-              signal,
-              tracer: new LangTracer(otelContext.active()),
-            })
-            .pipe(withoutTokenCountEvents()),
+          client.chat(name, {
+            ...params,
+            stream: true,
+            connectorId,
+            simulateFunctionCalling,
+            signal,
+            tracer: new LangTracer(otelContext.active()),
+          }),
         context,
         logger: resources.logger,
         messages: [],
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts
index cfa0410364efb..dfed9ae9ecd29 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts
@@ -7,9 +7,7 @@ import * as t from 'io-ts';
 import { toBooleanRt } from '@kbn/io-ts-utils';
 import {
-  type Conversation,
   type ConversationCreateRequest,
-  type ConversationRequestBase,
   type ConversationUpdateRequest,
   type Message,
   MessageRole,
@@ -57,17 +55,12 @@ const tokenCountRt = t.type({
   total: t.number,
 });
 
-export const baseConversationRt: t.Type<ConversationRequestBase> = t.intersection([
+export const conversationCreateRt: t.Type<ConversationCreateRequest> = t.intersection([
   t.type({
     '@timestamp': t.string,
-    conversation: t.intersection([
-      t.type({
-        title: t.string,
-      }),
-      t.partial({
-        token_count: tokenCountRt,
-      }),
-    ]),
+    conversation: t.type({
+      title: t.string,
+    }),
     messages: t.array(messageRt),
     labels: t.record(t.string, t.string),
     numeric_labels: t.record(t.string, t.number),
@@ -84,17 +77,8 @@ export const assistantScopeType = t.union([
   t.literal('all'),
 ]);
 
-export const conversationCreateRt: t.Type<ConversationCreateRequest> = t.intersection([
-  baseConversationRt,
-  t.type({
-    conversation: t.type({
-      title: t.string,
-    }),
-  }),
-]);
-
 export const conversationUpdateRt: t.Type<ConversationUpdateRequest> = t.intersection([
-  baseConversationRt,
+  conversationCreateRt,
   t.type({
     conversation: t.intersection([
       t.type({
@@ -102,33 +86,12 @@ export const conversationUpdateRt: t.Type<ConversationUpdateRequest> = t.interse
         title: t.string,
       }),
       t.partial({
-        token_count: tokenCountRt,
+        token_count: tokenCountRt, // deprecated, but kept for backwards compatibility
       }),
     ]),
   }),
 ]);
 
-export const conversationRt: t.Type<Conversation> = t.intersection([
-  baseConversationRt,
-  t.intersection([
-    t.type({
-      namespace: t.string,
-      conversation: t.intersection([
-        t.type({
-          id: t.string,
-          last_updated: t.string,
-        }),
-        t.partial({
-          token_count: tokenCountRt,
-        }),
-      ]),
-    }),
-    t.partial({
-      user: t.intersection([t.type({ name: t.string }), t.partial({ id: t.string })]),
-    }),
-  ]),
-]);
-
 export const functionRt = t.intersection([
   t.type({
     name: t.string,
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts
index 63861061408d4..f9ad6f3ed71c8 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts
@@ -69,21 +69,6 @@ function createLlmSimulator(subscriber: any) {
         toolCalls: msg.function_call ?
[{ function: msg.function_call }] : [], }); }, - tokenCount: async ({ - completion, - prompt, - total, - }: { - completion: number; - prompt: number; - total: number; - }) => { - subscriber.next({ - type: InferenceChatCompletionEventType.ChatCompletionTokenCount, - tokens: { completion, prompt, total }, - }); - subscriber.complete(); - }, chunk: async (msg: ChunkDelta) => { subscriber.next({ type: InferenceChatCompletionEventType.ChatCompletionChunk, @@ -252,11 +237,6 @@ describe('Observability AI Assistant client', () => { }, }, ], - tokens: { - completion: 0, - prompt: 0, - total: 0, - }, }) .catch((error) => titleLlmSimulator.error(error)); }; @@ -388,7 +368,6 @@ describe('Observability AI Assistant client', () => { titleLlmPromiseReject(new Error('Failed generating title')); await nextTick(); - await llmSimulator.tokenCount({ completion: 1, prompt: 33, total: 34 }); await llmSimulator.complete(); await finished(stream); @@ -400,11 +379,6 @@ describe('Observability AI Assistant client', () => { title: 'New conversation', id: expect.any(String), last_updated: expect.any(String), - token_count: { - completion: 1, - prompt: 33, - total: 34, - }, }, type: StreamingChatResponseEventType.ConversationCreate, }); @@ -418,7 +392,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ content: ' again' }); titleLlmPromiseResolve('An auto-generated title'); - await llmSimulator.tokenCount({ completion: 6, prompt: 210, total: 216 }); await llmSimulator.complete(); await finished(stream); @@ -457,11 +430,6 @@ describe('Observability AI Assistant client', () => { title: 'An auto-generated title', id: expect.any(String), last_updated: expect.any(String), - token_count: { - completion: 6, - prompt: 210, - total: 216, - }, }, type: StreamingChatResponseEventType.ConversationCreate, }); @@ -475,11 +443,6 @@ describe('Observability AI Assistant client', () => { id: expect.any(String), last_updated: expect.any(String), title: 'An auto-generated title', - token_count: { - completion: 6, - prompt: 210, - total: 216, - }, }, labels: {}, numeric_labels: {}, @@ -543,11 +506,6 @@ describe('Observability AI Assistant client', () => { id: 'my-conversation-id', title: 'My stored conversation', last_updated: new Date().toISOString(), - token_count: { - completion: 1, - prompt: 78, - total: 79, - }, }, labels: {}, numeric_labels: {}, @@ -583,7 +541,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ content: 'Hello' }); await llmSimulator.next({ content: 'Hello' }); - await llmSimulator.tokenCount({ completion: 1, prompt: 33, total: 34 }); await llmSimulator.complete(); await finished(stream); @@ -595,11 +552,6 @@ describe('Observability AI Assistant client', () => { title: 'My stored conversation', id: expect.any(String), last_updated: expect.any(String), - token_count: { - completion: 2, - prompt: 111, - total: 113, - }, }, type: StreamingChatResponseEventType.ConversationUpdate, }); @@ -614,11 +566,6 @@ describe('Observability AI Assistant client', () => { id: expect.any(String), last_updated: expect.any(String), title: 'My stored conversation', - token_count: { - completion: 2, - prompt: 111, - total: 113, - }, }, labels: {}, numeric_labels: {}, @@ -900,7 +847,6 @@ describe('Observability AI Assistant client', () => { beforeEach(async () => { await llmSimulator.chunk({ content: 'I am done here' }); await llmSimulator.next({ content: 'I am done here' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await 
llmSimulator.complete(); await waitForNextWrite(stream); @@ -940,11 +886,6 @@ describe('Observability AI Assistant client', () => { id: expect.any(String), last_updated: expect.any(String), title: 'My predefined title', - token_count: { - completion: expect.any(Number), - prompt: expect.any(Number), - total: expect.any(Number), - }, }, }); @@ -1200,7 +1141,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ content: 'Hello' }); await llmSimulator.next({ content: 'Hello' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); await finished(stream); @@ -1492,7 +1432,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ function_call: { name: 'get_top_alerts' } }); await llmSimulator.next({ content: 'done' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); await waitFor(() => functionResponsePromiseResolve !== undefined); @@ -1612,7 +1551,6 @@ describe('Observability AI Assistant client', () => { function_call: { name: 'my_action', arguments: JSON.stringify({ foo: 'bar' }) }, }); await llmSimulator.next({ content: 'content' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); }); @@ -1643,7 +1581,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.next({ content: 'Looks like the function call failed', }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); }); diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts index bb2064a3dbb6b..790bd2c8d9c1b 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts @@ -41,7 +41,6 @@ import { ConversationUpdateEvent, createConversationNotFoundError, StreamingChatResponseEventType, - TokenCountEvent, type StreamingChatResponseEvent, } from '../../../common/conversation_complete'; import { convertMessagesForInference } from '../../../common/convert_messages_for_inference'; @@ -56,7 +55,6 @@ import { KnowledgeBaseType, KnowledgeBaseEntryRole, } from '../../../common/types'; -import { withoutTokenCountEvents } from '../../../common/utils/without_token_count_events'; import { CONTEXT_FUNCTION_NAME } from '../../functions/context'; import type { ChatFunctionClient } from '../chat_function_client'; import { KnowledgeBaseService, RecalledEntry } from '../knowledge_base_service'; @@ -68,9 +66,7 @@ import { LangTracer } from './instrumentation/lang_tracer'; import { continueConversation } from './operators/continue_conversation'; import { convertInferenceEventsToStreamingEvents } from './operators/convert_inference_events_to_streaming_events'; import { extractMessages } from './operators/extract_messages'; -import { extractTokenCount } from './operators/extract_token_count'; import { getGeneratedTitle } from './operators/get_generated_title'; -import { instrumentAndCountTokens } from './operators/instrument_and_count_tokens'; import { reIndexKnowledgeBaseAndPopulateSemanticTextField, scheduleKbSemanticTextMigrationTask, @@ -78,6 +74,7 @@ import { import { ObservabilityAIAssistantPluginStartDependencies } from '../../types'; import { ObservabilityAIAssistantConfig } from '../../config'; 
import { getElserModelId } from '../knowledge_base_service/get_elser_model_id'; +import { apmInstrumentation } from './operators/apm_instrumentation'; const MAX_FUNCTION_CALLS = 8; @@ -297,17 +294,12 @@ export class ObservabilityAIAssistantClient { // wait until all dependencies have completed forkJoin([ // get just the new messages - nextEvents$.pipe(withoutTokenCountEvents(), extractMessages()), - // count all the token count events emitted during completion - mergeOperator( - nextEvents$, - title$.pipe(filter((value): value is TokenCountEvent => typeof value !== 'string')) - ).pipe(extractTokenCount()), + nextEvents$.pipe(extractMessages()), // get just the title, and drop the token count events title$.pipe(filter((value): value is string => typeof value === 'string')), systemMessage$, ]).pipe( - switchMap(([addedMessages, tokenCountResult, title, systemMessage]) => { + switchMap(([addedMessages, title, systemMessage]) => { const initialMessagesWithAddedMessages = initialMessages.concat(addedMessages); const lastMessage = last(initialMessagesWithAddedMessages); @@ -329,13 +321,6 @@ export class ObservabilityAIAssistantClient { return throwError(() => createConversationNotFoundError()); } - const persistedTokenCount = conversation._source?.conversation - .token_count ?? { - prompt: 0, - completion: 0, - total: 0, - }; - return from( this.update( conversationId, @@ -349,16 +334,10 @@ export class ObservabilityAIAssistantClient { // update messages and system message { messages: initialMessagesWithAddedMessages, systemMessage }, - // update token count + // update title { conversation: { title: title || conversation._source?.conversation.title, - token_count: { - prompt: persistedTokenCount.prompt + tokenCountResult.prompt, - completion: - persistedTokenCount.completion + tokenCountResult.completion, - total: persistedTokenCount.total + tokenCountResult.total, - }, }, } ) @@ -382,7 +361,6 @@ export class ObservabilityAIAssistantClient { conversation: { title, id: conversationId, - token_count: tokenCountResult, }, public: !!isPublic, labels: {}, @@ -403,8 +381,7 @@ export class ObservabilityAIAssistantClient { ); return output$.pipe( - instrumentAndCountTokens('complete'), - withoutTokenCountEvents(), + apmInstrumentation('complete'), catchError((error) => { this.dependencies.logger.error(error); return throwError(() => error); @@ -462,7 +439,7 @@ export class ObservabilityAIAssistantClient { stream: TStream; } ): TStream extends true - ? Observable + ? Observable : Promise { let tools: Record | undefined; let toolChoice: ToolChoiceType | { function: string } | undefined; @@ -500,7 +477,7 @@ export class ObservabilityAIAssistantClient { }) ).pipe( convertInferenceEventsToStreamingEvents(), - instrumentAndCountTokens(name), + apmInstrumentation(name), failOnNonExistingFunctionCall({ functions }), tap((event) => { if ( @@ -512,7 +489,7 @@ export class ObservabilityAIAssistantClient { }), shareReplay() ) as TStream extends true - ? Observable + ? 
Observable : never; } else { return this.dependencies.inferenceClient.chatComplete({ diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/instrument_and_count_tokens.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/apm_instrumentation.ts similarity index 68% rename from x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/instrument_and_count_tokens.ts rename to x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/apm_instrumentation.ts index 094b2606ae533..88290e38775c8 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/instrument_and_count_tokens.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/apm_instrumentation.ts @@ -18,9 +18,8 @@ import { finalize, } from 'rxjs'; import type { StreamingChatResponseEvent } from '../../../../common/conversation_complete'; -import { extractTokenCount } from './extract_token_count'; -export function instrumentAndCountTokens( +export function apmInstrumentation( name: string ): OperatorFunction { return (source$) => { @@ -35,19 +34,9 @@ export function instrumentAndCountTokens( const shared$ = source$.pipe(shareReplay()); - let tokenCount = { - prompt: 0, - completion: 0, - total: 0, - }; - return merge( shared$, shared$.pipe( - extractTokenCount(), - tap((nextTokenCount) => { - tokenCount = nextTokenCount; - }), last(), tap(() => { span?.setOutcome('success'); @@ -57,11 +46,6 @@ export function instrumentAndCountTokens( return throwError(() => error); }), finalize(() => { - span?.addLabels({ - tokenCountPrompt: tokenCount.prompt, - tokenCountCompletion: tokenCount.completion, - tokenCountTotal: tokenCount.total, - }); span?.end(); }), ignoreElements() diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts index a78f0b4aa70be..5b4b2dfa340cf 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts @@ -31,14 +31,12 @@ import { FunctionVisibility } from '../../../../common/functions/types'; import { AdHocInstruction, Instruction } from '../../../../common/types'; import { createFunctionResponseMessage } from '../../../../common/utils/create_function_response_message'; import { emitWithConcatenatedMessage } from '../../../../common/utils/emit_with_concatenated_message'; -import { withoutTokenCountEvents } from '../../../../common/utils/without_token_count_events'; import type { ChatFunctionClient } from '../../chat_function_client'; import type { AutoAbortedChatFunction } from '../../types'; import { createServerSideFunctionResponseError } from '../../util/create_server_side_function_response_error'; import { LangTracer } from '../instrumentation/lang_tracer'; import { catchFunctionNotFoundError } from './catch_function_not_found_error'; import { extractMessages } from './extract_messages'; -import { hideTokenCountEvents } from './hide_token_count_events'; const MAX_FUNCTION_RESPONSE_TOKEN_COUNT = 4000; @@ -68,67 +66,65 @@ function executeFunctionAndCatchError({ // hide token count events from functions to prevent 
them from // having to deal with it as well - return tracer.startActiveSpan(`execute_function ${name}`, ({ tracer: nextTracer }) => - hideTokenCountEvents((hide) => { - const executeFunctionResponse$ = from( - functionClient.executeFunction({ - name, - chat: (operationName, params) => { - return chat(operationName, { - ...params, - tracer: nextTracer, - connectorId, - }).pipe(hide()); - }, - args, - signal, - messages, - connectorId, - simulateFunctionCalling, - }) - ); + return tracer.startActiveSpan(`execute_function ${name}`, ({ tracer: nextTracer }) => { + const executeFunctionResponse$ = from( + functionClient.executeFunction({ + name, + chat: (operationName, params) => { + return chat(operationName, { + ...params, + tracer: nextTracer, + connectorId, + }); + }, + args, + signal, + messages, + connectorId, + simulateFunctionCalling, + }) + ); - return executeFunctionResponse$.pipe( - catchError((error) => { - logger.error(`Encountered error running function ${name}: ${JSON.stringify(error)}`); - // We want to catch the error only when a promise occurs - // if it occurs in the Observable, we cannot easily recover - // from it because the function may have already emitted - // values which could lead to an invalid conversation state, - // so in that case we let the stream fail. - return of(createServerSideFunctionResponseError({ name, error })); - }), - switchMap((response) => { - if (isObservable(response)) { - return response; - } - - // is messageAdd event - if ('type' in response) { - return of(response); - } - - const encoded = encode(JSON.stringify(response.content || {})); - - const exceededTokenLimit = encoded.length >= MAX_FUNCTION_RESPONSE_TOKEN_COUNT; - - return of( - createFunctionResponseMessage({ - name, - content: exceededTokenLimit - ? { - message: - 'Function response exceeded the maximum length allowed and was truncated', - truncated: decode(take(encoded, MAX_FUNCTION_RESPONSE_TOKEN_COUNT)), - } - : response.content, - data: response.data, - }) - ); - }) - ); - }) - ); + return executeFunctionResponse$.pipe( + catchError((error) => { + logger.error(`Encountered error running function ${name}: ${JSON.stringify(error)}`); + // We want to catch the error only when a promise occurs + // if it occurs in the Observable, we cannot easily recover + // from it because the function may have already emitted + // values which could lead to an invalid conversation state, + // so in that case we let the stream fail. + return of(createServerSideFunctionResponseError({ name, error })); + }), + switchMap((response) => { + if (isObservable(response)) { + return response; + } + + // is messageAdd event + if ('type' in response) { + return of(response); + } + + const encoded = encode(JSON.stringify(response.content || {})); + + const exceededTokenLimit = encoded.length >= MAX_FUNCTION_RESPONSE_TOKEN_COUNT; + + return of( + createFunctionResponseMessage({ + name, + content: exceededTokenLimit + ? 
{ + message: + 'Function response exceeded the maximum length allowed and was truncated', + truncated: decode(take(encoded, MAX_FUNCTION_RESPONSE_TOKEN_COUNT)), + } + : response.content, + data: response.data, + }) + ); + }) + ); + }); } function getFunctionDefinitions({ @@ -315,7 +311,6 @@ export function continueConversation({ return concat( shared$, shared$.pipe( - withoutTokenCountEvents(), extractMessages(), switchMap((extractedMessages) => { if (!extractedMessages.length) { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts index 0a88c38f78836..6b894776f2326 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts @@ -5,7 +5,7 @@ * 2.0. */ -import { Observable, OperatorFunction, map } from 'rxjs'; +import { Observable, OperatorFunction, filter, map } from 'rxjs'; import { v4 } from 'uuid'; import { ChatCompletionEvent as InferenceChatCompletionEvent, @@ -13,17 +13,17 @@ import { } from '@kbn/inference-common'; import { ChatCompletionChunkEvent, - TokenCountEvent, ChatCompletionMessageEvent, StreamingChatResponseEventType, } from '../../../../common'; export function convertInferenceEventsToStreamingEvents(): OperatorFunction< InferenceChatCompletionEvent, - ChatCompletionChunkEvent | TokenCountEvent | ChatCompletionMessageEvent + ChatCompletionChunkEvent | ChatCompletionMessageEvent > { return (events$: Observable) => { return events$.pipe( + filter((event) => event.type !== InferenceChatCompletionEventType.ChatCompletionTokenCount), map((event) => { switch (event.type) { case InferenceChatCompletionEventType.ChatCompletionChunk: @@ -42,16 +42,6 @@ export function convertInferenceEventsToStreamingEvents(): OperatorFunction< : undefined, }, } as ChatCompletionChunkEvent; - case InferenceChatCompletionEventType.ChatCompletionTokenCount: - // Convert to TokenCountEvent - return { - type: StreamingChatResponseEventType.TokenCount, - tokens: { - completion: event.tokens.completion, - prompt: event.tokens.prompt, - total: event.tokens.total, - }, - } as TokenCountEvent; case InferenceChatCompletionEventType.ChatCompletionMessage: // Convert to ChatCompletionMessageEvent return { @@ -68,6 +58,7 @@ export function convertInferenceEventsToStreamingEvents(): OperatorFunction< : undefined, }, } as ChatCompletionMessageEvent; + default: throw new Error(`Unknown event type`); } diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/extract_token_count.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/extract_token_count.ts deleted file mode 100644 index cf4719ad75b8e..0000000000000 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/extract_token_count.ts +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -import { filter, OperatorFunction, scan, startWith } from 'rxjs'; -import { - StreamingChatResponseEvent, - StreamingChatResponseEventType, - TokenCountEvent, -} from '../../../../common/conversation_complete'; - -export function extractTokenCount(): OperatorFunction< - StreamingChatResponseEvent, - TokenCountEvent['tokens'] -> { - return (events$) => { - return events$.pipe( - filter( - (event): event is TokenCountEvent => - event.type === StreamingChatResponseEventType.TokenCount - ), - scan( - (acc, event) => { - acc.completion += event.tokens.completion; - acc.prompt += event.tokens.prompt; - acc.total += event.tokens.total; - return acc; - }, - { completion: 0, prompt: 0, total: 0 } - ), - startWith({ completion: 0, prompt: 0, total: 0 }) - ); - }; -} diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts index d4c7c40e440ce..0697785b41f91 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts @@ -9,7 +9,6 @@ import { ignoreElements, last, merge, Observable, shareReplay, tap } from 'rxjs' import { createFunctionNotFoundError, FunctionDefinition } from '../../../../common'; import { ChatEvent } from '../../../../common/conversation_complete'; import { concatenateChatCompletionChunks } from '../../../../common/utils/concatenate_chat_completion_chunks'; -import { withoutTokenCountEvents } from '../../../../common/utils/without_token_count_events'; export function failOnNonExistingFunctionCall({ functions, @@ -22,7 +21,6 @@ export function failOnNonExistingFunctionCall({ return merge( shared$, shared$.pipe( - withoutTokenCountEvents(), concatenateChatCompletionChunks(), last(), tap((event) => { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts index 776881378ce99..4389bd2b71be8 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts @@ -4,9 +4,9 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ -import { filter, lastValueFrom, of, throwError, toArray } from 'rxjs'; +import { filter, lastValueFrom, of, throwError } from 'rxjs'; import { ChatCompleteResponse } from '@kbn/inference-common'; -import { Message, MessageRole, StreamingChatResponseEventType } from '../../../../common'; +import { Message, MessageRole } from '../../../../common'; import { LangTracer } from '../instrumentation/lang_tracer'; import { TITLE_CONVERSATION_FUNCTION_NAME, getGeneratedTitle } from './get_generated_title'; @@ -118,44 +118,6 @@ describe('getGeneratedTitle', () => { expect(await testTitle(`"User's request for a title"`)).toEqual(`User's request for a title`); }); - it('ignores token count events and still passes them through', async () => { - const { title$ } = callGenerateTitle([ - { - content: '', - toolCalls: [ - { - toolCallId: 'test_id', - function: { - name: 'title_conversation', - arguments: { - title: 'My title', - }, - }, - }, - ], - tokens: { - completion: 10, - prompt: 10, - total: 10, - }, - }, - ]); - - const events = await lastValueFrom(title$.pipe(toArray())); - - expect(events).toEqual([ - 'My title', - { - tokens: { - completion: 10, - prompt: 10, - total: 10, - }, - type: StreamingChatResponseEventType.TokenCount, - }, - ]); - }); - it('handles errors in chat and falls back to the default title', async () => { const chatSpy = jest .fn() diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts index 20d6c1217aa6d..89a43d62af20d 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts @@ -9,8 +9,7 @@ import { catchError, mergeMap, Observable, of, tap, from } from 'rxjs'; import { Logger } from '@kbn/logging'; import { ChatCompleteResponse } from '@kbn/inference-common'; import type { ObservabilityAIAssistantClient } from '..'; -import { Message, MessageRole, StreamingChatResponseEventType } from '../../../../common'; -import { TokenCountEvent } from '../../../../common/conversation_complete'; +import { Message, MessageRole } from '../../../../common'; import { LangTracer } from '../instrumentation/lang_tracer'; export const TITLE_CONVERSATION_FUNCTION_NAME = 'title_conversation'; @@ -33,7 +32,7 @@ export function getGeneratedTitle({ chat: ChatFunctionWithoutConnectorAndTokenCount; logger: Pick; tracer: LangTracer; -}): Observable { +}): Observable { return from( chat('generate_title', { systemMessage: @@ -84,21 +83,7 @@ export function getGeneratedTitle({ // - JustTextWithoutQuotes => Captures: JustTextWithoutQuotes title = title.replace(/^"(.*)"$/g, '$1').replace(/^'(.*)'$/g, '$1'); - const tokenCount: TokenCountEvent | undefined = response.tokens - ? 
{ - type: StreamingChatResponseEventType.TokenCount, - tokens: { - completion: response.tokens.completion, - prompt: response.tokens.prompt, - total: response.tokens.total, - }, - } - : undefined; - - const events: Array = [title]; - if (tokenCount) events.push(tokenCount); - - return from(events); // Emit each event separately + return of(title); }), tap((event) => { if (typeof event === 'string') { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/hide_token_count_events.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/hide_token_count_events.ts deleted file mode 100644 index 7aabb1448382f..0000000000000 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/hide_token_count_events.ts +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import { merge, Observable, partition } from 'rxjs'; -import type { StreamingChatResponseEvent } from '../../../../common'; -import { - StreamingChatResponseEventType, - TokenCountEvent, -} from '../../../../common/conversation_complete'; - -type Hide = () => ( - source$: Observable -) => Observable>; - -export function hideTokenCountEvents( - cb: (hide: Hide) => Observable> -): Observable { - // `hide` can be called multiple times, so we keep track of each invocation - const allInterceptors: Array> = []; - - const hide: Hide = () => (source$) => { - const [tokenCountEvents$, otherEvents$] = partition( - source$, - (value): value is TokenCountEvent => value.type === StreamingChatResponseEventType.TokenCount - ); - - allInterceptors.push(tokenCountEvents$); - - return otherEvents$; - }; - - // combine the two observables again - return merge(cb(hide), ...allInterceptors); -} diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts index 435b2a731fa9b..a0490cb960811 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts @@ -25,10 +25,6 @@ const dynamic = { dynamic: true, }; -const integer = { - type: 'integer' as const, -}; - export const conversationComponentTemplate: ClusterComponentTemplate['component_template']['template'] = { mappings: { @@ -59,13 +55,6 @@ export const conversationComponentTemplate: ClusterComponentTemplate['component_ id: keyword, title: text, last_updated: date, - token_count: { - properties: { - prompt: integer, - completion: integer, - total: integer, - }, - }, }, }, namespace: keyword, diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts index a9826a180c969..4e4b2a982cd3a 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts @@ -11,7 +11,6 @@ import { BufferFlushEvent, StreamingChatResponseEventType, 
StreamingChatResponseEventWithoutError, - TokenCountEvent, } from '../../../common/conversation_complete'; // The Cloud proxy currently buffers 4kb or 8kb of data until flushing. @@ -19,7 +18,7 @@ import { // so we manually insert some data every 250ms if needed to force it // to flush. -export function flushBuffer( +export function flushBuffer( isCloud: boolean ): OperatorFunction { return (source: Observable) => diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts index a5e6ef2d17c91..89f5e2ae49360 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts @@ -24,11 +24,10 @@ import { ChatCompletionChunkEvent, StreamingChatResponseEventType, StreamingChatResponseEventWithoutError, - TokenCountEvent, } from '../../../common/conversation_complete'; export function observableIntoOpenAIStream( - source: Observable, + source: Observable, logger: Logger ) { const stream = new PassThrough(); diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts index 0f602dd836d51..68a57dc7f54bd 100644 --- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts @@ -110,8 +110,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon await simulator.next(`Part: ${i}\n`); } - await simulator.tokenCount({ completion: 20, prompt: 33, total: 53 }); - await simulator.complete(); await new Promise((innerResolve) => passThrough.on('end', () => innerResolve())); @@ -127,15 +125,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon 2 )}` ); - - const tokenCountChunk = receivedChunks.find((chunk) => chunk.type === 'tokenCount'); - expect(tokenCountChunk).to.eql( - { - type: 'tokenCount', - tokens: { completion: 20, prompt: 33, total: 53 }, - }, - `received token count chunk did not match expected` - ); } runTest().then(resolve, reject); diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts index 5d77326cdc49b..ae1f3143d4984 100644 --- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts @@ -12,7 +12,6 @@ import expect from '@kbn/expect'; import { ChatCompletionChunkEvent, ConversationCreateEvent, - ConversationUpdateEvent, MessageAddEvent, StreamingChatResponseEvent, StreamingChatResponseEventType, @@ -25,7 +24,7 @@ import { LlmResponseSimulator, } from '../../../../../../observability_ai_assistant_api_integration/common/create_llm_proxy'; import { createOpenAiChunk } from '../../../../../../observability_ai_assistant_api_integration/common/create_openai_chunk'; -import { decodeEvents, getConversationCreatedEvent, getConversationUpdatedEvent } from '../helpers'; +import { 
decodeEvents, getConversationCreatedEvent } from '../helpers'; import type { DeploymentAgnosticFtrProviderContext } from '../../../../ftr_provider_context'; import { SupertestWithRoleScope } from '../../../../services/role_scoped_supertest'; @@ -104,7 +103,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon }, ], }); - await titleSimulator.tokenCount({ completion: 5, prompt: 10, total: 15 }); await titleSimulator.complete(); await conversationSimulator.status(200); @@ -170,7 +168,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon await simulator.rawWrite(`data: ${chunk.substring(0, 10)}`); await simulator.rawWrite(`${chunk.substring(10)}\n\n`); - await simulator.tokenCount({ completion: 20, prompt: 33, total: 53 }); await simulator.complete(); await new Promise((resolve) => passThrough.on('end', () => resolve())); @@ -253,7 +250,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon events = await getEvents({}, async (conversationSimulator) => { await conversationSimulator.next('Hello'); await conversationSimulator.next(' again'); - await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await conversationSimulator.complete(); }).then((_events) => { return _events.filter( @@ -296,26 +292,12 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon }, }); - expect( - omit( - events[4], - 'conversation.id', - 'conversation.last_updated', - 'conversation.token_count' - ) - ).to.eql({ + expect(omit(events[4], 'conversation.id', 'conversation.last_updated')).to.eql({ type: StreamingChatResponseEventType.ConversationCreate, conversation: { title: 'My generated title', }, }); - - const tokenCount = (events[4] as ConversationCreateEvent).conversation.token_count!; - - expect(tokenCount.completion).to.be.greaterThan(0); - expect(tokenCount.prompt).to.be.greaterThan(0); - - expect(tokenCount.total).to.eql(tokenCount.completion + tokenCount.prompt); }); after(async () => { @@ -375,7 +357,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon }, ], }); - await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await conversationSimulator.complete(); } ); @@ -422,7 +403,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon describe('when updating an existing conversation', () => { let conversationCreatedEvent: ConversationCreateEvent; - let conversationUpdatedEvent: ConversationUpdateEvent; before(async () => { void proxy @@ -499,8 +479,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon expect(updatedResponse.status).to.be(200); await proxy.waitForAllInterceptorsSettled(); - - conversationUpdatedEvent = getConversationUpdatedEvent(updatedResponse.body); }); after(async () => { @@ -515,18 +493,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon expect(status).to.be(200); }); - - it('has correct token count for a new conversation', async () => { - expect(conversationCreatedEvent.conversation.token_count?.completion).to.be.greaterThan(0); - expect(conversationCreatedEvent.conversation.token_count?.prompt).to.be.greaterThan(0); - expect(conversationCreatedEvent.conversation.token_count?.total).to.be.greaterThan(0); - }); - - it('has correct token count for the updated conversation', async () => { - expect(conversationUpdatedEvent.conversation.token_count!.total).to.be.greaterThan( - 
        conversationCreatedEvent.conversation.token_count!.total
-      );
-    });
   });
 
   // todo
diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts
index 6dabb460bf599..e9120084c8af7 100644
--- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts
+++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts
@@ -88,7 +88,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
       await titleSimulator.status(200);
 
       await titleSimulator.next('My generated title');
-      await titleSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 });
       await titleSimulator.complete();
 
       await conversationSimulator.status(200);
@@ -173,7 +172,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
               },
             ],
           });
-          await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 });
           await conversationSimulator.complete();
         }
       );
@@ -245,7 +243,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
               },
             ],
           });
-          await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 });
           await conversationSimulator.complete();
         }
       );
@@ -262,7 +259,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
      before(async () => {
        responseBody = await getOpenAIResponse(async (conversationSimulator) => {
          await conversationSimulator.next('Hello');
-          await conversationSimulator.tokenCount({ completion: 5, prompt: 10, total: 15 });
          await conversationSimulator.complete();
        });
      });
diff --git a/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts b/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts
index 03b1dae289af3..19319ecef893f 100644
--- a/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts
+++ b/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts
@@ -45,7 +45,6 @@ export interface LlmResponseSimulator {
          }>;
        }
  ) => Promise<void>;
-  tokenCount: (msg: { completion: number; prompt: number; total: number }) => Promise<void>;
  error: (error: any) => Promise<void>;
  complete: () => Promise<void>;
  rawWrite: (chunk: string) => Promise<void>;
@@ -166,17 +165,6 @@ export class LlmProxy {
          Connection: 'keep-alive',
        });
      }),
-      tokenCount: (msg) => {
-        const chunk = {
-          object: 'chat.completion.chunk',
-          usage: {
-            completion_tokens: msg.completion,
-            prompt_tokens: msg.prompt,
-            total_tokens: msg.total,
-          },
-        };
-        return write(`data: ${JSON.stringify(chunk)}\n\n`);
-      },
      next: (msg) => {
        const chunk = createOpenAiChunk(msg);
        return write(`data: ${JSON.stringify(chunk)}\n\n`);
      },
@@ -220,7 +208,7 @@
      for (const chunk of parsedChunks) {
        await simulator.next(chunk);
      }
-      await simulator.tokenCount({ completion: 1, prompt: 1, total: 1 });
+
      await simulator.complete();
    },
  } as any;
diff --git a/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts b/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts
index c6fcf5dfc471a..2db983c28b6e5 100644
--- a/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts
+++ b/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts
@@ -137,11 +137,6 @@
export default function ApiTest({ getService, getPageObjects }: FtrProviderConte ], conversation: { title: 'My old conversation', - token_count: { - completion: 1, - prompt: 1, - total: 2, - }, }, '@timestamp': '2024-04-18T14:29:22.948', public: false, @@ -278,14 +273,10 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte ], }); - await titleSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await titleSimulator.complete(); await conversationSimulator.next('My response'); - await conversationSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await conversationSimulator.complete(); await header.waitUntilLoadingHasFinished(); @@ -350,8 +341,6 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte await conversationSimulator.next('My second response'); - await conversationSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await conversationSimulator.complete(); await header.waitUntilLoadingHasFinished(); @@ -445,8 +434,6 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte 'Service Level Indicators (SLIs) are quantifiable defined metrics that measure the performance and availability of a service or distributed system.' ); - await conversationSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await conversationSimulator.complete(); await header.waitUntilLoadingHasFinished();