diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts index ccf958ca98a5d..fe279907bb416 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/conversation_complete.ts @@ -6,7 +6,7 @@ */ import { i18n } from '@kbn/i18n'; -import { TokenCount as TokenCountType, type Message } from './types'; +import { type Message } from './types'; export enum StreamingChatResponseEventType { ChatCompletionChunk = 'chatCompletionChunk', @@ -16,7 +16,6 @@ export enum StreamingChatResponseEventType { MessageAdd = 'messageAdd', ChatCompletionError = 'chatCompletionError', BufferFlush = 'bufferFlush', - TokenCount = 'tokenCount', } type StreamingChatResponseEventBase< @@ -54,7 +53,6 @@ export type ConversationCreateEvent = StreamingChatResponseEventBase< id: string; title: string; last_updated: string; - token_count?: TokenCountType; }; } >; @@ -66,7 +64,6 @@ export type ConversationUpdateEvent = StreamingChatResponseEventBase< id: string; title: string; last_updated: string; - token_count?: TokenCountType; }; } >; @@ -95,17 +92,6 @@ export type BufferFlushEvent = StreamingChatResponseEventBase< } >; -export type TokenCountEvent = StreamingChatResponseEventBase< - StreamingChatResponseEventType.TokenCount, - { - tokens: { - completion: number; - prompt: number; - total: number; - }; - } ->; - export type StreamingChatResponseEvent = | ChatCompletionChunkEvent | ChatCompletionMessageEvent @@ -113,7 +99,6 @@ export type StreamingChatResponseEvent = | ConversationUpdateEvent | MessageAddEvent | ChatCompletionErrorEvent - | TokenCountEvent | BufferFlushEvent; export type StreamingChatResponseEventWithoutError = Exclude< @@ -121,7 +106,7 @@ export type StreamingChatResponseEventWithoutError = Exclude< ChatCompletionErrorEvent >; -export type ChatEvent = ChatCompletionChunkEvent | TokenCountEvent | ChatCompletionMessageEvent; +export type ChatEvent = ChatCompletionChunkEvent | ChatCompletionMessageEvent; export type MessageOrChatEvent = ChatEvent | MessageAddEvent; export enum ChatCompletionErrorCode { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts index 0157a6a2b0aae..2280ce3f53baa 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/index.ts @@ -18,7 +18,6 @@ export { export type { ChatCompletionChunkEvent, ChatCompletionMessageEvent, - TokenCountEvent, ConversationCreateEvent, ConversationUpdateEvent, MessageAddEvent, diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts index e0f353b252564..2c004a2360b06 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/types.ts @@ -45,12 +45,6 @@ export interface Message { }; } -export interface TokenCount { - prompt: number; - completion: number; - total: number; -} - export interface Conversation { '@timestamp': string; user?: { @@ -61,7 +55,6 @@ export interface Conversation { id: string; title: string; last_updated: string; - token_count?: TokenCount; }; systemMessage?: string; messages: 
Message[]; @@ -71,8 +64,8 @@ export interface Conversation { public: boolean; } -export type ConversationRequestBase = Omit & { - conversation: { title: string; token_count?: TokenCount; id?: string }; +type ConversationRequestBase = Omit & { + conversation: { title: string; id?: string }; }; export type ConversationCreateRequest = ConversationRequestBase; diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts index 173331f80d776..6263ae0f9b63d 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/emit_with_concatenated_message.ts @@ -16,7 +16,6 @@ import { withLatestFrom, filter, } from 'rxjs'; -import { withoutTokenCountEvents } from './without_token_count_events'; import { type ChatCompletionChunkEvent, ChatEvent, @@ -69,15 +68,12 @@ export function emitWithConcatenatedMessage( return (source$) => { const shared = source$.pipe(shareReplay()); - const withoutTokenCount$ = shared.pipe(filterChunkEvents()); - const response$ = concat( shared, shared.pipe( - withoutTokenCountEvents(), concatenateChatCompletionChunks(), last(), - withLatestFrom(withoutTokenCount$), + withLatestFrom(shared.pipe(filterChunkEvents())), mergeMap(([message, chunkEvent]) => { return mergeWithEditedMessage(message, chunkEvent, callback); }) diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/without_token_count_events.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/without_token_count_events.ts deleted file mode 100644 index 137b1140fbdcd..0000000000000 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/utils/without_token_count_events.ts +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */
-
-import { filter, OperatorFunction } from 'rxjs';
-import {
-  StreamingChatResponseEvent,
-  StreamingChatResponseEventType,
-  TokenCountEvent,
-} from '../conversation_complete';
-
-export function withoutTokenCountEvents<T extends StreamingChatResponseEvent>(): OperatorFunction<
-  T,
-  Exclude<T, TokenCountEvent>
-> {
-  return filter(
-    (event): event is Exclude<T, TokenCountEvent> =>
-      event.type !== StreamingChatResponseEventType.TokenCount
-  );
-}
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts
index e737d534f3367..ffecb7f67b3c3 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/public/analytics/schemas/chat_feedback.ts
@@ -65,28 +65,6 @@ export const chatFeedbackEventSchema: EventTypeOpts<ChatFeedback> = {
             description: 'The timestamp of the last message in the conversation.',
           },
         },
-        token_count: {
-          properties: {
-            completion: {
-              type: 'long',
-              _meta: {
-                description: 'The number of tokens in the completion.',
-              },
-            },
-            prompt: {
-              type: 'long',
-              _meta: {
-                description: 'The number of tokens in the prompt.',
-              },
-            },
-            total: {
-              type: 'long',
-              _meta: {
-                description: 'The total number of tokens in the conversation.',
-              },
-            },
-          },
-        },
       },
     },
     labels: {
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts
index 23e7a70cecb31..ec4d14eed6c4f 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/chat/route.ts
@@ -13,7 +13,6 @@ import { Readable } from 'stream';
 import { AssistantScope } from '@kbn/ai-assistant-common';
 import { aiAssistantSimulatedFunctionCalling } from '../..';
 import { createFunctionResponseMessage } from '../../../common/utils/create_function_response_message';
-import { withoutTokenCountEvents } from '../../../common/utils/without_token_count_events';
 import { LangTracer } from '../../service/client/instrumentation/lang_tracer';
 import { flushBuffer } from '../../service/util/flush_buffer';
 import { observableIntoOpenAIStream } from '../../service/util/observable_into_openai_stream';
@@ -203,16 +202,14 @@ const chatRecallRoute = createObservabilityAIAssistantServerRoute({
       recallAndScore({
         analytics: (await resources.plugins.core.start()).analytics,
         chat: (name, params) =>
-          client
-            .chat(name, {
-              ...params,
-              stream: true,
-              connectorId,
-              simulateFunctionCalling,
-              signal,
-              tracer: new LangTracer(otelContext.active()),
-            })
-            .pipe(withoutTokenCountEvents()),
+          client.chat(name, {
+            ...params,
+            stream: true,
+            connectorId,
+            simulateFunctionCalling,
+            signal,
+            tracer: new LangTracer(otelContext.active()),
+          }),
         context,
         logger: resources.logger,
         messages: [],
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts
index cfa0410364efb..dfed9ae9ecd29 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/routes/runtime_types.ts
@@ -7,9 +7,7 @@ import * as t from 'io-ts';
 import { toBooleanRt } from '@kbn/io-ts-utils';
 import {
-  type Conversation,
   type ConversationCreateRequest,
-  type ConversationRequestBase,
   type ConversationUpdateRequest,
   type Message,
   MessageRole,
@@ -57,17 +55,12 @@ const tokenCountRt = t.type({
   total: t.number,
 });
 
-export const baseConversationRt: t.Type<ConversationRequestBase> = t.intersection([
+export const conversationCreateRt: t.Type<ConversationCreateRequest> = t.intersection([
   t.type({
     '@timestamp': t.string,
-    conversation: t.intersection([
-      t.type({
-        title: t.string,
-      }),
-      t.partial({
-        token_count: tokenCountRt,
-      }),
-    ]),
+    conversation: t.type({
+      title: t.string,
+    }),
     messages: t.array(messageRt),
     labels: t.record(t.string, t.string),
     numeric_labels: t.record(t.string, t.number),
@@ -84,17 +77,8 @@ export const assistantScopeType = t.union([
   t.literal('all'),
 ]);
 
-export const conversationCreateRt: t.Type<ConversationCreateRequest> = t.intersection([
-  baseConversationRt,
-  t.type({
-    conversation: t.type({
-      title: t.string,
-    }),
-  }),
-]);
-
 export const conversationUpdateRt: t.Type<ConversationUpdateRequest> = t.intersection([
-  baseConversationRt,
+  conversationCreateRt,
   t.type({
     conversation: t.intersection([
       t.type({
@@ -102,33 +86,12 @@ export const conversationUpdateRt: t.Type<ConversationUpdateRequest> = t.interse
         title: t.string,
       }),
       t.partial({
-        token_count: tokenCountRt,
+        token_count: tokenCountRt, // deprecated, but kept for backwards compatibility
       }),
     ]),
   }),
 ]);
 
-export const conversationRt: t.Type<Conversation> = t.intersection([
-  baseConversationRt,
-  t.intersection([
-    t.type({
-      namespace: t.string,
-      conversation: t.intersection([
-        t.type({
-          id: t.string,
-          last_updated: t.string,
-        }),
-        t.partial({
-          token_count: tokenCountRt,
-        }),
-      ]),
-    }),
-    t.partial({
-      user: t.intersection([t.type({ name: t.string }), t.partial({ id: t.string })]),
-    }),
-  ]),
-]);
-
 export const functionRt = t.intersection([
   t.type({
     name: t.string,
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts
index 63861061408d4..f9ad6f3ed71c8 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.test.ts
@@ -69,21 +69,6 @@ function createLlmSimulator(subscriber: any) {
         toolCalls: msg.function_call ?
[{ function: msg.function_call }] : [], }); }, - tokenCount: async ({ - completion, - prompt, - total, - }: { - completion: number; - prompt: number; - total: number; - }) => { - subscriber.next({ - type: InferenceChatCompletionEventType.ChatCompletionTokenCount, - tokens: { completion, prompt, total }, - }); - subscriber.complete(); - }, chunk: async (msg: ChunkDelta) => { subscriber.next({ type: InferenceChatCompletionEventType.ChatCompletionChunk, @@ -252,11 +237,6 @@ describe('Observability AI Assistant client', () => { }, }, ], - tokens: { - completion: 0, - prompt: 0, - total: 0, - }, }) .catch((error) => titleLlmSimulator.error(error)); }; @@ -388,7 +368,6 @@ describe('Observability AI Assistant client', () => { titleLlmPromiseReject(new Error('Failed generating title')); await nextTick(); - await llmSimulator.tokenCount({ completion: 1, prompt: 33, total: 34 }); await llmSimulator.complete(); await finished(stream); @@ -400,11 +379,6 @@ describe('Observability AI Assistant client', () => { title: 'New conversation', id: expect.any(String), last_updated: expect.any(String), - token_count: { - completion: 1, - prompt: 33, - total: 34, - }, }, type: StreamingChatResponseEventType.ConversationCreate, }); @@ -418,7 +392,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ content: ' again' }); titleLlmPromiseResolve('An auto-generated title'); - await llmSimulator.tokenCount({ completion: 6, prompt: 210, total: 216 }); await llmSimulator.complete(); await finished(stream); @@ -457,11 +430,6 @@ describe('Observability AI Assistant client', () => { title: 'An auto-generated title', id: expect.any(String), last_updated: expect.any(String), - token_count: { - completion: 6, - prompt: 210, - total: 216, - }, }, type: StreamingChatResponseEventType.ConversationCreate, }); @@ -475,11 +443,6 @@ describe('Observability AI Assistant client', () => { id: expect.any(String), last_updated: expect.any(String), title: 'An auto-generated title', - token_count: { - completion: 6, - prompt: 210, - total: 216, - }, }, labels: {}, numeric_labels: {}, @@ -543,11 +506,6 @@ describe('Observability AI Assistant client', () => { id: 'my-conversation-id', title: 'My stored conversation', last_updated: new Date().toISOString(), - token_count: { - completion: 1, - prompt: 78, - total: 79, - }, }, labels: {}, numeric_labels: {}, @@ -583,7 +541,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ content: 'Hello' }); await llmSimulator.next({ content: 'Hello' }); - await llmSimulator.tokenCount({ completion: 1, prompt: 33, total: 34 }); await llmSimulator.complete(); await finished(stream); @@ -595,11 +552,6 @@ describe('Observability AI Assistant client', () => { title: 'My stored conversation', id: expect.any(String), last_updated: expect.any(String), - token_count: { - completion: 2, - prompt: 111, - total: 113, - }, }, type: StreamingChatResponseEventType.ConversationUpdate, }); @@ -614,11 +566,6 @@ describe('Observability AI Assistant client', () => { id: expect.any(String), last_updated: expect.any(String), title: 'My stored conversation', - token_count: { - completion: 2, - prompt: 111, - total: 113, - }, }, labels: {}, numeric_labels: {}, @@ -900,7 +847,6 @@ describe('Observability AI Assistant client', () => { beforeEach(async () => { await llmSimulator.chunk({ content: 'I am done here' }); await llmSimulator.next({ content: 'I am done here' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await 
llmSimulator.complete(); await waitForNextWrite(stream); @@ -940,11 +886,6 @@ describe('Observability AI Assistant client', () => { id: expect.any(String), last_updated: expect.any(String), title: 'My predefined title', - token_count: { - completion: expect.any(Number), - prompt: expect.any(Number), - total: expect.any(Number), - }, }, }); @@ -1200,7 +1141,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ content: 'Hello' }); await llmSimulator.next({ content: 'Hello' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); await finished(stream); @@ -1492,7 +1432,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.chunk({ function_call: { name: 'get_top_alerts' } }); await llmSimulator.next({ content: 'done' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); await waitFor(() => functionResponsePromiseResolve !== undefined); @@ -1612,7 +1551,6 @@ describe('Observability AI Assistant client', () => { function_call: { name: 'my_action', arguments: JSON.stringify({ foo: 'bar' }) }, }); await llmSimulator.next({ content: 'content' }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); }); @@ -1643,7 +1581,6 @@ describe('Observability AI Assistant client', () => { await llmSimulator.next({ content: 'Looks like the function call failed', }); - await llmSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await llmSimulator.complete(); }); diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts index bb2064a3dbb6b..790bd2c8d9c1b 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts @@ -41,7 +41,6 @@ import { ConversationUpdateEvent, createConversationNotFoundError, StreamingChatResponseEventType, - TokenCountEvent, type StreamingChatResponseEvent, } from '../../../common/conversation_complete'; import { convertMessagesForInference } from '../../../common/convert_messages_for_inference'; @@ -56,7 +55,6 @@ import { KnowledgeBaseType, KnowledgeBaseEntryRole, } from '../../../common/types'; -import { withoutTokenCountEvents } from '../../../common/utils/without_token_count_events'; import { CONTEXT_FUNCTION_NAME } from '../../functions/context'; import type { ChatFunctionClient } from '../chat_function_client'; import { KnowledgeBaseService, RecalledEntry } from '../knowledge_base_service'; @@ -68,9 +66,7 @@ import { LangTracer } from './instrumentation/lang_tracer'; import { continueConversation } from './operators/continue_conversation'; import { convertInferenceEventsToStreamingEvents } from './operators/convert_inference_events_to_streaming_events'; import { extractMessages } from './operators/extract_messages'; -import { extractTokenCount } from './operators/extract_token_count'; import { getGeneratedTitle } from './operators/get_generated_title'; -import { instrumentAndCountTokens } from './operators/instrument_and_count_tokens'; import { reIndexKnowledgeBaseAndPopulateSemanticTextField, scheduleKbSemanticTextMigrationTask, @@ -78,6 +74,7 @@ import { import { ObservabilityAIAssistantPluginStartDependencies } from '../../types'; import { ObservabilityAIAssistantConfig } from '../../config'; 
import { getElserModelId } from '../knowledge_base_service/get_elser_model_id'; +import { apmInstrumentation } from './operators/apm_instrumentation'; const MAX_FUNCTION_CALLS = 8; @@ -297,17 +294,12 @@ export class ObservabilityAIAssistantClient { // wait until all dependencies have completed forkJoin([ // get just the new messages - nextEvents$.pipe(withoutTokenCountEvents(), extractMessages()), - // count all the token count events emitted during completion - mergeOperator( - nextEvents$, - title$.pipe(filter((value): value is TokenCountEvent => typeof value !== 'string')) - ).pipe(extractTokenCount()), + nextEvents$.pipe(extractMessages()), // get just the title, and drop the token count events title$.pipe(filter((value): value is string => typeof value === 'string')), systemMessage$, ]).pipe( - switchMap(([addedMessages, tokenCountResult, title, systemMessage]) => { + switchMap(([addedMessages, title, systemMessage]) => { const initialMessagesWithAddedMessages = initialMessages.concat(addedMessages); const lastMessage = last(initialMessagesWithAddedMessages); @@ -329,13 +321,6 @@ export class ObservabilityAIAssistantClient { return throwError(() => createConversationNotFoundError()); } - const persistedTokenCount = conversation._source?.conversation - .token_count ?? { - prompt: 0, - completion: 0, - total: 0, - }; - return from( this.update( conversationId, @@ -349,16 +334,10 @@ export class ObservabilityAIAssistantClient { // update messages and system message { messages: initialMessagesWithAddedMessages, systemMessage }, - // update token count + // update title { conversation: { title: title || conversation._source?.conversation.title, - token_count: { - prompt: persistedTokenCount.prompt + tokenCountResult.prompt, - completion: - persistedTokenCount.completion + tokenCountResult.completion, - total: persistedTokenCount.total + tokenCountResult.total, - }, }, } ) @@ -382,7 +361,6 @@ export class ObservabilityAIAssistantClient { conversation: { title, id: conversationId, - token_count: tokenCountResult, }, public: !!isPublic, labels: {}, @@ -403,8 +381,7 @@ export class ObservabilityAIAssistantClient { ); return output$.pipe( - instrumentAndCountTokens('complete'), - withoutTokenCountEvents(), + apmInstrumentation('complete'), catchError((error) => { this.dependencies.logger.error(error); return throwError(() => error); @@ -462,7 +439,7 @@ export class ObservabilityAIAssistantClient { stream: TStream; } ): TStream extends true - ? Observable + ? Observable : Promise { let tools: Record | undefined; let toolChoice: ToolChoiceType | { function: string } | undefined; @@ -500,7 +477,7 @@ export class ObservabilityAIAssistantClient { }) ).pipe( convertInferenceEventsToStreamingEvents(), - instrumentAndCountTokens(name), + apmInstrumentation(name), failOnNonExistingFunctionCall({ functions }), tap((event) => { if ( @@ -512,7 +489,7 @@ export class ObservabilityAIAssistantClient { }), shareReplay() ) as TStream extends true - ? Observable + ? 
Observable : never; } else { return this.dependencies.inferenceClient.chatComplete({ diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/instrument_and_count_tokens.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/apm_instrumentation.ts similarity index 68% rename from x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/instrument_and_count_tokens.ts rename to x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/apm_instrumentation.ts index 094b2606ae533..88290e38775c8 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/instrument_and_count_tokens.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/apm_instrumentation.ts @@ -18,9 +18,8 @@ import { finalize, } from 'rxjs'; import type { StreamingChatResponseEvent } from '../../../../common/conversation_complete'; -import { extractTokenCount } from './extract_token_count'; -export function instrumentAndCountTokens( +export function apmInstrumentation( name: string ): OperatorFunction { return (source$) => { @@ -35,19 +34,9 @@ export function instrumentAndCountTokens( const shared$ = source$.pipe(shareReplay()); - let tokenCount = { - prompt: 0, - completion: 0, - total: 0, - }; - return merge( shared$, shared$.pipe( - extractTokenCount(), - tap((nextTokenCount) => { - tokenCount = nextTokenCount; - }), last(), tap(() => { span?.setOutcome('success'); @@ -57,11 +46,6 @@ export function instrumentAndCountTokens( return throwError(() => error); }), finalize(() => { - span?.addLabels({ - tokenCountPrompt: tokenCount.prompt, - tokenCountCompletion: tokenCount.completion, - tokenCountTotal: tokenCount.total, - }); span?.end(); }), ignoreElements() diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts index a78f0b4aa70be..5b4b2dfa340cf 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/continue_conversation.ts @@ -31,14 +31,12 @@ import { FunctionVisibility } from '../../../../common/functions/types'; import { AdHocInstruction, Instruction } from '../../../../common/types'; import { createFunctionResponseMessage } from '../../../../common/utils/create_function_response_message'; import { emitWithConcatenatedMessage } from '../../../../common/utils/emit_with_concatenated_message'; -import { withoutTokenCountEvents } from '../../../../common/utils/without_token_count_events'; import type { ChatFunctionClient } from '../../chat_function_client'; import type { AutoAbortedChatFunction } from '../../types'; import { createServerSideFunctionResponseError } from '../../util/create_server_side_function_response_error'; import { LangTracer } from '../instrumentation/lang_tracer'; import { catchFunctionNotFoundError } from './catch_function_not_found_error'; import { extractMessages } from './extract_messages'; -import { hideTokenCountEvents } from './hide_token_count_events'; const MAX_FUNCTION_RESPONSE_TOKEN_COUNT = 4000; @@ -68,67 +66,65 @@ function executeFunctionAndCatchError({ // hide token count events from functions to prevent 
them from // having to deal with it as well - return tracer.startActiveSpan(`execute_function ${name}`, ({ tracer: nextTracer }) => - hideTokenCountEvents((hide) => { - const executeFunctionResponse$ = from( - functionClient.executeFunction({ - name, - chat: (operationName, params) => { - return chat(operationName, { - ...params, - tracer: nextTracer, - connectorId, - }).pipe(hide()); - }, - args, - signal, - messages, - connectorId, - simulateFunctionCalling, - }) - ); + return tracer.startActiveSpan(`execute_function ${name}`, ({ tracer: nextTracer }) => { + const executeFunctionResponse$ = from( + functionClient.executeFunction({ + name, + chat: (operationName, params) => { + return chat(operationName, { + ...params, + tracer: nextTracer, + connectorId, + }); + }, + args, + signal, + messages, + connectorId, + simulateFunctionCalling, + }) + ); - return executeFunctionResponse$.pipe( - catchError((error) => { - logger.error(`Encountered error running function ${name}: ${JSON.stringify(error)}`); - // We want to catch the error only when a promise occurs - // if it occurs in the Observable, we cannot easily recover - // from it because the function may have already emitted - // values which could lead to an invalid conversation state, - // so in that case we let the stream fail. - return of(createServerSideFunctionResponseError({ name, error })); - }), - switchMap((response) => { - if (isObservable(response)) { - return response; - } - - // is messageAdd event - if ('type' in response) { - return of(response); - } - - const encoded = encode(JSON.stringify(response.content || {})); - - const exceededTokenLimit = encoded.length >= MAX_FUNCTION_RESPONSE_TOKEN_COUNT; - - return of( - createFunctionResponseMessage({ - name, - content: exceededTokenLimit - ? { - message: - 'Function response exceeded the maximum length allowed and was truncated', - truncated: decode(take(encoded, MAX_FUNCTION_RESPONSE_TOKEN_COUNT)), - } - : response.content, - data: response.data, - }) - ); - }) - ); - }) - ); + return executeFunctionResponse$.pipe( + catchError((error) => { + logger.error(`Encountered error running function ${name}: ${JSON.stringify(error)}`); + // We want to catch the error only when a promise occurs + // if it occurs in the Observable, we cannot easily recover + // from it because the function may have already emitted + // values which could lead to an invalid conversation state, + // so in that case we let the stream fail. + return of(createServerSideFunctionResponseError({ name, error })); + }), + switchMap((response) => { + if (isObservable(response)) { + return response; + } + + // is messageAdd event + if ('type' in response) { + return of(response); + } + + const encoded = encode(JSON.stringify(response.content || {})); + + const exceededTokenLimit = encoded.length >= MAX_FUNCTION_RESPONSE_TOKEN_COUNT; + + return of( + createFunctionResponseMessage({ + name, + content: exceededTokenLimit + ? 
{ + message: + 'Function response exceeded the maximum length allowed and was truncated', + truncated: decode(take(encoded, MAX_FUNCTION_RESPONSE_TOKEN_COUNT)), + } + : response.content, + data: response.data, + }) + ); + }) + ); + }); } function getFunctionDefinitions({ @@ -315,7 +311,6 @@ export function continueConversation({ return concat( shared$, shared$.pipe( - withoutTokenCountEvents(), extractMessages(), switchMap((extractedMessages) => { if (!extractedMessages.length) { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts index 0a88c38f78836..6b894776f2326 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/convert_inference_events_to_streaming_events.ts @@ -5,7 +5,7 @@ * 2.0. */ -import { Observable, OperatorFunction, map } from 'rxjs'; +import { Observable, OperatorFunction, filter, map } from 'rxjs'; import { v4 } from 'uuid'; import { ChatCompletionEvent as InferenceChatCompletionEvent, @@ -13,17 +13,17 @@ import { } from '@kbn/inference-common'; import { ChatCompletionChunkEvent, - TokenCountEvent, ChatCompletionMessageEvent, StreamingChatResponseEventType, } from '../../../../common'; export function convertInferenceEventsToStreamingEvents(): OperatorFunction< InferenceChatCompletionEvent, - ChatCompletionChunkEvent | TokenCountEvent | ChatCompletionMessageEvent + ChatCompletionChunkEvent | ChatCompletionMessageEvent > { return (events$: Observable) => { return events$.pipe( + filter((event) => event.type !== InferenceChatCompletionEventType.ChatCompletionTokenCount), map((event) => { switch (event.type) { case InferenceChatCompletionEventType.ChatCompletionChunk: @@ -42,16 +42,6 @@ export function convertInferenceEventsToStreamingEvents(): OperatorFunction< : undefined, }, } as ChatCompletionChunkEvent; - case InferenceChatCompletionEventType.ChatCompletionTokenCount: - // Convert to TokenCountEvent - return { - type: StreamingChatResponseEventType.TokenCount, - tokens: { - completion: event.tokens.completion, - prompt: event.tokens.prompt, - total: event.tokens.total, - }, - } as TokenCountEvent; case InferenceChatCompletionEventType.ChatCompletionMessage: // Convert to ChatCompletionMessageEvent return { @@ -68,6 +58,7 @@ export function convertInferenceEventsToStreamingEvents(): OperatorFunction< : undefined, }, } as ChatCompletionMessageEvent; + default: throw new Error(`Unknown event type`); } diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/extract_token_count.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/extract_token_count.ts deleted file mode 100644 index cf4719ad75b8e..0000000000000 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/extract_token_count.ts +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -import { filter, OperatorFunction, scan, startWith } from 'rxjs'; -import { - StreamingChatResponseEvent, - StreamingChatResponseEventType, - TokenCountEvent, -} from '../../../../common/conversation_complete'; - -export function extractTokenCount(): OperatorFunction< - StreamingChatResponseEvent, - TokenCountEvent['tokens'] -> { - return (events$) => { - return events$.pipe( - filter( - (event): event is TokenCountEvent => - event.type === StreamingChatResponseEventType.TokenCount - ), - scan( - (acc, event) => { - acc.completion += event.tokens.completion; - acc.prompt += event.tokens.prompt; - acc.total += event.tokens.total; - return acc; - }, - { completion: 0, prompt: 0, total: 0 } - ), - startWith({ completion: 0, prompt: 0, total: 0 }) - ); - }; -} diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts index d4c7c40e440ce..0697785b41f91 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/fail_on_non_existing_function_call.ts @@ -9,7 +9,6 @@ import { ignoreElements, last, merge, Observable, shareReplay, tap } from 'rxjs' import { createFunctionNotFoundError, FunctionDefinition } from '../../../../common'; import { ChatEvent } from '../../../../common/conversation_complete'; import { concatenateChatCompletionChunks } from '../../../../common/utils/concatenate_chat_completion_chunks'; -import { withoutTokenCountEvents } from '../../../../common/utils/without_token_count_events'; export function failOnNonExistingFunctionCall({ functions, @@ -22,7 +21,6 @@ export function failOnNonExistingFunctionCall({ return merge( shared$, shared$.pipe( - withoutTokenCountEvents(), concatenateChatCompletionChunks(), last(), tap((event) => { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts index 776881378ce99..4389bd2b71be8 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.test.ts @@ -4,9 +4,9 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ -import { filter, lastValueFrom, of, throwError, toArray } from 'rxjs'; +import { filter, lastValueFrom, of, throwError } from 'rxjs'; import { ChatCompleteResponse } from '@kbn/inference-common'; -import { Message, MessageRole, StreamingChatResponseEventType } from '../../../../common'; +import { Message, MessageRole } from '../../../../common'; import { LangTracer } from '../instrumentation/lang_tracer'; import { TITLE_CONVERSATION_FUNCTION_NAME, getGeneratedTitle } from './get_generated_title'; @@ -118,44 +118,6 @@ describe('getGeneratedTitle', () => { expect(await testTitle(`"User's request for a title"`)).toEqual(`User's request for a title`); }); - it('ignores token count events and still passes them through', async () => { - const { title$ } = callGenerateTitle([ - { - content: '', - toolCalls: [ - { - toolCallId: 'test_id', - function: { - name: 'title_conversation', - arguments: { - title: 'My title', - }, - }, - }, - ], - tokens: { - completion: 10, - prompt: 10, - total: 10, - }, - }, - ]); - - const events = await lastValueFrom(title$.pipe(toArray())); - - expect(events).toEqual([ - 'My title', - { - tokens: { - completion: 10, - prompt: 10, - total: 10, - }, - type: StreamingChatResponseEventType.TokenCount, - }, - ]); - }); - it('handles errors in chat and falls back to the default title', async () => { const chatSpy = jest .fn() diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts index 20d6c1217aa6d..89a43d62af20d 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/get_generated_title.ts @@ -9,8 +9,7 @@ import { catchError, mergeMap, Observable, of, tap, from } from 'rxjs'; import { Logger } from '@kbn/logging'; import { ChatCompleteResponse } from '@kbn/inference-common'; import type { ObservabilityAIAssistantClient } from '..'; -import { Message, MessageRole, StreamingChatResponseEventType } from '../../../../common'; -import { TokenCountEvent } from '../../../../common/conversation_complete'; +import { Message, MessageRole } from '../../../../common'; import { LangTracer } from '../instrumentation/lang_tracer'; export const TITLE_CONVERSATION_FUNCTION_NAME = 'title_conversation'; @@ -33,7 +32,7 @@ export function getGeneratedTitle({ chat: ChatFunctionWithoutConnectorAndTokenCount; logger: Pick; tracer: LangTracer; -}): Observable { +}): Observable { return from( chat('generate_title', { systemMessage: @@ -84,21 +83,7 @@ export function getGeneratedTitle({ // - JustTextWithoutQuotes => Captures: JustTextWithoutQuotes title = title.replace(/^"(.*)"$/g, '$1').replace(/^'(.*)'$/g, '$1'); - const tokenCount: TokenCountEvent | undefined = response.tokens - ? 
{ - type: StreamingChatResponseEventType.TokenCount, - tokens: { - completion: response.tokens.completion, - prompt: response.tokens.prompt, - total: response.tokens.total, - }, - } - : undefined; - - const events: Array = [title]; - if (tokenCount) events.push(tokenCount); - - return from(events); // Emit each event separately + return of(title); }), tap((event) => { if (typeof event === 'string') { diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/hide_token_count_events.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/hide_token_count_events.ts deleted file mode 100644 index 7aabb1448382f..0000000000000 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/operators/hide_token_count_events.ts +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import { merge, Observable, partition } from 'rxjs'; -import type { StreamingChatResponseEvent } from '../../../../common'; -import { - StreamingChatResponseEventType, - TokenCountEvent, -} from '../../../../common/conversation_complete'; - -type Hide = () => ( - source$: Observable -) => Observable>; - -export function hideTokenCountEvents( - cb: (hide: Hide) => Observable> -): Observable { - // `hide` can be called multiple times, so we keep track of each invocation - const allInterceptors: Array> = []; - - const hide: Hide = () => (source$) => { - const [tokenCountEvents$, otherEvents$] = partition( - source$, - (value): value is TokenCountEvent => value.type === StreamingChatResponseEventType.TokenCount - ); - - allInterceptors.push(tokenCountEvents$); - - return otherEvents$; - }; - - // combine the two observables again - return merge(cb(hide), ...allInterceptors); -} diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts index 435b2a731fa9b..a0490cb960811 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/conversation_component_template.ts @@ -25,10 +25,6 @@ const dynamic = { dynamic: true, }; -const integer = { - type: 'integer' as const, -}; - export const conversationComponentTemplate: ClusterComponentTemplate['component_template']['template'] = { mappings: { @@ -59,13 +55,6 @@ export const conversationComponentTemplate: ClusterComponentTemplate['component_ id: keyword, title: text, last_updated: date, - token_count: { - properties: { - prompt: integer, - completion: integer, - total: integer, - }, - }, }, }, namespace: keyword, diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts index a9826a180c969..4e4b2a982cd3a 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/flush_buffer.ts @@ -11,7 +11,6 @@ import { BufferFlushEvent, StreamingChatResponseEventType, 
StreamingChatResponseEventWithoutError, - TokenCountEvent, } from '../../../common/conversation_complete'; // The Cloud proxy currently buffers 4kb or 8kb of data until flushing. @@ -19,7 +18,7 @@ import { // so we manually insert some data every 250ms if needed to force it // to flush. -export function flushBuffer( +export function flushBuffer( isCloud: boolean ): OperatorFunction { return (source: Observable) => diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts index a5e6ef2d17c91..89f5e2ae49360 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/util/observable_into_openai_stream.ts @@ -24,11 +24,10 @@ import { ChatCompletionChunkEvent, StreamingChatResponseEventType, StreamingChatResponseEventWithoutError, - TokenCountEvent, } from '../../../common/conversation_complete'; export function observableIntoOpenAIStream( - source: Observable, + source: Observable, logger: Logger ) { const stream = new PassThrough(); diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts index 0f602dd836d51..68a57dc7f54bd 100644 --- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/chat/chat.spec.ts @@ -110,8 +110,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon await simulator.next(`Part: ${i}\n`); } - await simulator.tokenCount({ completion: 20, prompt: 33, total: 53 }); - await simulator.complete(); await new Promise((innerResolve) => passThrough.on('end', () => innerResolve())); @@ -127,15 +125,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon 2 )}` ); - - const tokenCountChunk = receivedChunks.find((chunk) => chunk.type === 'tokenCount'); - expect(tokenCountChunk).to.eql( - { - type: 'tokenCount', - tokens: { completion: 20, prompt: 33, total: 53 }, - }, - `received token count chunk did not match expected` - ); } runTest().then(resolve, reject); diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts index 5d77326cdc49b..ae1f3143d4984 100644 --- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/complete/complete.spec.ts @@ -12,7 +12,6 @@ import expect from '@kbn/expect'; import { ChatCompletionChunkEvent, ConversationCreateEvent, - ConversationUpdateEvent, MessageAddEvent, StreamingChatResponseEvent, StreamingChatResponseEventType, @@ -25,7 +24,7 @@ import { LlmResponseSimulator, } from '../../../../../../observability_ai_assistant_api_integration/common/create_llm_proxy'; import { createOpenAiChunk } from '../../../../../../observability_ai_assistant_api_integration/common/create_openai_chunk'; -import { decodeEvents, getConversationCreatedEvent, getConversationUpdatedEvent } from '../helpers'; +import { 
decodeEvents, getConversationCreatedEvent } from '../helpers'; import type { DeploymentAgnosticFtrProviderContext } from '../../../../ftr_provider_context'; import { SupertestWithRoleScope } from '../../../../services/role_scoped_supertest'; @@ -104,7 +103,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon }, ], }); - await titleSimulator.tokenCount({ completion: 5, prompt: 10, total: 15 }); await titleSimulator.complete(); await conversationSimulator.status(200); @@ -170,7 +168,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon await simulator.rawWrite(`data: ${chunk.substring(0, 10)}`); await simulator.rawWrite(`${chunk.substring(10)}\n\n`); - await simulator.tokenCount({ completion: 20, prompt: 33, total: 53 }); await simulator.complete(); await new Promise((resolve) => passThrough.on('end', () => resolve())); @@ -253,7 +250,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon events = await getEvents({}, async (conversationSimulator) => { await conversationSimulator.next('Hello'); await conversationSimulator.next(' again'); - await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await conversationSimulator.complete(); }).then((_events) => { return _events.filter( @@ -296,26 +292,12 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon }, }); - expect( - omit( - events[4], - 'conversation.id', - 'conversation.last_updated', - 'conversation.token_count' - ) - ).to.eql({ + expect(omit(events[4], 'conversation.id', 'conversation.last_updated')).to.eql({ type: StreamingChatResponseEventType.ConversationCreate, conversation: { title: 'My generated title', }, }); - - const tokenCount = (events[4] as ConversationCreateEvent).conversation.token_count!; - - expect(tokenCount.completion).to.be.greaterThan(0); - expect(tokenCount.prompt).to.be.greaterThan(0); - - expect(tokenCount.total).to.eql(tokenCount.completion + tokenCount.prompt); }); after(async () => { @@ -375,7 +357,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon }, ], }); - await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 }); await conversationSimulator.complete(); } ); @@ -422,7 +403,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon describe('when updating an existing conversation', () => { let conversationCreatedEvent: ConversationCreateEvent; - let conversationUpdatedEvent: ConversationUpdateEvent; before(async () => { void proxy @@ -499,8 +479,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon expect(updatedResponse.status).to.be(200); await proxy.waitForAllInterceptorsSettled(); - - conversationUpdatedEvent = getConversationUpdatedEvent(updatedResponse.body); }); after(async () => { @@ -515,18 +493,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon expect(status).to.be(200); }); - - it('has correct token count for a new conversation', async () => { - expect(conversationCreatedEvent.conversation.token_count?.completion).to.be.greaterThan(0); - expect(conversationCreatedEvent.conversation.token_count?.prompt).to.be.greaterThan(0); - expect(conversationCreatedEvent.conversation.token_count?.total).to.be.greaterThan(0); - }); - - it('has correct token count for the updated conversation', async () => { - expect(conversationUpdatedEvent.conversation.token_count!.total).to.be.greaterThan( - 
        conversationCreatedEvent.conversation.token_count!.total
-      );
-    });
   });
 
   // todo
diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts
index 6dabb460bf599..e9120084c8af7 100644
--- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts
+++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/public_complete/public_complete.spec.ts
@@ -88,7 +88,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
       await titleSimulator.status(200);
 
       await titleSimulator.next('My generated title');
-      await titleSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 });
       await titleSimulator.complete();
 
       await conversationSimulator.status(200);
@@ -173,7 +172,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
               },
             ],
           });
-          await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 });
           await conversationSimulator.complete();
         }
       );
@@ -245,7 +243,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
               },
             ],
           });
-          await conversationSimulator.tokenCount({ completion: 0, prompt: 0, total: 0 });
           await conversationSimulator.complete();
         }
       );
@@ -262,7 +259,6 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
      before(async () => {
        responseBody = await getOpenAIResponse(async (conversationSimulator) => {
          await conversationSimulator.next('Hello');
-          await conversationSimulator.tokenCount({ completion: 5, prompt: 10, total: 15 });
          await conversationSimulator.complete();
        });
      });
diff --git a/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts b/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts
index 03b1dae289af3..19319ecef893f 100644
--- a/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts
+++ b/x-pack/test/observability_ai_assistant_api_integration/common/create_llm_proxy.ts
@@ -45,7 +45,6 @@ export interface LlmResponseSimulator {
          }>;
        }
  ) => Promise<void>;
-  tokenCount: (msg: { completion: number; prompt: number; total: number }) => Promise<void>;
  error: (error: any) => Promise<void>;
  complete: () => Promise<void>;
  rawWrite: (chunk: string) => Promise<void>;
@@ -166,17 +165,6 @@ export class LlmProxy {
          Connection: 'keep-alive',
        });
      }),
-      tokenCount: (msg) => {
-        const chunk = {
-          object: 'chat.completion.chunk',
-          usage: {
-            completion_tokens: msg.completion,
-            prompt_tokens: msg.prompt,
-            total_tokens: msg.total,
-          },
-        };
-        return write(`data: ${JSON.stringify(chunk)}\n\n`);
-      },
      next: (msg) => {
        const chunk = createOpenAiChunk(msg);
        return write(`data: ${JSON.stringify(chunk)}\n\n`);
      },
@@ -220,7 +208,7 @@
      for (const chunk of parsedChunks) {
        await simulator.next(chunk);
      }
-      await simulator.tokenCount({ completion: 1, prompt: 1, total: 1 });
+
      await simulator.complete();
    },
  } as any;
diff --git a/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts b/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts
index c6fcf5dfc471a..2db983c28b6e5 100644
--- a/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts
+++ b/x-pack/test/observability_ai_assistant_functional/tests/conversations/index.spec.ts
@@ -137,11 +137,6 @@
export default function ApiTest({ getService, getPageObjects }: FtrProviderConte ], conversation: { title: 'My old conversation', - token_count: { - completion: 1, - prompt: 1, - total: 2, - }, }, '@timestamp': '2024-04-18T14:29:22.948', public: false, @@ -278,14 +273,10 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte ], }); - await titleSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await titleSimulator.complete(); await conversationSimulator.next('My response'); - await conversationSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await conversationSimulator.complete(); await header.waitUntilLoadingHasFinished(); @@ -350,8 +341,6 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte await conversationSimulator.next('My second response'); - await conversationSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await conversationSimulator.complete(); await header.waitUntilLoadingHasFinished(); @@ -445,8 +434,6 @@ export default function ApiTest({ getService, getPageObjects }: FtrProviderConte 'Service Level Indicators (SLIs) are quantifiable defined metrics that measure the performance and availability of a service or distributed system.' ); - await conversationSimulator.tokenCount({ completion: 1, prompt: 1, total: 2 }); - await conversationSimulator.complete(); await header.waitUntilLoadingHasFinished();