
Commit

Merge branch 'main' into feat/Custom-Token-Rates-for-Endpoints
rubentalstra authored Feb 27, 2025
2 parents edf23eb + 34f967e commit 59a2328
Showing 27 changed files with 568 additions and 244 deletions.
6 changes: 0 additions & 6 deletions .env.example
@@ -209,12 +209,6 @@ ASSISTANTS_API_KEY=user_provided
 # More info, including how to enable use of Assistants with Azure here:
 # https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure

-#============#
-# OpenRouter #
-#============#
-# !!!Warning: Use the variable above instead of this one. Using this one will override the OpenAI endpoint
-# OPENROUTER_API_KEY=
-
 #============#
 # Plugins #
 #============#
64 changes: 24 additions & 40 deletions api/app/clients/AnthropicClient.js
@@ -7,8 +7,7 @@ const {
   getResponseSender,
   validateVisionModel,
 } = require('librechat-data-provider');
-const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
-const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+const { SplitStreamHandler: _Handler, GraphEvents } = require('@librechat/agents');
 const {
   truncateText,
   formatMessage,
@@ -24,6 +23,7 @@ const {
 } = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
+const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const Tokenizer = require('~/server/services/Tokenizer');
 const { logger, sendEvent } = require('~/config');
 const { sleep } = require('~/server/utils');
Expand All @@ -32,6 +32,15 @@ const BaseClient = require('./BaseClient');
 const HUMAN_PROMPT = '\n\nHuman:';
 const AI_PROMPT = '\n\nAssistant:';

+class SplitStreamHandler extends _Handler {
+  getDeltaContent(chunk) {
+    return (chunk?.delta?.text ?? chunk?.completion) || '';
+  }
+  getReasoningDelta(chunk) {
+    return chunk?.delta?.thinking || '';
+  }
+}
+
 /** Helper function to introduce a delay before retrying */
 function delayBeforeRetry(attempts, baseDelay = 1000) {
   return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts));
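The new SplitStreamHandler subclass adapts the shared handler from @librechat/agents to Anthropic's raw stream events, which is what lets the streaming loop later in this diff pass chunks through unmodified. A minimal sketch of the chunk shapes the two overrides read (payloads illustrative, based only on the fields the code accesses):

// Messages API text delta: getDeltaContent(chunk) returns 'Hello'
const textChunk = { delta: { text: 'Hello' } };
// Legacy Text Completions chunk: getDeltaContent(chunk) falls back to `completion`
const legacyChunk = { completion: 'Hello' };
// Extended thinking delta: getReasoningDelta(chunk) returns the thinking text
const reasoningChunk = { delta: { thinking: 'Weighing options...' } };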
@@ -105,7 +114,9 @@ class AnthropicClient extends BaseClient {

     const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
     this.isClaude3 = modelMatch.includes('claude-3');
-    this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
+    this.isLegacyOutput = !(
+      /claude-3[-.]5-sonnet/.test(modelMatch) || /claude-3[-.]7/.test(modelMatch)
+    );
     this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

     if (
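The old substring check only matched the exact 'claude-3-5-sonnet' spelling; the regexes also accept dotted variants and claude-3.7 models. A quick illustration (model names are examples, not an exhaustive list):

/claude-3[-.]5-sonnet/.test('claude-3.5-sonnet'); // true: modern output limits
/claude-3[-.]7/.test('claude-3-7-sonnet-20250219'); // true: modern output limits
/claude-3[-.]5-sonnet/.test('claude-3-opus-20240229'); // false: isLegacyOutput stays true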
@@ -733,10 +744,17 @@ class AnthropicClient extends BaseClient {
       stop_sequences,
       temperature,
       metadata,
-      top_p,
-      top_k,
     };

+    if (!/claude-3[-.]7/.test(model)) {
+      if (top_p !== undefined) {
+        requestOptions.top_p = top_p;
+      }
+      if (top_k !== undefined) {
+        requestOptions.top_k = top_k;
+      }
+    }
+
     if (this.useMessages) {
       requestOptions.messages = payload;
       requestOptions.max_tokens =
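With the new guard, claude-3-7 requests omit top_p and top_k rather than always sending them, presumably because the API rejects those sampling parameters in some claude-3-7 configurations (e.g. with extended thinking enabled). A small sketch of the guard in isolation (model names illustrative):

const applySampling = (model, requestOptions, { top_p, top_k }) => {
  if (!/claude-3[-.]7/.test(model)) {
    if (top_p !== undefined) { requestOptions.top_p = top_p; }
    if (top_k !== undefined) { requestOptions.top_k = top_k; }
  }
  return requestOptions;
};
applySampling('claude-3-5-sonnet', {}, { top_p: 0.9, top_k: 40 }); // { top_p: 0.9, top_k: 40 }
applySampling('claude-3-7-sonnet-20250219', {}, { top_p: 0.9, top_k: 40 }); // {}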
@@ -798,50 +816,16 @@ }
         }
       });

-      /** @param {string} chunk */
-      const handleChunk = (chunk) => {
-        this.streamHandler.handle({
-          choices: [
-            {
-              delta: {
-                content: chunk,
-              },
-            },
-          ],
-        });
-      };
-      /** @param {string} chunk */
-      const handleReasoningChunk = (chunk) => {
-        this.streamHandler.handle({
-          choices: [
-            {
-              delta: {
-                reasoning_content: chunk,
-              },
-            },
-          ],
-        });
-      };
-
       for await (const completion of response) {
-        // Handle each completion as before
         const type = completion?.type ?? '';
         if (tokenEventTypes.has(type)) {
           logger.debug(`[AnthropicClient] ${type}`, completion);
           this[type] = completion;
         }
-        if (completion?.delta?.thinking) {
-          handleReasoningChunk(completion.delta.thinking);
-        } else if (completion?.delta?.text) {
-          handleChunk(completion.delta.text);
-        } else if (completion.completion) {
-          handleChunk(completion.completion);
-        }
-
+        this.streamHandler.handle(completion);
         await sleep(streamRate);
       }

-      // Successful processing, exit loop
       break;
     } catch (error) {
       attempts += 1;
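The surrounding retry loop (visible in the attempts += 1 branch above) pairs with the delayBeforeRetry helper added near the top of the file; a simplified sketch of that pattern, with maxRetries as an assumed bound:

async function withRetries(run, maxRetries = 3) {
  let attempts = 0;
  while (attempts < maxRetries) {
    try {
      await run(); // stream the full completion
      break; // success: exit the retry loop
    } catch (error) {
      attempts += 1;
      await delayBeforeRetry(attempts); // linear backoff: 1s, 2s, 3s, ...
    }
  }
}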
50 changes: 39 additions & 11 deletions api/app/clients/BaseClient.js
@@ -5,10 +5,11 @@ const {
   isAgentsEndpoint,
   isParamEndpoint,
   EModelEndpoint,
+  excludedKeys,
   ErrorTypes,
   Constants,
 } = require('librechat-data-provider');
-const { getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
+const { getMessages, saveMessage, updateMessage, saveConvo, getConvo } = require('~/models');
 const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
 const { truncateToolCallOutputs } = require('./prompts');
 const checkBalance = require('~/models/checkBalance');
@@ -55,6 +56,10 @@ class BaseClient {
    * Flag to determine if the client re-submitted the latest assistant message.
    * @type {boolean | undefined} */
     this.continued;
+    /**
+     * Flag to determine if the client has already fetched the conversation while saving new messages.
+     * @type {boolean | undefined} */
+    this.fetchedConvo;
     /** @type {TMessage[]} */
     this.currentMessages = [];
     /** @type {import('librechat-data-provider').VisionModes | undefined} */
@@ -863,16 +868,39 @@ class BaseClient {
       return { message: savedMessage };
     }

-    const conversation = await saveConvo(
-      this.options.req,
-      {
-        conversationId: message.conversationId,
-        endpoint: this.options.endpoint,
-        endpointType: this.options.endpointType,
-        ...endpointOptions,
-      },
-      { context: 'api/app/clients/BaseClient.js - saveMessageToDatabase #saveConvo' },
-    );
+    const fieldsToKeep = {
+      conversationId: message.conversationId,
+      endpoint: this.options.endpoint,
+      endpointType: this.options.endpointType,
+      ...endpointOptions,
+    };
+
+    const existingConvo =
+      this.fetchedConvo === true
+        ? null
+        : await getConvo(this.options.req?.user?.id, message.conversationId);
+
+    const unsetFields = {};
+    if (existingConvo != null) {
+      this.fetchedConvo = true;
+      for (const key in existingConvo) {
+        if (!key) {
+          continue;
+        }
+        if (excludedKeys.has(key)) {
+          continue;
+        }
+
+        if (endpointOptions?.[key] === undefined) {
+          unsetFields[key] = 1;
+        }
+      }
+    }
+
+    const conversation = await saveConvo(this.options.req, fieldsToKeep, {
+      context: 'api/app/clients/BaseClient.js - saveMessageToDatabase #saveConvo',
+      unsetFields,
+    });

     return { message: savedMessage, conversation };
   }
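The rewritten saveMessageToDatabase fetches the stored conversation once per client instance (guarded by this.fetchedConvo) and computes unsetFields: every previously saved field that the current endpointOptions no longer define. A self-contained sketch of that computation (the excludedKeys contents below are an illustrative subset of the real set in librechat-data-provider, and treating unsetFields as a MongoDB $unset document is an assumption about saveConvo's implementation):

// Illustrative subset; the real excludedKeys lives in librechat-data-provider
const excludedKeys = new Set(['conversationId', 'endpoint', 'messages', 'title']);

const existingConvo = { conversationId: 'abc', endpoint: 'anthropic', temperature: 0.7, top_p: 0.9 };
const endpointOptions = { temperature: 1 };

const unsetFields = {};
for (const key of Object.keys(existingConvo)) {
  if (excludedKeys.has(key)) { continue; }
  if (endpointOptions[key] === undefined) { unsetFields[key] = 1; }
}

console.log(unsetFields); // { top_p: 1 }, e.g. usable as { $unset: unsetFields }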
7 changes: 1 addition & 6 deletions api/app/clients/OpenAIClient.js
@@ -109,12 +109,7 @@ class OpenAIClient extends BaseClient {
     const omniPattern = /\b(o1|o3)\b/i;
     this.isOmni = omniPattern.test(this.modelOptions.model);

-    const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
-    if (OPENROUTER_API_KEY && !this.azure) {
-      this.apiKey = OPENROUTER_API_KEY;
-      this.useOpenRouter = true;
-    }
-
+    const { OPENAI_FORCE_PROMPT } = process.env ?? {};
     const { reverseProxyUrl: reverseProxy } = this.options;

     if (!this.useOpenRouter && reverseProxy && reverseProxy.includes(KnownEndpoints.openrouter)) {
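Together with the .env.example change above, this drops the dedicated OPENROUTER_API_KEY path from OpenAIClient; OpenRouter is now recognized from the reverse proxy URL instead. A sketch of the surviving detection (URL and KnownEndpoints value illustrative):

const KnownEndpoints = { openrouter: 'openrouter' }; // illustrative stand-in
const reverseProxy = 'https://openrouter.ai/api/v1';
const useOpenRouter = reverseProxy.includes(KnownEndpoints.openrouter); // true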
(diffs for the remaining 23 changed files not shown)
