From 7dfb386f5a6b656f0569dbbbe1e1f60239cbfd7e Mon Sep 17 00:00:00 2001 From: Ruben Talstra Date: Wed, 26 Feb 2025 17:23:21 +0100 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20feat:=20Implement=20Token=20Rat?= =?UTF-8?q?es=20Configuration=20Loader=20and=20Update=20Config=20Types?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/server/services/AppService.js | 7 ++- .../services/Config/loadTokenRatesConfig.js | 27 +++++++++++ packages/data-provider/src/config.ts | 45 +++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 api/server/services/Config/loadTokenRatesConfig.js diff --git a/api/server/services/AppService.js b/api/server/services/AppService.js index d194d31a6bc..09827c92448 100644 --- a/api/server/services/AppService.js +++ b/api/server/services/AppService.js @@ -12,6 +12,7 @@ const { agentsConfigSetup } = require('./start/agents'); const { initializeRoles } = require('~/models/Role'); const { getMCPManager } = require('~/config'); const paths = require('~/config/paths'); +const { loadTokenRatesConfig } = require('./Config/loadTokenRatesConfig'); /** * @@ -21,9 +22,13 @@ const paths = require('~/config/paths'); */ const AppService = async (app) => { await initializeRoles(); - /** @type {TCustomConfig}*/ + /** @type {TCustomConfig} */ const config = (await loadCustomConfig()) ?? {}; const configDefaults = getConfigDefaults(); + const tokenRatesConfig = loadTokenRatesConfig(config, configDefaults); + // + // // Set the global token rates configuration so that it can be used by the tx.js functions. 
+ // setTokenRatesConfig(tokenRatesConfig); const filteredTools = config.filteredTools; const includedTools = config.includedTools; diff --git a/api/server/services/Config/loadTokenRatesConfig.js b/api/server/services/Config/loadTokenRatesConfig.js new file mode 100644 index 00000000000..cc7d03b8a07 --- /dev/null +++ b/api/server/services/Config/loadTokenRatesConfig.js @@ -0,0 +1,27 @@ +const { removeNullishValues } = require('librechat-data-provider'); +const { logger } = require('~/config'); + +/** + * Loads custom token rates from the user's YAML config, merging with default token rates if available. + * + * @param {TCustomConfig | undefined} config - The loaded custom configuration. + * @param {TConfigDefaults} [configDefaults] - Optional default configuration values. + * @returns {TCustomConfig['tokenRates']} - The final token rates configuration. + */ +function loadTokenRatesConfig(config, configDefaults) { + const userTokenRates = removeNullishValues(config?.tokenRates ?? {}); + + if (!configDefaults?.tokenRates) { + logger.info(`User tokenRates configuration:\n${JSON.stringify(userTokenRates, null, 2)}`); + return userTokenRates; + } + + /** @type {TCustomConfig['tokenRates']} */ + const defaultTokenRates = removeNullishValues(configDefaults.tokenRates); + const merged = { ...defaultTokenRates, ...userTokenRates }; + + logger.info(`Merged tokenRates configuration:\n${JSON.stringify(merged, null, 2)}`); + return merged; +} + +module.exports = { loadTokenRatesConfig }; \ No newline at end of file diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 56a560f2ee0..5dbf8f90ee9 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -505,12 +505,56 @@ export type TStartupConfig = { helpAndFaqURL: string; customFooter?: string; modelSpecs?: TSpecsConfig; + tokenRates?: TTokenRates; sharedLinksEnabled: boolean; publicSharedLinksEnabled: boolean; analyticsGtmId?: string; 
instanceProjectId: string; }; +// Token cost schema type +export type TTokenCost = { + prompt?: number; + completion?: number; + cache?: { + write?: number; + read?: number; + }; +}; + +// Endpoint token rates schema type +export type TEndpointTokenRates = Record; + +// Token rates schema type +export type TTokenRates = { + openAI?: TEndpointTokenRates; + google?: TEndpointTokenRates; + anthropic?: TEndpointTokenRates; + bedrock?: TEndpointTokenRates; + custom?: TEndpointTokenRates; +}; + +const tokenCostSchema = z.object({ + prompt: z.number().optional(), // e.g. 1.5 => $1.50 / 1M tokens + completion: z.number().optional(), // e.g. 2.0 => $2.00 / 1M tokens + cache: z + .object({ + write: z.number().optional(), + read: z.number().optional(), + }) + .optional(), +}); + +const endpointTokenRatesSchema = z.record(z.string(), tokenCostSchema); + +const tokenRatesSchema = z.object({ + openAI: endpointTokenRatesSchema.optional(), + google: endpointTokenRatesSchema.optional(), + anthropic: endpointTokenRatesSchema.optional(), + bedrock: endpointTokenRatesSchema.optional(), + custom: endpointTokenRatesSchema.optional(), +}); + export const configSchema = z.object({ version: z.string(), cache: z.boolean().default(true), @@ -542,6 +586,7 @@ export const configSchema = z.object({ rateLimits: rateLimitSchema.optional(), fileConfig: fileConfigSchema.optional(), modelSpecs: specsConfigSchema.optional(), + tokenRates: tokenRatesSchema.optional(), endpoints: z .object({ all: baseEndpointSchema.optional(), From 262e6aa4c97eab988368be8debb15b9ac593e68c Mon Sep 17 00:00:00 2001 From: Ruben Talstra Date: Thu, 27 Feb 2025 10:57:42 +0100 Subject: [PATCH 2/3] =?UTF-8?q?=E2=9C=A8=20feat:=20Refactor=20Token=20Rate?= =?UTF-8?q?s=20Configuration=20and=20Introduce=20Custom=20Overrides?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/models/tx.js | 64 ++++++++++++++++--- api/server/services/AppService.js | 5 +- 
.../services/Config/loadTokenRatesConfig.js | 46 ++++++++++++- packages/data-provider/src/config.ts | 25 +------- 4 files changed, 105 insertions(+), 35 deletions(-) diff --git a/api/models/tx.js b/api/models/tx.js index 82ae9fb0347..7bc3a77600b 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -1,6 +1,49 @@ const { matchModelName } = require('../utils'); const defaultRate = 6; +const customTokenOverrides = {}; +const customCacheOverrides = {}; + +/** + * Allows overriding the default token multipliers. + * + * @param {Object} overrides - An object mapping model keys to their custom token multipliers. + * @param {Object} overrides.<model> - An object containing custom multipliers for the model. + * @param {number} overrides.<model>.prompt - The custom prompt multiplier for the model. + * @param {number} overrides.<model>.completion - The custom completion multiplier for the model. + * + * @example + * // Override the multipliers for "gpt-4o-mini" and "gpt-3.5": + * setCustomTokenOverrides({ + * "gpt-4o-mini": { prompt: 0.2, completion: 0.5 }, + * "gpt-3.5": { prompt: 1.0, completion: 2.0 } + * }); + */ +const setCustomTokenOverrides = (overrides) => { + Object.assign(customTokenOverrides, overrides); +}; + +/** + * Allows overriding the default cache multipliers. + * The override values should be nested under a lowercase key named "cache". + * + * @param {Object} overrides - An object mapping model keys to their custom cache multipliers. + * @param {Object} overrides.<model> - An object that must include a "cache" property. + * @param {Object} overrides.<model>.cache - An object containing custom cache multipliers for the model. + * @param {number} overrides.<model>.cache.write - The custom cache write multiplier for the model. + * @param {number} overrides.<model>.cache.read - The custom cache read multiplier for the model. 
+ * + * @example + * // Override the cache multipliers for "gpt-4o-mini" and "gpt-3.5": + * setCustomCacheOverrides({ + * "gpt-4o-mini": { cache: { write: 0.2, read: 0.5 } }, + * "gpt-3.5": { cache: { write: 1.0, read: 1.5 } } + * }); + */ +const setCustomCacheOverrides = (overrides) => { + Object.assign(customCacheOverrides, overrides); +}; + /** * AWS Bedrock pricing * source: https://aws.amazon.com/bedrock/pricing/ @@ -243,20 +286,23 @@ const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointToke return endpointTokenConfig?.[model]?.[cacheType] ?? null; } - if (valueKey && cacheType) { - return cacheTokenValues[valueKey]?.[cacheType] ?? null; + if (!valueKey && model) { + valueKey = getValueKey(model, endpoint); } - - if (!cacheType || !model) { + if (!valueKey) { return null; } - valueKey = getValueKey(model, endpoint); - if (!valueKey) { - return null; + // Check for custom cache overrides under the "cache" property. + if ( + customCacheOverrides[valueKey] && + customCacheOverrides[valueKey].cache && + customCacheOverrides[valueKey].cache[cacheType] != null + ) { + return customCacheOverrides[valueKey].cache[cacheType]; } - // If we got this far, and values[cacheType] is undefined somehow, return a rough average of default multipliers + // Fallback to the default cacheTokenValues. return cacheTokenValues[valueKey]?.[cacheType] ?? null; }; @@ -267,4 +313,6 @@ module.exports = { getCacheMultiplier, defaultRate, cacheTokenValues, + setCustomTokenOverrides, + setCustomCacheOverrides, }; diff --git a/api/server/services/AppService.js b/api/server/services/AppService.js index 09827c92448..041bd36163c 100644 --- a/api/server/services/AppService.js +++ b/api/server/services/AppService.js @@ -25,10 +25,7 @@ const AppService = async (app) => { /** @type {TCustomConfig} */ const config = (await loadCustomConfig()) ?? 
{}; const configDefaults = getConfigDefaults(); - const tokenRatesConfig = loadTokenRatesConfig(config, configDefaults); - // - // // Set the global token rates configuration so that it can be used by the tx.js functions. - // setTokenRatesConfig(tokenRatesConfig); + loadTokenRatesConfig(config, configDefaults); const filteredTools = config.filteredTools; const includedTools = config.includedTools; diff --git a/api/server/services/Config/loadTokenRatesConfig.js b/api/server/services/Config/loadTokenRatesConfig.js index cc7d03b8a07..8513453e4ef 100644 --- a/api/server/services/Config/loadTokenRatesConfig.js +++ b/api/server/services/Config/loadTokenRatesConfig.js @@ -1,8 +1,9 @@ const { removeNullishValues } = require('librechat-data-provider'); const { logger } = require('~/config'); +const { setCustomTokenOverrides, setCustomCacheOverrides } = require('~/models/tx'); /** - * Loads custom token rates from the user's YAML config, merging with default token rates if available. + * Loads token rates from the user's configuration, merging with default token rates if available. * * @param {TCustomConfig | undefined} config - The loaded custom configuration. * @param {TConfigDefaults} [configDefaults] - Optional default configuration values. 
@@ -13,6 +14,8 @@ function loadTokenRatesConfig(config, configDefaults) { if (!configDefaults?.tokenRates) { logger.info(`User tokenRates configuration:\n${JSON.stringify(userTokenRates, null, 2)}`); + // Apply custom token rates even if there are no defaults + applyCustomTokenRates(userTokenRates); return userTokenRates; } @@ -20,8 +23,49 @@ function loadTokenRatesConfig(config, configDefaults) { const defaultTokenRates = removeNullishValues(configDefaults.tokenRates); const merged = { ...defaultTokenRates, ...userTokenRates }; + // Apply custom token rates configuration + applyCustomTokenRates(merged); + logger.info(`Merged tokenRates configuration:\n${JSON.stringify(merged, null, 2)}`); return merged; } +/** + * Processes the token rates configuration to set up custom overrides for each model. + * + * The configuration is keyed by model name; each entry may set `prompt`, `completion` and `cache.write` / `cache.read`. + * + * For each model in the tokenRates configuration, this function will call the tx.js + * override functions to apply the custom token and cache multipliers. + * + * @param {TModelTokenRates} tokenRates - The token rates configuration mapping models to token costs. + */ +function applyCustomTokenRates(tokenRates) { + // Iterate over each model in the tokenRates configuration. + Object.keys(tokenRates).forEach((model) => { + const rate = tokenRates[model]; + // If token multipliers are provided, set custom token overrides. + if (rate.prompt != null || rate.completion != null) { + setCustomTokenOverrides({ + [model]: { + prompt: rate.prompt, + completion: rate.completion, + }, + }); + } + // Check for cache overrides. 
+ const cacheOverrides = rate.cache; + if (cacheOverrides && (cacheOverrides.write != null || cacheOverrides.read != null)) { + setCustomCacheOverrides({ + [model]: { + cache: { + write: cacheOverrides.write, + read: cacheOverrides.read, + }, + }, + }); + } + }); +} + module.exports = { loadTokenRatesConfig }; \ No newline at end of file diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 5dbf8f90ee9..55097b90e79 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -505,7 +505,7 @@ export type TStartupConfig = { helpAndFaqURL: string; customFooter?: string; modelSpecs?: TSpecsConfig; - tokenRates?: TTokenRates; + tokenRates?: TModelTokenRates; sharedLinksEnabled: boolean; publicSharedLinksEnabled: boolean; analyticsGtmId?: string; @@ -523,16 +523,7 @@ export type TTokenCost = { }; // Endpoint token rates schema type -export type TEndpointTokenRates = Record; - -// Token rates schema type -export type TTokenRates = { - openAI?: TEndpointTokenRates; - google?: TEndpointTokenRates; - anthropic?: TEndpointTokenRates; - bedrock?: TEndpointTokenRates; - custom?: TEndpointTokenRates; -}; +export type TModelTokenRates = Record; const tokenCostSchema = z.object({ prompt: z.number().optional(), // e.g. 
1.5 => $1.50 / 1M tokens @@ -545,16 +536,6 @@ const tokenCostSchema = z.object({ .optional(), }); -const endpointTokenRatesSchema = z.record(z.string(), tokenCostSchema); - -const tokenRatesSchema = z.object({ - openAI: endpointTokenRatesSchema.optional(), - google: endpointTokenRatesSchema.optional(), - anthropic: endpointTokenRatesSchema.optional(), - bedrock: endpointTokenRatesSchema.optional(), - custom: endpointTokenRatesSchema.optional(), -}); - export const configSchema = z.object({ version: z.string(), cache: z.boolean().default(true), @@ -586,7 +567,7 @@ export const configSchema = z.object({ rateLimits: rateLimitSchema.optional(), fileConfig: fileConfigSchema.optional(), modelSpecs: specsConfigSchema.optional(), - tokenRates: tokenRatesSchema.optional(), + tokenRates: z.record(z.string(), tokenCostSchema).optional(), /* keyed by model name; each value is one token cost entry */ endpoints: z .object({ all: baseEndpointSchema.optional(), From edf23eb2aec13b99341075577dbe4ce7823d70a1 Mon Sep 17 00:00:00 2001 From: Ruben Talstra Date: Thu, 27 Feb 2025 11:00:39 +0100 Subject: [PATCH 3/3] =?UTF-8?q?=E2=9C=A8=20feat:=20Add=20example=20custom?= =?UTF-8?q?=20token=20rates=20configuration=20to=20`librechat.example.yaml?= =?UTF-8?q?`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- librechat.example.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/librechat.example.yaml b/librechat.example.yaml index 637e7a5219b..50f46a714e9 100644 --- a/librechat.example.yaml +++ b/librechat.example.yaml @@ -68,6 +68,17 @@ interface: multiConvo: true agents: true +# Example Custom Token Rates (optional) +#tokenRates: +# gpt-4o-mini: +# prompt: 200.0 +# completion: 400.0 +# claude-3.7-sonnet: +# cache: +# read: 200.0 +# write: 400.0 + + # Example Registration Object Structure (optional) registration: socialLogins: ['github', 'google', 'discord', 'openid', 'facebook', 'apple']