Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/custom token rates for endpoints #6086

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 56 additions & 8 deletions api/models/tx.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,49 @@
const { matchModelName } = require('../utils');
const defaultRate = 6;

const customTokenOverrides = {};
const customCacheOverrides = {};

/**
* Allows overriding the default token multipliers.
*
* @param {Object} overrides - An object mapping model keys to their custom token multipliers.
* @param {Object} overrides.<model> - An object containing custom multipliers for the model.
* @param {number} overrides.<model>.prompt - The custom prompt multiplier for the model.
* @param {number} overrides.<model>.completion - The custom completion multiplier for the model.
*
* @example
* // Override the multipliers for "gpt-4o-mini" and "gpt-3.5":
* setCustomTokenOverrides({
* "gpt-4o-mini": { prompt: 0.2, completion: 0.5 },
* "gpt-3.5": { prompt: 1.0, completion: 2.0 }
* });
*/
const setCustomTokenOverrides = (overrides) => {
Object.assign(customTokenOverrides, overrides);
};

/**
* Allows overriding the default cache multipliers.
* The override values should be nested under a key named "Cache".
*
* @param {Object} overrides - An object mapping model keys to their custom cache multipliers.
* @param {Object} overrides.<model> - An object that must include a "Cache" property.
* @param {Object} overrides.<model>.Cache - An object containing custom cache multipliers for the model.
* @param {number} overrides.<model>.Cache.write - The custom cache write multiplier for the model.
* @param {number} overrides.<model>.Cache.read - The custom cache read multiplier for the model.
*
* @example
* // Override the cache multipliers for "gpt-4o-mini" and "gpt-3.5":
* setCustomCacheOverrides({
* "gpt-4o-mini": { cache: { write: 0.2, read: 0.5 } },
* "gpt-3.5": { cache: { write: 1.0, read: 1.5 } }
* });
*/
const setCustomCacheOverrides = (overrides) => {
Object.assign(customCacheOverrides, overrides);
};

/**
* AWS Bedrock pricing
* source: https://aws.amazon.com/bedrock/pricing/
Expand Down Expand Up @@ -243,20 +286,23 @@ const getCacheMultiplier = ({ valueKey, cacheType, model, endpoint, endpointToke
return endpointTokenConfig?.[model]?.[cacheType] ?? null;
}

if (valueKey && cacheType) {
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
if (!valueKey && model) {
valueKey = getValueKey(model, endpoint);
}

if (!cacheType || !model) {
if (!valueKey) {
return null;
}

valueKey = getValueKey(model, endpoint);
if (!valueKey) {
return null;
// Check for custom cache overrides under the "cache" property.
if (
customCacheOverrides[valueKey] &&
customCacheOverrides[valueKey].cache &&
customCacheOverrides[valueKey].cache[cacheType] != null
) {
return customCacheOverrides[valueKey].cache[cacheType];
}

// If we got this far, and values[cacheType] is undefined somehow, return a rough average of default multipliers
// Fallback to the default cacheTokenValues.
return cacheTokenValues[valueKey]?.[cacheType] ?? null;
};

Expand All @@ -267,4 +313,6 @@ module.exports = {
getCacheMultiplier,
defaultRate,
cacheTokenValues,
setCustomTokenOverrides,
setCustomCacheOverrides,
};
4 changes: 3 additions & 1 deletion api/server/services/AppService.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const { agentsConfigSetup } = require('./start/agents');
const { initializeRoles } = require('~/models/Role');
const { getMCPManager } = require('~/config');
const paths = require('~/config/paths');
const { loadTokenRatesConfig } = require('./Config/loadTokenRatesConfig');

/**
*
Expand All @@ -21,9 +22,10 @@ const paths = require('~/config/paths');
*/
const AppService = async (app) => {
await initializeRoles();
/** @type {TCustomConfig}*/
/** @type {TCustomConfig} */
const config = (await loadCustomConfig()) ?? {};
const configDefaults = getConfigDefaults();
loadTokenRatesConfig(config, configDefaults);

const filteredTools = config.filteredTools;
const includedTools = config.includedTools;
Expand Down
71 changes: 71 additions & 0 deletions api/server/services/Config/loadTokenRatesConfig.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
const { removeNullishValues } = require('librechat-data-provider');
const { logger } = require('~/config');
const { setCustomTokenOverrides, setCustomCacheOverrides } = require('~/models/tx');

/**
* Loads token rates from the user's configuration, merging with default token rates if available.
*
* @param {TCustomConfig | undefined} config - The loaded custom configuration.
* @param {TConfigDefaults} [configDefaults] - Optional default configuration values.
* @returns {TCustomConfig['tokenRates']} - The final token rates configuration.
*/
function loadTokenRatesConfig(config, configDefaults) {
const userTokenRates = removeNullishValues(config?.tokenRates ?? {});

if (!configDefaults?.tokenRates) {
logger.info(`User tokenRates configuration:\n${JSON.stringify(userTokenRates, null, 2)}`);
// Apply custom token rates even if there are no defaults
applyCustomTokenRates(userTokenRates);
return userTokenRates;
}

/** @type {TCustomConfig['tokenRates']} */
const defaultTokenRates = removeNullishValues(configDefaults.tokenRates);
const merged = { ...defaultTokenRates, ...userTokenRates };

// Apply custom token rates configuration
applyCustomTokenRates(merged);

logger.info(`Merged tokenRates configuration:\n${JSON.stringify(merged, null, 2)}`);
return merged;
}

/**
* Processes the token rates configuration to set up custom overrides for each model.
*
* The configuration is expected to be specified per model:
*
* For each model in the tokenRates configuration, this function will call the tx.js
* override functions to apply the custom token and cache multipliers.
*
* @param {TModelTokenRates} tokenRates - The token rates configuration mapping models to token costs.
*/
function applyCustomTokenRates(tokenRates) {
// Iterate over each model in the tokenRates configuration.
Object.keys(tokenRates).forEach((model) => {
const rate = tokenRates[model];
// If token multipliers are provided, set custom token overrides.
if (rate.prompt != null || rate.completion != null) {
setCustomTokenOverrides({
[model]: {
prompt: rate.prompt,
completion: rate.completion,
},
});
}
// Check for cache overrides.
const cacheOverrides = rate.cache;
if (cacheOverrides && (cacheOverrides.write != null || cacheOverrides.read != null)) {
setCustomCacheOverrides({
[model]: {
cache: {
write: cacheOverrides.write,
read: cacheOverrides.read,
},
},
});
}
});
}

module.exports = { loadTokenRatesConfig };
11 changes: 11 additions & 0 deletions librechat.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ interface:
multiConvo: true
agents: true

# Example Custom Token Rates (optional)
#tokenRates:
# gpt-4o-mini:
# prompt: 200.0
# completion: 400.0
# claude-3.7-sonnet:
# Cache:
# read: 200.0
# write: 400.0


# Example Registration Object Structure (optional)
registration:
socialLogins: ['github', 'google', 'discord', 'openid', 'facebook', 'apple']
Expand Down
26 changes: 26 additions & 0 deletions packages/data-provider/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -526,12 +526,37 @@ export type TStartupConfig = {
helpAndFaqURL: string;
customFooter?: string;
modelSpecs?: TSpecsConfig;
tokenRates?: TModelTokenRates;
sharedLinksEnabled: boolean;
publicSharedLinksEnabled: boolean;
analyticsGtmId?: string;
instanceProjectId: string;
};

// Token cost schema type
export type TTokenCost = {
prompt?: number;
completion?: number;
cache?: {
write?: number;
read?: number;
};
};

// Endpoint token rates schema type
export type TModelTokenRates = Record<string, TTokenCost>;

const tokenCostSchema = z.object({
prompt: z.number().optional(), // e.g. 1.5 => $1.50 / 1M tokens
completion: z.number().optional(), // e.g. 2.0 => $2.00 / 1M tokens
cache: z
.object({
write: z.number().optional(),
read: z.number().optional(),
})
.optional(),
});

export const configSchema = z.object({
version: z.string(),
cache: z.boolean().default(true),
Expand Down Expand Up @@ -563,6 +588,7 @@ export const configSchema = z.object({
rateLimits: rateLimitSchema.optional(),
fileConfig: fileConfigSchema.optional(),
modelSpecs: specsConfigSchema.optional(),
tokenRates: tokenCostSchema.optional(),
endpoints: z
.object({
all: baseEndpointSchema.optional(),
Expand Down
Loading