🚀 feat: Agent Cache Tokens & Anthropic Reasoning Support (#6098)
* fix: handling of top_k and top_p parameters for Claude-3.7 models (allowed without reasoning)

* feat: bump @librechat/agents for Anthropic Reasoning support

* fix: update reasoning handling for OpenRouter integration

* fix: enhance agent token spending logic to include cache creation and read details

* fix: update logic for thinking status in ContentParts component

* refactor: improve agent title handling

* chore: bump @librechat/agents to version 2.1.7, enabling parallel tool calling for Google models
danny-avila authored Feb 27, 2025
1 parent 34f967e commit 9802629
Showing 11 changed files with 187 additions and 40 deletions.
api/app/clients/AnthropicClient.js (17 changes: 8 additions & 9 deletions)
@@ -746,15 +746,6 @@ class AnthropicClient extends BaseClient {
metadata,
};

- if (!/claude-3[-.]7/.test(model)) {
-   if (top_p !== undefined) {
-     requestOptions.top_p = top_p;
-   }
-   if (top_k !== undefined) {
-     requestOptions.top_k = top_k;
-   }
- }

if (this.useMessages) {
requestOptions.messages = payload;
requestOptions.max_tokens =
@@ -769,6 +760,14 @@
thinkingBudget: this.options.thinkingBudget,
});

+ if (!/claude-3[-.]7/.test(model)) {
+   requestOptions.top_p = top_p;
+   requestOptions.top_k = top_k;
+ } else if (requestOptions.thinking == null) {
+   requestOptions.topP = top_p;
+   requestOptions.topK = top_k;
+ }

if (this.systemMessage && this.supportsCacheControl === true) {
requestOptions.system = [
{
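Note: the net effect of these two hunks is to move the top_p/top_k gating below the reasoning configuration, so Claude 3.7 models still receive sampling parameters when extended thinking is disabled (Anthropic rejects top_p/top_k alongside extended thinking). A minimal standalone sketch of the resulting gating, with the function name invented for illustration:

// Sketch only: non-3.7 models always receive sampling parameters; Claude 3.7
// receives them only when the reasoning setup left `thinking` null/undefined.
// The snake_case vs. camelCase split mirrors the diff above.
function applySamplingParams(requestOptions, model, top_p, top_k) {
  if (!/claude-3[-.]7/.test(model)) {
    requestOptions.top_p = top_p;
    requestOptions.top_k = top_k;
  } else if (requestOptions.thinking == null) {
    requestOptions.topP = top_p;
    requestOptions.topK = top_k;
  }
  return requestOptions;
}

applySamplingParams({ thinking: null }, 'claude-3-7-sonnet', 0.9, 10);
// -> { thinking: null, topP: 0.9, topK: 10 }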
api/app/clients/OpenAIClient.js (6 changes: 6 additions & 0 deletions)
@@ -1309,6 +1309,12 @@ ${convo}
modelOptions.include_reasoning = true;
reasoningKey = 'reasoning';
}
+ if (this.useOpenRouter && modelOptions.reasoning_effort != null) {
+   modelOptions.reasoning = {
+     effort: modelOptions.reasoning_effort,
+   };
+   delete modelOptions.reasoning_effort;
+ }

this.streamHandler = new SplitStreamHandler({
reasoningKey,
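Note: OpenRouter expects reasoning settings as a nested `reasoning` object rather than OpenAI's flat `reasoning_effort` field; the added block performs that translation. The same remapping in isolation, as a hedged sketch (the helper name is illustrative, not from the codebase):

// Illustrative helper: converts OpenAI-style `reasoning_effort` into
// OpenRouter's nested `reasoning.effort` shape, mutating the options in place.
function remapReasoningForOpenRouter(modelOptions) {
  if (modelOptions.reasoning_effort != null) {
    modelOptions.reasoning = { effort: modelOptions.reasoning_effort };
    delete modelOptions.reasoning_effort;
  }
  return modelOptions;
}

remapReasoningForOpenRouter({ model: 'o3-mini', reasoning_effort: 'high' });
// -> { model: 'o3-mini', reasoning: { effort: 'high' } }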
api/app/clients/specs/AnthropicClient.test.js (49 changes: 49 additions & 0 deletions)
@@ -680,4 +680,53 @@ describe('AnthropicClient', () => {
expect(capturedOptions).not.toHaveProperty('top_p');
});
});

  it('should include top_k and top_p parameters for Claude-3.7 models when thinking is explicitly disabled', async () => {
    const client = new AnthropicClient('test-api-key', {
      modelOptions: {
        model: 'claude-3-7-sonnet',
        temperature: 0.7,
        topK: 10,
        topP: 0.9,
      },
      thinking: false,
    });

    async function* mockAsyncGenerator() {
      yield { type: 'message_start', message: { usage: {} } };
      yield { delta: { text: 'Test response' } };
      yield { type: 'message_delta', usage: {} };
    }

    jest.spyOn(client, 'createResponse').mockImplementation(() => {
      return mockAsyncGenerator();
    });

    let capturedOptions = null;
    jest.spyOn(client, 'getClient').mockImplementation((options) => {
      capturedOptions = options;
      return {};
    });

    const payload = [{ role: 'user', content: 'Test message' }];
    await client.sendCompletion(payload, {});

    expect(capturedOptions).toHaveProperty('topK', 10);
    expect(capturedOptions).toHaveProperty('topP', 0.9);

    client.setOptions({
      modelOptions: {
        model: 'claude-3.7-sonnet',
        temperature: 0.7,
        topK: 10,
        topP: 0.9,
      },
      thinking: false,
    });

    await client.sendCompletion(payload, {});

    expect(capturedOptions).toHaveProperty('topK', 10);
    expect(capturedOptions).toHaveProperty('topP', 0.9);
  });
});
api/package.json (2 changes: 1 addition & 1 deletion)
@@ -45,7 +45,7 @@
"@langchain/google-genai": "^0.1.9",
"@langchain/google-vertexai": "^0.2.0",
"@langchain/textsplitters": "^0.1.0",
"@librechat/agents": "^2.1.3",
"@librechat/agents": "^2.1.7",
"@waylaidwanderer/fetch-event-source": "^3.0.1",
"axios": "1.7.8",
"bcryptjs": "^2.4.3",
api/server/controllers/agents/client.js (62 changes: 47 additions & 15 deletions)
@@ -27,10 +27,10 @@ const {
formatContentStrings,
createContextHandlers,
} = require('~/app/clients/prompts');
- const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+ const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const { getBufferString, HumanMessage } = require('@langchain/core/messages');
+ const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const Tokenizer = require('~/server/services/Tokenizer');
- const { spendTokens } = require('~/models/spendTokens');
const BaseClient = require('~/app/clients/BaseClient');
const { createRun } = require('./run');
const { logger } = require('~/config');
@@ -380,32 +380,61 @@
if (!collectedUsage || !collectedUsage.length) {
return;
}
- const input_tokens = collectedUsage[0]?.input_tokens || 0;
+ const input_tokens =
+   (collectedUsage[0]?.input_tokens || 0) +
+   (Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) +
+   (Number(collectedUsage[0]?.input_token_details?.cache_read) || 0);

let output_tokens = 0;
let previousTokens = input_tokens; // Start with original input
for (let i = 0; i < collectedUsage.length; i++) {
const usage = collectedUsage[i];
if (!usage) {
continue;
}

+   const cache_creation = Number(usage.input_token_details?.cache_creation) || 0;
+   const cache_read = Number(usage.input_token_details?.cache_read) || 0;

+   const txMetadata = {
+     context,
+     conversationId: this.conversationId,
+     user: this.user ?? this.options.req.user?.id,
+     endpointTokenConfig: this.options.endpointTokenConfig,
+     model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
+   };

if (i > 0) {
// Count new tokens generated (input_tokens minus previous accumulated tokens)
-     output_tokens += (Number(usage.input_tokens) || 0) - previousTokens;
+     output_tokens +=
+       (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
}

// Add this message's output tokens
output_tokens += Number(usage.output_tokens) || 0;

// Update previousTokens to include this message's output
previousTokens += Number(usage.output_tokens) || 0;
-   spendTokens(
-     {
-       context,
-       conversationId: this.conversationId,
-       user: this.user ?? this.options.req.user?.id,
-       endpointTokenConfig: this.options.endpointTokenConfig,
-       model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
-     },
-     { promptTokens: usage.input_tokens, completionTokens: usage.output_tokens },
-   ).catch((err) => {

+   if (cache_creation > 0 || cache_read > 0) {
+     spendStructuredTokens(txMetadata, {
+       promptTokens: {
+         input: usage.input_tokens,
+         write: cache_creation,
+         read: cache_read,
+       },
+       completionTokens: usage.output_tokens,
+     }).catch((err) => {
+       logger.error(
+         '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending structured tokens',
+         err,
+       );
+     });
+   }
+   spendTokens(txMetadata, {
+     promptTokens: usage.input_tokens,
+     completionTokens: usage.output_tokens,
+   }).catch((err) => {
logger.error(
'[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending tokens',
err,
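Note on the accounting above: cache writes and reads now count toward the effective prompt size, and entries that touch the cache are additionally recorded with a write/read breakdown via spendStructuredTokens. A worked trace with hypothetical numbers (not from the commit):

// Hypothetical trace for two collected usage entries.
const collectedUsage = [
  // First call: 1000 raw input tokens, 800 of them written to cache.
  { input_tokens: 1000, output_tokens: 50, input_token_details: { cache_creation: 800 } },
  // Second call: 1080 raw input tokens plus 800 read back from cache.
  { input_tokens: 1080, output_tokens: 60, input_token_details: { cache_read: 800 } },
];
// input_tokens   = 1000 + 800        = 1800 (effective prompt of the first call)
// previousTokens = 1800, then + 50   = 1850 after the first output
// i = 1: output_tokens += (1080 + 800) - 1850 = 30 (tokens added between calls)
// output_tokens  = 50 + 30 + 60      = 140
// Both entries also route through spendStructuredTokens, since each has
// cache_creation > 0 or cache_read > 0.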
@@ -792,7 +821,10 @@
throw new Error('Run not initialized');
}
const { handleLLMEnd, collected: collectedMetadata } = createMetadataAggregator();
- const clientOptions = {};
+ /** @type {import('@librechat/agents').ClientOptions} */
+ const clientOptions = {
+   maxTokens: 75,
+ };
const providerConfig = this.options.req.app.locals[this.options.agent.provider];
if (
providerConfig &&
api/server/services/Endpoints/agents/title.js (11 changes: 10 additions & 1 deletion)
@@ -20,10 +20,19 @@ const addTitle = async (req, { text, response, client }) => {

const titleCache = getLogStores(CacheKeys.GEN_TITLE);
const key = `${req.user.id}-${response.conversationId}`;
+ const responseText =
+   response?.content && Array.isArray(response?.content)
+     ? response.content.reduce((acc, block) => {
+         if (block?.type === 'text') {
+           return acc + block.text;
+         }
+         return acc;
+       }, '')
+     : (response?.content ?? response?.text ?? '');

const title = await client.titleConvo({
text,
- responseText: response?.text ?? '',
+ responseText,
conversationId: response.conversationId,
});
await titleCache.set(key, title, 120000);
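Note: aggregated agent responses can carry `content` as an array of typed blocks rather than a plain string, so the title flow now concatenates only the text blocks. An illustrative input (the non-text block shape is a made-up placeholder):

// Only `text` blocks contribute to responseText under the reduction above.
const response = {
  conversationId: 'abc-123', // hypothetical id
  content: [
    { type: 'tool_call', tool_call: { name: 'search' } }, // hypothetical, ignored
    { type: 'text', text: 'Here are the results' },
    { type: 'text', text: ' you asked for.' },
  ],
};
// responseText === 'Here are the results you asked for.'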
api/server/services/Endpoints/anthropic/llm.js (15 changes: 8 additions & 7 deletions)
@@ -1,6 +1,6 @@
const { HttpsProxyAgent } = require('https-proxy-agent');
const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
- const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
+ const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');

/**
* Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -49,13 +49,14 @@ function getLLMConfig(apiKey, options = {}) {
clientOptions: {},
};

+ requestOptions = configureReasoning(requestOptions, systemOptions);

if (!/claude-3[-.]7/.test(mergedOptions.model)) {
-   if (mergedOptions.topP !== undefined) {
-     requestOptions.topP = mergedOptions.topP;
-   }
-   if (mergedOptions.topK !== undefined) {
-     requestOptions.topK = mergedOptions.topK;
-   }
+   requestOptions.topP = mergedOptions.topP;
+   requestOptions.topK = mergedOptions.topK;
+ } else if (requestOptions.thinking == null) {
+   requestOptions.topP = mergedOptions.topP;
+   requestOptions.topK = mergedOptions.topK;
}

const supportsCacheControl =
api/server/services/Endpoints/anthropic/llm.spec.js (41 changes: 41 additions & 0 deletions)
@@ -109,4 +109,45 @@
// Just verifying that the promptCache setting is processed
expect(result.llmConfig).toBeDefined();
});

  it('should include topK and topP for Claude-3.7 models when thinking is not enabled', () => {
    // Test with thinking explicitly set to null/undefined
    const result = getLLMConfig('test-api-key', {
      modelOptions: {
        model: 'claude-3-7-sonnet',
        topK: 10,
        topP: 0.9,
        thinking: false,
      },
    });

    expect(result.llmConfig).toHaveProperty('topK', 10);
    expect(result.llmConfig).toHaveProperty('topP', 0.9);

    // Test with thinking explicitly set to false
    const result2 = getLLMConfig('test-api-key', {
      modelOptions: {
        model: 'claude-3-7-sonnet',
        topK: 10,
        topP: 0.9,
        thinking: false,
      },
    });

    expect(result2.llmConfig).toHaveProperty('topK', 10);
    expect(result2.llmConfig).toHaveProperty('topP', 0.9);

    // Test with decimal notation as well
    const result3 = getLLMConfig('test-api-key', {
      modelOptions: {
        model: 'claude-3.7-sonnet',
        topK: 10,
        topP: 0.9,
        thinking: false,
      },
    });

    expect(result3.llmConfig).toHaveProperty('topK', 10);
    expect(result3.llmConfig).toHaveProperty('topP', 0.9);
  });
});
api/server/services/Endpoints/openAI/llm.js (12 changes: 10 additions & 2 deletions)
@@ -29,7 +29,6 @@ function getLLMConfig(apiKey, options = {}) {
const {
modelOptions = {},
reverseProxyUrl,
- useOpenRouter,
defaultQuery,
headers,
proxy,
@@ -56,9 +55,11 @@
});
}

+ let useOpenRouter;
/** @type {OpenAIClientOptions['configuration']} */
const configOptions = {};
- if (useOpenRouter || (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter))) {
+ if (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) {
+   useOpenRouter = true;
llmConfig.include_reasoning = true;
configOptions.baseURL = reverseProxyUrl;
configOptions.defaultHeaders = Object.assign(
@@ -118,6 +119,13 @@
llmConfig.organization = process.env.OPENAI_ORGANIZATION;
}

+ if (useOpenRouter && llmConfig.reasoning_effort != null) {
+   llmConfig.reasoning = {
+     effort: llmConfig.reasoning_effort,
+   };
+   delete llmConfig.reasoning_effort;
+ }

return {
/** @type {OpenAIClientOptions} */
llmConfig,
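Note: `useOpenRouter` is no longer accepted as a caller-supplied option here; it is derived from the reverse-proxy URL instead, so the reasoning remap above applies whenever the endpoint points at OpenRouter. A small sketch of the detection, assuming KnownEndpoints.openrouter resolves to the string 'openrouter':

// Sketch of the URL-based detection, with the endpoint name inlined.
function isOpenRouter(reverseProxyUrl) {
  return Boolean(reverseProxyUrl && reverseProxyUrl.includes('openrouter'));
}

isOpenRouter('https://openrouter.ai/api/v1'); // true
isOpenRouter('https://api.openai.com/v1'); // false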
client/src/components/Chat/Messages/Content/ContentParts.tsx (4 changes: 3 additions & 1 deletion)
@@ -109,7 +109,9 @@ const ContentParts = memo(
return val;
})
}
- label={isSubmitting ? localize('com_ui_thinking') : localize('com_ui_thoughts')}
+ label={
+   isSubmitting && isLast ? localize('com_ui_thinking') : localize('com_ui_thoughts')
+ }
/>
</div>
)}
package-lock.json (8 changes: 4 additions & 4 deletions)

Some generated files are not rendered by default.
