Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: fix remote engines request templates #4477

Merged
merged 1 commit into the base branch from the source branch on
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion extensions/engine-management-extension/models/martian.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Martian Model Router",
"version": "1.0",
"description": "Martian Model Router dynamically routes requests to the best LLM in real-time",
"parameters": {
"inference_params": {
"max_tokens": 4096,
"temperature": 0.7,
"top_p": 0.95,
Expand Down
6 changes: 3 additions & 3 deletions extensions/engine-management-extension/models/mistral.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Mistral Small",
"version": "1.1",
"description": "Mistral Small is the ideal choice for simple tasks (Classification, Customer Support, or Text Generation) at an affordable price.",
"parameters": {
"inference_params": {
"max_tokens": 32000,
"temperature": 0.7,
"top_p": 0.95,
Expand All @@ -19,7 +19,7 @@
"name": "Mistral Large",
"version": "1.1",
"description": "Mistral Large is ideal for complex tasks (Synthetic Text Generation, Code Generation, RAG, or Agents).",
"parameters": {
"inference_params": {
"max_tokens": 32000,
"temperature": 0.7,
"top_p": 0.95,
Expand All @@ -33,7 +33,7 @@
"name": "Mixtral 8x22B",
"version": "1.1",
"description": "Mixtral 8x22B is a high-performance, cost-effective model designed for complex tasks.",
"parameters": {
"inference_params": {
"max_tokens": 32000,
"temperature": 0.7,
"top_p": 0.95,
Expand Down
2 changes: 1 addition & 1 deletion extensions/engine-management-extension/models/nvidia.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Mistral 7B",
"version": "1.1",
"description": "Mistral 7B with NVIDIA",
"parameters": {
"inference_params": {
"max_tokens": 1024,
"temperature": 0.3,
"top_p": 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "OpenRouter",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"parameters": {
"inference_params": {
"max_tokens": 128000,
"temperature": 0.7,
"top_p": 0.95,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
},
"transform_resp": {
"chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": \"{% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% endif %}\", \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions extensions/engine-management-extension/resources/mistral.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
"transform_req": {
"chat_completions": {
"url": "https://api.mistral.ai/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
"template": "{{tojson(input_request)}}"
}
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
"template": "{{tojson(input_request)}}"
}
}
}
Expand Down
1 change: 0 additions & 1 deletion web/utils/messageRequestBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
this.model = model
this.thread = thread
this.messages = messages
.filter((e) => !e.metadata?.error)
.map<ChatCompletionMessage>((msg) => ({

Check warning on line 38 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

37-38 lines are not covered with tests
role: msg.role,
content: msg.content[0]?.text?.value ?? '.',
}))
Expand All @@ -47,37 +47,37 @@
base64Blob: string | undefined,
fileInfo?: FileInfo
) {
if (base64Blob && fileInfo?.type === 'pdf')
return this.addDocMessage(message, fileInfo?.name)
else if (base64Blob && fileInfo?.type === 'image') {
return this.addImageMessage(message, base64Blob)

Check warning on line 53 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

50-53 lines are not covered with tests
}
this.messages = [

Check warning on line 55 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

55 line is not covered with tests
...this.messages,
{
role: ChatCompletionRole.User,
content: message,
},
]
return this

Check warning on line 62 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

62 line is not covered with tests
}

// Chainable
addSystemMessage(message: string | undefined) {
if (!message || message.trim() === '') return this
this.messages = [

Check warning on line 68 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

67-68 lines are not covered with tests
{
role: ChatCompletionRole.System,
content: message,
},
...this.messages,
]
return this

Check warning on line 75 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

75 line is not covered with tests
}

// Chainable
addDocMessage(prompt: string, name?: string) {
const message: ChatCompletionMessage = {

Check warning on line 80 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

80 line is not covered with tests
role: ChatCompletionRole.User,
content: [
{
Expand All @@ -92,13 +92,13 @@
},
] as ChatCompletionMessageContent,
}
this.messages = [...this.messages, message]
return this

Check warning on line 96 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

95-96 lines are not covered with tests
}

// Chainable
addImageMessage(prompt: string, base64: string) {
const message: ChatCompletionMessage = {

Check warning on line 101 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

101 line is not covered with tests
role: ChatCompletionRole.User,
content: [
{
Expand All @@ -114,8 +114,8 @@
] as ChatCompletionMessageContent,
}

this.messages = [...this.messages, message]
return this

Check warning on line 118 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / coverage-check

117-118 lines are not covered with tests
}

removeLastAssistantMessage() {
Expand Down Expand Up @@ -167,7 +167,6 @@
messages: this.normalizeMessages(this.messages),
model: this.model,
thread: this.thread,
engine: this.model.engine,
}
}
}
Loading