Merge pull request #5459 from DDMeaqua/tts
add tts
Dogtiti authored Sep 18, 2024
2 parents d51bbb4 + 10d7a64 commit a8c70d8
Showing 28 changed files with 1,080 additions and 12 deletions.
24 changes: 19 additions & 5 deletions app/client/api.ts
@@ -25,6 +25,7 @@ export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];

 export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
+export const TTSModels = ["tts-1", "tts-1-hd"] as const;
 export type ChatModel = ModelType;

 export interface MultimodalContent {
@@ -53,6 +54,15 @@ export interface LLMConfig {
   style?: DalleRequestPayload["style"];
 }

+export interface SpeechOptions {
+  model: string;
+  input: string;
+  voice: string;
+  response_format?: string;
+  speed?: number;
+  onController?: (controller: AbortController) => void;
+}
+
 export interface ChatOptions {
   messages: RequestMessage[];
   config: LLMConfig;
@@ -87,6 +97,7 @@ export interface LLMModelProvider {

 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise<void>;
+  abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
   abstract usage(): Promise<LLMUsage>;
   abstract models(): Promise<LLMModel[]>;
 }
@@ -205,13 +216,16 @@ export function validString(x: string): boolean {
   return x?.length > 0;
 }

-export function getHeaders() {
+export function getHeaders(ignoreHeaders: boolean = false) {
   const accessStore = useAccessStore.getState();
   const chatStore = useChatStore.getState();
-  const headers: Record<string, string> = {
-    "Content-Type": "application/json",
-    Accept: "application/json",
-  };
+  let headers: Record<string, string> = {};
+  if (!ignoreHeaders) {
+    headers = {
+      "Content-Type": "application/json",
+      Accept: "application/json",
+    };
+  }

   const clientConfig = getClientConfig();

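For orientation, here is a minimal sketch (not part of the diff) of how a caller might use the new `speech()` contract once a concrete client implements it. The voice name, response format, and the Web Audio playback path are illustrative assumptions only:

```ts
import { ChatGPTApi } from "./platforms/openai";

// Hypothetical helper: synthesize `text` with the new TTS API and play it.
async function speak(text: string) {
  const api = new ChatGPTApi();
  const audio: ArrayBuffer = await api.speech({
    model: "tts-1",          // one of the new TTSModels
    input: text,
    voice: "alloy",          // assumed OpenAI voice name
    response_format: "mp3",
    speed: 1.0,
  });

  // Decode and play the returned audio via the Web Audio API.
  const ctx = new AudioContext();
  const buffer = await ctx.decodeAudioData(audio);
  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);
  source.start();
}
```

The new `ignoreHeaders` flag on `getHeaders()` appears intended to let callers opt out of the default JSON `Content-Type`/`Accept` headers, e.g. for requests whose bodies or responses are not JSON.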
5 changes: 5 additions & 0 deletions app/client/platforms/alibaba.ts
@@ -12,6 +12,7 @@ import {
   getHeaders,
   LLMApi,
   LLMModel,
+  SpeechOptions,
   MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@@ -83,6 +84,10 @@ export class QwenApi implements LLMApi {
     return res?.output?.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       role: v.role,
6 changes: 5 additions & 1 deletion app/client/platforms/anthropic.ts
@@ -1,5 +1,5 @@
 import { Anthropic, ApiPath } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi } from "../api";
+import { ChatOptions, getHeaders, LLMApi, SpeechOptions } from "../api";
 import {
   useAccessStore,
   useAppConfig,
@@ -73,6 +73,10 @@ const ClaudeMapper = {
 const keys = ["claude-2, claude-instant-1"];

 export class ClaudeApi implements LLMApi {
+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   extractMessage(res: any) {
     console.log("[Response] claude response: ", res);

5 changes: 5 additions & 0 deletions app/client/platforms/baidu.ts
@@ -14,6 +14,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -75,6 +76,10 @@ export class ErnieApi implements LLMApi {
     return [baseUrl, path].join("/");
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       // "error_code": 336006, "error_msg": "the role of message with even index in the messages must be user or function",
5 changes: 5 additions & 0 deletions app/client/platforms/bytedance.ts
@@ -13,6 +13,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -77,6 +78,10 @@ export class DoubaoApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       role: v.role,
13 changes: 12 additions & 1 deletion app/client/platforms/google.ts
@@ -1,5 +1,12 @@
 import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  LLMUsage,
+  SpeechOptions,
+} from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
@@ -56,6 +63,10 @@ export class GeminiProApi implements LLMApi {
       ""
     );
   }
+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions): Promise<void> {
     const apiClient = this;
     let multimodal = false;
12 changes: 11 additions & 1 deletion app/client/platforms/iflytek.ts
@@ -7,7 +7,13 @@ import {
 } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";

-import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  SpeechOptions,
+} from "../api";
 import Locale from "../../locales";
 import {
   EventStreamContentType,
@@ -53,6 +59,10 @@ export class SparkApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {
12 changes: 11 additions & 1 deletion app/client/platforms/moonshot.ts
@@ -14,7 +14,13 @@ import {
   usePluginStore,
 } from "@/app/store";
 import { stream } from "@/app/utils/chat";
-import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  SpeechOptions,
+} from "../api";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
 import { RequestPayload } from "./openai";
@@ -53,6 +59,10 @@ export class MoonshotApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {
39 changes: 39 additions & 0 deletions app/client/platforms/openai.ts
@@ -33,6 +33,7 @@ import {
   LLMModel,
   LLMUsage,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import { getClientConfig } from "@/app/config/client";
@@ -141,6 +142,44 @@ export class ChatGPTApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? res;
   }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    const requestPayload = {
+      model: options.model,
+      input: options.input,
+      voice: options.voice,
+      response_format: options.response_format,
+      speed: options.speed,
+    };
+
+    console.log("[Request] openai speech payload: ", requestPayload);
+
+    const controller = new AbortController();
+    options.onController?.(controller);
+
+    try {
+      const speechPath = this.path(OpenaiPath.SpeechPath);
+      const speechPayload = {
+        method: "POST",
+        body: JSON.stringify(requestPayload),
+        signal: controller.signal,
+        headers: getHeaders(),
+      };
+
+      // make a fetch request
+      const requestTimeoutId = setTimeout(
+        () => controller.abort(),
+        REQUEST_TIMEOUT_MS,
+      );
+
+      const res = await fetch(speechPath, speechPayload);
+      clearTimeout(requestTimeoutId);
+      return await res.arrayBuffer();
+    } catch (e) {
+      console.log("[Request] failed to make a speech request", e);
+      throw e;
+    }
+  }
+
   async chat(options: ChatOptions) {
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
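As a usage note, the speech() implementation above hands its AbortController to `options.onController`, so a caller can cancel an in-flight synthesis request. A minimal sketch of hypothetical caller code (not part of the diff; the voice name and import path are assumptions):

```ts
import { ChatGPTApi } from "@/app/client/platforms/openai";

const api = new ChatGPTApi();
let ttsController: AbortController | null = null;

// Start a TTS request and keep a handle to its controller.
const audioPromise = api.speech({
  model: "tts-1-hd",
  input: "A long passage to synthesize…",
  voice: "nova", // assumed OpenAI voice name
  onController: (c) => (ttsController = c),
});

// Later (e.g. when the user closes the chat), abort the request;
// the pending fetch then rejects with an AbortError.
ttsController?.abort();
```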
5 changes: 5 additions & 0 deletions app/client/platforms/tencent.ts
@@ -8,6 +8,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -89,6 +90,10 @@ export class HunyuanApi implements LLMApi {
     return res.Choices?.at(0)?.Message?.Content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const visionModel = isVisionModel(options.config.model);
     const messages = options.messages.map((v, index) => ({