From dba704079c44b4b0a3f243795b3547cc105a2dd5 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Thu, 15 Aug 2024 11:34:25 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=80=20refactor:=20Modularize=20TTS=20L?= =?UTF-8?q?ogic=20for=20Improved=20Browser=20support=20(#3657)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * WIP: message audio refactor * WIP: use MessageAudio by provider * fix: Update MessageAudio component to use TTSEndpoints enum * feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging * feat: Add voice dropdown components for different TTS engines * docs: update incorrect `voices` example changed `voice: ''` to `voices: ['alloy']` * feat: Add browser support check for Edge TTS engine component with error toast if not supported --------- Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com> --- client/src/common/types.ts | 25 ++ client/src/components/Audio/TTS.tsx | 256 ++++++++++++++++++ client/src/components/Audio/Voices.tsx | 94 +++++++ .../components/Chat/Messages/HoverButtons.tsx | 1 + .../components/Chat/Messages/MessageAudio.tsx | 112 +------- .../SettingsTabs/Speech/TTS/VoiceDropdown.tsx | 46 +--- client/src/hooks/Audio/index.ts | 3 + client/src/hooks/Audio/useTTSBrowser.ts | 100 +++++++ client/src/hooks/Audio/useTTSEdge.ts | 100 +++++++ client/src/hooks/Audio/useTTSExternal.ts | 101 +++++++ client/src/hooks/Input/useGetAudioSettings.ts | 11 - client/src/hooks/Input/useTextToSpeech.ts | 8 +- .../src/hooks/Input/useTextToSpeechBrowser.ts | 82 ++++-- client/src/hooks/Input/useTextToSpeechEdge.ts | 22 +- .../hooks/Input/useTextToSpeechExternal.ts | 4 +- client/src/hooks/index.ts | 1 + client/src/localization/languages/Eng.ts | 2 + librechat.example.yaml | 3 +- 18 files changed, 784 insertions(+), 187 deletions(-) create mode 100644 client/src/components/Audio/TTS.tsx create mode 100644 client/src/components/Audio/Voices.tsx create mode 100644 
client/src/hooks/Audio/useTTSBrowser.ts create mode 100644 client/src/hooks/Audio/useTTSEdge.ts create mode 100644 client/src/hooks/Audio/useTTSExternal.ts diff --git a/client/src/common/types.ts b/client/src/common/types.ts index e78cc5afe52..e79bf2bddfd 100644 --- a/client/src/common/types.ts +++ b/client/src/common/types.ts @@ -19,6 +19,7 @@ import type { TStartupConfig, EModelEndpoint, AssistantsEndpoint, + TMessageContentParts, AuthorizationTypeEnum, TSetOption as SetOption, TokenExchangeMethodEnum, @@ -31,6 +32,17 @@ export enum PromptsEditorMode { ADVANCED = 'advanced', } +export enum STTEndpoints { + browser = 'browser', + external = 'external', +} + +export enum TTSEndpoints { + browser = 'browser', + edge = 'edge', + external = 'external', +} + export type AudioChunk = { audio: string; isFinal: boolean; @@ -374,6 +386,19 @@ export type Option = Record & { value: string | number | null; }; +export type VoiceOption = { + value: string; + label: string; +}; + +export type TMessageAudio = { + messageId?: string; + content?: TMessageContentParts[] | string; + className?: string; + isLast: boolean; + index: number; +}; + export type OptionWithIcon = Option & { icon?: React.ReactNode }; export type MentionOption = OptionWithIcon & { type: string; diff --git a/client/src/components/Audio/TTS.tsx b/client/src/components/Audio/TTS.tsx new file mode 100644 index 00000000000..0ccad8a0517 --- /dev/null +++ b/client/src/components/Audio/TTS.tsx @@ -0,0 +1,256 @@ +import { useEffect, useMemo } from 'react'; +import { useRecoilValue } from 'recoil'; +import type { TMessageAudio } from '~/common'; +import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks'; +import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg'; +import { useToastContext } from '~/Providers/ToastContext'; +import { logger } from '~/utils'; +import store from '~/store'; + +export function BrowserTTS({ isLast, index, messageId, content, className }: TMessageAudio) { + 
const localize = useLocalize(); + const playbackRate = useRecoilValue(store.playbackRate); + + const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSBrowser({ + isLast, + index, + messageId, + content, + }); + + const renderIcon = (size: string) => { + if (isLoading === true) { + return ; + } + + if (isSpeaking === true) { + return ; + } + + return ; + }; + + useEffect(() => { + const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null; + if (!messageAudio) { + return; + } + if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) { + messageAudio.playbackRate = playbackRate; + } + }, [audioRef, isSpeaking, playbackRate, messageId]); + + logger.log( + 'MessageAudio: audioRef.current?.src, audioRef.current', + audioRef.current?.src, + audioRef.current, + ); + + return ( + <> + +