Skip to content

Commit

Permalink
🔀 refactor: Modularize TTS Logic for Improved Browser support (#3657)
Browse files Browse the repository at this point in the history
* WIP: message audio refactor

* WIP: use MessageAudio by provider

* fix: Update MessageAudio component to use TTSEndpoints enum

* feat: Update useTextToSpeechBrowser hook to handle errors and improve error logging

* feat: Add voice dropdown components for different TTS engines

* docs: update incorrect `voices` example

changed `voice: ''` to `voices: ['alloy']`

* feat: Add brwoser support check for Edge TTS engine component with error toast if not supported

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
  • Loading branch information
danny-avila and berry-13 authored Aug 15, 2024
1 parent bcde0be commit dba7040
Show file tree
Hide file tree
Showing 18 changed files with 784 additions and 187 deletions.
25 changes: 25 additions & 0 deletions client/src/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import type {
TStartupConfig,
EModelEndpoint,
AssistantsEndpoint,
TMessageContentParts,
AuthorizationTypeEnum,
TSetOption as SetOption,
TokenExchangeMethodEnum,
Expand All @@ -31,6 +32,17 @@ export enum PromptsEditorMode {
ADVANCED = 'advanced',
}

export enum STTEndpoints {
browser = 'browser',
external = 'external',
}

export enum TTSEndpoints {
browser = 'browser',
edge = 'edge',
external = 'external',
}

export type AudioChunk = {
audio: string;
isFinal: boolean;
Expand Down Expand Up @@ -374,6 +386,19 @@ export type Option = Record<string, unknown> & {
value: string | number | null;
};

export type VoiceOption = {
value: string;
label: string;
};

export type TMessageAudio = {
messageId?: string;
content?: TMessageContentParts[] | string;
className?: string;
isLast: boolean;
index: number;
};

export type OptionWithIcon = Option & { icon?: React.ReactNode };
export type MentionOption = OptionWithIcon & {
type: string;
Expand Down
256 changes: 256 additions & 0 deletions client/src/components/Audio/TTS.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
import { useEffect, useMemo } from 'react';
import { useRecoilValue } from 'recoil';
import type { TMessageAudio } from '~/common';
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
import { VolumeIcon, VolumeMuteIcon, Spinner } from '~/components/svg';
import { useToastContext } from '~/Providers/ToastContext';
import { logger } from '~/utils';
import store from '~/store';

export function BrowserTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
const localize = useLocalize();
const playbackRate = useRecoilValue(store.playbackRate);

const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSBrowser({
isLast,
index,
messageId,
content,
});

const renderIcon = (size: string) => {
if (isLoading === true) {
return <Spinner size={size} />;
}

if (isSpeaking === true) {
return <VolumeMuteIcon size={size} />;
}

return <VolumeIcon size={size} />;
};

useEffect(() => {
const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
if (!messageAudio) {
return;
}
if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
messageAudio.playbackRate = playbackRate;
}
}, [audioRef, isSpeaking, playbackRate, messageId]);

logger.log(
'MessageAudio: audioRef.current?.src, audioRef.current',
audioRef.current?.src,
audioRef.current,
);

return (
<>
<button
className={className}
onClickCapture={() => {
if (audioRef.current) {
audioRef.current.muted = false;
}
toggleSpeech();
}}
type="button"
title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
>
{renderIcon('19')}
</button>
<audio
ref={audioRef}
controls
preload="none"
controlsList="nodownload nofullscreen noremoteplayback"
style={{
position: 'absolute',
overflow: 'hidden',
display: 'none',
height: '0px',
width: '0px',
}}
src={audioRef.current?.src}
onError={(error) => {
console.error('Error fetching audio:', error);
}}
id={`audio-${messageId}`}
muted
autoPlay
/>
</>
);
}

export function EdgeTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
const localize = useLocalize();
const playbackRate = useRecoilValue(store.playbackRate);
const isBrowserSupported = useMemo(
() => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
[],
);

const { showToast } = useToastContext();
const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSEdge({
isLast,
index,
messageId,
content,
});

const renderIcon = (size: string) => {
if (isLoading === true) {
return <Spinner size={size} />;
}

if (isSpeaking === true) {
return <VolumeMuteIcon size={size} />;
}

return <VolumeIcon size={size} />;
};

useEffect(() => {
const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
if (!messageAudio) {
return;
}
if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
messageAudio.playbackRate = playbackRate;
}
}, [audioRef, isSpeaking, playbackRate, messageId]);

logger.log(
'MessageAudio: audioRef.current?.src, audioRef.current',
audioRef.current?.src,
audioRef.current,
);

return (
<>
<button
className={className}
onClickCapture={() => {
if (!isBrowserSupported) {
showToast({
message: localize('com_nav_tts_unsupported_error'),
status: 'error',
});
return;
}
if (audioRef.current) {
audioRef.current.muted = false;
}
toggleSpeech();
}}
type="button"
title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
>
{renderIcon('19')}
</button>
{isBrowserSupported ? (
<audio
ref={audioRef}
controls
preload="none"
controlsList="nodownload nofullscreen noremoteplayback"
style={{
position: 'absolute',
overflow: 'hidden',
display: 'none',
height: '0px',
width: '0px',
}}
src={audioRef.current?.src}
onError={(error) => {
console.error('Error fetching audio:', error);
}}
id={`audio-${messageId}`}
muted
autoPlay
/>
) : null}
</>
);
}

export function ExternalTTS({ isLast, index, messageId, content, className }: TMessageAudio) {
const localize = useLocalize();
const playbackRate = useRecoilValue(store.playbackRate);

const { toggleSpeech, isSpeaking, isLoading, audioRef } = useTTSExternal({
isLast,
index,
messageId,
content,
});

const renderIcon = (size: string) => {
if (isLoading === true) {
return <Spinner size={size} />;
}

if (isSpeaking === true) {
return <VolumeMuteIcon size={size} />;
}

return <VolumeIcon size={size} />;
};

useEffect(() => {
const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
if (!messageAudio) {
return;
}
if (playbackRate != null && playbackRate > 0 && messageAudio.playbackRate !== playbackRate) {
messageAudio.playbackRate = playbackRate;
}
}, [audioRef, isSpeaking, playbackRate, messageId]);

logger.log(
'MessageAudio: audioRef.current?.src, audioRef.current',
audioRef.current?.src,
audioRef.current,
);

return (
<>
<button
className={className}
onClickCapture={() => {
if (audioRef.current) {
audioRef.current.muted = false;
}
toggleSpeech();
}}
type="button"
title={isSpeaking === true ? localize('com_ui_stop') : localize('com_ui_read_aloud')}
>
{renderIcon('19')}
</button>
<audio
ref={audioRef}
controls
preload="none"
controlsList="nodownload nofullscreen noremoteplayback"
style={{
position: 'absolute',
overflow: 'hidden',
display: 'none',
height: '0px',
width: '0px',
}}
src={audioRef.current?.src}
onError={(error) => {
console.error('Error fetching audio:', error);
}}
id={`audio-${messageId}`}
muted
autoPlay
/>
</>
);
}
94 changes: 94 additions & 0 deletions client/src/components/Audio/Voices.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import React from 'react';
import { useRecoilState } from 'recoil';
import type { Option } from '~/common';
import DropdownNoState from '~/components/ui/DropdownNoState';
import { useLocalize, useTTSBrowser, useTTSEdge, useTTSExternal } from '~/hooks';
import { logger } from '~/utils';
import store from '~/store';

export function EdgeVoiceDropdown() {
const localize = useLocalize();
const { voices = [] } = useTTSEdge();
const [voice, setVoice] = useRecoilState(store.voice);

const handleVoiceChange = (newValue?: string | Option) => {
logger.log('Edge Voice changed:', newValue);
const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
if (newVoice != null) {
return setVoice(newVoice.toString());
}
};

return (
<div className="flex items-center justify-between">
<div>{localize('com_nav_voice_select')}</div>
<DropdownNoState
key={`edge-voice-dropdown-${voices.length}`}
value={voice}
options={voices}
onChange={handleVoiceChange}
sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
anchor="bottom start"
testId="EdgeVoiceDropdown"
/>
</div>
);
}

export function BrowserVoiceDropdown() {
const localize = useLocalize();
const { voices = [] } = useTTSBrowser();
const [voice, setVoice] = useRecoilState(store.voice);

const handleVoiceChange = (newValue?: string | Option) => {
logger.log('Browser Voice changed:', newValue);
const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
if (newVoice != null) {
return setVoice(newVoice.toString());
}
};

return (
<div className="flex items-center justify-between">
<div>{localize('com_nav_voice_select')}</div>
<DropdownNoState
key={`browser-voice-dropdown-${voices.length}`}
value={voice}
options={voices}
onChange={handleVoiceChange}
sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
anchor="bottom start"
testId="BrowserVoiceDropdown"
/>
</div>
);
}

export function ExternalVoiceDropdown() {
const localize = useLocalize();
const { voices = [] } = useTTSExternal();
const [voice, setVoice] = useRecoilState(store.voice);

const handleVoiceChange = (newValue?: string | Option) => {
logger.log('External Voice changed:', newValue);
const newVoice = typeof newValue === 'string' ? newValue : newValue?.value;
if (newVoice != null) {
return setVoice(newVoice.toString());
}
};

return (
<div className="flex items-center justify-between">
<div>{localize('com_nav_voice_select')}</div>
<DropdownNoState
key={`external-voice-dropdown-${voices.length}`}
value={voice}
options={voices}
onChange={handleVoiceChange}
sizeClasses="min-w-[200px] !max-w-[400px] [--anchor-max-width:400px]"
anchor="bottom start"
testId="ExternalVoiceDropdown"
/>
</div>
);
}
Loading

0 comments on commit dba7040

Please sign in to comment.