Skip to content

Commit

Permalink
Added configuration stanzas (#22)
Browse files Browse the repository at this point in the history
* Added configuration stanzas for elevenlabs

* Fixed broken reference for

* Added configuration stanzas for whisper tts

* Added configuration stanzas for whisper stt

* Fixed dupe keys

* We now record elevenlabs outputs properly
  • Loading branch information
Und3rf10w authored Dec 20, 2023
1 parent 26f3062 commit 3d6b258
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 33 deletions.
22 changes: 20 additions & 2 deletions config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,27 @@ recordings_directory = "recordings"

[openai]
# Set your openai api key here
api_key = "sk...."
openai_api_key = "sk...."

[openai.whisper]
# Set the path to `mpv.exe` if it's not already in your PATH. Only matters if `tts_engine` == `whisper`
# mpv_path = mpv.exe
# Which openai whisper voice ID to use, defaults to "nova" if not set
whisper_voice_id = "nova"
# Which openai text-to-speech model to use (e.g. "tts-1" or "tts-1-hd"), defaults to "tts-1" if not set
whisper_voice_model = "tts-1"
# Which openai whisper speech-to-text (transcription) model to use, defaults to `whisper-1` if not set
whisper_engine = "whisper-1"

[elevenlabs]
api_key = ""
eleven_api_key = ""
# If not set, it'll use the default voice
elevenlabs_voice_id = ""
# If not set it'll use the default stability (0.5)
elevenlabs_stability = 0.5
# If not set it'll use the default similarity_boost (0.75)
elevenlabs_similarity_boost = 0.75
# If not set it'll use the default style (0). You probably want this at zero
elevenlabs_style = 0
# If not set to "True" it'll use the default speaker_boost setting (False)
elevenlabs_use_speaker_boost="False"
5 changes: 3 additions & 2 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 59 additions & 8 deletions src/openjanus/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def set_openai_api_key() -> str:
else:
LOGGER.debug("Setting openai API key from config file")
config = load_config()
environ["OPENAI_API_KEY"] = config["openai"]["api_key"]
return config["openai"]["api_key"]
environ["OPENAI_API_KEY"] = config["openai"]["openai_api_key"]
return config["openai"]["openai_api_key"]
except KeyError:
LOGGER.error("The openai API key was not found in the environment variable or the config file")
raise ApiKeyNotSetException("OpenAI")
Expand All @@ -59,8 +59,8 @@ def set_eleven_api_key() -> str:
else:
LOGGER.debug("Setting elevenlabs API key from config file")
config = load_config()
environ["ELEVEN_API_KEY"] = config["elevenlabs"]["api_key"]
return config["elevenlabs"]["api_key"]
environ["ELEVEN_API_KEY"] = config["elevenlabs"]["eleven_api_key"]
return config["elevenlabs"]["eleven_api_key"]
except KeyError:
LOGGER.error("The elevenlabs API key was not found in the environment variable or the config file")
raise ApiKeyNotSetException("Elevenlabs")
Expand All @@ -74,14 +74,14 @@ def check_mpv_path() -> str:
try:
LOGGER.debug("Setting mpv path from config file")
config = load_config()
if not path.isfile(config["openjanus"]["mpv_path"]):
if not path.isfile(config["openai"]["whisper"]["mpv_path"]):
LOGGER.error("mpv.exe was not found")
raise TtsMpvNotFoundException()
else:
return config["openjanus"]["mpv_path"]
return config["openai"]["whisper"]["mpv_path"]
except KeyError:
LOGGER.error("The mpv path was not found in the environment variable or the config file")
raise ConfigKeyNotFound("openjanus/mpv_path")
raise ConfigKeyNotFound("openai/whisper/mpv_path")
except FileNotFoundError:
LOGGER.error("mpv.exe was not found")
raise TtsMpvNotFoundException()
Expand Down Expand Up @@ -119,7 +119,7 @@ def get_recordings_dir() -> str:
LOGGER.debug("Getting recordings directory from config file")
config = load_config()
recordings_dir = config["openjanus"]["recordings_directory"]
return path.relpath(recordings_dir)
return path.relpath(recordings_dir) + "/"
except KeyError:
LOGGER.error("The recordings directory was not found in the environment variable or the config file")
raise ConfigKeyNotFound("openjanus/recordings_directory")
Expand All @@ -134,6 +134,57 @@ def ensure_recordings_dir_exists():
except Exception as e:
LOGGER.error(f"Failed to create the {recordings_dir} directory", exc_info=e)
raise DirectoryCreationException(f"Failed to create the {recordings_dir} directory") from e

def get_elevenlabs_config() -> Dict[str, Any]:
    """Return the ``[elevenlabs]`` config section with defaults filled in.

    The elevenlabs API key is set (via ``set_eleven_api_key``) as a side
    effect. Optional keys that are missing or empty are replaced by their
    defaults; an explicitly configured numeric ``0`` is kept as-is.

    Returns:
        The ``elevenlabs`` section of the loaded config, updated in place.

    Raises:
        ConfigKeyNotFound: If the config has no ``elevenlabs`` section.
    """
    try:
        LOGGER.debug("Getting elevenlabs config from config file")
        config = load_config()
        set_eleven_api_key()
        section = config["elevenlabs"]

        if not section.get("elevenlabs_voice_id"):
            LOGGER.warning("The elevenlabs voice was not set, using the default voice")
            from openjanus.tts.elevenlabs.async_patch import DEFAULT_VOICE
            # Consumers pass this value as ``Voice(voice_id=...)``, so store the
            # id when DEFAULT_VOICE is a voice object; fall back to the object
            # itself if it has no ``voice_id`` attribute.
            section["elevenlabs_voice_id"] = getattr(DEFAULT_VOICE, "voice_id", DEFAULT_VOICE)

        numeric_defaults = (
            ("elevenlabs_stability", "stability", 0.5),
            ("elevenlabs_similarity_boost", "similarity boost", 0.75),
            ("elevenlabs_style", "style", 0),
        )
        for key, label, default in numeric_defaults:
            # Only a missing or empty value counts as "not set"; a plain
            # truthiness test would discard a deliberately configured 0.
            if section.get(key) in (None, ""):
                LOGGER.warning(f"The elevenlabs {label} was not set, using the default {label}")
                section[key] = default

        raw_boost = section.get("elevenlabs_use_speaker_boost")
        if isinstance(raw_boost, bool):
            # Native TOML booleans are accepted directly.
            use_boost = raw_boost
        elif raw_boost is None or raw_boost == "":
            use_boost = False
        elif isinstance(raw_boost, str) and raw_boost.strip().lower() in ("true", "false"):
            use_boost = raw_boost.strip().lower() == "true"
        else:
            LOGGER.warning("The elevenlabs use speaker boost was misconfigured, using the default use speaker boost")
            use_boost = False
        section["elevenlabs_use_speaker_boost"] = use_boost

        return section

    except KeyError:
        LOGGER.error("The elevenlabs config was not found in the environment variable or the config file")
        raise ConfigKeyNotFound("elevenlabs")

def get_openai_whisper_config() -> Dict[str, Any]:
    """Return the ``[openai.whisper]`` config section with defaults filled in.

    Optional keys that are missing or empty are replaced by their defaults
    (``nova`` voice, ``tts-1`` voice model, ``whisper-1`` engine) instead of
    being treated as a missing section.

    Returns:
        The ``openai.whisper`` section of the loaded config, updated in place.

    Raises:
        ConfigKeyNotFound: If the config has no ``openai``/``whisper`` section.
    """
    try:
        LOGGER.debug("Getting openai whisper config from config file")
        config = load_config()
        whisper = config["openai"]["whisper"]

        defaults = (
            ("whisper_voice_id", "voice id", "nova"),
            ("whisper_voice_model", "voice model", "tts-1"),
            ("whisper_engine", "engine", "whisper-1"),
        )
        for key, label, default in defaults:
            # ``.get`` lets an absent key fall back to its default rather than
            # surfacing as a KeyError for the whole section.
            if not whisper.get(key):
                LOGGER.warning(f"The openai whisper {label} was not set, using the default {label}")
                whisper[key] = default
        return whisper
    except KeyError:
        LOGGER.error("The openai whisper config was not found in the environment variable or the config file")
        raise ConfigKeyNotFound("openai/whisper")


def startup_checks() -> bool:
Expand Down
14 changes: 9 additions & 5 deletions src/openjanus/stt/whisper/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from langchain.document_loaders.blob_loaders import Blob
from langchain.schema import Document

logger = logging.getLogger(__name__)
from openjanus.app.config import get_openai_whisper_config


LOGGER = logging.getLogger(__name__)


class OpenAIWhisperParser(BaseBlobParser):
Expand All @@ -15,6 +18,7 @@ class OpenAIWhisperParser(BaseBlobParser):

def __init__(self, api_key: Optional[str] = None):
    """Store the optional API key and load the openai whisper config."""
    whisper_settings = get_openai_whisper_config()
    self.api_key = api_key
    self.config = whisper_settings

def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Lazily parse the blob."""
Expand Down Expand Up @@ -58,18 +62,18 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
file_obj.name = f"part_{split_number}.mp3"

# Transcribe
print(f"Transcribing part {split_number+1}!")
LOGGER.debug(f"Transcribing part {split_number+1}!")
attempts = 0
while attempts < 3:
try:
transcript = openai.audio.transcriptions.create(model="whisper-1", file=file_obj)
transcript = openai.audio.transcriptions.create(model=self.config.get('whisper_engine', "whisper-1"), file=file_obj)
break
except Exception as e:
attempts += 1
print(f"Attempt {attempts} failed. Exception: {str(e)}")
LOGGER.error(f"Attempt {attempts} failed. Exception: {str(e)}")
time.sleep(5)
else:
print("Failed to transcribe after 3 attempts.")
LOGGER.error("Failed to transcribe after 3 attempts.")
continue

yield Document(
Expand Down
12 changes: 7 additions & 5 deletions src/openjanus/tts/elevenlabs/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import BaseMessage

from openjanus.app.config import get_elevenlabs_config
from openjanus.tts.elevenlabs.tts import ElevenLabsText2SpeechTool
from openjanus.tts.elevenlabs.async_patch import DEFAULT_VOICE

Expand All @@ -30,13 +31,14 @@ def run_chat_message(tts: ElevenLabsText2SpeechTool, chain: BaseLanguageModel, m


def get_tool() -> ElevenLabsText2SpeechTool:
elevenlabs_config = get_elevenlabs_config()
set_api_key(getenv("ELEVEN_API_KEY"))
voice_id = DEFAULT_VOICE.voice_id
voice_id = elevenlabs_config['elevenlabs_voice_id']
voice_settings = VoiceSettings(
stability=0.5,
similarity_boost=0.75,
style=0,
use_speaker_boost=False
stability=elevenlabs_config['elevenlabs_stability'],
similarity_boost=elevenlabs_config['elevenlabs_similarity_boost'],
style=elevenlabs_config['elevenlabs_style'],
use_speaker_boost=elevenlabs_config['elevenlabs_use_speaker_boost']
)
voice = Voice(
voice_id=voice_id,
Expand Down
25 changes: 17 additions & 8 deletions src/openjanus/tts/elevenlabs/tts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import datetime
from enum import Enum
import logging
import pathlib
import tempfile
from typing import Any, Coroutine, Dict, Optional, Union, Iterator, Generator

Expand All @@ -12,6 +13,7 @@
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env
import openjanus.tts.elevenlabs.async_patch as eleven_labs_async_patch
from openjanus.app.config import get_recordings_dir


LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -56,32 +58,39 @@ class ElevenLabsText2SpeechTool(BaseTool):
"Spanish, Italian, French, Portuguese, and Hindi. "
)
voice: Voice
output_dir: str = get_recordings_dir()
output_file_path: Optional[str] = ""

@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key exists in environment."""
_ = get_from_dict_or_env(values, "eleven_api_key", "ELEVEN_API_KEY")

return values

def set_recording_path(self):
    """Set ``self.output_file_path`` to a timestamped mp3 path in ``self.output_dir``.

    The directory and file name are joined with ``pathlib`` so the result is
    correct whether or not ``output_dir`` ends with a path separator.
    """
    # TODO: Clean this up, set from config, etc
    file_name = f"output.{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.mp3"
    self.output_file_path = str(pathlib.PurePath(self.output_dir) / file_name)

def save_file(self, audio: Union[bytes, Iterator[bytes]]):
if isinstance(audio, Iterator):
raw_audio = iter(audio)
else:
raw_audio = audio
elevenlabs = _import_elevenlabs()
now = datetime.now()
formatted_dt = now.strftime(format="%Y%m%d_%H%M%S")
elevenlabs.save(raw_audio, f"{formatted_dt}_{self.voice.voice_id}_chat.mp3")
self.set_recording_path()
elevenlabs.save(raw_audio, self.output_file_path)

def _run(
self, query, run_manager: Optional[CallbackManagerForToolRun] = None
) -> str:
):
"""Use the tool."""
elevenlabs = _import_elevenlabs()
try:
speech = elevenlabs.generate(text=query, model=self.model, voice=self.voice)
elevenlabs.play(speech)
self.save_file(audio=speech)
# with tempfile.NamedTemporaryFile(
# mode="bx", suffix=".wav", delete=False
# ) as f:
Expand All @@ -96,15 +105,15 @@ async def _arun(self, stream, **kwargs: Any) -> Coroutine[Any, Any, Any]:
await self.astream_speech_from_stream(
text_stream=stream,
chunk_size=100,
save_message=False,
save_message=True,
)
except Exception as e:
raise RuntimeError(f"Error while running ElevenLabsText2SpeechTool: {e}")




def play(self, query: str, save_message: bool = False) -> None:
def play(self, query: str, save_message: bool = True) -> None:
"""
Play the speech as text
Expand Down Expand Up @@ -143,7 +152,7 @@ async def aprocess_message(self, query, save_message):
if save_message:
self.save_file(b''.join(audio_chunks))

async def astream_speech(self, text_stream, save_message: bool = False) -> None:
async def astream_speech(self, text_stream, save_message: bool = True) -> None:
async def async_generator_to_list(async_generator):
return [item async for item in async_generator]

Expand All @@ -155,7 +164,7 @@ async def async_generator_to_list(async_generator):
for future in asyncio.as_completed(tasks):
result = await future # result is not used in this case

async def astream_speech_from_stream(self, text_stream, chunk_size: int = 1000, save_message: bool = False) -> None:
async def astream_speech_from_stream(self, text_stream, chunk_size: int = 1000, save_message: bool = True) -> None:
"""
Play a text stream with TTS
Expand Down
10 changes: 7 additions & 3 deletions src/openjanus/tts/whisper/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
import pathlib
import shutil
import subprocess
from typing import Any, Optional, Union, Iterator, Literal
from typing import Any, Dict, Optional, Union, Iterator, Literal

from langchain.tools.base import BaseTool

from openjanus.app.config import get_recordings_dir
from openjanus.app.config import get_openai_whisper_config


LOGGER = logging.getLogger(__name__)
Expand All @@ -25,20 +26,23 @@ class OpenAIWhisperSpeaker(BaseTool):
output_dir: str = get_recordings_dir()
output_file_path: Optional[str] = ""
verbose: bool = True
config: Dict[str, Any] = get_openai_whisper_config()

def __init__(
self,
api_key: Optional[str] = None,
voice_id: Optional[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]] = "nova",
voice_model: Optional[Union[str, Literal["tts-1", "tts-1-hd"]]] = "tts-1",
output_dir: str = get_recordings_dir(),
config: Dict[str, Any] = get_openai_whisper_config(),
*args,
**kwargs
) -> None:
super().__init__(*args, **kwargs)
self.config = config
self.api_key = api_key
self.voice_id = voice_id
self.voice_model = voice_model
self.voice_id = self.config.get('whisper_voice_id', voice_id)
self.voice_model = self.config.get('whisper_voice_model', voice_model)
self.output_dir = output_dir
self.output_file_path = ""

Expand Down

0 comments on commit 3d6b258

Please sign in to comment.