Remove AUDIO_SPEED config and pydub-based audio speed adjustment from TTS providers

lspahija · lspahija · commit bbd60d6b7c14 · 2023-09-19T22:23:59.000-05:00
diff --git a/README.md b/README.md
@@ -41,8 +41,6 @@ docker run -d -e OPENAI_API_KEY=<YOUR_API_KEY> -e TTS_PROVIDER=EDGETTS -e EDGETT
 ## Notes
 The AI model defaults to `gpt-3.5-turbo` but you can adjust this by setting the `AI_COMPLETION_MODEL` environment variable (e.g. to `gpt-4` if your `OPENAI_API_KEY` has access to it)
 
-Output audio speed can be adjusted by setting the `AUDIO_SPEED` environment variable e.g. setting this to 1.5 will result in audio playing back at 1.5x default speed.
-
 You can configure the language by setting the `LANGUAGE` environment variable to the corresponding ISO-639-1 code. The default is `en`.
 Languages other than English are currently only supported when using the `gTTS` or `edge_tts` providers for text-to-speech. The TTS provider can be selected by setting the environment variable `TTS_PROVIDER` to one of the values in [tts.py](./app/tts.py).
 
diff --git a/backend/tts.py b/backend/tts.py
@@ -5,14 +5,12 @@
 
 import requests
 from gtts import gTTS
-from pydub import AudioSegment
 import edge_tts
 from elevenlabs import generate, save
 
 from util import delete_file
 
 LANGUAGE = os.getenv("LANGUAGE", "en")
-AUDIO_SPEED = os.getenv("AUDIO_SPEED", None)
 TTS_PROVIDER = os.getenv("TTS_PROVIDER", "EDGETTS")
 
 ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY", None)
@@ -40,11 +38,10 @@ async def _edge_tts_to_speech(text, background_tasks):
     filepath = f"/tmp/{uuid.uuid4()}.mp3"
     await communicate.save(filepath)
 
-    speed_adjusted_filepath = _adjust_audio_speed(filepath)
-    background_tasks.add_task(delete_file, speed_adjusted_filepath)
+    background_tasks.add_task(delete_file, filepath)
 
     logging.info('TTS time: %s %s', time.time() - start_time, 'seconds')
-    return speed_adjusted_filepath
+    return filepath
 
 
 def _gtts_to_speech(text, background_tasks):
@@ -54,11 +51,10 @@ def _gtts_to_speech(text, background_tasks):
     filepath = f"/tmp/{uuid.uuid4()}.mp3"
     tts.save(filepath)
 
-    speed_adjusted_filepath = _adjust_audio_speed(filepath)
-    background_tasks.add_task(delete_file, speed_adjusted_filepath)
+    background_tasks.add_task(delete_file, filepath)
 
     logging.info('TTS time: %s %s', time.time() - start_time, 'seconds')
-    return speed_adjusted_filepath
+    return filepath
 
 
 def _elevenlabs_to_speech(text, background_tasks):
@@ -74,11 +70,10 @@ def _elevenlabs_to_speech(text, background_tasks):
     filepath = f"/tmp/{uuid.uuid4()}.mp3"
     save(audio, filepath)
 
-    speed_adjusted_filepath = _adjust_audio_speed(filepath)
-    background_tasks.add_task(delete_file, speed_adjusted_filepath)
+    background_tasks.add_task(delete_file, filepath)
 
     logging.info('TTS time: %s %s', time.time() - start_time, 'seconds')
-    return speed_adjusted_filepath
+    return filepath
 
 
 def _streamelements_to_speech(text, background_tasks):
@@ -90,23 +85,7 @@ def _streamelements_to_speech(text, background_tasks):
     with open(filepath, "wb") as f:
         f.write(response.content)
 
-    speed_adjusted_filepath = _adjust_audio_speed(filepath)
-    background_tasks.add_task(delete_file, speed_adjusted_filepath)
+    background_tasks.add_task(delete_file, filepath)
 
     logging.info('TTS time: %s %s', time.time() - start_time, 'seconds')
-    return speed_adjusted_filepath
-
-
-def _adjust_audio_speed(audio_filepath):
-    if AUDIO_SPEED is None:
-        return audio_filepath
-
-    audio = AudioSegment.from_mp3(audio_filepath)
-    faster_audio = audio.speedup(playback_speed=float(AUDIO_SPEED))
-
-    speed_adjusted_filepath = f"/tmp/{uuid.uuid4()}.mp3"
-    faster_audio.export(speed_adjusted_filepath, format="mp3")
-
-    delete_file(audio_filepath)
-
-    return speed_adjusted_filepath
+    return filepath