⏯️ fix(tts): Resolve Voice Selection and Manual Playback Issues (#2845)

* fix: voice setting for autoplayback TTS
* fix(useTextToSpeechExternal): resolve stateful playback issues and consolidate state logic
* refactor: initialize tts voice and provider schema once per request
* fix(tts): edge case, longer text inputs. TODO: use continuous stream for longer text inputs
* fix(tts): pause global audio on conversation change
* refactor: keyvMongo ban cache to allow db updates for unbanning, to prevent server restart
* chore: eslint fix
* refactor: make ban cache exclusively keyvMongo
danny-avila · May 23, 2024 · 514a502 · 514a502
1 parent 8e66683
commit 514a502
Show file tree

Hide file tree

Showing 10 changed files with 330 additions and 176 deletions.
diff --git a/api/server/middleware/checkBan.js b/api/server/middleware/checkBan.js
@@ -2,14 +2,12 @@ const Keyv = require('keyv');
 const uap = require('ua-parser-js');
 const { ViolationTypes } = require('librechat-data-provider');
 const { isEnabled, removePorts } = require('../utils');
-const keyvRedis = require('~/cache/keyvRedis');
+const keyvMongo = require('~/cache/keyvMongo');
 const denyRequest = require('./denyRequest');
 const { getLogStores } = require('~/cache');
 const User = require('~/models/User');
 
-const banCache = isEnabled(process.env.USE_REDIS)
-  ? new Keyv({ store: keyvRedis })
-  : new Keyv({ namespace: ViolationTypes.BAN, ttl: 0 });
+const banCache = new Keyv({ store: keyvMongo, namespace: ViolationTypes.BAN, ttl: 0 });
 const message = 'Your account has been temporarily banned due to violations of our service.';
 
 /**

diff --git a/api/server/services/Files/Audio/streamAudio.js b/api/server/services/Files/Audio/streamAudio.js
@@ -90,7 +90,7 @@ function findLastSeparatorIndex(text, separators = SEPARATORS) {
 }
 
 const MAX_NOT_FOUND_COUNT = 6;
-const MAX_NO_CHANGE_COUNT = 12;
+const MAX_NO_CHANGE_COUNT = 10;
 
 /**
  * @param {string} messageId
@@ -152,6 +152,64 @@ function createChunkProcessor(messageId) {
   return processChunks;
 }
 
+/**
+ * Splits text into trimmed chunks of at most `chunkSize` characters, preferring to end each
+ * chunk right after the last separator (from `SEPARATORS`) found inside the current window.
+ * When a window contains no separator, the text is hard-split at the window boundary.
+ * Whitespace between chunks is dropped; whitespace-only input yields an empty array.
+ *
+ * @param {string} text - The text to split. Must be non-empty.
+ * @param {number} [chunkSize=4000] - Maximum window size per chunk; must be >= 1.
+ * @returns {{ text: string, isFinished: boolean }[]} Chunks in order; only the last one has
+ * `isFinished: true`.
+ * @throws {Error} If `text` is empty or missing.
+ * @throws {RangeError} If `chunkSize` is not a number greater than or equal to 1.
+ */
+function splitTextIntoChunks(text, chunkSize = 4000) {
+  if (!text) {
+    throw new Error('Text is required');
+  }
+
+  // A window smaller than one character can never advance `startIndex`: the loop below
+  // would spin forever for 0 or negative sizes and silently return nothing for NaN.
+  if (typeof chunkSize !== 'number' || !(chunkSize >= 1)) {
+    throw new RangeError('chunkSize must be a number greater than or equal to 1');
+  }
+
+  const windowSize = Math.floor(chunkSize);
+  const textLength = text.length;
+  const chunks = [];
+  let startIndex = 0;
+
+  while (startIndex < textLength) {
+    let endIndex = Math.min(startIndex + windowSize, textLength);
+
+    // Only look for a break point when more text follows this window.
+    if (endIndex < textLength) {
+      const windowText = text.slice(startIndex, endIndex);
+      let breakOffset = -1;
+      for (const separator of SEPARATORS) {
+        const index = windowText.lastIndexOf(separator);
+        if (index !== -1) {
+          // Cut after the whole separator so a multi-character separator is never split.
+          breakOffset = Math.max(breakOffset, index + separator.length);
+        }
+      }
+
+      // No separator in the window: keep the hard split at the window boundary.
+      if (breakOffset > 0) {
+        endIndex = startIndex + breakOffset;
+      }
+    }
+
+    const chunkText = text.slice(startIndex, endIndex).trim();
+    if (chunkText) {
+      chunks.push({ text: chunkText, isFinished: false });
+    }
+
+    // Skip the whitespace between chunks so the next window starts on real content.
+    startIndex = endIndex;
+    while (startIndex < textLength && text[startIndex].trim() === '') {
+      startIndex++;
+    }
+  }
+
+  // Flag the final chunk after the loop: deciding inside the loop misses input that ends
+  // in whitespace, where the last real chunk stops before `textLength`.
+  if (chunks.length > 0) {
+    chunks[chunks.length - 1].isFinished = true;
+  }
+
+  return chunks;
+}
+
+
 /**
  * Input stream text to speech
  * @param {Express.Response} res
@@ -307,6 +365,7 @@ module.exports = {
   inputStreamTextToSpeech,
   findLastSeparatorIndex,
   createChunkProcessor,
+  splitTextIntoChunks,
   llmMessageSource,
   getRandomVoiceId,
 };
diff --git a/api/server/services/Files/Audio/streamAudio.spec.js b/api/server/services/Files/Audio/streamAudio.spec.js
@@ -1,5 +1,5 @@
+const { createChunkProcessor, splitTextIntoChunks } = require('./streamAudio');
 const { Message } = require('~/models/Message');
-const { createChunkProcessor } = require('./streamAudio');
 
 jest.mock('~/models/Message', () => ({
   Message: {
@@ -86,3 +86,52 @@ describe('processChunks', () => {
     expect(Message.findOne().lean).toHaveBeenCalledTimes(2);
   });
 });
+
+describe('splitTextIntoChunks', () => {
+  // Chunks end right after the last separator in each window; a window without any
+  // separator ('Make sure it works p') is hard-split at the chunk size.
+  test('splits text into chunks of specified size with default separators', () => {
+    const text = 'This is a test. This is only a test! Make sure it works properly? Okay.';
+    const chunkSize = 20;
+    const expectedChunks = [
+      { text: 'This is a test.', isFinished: false },
+      { text: 'This is only a test!', isFinished: false },
+      { text: 'Make sure it works p', isFinished: false },
+      { text: 'roperly? Okay.', isFinished: true },
+    ];
+
+    const result = splitTextIntoChunks(text, chunkSize);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  // Omitting chunkSize exercises the 4000-character default.
+  test('splits text into chunks with default size', () => {
+    const text = 'A'.repeat(8000) + '. The end.';
+    const expectedChunks = [
+      { text: 'A'.repeat(4000), isFinished: false },
+      { text: 'A'.repeat(4000), isFinished: false },
+      { text: '. The end.', isFinished: true },
+    ];
+
+    const result = splitTextIntoChunks(text);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  test('returns a single chunk if text length is less than chunk size', () => {
+    const text = 'Short text.';
+    const expectedChunks = [{ text: 'Short text.', isFinished: true }];
+
+    const result = splitTextIntoChunks(text, 4000);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  // The input must be longer than chunkSize (36 * 200 = 7200 characters), otherwise the
+  // no-separator split path is never reached and this only repeats the single-chunk case.
+  test('handles text with no separators correctly', () => {
+    const text = 'ThisTextHasNoSeparatorsAndIsVeryLong'.repeat(200);
+    const chunkSize = 4000;
+    const expectedChunks = [
+      { text: text.slice(0, chunkSize), isFinished: false },
+      { text: text.slice(chunkSize), isFinished: true },
+    ];
+
+    const result = splitTextIntoChunks(text, chunkSize);
+    expect(result).toEqual(expectedChunks);
+  });
+
+  test('throws an error when text is empty', () => {
+    expect(() => splitTextIntoChunks('')).toThrow('Text is required');
+  });
+});