Skip to content

Commit

Permalink
Merge branch 'master' into production
Browse files Browse the repository at this point in the history
  • Loading branch information
shivankacker committed Feb 20, 2024
2 parents 6523190 + fba55aa commit 7fb0685
Show file tree
Hide file tree
Showing 16 changed files with 204 additions and 38 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/test-base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Reusable test workflow: it only runs when another workflow calls it
# (trigger is `workflow_call`).
name: Test

on:
  workflow_call:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Restore the Buildx layer cache; the key changes whenever the Python
      # lock file or the Django Dockerfile changes.
      - name: Cache Docker layers
        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ hashFiles('Pipfile.lock', 'compose/local/django/Dockerfile') }}
          restore-keys: |
            ${{ runner.os }}-buildx-
      # Build every target in the compose file, reading layers from the
      # restored cache and writing the fresh cache to a separate directory.
      - name: Bake docker images
        uses: docker/bake-action@v4
        with:
          load: true
          set: |
            *.cache-from=type=local,src=/tmp/.buildx-cache
            *.cache-to=type=local,dest=/tmp/.buildx-cache-new
          files: docker-compose.local.yaml

      # `--no-build` reuses the images baked above; `--wait` blocks until the
      # services are running/healthy.
      - name: Start services
        run: docker compose -f docker-compose.local.yaml up -d --wait --no-build

      - name: Check migrations
        run: make checkmigration

      # - name: Run tests
      # run: make test-coverage

      # - name: Upload coverage report
      # uses: codecov/codecov-action@v3

      # Swap the freshly written cache into the cached path so only the new
      # layers are kept in /tmp/.buildx-cache.
      - name: Move cache
        run: |
          rm -rf /tmp/.buildx-cache
          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
12 changes: 12 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Runs the reusable test workflow for every pull request.
name: Test PR

on:
  pull_request:

# One active run per workflow + ref: a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
    # Delegates all steps to the shared workflow file in this repository.
    uses: ./.github/workflows/test-base.yaml
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ logs:

# Generate migration files inside the `django` container (depends on `up`).
makemigrations: up
	docker exec django bash -c "python manage.py makemigrations"

# Verify that no model change is missing a migration: `--check` makes the
# command exit non-zero when changes are detected and `--dry-run` writes
# nothing. Has no `up` prerequisite, so the services must already be running.
checkmigration:
	docker compose -f $(docker_config_file) exec django bash -c "python manage.py makemigrations --check --dry-run"

# Run the Django test suite in the `django` container, reusing the test DB.
# NOTE(review): `$(nproc)` is expanded by make as a variable, not by the shell;
# unless `nproc` is defined elsewhere in this Makefile it expands to nothing.
# Confirm, and use `$$(nproc)` if shell command substitution was intended.
test: up
	docker exec django bash -c "python manage.py test --keepdb --parallel=$(nproc)"
Expand Down
19 changes: 19 additions & 0 deletions ayushma/migrations/0051_project_tts_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.6 on 2024-02-11 15:23

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``tts_engine`` small-integer field to the ``project`` model.

    Existing rows receive the default value 2 (labelled "google").
    """

    dependencies = [("ayushma", "0050_alter_chat_model_alter_project_model")]

    operations = [
        migrations.AddField(
            model_name="project",
            name="tts_engine",
            field=models.SmallIntegerField(
                default=2,
                choices=[(1, "openai"), (2, "google")],
            ),
        ),
    ]
1 change: 1 addition & 0 deletions ayushma/models/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class ChatMessage(BaseModel):
original_message = models.TextField(blank=True, null=True)
language = models.CharField(max_length=10, blank=False, default="en")
reference_documents = models.ManyToManyField(Document, blank=True)
# generated ayushma voice audio via TTS
audio = models.FileField(blank=True, null=True)
meta = models.JSONField(blank=True, null=True)
temperature = models.FloatField(blank=True, null=True)
Expand Down
5 changes: 5 additions & 0 deletions ayushma/models/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ class STTEngine(IntegerChoices):
SELF_HOSTED = 3


class TTSEngine(IntegerChoices):
    """Text-to-speech engines, stored as integers with lowercase labels."""

    OPENAI = 1, "openai"
    GOOGLE = 2, "google"


class FeedBackRating(IntegerChoices):
HALLUCINATING = 1
WRONG = 2
Expand Down
5 changes: 4 additions & 1 deletion ayushma/models/project.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.contrib.postgres.fields import ArrayField
from django.db import models

from ayushma.models.enums import ModelType, STTEngine
from ayushma.models.enums import ModelType, STTEngine, TTSEngine
from ayushma.models.users import User
from utils.models.base import BaseModel

Expand All @@ -16,6 +16,9 @@ class Project(BaseModel):
stt_engine = models.IntegerField(
choices=STTEngine.choices, default=STTEngine.WHISPER
)
tts_engine = models.SmallIntegerField(
choices=TTSEngine.choices, default=TTSEngine.GOOGLE
)
model = models.IntegerField(choices=ModelType.choices, default=ModelType.GPT_3_5)
preset_questions = ArrayField(models.TextField(), null=True, blank=True)
is_default = models.BooleanField(default=False)
Expand Down
15 changes: 12 additions & 3 deletions ayushma/serializers/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ class ConverseSerializer(serializers.Serializer):
stream = serializers.BooleanField(default=True)
generate_audio = serializers.BooleanField(default=True)
noonce = serializers.CharField(required=False)
transcript_start_time = serializers.FloatField(required=False)
transcript_end_time = serializers.FloatField(required=False)


class ChatDetailSerializer(serializers.ModelSerializer):
Expand Down Expand Up @@ -146,9 +148,11 @@ def get_chats(self, obj):
)
return [
{
"messageType": ChatMessageType.USER
if thread_message.role == "user"
else ChatMessageType.AYUSHMA,
"messageType": (
ChatMessageType.USER
if thread_message.role == "user"
else ChatMessageType.AYUSHMA
),
"message": thread_message.content[0].text.value,
"reference_documents": thread_message.content[0].text.annotations,
"language": "en",
Expand All @@ -159,3 +163,8 @@ def get_chats(self, obj):
chatmessages = ChatMessage.objects.filter(chat=obj).order_by("created_at")
context = {"request": self.context.get("request")}
return ChatMessageSerializer(chatmessages, many=True, context=context).data


class SpeechToTextSerializer(serializers.Serializer):
    """Input payload made of an audio upload and a language string."""

    # Serializer fields are required unless stated otherwise, so the upload
    # stays mandatory without spelling out ``required=True``.
    audio = serializers.FileField()
    # Falls back to "en" when the client omits the field.
    language = serializers.CharField(default="en")
1 change: 1 addition & 0 deletions ayushma/serializers/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class Meta:
"modified_at",
"description",
"stt_engine",
"tts_engine",
"model",
"is_default",
"display_preset_questions",
Expand Down
6 changes: 6 additions & 0 deletions ayushma/utils/converse.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def converse_api(
audio = request.data.get("audio")
text = request.data.get("text")
language = request.data.get("language") or "en"

try:
service: Service = request.service
except AttributeError:
Expand Down Expand Up @@ -128,6 +129,11 @@ def converse_api(
translated_text = transcript

elif converse_type == "text":
if request.data.get("transcript_start_time") and request.data.get(
"transcript_end_time"
):
stats["transcript_start_time"] = request.data["transcript_start_time"]
stats["transcript_end_time"] = request.data["transcript_end_time"]
translated_text = text

if language != "en":
Expand Down
54 changes: 35 additions & 19 deletions ayushma/utils/language_helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import re

from django.conf import settings
from google.cloud import texttospeech
from google.cloud import translate_v2 as translate
from openai import OpenAI
from rest_framework.exceptions import APIException

from ayushma.models.enums import TTSEngine


def translate_text(target, text):
try:
Expand Down Expand Up @@ -37,31 +41,43 @@ def sanitize_text(text):
return sanitized_text


def text_to_speech(text, language_code, service):
    """Synthesize speech for ``text`` with the selected TTS engine.

    Args:
        text: Text to speak; it is passed through ``sanitize_text`` first.
        language_code: Language tag used to pick the Google voice. ``"en-IN"``
            is mapped to ``"en-US"``. The OpenAI branch does not use it.
        service: A ``TTSEngine`` value selecting the backend.

    Returns:
        The audio payload returned by the backend (``response.audio_content``
        for Google, requested as MP3; ``response.read()`` for OpenAI), or
        ``None`` when anything fails, including an unsupported ``service`` or
        a language missing from ``language_code_voice_map``.
    """
    # Local import keeps this block self-contained; failures are logged with a
    # traceback instead of being printed to stdout.
    import logging

    try:
        # in en-IN neural voice is not available
        if language_code == "en-IN":
            language_code = "en-US"

        text = sanitize_text(text)

        if service == TTSEngine.GOOGLE:
            client = texttospeech.TextToSpeechClient()

            synthesis_input = texttospeech.SynthesisInput(text=text)

            voice = texttospeech.VoiceSelectionParams(
                language_code=language_code,
                name=language_code_voice_map[language_code],
            )
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3
            )

            response = client.synthesize_speech(
                input=synthesis_input,
                voice=voice,
                audio_config=audio_config,
            )

            return response.audio_content

        if service == TTSEngine.OPENAI:
            client = OpenAI(api_key=settings.OPENAI_API_KEY)
            response = client.audio.speech.create(
                model="tts-1-hd",
                voice="nova",
                input=text,
            )
            return response.read()

        # Handled by the except below, so callers still receive None.
        raise APIException("Service not supported")
    except Exception:
        # Best-effort by design: audio generation must not break the caller,
        # so the error is recorded and None is returned.
        logging.getLogger(__name__).exception(
            "text_to_speech failed (service=%r, language_code=%r)",
            service,
            language_code,
        )
        return None
9 changes: 8 additions & 1 deletion ayushma/utils/openaiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def handle_post_response(
temperature,
stats,
language,
tts_engine,
generate_audio=True,
):
chat_message: ChatMessage = ChatMessage.objects.create(
Expand All @@ -225,7 +226,9 @@ def handle_post_response(
ayushma_voice = None
if generate_audio:
stats["tts_start_time"] = time.time()
ayushma_voice = text_to_speech(translated_chat_response, user_language)
ayushma_voice = text_to_speech(
translated_chat_response, user_language, tts_engine
)
stats["tts_end_time"] = time.time()

url = None
Expand Down Expand Up @@ -324,6 +327,8 @@ def converse(
elif message.messageType == ChatMessageType.AYUSHMA:
chat_history.append(AIMessage(content=f"Ayushma: {message.message}"))

tts_engine = chat.project.tts_engine

if not stream:
lang_chain_helper = LangChainHelper(
stream=False,
Expand All @@ -347,6 +352,7 @@ def converse(
temperature,
stats,
language,
tts_engine,
generate_audio,
)

Expand Down Expand Up @@ -404,6 +410,7 @@ def converse(
temperature,
stats,
language,
tts_engine,
generate_audio,
)

Expand Down
19 changes: 7 additions & 12 deletions ayushma/utils/speech_to_text.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os

import openai
import requests
from django.conf import settings
from google.cloud import speech
from openai import OpenAI

from ayushma.models.enums import STTEngine

Expand All @@ -14,19 +14,14 @@ def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
    """Transcribe ``audio`` with OpenAI's ``whisper-1`` model.

    Args:
        audio: Uploaded file object; its ``name`` and the bytes from
            ``read()`` are sent to the API.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    client = OpenAI(api_key=self.api_key)
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        # https://github.com/openai/openai-python/tree/main#file-uploads
        file=(audio.name, audio.read()),
        # Drop the "-IN" region part, e.g. "hi-IN" -> "hi".
        language=self.language_code.replace("-IN", ""),
    )
    return transcription.text


Expand Down
Loading

0 comments on commit 7fb0685

Please sign in to comment.