-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathapp.py
177 lines (149 loc) · 4.91 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import os
import time
import flask
import logging
import requests
import numpy as np
# from scipy.io import wavfile
from asr import ASR
from tts import TTS
from utils import *
# The Flask application object; every route below is registered on it.
app = flask.Flask(__name__)

# Root logging configuration: INFO and above, with each record showing the
# timestamp, level, logger name and thread name before the message.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s %(threadName)s : %(message)s",
    level=logging.INFO,
)
@app.route('/')
def index():
    """
    Render "index.html", telling the template which speech features are usable.

    A feature is reported as enabled only when BOTH conditions hold:
        - at least one of its expected pretrained-model entries is present in
          the feature's models directory ("./asr/models" or "./tts/models");
          TTS additionally requires the Parallel WaveGAN vocoder entry.
        - the matching model object ("asr_model" / "tts_model") exists as a
          module-level global.

    Returns:
        the rendered template, given the flags "asr_enabled", "tts_enabled"
        and "show_menu" (true when either feature is enabled).

    NOTE:
        a missing or unreadable models directory is treated as "no models
        available" instead of failing the whole page with an OSError.
    """
    def _dir_entries(path):
        # List the directory once; an absent/unreadable directory simply
        # means that no pretrained model can be found there.
        try:
            return set(os.listdir(path))
        except OSError:
            return set()

    # check existence of ASR pretrained models
    asr_mdls = ["an4_pretrained_v2.pth", "librispeech_pretrained_v2.pth",
                "ted_pretrained_v2.pth"]
    asr_files = _dir_entries("./asr/models")
    asr_enabled = any(mdl in asr_files for mdl in asr_mdls) and \
        "asr_model" in globals()

    # check existence of TTS pretrained models
    wavgan_model = "ljspeech.parallel_wavegan.v1"
    tts_mdls = ["fastspeech", "tacotron2", "transformer"]
    tts_files = _dir_entries("./tts/models")
    tts_enabled = wavgan_model in tts_files and \
        any(mdl in tts_files for mdl in tts_mdls) and \
        "tts_model" in globals()

    return flask.render_template("index.html",
                                 asr_enabled=asr_enabled,
                                 tts_enabled=tts_enabled,
                                 show_menu=tts_enabled or asr_enabled)
@app.route('/send_message', methods=['POST'])
def call_chatbot():
    """
    This function is called using a POST request and this function does the
    following:
        - sends the given data to Rasa server running in the background
        - parses the server response, gets the content.
        - if the tts is enabled, passes the text to the TTS
        - returns the result
    Given:
        the given data is on the following structure:
        {
            "useTTS": bool,
            "text": str,
            "id": int
        }
    Returns:
        the returned object is on the following structure:
        [{ "id": int, "type": "image", "body": str},
         { "id": int, "type": "text", "body": str, "snd":{ "audio": list,
                                                       "sample_rate": int}},
         ...
        ]
    NOTE:
        - "snd" key is passed only if TTS is requested, the "tts_model" global
          has been loaded, and the message type is text.
        - an empty Rasa response is retried a limited number of times; if it
          is still empty (or the request raises), a single
          "[SOMETHING WENT WRONG!!]" text message is returned instead.
        - response items having neither a "text" nor an "image" key are
          skipped.
    """
    # parse the given data
    data = flask.json.loads(flask.request.data.decode('utf-8'))
    use_tts, text, current_id = data["useTTS"], data["text"], data["id"]
    app.logger.info("User Query: %s", text)

    # rasa rest API
    url = "http://localhost:5005/webhooks/rest/webhook"
    # rasa request must have a "sender" and "message" keys
    payload = flask.json.dumps({"sender": "Rasa", "message": text})
    # mimic the rasa response when something wrong happens
    fallback = [{'recipient_id': 'Rasa',
                 'text': "[SOMETHING WENT WRONG!!]"}]
    # upper bound on retries, so an always-empty answer cannot hang the
    # request (and re-post the same message to Rasa) forever
    max_attempts = 5

    # call rasa: some requests get lost, retry till you get a response
    res = None
    for attempt in range(1, max_attempts + 1):
        try:
            res = requests.post(url=url, data=payload, timeout=5).json()
        except Exception:
            # keep the best-effort behavior, but leave a trace of the cause
            app.logger.exception("Rasa request failed")
            res = fallback
        if res:
            break
        app.logger.warning("Empty Rasa response (attempt %d of %d)",
                           attempt, max_attempts)
    else:
        # every attempt came back empty
        res = fallback

    app.logger.info("Rasa Response:")
    app.logger.info("\t%s", res)

    # synthesize only when the TTS model object was actually loaded
    tts_ready = bool(use_tts) and "tts_model" in globals()
    if use_tts and not tts_ready:
        app.logger.warning("TTS requested but tts_model is not loaded")

    result = []
    for item in res:
        if "text" in item:
            msg_type = "text"
        elif "image" in item:
            msg_type = "image"
        else:
            # nothing this endpoint knows how to forward
            app.logger.warning("Skipping unsupported Rasa item: %s", item)
            continue
        current_id += 1
        d = {"id": current_id, "type": msg_type, "body": item[msg_type]}
        if tts_ready and msg_type == "text":
            tic = time.time()
            wav, sr = tts_model.synthesize(d["body"])
            d["snd"] = {
                "audio": wav.tolist(),
                "sample_rate": sr,
            }
            toc = time.time()
            app.logger.info("TTS Duration: %s seconds", toc - tic)
        result.append(d)

    # get back the result
    flask_response = app.response_class(response=flask.json.dumps(result),
                                        status=200,
                                        mimetype='application/json')
    return flask_response
@app.route('/send_audio_msg', methods=['POST'])
def call_asr():
    """
    Transcribe audio posted by the frontend and reply with the text.

    The request body is decoded as UTF-8 JSON and its "data" field is turned
    into a float32 NumPy array. That array is passed through
    `normalize_audio` (method "-1_1") and `reduce_noise` (method "wiener")
    before being handed to `asr_model.transcribe`.

    Returns:
        a 200 JSON response on the following structure:
            {"text": str}
    """
    started = time.time()

    body = flask.json.loads(flask.request.data.decode("utf-8"))
    samples = np.array(body["data"], np.float32)

    # [-1:1] normalization first, then noise reduction
    # (dropping the noise-reduction step makes ASR a bit faster)
    samples = normalize_audio(samples, method="-1_1")
    samples = reduce_noise(samples, method="wiener")

    # transcribe the cleaned-up audio
    transcript = asr_model.transcribe(samples)
    elapsed = time.time() - started

    app.logger.info("ASR Model Transcription: "+transcript)
    app.logger.info("ASR Duration: {} seconds".format(elapsed))

    # form response
    return app.response_class(
        response=flask.json.dumps({"text": transcript}),
        status=200,
        mimetype='application/json',
    )
if __name__ == '__main__':
    # Script entry point: read the configuration, build both speech models
    # as module-level globals (the route handlers look them up by name),
    # then start the web server.
    conf = parse_yaml("conf.yaml")

    # ASR model, built from the "asr" section of the configuration
    app.logger.info("===== Loading ASR Model =====")
    asr_model = ASR(conf["asr"])

    # TTS model, built from the "tts" section of the configuration
    app.logger.info("===== Loading TTS Model =====")
    tts_model = TTS(conf["tts"])

    # serve on every network interface, port 5000
    app.logger.info("===== Running the Server =====")
    app.run(port=5000, host="0.0.0.0")