forked from devicehive/AlexaDevice
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalexa_audio.py
141 lines (126 loc) · 4.16 KB
/
alexa_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python3
import math
import os
import struct
import threading
import time
from subprocess import Popen, PIPE, STDOUT

import alexa_audio_device
from pocketsphinx import *
DETECT_HYSTERESIS = 1.2 # a capture may end once the level falls below the background average times this factor
DETECT_MIN_LENGTH_S = 2.5 # minimal length of a record, in seconds
DETECT_MAX_LENGTH_S = 10 # maximal length of a record, in seconds; the capture is cut off at this point
class AlexaAudio:
    """Listen for the "alexa" keyword and record the phrase that follows it.

    A background thread (``processAudio``) reads audio from an
    ``AlexaAudioDevice``, feeds it to a PocketSphinx keyphrase search and, once
    the keyword is found, accumulates the following audio until the capture is
    considered finished. The finished recording is handed over by
    ``get_audio``.

    Audio is treated as 16-bit little-endian samples throughout, and durations
    are computed for 16000 samples per second.
    """

    def __init__(self, threshold, callback):
        """Open the audio device, configure PocketSphinx and start the worker.

        Args:
            threshold: value passed to PocketSphinx as ``-kws_threshold``.
            callback: callable invoked without arguments, on a new thread,
                each time a capture that was started with ``notify=True``
                finishes.
        """
        self.ad = alexa_audio_device.AlexaAudioDevice()
        self.callback = callback
        self.beep_buf = self._beep()
        self.beep_short_buf = self._beep(150, 3000.0, 16000, 0.2)
        self.is_run = True
        # Running estimate of the input level while nothing is being captured.
        self.average = 100.0
        # Amount of upcoming input (counted with len() of the read buffers)
        # that the worker must drop instead of analysing.
        self.skip = 0
        # init pocketsphinx
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        config.set_string('-logfn', '/dev/null')
        config.set_string('-keyphrase', 'alexa')
        print("Voice threshold is " + str(threshold))
        config.set_float('-kws_threshold', threshold)
        self.decoder = Decoder(config)
        self.decoder.start_utt()
        self.capture_in_progress = False
        # Created here (not only in start_capture) so the worker thread and
        # get_audio() can never observe a missing attribute.
        self.detectBuffer = bytes()
        self.buffer = None
        self.notify = True
        self.pt = threading.Thread(target=self.processAudio)
        self.pt.start()

    def _beep(self, length_ms=150, frequency=1000.0, framerate=16000, amplitude=0.2):
        """Return a sine tone as 16-bit little-endian PCM bytes.

        The period is truncated to a whole number of samples, so the tone that
        is actually produced is ``framerate / int(framerate / frequency)`` Hz
        (for example 3200 Hz when 3000 Hz is requested at 16000 Hz).

        Args:
            length_ms: tone length in milliseconds.
            frequency: requested tone frequency in Hz.
            framerate: samples per second.
            amplitude: fraction of full scale (32767) used as the peak value.

        Returns:
            ``bytes`` holding ``int(framerate * length_ms / 1000)`` samples
            (empty when that count is not positive).
        """
        period = int(framerate / frequency)
        sample_count = int(framerate * length_ms / 1000)
        samples = [
            int(32767.0 * amplitude * math.sin(2.0 * math.pi * float(i % period) / period))
            for i in range(sample_count)
        ]
        # Pack everything in one call instead of growing a bytes object
        # sample by sample.
        return struct.pack('<%dh' % len(samples), *samples)

    @staticmethod
    def _level(buf):
        """Return the mean absolute value of the 16-bit LE samples in *buf*.

        A trailing odd byte is ignored; a buffer without a complete sample
        yields ``0.0``.
        """
        count = len(buf) // 2
        if count == 0:
            return 0.0
        samples = struct.unpack_from('<%dh' % count, buf)
        return sum(map(abs, samples)) / count

    def beep(self):
        """Play the default beep and flush the audio device."""
        self.play(self.beep_buf)
        self.ad.flush()

    def beep_short(self):
        """Play the higher-pitched beep and flush the audio device."""
        self.play(self.beep_short_buf)
        self.ad.flush()

    def start_capture(self, notify=True):
        """Play the short beep and start recording incoming audio.

        Args:
            notify: when true, the callback given to the constructor is
                invoked once the capture finishes.
        """
        self.beep_short()
        # Prepare the state first and raise the flag last: the worker thread
        # starts appending to detectBuffer as soon as it sees the flag.
        self.detectBuffer = bytes()
        self.notify = notify
        self.capture_in_progress = True

    def processAudio(self):
        """Worker loop: read audio, spot the keyword and record phrases.

        Runs until ``is_run`` is cleared or the device returns ``None``.
        While no capture is active every buffer goes to the PocketSphinx
        decoder; a hit starts a capture, anything else updates the background
        level average. During a capture the buffers are accumulated until the
        recording reaches DETECT_MAX_LENGTH_S, or is at least
        DETECT_MIN_LENGTH_S long and the level has dropped below
        ``average * DETECT_HYSTERESIS``.
        """
        print("Audio Processing started.")
        while self.is_run:
            buf = self.ad.read(16000)
            if buf is None:
                print("Alexa audio processing exit")
                break
            if self.skip > 0:
                # play() adds the length of everything it writes to `skip`,
                # so that much input is dropped rather than analysed.
                self.skip -= len(buf)
                continue
            if not buf:
                # Nothing was read: there is no level to measure.
                continue
            level = self._level(buf)
            if self.capture_in_progress:
                self.detectBuffer += buf
                # 16000 samples per second, 2 bytes per sample.
                duration = len(self.detectBuffer) / 16000 / 2
                if duration >= DETECT_MAX_LENGTH_S or (
                        duration >= DETECT_MIN_LENGTH_S and
                        level < self.average * DETECT_HYSTERESIS):
                    self.capture_in_progress = False
                    print("Finished " + str(level) + "/" + str(self.average) + " "
                          + str(duration) + "s")
                    self.buffer = self.detectBuffer
                    if self.notify:
                        # Run the callback on its own thread so this loop
                        # keeps reading from the device.
                        threading.Thread(target=self.callback).start()
                    self.skip += 16000
            else:
                self.decoder.process_raw(buf, False, False)
                if self.decoder.hyp() is not None:
                    self.start_capture()
                    # Keep the buffer in which the keyword was found as the
                    # start of the recording.
                    self.detectBuffer += buf
                    print("Found Alexa keyword")
                    # Restart the utterance so the decoder can fire again.
                    self.decoder.end_utt()
                    self.decoder.start_utt()
                else:
                    self.average = (self.average + level) / 2
        print("Audio Processing finished.")

    def close(self):
        """Stop the worker thread, wait for it and close the audio device."""
        self.is_run = False
        self.pt.join()
        self.ad.close()

    def get_audio(self, timeout=None):
        """Return recorded audio and play the default beep.

        Args:
            timeout: when ``None``, hand over the recording that is already
                finished. Otherwise start a new capture (without callback
                notification) and poll once per second, at most
                ``int(timeout)`` times, for it to finish.

        Returns:
            The finished recording as ``bytes``. If a timeout was given and
            the capture did not finish in time, the capture is stopped and
            whatever was recorded so far is returned. ``None`` when called
            without a timeout while no finished recording is available.
        """
        if timeout is not None:
            self.start_capture(False)
            for _ in range(int(timeout)):
                if self.buffer is not None:
                    break
                time.sleep(1)
            if self.buffer is None:
                res = self.detectBuffer
                self.capture_in_progress = False
                print('Timeout exceed, phrase might not be complete')
                self.beep()
                return res
        res = self.buffer
        # Hand each finished recording out only once.
        self.buffer = None
        self.beep()
        return res

    def play(self, audio):
        """Write raw PCM *audio* to the device.

        The length of *audio* is added to ``skip`` so the worker drops that
        much input instead of analysing it.
        """
        self.skip += len(audio)
        self.ad.write(audio)

    def play_mp3(self, raw_audio):
        """Decode *raw_audio* with ffmpeg and play the result.

        The data is piped through an ``ffmpeg`` subprocess that converts it to
        mono, 16000 Hz, signed 16-bit little-endian PCM. The ffmpeg exit
        status and stderr output are not inspected.
        """
        p = Popen(['ffmpeg', '-i', '-', '-ac', '1', '-acodec',
                   'pcm_s16le', '-ar', '16000', '-f', 's16le', '-'],
                  stdout=PIPE, stdin=PIPE, stderr=PIPE)
        pcm = p.communicate(input=raw_audio)[0]
        self.play(pcm)

    def play_wav(self, file, timeout=None, stop_event=None, repeat=False):
        """Placeholder for WAV playback: not implemented yet.

        Only prints the file name; *timeout*, *stop_event* and *repeat* are
        accepted but currently unused.
        """
        # TODO
        print("play_wav " + file)