# master_control_mode_setup.py
# run this script to set up master control mode
# you will be asked to speak three times (you get only 3 seconds each time)
# speak as much as you can, in your normal tone
# author: @omegaui
# github: https://github.com/omegaui/linux-voice-control
# license: GNU GPL v3
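# usage (a typical invocation; assumes the repo's dependencies such as
# pyaudio, openai-whisper and termcolor are already installed):
#   python3 master_control_mode_setup.py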
import os
import wave
from array import array
from os.path import exists
import pyaudio
import whisper
from termcolor import cprint
import config_manager
from utils import trim
config_manager.init()
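# note: the loaded configuration provides config_manager.config['name'],
# which is read further below as the current system name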
# initializing PyAudio ...
pyAudio = pyaudio.PyAudio()
# Opening Microphone Stream with above created configuration ...
stream = pyAudio.open(format=pyaudio.paInt16,
                      channels=2,
                      rate=44100,
                      input=True,
                      frames_per_buffer=1024)
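# note: these capture settings (16-bit samples, 2 channels, 44.1 kHz) must match
# the parameters used when the recorded frames are written to WAV files below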
training_data_set = dict()
try:
    if not exists('training-data'):
        os.mkdir('training-data')
except Exception as e:
    print(f'unable to create training directory: {e}')
    exit(1)
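# the training-data directory holds the temporary transcription WAV as well as
# the final master-mode voice samples written at the end of this script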
# records ~3 seconds from the microphone and prints its transcription
# @returns: list of raw audio frames (bytes), or None if no speech was detected
def listen():
    dataframes = []
    audio_chunks = array('h')
    cprint("listening ...", "blue", attrs=["bold"])
    for i in range(0, int(44100 / 1024 * 3)):  # ~3 seconds of 1024-frame chunks
        data = stream.read(1024, exception_on_overflow=False)
        dataframes.append(data)  # stacking every audio frame into the list
        audio_chunks.extend(array('h', data))
    audio_chunks = trim(audio_chunks)
    if len(audio_chunks) == 0:  # clip is empty
        print('no voice')
        return None
    elif max(audio_chunks) < 2000:  # peak amplitude too low, treat as silence
        print('no speech in clip')
        return None
    cprint("transcribing ...", "green", attrs=["bold"])
    cprint(f"You said: {transcribe(dataframes)}", 'blue', attrs=['bold'])
    return dataframes
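# note: trim() comes from the repo's utils module and presumably strips
# leading/trailing silence from the sample array before the checks above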
# saves the audio data to a WAV file and returns its transcription
# @returns: transcription text from the audio data
def transcribe(dataframes):
    WAVE_OUTPUT_FILENAME = 'training-data/internal-transcription.wav'
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(2)
    wf.setsampwidth(pyAudio.get_sample_size(pyaudio.paInt16))
    wf.setframerate(44100)
    wf.writeframes(b''.join(dataframes))
    wf.close()
    audio_model = whisper.load_model('base.en')  # loading the audio model from whisper
    result = audio_model.transcribe(WAVE_OUTPUT_FILENAME, fp16=False, language='english')
    return result['text'].lower().strip()
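# note: whisper.load_model('base.en') downloads the model on first use and
# typically caches it locally, so the very first transcription can take a while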
cprint('\n-----------------------ignore-above-warnings-if-any-----------------------', 'red', attrs=['bold'])
cprint('Welcome to Master Control Mode Setup', "blue", attrs=['bold'])
cprint('We need to record some samples of your voice', "blue", attrs=['bold'])
system_name = config_manager.config['name']
cprint(f'Current System Name: {system_name}', 'blue', attrs=["bold"])
cprint(
    f'Now, you will be asked to speak three times; you can say whatever you want, e.g. hey {system_name}',
    'green', attrs=['bold'])
cprint('Note: You are given only 3 seconds to speak!', 'red', attrs=['bold'])
choice = input('Ready to speak? (y/n) default y: ')
if choice == '':
    choice = 'y'
chances = 3  # number of voice samples needed
# collecting voice samples ...
while choice in 'yn':
    if choice == 'y':
        audio_frames = listen()
        if audio_frames is not None:
            chx = input('Save this sample? (y/n) default y: ')
            if chx == '':
                chx = 'y'
            if chx == 'y':
                cprint('>>> Saved!', 'green', attrs=['bold'])
                training_data_set[chances] = audio_frames
                chances -= 1
            else:
                cprint('Try Again!', 'blue', attrs=['bold'])
    else:
        cprint('Quitting Master Mode Setup', 'red', attrs=['bold'])
        stream.close()
        break
    if chances == 0:
        break
    choice = input('Ready to speak again? (y/n) default y: ')
    if choice == '':
        choice = 'y'
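# note: training_data_set maps the remaining-chances counter (3, 2, 1) to the raw
# frames of each accepted sample, so the files written below are numbered 3 down to 1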
# creating master data samples ...
if chances != 3:
    cprint('Saving Training Data', 'blue', attrs=['bold'])
    for key in training_data_set:
        frames = b''.join(training_data_set[key])
        wf = wave.open(f'training-data/master_mode_audio_sample{key}.wav', 'wb')
        wf.setnchannels(2)
        wf.setsampwidth(pyAudio.get_sample_size(pyaudio.paInt16))
        wf.setframerate(44100)
        wf.writeframes(frames)
        wf.close()
    open('training-data/master-mode', "w").close()
    cprint('Master Mode is all Set!', 'blue', attrs=['bold'])
else:
    cprint('No Training Data collected, restart the program to try again!', 'red', attrs=['bold'])