-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtattle.py
590 lines (457 loc) · 21.1 KB
/
tattle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
#!/usr/bin/python
import os
import sys
import logging
import logging.handlers as handlers
import yaml
import json
import RPi.GPIO as GPIO
import pyaudio
import time
import threading
import boto3
import botocore
import requests
from sound_recorder import SoundRecorder
from datetime import datetime
from tempfile import mkstemp
from slacker import IncomingWebhook
# Log line layout; the console uses the same layout as the log file.
file_log_format = '%(asctime)s %(levelname)s %(message)s'
console_log_format = file_log_format
# Configure the root logger so every module logs at INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# File log: 'tattle.log' (relative to the working directory), rolled over at midnight.
file_handler = handlers.TimedRotatingFileHandler('tattle.log', when='midnight')
file_handler.setFormatter(logging.Formatter(file_log_format))
logger.addHandler(file_handler)
# Console log via a plain stream handler.
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(console_log_format))
logger.addHandler(console_handler)
# Module-level state. cfg, handset_pin and led_pin are filled in by main()
# from config.yaml and read by the other functions in this file.
cfg = None
handset_pin = None
led_pin = None
need_receiver_reset = False #Requires the receiver to be 'hung up' before the next recording
run_test = False #Run once and exit
def main():
    """Load the config, set up audio/GPIO and poll the handset forever.

    Reads ``config.yaml`` from the working directory into the module-level
    ``cfg`` and takes the handset and LED pin numbers from it. Returns early
    (after logging) when the config cannot be read or no handset pin is set.
    Otherwise it never returns: every 0.5 seconds it checks the handset and
    calls ``capture_audio()`` when the receiver has been lifted.
    """
    global cfg
    global handset_pin
    global led_pin
    global need_receiver_reset

    # PyAudio is only needed here to list the audio devices, so it is
    # released again (see the finally below) before the polling loop starts.
    p = pyaudio.PyAudio()
    try:
        device_count = p.get_device_count()
        logger.info("")
        logger.info("")
        logger.info("-=Tattle App=-")
        logger.info("")
        logger.info("Loading config...")
        led_pin = None
        try:
            with open("config.yaml", 'r') as ymlfile:
                cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)
            logger.info("\n\n" + yaml.dump(cfg, Dumper=yaml.Dumper))
            handset_pin = cfg["handset"]["handset_pin"]
            led_pin = cfg["recording"]["led_pin"]
            if not handset_pin:
                logger.error("Handset pin not configured")
                return
        except Exception:
            logger.exception("Config read error")
            return

        logger.info("- Audio devices (" + str(device_count) + ") -")
        for i in range(device_count):
            print_device_info(p.get_device_info_by_index(i))
        logger.info("")
        logger.info("- Default recording device -")
        print_device_info(p.get_default_input_device_info())
    finally:
        p.terminate()

    # Pin Setup:
    GPIO.setwarnings(False)
    GPIO.setmode(GPIO.BCM)  # Broadcom pin-numbering scheme
    GPIO.setup(handset_pin, GPIO.IN, pull_up_down=GPIO.PUD_UP)  # Handset pin set as input w/ pull-up
    if led_pin:
        GPIO.setup(led_pin, GPIO.OUT)  # LED pin set as output
        GPIO.output(led_pin, GPIO.LOW)

    logged_waiting_msg = False
    if is_phone_off_hook():
        logger.warning("Receiver initial state is off the hook. Preventing false recording on start")
        need_receiver_reset = True

    while True:
        try:
            if not logged_waiting_msg:
                logger.info("")
                logger.info("-------------------------------------")
                logger.info("Waiting for tattle...")
                logger.info("-------------------------------------")
                logger.info("")
                logged_waiting_msg = True
            if is_phone_off_hook() or run_test:
                # Need to wait until the receiver is hung up again before a
                # new recording can occur.
                if not need_receiver_reset:
                    logged_waiting_msg = False
                    capture_audio()
                    if run_test:
                        # Prevent multiple recordings during the test cycle.
                        need_receiver_reset = True
            else:
                # Receiver is back on the hook: allow the next recording.
                need_receiver_reset = False
        except Exception:
            logger.exception("Main loop error")
        time.sleep(0.5)
def is_phone_off_hook():
    """Return the current level of the handset GPIO input.

    The value is whatever ``GPIO.input(handset_pin)`` reports; callers treat
    a truthy level as "receiver is off the hook".
    """
    pin_level = GPIO.input(handset_pin)
    return pin_level
##TODO CHRIS Encode to MP3 if it makes sense (currently doesn't)
##TODO CHRIS move from path aws credentials to yaml settings
##TODO CHRIS flash LED on startup
##TODO CHRIS send slack message on service start/stop ?
##TODO CHRIS send initial slack msg immediately and updates as post-processing occurs
##TODO CHRIS https://www.instructables.com/id/Disable-the-Built-in-Sound-Card-of-Raspberry-Pi/
##TODO CHRIS https://aws.amazon.com/blogs/database/indexing-metadata-in-amazon-elasticsearch-service-using-aws-lambda-and-python/
##TODO CHRIS Doc disabling built-in audio - https://www.raspberrypi.org/forums/viewtopic.php?t=37873
def capture_audio():
    """Record one tattle from the handset and hand it off for processing.

    Records into a temporary file until the receiver is hung up, honouring
    the configured minimum duration and stopping early at the configured
    maximum duration (in which case ``need_receiver_reset`` is set so the
    receiver must be hung up before another recording starts). When the
    recording is finished, ``post_capture_processing`` is started on a
    daemon thread, which then owns the temporary file.

    All errors are logged and swallowed; on failure the temporary file is
    removed and the recording LED is switched off again.
    """
    global need_receiver_reset
    rec_tmp_file_path = None
    handed_off = False  # True once the worker thread owns the temp file
    try:
        base_name = "tattle_" + time.strftime('%Y%m%d_%H%M%S')

        # recording
        rec_filename = base_name + ".wav"
        rec_tmp_fd, rec_tmp_file_path = mkstemp()
        os.close(rec_tmp_fd)

        tattle_props = {
            'id': base_name,
            'rec_filename': rec_filename,
            'timestamp': datetime.now().strftime("%Y/%m/%d %H:%M:%S"),
            'rec_url': None,
            'rec_public_url': None,
            'rec_duration': None,
            'transcript_url': None,
            'transcript': None,
            'sentiment': None,
            'key_phrases': [],
        }

        # Read the limits before recording starts so a bad config fails fast.
        recording_min_secs = int(cfg["recording"]["min_secs"])
        recording_max_secs = int(cfg["recording"]["max_secs"])

        with SoundRecorder(rec_tmp_file_path) as rec:
            logger.info("Starting recording - " + rec_filename)
            rec.start_recording()
            adjust_recording_led(True)
            try:
                # A limit of 0 (or less) disables that limit.
                min_record_end = None
                max_record_end = None
                if recording_min_secs > 0:
                    min_record_end = time.time() + recording_min_secs
                if recording_max_secs > 0:
                    max_record_end = time.time() + recording_max_secs

                continue_recording = True
                while continue_recording:
                    time.sleep(0.25)
                    now = time.time()
                    min_met = min_record_end is None or now >= min_record_end
                    max_met = max_record_end is not None and now >= max_record_end
                    # Hanging up ends the recording, but only once the
                    # minimum duration has been reached.
                    if not is_phone_off_hook() and min_met:
                        continue_recording = False
                    # Abort the recording if it exceeded the max duration
                    if max_met:
                        logger.info("Aborting recording due to max duration (" + str(recording_max_secs) + ")")
                        continue_recording = False
                        need_receiver_reset = True

                logger.info("Stopping recording")
                rec.stop_recording()
            finally:
                # Never leave the LED lit, even if recording blew up.
                adjust_recording_led(False)

            duration = rec.get_duration()
            tattle_props["rec_duration"] = duration
            logger.info("Recording finished - Duration: " + str(round(duration, 2)))

        t = threading.Thread(
            target=post_capture_processing,
            args=(tattle_props, base_name, rec_filename, rec_tmp_file_path),
        )
        t.daemon = True
        t.start()
        handed_off = True
    except Exception:
        logger.exception("Capture audio exception")
    finally:
        # The worker thread deletes the temp file when it is done; if it was
        # never started, nobody else will, so remove it here.
        if rec_tmp_file_path and not handed_off:
            try:
                os.remove(rec_tmp_file_path)
            except OSError:
                logger.warning("Could not remove temp recording file: " + rec_tmp_file_path)
def print_device_info(device_info):
    """Log the 'index' and 'name' entries of one audio device-info mapping."""
    index_text = str(device_info.get('index'))
    device_name = device_info.get('name')
    logger.info("Index: " + index_text + ", Name: " + device_name)
def adjust_recording_led(enabled):
    """Switch the recording indicator LED on (truthy) or off (falsy).

    Does nothing when no LED pin is configured: main() only sets the pin up
    as an output when ``led_pin`` is truthy, so driving it otherwise would
    raise and abort the recording that is calling us.
    """
    if not led_pin:
        return
    GPIO.output(led_pin, GPIO.HIGH if enabled else GPIO.LOW)
def post_capture_processing(tattle_props, base_name, rec_filename, rec_tmp_file_path):
    """Upload a finished recording, enrich its metadata and notify Slack.

    Steps, in order: upload the recording to S3, fetch a presigned URL for
    it, upload the summary JSON, optionally transcribe and analyse the
    recording (re-uploading the summary after each step that changed it),
    then send the notification. ``tattle_props`` is updated in place.

    Args:
        tattle_props: summary dict created by capture_audio().
        base_name: common file-name stem for this tattle.
        rec_filename: S3 key to use for the recording.
        rec_tmp_file_path: local temp file holding the recording; it is
            deleted here when processing ends, successfully or not.

    All errors are logged and swallowed.
    """
    props_tmp_file_path = None
    transcript_tmp_file_path = None
    try:
        logger.info("Starting post-recording work...")

        # recording properties
        props_filename = base_name + ".json"
        props_tmp_fd, props_tmp_file_path = mkstemp()
        os.close(props_tmp_fd)

        # transcript
        transcript_filename = base_name + "_trascription"
        transcript_tmp_fd, transcript_tmp_file_path = mkstemp()
        os.close(transcript_tmp_fd)

        aws_config = cfg["aws"]
        aws_comprehend_region = aws_config["comprehend_region"]
        s3_bucket = aws_config["s3_bucket"]

        logger.info("Uploading recording to S3: " + rec_filename)
        s3_url = upload_to_s3(rec_filename, rec_tmp_file_path, s3_bucket)
        if s3_url:
            tattle_props['rec_url'] = s3_url
            s3_public_url = get_s3_presigned_url(rec_filename, s3_bucket)
            if s3_public_url:
                tattle_props['rec_public_url'] = s3_public_url
        upload_props_to_s3(tattle_props, props_filename, props_tmp_file_path, s3_bucket)

        # Transcription needs the uploaded recording; skip it when the
        # upload failed instead of starting a job with no media URL.
        if s3_url and cfg["text_analysis"]["transcription"]:
            transcription = transcribe_recording(
                transcript_filename, transcript_tmp_file_path, s3_url, s3_bucket, tattle_props
            )
            if transcription:
                upload_props_to_s3(tattle_props, props_filename, props_tmp_file_path, s3_bucket)
                if analyze_text(tattle_props, transcription, aws_comprehend_region):
                    upload_props_to_s3(tattle_props, props_filename, props_tmp_file_path, s3_bucket)

        send_slack_msg(tattle_props)
        logger.info("Finished post-recording work")
        logger.info("Final summary JSON document: \n" + json.dumps(tattle_props, sort_keys=True, indent=4))
    except Exception:
        logger.exception("Post-processing exception")
    finally:
        # Remove each temp file independently so one failure does not leave
        # the others behind.
        for tmp_path in (rec_tmp_file_path, props_tmp_file_path, transcript_tmp_file_path):
            if not tmp_path:
                continue
            try:
                os.remove(tmp_path)
            except OSError:
                logger.warning("Could not remove temp file: " + tmp_path)
        if run_test:  # Quit after test
            # NOTE(review): capture_audio() runs this function on a worker
            # thread, and sys.exit() only raises SystemExit in the calling
            # thread, so this ends the worker rather than the whole process.
            # Confirm whether a full shutdown was intended.
            sys.exit(0)
def upload_props_to_s3(tattle_props, props_filename, props_tmp_file_path, s3_bucket):
    """Write the tattle summary as JSON to its temp file and upload it to S3.

    The document is written with sorted keys and a 4-space indent, then
    uploaded under the key ``props_filename``.
    """
    with open(props_tmp_file_path, 'w') as summary_file:
        summary_json = json.dumps(tattle_props, sort_keys=True, indent=4)
        summary_file.write(summary_json)
    logger.info("Uploading tattle summary JSON document to S3: " + props_filename)
    upload_to_s3(props_filename, props_tmp_file_path, s3_bucket)
def upload_to_s3(upload_filename, local_file_path, s3_bucket):
    """Upload a local file to S3 and return its bucket URL.

    Args:
        upload_filename: key to store the object under.
        local_file_path: path of the file to upload.
        s3_bucket: destination bucket name.

    Returns:
        ``https://<bucket>.s3.amazonaws.com/<key>`` on success, or None when
        the upload raised (the error is logged, not propagated).
    """
    try:
        logger.info("Uploading file to S3")
        started_at = time.time()
        client = boto3.client('s3')
        # To make the object world-readable instead, pass
        # ExtraArgs={'ACL': 'public-read'} to upload_file.
        upload_rsp = client.upload_file(local_file_path, s3_bucket, upload_filename)
        logger.info("S3 upload response: " + str(upload_rsp))
        elapsed_secs = round(time.time() - started_at, 2)
        logger.info("Uploaded file to S3 in " + str(elapsed_secs) + " seconds")
        object_url = "https://" + s3_bucket + ".s3.amazonaws.com/" + upload_filename
        logger.info("S3 URL: " + object_url)
        return object_url
    except botocore.exceptions.ClientError:
        logger.exception("AWS S3 upload error (ClientError)")
    except Exception:
        logger.exception("AWS S3 upload error (Generic)")
    return None
def get_s3_presigned_url(filename, s3_bucket, expires_in=604800):
    """Return a presigned GET URL for an S3 object, or None on failure.

    Args:
        filename: key of the object in the bucket.
        s3_bucket: bucket name.
        expires_in: lifetime of the URL in seconds; defaults to 604800
            (7 days).

    Errors are logged and swallowed; the function then returns None.
    """
    try:
        logger.info("Fetching S3 presigned URL for: " + filename)
        s3_start = time.time()
        s3_client = boto3.client('s3')
        presigned_url = s3_client.generate_presigned_url(
            'get_object',
            Params={'Bucket': s3_bucket, 'Key': filename},
            ExpiresIn=expires_in,
        )
        logger.info("S3 presigned URL response: " + str(presigned_url))
        s3_elapsed = time.time() - s3_start
        logger.info("S3 presigned URL retrieved in " + str(round(s3_elapsed, 2)) + " seconds")
        return presigned_url
    except botocore.exceptions.ClientError:
        logger.exception("AWS S3 presigned URL error (ClientError)")
    except Exception:
        logger.exception("AWS S3 presigned URL error (Generic)")
    return None
def transcribe_recording(transcript_name, transcript_file_path, s3_url, s3_bucket, tattle_props):
    """Transcribe an uploaded recording with AWS Transcribe.

    Starts a transcription job for the WAV file at ``s3_url``, polls every
    5 seconds until the job is COMPLETED or FAILED, then downloads the
    result document from ``s3_bucket`` and extracts the transcript text.

    Args:
        transcript_name: name for the transcription job.
        transcript_file_path: local file to download the result JSON into.
        s3_url: URL of the uploaded recording.
        s3_bucket: bucket the job writes its output to.
        tattle_props: summary dict; 'transcript_url' and 'transcript' are
            filled in on success.

    Returns:
        The transcript text, or None when there is no recording URL, the
        job failed, or any error occurred (errors are logged).
    """
    if not s3_url:
        logger.error("Skipping transcription: recording has no S3 URL")
        return None
    try:
        logger.info("Starting transcribe job - " + s3_url)
        s3_transcribe_start = time.time()
        transcribe = boto3.client('transcribe')
        job_name = transcript_name
        transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': s3_url},
            MediaFormat='wav',
            LanguageCode='en-US',
            OutputBucketName=s3_bucket
        )
        while True:
            status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
            if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
                break
            time.sleep(5)
        logger.info("Transcript results:\n" + str(status))
        s3_transcribe_elapsed = time.time() - s3_transcribe_start
        logger.info("Transcribed recording in " + str(round(s3_transcribe_elapsed, 2)) + " seconds")

        job = status['TranscriptionJob']
        if job['TranscriptionJobStatus'] != "COMPLETED":
            logger.error("Transcription job failed: " + str(job.get('FailureReason')))
            return None

        transcript_url = job['Transcript']['TranscriptFileUri']
        tattle_props['transcript_url'] = transcript_url
        # The object key is the last path segment of the result URL.
        s3_transcript_filename = transcript_url[transcript_url.rfind("/") + 1:]

        logger.info("Downloading transcript: " + transcript_url)
        s3_client = boto3.client('s3')
        s3_download_start = time.time()
        download_rsp = s3_client.download_file(s3_bucket, s3_transcript_filename, transcript_file_path)
        logger.info("S3 download response: " + str(download_rsp))
        s3_download_elapsed = time.time() - s3_download_start
        logger.info("Downloaded file from S3 in " + str(round(s3_download_elapsed, 2)) + " seconds")

        with open(transcript_file_path, 'r') as f:
            transcript_content = json.load(f)
        transcript = transcript_content["results"]["transcripts"][0]["transcript"]
        logger.info("Transcript: " + str(transcript))
        tattle_props["transcript"] = transcript
        return transcript
    except botocore.exceptions.ClientError:
        logger.exception("AWS transcribe error (ClientError)")
    except Exception:
        logger.exception("AWS transcribe error (Generic)")
    return None
def analyze_text(tattle_props, transcript, aws_comprehend_region):
    """Run the configured AWS Comprehend analyses on a transcript.

    Depending on ``cfg["text_analysis"]``, detects the sentiment and/or the
    key phrases of ``transcript`` and stores them in ``tattle_props``
    ('sentiment' and 'key_phrases').

    Args:
        tattle_props: summary dict, updated in place.
        transcript: text to analyse; None/blank text is skipped.
        aws_comprehend_region: AWS region for the Comprehend client; when
            None/blank the analysis is skipped.

    Returns:
        True when ``tattle_props`` was changed, otherwise False. Failures of
        the Comprehend calls are logged and do not raise, so one failed
        analysis cannot prevent the rest of post-processing from running.
    """
    if not transcript:
        return False
    transcript = transcript.strip()
    if not transcript:
        return False
    if not aws_comprehend_region or not aws_comprehend_region.strip():
        logger.info("Skipping sentiment analysis as no AWS region has been configured")
        return False

    logger.info("Starting text analysis...")
    updated_props = False
    try:
        comprehend = boto3.client(service_name='comprehend', region_name=aws_comprehend_region)
    except Exception:
        logger.exception("AWS comprehend client error")
        return False

    ## Sentiment analysis
    if cfg["text_analysis"]["sentiment"]:
        try:
            analysis_start = time.time()
            sentiment_rsp = comprehend.detect_sentiment(Text=transcript, LanguageCode='en')
            analysis_elapsed = time.time() - analysis_start
            logger.info("Sentiment analysis completed in " + str(round(analysis_elapsed, 2)) + " seconds")
            logger.info("Sentiment response:\n" + str(sentiment_rsp))
            if sentiment_rsp["ResponseMetadata"]["HTTPStatusCode"] == 200:
                tattle_props['sentiment'] = sentiment_rsp['Sentiment']
                updated_props = True
        except Exception:
            logger.exception("AWS comprehend sentiment analysis error")

    ## Key phrase analysis
    if cfg["text_analysis"]["key_phrases"]:
        try:
            analysis_start = time.time()
            key_phrases_rsp = comprehend.detect_key_phrases(Text=transcript, LanguageCode='en')
            analysis_elapsed = time.time() - analysis_start
            logger.info("Key phrase analysis completed in " + str(round(analysis_elapsed, 2)) + " seconds")
            logger.info("Key phrases response:\n" + str(key_phrases_rsp))
            if key_phrases_rsp["ResponseMetadata"]["HTTPStatusCode"] == 200 and 'KeyPhrases' in key_phrases_rsp:
                for phrase_obj in key_phrases_rsp['KeyPhrases']:
                    phrase = phrase_obj['Text']
                    # Keep each non-empty phrase once.
                    if phrase and phrase not in tattle_props['key_phrases']:
                        tattle_props['key_phrases'].append(phrase)
                        updated_props = True
        except Exception:
            logger.exception("AWS comprehend key phrase analysis error")

    return updated_props
def send_slack_msg(tattle_props):
    """Announce a tattle on Slack and forward it to the Pipedream webhook.

    Builds one Slack attachment (links, sentiment, key phrases and a
    transcript capped at 2000 characters) coloured by sentiment, posts it to
    the configured Slack incoming webhook, then posts a JSON summary to the
    Pipedream webhook. The Pipedream post is skipped when no Pipedream
    webhook URL is configured. Each post is attempted independently;
    failures are logged and do not raise.

    Args:
        tattle_props: summary dict created by capture_audio() and filled in
            during post-processing.
    """
    slack_cfg = cfg["slack"]
    slack_webhook_url = slack_cfg["webhook_url"]
    pipedream_webhook_url = (cfg.get("pipedream") or {}).get("webhook_url")

    tattle_timestamp = tattle_props["timestamp"]
    tattle_recording_url = tattle_props["rec_url"]
    tattle_recording_public_url = tattle_props["rec_public_url"]
    # Treat a missing duration as 0 so the message can still be built.
    tattle_recording_duration = tattle_props["rec_duration"] or 0
    tattle_transcript = tattle_props["transcript"]
    tattle_sentiment = tattle_props["sentiment"]
    tattle_key_phrases = tattle_props["key_phrases"]

    # Comprehend sentiment -> (display label, Slack colour: good/warning/danger).
    # Unknown sentiments are shown as-is with the default colour.
    sentiment_styles = {
        "MIXED": ("Mixed", "warning"),
        "POSITIVE": ("Positive", "good"),
        "NEUTRAL": ("Neutral", "good"),
        "NEGATIVE": ("Negative", "danger"),
    }
    color = "good"
    if tattle_sentiment:
        tattle_sentiment, color = sentiment_styles.get(tattle_sentiment, (tattle_sentiment, "good"))

    attachment = {}
    attachments = [attachment]
    attachment["mrkdwn_in"] = ["text", "pretext"]
    attachment["pretext"] = (":heavy_minus_sign: New " + str(int(round(tattle_recording_duration)))
                             + "s tattle @ " + tattle_timestamp)
    attachment["fallback"] = attachment["pretext"]
    attachment["color"] = color

    pipedream_msg = {}
    pipedream_msg["timestamp"] = tattle_timestamp
    pipedream_msg["duration"] = round(tattle_recording_duration, 2)
    pipedream_msg["tattle_url_private"] = tattle_recording_url
    pipedream_msg["tattle_url_public"] = tattle_recording_public_url

    text_arr = []
    # Either link may be missing when the upload or the presign step failed.
    links = []
    if tattle_recording_url:
        links.append("<" + tattle_recording_url + "|Private link (No expiration)>")
    if tattle_recording_public_url:
        links.append("<" + tattle_recording_public_url + "|Public link (7 day expiration)>")
    if links:
        text_arr.append(" - ".join(links))

    if tattle_sentiment:
        text_arr.append("*Sentiment* " + tattle_sentiment)
        pipedream_msg["sentiment"] = tattle_sentiment

    if tattle_key_phrases:
        text_arr.append("*Key Phrases* " + ", ".join(tattle_key_phrases))
        pipedream_msg["key_phrases"] = tattle_key_phrases

    if tattle_transcript:
        # Cap the transcript length in the outgoing messages.
        tattle_transcript = tattle_transcript[0:2000]
        text_arr.append("*Transcript* " + tattle_transcript)
        pipedream_msg["transcript"] = tattle_transcript

    if text_arr:
        attachment["text"] = "\n".join(text_arr)

    # Optional presentation settings are only sent when configured.
    slack_msg = {}
    for key in ("channel", "icon_url", "icon_emoji", "username"):
        value = slack_cfg.get(key)
        if value:
            slack_msg[key] = value
    slack_msg["attachments"] = attachments

    try:
        logger.info("Slack WebHook postMessage json:\n" + json.dumps(slack_msg, sort_keys=True, indent=4))
        webHook = IncomingWebhook(slack_webhook_url)
        webHookRsp = webHook.post(slack_msg)
        logger.info("Slack WebHook postMessage response: " + webHookRsp.text)
        if not webHookRsp.ok:
            logger.error("Slack WebHook message send failed: " + webHookRsp.text)
    except Exception as e:
        logger.exception("Slack WebHook message send error: " + str(e))

    if not pipedream_webhook_url:
        logger.info("Skipping Pipedream WebHook post as no webhook URL has been configured")
        return

    try:
        logger.info("Pipedream WebHook post json:\n" + json.dumps(pipedream_msg, sort_keys=True, indent=4))
        # A timeout keeps a stalled endpoint from hanging this thread forever.
        webHookRsp = requests.post(
            pipedream_webhook_url,
            data=json.dumps(pipedream_msg),
            headers={'Content-Type': 'application/json'},
            timeout=30,
        )
        logger.info("Pipedream WebHook post response: " + webHookRsp.text)
        if not webHookRsp.ok:
            logger.error("Pipedream WebHook message send failed: " + webHookRsp.text)
    except Exception as e:
        logger.exception("Pipedream WebHook message send error: " + str(e))
# Script entry point: run the app and always release the GPIO pins on exit.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the polling loop.
        # ("except KeyboardInterrupt, e" was Python-2-only syntax.)
        logger.info("Stopping...")
    finally:
        GPIO.cleanup()  # cleanup all GPIO
        logger.info("Stopped")