Skip to content

Commit

Permalink
init repository
Browse files Browse the repository at this point in the history
  • Loading branch information
xzyaoi committed Sep 2, 2021
0 parents commit 2ed7a7b
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 0 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/aid-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# This workflow checks out the repository and runs the AID model tests via the aid-testing-actions action.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Testing

on:
push:
branches: [ master ]
pull_request:
branches: [ master ]

jobs:
build:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Test AID Model
uses: autoai-org/aid-testing-actions@0.0.3
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
docker*
pretrained/
runner*
*.pyc
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# eth-library-lab/speech

![AID Testing](https://github.com/eth-library-lab/speech/actions/workflows/aid-ci.yml/badge.svg)

> Speech Analysis Package

This project is based on the AID Project. Common topics are discussed in the [AID docs](https://aid.autoai.org). Make sure to read them!

## Usage


## Reference
8 changes: 8 additions & 0 deletions aid.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name="speech"
vendor="eth-library-lab"
tagline="Speech Analysis Package"

[[solvers]]
name="speechSolver"
class="speech/solver/speechSolver"
2 changes: 2 additions & 0 deletions prepip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Installs system-level packages with apt-get.
# NOTE(review): judging by the file name, this presumably runs before the pip
# install step of the AID build - confirm against the AID docs.
echo "Installing pip-required dependencies"
# SoX is the external binary that speech/solver.py invokes to resample audio.
apt-get update && apt-get install sox -y
1 change: 1 addition & 0 deletions pretrained.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
mlpm
textblob
deepspeech
2 changes: 2 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# NOTE(review): presumably executed once the package build has completed,
# as the message below suggests - confirm against the AID docs.
echo "Finished Build..."
# Download the corpora TextBlob needs (used by speech/solver.py for sentence
# splitting and sentiment analysis).
python -m textblob.download_corpora
Empty file added speech/bundle.py
Empty file.
64 changes: 64 additions & 0 deletions speech/solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from os import fsdecode
from mlpm.solver import Solver
from deepspeech import Model
import wave
import shlex
import subprocess
import numpy as np
from textblob import TextBlob
import textblob
try:
    # shlex.quote is the supported home of quote() on Python 3.3+.
    from shlex import quote
except ImportError:
    # Legacy fallback for interpreters whose shlex lacks quote();
    # note that pipes itself was removed in Python 3.13.
    from pipes import quote

def convert_sample_rate(audio_path, desired_sample_rate):
    """Resample an audio file with SoX and return the raw samples.

    SoX is asked to emit headerless, 16-bit, signed, little-endian,
    single-channel PCM at ``desired_sample_rate`` on stdout.

    Args:
        audio_path: Path of the audio file to convert.
        desired_sample_rate: Target sample rate in Hz.

    Returns:
        A ``(desired_sample_rate, samples)`` tuple where ``samples`` is a
        read-only ``numpy.int16`` array viewing SoX's output.

    Raises:
        RuntimeError: SoX exited with a non-zero status.
        OSError: The ``sox`` executable could not be started.
    """
    # Hand subprocess an argument list directly: no shell is involved, so
    # there is no need to quote the path and re-split a command string.
    sox_cmd = [
        'sox', audio_path,
        '--type', 'raw',
        '--bits', '16',
        '--channels', '1',
        '--rate', str(desired_sample_rate),
        '--encoding', 'signed-integer',
        '--endian', 'little',
        '--compression', '0.0',
        '--no-dither',
        '-',  # write the converted audio to stdout
    ]
    try:
        output = subprocess.check_output(sox_cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # stderr is captured as bytes; decode it so the message is readable.
        stderr_text = e.stderr.decode(errors='replace') if e.stderr else ''
        raise RuntimeError('SoX returned non-zero status: {}'.format(stderr_text)) from e
    except OSError as e:
        raise OSError(e.errno, 'SoX not found, use {}hz files or install it: {}'.format(desired_sample_rate, e.strerror)) from e

    return desired_sample_rate, np.frombuffer(output, np.int16)

class speechSolver(Solver):
    """AID solver that transcribes a WAV file and scores its sentiment.

    Transcription is done with a DeepSpeech 0.9.3 model plus external scorer
    loaded from the ``pretrained/`` directory; sentiment is computed per
    sentence with TextBlob.
    """

    def __init__(self, toml_file=None):
        """Load the DeepSpeech model and scorer, then mark the solver ready.

        Args:
            toml_file: Forwarded unchanged to ``Solver.__init__``.
        """
        super().__init__(toml_file)
        self.ds = Model("pretrained/deepspeech-0.9.3-models.pbmm")
        self.scorepath = "pretrained/deepspeech-0.9.3-models.scorer"
        self.ds.enableExternalScorer(self.scorepath)
        # Sample rate the loaded model expects; inputs are resampled to it.
        self.desired_sample_rate = self.ds.sampleRate()
        self.ready()

    def infer(self, data):
        """Transcribe the uploaded audio file and analyse its sentiment.

        Args:
            data: Mapping whose ``'input_file_path'`` entry is the path of
                the uploaded audio. The file is opened with ``wave``, so it
                must already be a WAV file.

        Returns:
            A dict with keys ``"transcript"`` (recognised text),
            ``"audio_length"`` (duration in seconds, from the original
            file's frame count and rate), ``"resampled"`` (whether SoX
            conversion was needed) and ``"sentiment"`` (one dict per
            sentence with ``'sentence'``, ``'polarity'`` and
            ``'subjectivity'``).
        """
        input_path = data['input_file_path']

        # The context manager guarantees the file is closed even when
        # reading or resampling raises.
        with wave.open(input_path, 'rb') as fin:
            fs_orig = fin.getframerate()
            frame_count = fin.getnframes()
            resampled = fs_orig != self.desired_sample_rate
            if resampled:
                _, audio = convert_sample_rate(input_path, self.desired_sample_rate)
            else:
                # NOTE(review): this path assumes 16-bit mono samples; only
                # the sample rate is checked before skipping SoX - confirm
                # whether stereo or 8-bit inputs need conversion too.
                audio = np.frombuffer(fin.readframes(frame_count), np.int16)

        audio_length = frame_count * (1/fs_orig)

        result = self.ds.stt(audio)
        sentiment = [
            {
                'sentence': str(sentence),
                'polarity': sentence.sentiment.polarity,
                'subjectivity': sentence.sentiment.subjectivity
            }
            for sentence in TextBlob(result).sentences
        ]
        return {
            "transcript": result,
            "audio_length": audio_length,
            "resampled": resampled,
            "sentiment": sentiment
        }

0 comments on commit 2ed7a7b

Please sign in to comment.