-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
114 lines (87 loc) · 3.47 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from flask import Flask, request, jsonify
import librosa
import numpy as np
from scipy import stats
import pandas as pd
import speech_recognition as sr
import pickle
import glob
from pathlib import Path
from pydub import AudioSegment
from pydub.silence import split_on_silence
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
app = Flask(__name__)

# Load the lyric vector. The pickle is expected to be a mapping with a 'top'
# entry; only that entry is kept.
# NOTE: pickle.load can execute arbitrary code - only ship trusted artifacts.
with open('tag_vector', 'rb') as dbfile:
    lyr = pickle.load(dbfile)['top']

# Load the MFCC features index table.
with open('query_vector', 'rb') as index_file:
    features_index = pickle.load(index_file)

# Tokenizer that splits on whitespace, digits and punctuation (gaps=True means
# the pattern describes the separators, not the tokens).
# The pattern is a raw string so the regex escapes are not interpreted as
# (invalid) Python string escapes; it compiles to the same expression as before.
# NOTE(review): the `\+` alternative before `\*+` came from a non-raw '\\+',
# which may have been meant to match backslashes - confirm the intent.
token = RegexpTokenizer(r'\s+|\-+|\.+|\@+|\t+|\n+|[0-9]+|\"+|\>+|\,+|\?+|\:+|\{+|\(+|\[+|\)+|\}+|\]+|\<+|\_+|\!+|\/+|\|+|\+|\*+|\=+|\^+|\$+|\&+|\#+|\*+|\++|;+', gaps=True)
lem = WordNetLemmatizer()
def compute_mfcc(song_dir):
    """Compute summary statistics of the MFCCs of one audio file.

    The audio is loaded with ``librosa.load``, turned into a magnitude STFT
    (n_fft=2048, hop_length=512), then a mel power spectrogram, and finally
    20 MFCCs. Seven statistics are taken along axis 1 of the MFCC matrix,
    giving one value per coefficient and statistic.

    Args:
        song_dir: Whatever ``librosa.load`` accepts as its audio source.

    Returns:
        A float32 ``pandas.Series`` with a three-level MultiIndex
        ``(feature, statistics, number)`` holding 7 x 20 values.
    """
    feature_name = 'mfcc'
    n_coefficients = 20
    statistic_names = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')

    # One index entry per (statistic, coefficient), coefficients numbered 01..20.
    index_tuples = [
        (feature_name, statistic, '{:02d}'.format(number + 1))
        for statistic in statistic_names
        for number in range(n_coefficients)
    ]
    index = pd.MultiIndex.from_tuples(
        index_tuples, names=('feature', 'statistics', 'number')
    )
    features = pd.Series(index=index, dtype=np.float32)

    signal, sample_rate = librosa.load(song_dir)
    magnitude = np.abs(librosa.stft(signal, n_fft=2048, hop_length=512))

    # Sanity checks on the STFT shape: frequency bins and frame count.
    assert magnitude.shape[0] == 1 + 2048 // 2
    expected_frames = np.ceil(len(signal) / 512)
    assert expected_frames <= magnitude.shape[1] <= expected_frames + 1

    mel_power = librosa.feature.melspectrogram(sr=sample_rate, S=magnitude ** 2)
    coefficients = librosa.feature.mfcc(S=librosa.power_to_db(mel_power), n_mfcc=20)

    # Same statistics, in the same order, as the index built above.
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('skew', stats.skew),
        ('kurtosis', stats.kurtosis),
        ('median', np.median),
        ('min', np.min),
        ('max', np.max),
    )
    for statistic, reduce_fn in reducers:
        features[feature_name, statistic] = reduce_fn(coefficients, axis=1)

    return features
@app.route('/compute_mfcc', methods=['POST'])
def get_mfcc():
    """Compute MFCC statistics for an uploaded audio file.

    Expects a multipart POST with the audio under the ``file`` field.

    Returns:
        200 with a JSON object mapping ``"<feature>_<statistic>_<number>"``
        (for example ``"mfcc_mean_01"``) to a float;
        400 if the ``file`` part is missing or has an empty filename;
        500 with the error text if feature extraction fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request.'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file.'}), 400

    try:
        audio_features = compute_mfcc(file)
        # The Series is indexed by (feature, statistic, number) tuples, which
        # are not valid JSON object keys; flatten each tuple into one string
        # and convert the values to plain Python floats.
        payload = {
            '_'.join(str(part) for part in key): float(value)
            for key, value in audio_features.items()
        }
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': 'An error occurred while computing MFCC features.', 'details': str(e)}), 500
@app.route('/process_lyrics', methods=['POST'])
def process_lyrics():
    """Transcribe an uploaded audio file and return its bag-of-words vector.

    Expects a multipart POST with the audio under the ``file`` field. The
    audio is transcribed with the Google Web Speech recognizer, lower-cased,
    tokenized with ``token`` and lemmatized with ``lem``. Each word is then
    counted against the vocabulary in ``lyr``.

    Returns:
        200 with ``{'lyrics_vector': [...]}``, a list of 5000 integer counts;
        400 if the ``file`` part is missing or has an empty filename;
        500 with the error text if transcription or counting fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request.'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file.'}), 400

    try:
        # record() and recognize_google() are methods of Recognizer, not
        # functions of the speech_recognition module.
        recognizer = sr.Recognizer()
        with sr.AudioFile(file) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)

        words = [lem.lemmatize(word) for word in token.tokenize(text.lower())]

        # Count each word once so the vocabulary is scanned a single time.
        word_counts = {}
        for word in words:
            word_counts[word] = word_counts.get(word, 0) + 1

        arr = [0] * 5000
        # Vocabulary positions start at 1, as in the original loop.
        # NOTE(review): the count is stored at the vocabulary position j;
        # confirm against the training pipeline whether the slot should be
        # j or j - 1, and that len(lyr) never exceeds 5000.
        for j in range(1, len(lyr)):
            occurrences = word_counts.get(lyr[j], 0)
            if occurrences:
                arr[j] += occurrences

        return jsonify({'lyrics_vector': arr}), 200
    except Exception as e:
        return jsonify({'error': 'An error occurred while processing lyrics.', 'details': str(e)}), 500
if __name__ == '__main__':
    import os

    # Debug mode enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution from the browser. Keep it off unless it is
    # explicitly requested, e.g. `FLASK_DEBUG=1 python app.py`.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)