-
Notifications
You must be signed in to change notification settings - Fork 247
/
Copy pathfeats.py
106 lines (92 loc) · 3.88 KB
/
feats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from email import header
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as ms
import pandas as pd
import librosa
import librosa.display
class FeatExtrct():
    """Frame-level acoustic feature extractor for a single audio segment.

    Computes pitch, MFCCs (with first- and second-order deltas) and
    zero-crossing rate with librosa, all using the same analysis window
    and hop, and stacks them row-wise into one matrix.

    Args:
        data: Audio samples, passed to librosa as the signal ``y``.
        sr: Sampling rate of ``data`` in Hz.
        pitch_threshold: ``threshold`` forwarded to ``librosa.piptrack``.
            librosa only accepts a spectral bin as a pitch candidate when
            its magnitude is strictly greater than ``threshold`` times the
            per-frame reference (the frame maximum by default). The value
            ``1`` that used to be hard-coded here can therefore never be
            exceeded, which left the pitch row identically zero; the
            default is now librosa's own ``0.1``. Pass ``1`` to reproduce
            the old output.
    """

    def __init__(self, data, sr, pitch_threshold=0.1):
        self.frame_length = 0.04  # analysis window: 40 ms
        self.frame_shift = 0.01  # hop between frames: 10 ms
        # Window / hop expressed in samples (truncated towards zero).
        self.win_length = int(self.frame_length * sr)
        self.hop_length = int(self.frame_shift * sr)
        # Not forwarded to any librosa call in this class; kept because it
        # is a public attribute other code may read.
        self.n_fft = 256
        self.data = data
        self.sr = sr
        self.pitch_threshold = pitch_threshold

    def _pitch_extract(self):
        """Return one pitch value per frame (the strongest piptrack bin)."""
        pitches, magnitudes = librosa.core.piptrack(
            y=self.data,
            sr=self.sr,
            win_length=self.win_length,
            hop_length=self.hop_length,
            threshold=self.pitch_threshold,
            fmin=75,
            fmax=1600,
        )
        # For every frame (column) keep the pitch of the bin whose
        # magnitude is largest; frames without any candidate yield 0.
        strongest_bin = np.argmax(magnitudes, axis=0)
        frame_idx = np.arange(magnitudes.shape[1])
        return pitches[strongest_bin, frame_idx]

    def _mfcc_extract(self):
        """Return 13 MFCCs stacked with their delta and delta-delta rows."""
        mfcc = librosa.feature.mfcc(
            y=self.data,
            sr=self.sr,
            win_length=self.win_length,
            hop_length=self.hop_length,
            center=True,
            n_mfcc=13,
        )
        # mode="nearest" pads at the edges instead of interpolating.
        mfcc_delta = librosa.feature.delta(mfcc, mode="nearest")
        mfcc_delta2 = librosa.feature.delta(mfcc, mode="nearest", order=2)
        return np.vstack((mfcc, mfcc_delta, mfcc_delta2))

    def _zero_crossing_rate(self):
        """Return the zero-crossing rate computed with the shared window/hop."""
        return librosa.feature.zero_crossing_rate(
            self.data,
            frame_length=self.win_length,
            hop_length=self.hop_length,
        )

    def get_feats(self):
        """Return all features stacked row-wise: pitch, MFCC block, then ZCR.

        Every column is one analysis frame. ``np.vstack`` requires the
        three streams to have the same number of frames.
        """
        pitch = self._pitch_extract()
        mfcc = self._mfcc_extract()
        zcr = self._zero_crossing_rate()
        return np.vstack((pitch, mfcc, zcr))
class DataPrep():
    """Turn an annotation CSV into per-syllable ``.npy`` feature files.

    Each CSV row names a wav file and gives three time stamps (syllable
    start, vowel start, syllable end). The span before the vowel start is
    treated as the consonant (label 0), the span after it as the vowel
    (label 1). Features for both spans are extracted with ``FeatExtrct``,
    a label row is appended, and the result is saved with ``np.save``.

    Args:
        textgrid: Path of the comma-separated annotation file.
        sr: Sampling rate audio is resampled to when loaded (default 16000,
            the value previously hard-coded).
        min_duration: Segments shorter than this many seconds are skipped
            (default 0.05, the value previously hard-coded).
    """

    # Header names read from the CSV. These are runtime keys and must match
    # the file exactly: file name, serial number, syllable start time,
    # vowel (final) start time, syllable end time.
    _COLUMNS = ["文件名", "序号", "音节起始时刻点", "韵母起始时刻点", "音节结束时刻点"]

    def __init__(self, textgrid, sr=16000, min_duration=0.05):
        self.textgrid = textgrid
        self.sr = sr
        self.min_duration = min_duration
        self.wavlist = None  # filled by _get_all_wavs()

    def _get_all_wavs(self):
        """Load the annotation CSV and keep only the columns used here."""
        df = pd.read_csv(self.textgrid, header=0, sep=",")
        self.wavlist = df.loc[:, self._COLUMNS]

    def _get_label(self, labelfile):
        """Placeholder: label parsing is not implemented.

        Opens ``labelfile`` (so a missing file still raises) and returns an
        empty list without reading it.
        """
        with open(labelfile, 'r'):
            return []

    def _get_feats(self, data, sr):
        """Return the ``FeatExtrct`` feature matrix for ``data`` at ``sr``."""
        return FeatExtrct(data, sr).get_feats()

    def _segment_feats(self, wav_path, offset, duration, label):
        """Return labelled features for one segment, or None if too short.

        The audio is only read from disk when the segment is long enough,
        so zero or negative durations from bad annotations never reach
        ``librosa.load``.
        """
        if duration >= self.min_duration:
            data, _ = librosa.load(
                wav_path, sr=self.sr, offset=offset, duration=duration
            )
            feats = self._get_feats(data, self.sr)
            # One extra row holding the class label for every frame.
            label_row = np.full((1, feats.shape[1]), float(label))
            return np.vstack((feats, label_row))
        return None

    def write_feat(self, feat_pa):
        """Extract and save features for every row of the annotation CSV.

        Output files are named ``<feat_pa>_<wav stem>_<serial>.npy``. When
        both segments of a row are shorter than ``min_duration`` an empty
        array is saved, as before.

        Args:
            feat_pa: Path prefix for the output files.
        """
        self._get_all_wavs()
        rows = self.wavlist.itertuples(index=False, name=None)
        for wav_path, serial, syl_start, vowel_start, syl_end in rows:
            person_name = wav_path.split("/")[-1].split(".")[0] + "_" + str(serial)
            print("person_name : ", person_name)

            # Consonant is labelled 0, vowel is labelled 1.
            conso_feats = self._segment_feats(
                wav_path, syl_start, vowel_start - syl_start, 0
            )
            vowel_feats = self._segment_feats(
                wav_path, vowel_start, syl_end - vowel_start, 1
            )

            # Concatenate along the frame axis, consonant frames first.
            parts = [p for p in (conso_feats, vowel_feats) if p is not None]
            final_feats = np.hstack(parts) if parts else []

            feat_path = feat_pa + "_" + person_name + ".npy"
            np.save(file=feat_path, arr=final_feats)
if __name__ == '__main__':
    # Script entry point: read the annotation CSV and write one .npy
    # feature file per row, using "data/textgrid/feat" as the path prefix.
    DataPrep("data/textgrid/select_TextGrid.csv").write_feat("data/textgrid/feat")