-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathenhance_wrapper.m
156 lines (137 loc) · 4.94 KB
/
enhance_wrapper.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
function enhance_wrapper(stubFn, inDir, outDir, part, overwrite, ignoreErrors, filePerChan, inFilesOrPattern, normalizeOutput)
% ENHANCE_WRAPPER  Run an enhancement stub over a set of multichannel wavs.
%
% Wrapper like the one for CHiME3, but working with arbitrary multichannel
% wavs.
%
% For compatibility with the CHiME3 wrapper, takes as argument an
% enhancement stub function with the following interface:
%
%   [Y data] = stubFn(X, fail, sr, fileName);
%
% X is the multi-channel spectrogram of the input as returned by
% stft_multi, fail is a binary row vector with one entry per channel
% indicating whether that mic is considered to have failed, sr is the
% sampling rate reported by the audio file, and fileName is the entry of
% the input file list currently being processed.  Y is the spectrogram
% returned by the stub; it is inverted with istft_multi and written as a
% wav file.  data is passed through structCast(data, @isnumeric, @single)
% and saved to a mat file.
%
% Data will be written in a standard directory structure rooted at outDir:
% output wav files in the wav/ subdirectory and data as mat files in the
% data/ subdirectory.
%
% Arguments:
%   stubFn           - enhancement function handle (interface above)
%   inDir            - directory containing the input wav files
%   outDir           - output root; must differ from inDir
%   part             - tuple [n N] meaning process the nth of N sets of
%                      utterances (files n, n+N, n+2N, ...) to allow for
%                      easy parallelization across matlab sessions.
%                      Default [1 1].
%   overwrite        - if false (default), files whose output wav already
%                      exists are skipped
%   ignoreErrors     - if true, an error raised by stubFn is displayed and
%                      the file is skipped; if false (default) it is
%                      rethrown
%   filePerChan      - if false (default), each input wav holds all
%                      channels.  If true, each channel lives in its own
%                      file named with '.CH<k>' and channels are read from
%                      CH1 upwards (at most CH22) until the first missing
%                      file.  If 2, reading starts at CH0, which must exist.
%   inFilesOrPattern - either a cell array of file names relative to
%                      inDir, or a pattern handed to findFiles.  Default is
%                      '(real|simu).*\.CH1\.wav$' when filePerChan is set
%                      and '.*\.wav$' otherwise.
%   normalizeOutput  - if true, scale the output to a peak of 0.999.
%                      Default false.

if ~exist('overwrite', 'var') || isempty(overwrite), overwrite = false; end
if ~exist('part', 'var') || isempty(part), part = [1 1]; end
if ~exist('ignoreErrors', 'var') || isempty(ignoreErrors), ignoreErrors = false; end
if ~exist('filePerChan', 'var') || isempty(filePerChan), filePerChan = false; end
if ~exist('inFilesOrPattern', 'var'), inFilesOrPattern = ''; end
if ~exist('normalizeOutput', 'var') || isempty(normalizeOutput), normalizeOutput = false; end

% Define hyper-parameters
pow_thresh = -20;  % threshold in dB below which a microphone is considered to fail
wlen = 1024;       % STFT window length
nSeg = 10;         % number of equal-length segments used for mic failure detection

if strcmp(inDir, outDir)
    error('Not overwriting input: %s == %s', inDir, outDir);
end

if isempty(inFilesOrPattern)
    if filePerChan
        inFilesOrPattern = '(real|simu).*\.CH1\.wav$';
    else
        inFilesOrPattern = '.*\.wav$';
    end
end

if ~iscell(inFilesOrPattern)
    inFiles = findFiles(inDir, inFilesOrPattern);
    % Shuffle file list reproducibly
    inFiles = inFiles(runWithRandomSeed(22, @randperm, length(inFiles)));
else
    inFiles = inFilesOrPattern;
end

if filePerChan
    % Channel count of the previously processed utterance (0 = none yet)
    lastNChan = 0;
end

for f = part(1):part(2):length(inFiles)
    inFile = fullfile(inDir, inFiles{f});
    inFileNoCh = strrep(inFiles{f}, '.CH1', '');
    outWavFile = fullfile(outDir, 'wav', inFileNoCh);
    outMaskFile = fullfile(outDir, 'data', strrep(inFileNoCh, '.wav', '.mat'));

    if exist(outWavFile, 'file') && ~overwrite
        fprintf('%d: Skipping %s\n', f, outWavFile);
        continue
    else
        fprintf('%d: %s\n', f, outWavFile);
    end

    % Read file
    if filePerChan
        [inD, inF, inE] = fileparts(inFile);
        assert(reMatch(inF, '\.CH1'));

        info = audioinfo(inFile);
        fs = info.SampleRate;
        % Preallocate using the previous utterance's channel count as a guess
        x = zeros(info.TotalSamples, lastNChan);

        if filePerChan == 2
            startChan = 0;
        else
            startChan = 1;
        end

        nChanRead = 0;
        for c = startChan : 22
            chanFile = fullfile(inD, [strrep(inF, '.CH1', sprintf('.CH%d', c)) inE]);
            if ~exist(chanFile, 'file')
                if c > 0
                    % First missing channel file marks the end of the array
                    break
                else
                    error('Expected to find CH0 file %s', chanFile)
                end
            end
            [x(:,c-startChan+1), fsi] = audioread(chanFile);
            assert(fsi == fs);
            nChanRead = c - startChan + 1;
        end

        % Drop preallocated columns that were never filled, so an utterance
        % with fewer channels than the previous one does not carry phantom
        % all-zero channels (and so the warning below can actually fire).
        x = x(:, 1:nChanRead);

        if (lastNChan > 0) && (size(x,2) ~= lastNChan)
            warning('Messl:Chime3:NChanChanged', 'Number of channels changed from %d to %d', lastNChan, size(x,2));
        end
        lastNChan = size(x,2);
    else
        [x, fs] = audioread(inFile);
    end
    nsampl = size(x,1);

    % Determine if any mics have failed: split the signal into nSeg equal
    % segments and flag a channel if, in any segment, its power relative to
    % the strongest channel of that segment is at or below pow_thresh dB.
    % The per-segment matrix is rebuilt for every file so that a change in
    % channel count between files cannot cause a size mismatch.
    segLen = floor(nsampl / nSeg);
    segFail = false(nSeg, size(x,2));
    for s = 1:nSeg
        seg = x(segLen*(s-1)+1 : segLen*s, :);
        segPow = sum(seg.^2, 1);
        segPow = 10*log10(segPow / max(segPow));
        segFail(s,:) = (segPow <= pow_thresh);
    end
    fail = any(segFail, 1);

    % STFT
    X = stft_multi(x.', wlen);
    [nbin,nfram,~] = size(X);

    %%% Call the stub
    try
        [Y, data] = stubFn(X, fail, fs, inFiles{f});
    catch ex
        if ignoreErrors
            % Report the problem and move on to the next file
            disp(getReport(ex))
            continue;
        else
            rethrow(ex)
        end
    end
    data = structCast(data, @isnumeric, @single);

    % Inverse STFT and write WAV file with one source per channel
    y = istft_multi(Y, nsampl).';
    if normalizeOutput
        peak = max(abs(y(:)));
        % Skip scaling for all-zero (or empty) output to avoid writing NaNs
        if peak > 0
            y = y * 0.999/peak;
        end
    end
    ensureDirExists(outWavFile);
    audiowrite(outWavFile, y, fs);

    ensureDirExists(outMaskFile);
    save(outMaskFile, 'data', 'fs', 'nbin', 'nfram', 'nsampl', 'fail', 'normalizeOutput','-v6');
end