Skip to content

Commit

Permalink
Merge pull request #126 from tsurumeso/develop
Browse files Browse the repository at this point in the history
Merge to master
  • Loading branch information
tsurumeso authored Apr 17, 2023
2 parents 0efc190 + 1997807 commit 311dbe1
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ python train.py --dataset path/to/dataset --reduction_rate 0.5 --mixup_rate 0.5
```

## References
- [1] Jansson et al., "Singing Voice Separation with Deep U-Net Convolutional Networks", https://ismir2017.smcnus.org/wp-content/uploads/2017/10/171_Paper.pdf
- [1] Jansson et al., "Singing Voice Separation with Deep U-Net Convolutional Networks", https://ejhumphrey.com/assets/pdf/jansson2017singing.pdf
- [2] Takahashi et al., "Multi-scale Multi-band DenseNets for Audio Source Separation", https://arxiv.org/pdf/1706.09588.pdf
- [3] Takahashi et al., "MMDenseLSTM: An Efficient Combination of Convolutional and Recurrent Neural Networks for Audio Source Separation", https://arxiv.org/pdf/1805.02410.pdf
- [4] Liutkus et al., "The 2016 Signal Separation Evaluation Campaign", Latent Variable Analysis and Signal Separation - 12th International Conference
8 changes: 4 additions & 4 deletions augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@
continue

X, _ = librosa.load(
mix_path, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
mix_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
y, _ = librosa.load(
inst_path, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
inst_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')

X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)
v = X - y
Expand All @@ -60,9 +60,9 @@
subprocess.call(cmd_v, stderr=subprocess.DEVNULL)

y, _ = librosa.load(
output_i, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
output_i, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
v, _ = librosa.load(
output_v, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
output_v, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')

X = y + v

Expand Down
12 changes: 8 additions & 4 deletions inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,18 @@ def main():
device = torch.device('cpu')
model = nets.CascadedNet(args.n_fft, 32, 128)
model.load_state_dict(torch.load(args.pretrained_model, map_location=device))
if torch.cuda.is_available() and args.gpu >= 0:
device = torch.device('cuda:{}'.format(args.gpu))
model.to(device)
if args.gpu >= 0:
if torch.cuda.is_available():
device = torch.device('cuda:{}'.format(args.gpu))
model.to(device)
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
device = torch.device('mps')
model.to(device)
print('done')

print('loading wave source...', end=' ')
X, sr = librosa.load(
args.input, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
args.input, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
basename = os.path.splitext(os.path.basename(args.input))[0]
print('done')

Expand Down
12 changes: 6 additions & 6 deletions lib/spec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def wave_to_spectrogram(wave, hop_length, n_fft):
wave_left = np.asfortranarray(wave[0])
wave_right = np.asfortranarray(wave[1])

spec_left = librosa.stft(wave_left, n_fft, hop_length=hop_length)
spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
spec = np.asfortranarray([spec_left, spec_right])

return spec
Expand Down Expand Up @@ -152,9 +152,9 @@ def cache_or_load(mix_path, inst_path, sr, hop_length, n_fft):
y = np.load(inst_cache_path)
else:
X, _ = librosa.load(
mix_path, sr, False, dtype=np.float32, res_type='kaiser_fast')
mix_path, sr=sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
y, _ = librosa.load(
inst_path, sr, False, dtype=np.float32, res_type='kaiser_fast')
inst_path, sr=sr, mono=False, dtype=np.float32, res_type='kaiser_fast')

X, y = align_wave_head_and_tail(X, y, sr)

Expand Down Expand Up @@ -196,9 +196,9 @@ def spectrogram_to_wave(spec, hop_length=1024):
], axis=0) * 0.2

X, _ = librosa.load(
sys.argv[1], 44100, False, dtype=np.float32, res_type='kaiser_fast')
sys.argv[1], sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast')
y, _ = librosa.load(
sys.argv[2], 44100, False, dtype=np.float32, res_type='kaiser_fast')
sys.argv[2], sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast')

X, y = align_wave_head_and_tail(X, y, 44100)
X_spec = wave_to_spectrogram(X, 1024, 2048)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# install from https://pytorch.org/get-started/locally/
# torch>=1.5.1
# torch>=1.12.0
# torchvision>=0.6.1
tqdm>=4.30
librosa>=0.6.3,<0.9
Expand Down

0 comments on commit 311dbe1

Please sign in to comment.