Merge pull request #126 from tsurumeso/develop

Merge to master
tsurumeso · Apr 17, 2023 · 311dbe1
2 parents 0efc190 + 1997807
commit 311dbe1
Show file tree

Hide file tree

Showing 5 changed files with 20 additions and 16 deletions.
diff --git a/README.md b/README.md
@@ -65,7 +65,7 @@ python train.py --dataset path/to/dataset --reduction_rate 0.5 --mixup_rate 0.5
 ```
 
 ## References
-- [1] Jansson et al., "Singing Voice Separation with Deep U-Net Convolutional Networks", https://ismir2017.smcnus.org/wp-content/uploads/2017/10/171_Paper.pdf
+- [1] Jansson et al., "Singing Voice Separation with Deep U-Net Convolutional Networks", https://ejhumphrey.com/assets/pdf/jansson2017singing.pdf
 - [2] Takahashi et al., "Multi-scale Multi-band DenseNets for Audio Source Separation", https://arxiv.org/pdf/1706.09588.pdf
 - [3] Takahashi et al., "MMDENSELSTM: AN EFFICIENT COMBINATION OF CONVOLUTIONAL AND RECURRENT NEURAL NETWORKS FOR AUDIO SOURCE SEPARATION", https://arxiv.org/pdf/1805.02410.pdf
 - [4] Liutkus et al., "The 2016 Signal Separation Evaluation Campaign", Latent Variable Analysis and Signal Separation - 12th International Conference
diff --git a/augment.py b/augment.py
@@ -47,9 +47,9 @@
             continue
 
         X, _ = librosa.load(
-            mix_path, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
+            mix_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
         y, _ = librosa.load(
-            inst_path, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
+            inst_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
 
         X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)
         v = X - y
@@ -60,9 +60,9 @@
         subprocess.call(cmd_v, stderr=subprocess.DEVNULL)
 
         y, _ = librosa.load(
-            output_i, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
+            output_i, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
         v, _ = librosa.load(
-            output_v, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
+            output_v, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
 
         X = y + v
 

diff --git a/inference.py b/inference.py
@@ -125,14 +125,18 @@ def main():
     device = torch.device('cpu')
     model = nets.CascadedNet(args.n_fft, 32, 128)
     model.load_state_dict(torch.load(args.pretrained_model, map_location=device))
-    if torch.cuda.is_available() and args.gpu >= 0:
-        device = torch.device('cuda:{}'.format(args.gpu))
-        model.to(device)
+    if args.gpu >= 0:
+        if torch.cuda.is_available():
+            device = torch.device('cuda:{}'.format(args.gpu))
+            model.to(device)
+        elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
+            device = torch.device('mps')
+            model.to(device)
     print('done')
 
     print('loading wave source...', end=' ')
     X, sr = librosa.load(
-        args.input, args.sr, False, dtype=np.float32, res_type='kaiser_fast')
+        args.input, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
     basename = os.path.splitext(os.path.basename(args.input))[0]
     print('done')
 

diff --git a/lib/spec_utils.py b/lib/spec_utils.py
@@ -27,8 +27,8 @@ def wave_to_spectrogram(wave, hop_length, n_fft):
     wave_left = np.asfortranarray(wave[0])
     wave_right = np.asfortranarray(wave[1])
 
-    spec_left = librosa.stft(wave_left, n_fft, hop_length=hop_length)
-    spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
+    spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
+    spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
     spec = np.asfortranarray([spec_left, spec_right])
 
     return spec
@@ -152,9 +152,9 @@ def cache_or_load(mix_path, inst_path, sr, hop_length, n_fft):
         y = np.load(inst_cache_path)
     else:
         X, _ = librosa.load(
-            mix_path, sr, False, dtype=np.float32, res_type='kaiser_fast')
+            mix_path, sr=sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
         y, _ = librosa.load(
-            inst_path, sr, False, dtype=np.float32, res_type='kaiser_fast')
+            inst_path, sr=sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
 
         X, y = align_wave_head_and_tail(X, y, sr)
 
@@ -196,9 +196,9 @@ def spectrogram_to_wave(spec, hop_length=1024):
     ], axis=0) * 0.2
 
     X, _ = librosa.load(
-        sys.argv[1], 44100, False, dtype=np.float32, res_type='kaiser_fast')
+        sys.argv[1], sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast')
     y, _ = librosa.load(
-        sys.argv[2], 44100, False, dtype=np.float32, res_type='kaiser_fast')
+        sys.argv[2], sr=44100, mono=False, dtype=np.float32, res_type='kaiser_fast')
 
     X, y = align_wave_head_and_tail(X, y, 44100)
     X_spec = wave_to_spectrogram(X, 1024, 2048)

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 # install from https://pytorch.org/get-started/locally/
-# torch>=1.5.1
+# torch>=1.12.0
 # torchvision>=0.6.1
 tqdm>=4.30
 librosa>=0.6.3,<0.9