From e1638c1922d6fc85b3a38516f45775186c9d9a6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ju=CC=88rgen=20Hock?= <juergen.hock@jurihock.de>
Date: Sun, 24 Mar 2024 22:46:47 +0100
Subject: [PATCH] Add pitch shifting feature

---
 README.md               | 15 +++++++------
 pyproject.toml          |  3 +++
 requirements.txt        |  1 +
 src/remucs/__main__.py  | 16 +++++++++-----
 src/remucs/analysis.py  |  9 ++------
 src/remucs/remucs.py    |  4 ++--
 src/remucs/synthesis.py | 49 ++++++++++++++++++++++++++++++++++++-----
 src/remucs/utils.py     | 28 +++++++++++++++++++++++
 8 files changed, 99 insertions(+), 26 deletions(-)
 create mode 100644 src/remucs/utils.py

diff --git a/README.md b/README.md
index b367d7b..79c92b8 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,7 @@
 ![test](https://img.shields.io/github/actions/workflow/status/jurihock/remucs/test.yml?branch=main&label=test)
 ![pypi](https://img.shields.io/pypi/v/remucs?color=gold)
 
-The purpose of the _remucs_ command line tool is to extract the individual stems from a mix and remix them again in a certain way.
-Since the stem extraction is based on the [adefossez/demucs](https://github.com/adefossez/demucs) engine, the choice is restricted to the _drum_, _bass_, _vocal_ and _other_ sources.
+The purpose of the _remucs_ command line tool is to extract the individual stems from a mix and remix them again in a certain way, e.g. by adjusting the volume gain and the left-right channel balance or, last but not least, by applying transient-preserving pitch shifting. Since the stem extraction is based on the [adefossez/demucs](https://github.com/adefossez/demucs) engine, the stem choice is restricted to the _drum_, _bass_, _vocal_ and _other_ sources.
 
 ## Usage
 
@@ -17,11 +16,13 @@ Options:
   -f, --fine            Use fine-tuned “htdemucs_ft” model.
   -n, --norm            Normalize output amplitude.
   -m, --mono            Convert stereo input to mono.
-  -b, --bala TEXT       Balance of individual stems [bass,drums,other,vocals].
-                        [default: 0,0,0,0]
-  -g, --gain TEXT       Gain of individual stems [bass,drums,other,vocals].
-                        [default: 1,1,1,1]
-  -d, --data DIRECTORY  Directory where to store intermediate files.
+  -b, --bala TEXT       Balance of individual stems "bass,drums,other,vocals",
+                        e.g. "0,0.5,1,-1". [default: 0,0,0,0]
+  -g, --gain TEXT       Gain of individual stems "bass,drums,other,vocals",
+                        e.g. "2,1,0.5,0". [default: 1,1,1,1]
+  -p, --pitch TEXT      Pitch shifting factor in semitones followed by cents,
+                        e.g. -12 or +12 or +3-50. [default: 0]
+  -d, --data DIRECTORY  Directory where to store the intermediate files.
                         [default: <user’s home directory>]
   -q, --quiet           Don't trash stdout.
   -V, --version         Show the version and exit.
diff --git a/pyproject.toml b/pyproject.toml
index e3a09a9..c6898c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ dependencies = [
   "click",
   "numpy",
   "soundfile",
+  "stftpitchshift",
   "tqdm",
 ]
 
@@ -71,6 +72,7 @@ addopts = ["--capture=tee-sys"]
 
 [tool.pylint]
 max-args = 10
+max-locals = 42
 max-line-length = 200
 exclude-protected = ["_load_audio"] # demucs.api.Separator._load_audio
 
@@ -83,3 +85,4 @@ disable = [
 [tool.pyright]
 reportMissingImports          = false # import demucs.{separate,api}
 reportPossiblyUnboundVariable = false # import demucs.{separate,api}
+reportPrivateImportUsage      = false # demucs.api.save_audio
diff --git a/requirements.txt b/requirements.txt
index d257ef9..e63a1d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@
 click
 numpy
 soundfile
+stftpitchshift
 tqdm
diff --git a/src/remucs/__main__.py b/src/remucs/__main__.py
index ff48262..add16c7 100644
--- a/src/remucs/__main__.py
+++ b/src/remucs/__main__.py
@@ -6,6 +6,7 @@
 # pylint: disable=wildcard-import,unused-wildcard-import
 from remucs.common import *
 from remucs.remucs import remucs
+from remucs.utils import cent, semitone
 
 @click.command(                context_settings={'help_option_names': ['-h', '--help']},
                                no_args_is_help=True)
@@ -28,16 +29,20 @@
 @click.option('-b', '--bala',
                                default=','.join(["0"]*len(STEMS)),
                                show_default=True,
-                               help=f'Balance of individual stems [{",".join(sorted(STEMS))}].')
+                               help=f'Balance of individual stems \"{",".join(sorted(STEMS))}\", e.g. \"0,0.5,1,-1\".')
 @click.option('-g', '--gain',
                                default=','.join(["1"]*len(STEMS)),
                                show_default=True,
-                               help=f'Gain of individual stems [{",".join(sorted(STEMS))}].')
+                               help=f'Gain of individual stems \"{",".join(sorted(STEMS))}\", e.g. \"2,1,0.5,0\".')
+@click.option('-p', '--pitch',
+                               default='0',
+                               show_default=True,
+                               help='Pitch shifting factor in semitones followed by cents, e.g -12 or +12 or +3-50.')
 @click.option('-d', '--data',
                                default=pathlib.Path().home(),
                                show_default=True,
                                type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=pathlib.Path),
-                               help='Directory where to store intermediate files.')
+                               help='Directory where to store the intermediate files.')
 @click.option('-q', '--quiet',
                                default=False,
                                is_flag=True,
@@ -46,15 +51,16 @@
                                VERSION,
                                '-V', '--version',
                                message='%(version)s')
-def main(files, fine, norm, mono, bala, gain, data, quiet):
+def main(files, fine, norm, mono, bala, gain, pitch, data, quiet):
 
     try:
 
         balance = [float(_) for _ in bala.split(',')]
         gain    = [float(_) for _ in gain.split(',')]
+        pitch   = semitone(pitch) * cent(pitch)
 
         for file in list(set(files)):
-            remucs(file, fine=fine, norm=norm, mono=mono, balance=balance, gain=gain, data=data, quiet=quiet)
+            remucs(file, fine=fine, norm=norm, mono=mono, balance=balance, gain=gain, pitch=pitch, data=data, quiet=quiet)
 
     except Exception as error:
 
diff --git a/src/remucs/analysis.py b/src/remucs/analysis.py
index 95fe16b..558a531 100644
--- a/src/remucs/analysis.py
+++ b/src/remucs/analysis.py
@@ -1,4 +1,3 @@
-import hashlib
 import os
 import warnings
 
@@ -8,6 +7,7 @@
 
 # pylint: disable=wildcard-import,unused-wildcard-import
 from remucs.common import *
+from remucs.utils import filehash
 
 DEMUCS = None
 
@@ -26,11 +26,6 @@
 if not DEMUCS:
     warnings.warn('In order to use remucs, you also need to install demucs!')
 
-def checksum(file, digest):
-
-    with open(file, 'rb') as stream:
-        return hashlib.file_digest(stream, digest).hexdigest()
-
 def analyze_demucs_separate(model, src, dst, quiet):
 
     dst = next(iter(dst.values()))
@@ -110,7 +105,7 @@ def analyze(file, data, *, model='htdemucs', quiet=True):
 
     check = data / (DIGEST + suffix)
     hash0 = check.read_text().strip() if check.exists() else None
-    hash1 = checksum(src, DIGEST).strip()
+    hash1 = filehash(src, DIGEST).strip()
 
     if hash0 != hash1:
 
diff --git a/src/remucs/remucs.py b/src/remucs/remucs.py
index 63f8db7..2dc5d68 100644
--- a/src/remucs/remucs.py
+++ b/src/remucs/remucs.py
@@ -7,7 +7,7 @@
 from remucs.analysis import analyze
 from remucs.synthesis import synthesize
 
-def remucs(file, *, fine=False, norm=False, mono=False, balance=None, gain=None, data='~', quiet=True):
+def remucs(file, *, fine=False, norm=False, mono=False, balance=None, gain=None, pitch=1.0, data='~', quiet=True):
 
     file = pathlib.Path(file)
 
@@ -33,4 +33,4 @@ def remucs(file, *, fine=False, norm=False, mono=False, balance=None, gain=None,
     model = MODELS[fine]
 
     analyze(src, data, model=model, quiet=quiet)
-    synthesize(dst, data, model=model, norm=norm, mono=mono, balance=balance, gain=gain, quiet=quiet)
+    synthesize(dst, data, model=model, norm=norm, mono=mono, balance=balance, gain=gain, pitch=pitch, quiet=quiet)
diff --git a/src/remucs/synthesis.py b/src/remucs/synthesis.py
index 30d3db8..4a76a69 100644
--- a/src/remucs/synthesis.py
+++ b/src/remucs/synthesis.py
@@ -1,11 +1,12 @@
 import click
 import numpy
 import soundfile
+import stftpitchshift
 
 # pylint: disable=wildcard-import,unused-wildcard-import
 from remucs.common import *
 
-def parse_balance_weights(balance):
+def stereo_balance_weights(balance):
 
     if balance is None:
         balance = numpy.zeros(len(STEMS))
@@ -18,7 +19,7 @@ def parse_balance_weights(balance):
 
     return numpy.clip(y[..., None, None] * [-1, +1] + 1, 0, 1)
 
-def parse_gain_weights(gain):
+def stereo_gain_weights(gain):
 
     if gain is None:
         gain = numpy.ones(len(STEMS))
@@ -31,7 +32,33 @@ def parse_gain_weights(gain):
 
     return numpy.clip(y[..., None, None], -10, +10)
 
-def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=None, gain=None, quiet=True):
+def shiftpitch(x, *, samplerate, factor, quefrency):
+    """
+    Pitch-shift a two-column signal with stftpitchshift, processing each column separately.
+
+    x is promoted to 2-D and must then have shape (N, 2); the returned array has the shape and dtype of x.
+    samplerate is passed to the StftPitchShift constructor, factor and quefrency to its shiftpitch method.
+    """
+
+    x = numpy.atleast_2d(x)
+    y = numpy.zeros_like(x)
+    assert len(x.shape) == 2 and x.shape[-1] == 2
+
+    # frame of 4096 samples with a hop of a quarter frame (overlap factor 4)
+    framesize = 4 * 1024
+    hopsize   = framesize // 4
+
+    shifter = stftpitchshift.StftPitchShift(framesize=framesize, hopsize=hopsize, samplerate=samplerate)
+
+    for column in range(x.shape[-1]):
+
+        source = x[:, column]
+        result = shifter.shiftpitch(source, factors=factor, quefrency=quefrency, normalization=True)
+        y[:, column] = result
+
+    return y
+
+def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=None, gain=None, pitch=1.0, quiet=True):
 
     suffix = file.suffix
 
@@ -41,8 +68,8 @@ def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=
     if not quiet:
         click.echo(f'Synthesizing {dst.resolve()}')
 
-    balance = parse_balance_weights(balance)
-    gain    = parse_gain_weights(gain)
+    balance = stereo_balance_weights(balance)
+    gain    = stereo_gain_weights(gain)
 
     x, sr = zip(*[soundfile.read(stem) for stem in src])
 
@@ -51,6 +78,18 @@ def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=
     x  = numpy.array(x)
     assert len(x.shape) == 3 and x.shape[-1] == 2
 
+    if pitch and pitch > 0 and pitch != 1:
+
+        if not quiet:
+            click.echo(f'Applying pitch shifting by factor {pitch}')
+
+        stems       = [STEMS.index(stem) for stem in ['bass', 'other', 'vocals']]
+        factors     = [pitch] * len(stems)
+        quefrencies = [0, 0, 1e-3]
+
+        for i, stem in enumerate(stems):
+            x[stem] = shiftpitch(x[stem], samplerate=sr, factor=factors[i], quefrency=quefrencies[i])
+
     if not quiet:
         if mono:
             click.echo('Converting input to mono')
diff --git a/src/remucs/utils.py b/src/remucs/utils.py
new file mode 100644
index 0000000..908185d
--- /dev/null
+++ b/src/remucs/utils.py
@@ -0,0 +1,28 @@
+import hashlib
+import re
+
+def semitone(value):
+    """Return the frequency ratio 2**(n/12) for the leading semitone number n of a pitch string such as "-12" or "+3-50"; raise ValueError if malformed."""
+    match = re.fullmatch(r'\s*([+-]?\d+)([+-]\d+)?\s*', value)  # whole string must match, only + or - are valid signs
+    if match is None:
+        raise ValueError(f'Invalid pitch value "{value}", expected semitones optionally followed by cents, e.g. -12 or +12 or +3-50!')
+    return pow(2, float(match[1]) / 12)
+
+def cent(value):
+    """Return the frequency ratio 2**(c/1200) for the optional trailing cent number c of a pitch string such as "+3-50" (1.0 if absent); raise ValueError if malformed."""
+    match = re.fullmatch(r'\s*([+-]?\d+)([+-]\d+)?\s*', value)  # whole string must match, only + or - are valid signs
+    if match is None:
+        raise ValueError(f'Invalid pitch value "{value}", expected semitones optionally followed by cents, e.g. -12 or +12 or +3-50!')
+    return pow(2, float(match[2] or 0) / 1200)
+
+def kilo(value):
+    """Parse an integer string; a trailing "k" or "K" multiplies the preceding number by 1024."""
+    scaled = value.lower().endswith('k')
+    digits = value[:-1] if scaled else value
+    number = int(digits)
+    return number * 1024 if scaled else number
+
+def filehash(file, digest):
+    """Open file in binary mode and return the hexadecimal digest that hashlib.file_digest computes for it with the given digest."""
+    with open(file, mode='rb') as handle:
+        return hashlib.file_digest(handle, digest).hexdigest()