From e1638c1922d6fc85b3a38516f45775186c9d9a6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ju=CC=88rgen=20Hock?=
Date: Sun, 24 Mar 2024 22:46:47 +0100
Subject: [PATCH] Add pitch shifting feature

---
Review notes (text between "---" and the diffstat is ignored by git am):

* The line structure of this patch was restored from a whitespace-collapsed
  copy. Blank context lines and indentation were re-derived from the hunk
  headers, so verify the context against the tree before applying.
* The From: header carries no address and the README context line
  "[default: ]" looks truncated; both are kept exactly as found.
* src/remucs/utils.py differs from the originally recorded blob (908185d):
  semitone() and cent() now use re.fullmatch() with the character class
  [+-] and raise ValueError instead of asserting. Previously trailing input
  was silently ignored (e.g. "0.5" was parsed as 0 semitones, i.e. no
  shift), the class [+,-] also accepted a comma as sign, and the asserts
  would vanish under "python -O". Hunk header and diffstat are updated.
* "e.g" -> "e.g." in the --pitch help text (README.md and __main__.py).
* synthesize() forwards `sr` to shiftpitch(); the lines that turn the
  zip() result into a single sample rate are outside the shown hunks, so
  confirm that `sr` is a scalar at that point.

 README.md               | 15 +++++++------
 pyproject.toml          |  3 +++
 requirements.txt        |  1 +
 src/remucs/__main__.py  | 16 +++++++++-----
 src/remucs/analysis.py  |  9 ++------
 src/remucs/remucs.py    |  4 ++--
 src/remucs/synthesis.py | 49 ++++++++++++++++++++++++++++++++++++-----
 src/remucs/utils.py     | 32 ++++++++++++++++++++++++++++
 8 files changed, 103 insertions(+), 26 deletions(-)
 create mode 100644 src/remucs/utils.py

diff --git a/README.md b/README.md
index b367d7b..79c92b8 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,7 @@
 ![test](https://img.shields.io/github/actions/workflow/status/jurihock/remucs/test.yml?branch=main&label=test)
 ![pypi](https://img.shields.io/pypi/v/remucs?color=gold)
 
-The purpose of the _remucs_ command line tool is to extract the individual stems from a mix and remix them again in a certain way.
-Since the stem extraction is based on the [adefossez/demucs](https://github.com/adefossez/demucs) engine, the choice is restricted to the _drum_, _bass_, _vocal_ and _other_ sources.
+The purpose of the _remucs_ command line tool is to extract the individual stems from a mix and remix them again in a certain way, e.g. by adjusting the volume gain, left-right channel balance and last but not least, transient-preserving pitch shifting. Since the stem extraction is based on the [adefossez/demucs](https://github.com/adefossez/demucs) engine, the stem choice is restricted to the _drum_, _bass_, _vocal_ and _other_ sources.
 
 ## Usage
 
@@ -17,11 +16,13 @@ Options:
   -f, --fine            Use fine-tuned “htdemucs_ft” model.
   -n, --norm            Normalize output amplitude.
   -m, --mono            Convert stereo input to mono.
-  -b, --bala TEXT       Balance of individual stems [bass,drums,other,vocals].
-                        [default: 0,0,0,0]
-  -g, --gain TEXT       Gain of individual stems [bass,drums,other,vocals].
-                        [default: 1,1,1,1]
-  -d, --data DIRECTORY  Directory where to store intermediate files.
+  -b, --bala TEXT       Balance of individual stems "bass,drums,other,vocals",
+                        e.g. "0,0.5,1,-1".  [default: 0,0,0,0]
+  -g, --gain TEXT       Gain of individual stems "bass,drums,other,vocals",
+                        e.g. "2,1,0.5,0".  [default: 1,1,1,1]
+  -p, --pitch TEXT      Pitch shifting factor in semitones followed by cents,
+                        e.g. -12 or +12 or +3-50.  [default: 0]
+  -d, --data DIRECTORY  Directory where to store the intermediate files.
                         [default: ]
   -q, --quiet           Don't trash stdout.
   -V, --version         Show the version and exit.
diff --git a/pyproject.toml b/pyproject.toml
index e3a09a9..c6898c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ dependencies = [
   "click",
   "numpy",
   "soundfile",
+  "stftpitchshift",
   "tqdm",
 ]
 
@@ -71,6 +72,7 @@ addopts = ["--capture=tee-sys"]
 
 [tool.pylint]
 max-args = 10
+max-locals = 42
 max-line-length = 200
 exclude-protected = ["_load_audio"] # demucs.api.Separator._load_audio
 
@@ -83,3 +85,4 @@ disable = [
 [tool.pyright]
 reportMissingImports = false # import demucs.{separate,api}
 reportPossiblyUnboundVariable = false # import demucs.{separate,api}
+reportPrivateImportUsage = false # demucs.api.save_audio
diff --git a/requirements.txt b/requirements.txt
index d257ef9..e63a1d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@
 click
 numpy
 soundfile
+stftpitchshift
 tqdm
diff --git a/src/remucs/__main__.py b/src/remucs/__main__.py
index ff48262..add16c7 100644
--- a/src/remucs/__main__.py
+++ b/src/remucs/__main__.py
@@ -6,6 +6,7 @@
 # pylint: disable=wildcard-import,unused-wildcard-import
 from remucs.common import *
 from remucs.remucs import remucs
+from remucs.utils import cent, semitone
 @click.command(
     context_settings={'help_option_names': ['-h', '--help']},
     no_args_is_help=True)
@@ -28,16 +29,20 @@
 @click.option('-b', '--bala',
     default=','.join(["0"]*len(STEMS)),
     show_default=True,
-    help=f'Balance of individual stems [{",".join(sorted(STEMS))}].')
+    help=f'Balance of individual stems \"{",".join(sorted(STEMS))}\", e.g. \"0,0.5,1,-1\".')
 @click.option('-g', '--gain',
     default=','.join(["1"]*len(STEMS)),
     show_default=True,
-    help=f'Gain of individual stems [{",".join(sorted(STEMS))}].')
+    help=f'Gain of individual stems \"{",".join(sorted(STEMS))}\", e.g. \"2,1,0.5,0\".')
+@click.option('-p', '--pitch',
+    default='0',
+    show_default=True,
+    help='Pitch shifting factor in semitones followed by cents, e.g. -12 or +12 or +3-50.')
 @click.option('-d', '--data',
     default=pathlib.Path().home(),
     show_default=True,
     type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=pathlib.Path),
-    help='Directory where to store intermediate files.')
+    help='Directory where to store the intermediate files.')
 @click.option('-q', '--quiet',
     default=False,
     is_flag=True,
@@ -46,15 +51,16 @@
     VERSION,
     '-V', '--version',
     message='%(version)s')
-def main(files, fine, norm, mono, bala, gain, data, quiet):
+def main(files, fine, norm, mono, bala, gain, pitch, data, quiet):
 
     try:
 
         balance = [float(_) for _ in bala.split(',')]
         gain = [float(_) for _ in gain.split(',')]
+        pitch = semitone(pitch) * cent(pitch)
 
         for file in list(set(files)):
 
-            remucs(file, fine=fine, norm=norm, mono=mono, balance=balance, gain=gain, data=data, quiet=quiet)
+            remucs(file, fine=fine, norm=norm, mono=mono, balance=balance, gain=gain, pitch=pitch, data=data, quiet=quiet)
 
     except Exception as error:
 
diff --git a/src/remucs/analysis.py b/src/remucs/analysis.py
index 95fe16b..558a531 100644
--- a/src/remucs/analysis.py
+++ b/src/remucs/analysis.py
@@ -1,4 +1,3 @@
-import hashlib
 import os
 import warnings
 
@@ -8,6 +7,7 @@
 
 # pylint: disable=wildcard-import,unused-wildcard-import
 from remucs.common import *
+from remucs.utils import filehash
 
 DEMUCS = None
 
@@ -26,11 +26,6 @@
 if not DEMUCS:
     warnings.warn('In order to use remucs, you also need to install demucs!')
 
-def checksum(file, digest):
-
-    with open(file, 'rb') as stream:
-        return hashlib.file_digest(stream, digest).hexdigest()
-
 def analyze_demucs_separate(model, src, dst, quiet):
 
     dst = next(iter(dst.values()))
@@ -110,7 +105,7 @@ def analyze(file, data, *, model='htdemucs', quiet=True):
     check = data / (DIGEST + suffix)
 
     hash0 = check.read_text().strip() if check.exists() else None
-    hash1 = checksum(src, DIGEST).strip()
+    hash1 = filehash(src, DIGEST).strip()
 
     if hash0 != hash1:
 
diff --git a/src/remucs/remucs.py b/src/remucs/remucs.py
index 63f8db7..2dc5d68 100644
--- a/src/remucs/remucs.py
+++ b/src/remucs/remucs.py
@@ -7,7 +7,7 @@
 from remucs.analysis import analyze
 from remucs.synthesis import synthesize
 
-def remucs(file, *, fine=False, norm=False, mono=False, balance=None, gain=None, data='~', quiet=True):
+def remucs(file, *, fine=False, norm=False, mono=False, balance=None, gain=None, pitch=1.0, data='~', quiet=True):
 
     file = pathlib.Path(file)
 
@@ -33,4 +33,4 @@ def remucs(file, *, fine=False, norm=False, mono=False, balance=None, gain=None,
     model = MODELS[fine]
 
     analyze(src, data, model=model, quiet=quiet)
-    synthesize(dst, data, model=model, norm=norm, mono=mono, balance=balance, gain=gain, quiet=quiet)
+    synthesize(dst, data, model=model, norm=norm, mono=mono, balance=balance, gain=gain, pitch=pitch, quiet=quiet)
diff --git a/src/remucs/synthesis.py b/src/remucs/synthesis.py
index 30d3db8..4a76a69 100644
--- a/src/remucs/synthesis.py
+++ b/src/remucs/synthesis.py
@@ -1,11 +1,12 @@
 import click
 import numpy
 import soundfile
+import stftpitchshift
 
 # pylint: disable=wildcard-import,unused-wildcard-import
 from remucs.common import *
 
-def parse_balance_weights(balance):
+def stereo_balance_weights(balance):
 
     if balance is None:
         balance = numpy.zeros(len(STEMS))
@@ -18,7 +19,7 @@ def parse_balance_weights(balance):
 
     return numpy.clip(y[..., None, None] * [-1, +1] + 1, 0, 1)
 
-def parse_gain_weights(gain):
+def stereo_gain_weights(gain):
 
     if gain is None:
         gain = numpy.ones(len(STEMS))
@@ -31,7 +32,33 @@ def parse_gain_weights(gain):
 
     return numpy.clip(y[..., None, None], -10, +10)
 
-def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=None, gain=None, quiet=True):
+def shiftpitch(x, *, samplerate, factor, quefrency):
+
+    x = numpy.atleast_2d(x)
+    y = numpy.zeros_like(x)
+    assert len(x.shape) == 2 and x.shape[-1] == 2
+
+    framesize = 4 * 1024
+    overlap = 4
+    hopsize = framesize // overlap
+    normalize = True
+
+    pitchshifter = stftpitchshift.StftPitchShift(
+        framesize=framesize,
+        hopsize=hopsize,
+        samplerate=samplerate)
+
+    for i in range(x.shape[-1]):
+
+        y[:, i] = pitchshifter.shiftpitch(
+            x[:, i],
+            factors=factor,
+            quefrency=quefrency,
+            normalization=normalize)
+
+    return y
+
+def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=None, gain=None, pitch=1.0, quiet=True):
 
     suffix = file.suffix
 
@@ -41,8 +68,8 @@ def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=
     if not quiet:
         click.echo(f'Synthesizing {dst.resolve()}')
 
-    balance = parse_balance_weights(balance)
-    gain = parse_gain_weights(gain)
+    balance = stereo_balance_weights(balance)
+    gain = stereo_gain_weights(gain)
 
     x, sr = zip(*[soundfile.read(stem) for stem in src])
 
@@ -51,6 +78,18 @@ def synthesize(file, data, *, model='htdemucs', norm=False, mono=False, balance=
     x = numpy.array(x)
     assert len(x.shape) == 3 and x.shape[-1] == 2
 
+    if pitch and pitch > 0 and pitch != 1:
+
+        if not quiet:
+            click.echo(f'Applying pitch shifting by factor {pitch}')
+
+        stems = [STEMS.index(stem) for stem in ['bass', 'other', 'vocals']]
+        factors = [pitch] * len(stems)
+        quefrencies = [0, 0, 1e-3]
+
+        for i, stem in enumerate(stems):
+            x[stem] = shiftpitch(x[stem], samplerate=sr, factor=factors[i], quefrency=quefrencies[i])
+
     if not quiet:
         if mono:
             click.echo('Converting input to mono')
diff --git a/src/remucs/utils.py b/src/remucs/utils.py
new file mode 100644
index 0000000..908185d
--- /dev/null
+++ b/src/remucs/utils.py
@@ -0,0 +1,32 @@
+import hashlib
+import re
+
+def semitone(value):
+    """Return the frequency ratio of the semitone part of a pitch string like "+3-50"."""
+
+    match = re.fullmatch(r'([+-]?\d+)([+-]\d+)?', value)
+    if match is None:
+        raise ValueError(f'Invalid pitch value "{value}"!')
+
+    return pow(2, float(match[1]) / 12)
+
+def cent(value):
+    """Return the frequency ratio of the optional cent part of a pitch string like "+3-50"."""
+
+    match = re.fullmatch(r'([+-]?\d+)([+-]\d+)?', value)
+    if match is None:
+        raise ValueError(f'Invalid pitch value "{value}"!')
+
+    return pow(2, float(match[2] or 0) / 1200)
+
+def kilo(value):
+
+    if value.lower().endswith('k'):
+        return int(value[:-1]) * 1024
+
+    return int(value)
+
+def filehash(file, digest):
+
+    with open(file, 'rb') as stream:
+        return hashlib.file_digest(stream, digest).hexdigest()