
Commit

Merge pull request #46 from sp-nitech/next_release
Version 1.0.0
takenori-y authored Jul 23, 2023
2 parents cb03339 + ac28e90 commit b9e570b
Showing 33 changed files with 490 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
       max-parallel: 4
       matrix:
         python-version: [3.8]
-        pytorch-version: [1.10.0, 2.0.1]
+        pytorch-version: [1.11.0, 2.0.1]
 
     steps:
       - name: Clone
2 changes: 1 addition & 1 deletion Makefile
@@ -24,7 +24,7 @@ init:
 dev:
 	test -d venv || python$(PYTHON_VERSION) -m venv venv; \
 	. ./venv/bin/activate; python -m pip install pip --upgrade; \
-	python -m pip install torch==1.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
+	python -m pip install torch==1.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
 	python -m pip install -e .[dev]
 
 dist:
6 changes: 3 additions & 3 deletions README.md
@@ -3,10 +3,10 @@ diffsptk
 *diffsptk* is a differentiable version of [SPTK](https://github.com/sp-nitech/SPTK) based on the PyTorch framework.
 
 [![Latest Manual](https://img.shields.io/badge/docs-latest-blue.svg)](https://sp-nitech.github.io/diffsptk/latest/)
-[![Stable Manual](https://img.shields.io/badge/docs-stable-blue.svg)](https://sp-nitech.github.io/diffsptk/0.6.0/)
+[![Stable Manual](https://img.shields.io/badge/docs-stable-blue.svg)](https://sp-nitech.github.io/diffsptk/1.0.0/)
 [![Downloads](https://pepy.tech/badge/diffsptk)](https://pepy.tech/project/diffsptk)
 [![Python Version](https://img.shields.io/pypi/pyversions/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
-[![PyTorch Version](https://img.shields.io/badge/pytorch-1.10.0%20%7C%202.0.1-orange.svg)](https://pypi.python.org/pypi/diffsptk)
+[![PyTorch Version](https://img.shields.io/badge/pytorch-1.11.0%20%7C%202.0.1-orange.svg)](https://pypi.python.org/pypi/diffsptk)
 [![PyPI Version](https://img.shields.io/pypi/v/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
 [![Codecov](https://codecov.io/gh/sp-nitech/diffsptk/branch/master/graph/badge.svg)](https://app.codecov.io/gh/sp-nitech/diffsptk)
 [![License](https://img.shields.io/github/license/sp-nitech/diffsptk.svg)](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE)
@@ -17,7 +17,7 @@ diffsptk
 Requirements
 ------------
 - Python 3.8+
-- PyTorch 1.10.0+
+- PyTorch 1.11.0+
 
 
 Documentation
5 changes: 3 additions & 2 deletions diffsptk/core/__init__.py
@@ -44,13 +44,13 @@
 from .lar2par import LogAreaRatioToParcorCoefficients
 from .lbg import LindeBuzoGrayAlgorithm
 from .lbg import LindeBuzoGrayAlgorithm as LBG
-from .levdur import PseudoLevinsonDurbinRecursion
-from .levdur import PseudoLevinsonDurbinRecursion as LevinsonDurbinRecursion
+from .levdur import LevinsonDurbin
 from .linear_intpl import LinearInterpolation
 from .lpc import LinearPredictiveCodingAnalysis
 from .lpc import LinearPredictiveCodingAnalysis as LPC
 from .lpc2par import LinearPredictiveCoefficientsToParcorCoefficients
 from .lpccheck import LinearPredictiveCoefficientsStabilityCheck
+from .magic_intpl import MagicNumberInterpolation
 from .mc2b import MelCepstrumToMLSADigitalFilterCoefficients
 from .mcpf import MelCepstrumPostfiltering
 from .mfcc import MelFrequencyCepstralCoefficientsAnalysis
@@ -78,6 +78,7 @@
 from .pqmf import PseudoQuadratureMirrorFilterBanks
 from .pqmf import PseudoQuadratureMirrorFilterBanks as PQMF
 from .quantize import UniformQuantization
+from .rlevdur import ReverseLevinsonDurbin
 from .rmse import RootMeanSquaredError
 from .rmse import RootMeanSquaredError as RMSE
 from .root_pol import DurandKernerMethod
2 changes: 1 addition & 1 deletion diffsptk/core/ap.py
@@ -248,7 +248,7 @@ def forward(self, x, f0):
 
         H = torch.cat((H_alpha, H_beta), dim=-1)  # (B, N, J, 6)
         w = self.window[i, : self.segment_length[i]]  # (J,)
-        Hw = H.transpose(-2, -1) * w  # (B, N, 6, J)
+        Hw = H.mT * w  # (B, N, 6, J)
         R = torch.matmul(Hw, H)  # (B, N, 6, 6)
 
         index_gamma = origin.unsqueeze(-1) + j[..., 1:-1]  # (B, N, J)
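
The change above, repeated in excite.py, gmm.py, linear_intpl.py, and magic_intpl.py below, swaps transpose(-2, -1) for the Tensor.mT property. Since .mT first appeared in PyTorch 1.11, this is presumably what motivates the 1.10.0 to 1.11.0 bumps in ci.yml, the Makefile, and README.md. A minimal sketch of the equivalence:

import torch

x = torch.randn(2, 3, 6, 4)  # e.g. a (B, N, J, 6)-style batch of matrices

# .mT (PyTorch >= 1.11) swaps the last two dimensions, exactly like the
# explicit transpose it replaces.
assert torch.equal(x.mT, x.transpose(-2, -1))
print(x.mT.shape)  # torch.Size([2, 3, 4, 6])
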
9 changes: 5 additions & 4 deletions diffsptk/core/excite.py
@@ -16,6 +16,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import UNVOICED_SYMBOL
 from .linear_intpl import LinearInterpolation
@@ -79,16 +80,16 @@ def forward(self, p):
         mask = torch.repeat_interleave(mask, self.frame_period, dim=-1)
 
         # Extend right side for interpolation.
-        tmp_mask = torch.cat((base_mask[..., :1] * 0, base_mask), dim=-1)
+        tmp_mask = F.pad(base_mask, (1, 0))
         tmp_mask = torch.eq(tmp_mask[..., 1:] - tmp_mask[..., :-1], -1)
         p[tmp_mask] = torch.roll(p, 1, dims=-1)[tmp_mask]
 
         # Interpolate pitch.
         if p.dim() != 1:
-            p = p.transpose(-2, -1)
+            p = p.mT
         p = self.linear_intpl(p)
         if p.dim() != 1:
-            p = p.transpose(-2, -1)
+            p = p.mT
         p *= mask
 
         # Compute phase.
@@ -101,7 +102,7 @@
 
         if self.voiced_region == "pulse":
             r = torch.ceil(phase)
-            r = torch.cat((r[..., :1] * 0, r), dim=-1)
+            r = F.pad(r, (1, 0))
             pulse_pos = torch.ge(r[..., 1:] - r[..., :-1], 1)
             e = torch.zeros_like(p)
             e[pulse_pos] = torch.sqrt(p[pulse_pos])
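
Both replacements in this file trade the manual torch.cat left-padding idiom for a single F.pad call; grpdelay.py and mgc2mgc.py below receive the same refactor. A minimal sketch of the equivalence, on an illustrative 1-D tensor:

import torch
import torch.nn.functional as F

x = torch.tensor([3.0, 1.0, 4.0])

# Old idiom: prepend one zero by concatenating a zeroed-out slice.
old = torch.cat((x[..., :1] * 0, x), dim=-1)

# New idiom: constant-pad one element on the left of the last axis.
new = F.pad(x, (1, 0))
assert torch.equal(old, new)  # tensor([0., 3., 1., 4.])

# grpdelay.py uses value=1 instead, so the pad restores a leading one
# (the gain slot of the denominator polynomial) rather than a zero.
a1 = torch.tensor([0.5, -0.2])
print(F.pad(a1, (1, 0), value=1))  # tensor([ 1.0000,  0.5000, -0.2000])
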
9 changes: 0 additions & 9 deletions diffsptk/core/fftcep.py
@@ -79,11 +79,6 @@ def forward(self, x):
                 [-0.8539,  4.6173, -0.5496, -0.3207]])
 
         """
-        # Torch's pad only supports 3D, 4D, 5D padding with non-constant padding.
-        d = x.dim()
-        for _ in range(3 - d):
-            x = x.unsqueeze(0)
-
         M = self.cep_order
         H = self.fft_length // 2
 
@@ -102,8 +97,4 @@
 
         indices = [0, M] if H == M else [0]
         v[..., indices] *= 0.5
-
-        # Revert shape.
-        for _ in range(3 - d):
-            v = v.squeeze(0)
         return v
2 changes: 1 addition & 1 deletion diffsptk/core/gmm.py
@@ -316,7 +316,7 @@ def forward(self, x):
             y = posterior.sum(dim=0)
             nu = px / y.view(-1, 1)
             nm = torch.matmul(nu.unsqueeze(-1), self.mu.unsqueeze(-2))
-            mn = nm.transpose(1, 2)
+            mn = nm.mT
             a = pxx - y.view(-1, 1, 1) * (nm + mn - mm)
             b = xi.view(-1, 1, 1) * self.ubm_sigma
             diff = self.ubm_mu - self.mu
3 changes: 1 addition & 2 deletions diffsptk/core/grpdelay.py
@@ -86,8 +86,7 @@ def forward(self, b, a=None):
 
         # Remove gain.
         K, a1 = torch.split(a, [1, order], dim=-1)
-        K = K * 0 + 1
-        a2 = torch.cat((K, a1), dim=-1).unsqueeze(-1)
+        a2 = F.pad(a1, (1, 0), value=1).unsqueeze(-1)
 
         # Perform full convolution.
         b1 = F.pad(b, (order, order))
23 changes: 17 additions & 6 deletions diffsptk/core/levdur.py
@@ -17,18 +17,27 @@
 import torch
 import torch.nn as nn
 
+from ..misc.utils import check_size
 from ..misc.utils import symmetric_toeplitz
 
 
-class PseudoLevinsonDurbinRecursion(nn.Module):
+class LevinsonDurbin(nn.Module):
     """See `this page <https://sp-nitech.github.io/sptk/latest/main/levdur.html>`_
-    for details. Note that the current implementation does not use the Durbin's
-    algorithm though the class name includes it.
+    for details.
+
+    Parameters
+    ----------
+    lpc_order : int >= 0 [scalar]
+        Order of LPC coefficients, :math:`M`.
 
     """
 
-    def __init__(self):
-        super(PseudoLevinsonDurbinRecursion, self).__init__()
+    def __init__(self, lpc_order):
+        super(LevinsonDurbin, self).__init__()
+
+        self.lpc_order = lpc_order
+
+        assert 0 <= self.lpc_order
 
     def forward(self, r):
         """Solve a Yule-Walker linear system.
@@ -48,12 +57,14 @@ def forward(self, r):
         >>> x = diffsptk.nrand(4)
         tensor([ 0.8226, -0.0284, -0.5715,  0.2127,  0.1217])
         >>> acorr = diffsptk.AutocorrelationAnalysis(2, 5)
-        >>> levdur = diffsptk.LevinsonDurbinRecursion()
+        >>> levdur = diffsptk.LevinsonDurbin(2)
         >>> a = levdur(acorr(x))
        >>> a
         tensor([0.8726, 0.1475, 0.5270])
 
         """
+        check_size(r.size(-1), self.lpc_order + 1, "dimension of autocorrelation")
+
         # Make Toeplitz matrix.
         R = symmetric_toeplitz(r[..., :-1])
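
The body builds a symmetric Toeplitz matrix from r[..., :-1] and, as the former Pseudo name hinted, solves the Yule-Walker normal equations directly rather than by the classical recursion. A standalone sketch of that direct approach; the sign and gain conventions here are assumptions for illustration, not necessarily diffsptk's exact ones:

import torch

def yule_walker(r):
    # r holds autocorrelations r[0], ..., r[M] for LPC order M.
    M = r.numel() - 1
    idx = torch.arange(M)
    # Symmetric Toeplitz matrix R[i, j] = r[|i - j|].
    R = r[:-1][(idx.unsqueeze(0) - idx.unsqueeze(1)).abs()]
    # Solve R a = -r[1:] for the predictor coefficients.
    a = torch.linalg.solve(R, -r[1:])
    # The prediction error power yields the gain K (one common convention).
    K = torch.sqrt(r[0] + torch.dot(a, r[1:]))
    return torch.cat((K.unsqueeze(0), a))

print(yule_walker(torch.tensor([1.0, 0.5, 0.25])))  # [K, a1, a2]
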
4 changes: 2 additions & 2 deletions diffsptk/core/linear_intpl.py
@@ -76,7 +76,7 @@ def forward(self, x):
         assert x.dim() == 3, "Input must be 3D tensor"
         B, T, D = x.shape
 
-        x = x.transpose(1, 2)
+        x = x.mT
         x = self.pad(x)
         x = F.interpolate(
             x,
@@ -86,7 +86,7 @@
         )[
             ..., :-1
         ]  # Remove the padded value.
-        y = x.transpose(1, 2).reshape(B, -1, D)
+        y = x.mT.reshape(B, -1, D)
 
         if d == 1:
             y = y.view(-1)
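
For reference, the interpolation trick used here can be reproduced standalone: pad the last frame on the right, let F.interpolate place the frame values every frame_period samples, then drop the padded endpoint. The replicate pad and the align_corners choice below are assumptions that make the arithmetic work out, not a quote of the elided arguments:

import torch
import torch.nn.functional as F

frame_period = 4
x = torch.tensor([[[1.0, 3.0, 2.0]]])  # (B, D, T) frame-rate values

# Replicate the last frame so it has a right endpoint to interpolate toward.
x = F.pad(x, (0, 1), mode="replicate")  # (B, D, T + 1)

# align_corners=True places the original frames exactly every frame_period
# samples; the trailing padded point is then dropped, as in the code above.
size = (x.size(-1) - 1) * frame_period + 1
y = F.interpolate(x, size=size, mode="linear", align_corners=True)[..., :-1]
print(y.shape)  # torch.Size([1, 1, 12]), i.e. T * frame_period samples
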
4 changes: 2 additions & 2 deletions diffsptk/core/lpc.py
@@ -17,7 +17,7 @@
 import torch.nn as nn
 
 from .acorr import AutocorrelationAnalysis
-from .levdur import PseudoLevinsonDurbinRecursion
+from .levdur import LevinsonDurbin
 
 
 class LinearPredictiveCodingAnalysis(nn.Module):
@@ -39,7 +39,7 @@ def __init__(self, lpc_order, frame_length):
 
         self.lpc = nn.Sequential(
             AutocorrelationAnalysis(lpc_order, frame_length),
-            PseudoLevinsonDurbinRecursion(),
+            LevinsonDurbin(lpc_order),
         )
 
     def forward(self, x):
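
With the rename, LinearPredictiveCodingAnalysis reads as exactly the two-stage pipeline it is. A usage sketch, assuming the output layout of the levdur example above (gain first, then coefficients):

import diffsptk

# LPC analysis = autocorrelation followed by Levinson-Durbin.
lpc = diffsptk.LPC(2, 5)  # lpc_order=2, frame_length=5

x = diffsptk.nrand(4)  # a length-5 random frame, as in the levdur example
a = lpc(x)
print(a)  # (3,): gain K followed by a1, a2
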
143 changes: 143 additions & 0 deletions diffsptk/core/magic_intpl.py
@@ -0,0 +1,143 @@
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import torch
import torch.nn as nn

from ..misc.utils import UNVOICED_SYMBOL


class MagicNumberInterpolation(nn.Module):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/magic_intpl.html>`_
    for details.

    Parameters
    ----------
    magic_number : float [scalar]
        Magic number.

    """

    def __init__(self, magic_number=UNVOICED_SYMBOL):
        super(MagicNumberInterpolation, self).__init__()

        self.impl = MagicNumberInterpolationImpl.apply
        self.register_buffer("magic_number", torch.tensor(magic_number))

    def forward(self, x):
        """Interpolate magic number.

        Parameters
        ----------
        x : Tensor [shape=(B, N, D) or (N, D) or (N,)]
            Data containing magic number.

        Returns
        -------
        y : Tensor [shape=(B, N, D) or (N, D) or (N,)]
            Data after interpolation.

        Examples
        --------
        >>> x = torch.tensor([0, 1, 2, 0, 4, 0]).float()
        >>> x
        tensor([0., 1., 2., 0., 4., 0.])
        >>> magic_intpl = diffsptk.MagicNumberInterpolation(0)
        >>> y = magic_intpl(x)
        >>> y
        tensor([1., 1., 2., 3., 4., 4.])

        """
        y = self.impl(x, self.magic_number)
        return y


class MagicNumberInterpolationImpl(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, magic_number):
        ctx.save_for_backward(x, magic_number)

        # Pass through if magic number is not found
        if torch.all(x != magic_number):
            return x

        d = x.dim()
        if d == 1:
            x = x.view(1, -1, 1)
        elif d == 2:
            x = x.unsqueeze(0)
        assert x.dim() == 3, "Input must be 3D tensor"
        B, T, D = x.shape

        def compute_lerp_inputs(x, magic_number):
            is_magic_number = x == magic_number

            starts = []
            ends = []
            weights = []
            for i in range(x.size(0)):
                uniques, counts = torch.unique_consecutive(
                    is_magic_number[i],
                    return_inverse=False,
                    return_counts=True,
                    dim=-1,
                )
                w = torch.repeat_interleave(uniques / (counts + 1), counts, dim=-1)
                if uniques[0]:
                    w[..., : counts[0]] = 0
                w = torch.cumsum(w, dim=-1)
                w = w - torch.cumsum(w * ~is_magic_number[i], dim=-1)
                if uniques[0]:
                    w[..., : counts[0]] = 1
                if uniques[-1]:
                    w[..., -counts[-1] :] = 0

                uniques, indices = torch.unique_consecutive(
                    x[i],
                    return_inverse=True,
                    return_counts=False,
                    dim=-1,
                )
                pos = uniques == magic_number
                uniques[pos] = torch.roll(uniques, 1, dims=-1)[pos]
                s = uniques[indices]
                uniques[pos] = torch.roll(uniques, -1, dims=-1)[pos]
                e = uniques[indices]

                starts.append(s)
                ends.append(e)
                weights.append(w)

            starts = torch.stack(starts)
            ends = torch.stack(ends)
            weights = torch.stack(weights)
            return starts, ends, weights

        x = x.mT.reshape(B * D, T)
        starts, ends, weights = compute_lerp_inputs(x, magic_number)
        y = torch.lerp(starts, ends, weights)
        y = y.reshape(B, D, T).mT

        if d == 1:
            y = y.view(-1)
        elif d == 2:
            y = y.squeeze(0)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        x, magic_number = ctx.saved_tensors
        return grad_output * (x != magic_number), None
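
Note that backward multiplies the incoming gradient by (x != magic_number), so interpolated positions receive no gradient; only positions that carried real values do. A usage sketch built on the doctest values above:

import torch
import diffsptk

magic_intpl = diffsptk.MagicNumberInterpolation(0)

x = torch.tensor([0.0, 1.0, 2.0, 0.0, 4.0, 0.0], requires_grad=True)
y = magic_intpl(x)
print(y)  # tensor([1., 1., 2., 3., 4., 4.], grad_fn=...)

# Gradient flows only where x was not the magic number.
y.sum().backward()
print(x.grad)  # tensor([0., 1., 1., 0., 1., 0.])
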
3 changes: 2 additions & 1 deletion diffsptk/core/mgc2mgc.py
@@ -17,6 +17,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import cexp
 from ..misc.utils import clog
@@ -79,7 +80,7 @@ def forward(self, c1):
             Output cepstrum.
 
         """
-        c01 = torch.cat((c1[..., :1] * 0, c1[..., 1:]), dim=-1)
+        c01 = F.pad(c1[..., 1:], (1, 0))
         C1 = torch.fft.fft(c01, n=self.n_fft)
 
         if self.in_gamma == 0: