
Commit

Merge pull request #46 from sp-nitech/next_release
Version 1.0.0
takenori-y authored Jul 23, 2023
2 parents cb03339 + ac28e90 commit b9e570b
Showing 33 changed files with 490 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
       max-parallel: 4
       matrix:
         python-version: [3.8]
-        pytorch-version: [1.10.0, 2.0.1]
+        pytorch-version: [1.11.0, 2.0.1]
 
     steps:
       - name: Clone
2 changes: 1 addition & 1 deletion Makefile
@@ -24,7 +24,7 @@ init:
 dev:
 	test -d venv || python$(PYTHON_VERSION) -m venv venv; \
 	. ./venv/bin/activate; python -m pip install pip --upgrade; \
-	python -m pip install torch==1.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
+	python -m pip install torch==1.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
 	python -m pip install -e .[dev]
 
 dist:
6 changes: 3 additions & 3 deletions README.md
@@ -3,10 +3,10 @@ diffsptk
 *diffsptk* is a differentiable version of [SPTK](https://github.com/sp-nitech/SPTK) based on the PyTorch framework.
 
 [![Latest Manual](https://img.shields.io/badge/docs-latest-blue.svg)](https://sp-nitech.github.io/diffsptk/latest/)
-[![Stable Manual](https://img.shields.io/badge/docs-stable-blue.svg)](https://sp-nitech.github.io/diffsptk/0.6.0/)
+[![Stable Manual](https://img.shields.io/badge/docs-stable-blue.svg)](https://sp-nitech.github.io/diffsptk/1.0.0/)
 [![Downloads](https://pepy.tech/badge/diffsptk)](https://pepy.tech/project/diffsptk)
 [![Python Version](https://img.shields.io/pypi/pyversions/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
-[![PyTorch Version](https://img.shields.io/badge/pytorch-1.10.0%20%7C%202.0.1-orange.svg)](https://pypi.python.org/pypi/diffsptk)
+[![PyTorch Version](https://img.shields.io/badge/pytorch-1.11.0%20%7C%202.0.1-orange.svg)](https://pypi.python.org/pypi/diffsptk)
 [![PyPI Version](https://img.shields.io/pypi/v/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
 [![Codecov](https://codecov.io/gh/sp-nitech/diffsptk/branch/master/graph/badge.svg)](https://app.codecov.io/gh/sp-nitech/diffsptk)
 [![License](https://img.shields.io/github/license/sp-nitech/diffsptk.svg)](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE)
@@ -17,7 +17,7 @@ diffsptk
 Requirements
 ------------
 - Python 3.8+
-- PyTorch 1.10.0+
+- PyTorch 1.11.0+
 
 
 Documentation
5 changes: 3 additions & 2 deletions diffsptk/core/__init__.py
@@ -44,13 +44,13 @@
 from .lar2par import LogAreaRatioToParcorCoefficients
 from .lbg import LindeBuzoGrayAlgorithm
 from .lbg import LindeBuzoGrayAlgorithm as LBG
-from .levdur import PseudoLevinsonDurbinRecursion
-from .levdur import PseudoLevinsonDurbinRecursion as LevinsonDurbinRecursion
+from .levdur import LevinsonDurbin
 from .linear_intpl import LinearInterpolation
 from .lpc import LinearPredictiveCodingAnalysis
 from .lpc import LinearPredictiveCodingAnalysis as LPC
 from .lpc2par import LinearPredictiveCoefficientsToParcorCoefficients
 from .lpccheck import LinearPredictiveCoefficientsStabilityCheck
+from .magic_intpl import MagicNumberInterpolation
 from .mc2b import MelCepstrumToMLSADigitalFilterCoefficients
 from .mcpf import MelCepstrumPostfiltering
 from .mfcc import MelFrequencyCepstralCoefficientsAnalysis
@@ -78,6 +78,7 @@
 from .pqmf import PseudoQuadratureMirrorFilterBanks
 from .pqmf import PseudoQuadratureMirrorFilterBanks as PQMF
 from .quantize import UniformQuantization
+from .rlevdur import ReverseLevinsonDurbin
 from .rmse import RootMeanSquaredError
 from .rmse import RootMeanSquaredError as RMSE
 from .root_pol import DurandKernerMethod
2 changes: 1 addition & 1 deletion diffsptk/core/ap.py
@@ -248,7 +248,7 @@ def forward(self, x, f0):
 
         H = torch.cat((H_alpha, H_beta), dim=-1)  # (B, N, J, 6)
         w = self.window[i, : self.segment_length[i]]  # (J,)
-        Hw = H.transpose(-2, -1) * w  # (B, N, 6, J)
+        Hw = H.mT * w  # (B, N, 6, J)
         R = torch.matmul(Hw, H)  # (B, N, 6, 6)
 
         index_gamma = origin.unsqueeze(-1) + j[..., 1:-1]  # (B, N, J)
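
The change above, repeated in excite.py, gmm.py, linear_intpl.py, and magic_intpl.py below, swaps transpose(-2, -1) for the Tensor.mT property. Since .mT first appeared in PyTorch 1.11, this is presumably what motivates the 1.10.0 to 1.11.0 bumps in ci.yml, the Makefile, and README.md. A minimal sketch of the equivalence:

import torch

x = torch.randn(2, 3, 6, 4)  # e.g. a (B, N, J, 6)-style batch of matrices

# .mT (PyTorch >= 1.11) swaps the last two dimensions, exactly like the
# explicit transpose it replaces.
assert torch.equal(x.mT, x.transpose(-2, -1))
print(x.mT.shape)  # torch.Size([2, 3, 4, 6])
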
9 changes: 5 additions & 4 deletions diffsptk/core/excite.py
@@ -16,6 +16,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import UNVOICED_SYMBOL
 from .linear_intpl import LinearInterpolation
@@ -79,16 +80,16 @@ def forward(self, p):
         mask = torch.repeat_interleave(mask, self.frame_period, dim=-1)
 
         # Extend right side for interpolation.
-        tmp_mask = torch.cat((base_mask[..., :1] * 0, base_mask), dim=-1)
+        tmp_mask = F.pad(base_mask, (1, 0))
         tmp_mask = torch.eq(tmp_mask[..., 1:] - tmp_mask[..., :-1], -1)
         p[tmp_mask] = torch.roll(p, 1, dims=-1)[tmp_mask]
 
         # Interpolate pitch.
         if p.dim() != 1:
-            p = p.transpose(-2, -1)
+            p = p.mT
         p = self.linear_intpl(p)
         if p.dim() != 1:
-            p = p.transpose(-2, -1)
+            p = p.mT
         p *= mask
 
         # Compute phase.
@@ -101,7 +102,7 @@
 
         if self.voiced_region == "pulse":
             r = torch.ceil(phase)
-            r = torch.cat((r[..., :1] * 0, r), dim=-1)
+            r = F.pad(r, (1, 0))
             pulse_pos = torch.ge(r[..., 1:] - r[..., :-1], 1)
             e = torch.zeros_like(p)
             e[pulse_pos] = torch.sqrt(p[pulse_pos])
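
Both replacements in this file trade the manual torch.cat left-padding idiom for a single F.pad call; grpdelay.py and mgc2mgc.py below receive the same refactor. A minimal sketch of the equivalence, on an illustrative 1-D tensor:

import torch
import torch.nn.functional as F

x = torch.tensor([3.0, 1.0, 4.0])

# Old idiom: prepend one zero by concatenating a zeroed-out slice.
old = torch.cat((x[..., :1] * 0, x), dim=-1)

# New idiom: constant-pad one element on the left of the last axis.
new = F.pad(x, (1, 0))
assert torch.equal(old, new)  # tensor([0., 3., 1., 4.])

# grpdelay.py uses value=1 instead, so the pad restores a leading one
# (the gain slot of the denominator polynomial) rather than a zero.
a1 = torch.tensor([0.5, -0.2])
print(F.pad(a1, (1, 0), value=1))  # tensor([ 1.0000,  0.5000, -0.2000])
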
9 changes: 0 additions & 9 deletions diffsptk/core/fftcep.py
@@ -79,11 +79,6 @@ def forward(self, x):
                 [-0.8539,  4.6173, -0.5496, -0.3207]])
 
         """
-        # Torch's pad only supports 3D, 4D, 5D padding with non-constant padding.
-        d = x.dim()
-        for _ in range(3 - d):
-            x = x.unsqueeze(0)
-
         M = self.cep_order
         H = self.fft_length // 2
 
@@ -102,8 +97,4 @@
 
         indices = [0, M] if H == M else [0]
         v[..., indices] *= 0.5
-
-        # Revert shape.
-        for _ in range(3 - d):
-            v = v.squeeze(0)
         return v
2 changes: 1 addition & 1 deletion diffsptk/core/gmm.py
@@ -316,7 +316,7 @@ def forward(self, x):
             y = posterior.sum(dim=0)
             nu = px / y.view(-1, 1)
             nm = torch.matmul(nu.unsqueeze(-1), self.mu.unsqueeze(-2))
-            mn = nm.transpose(1, 2)
+            mn = nm.mT
             a = pxx - y.view(-1, 1, 1) * (nm + mn - mm)
             b = xi.view(-1, 1, 1) * self.ubm_sigma
             diff = self.ubm_mu - self.mu
3 changes: 1 addition & 2 deletions diffsptk/core/grpdelay.py
@@ -86,8 +86,7 @@ def forward(self, b, a=None):
 
         # Remove gain.
         K, a1 = torch.split(a, [1, order], dim=-1)
-        K = K * 0 + 1
-        a2 = torch.cat((K, a1), dim=-1).unsqueeze(-1)
+        a2 = F.pad(a1, (1, 0), value=1).unsqueeze(-1)
 
         # Perform full convolution.
         b1 = F.pad(b, (order, order))
23 changes: 17 additions & 6 deletions diffsptk/core/levdur.py
@@ -17,18 +17,27 @@
 import torch
 import torch.nn as nn
 
+from ..misc.utils import check_size
 from ..misc.utils import symmetric_toeplitz
 
 
-class PseudoLevinsonDurbinRecursion(nn.Module):
+class LevinsonDurbin(nn.Module):
     """See `this page <https://sp-nitech.github.io/sptk/latest/main/levdur.html>`_
-    for details. Note that the current implementation does not use the Durbin's
-    algorithm though the class name includes it.
+    for details.
+
+    Parameters
+    ----------
+    lpc_order : int >= 0 [scalar]
+        Order of LPC coefficients, :math:`M`.
 
     """
 
-    def __init__(self):
-        super(PseudoLevinsonDurbinRecursion, self).__init__()
+    def __init__(self, lpc_order):
+        super(LevinsonDurbin, self).__init__()
+
+        self.lpc_order = lpc_order
+
+        assert 0 <= self.lpc_order
 
     def forward(self, r):
         """Solve a Yule-Walker linear system.
@@ -48,12 +57,14 @@ def forward(self, r):
         >>> x = diffsptk.nrand(4)
         tensor([ 0.8226, -0.0284, -0.5715,  0.2127,  0.1217])
         >>> acorr = diffsptk.AutocorrelationAnalysis(2, 5)
-        >>> levdur = diffsptk.LevinsonDurbinRecursion()
+        >>> levdur = diffsptk.LevinsonDurbin(2)
         >>> a = levdur(acorr(x))
        >>> a
         tensor([0.8726, 0.1475, 0.5270])
 
         """
+        check_size(r.size(-1), self.lpc_order + 1, "dimension of autocorrelation")
+
         # Make Toeplitz matrix.
         R = symmetric_toeplitz(r[..., :-1])
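
The body builds a symmetric Toeplitz matrix from r[..., :-1] and, as the former Pseudo name hinted, solves the Yule-Walker normal equations directly rather than by the classical recursion. A standalone sketch of that direct approach; the sign and gain conventions here are assumptions for illustration, not necessarily diffsptk's exact ones:

import torch

def yule_walker(r):
    # r holds autocorrelations r[0], ..., r[M] for LPC order M.
    M = r.numel() - 1
    idx = torch.arange(M)
    # Symmetric Toeplitz matrix R[i, j] = r[|i - j|].
    R = r[:-1][(idx.unsqueeze(0) - idx.unsqueeze(1)).abs()]
    # Solve R a = -r[1:] for the predictor coefficients.
    a = torch.linalg.solve(R, -r[1:])
    # The prediction error power yields the gain K (one common convention).
    K = torch.sqrt(r[0] + torch.dot(a, r[1:]))
    return torch.cat((K.unsqueeze(0), a))

print(yule_walker(torch.tensor([1.0, 0.5, 0.25])))  # [K, a1, a2]
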
4 changes: 2 additions & 2 deletions diffsptk/core/linear_intpl.py
@@ -76,7 +76,7 @@ def forward(self, x):
         assert x.dim() == 3, "Input must be 3D tensor"
         B, T, D = x.shape
 
-        x = x.transpose(1, 2)
+        x = x.mT
         x = self.pad(x)
         x = F.interpolate(
             x,
@@ -86,7 +86,7 @@
         )[
             ..., :-1
         ]  # Remove the padded value.
-        y = x.transpose(1, 2).reshape(B, -1, D)
+        y = x.mT.reshape(B, -1, D)
 
         if d == 1:
             y = y.view(-1)
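
For reference, the interpolation trick used here can be reproduced standalone: pad the last frame on the right, let F.interpolate place the frame values every frame_period samples, then drop the padded endpoint. The replicate pad and the align_corners choice below are assumptions that make the arithmetic work out, not a quote of the elided arguments:

import torch
import torch.nn.functional as F

frame_period = 4
x = torch.tensor([[[1.0, 3.0, 2.0]]])  # (B, D, T) frame-rate values

# Replicate the last frame so it has a right endpoint to interpolate toward.
x = F.pad(x, (0, 1), mode="replicate")  # (B, D, T + 1)

# align_corners=True places the original frames exactly every frame_period
# samples; the trailing padded point is then dropped, as in the code above.
size = (x.size(-1) - 1) * frame_period + 1
y = F.interpolate(x, size=size, mode="linear", align_corners=True)[..., :-1]
print(y.shape)  # torch.Size([1, 1, 12]), i.e. T * frame_period samples
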
4 changes: 2 additions & 2 deletions diffsptk/core/lpc.py
@@ -17,7 +17,7 @@
 import torch.nn as nn
 
 from .acorr import AutocorrelationAnalysis
-from .levdur import PseudoLevinsonDurbinRecursion
+from .levdur import LevinsonDurbin
 
 
 class LinearPredictiveCodingAnalysis(nn.Module):
@@ -39,7 +39,7 @@ def __init__(self, lpc_order, frame_length):
 
         self.lpc = nn.Sequential(
             AutocorrelationAnalysis(lpc_order, frame_length),
-            PseudoLevinsonDurbinRecursion(),
+            LevinsonDurbin(lpc_order),
         )
 
     def forward(self, x):
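
With the rename, LinearPredictiveCodingAnalysis reads as exactly the two-stage pipeline it is. A usage sketch, assuming the output layout of the levdur example above (gain first, then coefficients):

import diffsptk

# LPC analysis = autocorrelation followed by Levinson-Durbin.
lpc = diffsptk.LPC(2, 5)  # lpc_order=2, frame_length=5

x = diffsptk.nrand(4)  # a length-5 random frame, as in the levdur example
a = lpc(x)
print(a)  # (3,): gain K followed by a1, a2
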
143 changes: 143 additions & 0 deletions diffsptk/core/magic_intpl.py
@@ -0,0 +1,143 @@
# ------------------------------------------------------------------------ #
# Copyright 2022 SPTK Working Group                                        #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#     http://www.apache.org/licenses/LICENSE-2.0                           #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
# ------------------------------------------------------------------------ #

import torch
import torch.nn as nn

from ..misc.utils import UNVOICED_SYMBOL


class MagicNumberInterpolation(nn.Module):
    """See `this page <https://sp-nitech.github.io/sptk/latest/main/magic_intpl.html>`_
    for details.

    Parameters
    ----------
    magic_number : float [scalar]
        Magic number.

    """

    def __init__(self, magic_number=UNVOICED_SYMBOL):
        super(MagicNumberInterpolation, self).__init__()

        self.impl = MagicNumberInterpolationImpl.apply
        self.register_buffer("magic_number", torch.tensor(magic_number))

    def forward(self, x):
        """Interpolate magic number.

        Parameters
        ----------
        x : Tensor [shape=(B, N, D) or (N, D) or (N,)]
            Data containing magic number.

        Returns
        -------
        y : Tensor [shape=(B, N, D) or (N, D) or (N,)]
            Data after interpolation.

        Examples
        --------
        >>> x = torch.tensor([0, 1, 2, 0, 4, 0]).float()
        >>> x
        tensor([0., 1., 2., 0., 4., 0.])
        >>> magic_intpl = diffsptk.MagicNumberInterpolation(0)
        >>> y = magic_intpl(x)
        >>> y
        tensor([1., 1., 2., 3., 4., 4.])

        """
        y = self.impl(x, self.magic_number)
        return y


class MagicNumberInterpolationImpl(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, magic_number):
        ctx.save_for_backward(x, magic_number)

        # Pass through if magic number is not found
        if torch.all(x != magic_number):
            return x

        d = x.dim()
        if d == 1:
            x = x.view(1, -1, 1)
        elif d == 2:
            x = x.unsqueeze(0)
        assert x.dim() == 3, "Input must be 3D tensor"
        B, T, D = x.shape

        def compute_lerp_inputs(x, magic_number):
            is_magic_number = x == magic_number

            starts = []
            ends = []
            weights = []
            for i in range(x.size(0)):
                uniques, counts = torch.unique_consecutive(
                    is_magic_number[i],
                    return_inverse=False,
                    return_counts=True,
                    dim=-1,
                )
                w = torch.repeat_interleave(uniques / (counts + 1), counts, dim=-1)
                if uniques[0]:
                    w[..., : counts[0]] = 0
                w = torch.cumsum(w, dim=-1)
                w = w - torch.cumsum(w * ~is_magic_number[i], dim=-1)
                if uniques[0]:
                    w[..., : counts[0]] = 1
                if uniques[-1]:
                    w[..., -counts[-1] :] = 0

                uniques, indices = torch.unique_consecutive(
                    x[i],
                    return_inverse=True,
                    return_counts=False,
                    dim=-1,
                )
                pos = uniques == magic_number
                uniques[pos] = torch.roll(uniques, 1, dims=-1)[pos]
                s = uniques[indices]
                uniques[pos] = torch.roll(uniques, -1, dims=-1)[pos]
                e = uniques[indices]

                starts.append(s)
                ends.append(e)
                weights.append(w)

            starts = torch.stack(starts)
            ends = torch.stack(ends)
            weights = torch.stack(weights)
            return starts, ends, weights

        x = x.mT.reshape(B * D, T)
        starts, ends, weights = compute_lerp_inputs(x, magic_number)
        y = torch.lerp(starts, ends, weights)
        y = y.reshape(B, D, T).mT

        if d == 1:
            y = y.view(-1)
        elif d == 2:
            y = y.squeeze(0)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        x, magic_number = ctx.saved_tensors
        return grad_output * (x != magic_number), None
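
Note that backward multiplies the incoming gradient by (x != magic_number), so interpolated positions receive no gradient; only positions that carried real values do. A usage sketch built on the doctest values above:

import torch
import diffsptk

magic_intpl = diffsptk.MagicNumberInterpolation(0)

x = torch.tensor([0.0, 1.0, 2.0, 0.0, 4.0, 0.0], requires_grad=True)
y = magic_intpl(x)
print(y)  # tensor([1., 1., 2., 3., 4., 4.], grad_fn=...)

# Gradient flows only where x was not the magic number.
y.sum().backward()
print(x.grad)  # tensor([0., 1., 1., 0., 1., 0.])
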
3 changes: 2 additions & 1 deletion diffsptk/core/mgc2mgc.py
@@ -17,6 +17,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from ..misc.utils import cexp
 from ..misc.utils import clog
@@ -79,7 +80,7 @@ def forward(self, c1):
             Output cepstrum.
 
         """
-        c01 = torch.cat((c1[..., :1] * 0, c1[..., 1:]), dim=-1)
+        c01 = F.pad(c1[..., 1:], (1, 0))
         C1 = torch.fft.fft(c01, n=self.n_fft)
 
         if self.in_gamma == 0: