Release 1.0.28

* The DSP library now builds for Apple M1 chips and above on MacOS. * Implemented abs_max2, abs_min2, abs_max3 and abs_min3 functions. * Implemented sign_min, sign_max, sign_minmax functions. * Updated build scripts. * Updated module versions in dependencies.
lsp-plugins · Dec 16, 2024 · 7db5651 · 7db5651
2 parents cfe499f + 32362fa
commit 7db5651
Show file tree

Hide file tree

Showing 192 changed files with 11,106 additions and 1,853 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -2,6 +2,13 @@
 * RECENT CHANGES
 *******************************************************************************
 
+=== 1.0.28 ===
+* The DSP library now builds for Apple M1 chips and above on MacOS.
+* Implemented abs_max2, abs_min2, abs_max3 and abs_min3 functions.
+* Implemented sign_min, sign_max, sign_minmax functions.
+* Updated build scripts.
+* Updated module versions in dependencies.
+
 === 1.0.27 ===
 * Updated build scripts.
 * Updated module versions in dependencies.

diff --git a/README.md b/README.md
@@ -6,8 +6,8 @@ This library provides set of functions that perform SIMD-optimized
 computing on several hardware architectures.
 
 Currently supported set of SIMD extensions:
-  * i586 architecture (32-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and AVX512;
-  * x86_64 architecture (64-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and AVX512;
+  * i586 architecture (32-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and partial support of AVX512;
+  * x86_64 architecture (64-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and partial support of AVX512;
   * armv7 architecture (32-bit): NEON;
   * AArch64 architecture (64-bit): ASIMD.
 
@@ -37,15 +37,16 @@ Current set of functions provided:
 The build and correct unit test execution has been confirmed for following platforms:
 * FreeBSD
 * GNU/Linux
+* MacOS
 * OpenBSD
 * Windows 32-bit
 * Windows 64-bit
 
 ## Supported architectures
 
 The support of following list of hardware architectures has been implemented:
-* i386 (32-bit) - full support (AVX-512 on the way).
-* x86_64 (64-bit) - full support (AVX-512 on the way).
+* i386 (32-bit) - full support (SSE1-SSE3, AVX, AVX2, partial support for AVX-512).
+* x86_64 (64-bit) - full support (SSE1-SSE3, AVX, AVX2, partial support for AVX-512).
 * ARMv6A - full support.
 * ARMv7A - full support.
 * AArch64 - full support.

diff --git a/include/lsp-plug.in/dsp/common/pan.h b/include/lsp-plug.in/dsp/common/pan.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
+ *
+ * This file is part of lsp-dsp-lib
+ * Created on: 10 нояб. 2024 г.
+ *
+ * lsp-dsp-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * lsp-dsp-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef LSP_PLUG_IN_DSP_COMMON_PAN_H_
+#define LSP_PLUG_IN_DSP_COMMON_PAN_H_
+
+#include <lsp-plug.in/dsp/common/types.h>
+
+LSP_DSP_LIB_BEGIN_NAMESPACE
+
+#pragma pack(push, 1)
+
+/**
+ * Definition for the panorama calculation function (parallel form)
+ *
+ * @param dst destination buffer to store value
+ * @param l left channel data
+ * @param r right channel data
+ * @param dfl default value if it is not possible to compute panorama
+ * @param count number of samples to process
+ */
+typedef void (* LSP_DSP_LIB_TYPE(depan_t))(float *dst, const float *l, const float *r, float dfl, size_t count);
+
+#pragma pack(pop)
+
+LSP_DSP_LIB_END_NAMESPACE
+
+/**
+ * Calculate the linear pan law panorama position between left and right channels (parallel form):
+ *   pan = abs(R) / (abs(L) + abs(R))
+ *
+ * @param dst destination buffer to store value
+ * @param l left channel data
+ * @param r right channel data
+ * @param dfl default value if it is not possible to compute panorama
+ * @param count number of samples to process
+ */
+LSP_DSP_LIB_SYMBOL(void, depan_lin, float *dst, const float *l, const float *r, float dfl, size_t count);
+
+/**
+ * Calculate the equal power pan law (quadratic) panorama position between left and right channels (parallel form):
+ *   pan = R^2 / (L^2 + R^2)
+ *
+ * @param dst destination buffer to store value
+ * @param l left channel data
+ * @param r right channel data
+ * @param dfl default value if it is not possible to compute panorama
+ * @param count number of samples to process
+ */
+LSP_DSP_LIB_SYMBOL(void, depan_eqpow, float *dst, const float *l, const float *r, float dfl, size_t count);
+
+#endif /* LSP_PLUG_IN_DSP_COMMON_PAN_H_ */
diff --git a/include/lsp-plug.in/dsp/common/pcomplex.h b/include/lsp-plug.in/dsp/common/pcomplex.h
@@ -238,4 +238,13 @@ LSP_DSP_LIB_SYMBOL(void, pcomplex_r2c_div2, float *dst, const float *src, size_t
  */
 LSP_DSP_LIB_SYMBOL(void, pcomplex_r2c_rdiv2, float *dst, const float *src, size_t count);
 
+/** Compute complex correlation between two sources and store to the result array
+ *
+ * @param dst_corr array to store normalized correlation
+ * @param src1 set of complex numbers
+ * @param src2 set of complex numbers
+ * @param count number of elements to process
+ */
+LSP_DSP_LIB_SYMBOL(void, pcomplex_corr, float *dst_corr, const float *src1, const float *src2, size_t count);
+
 #endif /* LSP_PLUG_IN_DSP_COMMON_PCOMPLEX_H_ */
diff --git a/include/lsp-plug.in/dsp/common/pmath/abs_vv.h b/include/lsp-plug.in/dsp/common/pmath/abs_vv.h
@@ -87,6 +87,22 @@ LSP_DSP_LIB_SYMBOL(void, abs_div2, float *dst, const float *src, size_t count);
  */
 LSP_DSP_LIB_SYMBOL(void, abs_rdiv2, float *dst, const float *src, size_t count);
 
+/** Calculate absolute values: dst[i] = max(abs(src[i]), dst[i])
+ *
+ * @param dst destination vector
+ * @param src source vector
+ * @param count number of elements
+ */
+LSP_DSP_LIB_SYMBOL(void, abs_max2, float *dst, const float *src, size_t count);
+
+/** Calculate absolute values: dst[i] = min(abs(src[i]), dst[i])
+ *
+ * @param dst destination vector
+ * @param src source vector
+ * @param count number of elements
+ */
+LSP_DSP_LIB_SYMBOL(void, abs_min2, float *dst, const float *src, size_t count);
+
 /** Calculate absolute values: dst[i] = src1[i] + abs(src2[i])
  *
  * @param dst destination vector
@@ -141,4 +157,22 @@ LSP_DSP_LIB_SYMBOL(void, abs_div3, float *dst, const float *src1, const float *s
  */
 LSP_DSP_LIB_SYMBOL(void, abs_rdiv3, float *dst, const float *src1, const float *src2, size_t count);
 
+/** Calculate absolute values: dst[i] = max(src1[i], abs(src2[i]))
+ *
+ * @param dst destination vector
+ * @param src1 source vector 1
+ * @param src2 source vector 2
+ * @param count number of elements
+ */
+LSP_DSP_LIB_SYMBOL(void, abs_max3, float *dst, const float *src1, const float *src2, size_t count);
+
+/** Calculate absolute values: dst[i] = min(src1[i], abs(src2[i]))
+ *
+ * @param dst destination vector
+ * @param src1 source vector 1
+ * @param src2 source vector 2
+ * @param count number of elements
+ */
+LSP_DSP_LIB_SYMBOL(void, abs_min3, float *dst, const float *src1, const float *src2, size_t count);
+
 #endif /* LSP_PLUG_IN_DSP_COMMON_PMATH_ABS_VV_H_ */
diff --git a/include/lsp-plug.in/dsp/common/search/minmax.h b/include/lsp-plug.in/dsp/common/search/minmax.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
  * Created on: 31 мар. 2020 г.
@@ -48,6 +48,14 @@ LSP_DSP_LIB_SYMBOL(float, max, const float *src, size_t count);
  */
 LSP_DSP_LIB_SYMBOL(float, abs_max, const float *src, size_t count);
 
+/** Get maximum ignoring sign: result = src[i] : abs(src[i]) -> max
+ *
+ * @param src source array
+ * @param count number of elements
+ * @return result
+ */
+LSP_DSP_LIB_SYMBOL(float, sign_max, const float *src, size_t count);
+
 /** Get absolute minimum: result = min { abs(src[i]) }
  *
  * @param src source array
@@ -56,9 +64,19 @@ LSP_DSP_LIB_SYMBOL(float, abs_max, const float *src, size_t count);
  */
 LSP_DSP_LIB_SYMBOL(float, abs_min, const float *src, size_t count);
 
+/** Get minimum ignoring sign: result = src[i] : abs(src[i]) -> min
+ *
+ * @param src source array
+ * @param count number of elements
+ * @return result
+ */
+LSP_DSP_LIB_SYMBOL(float, sign_min, const float *src, size_t count);
+
 /** Calculate min { src }, max { src }
  *
  * @param src source vector
+ * @param min pointer to store minimum value
+ * @param max pointer to store maximum value
  * @param count number of elements
  * @return maximum value
  */
@@ -67,9 +85,23 @@ LSP_DSP_LIB_SYMBOL(void, minmax, const float *src, size_t count, float *min, flo
 /** Calculate min { abs(src) }, max { abs(src) }
  *
  * @param src source vector
+ * @param min pointer to store minimum value
+ * @param max pointer to store maximum value
  * @param count number of elements
  * @return maximum value
  */
 LSP_DSP_LIB_SYMBOL(void, abs_minmax, const float *src, size_t count, float *min, float *max);
 
+/** Calculate:
+ *    min = src[i] : abs(src[i]) -> min,
+ *    max = src[i] : abs(src[i]) -> max
+ *
+ * @param src source vector
+ * @param min pointer to store minimum value
+ * @param max pointer to store maximum value
+ * @param count number of elements
+ * @note the function returns no value, results are stored via the min and max pointers
+ */
+LSP_DSP_LIB_SYMBOL(void, sign_minmax, const float *src, size_t count, float *min, float *max);
+
 #endif /* LSP_PLUG_IN_DSP_COMMON_SEARCH_MINMAX_H_ */
diff --git a/include/lsp-plug.in/dsp/dsp.h b/include/lsp-plug.in/dsp/dsp.h
@@ -1,6 +1,6 @@
 /*
- * Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
- *           (C) 2023 Vladimir Sadovnikov <sadko4u@gmail.com>
+ * Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
+ *           (C) 2024 Vladimir Sadovnikov <sadko4u@gmail.com>
  *
  * This file is part of lsp-dsp-lib
  * Created on: 31 мар. 2020 г.
@@ -58,6 +58,7 @@
 #include <lsp-plug.in/dsp/common/graphics.h>
 #include <lsp-plug.in/dsp/common/hmath.h>
 #include <lsp-plug.in/dsp/common/mix.h>
+#include <lsp-plug.in/dsp/common/pan.h>
 #include <lsp-plug.in/dsp/common/msmatrix.h>
 #include <lsp-plug.in/dsp/common/pcomplex.h>
 #include <lsp-plug.in/dsp/common/pmath.h>

diff --git a/include/lsp-plug.in/dsp/version.h b/include/lsp-plug.in/dsp/version.h
@@ -25,7 +25,7 @@
 // Define version of headers
 #define LSP_DSP_LIB_MAJOR           1
 #define LSP_DSP_LIB_MINOR           0
-#define LSP_DSP_LIB_MICRO           27
+#define LSP_DSP_LIB_MICRO           28
 
 #if defined(__WINDOWS__) || defined(__WIN32__) || defined(__WIN64__) || defined(_WIN64) || defined(_WIN32) || defined(__WINNT) || defined(__WINNT__)
     #define LSP_DSP_LIB_EXPORT_MODIFIER     __declspec(dllexport)

diff --git a/include/private/dsp/arch/aarch64/asimd/correlation.h b/include/private/dsp/arch/aarch64/asimd/correlation.h
@@ -139,7 +139,7 @@ namespace lsp
 
         static const float corr_const[] __lsp_aligned16 =
         {
-            LSP_DSP_VEC8(1e-10f)
+            LSP_DSP_VEC8(1e-18f)
         };
 
         void corr_incr(dsp::correlation_t *corr, float *dst,
@@ -213,9 +213,9 @@ namespace lsp
                 __ASM_EMIT("dup         v0.4s, v9.s[3]")                    /* v0   = xv' = T[7] */
                 __ASM_EMIT("dup         v1.4s, v5.s[3]")                    /* v1   = xa' = BA[7] */
                 __ASM_EMIT("dup         v2.4s, v7.s[3]")                    /* v2   = xb' = BB[7] */
-                __ASM_EMIT("ldp         q14, q15, [%[CORR_CC]]")            /* v14  = 1e-10, v15 = 1e-10 */
+                __ASM_EMIT("ldp         q14, q15, [%[CORR_CC]]")            /* v14  = threshold, v15 = threshold */
 
-                __ASM_EMIT("fcmge       v14.4s, v8.4s, v14.4s")             /* v14  = T >= 1e-10 */
+                __ASM_EMIT("fcmge       v14.4s, v8.4s, v14.4s")             /* v14  = T >= threshold */
                 __ASM_EMIT("fcmge       v15.4s, v9.4s, v15.4s")
                 __ASM_EMIT("frsqrte     v4.4s, v10.4s")                     /* v4   = x0 */
                 __ASM_EMIT("frsqrte     v5.4s, v11.4s")
@@ -233,7 +233,7 @@ namespace lsp
                 __ASM_EMIT("fmul        v11.4s, v5.4s, v13.4s")
                 __ASM_EMIT("fmul        v10.4s, v8.4s, v10.4s")             /* v10  = T/svrtf(B) */
                 __ASM_EMIT("fmul        v11.4s, v9.4s, v11.4s")
-                __ASM_EMIT("and         v10.16b, v10.16b, v14.16b")         /* v10  = (T >= 1e-10) ? T/svrt(B) : 0 */
+                __ASM_EMIT("and         v10.16b, v10.16b, v14.16b")         /* v10  = (T >= threshold) ? T/sqrt(B) : 0 */
                 __ASM_EMIT("and         v11.16b, v11.16b, v15.16b")
                 __ASM_EMIT("add         %[a_head], %[a_head], #0x20")
                 __ASM_EMIT("add         %[b_head], %[b_head], #0x20")
@@ -278,9 +278,9 @@ namespace lsp
                 __ASM_EMIT("dup         v1.4s, v4.s[3]")                    /* v1   = xa' = BA[7] */
                 __ASM_EMIT("dup         v2.4s, v6.s[3]")                    /* v2   = xb' = BB[7] */
                 __ASM_EMIT("dup         v0.4s, v8.s[3]")                    /* v0   = xv' = T[7] */
-                __ASM_EMIT("ldr         q14, [%[CORR_CC]]")                 /* v14  = 1e-10 */
+                __ASM_EMIT("ldr         q14, [%[CORR_CC]]")                 /* v14  = threshold */
 
-                __ASM_EMIT("fcmge       v14.4s, v8.4s, v14.4s")             /* v14  = T >= 1e-10 */
+                __ASM_EMIT("fcmge       v14.4s, v8.4s, v14.4s")             /* v14  = T >= threshold */
                 __ASM_EMIT("frsqrte     v4.4s, v10.4s")                     /* v4   = x0 */
                 __ASM_EMIT("fmul        v6.4s, v4.4s, v10.4s")              /* v6   = R * x0 */
                 __ASM_EMIT("frsqrts     v12.4s, v6.4s, v4.4s")              /* v12  = (3 - R * x0 * x0) / 2 */
@@ -289,7 +289,7 @@ namespace lsp
                 __ASM_EMIT("frsqrts     v12.4s, v6.4s, v4.4s")              /* v12  = (3 - R * x1 * x1) / 2 */
                 __ASM_EMIT("fmul        v10.4s, v4.4s, v12.4s")             /* v10  = 1/svrtf(B) = x2 = x1 * (3 - R * x1 * x1) / 2 */
                 __ASM_EMIT("fmul        v10.4s, v8.4s, v10.4s")             /* v10  = T/svrtf(B) */
-                __ASM_EMIT("and         v10.16b, v10.16b, v14.16b")         /* v10  = (T >= 1e-10) ? T/svrt(B) : 0 */
+                __ASM_EMIT("and         v10.16b, v10.16b, v14.16b")         /* v10  = (T >= threshold) ? T/sqrt(B) : 0 */
                 __ASM_EMIT("add         %[a_head], %[a_head], #0x10")
                 __ASM_EMIT("add         %[b_head], %[b_head], #0x10")
                 __ASM_EMIT("sub         %[count], %[count], #4")
@@ -301,7 +301,7 @@ namespace lsp
                 /* 1x blocks */
                 __ASM_EMIT("adds        %[count], %[count], #3")
                 __ASM_EMIT("blt         6f")
-                __ASM_EMIT("ldr         q3, [%[CORR_CC]]")                  /* v3   = 1e-10 */
+                __ASM_EMIT("ldr         q3, [%[CORR_CC]]")                  /* v3   = threshold */
                 __ASM_EMIT("5:")
                 __ASM_EMIT("ld1r        {v4.4s}, [%[a_head]]")              /* v4   = ah0 */
                 __ASM_EMIT("ld1r        {v6.4s}, [%[b_head]]")              /* v6   = bh0 */
@@ -319,7 +319,7 @@ namespace lsp
                 __ASM_EMIT("fadd        v0.4s, v12.4s, v0.4s")              /* v0   = T = xv + DV */
                 __ASM_EMIT("fmul        v10.4s, v1.4s, v2.4s")              /* v10  = B = BA * BB */
 
-                __ASM_EMIT("fcmge       v14.4s, v0.4s, v3.4s")              /* v14  = T >= 1e-10 */
+                __ASM_EMIT("fcmge       v14.4s, v0.4s, v3.4s")              /* v14  = T >= threshold */
                 __ASM_EMIT("frsqrte     v4.4s, v10.4s")                     /* v4   = x0 */
                 __ASM_EMIT("fmul        v6.4s, v4.4s, v10.4s")              /* v6   = R * x0 */
                 __ASM_EMIT("frsqrts     v12.4s, v6.4s, v4.4s")              /* v12  = (3 - R * x0 * x0) / 2 */
@@ -328,7 +328,7 @@ namespace lsp
                 __ASM_EMIT("frsqrts     v12.4s, v6.4s, v4.4s")              /* v12  = (3 - R * x1 * x1) / 2 */
                 __ASM_EMIT("fmul        v10.4s, v4.4s, v12.4s")             /* v10  = 1/svrtf(B) = x2 = x1 * (3 - R * x1 * x1) / 2 */
                 __ASM_EMIT("fmul        v10.4s, v0.4s, v10.4s")             /* v10  = T/svrtf(B) */
-                __ASM_EMIT("and         v10.16b, v10.16b, v14.16b")         /* v10  = (T >= 1e-10) ? T/svrt(B) : 0 */
+                __ASM_EMIT("and         v10.16b, v10.16b, v14.16b")         /* v10  = (T >= threshold) ? T/sqrt(B) : 0 */
                 __ASM_EMIT("add         %[a_head], %[a_head], #0x04")
                 __ASM_EMIT("add         %[b_head], %[b_head], #0x04")
                 __ASM_EMIT("subs        %[count], %[count], #1")