Skip to content

Commit

Permalink
Use an interpolated FIR filter for cubic resampling
Browse files Browse the repository at this point in the history
Similar to how the bsinc filters work, but optimized for 4-point filtering. At
least the SSE version is notably faster than calculating the coefficients in
real time.
  • Loading branch information
kcat committed Feb 7, 2023
1 parent 0de7ea4 commit da845dd
Show file tree
Hide file tree
Showing 9 changed files with 227 additions and 22 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,9 @@ set(CORE_OBJS
core/converter.h
core/cpu_caps.cpp
core/cpu_caps.h
core/cubic_defs.h
core/cubic_tables.cpp
core/cubic_tables.h
core/devformat.cpp
core/devformat.h
core/device.cpp
Expand Down
11 changes: 11 additions & 0 deletions alc/alu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "core/buffer_storage.h"
#include "core/context.h"
#include "core/cpu_caps.h"
#include "core/cubic_tables.h"
#include "core/devformat.h"
#include "core/device.h"
#include "core/effects/base.h"
Expand Down Expand Up @@ -211,6 +212,14 @@ inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
#endif
return Resample_<LerpTag,CTag>;
case Resampler::Cubic:
#ifdef HAVE_NEON
if((CPUCapFlags&CPU_CAP_NEON))
return Resample_<CubicTag,NEONTag>;
#endif
#ifdef HAVE_SSE
if((CPUCapFlags&CPU_CAP_SSE))
return Resample_<CubicTag,SSETag>;
#endif
return Resample_<CubicTag,CTag>;
case Resampler::BSinc12:
case Resampler::BSinc24:
Expand Down Expand Up @@ -262,7 +271,9 @@ ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState
{
case Resampler::Point:
case Resampler::Linear:
break;
case Resampler::Cubic:
state->cubic.filter = gCubicSpline.Tab;
break;
case Resampler::FastBSinc12:
case Resampler::BSinc12:
Expand Down
13 changes: 13 additions & 0 deletions core/cubic_defs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef CORE_CUBIC_DEFS_H
#define CORE_CUBIC_DEFS_H

/* CubicPhaseBits is the number of bits used for the phase index into the cubic
 * filter table, and CubicPhaseCount is the resulting number of distinct phase
 * entries in that table (1<<CubicPhaseBits).
 */
constexpr unsigned int CubicPhaseBits{5};
constexpr unsigned int CubicPhaseCount{1 << CubicPhaseBits};

/* A single phase entry of the cubic filter table. The resamplers form each of
 * the four filter taps as mCoeffs[i] + pf*mDeltas[i], where pf is the
 * fractional position between this phase and the next one.
 */
struct CubicCoefficients {
/* The four filter coefficients at this phase. */
float mCoeffs[4];
/* The change in each coefficient from this phase to the next phase. */
float mDeltas[4];
};

#endif /* CORE_CUBIC_DEFS_H */
59 changes: 59 additions & 0 deletions core/cubic_tables.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@

#include "cubic_tables.h"

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <limits>
#include <memory>
#include <stdexcept>

#include "alnumbers.h"
#include "core/mixer/defs.h"


namespace {

using uint = unsigned int;

/* Builds and holds the phase table for the cubic spline filter. Each entry has
 * the four spline weights for one phase, along with the deltas needed to
 * linearly interpolate toward the following phase.
 */
struct SplineFilterArray {
    alignas(16) CubicCoefficients mTable[CubicPhaseCount]{};

    constexpr SplineFilterArray()
    {
        /* Evaluate the spline weights at each phase offset, mu = [0,1). */
        for(size_t phase{0};phase < CubicPhaseCount;++phase)
        {
            const double mu{static_cast<double>(phase) / CubicPhaseCount};
            const double mu2{mu*mu};
            const double mu3{mu2*mu};
            mTable[phase].mCoeffs[0] = static_cast<float>(-0.5*mu3 + mu2 + -0.5*mu);
            mTable[phase].mCoeffs[1] = static_cast<float>( 1.5*mu3 + -2.5*mu2 + 1.0);
            mTable[phase].mCoeffs[2] = static_cast<float>(-1.5*mu3 + 2.0*mu2 + 0.5*mu);
            mTable[phase].mCoeffs[3] = static_cast<float>( 0.5*mu3 + -0.5*mu2);
        }

        /* For all but the final phase, the delta is the step from a phase's
         * coefficient to the same coefficient of the next phase.
         */
        for(size_t phase{0};phase < CubicPhaseCount-1;++phase)
        {
            for(size_t tap{0};tap < 4;++tap)
            {
                mTable[phase].mDeltas[tap] = mTable[phase+1].mCoeffs[tap]
                    - mTable[phase].mCoeffs[tap];
            }
        }

        /* The final phase has no following table entry. Its deltas instead
         * step toward the weights at mu=1, which are {0, 0, 1, 0}.
         */
        const size_t last{CubicPhaseCount - 1};
        mTable[last].mDeltas[0] = -mTable[last].mCoeffs[0];
        mTable[last].mDeltas[1] = -mTable[last].mCoeffs[1];
        mTable[last].mDeltas[2] = 1.0f - mTable[last].mCoeffs[2];
        mTable[last].mDeltas[3] = -mTable[last].mCoeffs[3];
    }

    /* Returns a pointer to the first phase entry of the table. */
    constexpr const CubicCoefficients *getTable() const noexcept { return mTable; }
};

/* The spline filter table, filled in by SplineFilterArray's constexpr
 * constructor.
 */
constexpr SplineFilterArray SplineFilter{};

} // namespace

/* The externally visible handle to the spline table; it holds a pointer to the
 * first phase entry of SplineFilter.
 */
const CubicTable gCubicSpline{SplineFilter.getTable()};
16 changes: 16 additions & 0 deletions core/cubic_tables.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef CORE_CUBIC_TABLES_H
#define CORE_CUBIC_TABLES_H

#include "cubic_defs.h"


/* A handle to a cubic filter table. */
struct CubicTable {
/* Points to the first phase entry of the table. */
const CubicCoefficients *Tab;
};

/* A Catmull-Rom spline. The spline passes through the center two samples,
* ensuring no discontinuity while moving through a series of samples.
*/
extern const CubicTable gCubicSpline;

#endif /* CORE_CUBIC_TABLES_H */
9 changes: 9 additions & 0 deletions core/mixer/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "core/bufferline.h"
#include "core/resampler_limits.h"

struct CubicCoefficients;
struct HrtfChannelState;
struct HrtfFilter;
struct MixHrtfFilter;
Expand Down Expand Up @@ -51,7 +52,15 @@ struct BsincState {
const float *filter;
};

/* Interpolation state used by the cubic resampler. */
struct CubicState {
/* Filter coefficients, and coefficient deltas. Starting at phase index 0,
* each subsequent phase index follows contiguously.
*/
const CubicCoefficients *filter;
};

/* Resampler-specific interpolation state. The members share storage, so only
 * the one belonging to the resampler in use is meaningful (PrepareResampler
 * sets cubic.filter when the cubic resampler is selected).
 */
union InterpState {
CubicState cubic;
BsincState bsinc;
};

Expand Down
35 changes: 26 additions & 9 deletions core/mixer/mixer_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#include <limits>

#include "alnumeric.h"
#include "core/bsinc_tables.h"
#include "core/bsinc_defs.h"
#include "core/cubic_defs.h"
#include "defs.h"
#include "hrtfbase.h"

Expand All @@ -20,23 +21,39 @@ struct FastBSincTag;

namespace {

constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};
/* The number of sample fraction bits below the bsinc phase index, and the
 * value of one whole phase step expressed in those bits.
 */
constexpr uint BsincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
constexpr uint BsincPhaseDiffOne{1 << BsincPhaseBitDiff};

/* The number of sample fraction bits below the cubic phase index, the value of
 * one whole phase step expressed in those bits, and the mask that extracts the
 * sub-phase bits from a sample fraction.
 */
constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};

/* Point sampling: yields the first input sample unmodified. The interpolation
 * state and sample fraction are unused.
 */
inline float do_point(const InterpState&, const float *RESTRICT vals, const uint)
{
    return *vals;
}
/* Linear interpolation between the first two input samples, weighted by the
 * sample fraction scaled down by MixerFracOne. The interpolation state is
 * unused.
 */
inline float do_lerp(const InterpState&, const float *RESTRICT vals, const uint frac)
{
    const float mu{static_cast<float>(frac) * (1.0f/MixerFracOne)};
    return lerpf(vals[0], vals[1], mu);
}
/* Cubic interpolation over four input samples, passing the sample fraction
 * (scaled down by MixerFracOne) directly to cubic(). The interpolation state
 * is unused.
 */
inline float do_cubic(const InterpState&, const float *RESTRICT vals, const uint frac)
{
    const float mu{static_cast<float>(frac) * (1.0f/MixerFracOne)};
    return cubic(vals[0], vals[1], vals[2], vals[3], mu);
}
inline float do_cubic(const InterpState &istate, const float *RESTRICT vals, const uint frac)
{
/* Calculate the phase index and factor. */
const uint pi{frac >> CubicPhaseBitDiff};
const float pf{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};

const CubicCoefficients *RESTRICT filter = al::assume_aligned<16>(istate.cubic.filter + pi);

// Apply the phase interpolated filter.
return (filter->mCoeffs[0] + pf*filter->mDeltas[0]) * vals[0]
+ (filter->mCoeffs[1] + pf*filter->mDeltas[1]) * vals[1]
+ (filter->mCoeffs[2] + pf*filter->mDeltas[2]) * vals[2]
+ (filter->mCoeffs[3] + pf*filter->mDeltas[3]) * vals[3];
}
inline float do_bsinc(const InterpState &istate, const float *RESTRICT vals, const uint frac)
{
const size_t m{istate.bsinc.m};
ASSUME(m > 0);

// Calculate the phase index and factor.
const uint pi{frac >> FracPhaseBitDiff};
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
const uint pi{frac >> BsincPhaseBitDiff};
const float pf{static_cast<float>(frac & (BsincPhaseDiffOne-1)) * (1.0f/BsincPhaseDiffOne)};

const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
const float *RESTRICT phd{fil + m};
Expand All @@ -55,8 +72,8 @@ inline float do_fastbsinc(const InterpState &istate, const float *RESTRICT vals,
ASSUME(m > 0);

// Calculate the phase index and factor.
const uint pi{frac >> FracPhaseBitDiff};
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
const uint pi{frac >> BsincPhaseBitDiff};
const float pf{static_cast<float>(frac & (BsincPhaseDiffOne-1)) * (1.0f/BsincPhaseDiffOne)};

const float *RESTRICT fil{istate.bsinc.filter + m*pi*2};
const float *RESTRICT phd{fil + m};
Expand Down
52 changes: 45 additions & 7 deletions core/mixer/mixer_neon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@

#include "alnumeric.h"
#include "core/bsinc_defs.h"
#include "core/cubic_defs.h"
#include "defs.h"
#include "hrtfbase.h"

struct NEONTag;
struct LerpTag;
struct CubicTag;
struct BSincTag;
struct FastBSincTag;

Expand All @@ -22,6 +24,14 @@ struct FastBSincTag;

namespace {

/* The number of sample fraction bits below the bsinc phase index, the value of
 * one whole phase step expressed in those bits, and the mask that extracts the
 * sub-phase bits from a sample fraction.
 */
constexpr uint BSincPhaseBitDiff{MixerFracBits - BSincPhaseBits};
constexpr uint BSincPhaseDiffOne{1 << BSincPhaseBitDiff};
constexpr uint BSincPhaseDiffMask{BSincPhaseDiffOne - 1u};

/* The same three values for the cubic filter's phase index. */
constexpr uint CubicPhaseBitDiff{MixerFracBits - CubicPhaseBits};
constexpr uint CubicPhaseDiffOne{1 << CubicPhaseBitDiff};
constexpr uint CubicPhaseDiffMask{CubicPhaseDiffOne - 1u};

inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
{
float32x4_t ret{vmovq_n_f32(l0)};
Expand All @@ -31,9 +41,6 @@ inline float32x4_t set_f4(float l0, float l1, float l2, float l3)
return ret;
}

constexpr uint FracPhaseBitDiff{MixerFracBits - BSincPhaseBits};
constexpr uint FracPhaseDiffOne{1 << FracPhaseBitDiff};

inline void ApplyCoeffs(float2 *RESTRICT Values, const size_t IrSize, const ConstHrirSpan Coeffs,
const float left, const float right)
{
Expand Down Expand Up @@ -183,6 +190,37 @@ float *Resample_<LerpTag,NEONTag>(const InterpState*, float *RESTRICT src, uint
return dst.data();
}

/* NEON cubic resampler. For each output sample, the four filter taps are
 * interpolated from the phase table in state->cubic.filter and applied to four
 * consecutive input samples. The source position then advances by increment,
 * in fixed-point with MixerFracBits fractional bits. Returns dst.data().
 */
template<>
float *Resample_<CubicTag,NEONTag>(const InterpState *state, float *RESTRICT src, uint frac,
    uint increment, const al::span<float> dst)
{
    const CubicCoefficients *RESTRICT table = al::assume_aligned<16>(state->cubic.filter);

    /* The four filter taps cover src[-1] through src[2], so start one sample
     * back.
     */
    src -= 1;
    for(float &out_sample : dst)
    {
        /* Split the fraction into the table index and the position between
         * that table entry and the next.
         */
        const uint phase{frac >> CubicPhaseBitDiff};
        const float offset{static_cast<float>(frac&CubicPhaseDiffMask) * (1.0f/CubicPhaseDiffOne)};

        /* taps = coeffs + offset*deltas */
        const float32x4_t coeffs = vld1q_f32(table[phase].mCoeffs);
        const float32x4_t deltas = vld1q_f32(table[phase].mDeltas);
        const float32x4_t taps = vmlaq_f32(coeffs, vdupq_n_f32(offset), deltas);

        /* Multiply the taps with the input samples, then sum the four lanes
         * to produce the output sample.
         */
        float32x4_t sum4 = vmulq_f32(taps, vld1q_f32(src));
        sum4 = vaddq_f32(sum4, vrev64q_f32(sum4));
        out_sample = vget_lane_f32(vadd_f32(vget_low_f32(sum4), vget_high_f32(sum4)), 0);

        /* Advance the source by the whole-sample part of the new fraction. */
        frac += increment;
        src += frac>>MixerFracBits;
        frac &= MixerFracMask;
    }
    return dst.data();
}

template<>
float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src, uint frac,
uint increment, const al::span<float> dst)
Expand All @@ -196,8 +234,8 @@ float *Resample_<BSincTag,NEONTag>(const InterpState *state, float *RESTRICT src
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
const uint pi{frac >> FracPhaseBitDiff};
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
const uint pi{frac >> BSincPhaseBitDiff};
const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};

// Apply the scale and phase interpolated filter.
float32x4_t r4{vdupq_n_f32(0.0f)};
Expand Down Expand Up @@ -242,8 +280,8 @@ float *Resample_<FastBSincTag,NEONTag>(const InterpState *state, float *RESTRICT
for(float &out_sample : dst)
{
// Calculate the phase index and factor.
const uint pi{frac >> FracPhaseBitDiff};
const float pf{static_cast<float>(frac & (FracPhaseDiffOne-1)) * (1.0f/FracPhaseDiffOne)};
const uint pi{frac >> BSincPhaseBitDiff};
const float pf{static_cast<float>(frac&BSincPhaseDiffMask) * (1.0f/BSincPhaseDiffOne)};

// Apply the phase interpolated filter.
float32x4_t r4{vdupq_n_f32(0.0f)};
Expand Down
Loading

0 comments on commit da845dd

Please sign in to comment.