// Patch summary: renames sfz::Smoother to sfz::OnePoleSmoother, adds sfz::LinearSmoother
// (src/sfizz/Smoothers.cpp and Smoothers.h), makes `Smoother` an alias of LinearSmoother,
// and updates benchmarks/BM_smoothers.cpp to benchmark both the OnePole and Linear smoothers.
// NOTE(review): this copy looks whitespace-collapsed and appears to have lost angle-bracketed
// text (e.g. `std::vector output`, `static_cast(...)`, a bare `#include`) - presumably template
// arguments and a header name; confirm against the upstream commit before applying.
diff --git a/benchmarks/BM_smoothers.cpp b/benchmarks/BM_smoothers.cpp index caca19b8d..b9755be7c 100644 --- a/benchmarks/BM_smoothers.cpp +++ b/benchmarks/BM_smoothers.cpp @@ -34,24 +34,24 @@ class SmootherFixture : public benchmark::Fixture { std::vector output; }; -BENCHMARK_DEFINE_F(SmootherFixture, Linear) (benchmark::State& state) +BENCHMARK_DEFINE_F(SmootherFixture, OnePole) (benchmark::State& state) { - sfz::Smoother smoother; + sfz::OnePoleSmoother smoother; smoother.setSmoothing(10, sfz::config::defaultSampleRate); for (auto _ : state) { smoother.process(input, absl::MakeSpan(output)); } } -// BENCHMARK_DEFINE_F(SmootherFixture, Multiplicative)(benchmark::State& state) { -// sfz::MultiplicativeSmoother smoother; -// smoother.setSmoothing(10, sfz::config::defaultSampleRate); -// for (auto _ : state) -// { -// smoother.process(input, absl::MakeSpan(output)); -// } -// } +BENCHMARK_DEFINE_F(SmootherFixture, Linear) (benchmark::State& state) +{ + sfz::LinearSmoother smoother; + smoother.setSmoothing(10, sfz::config::defaultSampleRate); + for (auto _ : state) { + smoother.process(input, absl::MakeSpan(output)); + } +} +BENCHMARK_REGISTER_F(SmootherFixture, OnePole)->RangeMultiplier(4)->Range(1 << 2, 1 << 12); BENCHMARK_REGISTER_F(SmootherFixture, Linear)->RangeMultiplier(4)->Range(1 << 2, 1 << 12); -// BENCHMARK_REGISTER_F(SmootherFixture, Multiplicative)->RangeMultiplier(4)->Range(1 << 2, 1 << 12); BENCHMARK_MAIN(); diff --git a/src/sfizz/Smoothers.cpp b/src/sfizz/Smoothers.cpp index c2f6e2fb3..9dcd8023d 100644 --- a/src/sfizz/Smoothers.cpp +++ b/src/sfizz/Smoothers.cpp @@ -9,14 +9,15 @@ #include "MathHelpers.h" #include "SfzHelpers.h" #include "SIMDHelpers.h" +#include namespace sfz { -Smoother::Smoother() +OnePoleSmoother::OnePoleSmoother() { } -void Smoother::setSmoothing(uint8_t smoothValue, float sampleRate) +void OnePoleSmoother::setSmoothing(uint8_t smoothValue, float sampleRate) { smoothing = (smoothValue > 0); if (smoothing) { @@ -24,12 +25,12 @@ 
void Smoother::setSmoothing(uint8_t smoothValue, float sampleRate) } } -void Smoother::reset(float value) +void OnePoleSmoother::reset(float value) { filter.reset(value); } -void Smoother::process(absl::Span input, absl::Span output, bool canShortcut) +void OnePoleSmoother::process(absl::Span input, absl::Span output, bool canShortcut) { CHECK_SPAN_SIZES(input, output); if (input.size() == 0) @@ -53,4 +54,123 @@ void Smoother::process(absl::Span input, absl::Span output, } } +/// +LinearSmoother::LinearSmoother() +{ +} + +void LinearSmoother::setSmoothing(uint8_t smoothValue, float sampleRate) +{ + const float smoothTime = 1e-3f * smoothValue; + smoothFrames_ = static_cast(smoothTime * sampleRate); +} + +void LinearSmoother::reset(float value) +{ + current_ = value; + target_ = value; + step_ = 0.0; + //framesToTarget_ = 0; +} + +void LinearSmoother::process(absl::Span input, absl::Span output, bool canShortcut) +{ + CHECK_SPAN_SIZES(input, output); + + uint32_t i = 0; + const uint32_t count = static_cast(input.size()); + if (count == 0) + return; + + float current = current_; + float target = target_; + + if (canShortcut && current == target && current == input.front()) { + if (input.data() != output.data()) + copy(input, output); + reset(input.back()); + return; + } + + float step = step_; + // int32_t framesToTarget = framesToTarget_; + const int32_t smoothFrames = smoothFrames_; + + for (; i + 15 < count; i += 16) { + const float nextTarget = input[i + 15]; + if (target != nextTarget) { + target = nextTarget; + //framesToTarget = (framesToTarget > 0) ? 
framesToTarget : smoothFrames; + //step = (target - current) / max(1, framesToTarget); + step = (target - current) / max(1, smoothFrames); + } + const simde__m128 targetX4 = simde_mm_set1_ps(target); + if (target > current) { + simde__m128 stepX4 = simde_mm_set1_ps(step); + simde__m128 tmp1X4 = simde_mm_mul_ps(stepX4, simde_mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f)); + simde__m128 tmp2X4 = simde_mm_shuffle_ps(tmp1X4, tmp1X4, SIMDE_MM_SHUFFLE(3, 3, 3, 3)); + simde__m128 current1X4 = simde_mm_add_ps(simde_mm_set1_ps(current), tmp1X4); + simde_mm_storeu_ps(&output[i], simde_mm_min_ps(current1X4, targetX4)); + simde__m128 current2X4 = simde_mm_add_ps(current1X4, tmp2X4); + simde_mm_storeu_ps(&output[i + 4], simde_mm_min_ps(current2X4, targetX4)); + simde__m128 current3X4 = simde_mm_add_ps(current2X4, tmp2X4); + simde_mm_storeu_ps(&output[i + 8], simde_mm_min_ps(current3X4, targetX4)); + simde__m128 current4X4 = simde_mm_add_ps(current3X4, tmp2X4); + simde__m128 limited4X4 = simde_mm_min_ps(current4X4, targetX4); + simde_mm_storeu_ps(&output[i + 12], limited4X4); + current = simde_mm_cvtss_f32(simde_mm_shuffle_ps(limited4X4, limited4X4, SIMDE_MM_SHUFFLE(3, 3, 3, 3))); + } + else if (target < current) { + simde__m128 stepX4 = simde_mm_set1_ps(step); + simde__m128 tmp1X4 = simde_mm_mul_ps(stepX4, simde_mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f)); + simde__m128 tmp2X4 = simde_mm_shuffle_ps(tmp1X4, tmp1X4, SIMDE_MM_SHUFFLE(3, 3, 3, 3)); + simde__m128 current1X4 = simde_mm_add_ps(simde_mm_set1_ps(current), tmp1X4); + simde_mm_storeu_ps(&output[i], simde_mm_max_ps(current1X4, targetX4)); + simde__m128 current2X4 = simde_mm_add_ps(current1X4, tmp2X4); + simde_mm_storeu_ps(&output[i + 4], simde_mm_max_ps(current2X4, targetX4)); + simde__m128 current3X4 = simde_mm_add_ps(current2X4, tmp2X4); + simde_mm_storeu_ps(&output[i + 8], simde_mm_max_ps(current3X4, targetX4)); + simde__m128 current4X4 = simde_mm_add_ps(current3X4, tmp2X4); + simde__m128 limited4X4 = simde_mm_max_ps(current4X4, 
targetX4); + simde_mm_storeu_ps(&output[i + 12], limited4X4); + current = simde_mm_cvtss_f32(simde_mm_shuffle_ps(limited4X4, limited4X4, SIMDE_MM_SHUFFLE(3, 3, 3, 3))); + } + else { + simde_mm_storeu_ps(&output[i], targetX4); + simde_mm_storeu_ps(&output[i + 4], targetX4); + simde_mm_storeu_ps(&output[i + 8], targetX4); + simde_mm_storeu_ps(&output[i + 12], targetX4); + } + //framesToTarget -= 16; + } + + if (i < count) { + const float nextTarget = input[count - 1]; + if (target != nextTarget) { + target = nextTarget; + // framesToTarget = (framesToTarget > 0) ? framesToTarget : smoothFrames; + // step = (target - current) / max(1, framesToTarget); + step = (target - current) / max(1, smoothFrames); + } + if (target > current) { + for (; i < count; ++i) + output[i] = current = min(target, current + step); + } + else if (target < current) { + for (; i < count; ++i) + output[i] = current = max(target, current + step); + } + else { + for (; i < count; ++i) + output[i] = target; + } + //framesToTarget -= count; + } + + current_ = current; + target_ = target; + step_ = step; + //framesToTarget_ = max(0, framesToTarget); +} + } diff --git a/src/sfizz/Smoothers.h b/src/sfizz/Smoothers.h index 50f99821c..e62190ebe 100644 --- a/src/sfizz/Smoothers.h +++ b/src/sfizz/Smoothers.h @@ -14,9 +14,9 @@ namespace sfz { * @brief Wrapper class for a one pole filter smoother * */ -class Smoother { +class OnePoleSmoother { public: - Smoother(); + OnePoleSmoother(); /** * @brief Set the filter cutoff based on the sfz smoothing value * and the sample rate. @@ -49,4 +49,51 @@ class Smoother { OnePoleFilter filter {}; }; +/** + * @brief Linear smoother: moves toward its target in constant per-sample steps, clamped at the target + * + */ +class LinearSmoother { +public: + LinearSmoother(); + /** + * @brief Set the ramp length in frames from the sfz smoothing value + * (treated as milliseconds) and the sample rate. 
+ * + * @param smoothValue + * @param sampleRate + */ + void setSmoothing(uint8_t smoothValue, float sampleRate); + /** + * @brief Reset the smoother: the current value and the target both become value and the step is cleared + * + * @param value + */ + void reset(float value = 0.0f); + /** + * @brief Process a span of data. Input and output can refer to the same + * memory. + * + * @param input target values; outside the shortcut path only the last sample of each full 16-frame block, and the last sample of the span for any remainder, is read as the new target + * @param output receives the smoothed values + * @param canShortcut whether we can take a fast path when the smoother is idle + * (current equals target) and exactly at the first value of the + * input span; the input is then copied to the output and the state reset to the last input value. + */ + void process(absl::Span input, absl::Span output, bool canShortcut = false); + + float current() const { return current_; } +private: + float current_ = 0.0; + float target_ = 0.0; + float step_ = 0.0; + //int32_t framesToTarget_ = 0; + int32_t smoothFrames_ = 0; +}; + +/** + * @brief Default smoother + */ +using Smoother = LinearSmoother; + }