Merge pull request #85 from jpcima/multiply-add

Add multiplyAdd with fixed gain
sfztools · Mar 2, 2020 · 2e5cdba · 2e5cdba
2 parents d2e44cc + 42343c8
commit 2e5cdba
Show file tree

Hide file tree

Showing 7 changed files with 219 additions and 2 deletions.
diff --git a/benchmarks/BM_multiplyAddFixedGain.cpp b/benchmarks/BM_multiplyAddFixedGain.cpp
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+// This code is part of the sfizz library and is licensed under a BSD 2-clause
+// license. You should have received a LICENSE.md file along with the code.
+// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz
+
+#include "SIMDHelpers.h"
+#include <benchmark/benchmark.h>
+#include <random>
+#include <numeric>
+#include <vector>
+#include <cmath>
+#include <iostream>
+
+/**
+ * @brief Benchmark fixture for the fixed-gain multiplyAdd variants.
+ *
+ * Before each benchmark run, SetUp sizes both buffers to `state.range(0)` floats,
+ * fills the output with ones, and draws the gain and every input sample from a
+ * uniform distribution constructed with bounds 0 and 1.
+ */
+class MultiplyAddFixedGain : public benchmark::Fixture {
+public:
+  // `override` makes the compiler reject this hook if it ever stops matching the
+  // base-class signature, instead of silently never being called.
+  void SetUp(const ::benchmark::State& state) override {
+    std::random_device rd { };
+    std::mt19937 gen { rd() };
+    std::uniform_real_distribution<float> dist { 0, 1 };
+    const auto numSamples = state.range(0);
+    input = std::vector<float>(numSamples);
+    output = std::vector<float>(numSamples);
+    // The gain is drawn before the input samples, from the same generator.
+    gain = dist(gen);
+    std::fill(output.begin(), output.end(), 1.0f );
+    std::generate(input.begin(), input.end(), [&]() { return dist(gen); });
+  }
+
+  // Nothing to release here: both vectors are reassigned by the next SetUp.
+  void TearDown(const ::benchmark::State& state [[maybe_unused]]) override {
+  }
+
+  float gain = {};
+  std::vector<float> input;
+  std::vector<float> output;
+};
+
+// Baseline: a hand-written accumulate loop over the fixture buffers that does not
+// go through sfz::multiplyAdd.
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Straight)(benchmark::State& state) {
+    for (auto _ : state) {
+        for (int sample = 0; sample < state.range(0); ++sample) {
+            output[sample] += gain * input[sample];
+        }
+    }
+}
+
+// Times the SIMD = false instantiation of the fixed-gain multiplyAdd on the full
+// fixture buffers.
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Scalar)(benchmark::State& state) {
+    for (auto _ : state) {
+        auto destination = absl::MakeSpan(output);
+        sfz::multiplyAdd<float, false>(gain, input, destination);
+    }
+}
+
+// Times the SIMD = true instantiation of the fixed-gain multiplyAdd on the full
+// fixture buffers.
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, SIMD)(benchmark::State& state) {
+    for (auto _ : state) {
+        auto destination = absl::MakeSpan(output);
+        sfz::multiplyAdd<float, true>(gain, input, destination);
+    }
+}
+
+// Same as the Scalar benchmark, but both spans skip the first element, so they
+// start one float past the beginning of each vector's storage.
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Scalar_Unaligned)(benchmark::State& state) {
+    for (auto _ : state) {
+        const auto source = absl::MakeSpan(input).subspan(1);
+        const auto destination = absl::MakeSpan(output).subspan(1);
+        sfz::multiplyAdd<float, false>(gain, source, destination);
+    }
+}
+
+// Same as the SIMD benchmark, but both spans skip the first element, so they
+// start one float past the beginning of each vector's storage.
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, SIMD_Unaligned)(benchmark::State& state) {
+    for (auto _ : state) {
+        const auto source = absl::MakeSpan(input).subspan(1);
+        const auto destination = absl::MakeSpan(output).subspan(1);
+        sfz::multiplyAdd<float, true>(gain, source, destination);
+    }
+}
+
+// Every variant is registered over the same size range: 1 << 2 up to 1 << 12,
+// with a range multiplier of 4.
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Straight)
+    ->RangeMultiplier(4)
+    ->Range(1 << 2, 1 << 12);
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Scalar)
+    ->RangeMultiplier(4)
+    ->Range(1 << 2, 1 << 12);
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, SIMD)
+    ->RangeMultiplier(4)
+    ->Range(1 << 2, 1 << 12);
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Scalar_Unaligned)
+    ->RangeMultiplier(4)
+    ->Range(1 << 2, 1 << 12);
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, SIMD_Unaligned)
+    ->RangeMultiplier(4)
+    ->Range(1 << 2, 1 << 12);
+BENCHMARK_MAIN();
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
@@ -47,6 +47,7 @@ target_link_libraries(bm_ADSR PRIVATE sfizz::sfizz)
 
 sfizz_add_benchmark(bm_add BM_add.cpp)
 sfizz_add_benchmark(bm_multiplyAdd BM_multiplyAdd.cpp)
+sfizz_add_benchmark(bm_multiplyAddFixedGain BM_multiplyAddFixedGain.cpp)
 sfizz_add_benchmark(bm_subtract BM_subtract.cpp)
 sfizz_add_benchmark(bm_copy BM_copy.cpp)
 sfizz_add_benchmark(bm_pan BM_pan.cpp)

diff --git a/src/sfizz/AudioSpan.h b/src/sfizz/AudioSpan.h
@@ -11,6 +11,7 @@
 #include "Debug.h"
 #include "LeakDetector.h"
 #include "SIMDHelpers.h"
+#include "absl/types/span.h"
 #include <array>
 #include <initializer_list>
 #include <type_traits>
@@ -306,6 +307,43 @@ class AudioSpan {
         }
     }
 
+    /**
+     * @brief Accumulate another AudioSpan into this one, channel by channel, after
+     * scaling each frame of the operand by the matching entry of a gain span.
+     *
+     * Both preconditions below are checked with ASSERT. Independently of the
+     * assertion, no sample is written when the channel counts differ.
+     *
+     * @note NOTE(review): `std::enable_if<...>` is used without `::type`, so the
+     * defaulted template argument is always well-formed and does not remove this
+     * overload when N > MaxChannels. Confirm whether a hard constraint is wanted.
+     *
+     * @param other the AudioSpan to scale and add; expected to have as many channels
+     * as this one
+     * @param gain the per-frame gains; expected to hold exactly numFrames values
+     */
+    template <class U, size_t N, typename = std::enable_if<N <= MaxChannels>>
+    void multiplyAdd(AudioSpan<U, N>& other, absl::Span<const Type> gain)
+    {
+        static_assert(!std::is_const<Type>::value, "Can't allow mutating operations on const AudioSpans");
+        ASSERT(other.getNumChannels() == numChannels);
+        ASSERT(gain.size() == numFrames);
+
+        if (other.getNumChannels() != numChannels)
+            return;
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+            sfz::multiplyAdd(gain, other.getConstSpan(channel), getSpan(channel));
+    }
+
+    /**
+     * @brief Accumulate another AudioSpan into this one, channel by channel, after
+     * scaling every frame of the operand by one fixed gain.
+     *
+     * The channel-count precondition is checked with ASSERT. Independently of the
+     * assertion, no sample is written when the channel counts differ.
+     *
+     * @note NOTE(review): `std::enable_if<...>` is used without `::type`, so the
+     * defaulted template argument is always well-formed and does not remove this
+     * overload when N > MaxChannels. Confirm whether a hard constraint is wanted.
+     *
+     * @param other the AudioSpan to scale and add; expected to have as many channels
+     * as this one
+     * @param gain the single gain applied to every frame of the operand
+     */
+    template <class U, size_t N, typename = std::enable_if<N <= MaxChannels>>
+    void multiplyAdd(AudioSpan<U, N>& other, const Type gain)
+    {
+        static_assert(!std::is_const<Type>::value, "Can't allow mutating operations on const AudioSpans");
+        ASSERT(other.getNumChannels() == numChannels);
+
+        if (other.getNumChannels() != numChannels)
+            return;
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+            sfz::multiplyAdd(gain, other.getConstSpan(channel), getSpan(channel));
+    }
+
     /**
      * @brief Copy the elements of another AudioSpan with a compatible number of channels
      * to the current AudioSpan.

diff --git a/src/sfizz/SIMDDummy.cpp b/src/sfizz/SIMDDummy.cpp
@@ -76,6 +76,12 @@ void sfz::multiplyAdd<float, true>(absl::Span<const float> gain, absl::Span<cons
     multiplyAdd<float, false>(gain, input, output);
 }
 
+// Fixed-gain <float, true> specialization for this translation unit: it defers
+// entirely to the SIMD = false instantiation.
+template <>
+void sfz::multiplyAdd<float, true>(const float gain, absl::Span<const float> input, absl::Span<float> output) noexcept
+{
+    sfz::multiplyAdd<float, false>(gain, input, output);
+}
+
 template <>
 float sfz::loopingSFZIndex<float, true>(absl::Span<const float> jumps, absl::Span<float> leftCoeff, absl::Span<float> rightCoeff, absl::Span<int> indices, float floatIndex, float loopEnd, float loopStart) noexcept
 {

diff --git a/src/sfizz/SIMDHelpers.h b/src/sfizz/SIMDHelpers.h
@@ -491,6 +491,12 @@ namespace _internals {
     {
         *output++ += (*gain++) * (*input++);
     }
+
+    /**
+     * @brief One step of a fixed-gain multiply-add: adds gain times the current
+     * input sample to the current output sample, then advances both pointers by one.
+     *
+     * @param gain the factor applied to the input sample
+     * @param input pointer to the sample to read; incremented on return
+     * @param output pointer to the sample to accumulate into; incremented on return
+     */
+    template <class T>
+    inline void snippetMultiplyAdd(const T gain, const T*& input, T*& output)
+    {
+        *output += gain * (*input);
+        ++input;
+        ++output;
+    }
 }
 
 /**
@@ -520,6 +526,20 @@ void multiplyAdd(absl::Span<const T> gain, absl::Span<const T> input, absl::Span
 template <>
 void multiplyAdd<float, true>(absl::Span<const float> gain, absl::Span<const float> input, absl::Span<float> output) noexcept;
 
+/**
+ * @brief Accumulate a scaled copy of the input into the output: each processed
+ * output sample is incremented by gain times the input sample at the same position.
+ *
+ * Only the first min(input.size(), output.size()) samples are processed.
+ *
+ * @tparam T the sample type
+ * @tparam SIMD selects between this generic version and a specialization, where
+ * one is declared
+ * @param gain the fixed gain applied to every input sample
+ * @param input the samples to scale
+ * @param output the buffer accumulated into; asserted to be at least as long as
+ * the input
+ */
+template <class T, bool SIMD = SIMDConfig::multiplyAdd>
+void multiplyAdd(const T gain, absl::Span<const T> input, absl::Span<T> output) noexcept
+{
+    ASSERT(input.size() <= output.size());
+    const auto count = std::min(output.size(), input.size());
+    const T* source = input.begin();
+    T* destination = output.begin();
+    // The snippet advances both pointers itself, so the loop has no increment clause.
+    for (T* const last = destination + count; destination < last;)
+        _internals::snippetMultiplyAdd<T>(gain, source, destination);
+}
+
+// Declaration only: the <float, true> specialization is defined out of line.
+template <>
+void multiplyAdd<float, true>(const float gain, absl::Span<const float> input, absl::Span<float> output) noexcept;
+
 namespace _internals {
     template <class T>
     inline void snippetRampLinear(T*& output, T& value, T step)

diff --git a/src/sfizz/SIMDSSE.cpp b/src/sfizz/SIMDSSE.cpp
@@ -316,6 +316,29 @@ void sfz::multiplyAdd<float, true>(absl::Span<const float> gain, absl::Span<cons
         _internals::snippetMultiplyAdd<float>(g, in, out);
 }
 
+/**
+ * SSE specialization of the fixed-gain multiplyAdd: each processed output sample
+ * is incremented by gain times the input sample at the same position.
+ *
+ * Only the first min(input.size(), output.size()) samples are processed, matching
+ * the generic template.
+ *
+ * @param gain the fixed gain applied to every input sample
+ * @param input the samples to scale
+ * @param output the buffer accumulated into
+ */
+template <>
+void sfz::multiplyAdd<float, true>(const float gain, absl::Span<const float> input, absl::Span<float> output) noexcept
+{
+    auto* in = input.begin();
+    auto* out = output.begin();
+    const auto size = std::min(output.size(), input.size());
+    // Every loop below stops at `sentinel`, not at output.end(): when the input is
+    // shorter than the output, running to output.end() would read past the input.
+    auto* const sentinel = output.begin() + size;
+    const auto* lastAligned = prevAligned(sentinel);
+
+    // Scalar steps while unaligned() reports that the pointers cannot be used with
+    // the aligned load/store intrinsics below.
+    while (unaligned(out, in) && out < lastAligned)
+        _internals::snippetMultiplyAdd<float>(gain, in, out);
+
+    // Vector body: broadcast the gain once, then process one SSE register per step.
+    const auto mmGain = _mm_set1_ps(gain);
+    while (out < lastAligned) {
+        auto mmOut = _mm_load_ps(out);
+        mmOut = _mm_add_ps(_mm_mul_ps(mmGain, _mm_load_ps(in)), mmOut);
+        _mm_store_ps(out, mmOut);
+        incrementAll<TypeAlignment>(in, out);
+    }
+
+    // Scalar tail for whatever remains before the sentinel.
+    while (out < sentinel)
+        _internals::snippetMultiplyAdd<float>(gain, in, out);
+}
+
 template <>
 float sfz::loopingSFZIndex<float, true>(absl::Span<const float> jumps,
     absl::Span<float> leftCoeffs,

diff --git a/tests/SIMDHelpersT.cpp b/tests/SIMDHelpersT.cpp
@@ -606,6 +606,57 @@ TEST_CASE("[Helpers] Add (SIMD vs scalar)")
     REQUIRE(approxEqual<float>(outputScalar, outputSIMD));
 }
 
+// Checks the SIMD per-sample-gain multiplyAdd against hand-written expected values
+// on a 5-element buffer.
+TEST_CASE("[Helpers] MultiplyAdd (SIMD)")
+{
+    const std::array<float, 5> gains { 0.0f, 0.1f, 0.2f, 0.3f, 0.4f };
+    const std::array<float, 5> source { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    const std::array<float, 5> expected { 5.0f, 4.2f, 3.6f, 3.2f, 3.0f };
+    std::array<float, 5> output { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
+
+    sfz::multiplyAdd<float, true>(gains, source, absl::MakeSpan(output));
+
+    REQUIRE(output == expected);
+}
+
+// Runs the scalar and SIMD per-sample-gain multiplyAdd on identical 0, 1, 2, ...
+// ramps and requires the two results to be approximately equal.
+TEST_CASE("[Helpers] MultiplyAdd (SIMD vs scalar)")
+{
+    std::vector<float> gains(bigBufferSize);
+    absl::c_iota(gains, 0.0f);
+    std::vector<float> source(bigBufferSize);
+    absl::c_iota(source, 0.0f);
+
+    std::vector<float> outputScalar(bigBufferSize);
+    absl::c_iota(outputScalar, 0.0f);
+    std::vector<float> outputSIMD(bigBufferSize);
+    absl::c_iota(outputSIMD, 0.0f);
+
+    sfz::multiplyAdd<float, false>(gains, source, absl::MakeSpan(outputScalar));
+    sfz::multiplyAdd<float, true>(gains, source, absl::MakeSpan(outputSIMD));
+
+    REQUIRE(approxEqual<float>(outputScalar, outputSIMD));
+}
+
+// Checks the SIMD fixed-gain multiplyAdd against hand-written expected values on a
+// 5-element buffer.
+TEST_CASE("[Helpers] MultiplyAdd fixed gain (SIMD)")
+{
+    const float fixedGain = 0.3f;
+    const std::array<float, 5> source { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    const std::array<float, 5> expected { 5.3f, 4.6f, 3.9f, 3.2f, 2.5f };
+    std::array<float, 5> output { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
+
+    sfz::multiplyAdd<float, true>(fixedGain, source, absl::MakeSpan(output));
+
+    REQUIRE(output == expected);
+}
+
+// Runs the scalar and SIMD fixed-gain multiplyAdd on identical 0, 1, 2, ... ramps
+// and requires the two results to be approximately equal.
+TEST_CASE("[Helpers] MultiplyAdd fixed gain (SIMD vs scalar)")
+{
+    const float fixedGain = 0.3f;
+    std::vector<float> source(bigBufferSize);
+    absl::c_iota(source, 0.0f);
+
+    std::vector<float> outputScalar(bigBufferSize);
+    absl::c_iota(outputScalar, 0.0f);
+    std::vector<float> outputSIMD(bigBufferSize);
+    absl::c_iota(outputSIMD, 0.0f);
+
+    sfz::multiplyAdd<float, false>(fixedGain, source, absl::MakeSpan(outputScalar));
+    sfz::multiplyAdd<float, true>(fixedGain, source, absl::MakeSpan(outputSIMD));
+
+    REQUIRE(approxEqual<float>(outputScalar, outputSIMD));
+}
+
 TEST_CASE("[Helpers] Subtract")
 {
     std::array<float, 5> input { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
@@ -717,7 +768,7 @@ TEST_CASE("[Helpers] Mean Squared (SIMD vs scalar)")
     REQUIRE(sfz::meanSquared<float, false>(input) == sfz::meanSquared<float, true>(input));
 }
 
-TEST_CASE("[Helpers] Cumulative sum ")
+TEST_CASE("[Helpers] Cumulative sum")
 {
     std::array<float, 6> input { 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f }; // 1.1 2.3 3.6 5.0f 6.5 8.1
     std::array<float, 6> output;
@@ -737,7 +788,7 @@ TEST_CASE("[Helpers] Cumulative sum (SIMD vs Scalar)")
     REQUIRE(approxEqual<float>(outputScalar, outputSIMD));
 }
 
-TEST_CASE("[Helpers] Diff ")
+TEST_CASE("[Helpers] Diff")
 {
     std::array<float, 6> input { 1.1f, 2.3f, 3.6f, 5.0f, 6.5f, 8.1f };
     std::array<float, 6> output;