diff --git a/benchmarks/BM_multiplyAddFixedGain.cpp b/benchmarks/BM_multiplyAddFixedGain.cpp
new file mode 100644
index 000000000..b484389dc
--- /dev/null
+++ b/benchmarks/BM_multiplyAddFixedGain.cpp
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+// This code is part of the sfizz library and is licensed under a BSD 2-clause
+// license. You should have received a LICENSE.md file along with the code.
+// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz
+
+#include "SIMDHelpers.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+class MultiplyAddFixedGain : public benchmark::Fixture { // holds the gain and the two buffers used by every benchmark below
+public:
+    void SetUp(const ::benchmark::State& state) { // sizes both buffers from state.range(0) and fills them
+        std::random_device rd { };
+        std::mt19937 gen { rd() };
+        std::uniform_real_distribution dist { 0, 1 };
+        input = std::vector(state.range(0));
+        output = std::vector(state.range(0));
+        gain = dist(gen); // a single gain value drawn from dist
+        std::fill(output.begin(), output.end(), 1.0f ); // the accumulator starts at 1.0f everywhere
+        std::generate(input.begin(), input.end(), [&]() { return dist(gen); }); // input samples drawn from dist
+    }
+
+    void TearDown(const ::benchmark::State& state [[maybe_unused]]) { // intentionally empty
+
+    }
+
+    float gain = {};
+    std::vector input;
+    std::vector output;
+};
+
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Straight)(benchmark::State& state) { // hand-written indexed loop, no sfz helper
+    for (auto _ : state)
+    {
+        for (int i = 0; i < state.range(0); ++i)
+            output[i] += gain * input[i];
+    }
+}
+
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Scalar)(benchmark::State& state) { // sfz::multiplyAdd over the whole buffers
+    for (auto _ : state)
+    {
+        sfz::multiplyAdd(gain, input, absl::MakeSpan(output));
+    }
+}
+
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, SIMD)(benchmark::State& state) { // sfz::multiplyAdd over the whole buffers
+    for (auto _ : state)
+    {
+        sfz::multiplyAdd(gain, input, absl::MakeSpan(output));
+    }
+}
+
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Scalar_Unaligned)(benchmark::State& state) { // same call on spans that skip the first element
+    for (auto _ : state)
+    {
+        sfz::multiplyAdd(gain, absl::MakeSpan(input).subspan(1), absl::MakeSpan(output).subspan(1)); // subspan(1) starts one element past the buffer start
+    }
+}
+
+BENCHMARK_DEFINE_F(MultiplyAddFixedGain, SIMD_Unaligned)(benchmark::State& state) { // same call on spans that skip the first element
+    for (auto _ : state)
+    {
+        sfz::multiplyAdd(gain, absl::MakeSpan(input).subspan(1), absl::MakeSpan(output).subspan(1)); // subspan(1) starts one element past the buffer start
+    }
+}
+
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Straight)->RangeMultiplier(4)->Range(1 << 2, 1 << 12); // buffer sizes 4, 16, ..., 4096
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Scalar)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, SIMD)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Scalar_Unaligned)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
+BENCHMARK_REGISTER_F(MultiplyAddFixedGain, SIMD_Unaligned)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
+BENCHMARK_MAIN();
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 00bbd8026..b143ac5a8 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -47,6 +47,7 @@ target_link_libraries(bm_ADSR PRIVATE sfizz::sfizz)
 
 sfizz_add_benchmark(bm_add BM_add.cpp)
 sfizz_add_benchmark(bm_multiplyAdd BM_multiplyAdd.cpp)
+sfizz_add_benchmark(bm_multiplyAddFixedGain BM_multiplyAddFixedGain.cpp)
 sfizz_add_benchmark(bm_subtract BM_subtract.cpp)
 sfizz_add_benchmark(bm_copy BM_copy.cpp)
 sfizz_add_benchmark(bm_pan BM_pan.cpp)
diff --git a/src/sfizz/AudioSpan.h b/src/sfizz/AudioSpan.h
index b4cb4e759..312c1e1b2 100644
--- a/src/sfizz/AudioSpan.h
+++ b/src/sfizz/AudioSpan.h
@@ -11,6 +11,7 @@
 #include "Debug.h"
 #include "LeakDetector.h"
 #include "SIMDHelpers.h"
+#include "absl/types/span.h"
 #include
 #include
 #include
@@ -306,6 +307,43 @@ class AudioSpan {
         }
     }
 
+    /**
+     * @brief Accumulate another AudioSpan into this one, channel by channel, scaling
+     * each frame of the operand by the matching element of a gain span.
+     *
+     * @param other the AudioSpan to add; its channel count must equal this span's (asserted)
+     * @param gain the per-frame gains; its size must equal the number of frames (asserted)
+     */
+    template >
+    void multiplyAdd(AudioSpan& other, absl::Span gain)
+    {
+        static_assert(!std::is_const::value, "Can't allow mutating operations on const AudioSpans");
+        ASSERT(other.getNumChannels() == numChannels);
+        ASSERT(gain.size() == numFrames);
+        if (other.getNumChannels() == numChannels) { // when the channel counts differ, nothing is added
+            for (size_t i = 0; i < numChannels; ++i)
+                sfz::multiplyAdd(gain, other.getConstSpan(i), getSpan(i)); // the same gain span is reused for every channel
+        }
+    }
+
+    /**
+     * @brief Accumulate another AudioSpan into this one, channel by channel, scaling
+     * the operand by a single fixed gain.
+     *
+     * @param other the AudioSpan to add; its channel count must equal this span's (asserted)
+     * @param gain the gain applied to every frame of the operand
+     */
+    template >
+    void multiplyAdd(AudioSpan& other, const Type gain)
+    {
+        static_assert(!std::is_const::value, "Can't allow mutating operations on const AudioSpans");
+        ASSERT(other.getNumChannels() == numChannels);
+        if (other.getNumChannels() == numChannels) { // when the channel counts differ, nothing is added
+            for (size_t i = 0; i < numChannels; ++i)
+                sfz::multiplyAdd(gain, other.getConstSpan(i), getSpan(i));
+        }
+    }
+
     /**
      * @brief Copy the elements of another AudioSpan with a compatible number of channels
      * to the current AudioSpan.
diff --git a/src/sfizz/SIMDDummy.cpp b/src/sfizz/SIMDDummy.cpp
index 22a1140b2..2a0ca34d0 100644
--- a/src/sfizz/SIMDDummy.cpp
+++ b/src/sfizz/SIMDDummy.cpp
@@ -76,6 +76,12 @@ void sfz::multiplyAdd(absl::Span gain, absl::Span(gain, input, output);
 }
 
+template <>
+void sfz::multiplyAdd(const float gain, absl::Span input, absl::Span output) noexcept
+{
+    multiplyAdd(gain, input, output); // NOTE(review): presumably forwards to the scalar instantiation; the explicit template arguments are not visible here - confirm
+}
+
 template <>
 float sfz::loopingSFZIndex(absl::Span jumps, absl::Span leftCoeff, absl::Span rightCoeff, absl::Span indices, float floatIndex, float loopEnd, float loopStart) noexcept
 {
diff --git a/src/sfizz/SIMDHelpers.h b/src/sfizz/SIMDHelpers.h
index f4c5a2d7a..b3725129b 100644
--- a/src/sfizz/SIMDHelpers.h
+++ b/src/sfizz/SIMDHelpers.h
@@ -491,6 +491,12 @@ namespace _internals {
     {
         *output++ += (*gain++) * (*input++);
     }
+
+    template
+    inline void snippetMultiplyAdd(const T gain, const T*& input, T*& output) // fixed-gain step: one element, then both pointers advance
+    {
+        *output++ += gain * (*input++);
+    }
 }
 
 /**
@@ -520,6 +526,20 @@ void multiplyAdd(absl::Span gain, absl::Span input, absl::Span
 template <>
 void multiplyAdd(absl::Span gain, absl::Span input, absl::Span output) noexcept;
 
+template
+void multiplyAdd(const T gain, absl::Span input, absl::Span output) noexcept // fixed-gain overload: output[i] += gain * input[i]
+{
+    ASSERT(input.size() <= output.size());
+    auto* in = input.begin();
+    auto* out = output.begin();
+    auto* sentinel = out + std::min(output.size(), input.size()); // never walk past the shorter of the two spans
+    while (out < sentinel)
+        _internals::snippetMultiplyAdd(gain, in, out);
+}
+
+template <>
+void multiplyAdd(const float gain, absl::Span input, absl::Span output) noexcept;
+
 namespace _internals {
 template
 inline void snippetRampLinear(T*& output, T& value, T step)
diff --git a/src/sfizz/SIMDSSE.cpp b/src/sfizz/SIMDSSE.cpp
index ba98c12d6..74adfb666 100644
--- a/src/sfizz/SIMDSSE.cpp
+++ b/src/sfizz/SIMDSSE.cpp
@@ -316,6 +316,29 @@ void sfz::multiplyAdd(absl::Span gain, absl::Span(g, in, out);
 }
 
+template <>
+void sfz::multiplyAdd(const float gain, absl::Span input, absl::Span output) noexcept // SSE fixed-gain overload: output[i] += gain * input[i]
+{
+    auto* in = input.begin();
+    auto* out = output.begin();
+    const auto size = std::min(output.size(), input.size()); // number of elements actually processed
+    const auto* lastAligned = prevAligned(output.begin() + size); // presumably the last aligned address at or before the processed end - confirm against prevAligned
+
+    while (unaligned(out, in) && out < lastAligned) // element-wise steps for as long as unaligned() reports true
+        _internals::snippetMultiplyAdd(gain, in, out);
+
+    auto mmGain = _mm_set1_ps(gain); // the gain replicated in all four lanes
+    while (out < lastAligned) {
+        auto mmOut = _mm_load_ps(out); // aligned load of four floats
+        mmOut = _mm_add_ps(_mm_mul_ps(mmGain, _mm_load_ps(in)), mmOut);
+        _mm_store_ps(out, mmOut);
+        incrementAll(in, out); // presumably advances both pointers by one four-float block - confirm against incrementAll
+    }
+
+    while (out < output.begin() + size) // tail: stop at the processed size, not output.end(), so a shorter input is never read past its end
+        _internals::snippetMultiplyAdd(gain, in, out);
+}
+
 template <>
 float sfz::loopingSFZIndex(absl::Span jumps,
     absl::Span leftCoeffs,
diff --git a/tests/SIMDHelpersT.cpp b/tests/SIMDHelpersT.cpp
index 51f9426c0..01c087eca 100644
--- a/tests/SIMDHelpersT.cpp
+++ b/tests/SIMDHelpersT.cpp
@@ -606,6 +606,57 @@ TEST_CASE("[Helpers] Add (SIMD vs scalar)")
     REQUIRE(approxEqual(outputScalar, outputSIMD));
 }
 
+TEST_CASE("[Helpers] MultiplyAdd (SIMD)")
+{
+    std::array gain { 0.0f, 0.1f, 0.2f, 0.3f, 0.4f };
+    std::array input { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    std::array output { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
+    std::array expected { 5.0f, 4.2f, 3.6f, 3.2f, 3.0f }; // output[i] + gain[i] * input[i]
+    sfz::multiplyAdd(gain, input, absl::MakeSpan(output));
+    REQUIRE(output == expected); // NOTE(review): exact floating-point comparison - confirm it is stable on every target
+}
+
+TEST_CASE("[Helpers] MultiplyAdd (SIMD vs scalar)")
+{
+    std::vector gain(bigBufferSize);
+    std::vector input(bigBufferSize);
+    std::vector outputScalar(bigBufferSize);
+    std::vector outputSIMD(bigBufferSize);
+    absl::c_iota(gain, 0.0f);
+    absl::c_iota(input, 0.0f);
+    absl::c_iota(outputScalar, 0.0f);
+    absl::c_iota(outputSIMD, 0.0f);
+
+    sfz::multiplyAdd(gain, input, absl::MakeSpan(outputScalar));
+    sfz::multiplyAdd(gain, input, absl::MakeSpan(outputSIMD));
+    REQUIRE(approxEqual(outputScalar, outputSIMD));
+}
+
+TEST_CASE("[Helpers] MultiplyAdd fixed gain (SIMD)")
+{
+    float gain = 0.3f;
+    std::array input { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    std::array output { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
+    std::array expected { 5.3f, 4.6f, 3.9f, 3.2f, 2.5f }; // output[i] + gain * input[i]
+    sfz::multiplyAdd(gain, input, absl::MakeSpan(output));
+    REQUIRE(output == expected); // NOTE(review): exact floating-point comparison - confirm it is stable on every target
+}
+
+TEST_CASE("[Helpers] MultiplyAdd fixed gain (SIMD vs scalar)")
+{
+    float gain = 0.3f;
+    std::vector input(bigBufferSize);
+    std::vector outputScalar(bigBufferSize);
+    std::vector outputSIMD(bigBufferSize);
+    absl::c_iota(input, 0.0f);
+    absl::c_iota(outputScalar, 0.0f);
+    absl::c_iota(outputSIMD, 0.0f);
+
+    sfz::multiplyAdd(gain, input, absl::MakeSpan(outputScalar));
+    sfz::multiplyAdd(gain, input, absl::MakeSpan(outputSIMD));
+    REQUIRE(approxEqual(outputScalar, outputSIMD));
+}
+
 TEST_CASE("[Helpers] Subtract")
 {
     std::array input { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
@@ -717,7 +768,7 @@ TEST_CASE("[Helpers] Mean Squared (SIMD vs scalar)")
     REQUIRE(sfz::meanSquared(input) == sfz::meanSquared(input));
 }
 
-TEST_CASE("[Helpers] Cumulative sum ")
+TEST_CASE("[Helpers] Cumulative sum")
 {
     std::array input { 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f }; // 1.1 2.3 3.6 5.0f 6.5 8.1
     std::array output;
@@ -737,7 +788,7 @@ TEST_CASE("[Helpers] Cumulative sum (SIMD vs Scalar)")
     REQUIRE(approxEqual(outputScalar, outputSIMD));
 }
 
-TEST_CASE("[Helpers] Diff ")
+TEST_CASE("[Helpers] Diff")
 {
     std::array input { 1.1f, 2.3f, 3.6f, 5.0f, 6.5f, 8.1f };
     std::array output;