Skip to content

Commit

Permalink
Merge pull request #85 from jpcima/multiply-add
Browse files Browse the repository at this point in the history
Add multiplyAdd with fixed gain
  • Loading branch information
jpcima authored Mar 2, 2020
2 parents d2e44cc + 42343c8 commit 2e5cdba
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 2 deletions.
78 changes: 78 additions & 0 deletions benchmarks/BM_multiplyAddFixedGain.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// SPDX-License-Identifier: BSD-2-Clause

// This code is part of the sfizz library and is licensed under a BSD 2-clause
// license. You should have received a LICENSE.md file along with the code.
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

#include "SIMDHelpers.h"
#include <benchmark/benchmark.h>
#include <random>
#include <numeric>
#include <vector>
#include <cmath>
#include <iostream>

class MultiplyAddFixedGain : public benchmark::Fixture {
public:
void SetUp(const ::benchmark::State& state) {
std::random_device rd { };
std::mt19937 gen { rd() };
std::uniform_real_distribution<float> dist { 0, 1 };
input = std::vector<float>(state.range(0));
output = std::vector<float>(state.range(0));
gain = dist(gen);
std::fill(output.begin(), output.end(), 1.0f );
std::generate(input.begin(), input.end(), [&]() { return dist(gen); });
}

void TearDown(const ::benchmark::State& state [[maybe_unused]]) {

}

float gain = {};
std::vector<float> input;
std::vector<float> output;
};

// Baseline: a hand-written indexed loop that accumulates gain * input[i] into
// output[i] over state.range(0) elements, without going through sfz::multiplyAdd.
// The output buffer is not reset between iterations, so it keeps accumulating.
BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Straight)(benchmark::State& state) {
for (auto _ : state)
{
for (int i = 0; i < state.range(0); ++i)
output[i] += gain * input[i];
}
}

// Times sfz::multiplyAdd with its SIMD template flag set to false, applied to
// the whole input and output buffers of the fixture.
BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Scalar)(benchmark::State& state) {
for (auto _ : state)
{
sfz::multiplyAdd<float, false>(gain, input, absl::MakeSpan(output));
}
}

// Times sfz::multiplyAdd with its SIMD template flag set to true, applied to
// the whole input and output buffers of the fixture.
BENCHMARK_DEFINE_F(MultiplyAddFixedGain, SIMD)(benchmark::State& state) {
for (auto _ : state)
{
sfz::multiplyAdd<float, true>(gain, input, absl::MakeSpan(output));
}
}

// Same call as the Scalar benchmark, but both spans skip their first element
// (subspan(1)), so processing starts one float past the start of each buffer.
// NOTE(review): presumably meant to shift the data off the allocation's
// alignment boundary -- confirm against the allocator actually in use.
BENCHMARK_DEFINE_F(MultiplyAddFixedGain, Scalar_Unaligned)(benchmark::State& state) {
for (auto _ : state)
{
sfz::multiplyAdd<float, false>(gain, absl::MakeSpan(input).subspan(1), absl::MakeSpan(output).subspan(1));
}
}

// Same call as the SIMD benchmark, but both spans skip their first element
// (subspan(1)), so processing starts one float past the start of each buffer.
// NOTE(review): presumably meant to exercise the code path taken for data that
// is not aligned for vector loads -- confirm against the allocator in use.
BENCHMARK_DEFINE_F(MultiplyAddFixedGain, SIMD_Unaligned)(benchmark::State& state) {
for (auto _ : state)
{
sfz::multiplyAdd<float, true>(gain, absl::MakeSpan(input).subspan(1), absl::MakeSpan(output).subspan(1));
}
}

// Register every variant over the same buffer sizes: from 1 << 2 (4) up to
// 1 << 12 (4096) elements, multiplying the size by 4 between runs.
BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Straight)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Scalar)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
BENCHMARK_REGISTER_F(MultiplyAddFixedGain, SIMD)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
BENCHMARK_REGISTER_F(MultiplyAddFixedGain, Scalar_Unaligned)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
BENCHMARK_REGISTER_F(MultiplyAddFixedGain, SIMD_Unaligned)->RangeMultiplier(4)->Range(1 << 2, 1 << 12);
// Expands to the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ target_link_libraries(bm_ADSR PRIVATE sfizz::sfizz)

sfizz_add_benchmark(bm_add BM_add.cpp)
sfizz_add_benchmark(bm_multiplyAdd BM_multiplyAdd.cpp)
sfizz_add_benchmark(bm_multiplyAddFixedGain BM_multiplyAddFixedGain.cpp)
sfizz_add_benchmark(bm_subtract BM_subtract.cpp)
sfizz_add_benchmark(bm_copy BM_copy.cpp)
sfizz_add_benchmark(bm_pan BM_pan.cpp)
Expand Down
38 changes: 38 additions & 0 deletions src/sfizz/AudioSpan.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "Debug.h"
#include "LeakDetector.h"
#include "SIMDHelpers.h"
#include "absl/types/span.h"
#include <array>
#include <initializer_list>
#include <type_traits>
Expand Down Expand Up @@ -306,6 +307,43 @@ class AudioSpan {
}
}

/**
 * @brief Accumulate another AudioSpan into this one, channel by channel, after
 * scaling each of its frames by the matching entry of a gain span.
 *
 * Nothing is written when the channel counts differ; that mismatch, as well as
 * a gain span whose size is not numFrames, is also reported through ASSERT.
 *
 * NOTE(review): the defaulted template argument names std::enable_if<...>
 * without ::type, so it does not appear to reject any N -- confirm intent.
 *
 * @param other the AudioSpan to scale and add
 * @param gain the per-frame gains to apply
 */
template <class U, size_t N, typename = std::enable_if<N <= MaxChannels>>
void multiplyAdd(AudioSpan<U, N>& other, absl::Span<const Type> gain)
{
    static_assert(!std::is_const<Type>::value, "Can't allow mutating operations on const AudioSpans");
    ASSERT(other.getNumChannels() == numChannels);
    ASSERT(gain.size() == numFrames);

    // Channel layouts must match; otherwise leave this span untouched.
    if (other.getNumChannels() != numChannels)
        return;

    for (size_t channel = 0; channel < numChannels; ++channel)
        sfz::multiplyAdd(gain, other.getConstSpan(channel), getSpan(channel));
}

/**
 * @brief Accumulate another AudioSpan into this one, channel by channel, after
 * scaling all of its frames by a single gain value.
 *
 * Nothing is written when the channel counts differ; that mismatch is also
 * reported through ASSERT.
 *
 * NOTE(review): the defaulted template argument names std::enable_if<...>
 * without ::type, so it does not appear to reject any N -- confirm intent.
 *
 * @param other the AudioSpan to scale and add
 * @param gain the gain applied to every frame of every channel
 */
template <class U, size_t N, typename = std::enable_if<N <= MaxChannels>>
void multiplyAdd(AudioSpan<U, N>& other, const Type gain)
{
    static_assert(!std::is_const<Type>::value, "Can't allow mutating operations on const AudioSpans");
    ASSERT(other.getNumChannels() == numChannels);

    // Channel layouts must match; otherwise leave this span untouched.
    if (other.getNumChannels() != numChannels)
        return;

    for (size_t channel = 0; channel < numChannels; ++channel)
        sfz::multiplyAdd(gain, other.getConstSpan(channel), getSpan(channel));
}

/**
* @brief Copy the elements of another AudioSpan with a compatible number of channels
* to the current AudioSpan.
Expand Down
6 changes: 6 additions & 0 deletions src/sfizz/SIMDDummy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ void sfz::multiplyAdd<float, true>(absl::Span<const float> gain, absl::Span<cons
multiplyAdd<float, false>(gain, input, output);
}

// Fixed-gain multiply-add, "SIMD" flavour of this translation unit: it has no
// vector code of its own and simply defers to the <float, false> implementation.
template <>
void sfz::multiplyAdd<float, true>(const float gain, absl::Span<const float> input, absl::Span<float> output) noexcept
{
    constexpr bool useSIMD = false;
    sfz::multiplyAdd<float, useSIMD>(gain, input, output);
}

template <>
float sfz::loopingSFZIndex<float, true>(absl::Span<const float> jumps, absl::Span<float> leftCoeff, absl::Span<float> rightCoeff, absl::Span<int> indices, float floatIndex, float loopEnd, float loopStart) noexcept
{
Expand Down
20 changes: 20 additions & 0 deletions src/sfizz/SIMDHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ namespace _internals {
{
*output++ += (*gain++) * (*input++);
}

// One step of the fixed-gain multiply-add: adds gain * (*input) to *output,
// then moves both cursors to the next element.
template <class T>
inline void snippetMultiplyAdd(const T gain, const T*& input, T*& output)
{
    *output += gain * (*input);
    ++input;
    ++output;
}
}

/**
Expand Down Expand Up @@ -520,6 +526,20 @@ void multiplyAdd(absl::Span<const T> gain, absl::Span<const T> input, absl::Span
// Declaration only: the float specialization with the SIMD flag set, taking a
// per-element gain span, is defined out of line in a separate source file.
template <>
void multiplyAdd<float, true>(absl::Span<const float> gain, absl::Span<const float> input, absl::Span<float> output) noexcept;

/**
 * @brief Applies a fixed gain to the input and adds the result to the output:
 * output[i] += gain * input[i].
 *
 * The number of elements processed is the size of the shorter of the two
 * spans; an input longer than the output is additionally reported via ASSERT.
 *
 * @tparam T the underlying sample type
 * @tparam SIMD selects the implementation flavour; this generic version is scalar
 * @param gain the gain applied to every input element
 * @param input the elements to scale
 * @param output the span accumulated into
 */
template <class T, bool SIMD = SIMDConfig::multiplyAdd>
void multiplyAdd(const T gain, absl::Span<const T> input, absl::Span<T> output) noexcept
{
    ASSERT(input.size() <= output.size());
    const T* source = input.data();
    T* destination = output.data();
    for (auto remaining = std::min(output.size(), input.size()); remaining > 0; --remaining)
        _internals::snippetMultiplyAdd<T>(gain, source, destination);
}

// Declaration only: the float specialization with the SIMD flag set, taking a
// single fixed gain, is defined out of line in a separate source file.
template <>
void multiplyAdd<float, true>(const float gain, absl::Span<const float> input, absl::Span<float> output) noexcept;

namespace _internals {
template <class T>
inline void snippetRampLinear(T*& output, T& value, T step)
Expand Down
23 changes: 23 additions & 0 deletions src/sfizz/SIMDSSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,29 @@ void sfz::multiplyAdd<float, true>(absl::Span<const float> gain, absl::Span<cons
_internals::snippetMultiplyAdd<float>(g, in, out);
}

/**
 * SSE version of the fixed-gain multiply-add: output[i] += gain * input[i].
 *
 * Only the range common to both spans is processed, i.e. the first
 * min(input.size(), output.size()) elements. The work is split in three parts:
 * a scalar head, a vectorized body using aligned loads and stores, and a
 * scalar tail for what is left.
 *
 * @param gain the gain applied to every input element
 * @param input the elements to scale
 * @param output the span accumulated into
 */
template <>
void sfz::multiplyAdd<float, true>(const float gain, absl::Span<const float> input, absl::Span<float> output) noexcept
{
    auto* in = input.begin();
    auto* out = output.begin();
    const auto size = std::min(output.size(), input.size());
    // End of the range shared by both spans; no loop below may go past it,
    // otherwise a short input would be read out of bounds.
    const auto* sentinel = output.begin() + size;
    const auto* lastAligned = prevAligned(output.begin() + size);

    // Scalar head: step one element at a time while unaligned() reports that
    // the cursors cannot be used with the aligned load/store below.
    while (unaligned(out, in) && out < lastAligned)
        _internals::snippetMultiplyAdd<float>(gain, in, out);

    // Vector body: _mm_load_ps/_mm_store_ps require 16-byte aligned addresses.
    auto mmGain = _mm_set1_ps(gain);
    while (out < lastAligned) {
        auto mmOut = _mm_load_ps(out);
        mmOut = _mm_add_ps(_mm_mul_ps(mmGain, _mm_load_ps(in)), mmOut);
        _mm_store_ps(out, mmOut);
        incrementAll<TypeAlignment>(in, out);
    }

    // Scalar tail: stop at the shared range, not at output.end().
    while (out < sentinel)
        _internals::snippetMultiplyAdd<float>(gain, in, out);
}

template <>
float sfz::loopingSFZIndex<float, true>(absl::Span<const float> jumps,
absl::Span<float> leftCoeffs,
Expand Down
55 changes: 53 additions & 2 deletions tests/SIMDHelpersT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,57 @@ TEST_CASE("[Helpers] Add (SIMD vs scalar)")
REQUIRE(approxEqual<float>(outputScalar, outputSIMD));
}

// Checks the SIMD elementwise multiply-add on a 5-element buffer against
// precomputed values (accumulator[i] + gains[i] * source[i]).
TEST_CASE("[Helpers] MultiplyAdd (SIMD)")
{
    std::array<float, 5> reference { 5.0f, 4.2f, 3.6f, 3.2f, 3.0f };
    std::array<float, 5> gains { 0.0f, 0.1f, 0.2f, 0.3f, 0.4f };
    std::array<float, 5> source { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
    std::array<float, 5> accumulator { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };

    sfz::multiplyAdd<float, true>(gains, source, absl::MakeSpan(accumulator));

    REQUIRE(accumulator == reference);
}

// Runs the scalar and SIMD elementwise multiply-add on identical big buffers
// and requires the two results to be approximately equal.
TEST_CASE("[Helpers] MultiplyAdd (SIMD vs scalar)")
{
    // Gains, inputs and both accumulators all start as the ramp 0, 1, 2, ...
    std::vector<float> gain(bigBufferSize);
    absl::c_iota(gain, 0.0f);
    std::vector<float> input = gain;
    std::vector<float> scalarResult = gain;
    std::vector<float> simdResult = gain;

    sfz::multiplyAdd<float, false>(gain, input, absl::MakeSpan(scalarResult));
    sfz::multiplyAdd<float, true>(gain, input, absl::MakeSpan(simdResult));

    REQUIRE(approxEqual<float>(scalarResult, simdResult));
}

// Checks the SIMD fixed-gain multiply-add on a 5-element buffer against
// precomputed values (accumulator[i] + 0.3 * source[i]).
TEST_CASE("[Helpers] MultiplyAdd fixed gain (SIMD)")
{
    std::array<float, 5> reference { 5.3f, 4.6f, 3.9f, 3.2f, 2.5f };
    std::array<float, 5> source { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
    std::array<float, 5> accumulator { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
    float fixedGain = 0.3f;

    sfz::multiplyAdd<float, true>(fixedGain, source, absl::MakeSpan(accumulator));

    REQUIRE(accumulator == reference);
}

// Runs the scalar and SIMD fixed-gain multiply-add on identical big buffers
// and requires the two results to be approximately equal.
TEST_CASE("[Helpers] MultiplyAdd fixed gain (SIMD vs scalar)")
{
    float fixedGain = 0.3f;

    // The input and both accumulators all start as the ramp 0, 1, 2, ...
    std::vector<float> input(bigBufferSize);
    absl::c_iota(input, 0.0f);
    std::vector<float> scalarResult = input;
    std::vector<float> simdResult = input;

    sfz::multiplyAdd<float, false>(fixedGain, input, absl::MakeSpan(scalarResult));
    sfz::multiplyAdd<float, true>(fixedGain, input, absl::MakeSpan(simdResult));

    REQUIRE(approxEqual<float>(scalarResult, simdResult));
}

TEST_CASE("[Helpers] Subtract")
{
std::array<float, 5> input { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
Expand Down Expand Up @@ -717,7 +768,7 @@ TEST_CASE("[Helpers] Mean Squared (SIMD vs scalar)")
REQUIRE(sfz::meanSquared<float, false>(input) == sfz::meanSquared<float, true>(input));
}

TEST_CASE("[Helpers] Cumulative sum ")
TEST_CASE("[Helpers] Cumulative sum")
{
std::array<float, 6> input { 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f }; // 1.1 2.3 3.6 5.0f 6.5 8.1
std::array<float, 6> output;
Expand All @@ -737,7 +788,7 @@ TEST_CASE("[Helpers] Cumulative sum (SIMD vs Scalar)")
REQUIRE(approxEqual<float>(outputScalar, outputSIMD));
}

TEST_CASE("[Helpers] Diff ")
TEST_CASE("[Helpers] Diff")
{
std::array<float, 6> input { 1.1f, 2.3f, 3.6f, 5.0f, 6.5f, 8.1f };
std::array<float, 6> output;
Expand Down

0 comments on commit 2e5cdba

Please sign in to comment.