Skip to content

Commit

Permalink
Add a NEON codepath for panning and width
Browse files Browse the repository at this point in the history
  • Loading branch information
paulfd committed Sep 20, 2020
1 parent a2ecaf9 commit afc390e
Show file tree
Hide file tree
Showing 12 changed files with 422 additions and 81 deletions.
134 changes: 134 additions & 0 deletions benchmarks/BM_pan_arm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// SPDX-License-Identifier: BSD-2-Clause

// This code is part of the sfizz library and is licensed under a BSD 2-clause
// license. You should have received a LICENSE.md file along with the code.
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

#include "Panning.h"
#include "simd/Common.h"
#include <benchmark/benchmark.h>
#include <random>
#include <absl/algorithm/container.h>
#include "absl/types/span.h"
#include <arm_neon.h>

#include <jsl/allocator>
// std::vector whose storage comes from jsl::aligned_allocator<T, A>.
// NOTE(review): A is presumably the alignment in bytes (16 by default, the
// same value the fixture checks with willAlign<16>) -- confirm in jsl/allocator.
template <class T, std::size_t A = 16>
using aligned_vector = std::vector<T, jsl::aligned_allocator<T, A>>;

// Length of the pan gain table. Kept odd so that a single middle entry exists
// and both channels read the same gain when the pan is centered.
constexpr int panSize { 4095 };

// Table of pan values for the left channel, extra element for safety
static const auto panData = []()
{
std::array<float, panSize + 1> pan;
int i = 0;

for (; i < panSize; ++i)
pan[i] = std::cos(i * (piTwo<double>() / (panSize - 1)));

for (; i < static_cast<int>(pan.size()); ++i)
pan[i] = pan[panSize - 1];

return pan;
}();

// Returns the table gain for a normalized pan position.
// The position is scaled by (panSize - 1) and rounded to the nearest table
// slot; no range check is made here, callers clamp to [0, 1] beforehand.
float _panLookup(float pan)
{
    const float scaled = pan * (panSize - 1);
    const int slot = lroundPositive(scaled);
    return panData[slot];
}

// Scalar reference implementation of the pan stage.
// For each of the `size` frames, the pan value is mapped from [-1, 1] to
// [0, 1] (clamped), then the left sample is scaled by the table gain at that
// position and the right sample by the gain at the mirrored position.
// Both buffers are modified in place.
void panScalar(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
    for (unsigned frame = 0; frame < size; ++frame) {
        const float position = clamp((panEnvelope[frame] + 1.0f) * 0.5f, 0.0f, 1.0f);
        leftBuffer[frame] *= _panLookup(position);
        rightBuffer[frame] *= _panLookup(1 - position);
    }
}

void panSIMD(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
const auto sentinel = panEnvelope + size;
int32_t indices[4];
while (panEnvelope < sentinel) {
float32x4_t mmPan = vld1q_f32(panEnvelope);
mmPan = vaddq_f32(mmPan, vdupq_n_f32(1.0f));
mmPan = vmulq_n_f32(mmPan, 0.5f * panSize);
mmPan = vaddq_f32(mmPan, vdupq_n_f32(0.5f));
mmPan = vminq_f32(mmPan, vdupq_n_f32(panSize));
mmPan = vmaxq_f32(mmPan, vdupq_n_f32(0.0f));
int32x4_t mmIdx = vcvtq_s32_f32(mmPan);
vst1q_s32(indices, mmIdx);

leftBuffer[0] *= panData[indices[0]];
rightBuffer[0] *= panData[panSize - indices[0] - 1];
leftBuffer[1] *= panData[indices[1]];
rightBuffer[1] *= panData[panSize - indices[1]- 1];
leftBuffer[2] *= panData[indices[2]];
rightBuffer[2] *= panData[panSize - indices[2]- 1];
leftBuffer[3] *= panData[indices[3]];
rightBuffer[3] *= panData[panSize - indices[3]- 1];

incrementAll<4>(panEnvelope, leftBuffer, rightBuffer);
}
}

// Benchmark fixture owning three equally sized, aligned float buffers
// (pan envelope, left channel, right channel) filled with uniform random
// values in [-1, 1). The buffer length is the benchmark's first range argument.
class PanFixture : public benchmark::Fixture {
public:
    void SetUp(const ::benchmark::State& state) {
        const auto numFrames = state.range(0);
        pan.resize(numFrames);
        right.resize(numFrames);
        left.resize(numFrames);

        // Report buffers for which the SIMD path cannot be aligned.
        const bool alignable = willAlign<16>(pan.data(), left.data(), right.data());
        if (!alignable)
            std::cout << "Will not align!" << '\n';

        std::random_device seedSource { };
        std::mt19937 engine { seedSource() };
        std::uniform_real_distribution<float> uniform { -1.0f, 1.0f };
        const auto draw = [&]() { return uniform(engine); };

        absl::c_generate(pan, draw);
        absl::c_generate(left, draw);
        absl::c_generate(right, draw);
    }

    void TearDown(const ::benchmark::State& /* state */) {
        // Nothing to release: the vectors clean up after themselves.
    }

    aligned_vector<float> pan;
    aligned_vector<float> right;
    aligned_vector<float> left;
};

// Times the scalar reference implementation on the fixture buffers.
BENCHMARK_DEFINE_F(PanFixture, PanScalar)(benchmark::State& state) {
    const auto numFrames = state.range(0);
    for (auto _ : state)
        panScalar(pan.data(), left.data(), right.data(), numFrames);
}

// Times the local NEON implementation on the fixture buffers.
BENCHMARK_DEFINE_F(PanFixture, PanSIMD)(benchmark::State& state) {
    const auto numFrames = state.range(0);
    for (auto _ : state)
        panSIMD(pan.data(), left.data(), right.data(), numFrames);
}

// Times the library implementation (sfz::pan) on the fixture buffers.
BENCHMARK_DEFINE_F(PanFixture, PanSfizz)(benchmark::State& state) {
    const auto numFrames = state.range(0);
    for (auto _ : state)
        sfz::pan(pan.data(), left.data(), right.data(), numFrames);
}

// Register the three fixture benchmarks with identical argument ranges:
// buffer sizes from 16 (1 << 4) to 4096 (1 << 12) frames, multiplied by 4 at
// each step, so the scalar, NEON and library variants are directly comparable.
// BENCHMARK_MAIN() then provides the program entry point.
BENCHMARK_REGISTER_F(PanFixture, PanScalar)->RangeMultiplier(4)->Range((1 << 4), (1 << 12));
BENCHMARK_REGISTER_F(PanFixture, PanSIMD)->RangeMultiplier(4)->Range((1 << 4), (1 << 12));
BENCHMARK_REGISTER_F(PanFixture, PanSfizz)->RangeMultiplier(4)->Range((1 << 4), (1 << 12));
BENCHMARK_MAIN();
6 changes: 6 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ if (TARGET bm_resample)
add_dependencies(sfizz_benchmarks bm_resample)
endif()

# NEON panning benchmark. BM_pan_arm.cpp includes <arm_neon.h>, so the target
# is only created when the processor string matches "armv7l". Panning.cpp is
# compiled into the benchmark directly, and sfizz-jsl is linked for the
# aligned allocator header.
if (SFIZZ_SYSTEM_PROCESSOR MATCHES "armv7l")
sfizz_add_benchmark(bm_pan_arm BM_pan_arm.cpp ../src/sfizz/Panning.cpp)
target_link_libraries(bm_pan_arm PRIVATE sfizz-jsl)
add_dependencies(sfizz_benchmarks bm_pan_arm)
endif()

configure_file("sample.wav" "${CMAKE_BINARY_DIR}/benchmarks/sample1.wav" COPYONLY)
configure_file("sample.wav" "${CMAKE_BINARY_DIR}/benchmarks/sample2.wav" COPYONLY)
configure_file("sample.wav" "${CMAKE_BINARY_DIR}/benchmarks/sample3.wav" COPYONLY)
Expand Down
4 changes: 2 additions & 2 deletions cmake/SfizzConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-Werror=return-type)
if (SFIZZ_SYSTEM_PROCESSOR MATCHES "^(i.86|x86_64)$")
add_compile_options(-msse2)
elseif (SFIZZ_SYSTEM_PROCESSOR MATCHES "^(armv.*)$")
add_compile_options(-mfloat-abi=hard)
elseif(SFIZZ_SYSTEM_PROCESSOR MATCHES "^(arm.*)$")
add_compile_options(-mfpu=neon)
add_compile_options(-mfloat-abi=hard)
endif()
elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(CMAKE_CXX_STANDARD 17)
Expand Down
1 change: 1 addition & 0 deletions cmake/SfizzSIMDSourceFiles.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ macro(sfizz_add_simd_sources SOURCES_VAR PREFIX)

list (APPEND ${SOURCES_VAR}
${PREFIX}/sfizz/SIMDHelpers.cpp
${PREFIX}/sfizz/simd/HelpersNEON.cpp
${PREFIX}/sfizz/simd/HelpersSSE.cpp
${PREFIX}/sfizz/simd/HelpersAVX.cpp)

Expand Down
139 changes: 124 additions & 15 deletions src/sfizz/Panning.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
#include "Panning.h"
#include "MathHelpers.h"
#include <array>
#include <cmath>

// NEON-only declarations: the intrinsics header, the pointer alignment helpers
// and the constants shared by the vectorized loops of pan() and width().
#if SFIZZ_HAVE_NEON
#include <arm_neon.h>
#include "simd/Common.h"
// Sample type processed by the vectorized loops
using Type = float;
// Number of samples handled per vector iteration (a float32x4_t holds 4 floats)
constexpr unsigned TypeAlignment = 4;
// Size in bytes of one vector of samples; passed to prevAligned/willAlign
constexpr unsigned ByteAlignment = TypeAlignment * sizeof(Type);
#endif


namespace sfz
{
// Number of elements in the table, odd for equal volume at center
constexpr int panSize = 4095;

constexpr int panSize { 4095 };

// Table of pan values for the left channel, extra element for safety
static const auto panData = []()
Expand All @@ -25,35 +35,134 @@ static const auto panData = []()
/**
 * Look up the left-channel gain for a normalized pan position.
 *
 * The position is scaled by (panSize - 1) and rounded to the nearest table
 * entry. No range check is performed: the caller must pass a value in
 * [0, 1], otherwise the table is read out of bounds.
 */
float panLookup(float pan)
{
    // reduce range, round to nearest
    const auto index = lroundPositive(pan * (panSize - 1));
    return panData[index];
}

// Processes a single frame of the pan stage: maps *pan from [-1, 1] to a
// clamped [0, 1] position, then scales the left sample by the table gain at
// that position and the right sample by the gain at the mirrored position.
inline void tickPan(const float* pan, float* leftBuffer, float* rightBuffer)
{
    const float position = clamp((*pan + 1.0f) * 0.5f, 0.0f, 1.0f);
    *leftBuffer *= panLookup(position);
    *rightBuffer *= panLookup(1 - position);
}

/**
 * Apply a per-sample pan envelope to a stereo pair, in place.
 *
 * Each pan value (nominally in [-1, 1], clamped otherwise) selects a gain from
 * the pan table for the left channel and the mirrored gain for the right one.
 *
 * When NEON is available and the three pointers can be brought to a vector
 * boundary together, the bulk of the buffer is processed 4 samples at a time;
 * leading and trailing samples go through tickPan(). The vector path uses the
 * same index mapping as panLookup(), round(p * (panSize - 1)), so the result
 * does not depend on which path handled a given sample.
 *
 * @param panEnvelope  pan values, one per sample
 * @param leftBuffer   left channel, modified in place
 * @param rightBuffer  right channel, modified in place
 * @param size         number of samples in each buffer
 */
void pan(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
    const auto sentinel = panEnvelope + size;

#if SFIZZ_HAVE_NEON
    // NOTE(review): prevAligned presumably rounds a pointer down to the given
    // byte boundary, making this the first boundary at or after panEnvelope.
    const auto firstAligned = prevAligned<ByteAlignment>(panEnvelope + TypeAlignment - 1);

    if (willAlign<ByteAlignment>(panEnvelope, leftBuffer, rightBuffer) && (firstAligned < sentinel)) {
        // Scalar head, up to the first vector boundary
        while (panEnvelope < firstAligned) {
            tickPan(panEnvelope, leftBuffer, rightBuffer);
            incrementAll(panEnvelope, leftBuffer, rightBuffer);
        }

        // Maps [-1, 1] onto [0, panSize - 1], like the scalar lookup
        constexpr float indexScale = 0.5f * (panSize - 1);

        uint32_t indices[TypeAlignment];
        float leftPan[TypeAlignment];
        float rightPan[TypeAlignment];
        const auto lastAligned = prevAligned<ByteAlignment>(sentinel);
        while (panEnvelope < lastAligned) {
            float32x4_t mmPan = vld1q_f32(panEnvelope);
            mmPan = vaddq_f32(mmPan, vdupq_n_f32(1.0f));
            mmPan = vmulq_n_f32(mmPan, indexScale);
            // +0.5 then truncation rounds to nearest
            mmPan = vaddq_f32(mmPan, vdupq_n_f32(0.5f));
            // The unsigned conversion saturates negative inputs to 0, so only
            // the upper bound needs an explicit clamp.
            uint32x4_t mmIdx = vcvtq_u32_f32(mmPan);
            mmIdx = vminq_u32(mmIdx, vdupq_n_u32(panSize - 1));
            vst1q_u32(indices, mmIdx);

            for (unsigned lane = 0; lane < TypeAlignment; ++lane) {
                leftPan[lane] = panData[indices[lane]];
                rightPan[lane] = panData[panSize - 1 - indices[lane]];
            }

            vst1q_f32(leftBuffer, vmulq_f32(vld1q_f32(leftBuffer), vld1q_f32(leftPan)));
            vst1q_f32(rightBuffer, vmulq_f32(vld1q_f32(rightBuffer), vld1q_f32(rightPan)));

            incrementAll<TypeAlignment>(panEnvelope, leftBuffer, rightBuffer);
        }
    }
#endif

    // Scalar tail; also the whole buffer when the vector path was not taken
    while (panEnvelope < sentinel) {
        tickPan(panEnvelope, leftBuffer, rightBuffer);
        incrementAll(panEnvelope, leftBuffer, rightBuffer);
    }
}

// Processes a single frame of the width stage: maps *width from [-1, 1] to a
// clamped [0, 1] position, looks up the two complementary table gains and
// cross-mixes the left and right samples with them, in place.
inline void tickWidth(const float* width, float* leftBuffer, float* rightBuffer)
{
    const float position = clamp((*width + 1.0f) * 0.5f, 0.0f, 1.0f);
    const float crossGain = panLookup(position);
    const float directGain = panLookup(1 - position);

    // Read both inputs before writing either output
    const float inLeft = *leftBuffer;
    const float inRight = *rightBuffer;
    *leftBuffer = inLeft * directGain + inRight * crossGain;
    *rightBuffer = inLeft * crossGain + inRight * directGain;
}

/**
 * Apply a per-sample stereo width envelope to a stereo pair, in place.
 *
 * Each width value (nominally in [-1, 1], clamped otherwise) selects two
 * complementary gains from the pan table, which are used to cross-mix the
 * left and right channels.
 *
 * When NEON is available and the three pointers can be brought to a vector
 * boundary together, the bulk of the buffer is processed 4 samples at a time;
 * leading and trailing samples go through tickWidth(). The vector path uses
 * the same index mapping as panLookup(), round(w * (panSize - 1)), so the
 * result does not depend on which path handled a given sample.
 *
 * @param widthEnvelope  width values, one per sample
 * @param leftBuffer     left channel, modified in place
 * @param rightBuffer    right channel, modified in place
 * @param size           number of samples in each buffer
 */
void width(const float* widthEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
    const auto sentinel = widthEnvelope + size;

#if SFIZZ_HAVE_NEON
    // NOTE(review): prevAligned presumably rounds a pointer down to the given
    // byte boundary, making this the first boundary at or after widthEnvelope.
    const auto firstAligned = prevAligned<ByteAlignment>(widthEnvelope + TypeAlignment - 1);

    if (willAlign<ByteAlignment>(widthEnvelope, leftBuffer, rightBuffer) && firstAligned < sentinel) {
        // Scalar head, up to the first vector boundary
        while (widthEnvelope < firstAligned) {
            tickWidth(widthEnvelope, leftBuffer, rightBuffer);
            incrementAll(widthEnvelope, leftBuffer, rightBuffer);
        }

        // Maps [-1, 1] onto [0, panSize - 1], like the scalar lookup
        constexpr float indexScale = 0.5f * (panSize - 1);

        uint32_t indices[TypeAlignment];
        float coeff1[TypeAlignment];
        float coeff2[TypeAlignment];
        const auto lastAligned = prevAligned<ByteAlignment>(sentinel);
        while (widthEnvelope < lastAligned) {
            float32x4_t mmWidth = vld1q_f32(widthEnvelope);
            mmWidth = vaddq_f32(mmWidth, vdupq_n_f32(1.0f));
            mmWidth = vmulq_n_f32(mmWidth, indexScale);
            // +0.5 then truncation rounds to nearest
            mmWidth = vaddq_f32(mmWidth, vdupq_n_f32(0.5f));
            // The unsigned conversion saturates negative inputs to 0, so only
            // the upper bound needs an explicit clamp.
            uint32x4_t mmIdx = vcvtq_u32_f32(mmWidth);
            mmIdx = vminq_u32(mmIdx, vdupq_n_u32(panSize - 1));
            vst1q_u32(indices, mmIdx);

            for (unsigned lane = 0; lane < TypeAlignment; ++lane) {
                coeff1[lane] = panData[indices[lane]];
                coeff2[lane] = panData[panSize - 1 - indices[lane]];
            }

            float32x4_t mmCoeff1 = vld1q_f32(coeff1);
            float32x4_t mmCoeff2 = vld1q_f32(coeff2);
            float32x4_t mmLeft = vld1q_f32(leftBuffer);
            float32x4_t mmRight = vld1q_f32(rightBuffer);

            vst1q_f32(leftBuffer, vaddq_f32(vmulq_f32(mmCoeff2, mmLeft), vmulq_f32(mmCoeff1, mmRight)));
            vst1q_f32(rightBuffer, vaddq_f32(vmulq_f32(mmCoeff1, mmLeft), vmulq_f32(mmCoeff2, mmRight)));

            incrementAll<TypeAlignment>(widthEnvelope, leftBuffer, rightBuffer);
        }
    }
#endif // SFIZZ_HAVE_NEON

    // Scalar tail; also the whole buffer when the vector path was not taken
    while (widthEnvelope < sentinel) {
        tickWidth(widthEnvelope, leftBuffer, rightBuffer);
        incrementAll(widthEnvelope, leftBuffer, rightBuffer);
    }
}

}
13 changes: 8 additions & 5 deletions src/sfizz/Panning.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@ namespace sfz
{

/**
* @brief Lookup a value from the pan table
*
* @param pan
* @return float
*/
* @brief Lookup a value from the pan table
* No range check is performed: the caller must clamp the
* pan value to [0, 1] before calling.
*
* @param pan
* @return float
*/
float panLookup(float pan);


/**
* @brief Pans a mono signal left or right
*
Expand Down
2 changes: 1 addition & 1 deletion src/sfizz/effects/Width.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/

#include "Width.h"
#include "Opcode.h"
#include "Panning.h"
#include "Opcode.h"
#include "absl/memory/memory.h"

namespace sfz {
Expand Down
Loading

0 comments on commit afc390e

Please sign in to comment.