Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm pan tweaks #426

Merged
merged 1 commit into from
Sep 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions benchmarks/BM_pan_arm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// SPDX-License-Identifier: BSD-2-Clause

// This code is part of the sfizz library and is licensed under a BSD 2-clause
// license. You should have received a LICENSE.md file along with the code.
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

#include "Panning.h"
#include "simd/Common.h"
#include <benchmark/benchmark.h>
#include <random>
#include <absl/algorithm/container.h>
#include "absl/types/span.h"
#include <arm_neon.h>

#include <jsl/allocator>
// std::vector whose storage is obtained through jsl::aligned_allocator.
// The second template argument is forwarded to the allocator as its alignment
// parameter and defaults to 16.
template <typename T, std::size_t Alignment = 16>
using aligned_vector = std::vector<T, jsl::aligned_allocator<T, Alignment>>;

// Number of entries in the pan table. The count is odd so that the centre
// position maps onto a single middle entry (equal volume on both channels).
constexpr int panSize { 4095 };

// Table of pan values for the left channel, extra element for safety
static const auto panData = []()
{
std::array<float, panSize + 1> pan;
int i = 0;

for (; i < panSize; ++i)
pan[i] = std::cos(i * (piTwo<double>() / (panSize - 1)));

for (; i < static_cast<int>(pan.size()); ++i)
pan[i] = pan[panSize - 1];

return pan;
}();

// Fetches the table gain for a pan position.
// The position is scaled by (panSize - 1) and converted to an index with
// lroundPositive(). The index is not range-checked here; panScalar() clamps
// the position to [0, 1] before calling.
float _panLookup(float pan)
{
    const auto scaled = pan * (panSize - 1);
    const int slot = lroundPositive(scaled);
    return panData[slot];
}

// Reference scalar panner.
// For each of the `size` frames, the envelope value is mapped from [-1, 1] to
// [0, 1], clamped, and used to scale the left sample by the table gain at that
// position and the right sample by the table gain at the mirrored position.
void panScalar(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
    for (unsigned frame = 0; frame < size; ++frame) {
        const float position = clamp((panEnvelope[frame] + 1.0f) * 0.5f, 0.0f, 1.0f);
        leftBuffer[frame] *= _panLookup(position);
        rightBuffer[frame] *= _panLookup(1 - position);
    }
}

void panSIMD(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
const auto sentinel = panEnvelope + size;
int32_t indices[4];
while (panEnvelope < sentinel) {
float32x4_t mmPan = vld1q_f32(panEnvelope);
mmPan = vaddq_f32(mmPan, vdupq_n_f32(1.0f));
mmPan = vmulq_n_f32(mmPan, 0.5f * panSize);
mmPan = vaddq_f32(mmPan, vdupq_n_f32(0.5f));
mmPan = vminq_f32(mmPan, vdupq_n_f32(panSize));
mmPan = vmaxq_f32(mmPan, vdupq_n_f32(0.0f));
int32x4_t mmIdx = vcvtq_s32_f32(mmPan);
vst1q_s32(indices, mmIdx);

leftBuffer[0] *= panData[indices[0]];
rightBuffer[0] *= panData[panSize - indices[0] - 1];
leftBuffer[1] *= panData[indices[1]];
rightBuffer[1] *= panData[panSize - indices[1]- 1];
leftBuffer[2] *= panData[indices[2]];
rightBuffer[2] *= panData[panSize - indices[2]- 1];
leftBuffer[3] *= panData[indices[3]];
rightBuffer[3] *= panData[panSize - indices[3]- 1];

incrementAll<4>(panEnvelope, leftBuffer, rightBuffer);
}
}

// Benchmark fixture holding one pan envelope and a left/right buffer pair.
// SetUp() sizes all three to state.range(0) elements and fills them with
// random values drawn uniformly from [-1, 1).
class PanFixture : public benchmark::Fixture {
public:
    void SetUp(const ::benchmark::State& state)
    {
        std::random_device rd { };
        std::mt19937 gen { rd() };
        std::uniform_real_distribution<float> dist { -1.0f, 1.0f };
        auto draw = [&]() { return dist(gen); };

        const auto numFrames = state.range(0);
        pan.resize(numFrames);
        right.resize(numFrames);
        left.resize(numFrames);

        // Report when the three buffers fail the 16-byte willAlign check.
        const bool alignable = willAlign<16>(pan.data(), left.data(), right.data());
        if (!alignable)
            std::cout << "Will not align!" << '\n';

        absl::c_generate(pan, draw);
        absl::c_generate(left, draw);
        absl::c_generate(right, draw);
    }

    void TearDown(const ::benchmark::State& /* state */)
    {
        // Nothing to release.
    }

    aligned_vector<float> pan;
    aligned_vector<float> right;
    aligned_vector<float> left;
};

// Times the scalar reference implementation on the fixture buffers.
BENCHMARK_DEFINE_F(PanFixture, PanScalar)(benchmark::State& state)
{
    const auto numFrames = static_cast<unsigned>(state.range(0));
    for (auto _ : state)
        panScalar(pan.data(), left.data(), right.data(), numFrames);
}

// Times the local NEON implementation on the fixture buffers.
BENCHMARK_DEFINE_F(PanFixture, PanSIMD)(benchmark::State& state)
{
    const auto numFrames = static_cast<unsigned>(state.range(0));
    for (auto _ : state)
        panSIMD(pan.data(), left.data(), right.data(), numFrames);
}

// Times the library's sfz::pan() on the fixture buffers.
BENCHMARK_DEFINE_F(PanFixture, PanSfizz)(benchmark::State& state)
{
    const auto numFrames = static_cast<unsigned>(state.range(0));
    for (auto _ : state)
        sfz::pan(pan.data(), left.data(), right.data(), numFrames);
}

// Register every fixture benchmark over the buffer-size range 2^4 .. 2^12,
// stepping by a factor of 4, then emit the benchmark entry point.
BENCHMARK_REGISTER_F(PanFixture, PanScalar)
    ->RangeMultiplier(4)
    ->Range(1 << 4, 1 << 12);
BENCHMARK_REGISTER_F(PanFixture, PanSIMD)
    ->RangeMultiplier(4)
    ->Range(1 << 4, 1 << 12);
BENCHMARK_REGISTER_F(PanFixture, PanSfizz)
    ->RangeMultiplier(4)
    ->Range(1 << 4, 1 << 12);
BENCHMARK_MAIN();
6 changes: 6 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ if (TARGET bm_resample)
add_dependencies(sfizz_benchmarks bm_resample)
endif()

# NEON pan benchmark: only added when the processor string matches "armv7l".
# It compiles Panning.cpp directly into the benchmark and links sfizz-jsl.
if (SFIZZ_SYSTEM_PROCESSOR MATCHES "armv7l")
sfizz_add_benchmark(bm_pan_arm BM_pan_arm.cpp ../src/sfizz/Panning.cpp)
target_link_libraries(bm_pan_arm PRIVATE sfizz-jsl)
add_dependencies(sfizz_benchmarks bm_pan_arm)
endif()

configure_file("sample.wav" "${CMAKE_BINARY_DIR}/benchmarks/sample1.wav" COPYONLY)
configure_file("sample.wav" "${CMAKE_BINARY_DIR}/benchmarks/sample2.wav" COPYONLY)
configure_file("sample.wav" "${CMAKE_BINARY_DIR}/benchmarks/sample3.wav" COPYONLY)
Expand Down
4 changes: 2 additions & 2 deletions cmake/SfizzConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-Werror=return-type)
if (SFIZZ_SYSTEM_PROCESSOR MATCHES "^(i.86|x86_64)$")
add_compile_options(-msse2)
elseif (SFIZZ_SYSTEM_PROCESSOR MATCHES "^(armv.*)$")
add_compile_options(-mfloat-abi=hard)
elseif(SFIZZ_SYSTEM_PROCESSOR MATCHES "^(arm.*)$")
add_compile_options(-mfpu=neon)
add_compile_options(-mfloat-abi=hard)
endif()
elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(CMAKE_CXX_STANDARD 17)
Expand Down
1 change: 1 addition & 0 deletions cmake/SfizzSIMDSourceFiles.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ macro(sfizz_add_simd_sources SOURCES_VAR PREFIX)

list (APPEND ${SOURCES_VAR}
${PREFIX}/sfizz/SIMDHelpers.cpp
${PREFIX}/sfizz/simd/HelpersNEON.cpp
${PREFIX}/sfizz/simd/HelpersSSE.cpp
${PREFIX}/sfizz/simd/HelpersAVX.cpp)

Expand Down
139 changes: 124 additions & 15 deletions src/sfizz/Panning.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
#include "Panning.h"
#include "MathHelpers.h"
#include <array>
#include <cmath>

// Declarations used only by the NEON code paths of pan() and width().
#if SFIZZ_HAVE_NEON
#include <arm_neon.h>
#include "simd/Common.h"
// Sample type handled by the vector loops.
using Type = float;
// Number of samples consumed per vector step (the loops use float32x4_t).
constexpr unsigned TypeAlignment = 4;
// The same step expressed in bytes; used for the pointer alignment helpers.
constexpr unsigned ByteAlignment = TypeAlignment * sizeof(Type);
#endif


namespace sfz
{
// Number of elements in the table, odd for equal volume at center
constexpr int panSize { 4095 };

// Table of pan values for the left channel, extra element for safety
static const auto panData = []()
Expand All @@ -25,35 +35,134 @@ static const auto panData = []()
/**
 * @brief Lookup a value from the pan table.
 *
 * The position is scaled by (panSize - 1) and rounded to the nearest index.
 * No range check is performed: the callers in this file clamp the position
 * to [0, 1] before calling.
 *
 * @param pan pan position
 * @return float the table entry at the rounded index
 */
float panLookup(float pan)
{
    // reduce range, round to nearest
    const int index = lroundPositive(pan * (panSize - 1));
    return panData[index];
}

// Pans a single frame: maps *pan from [-1, 1] to [0, 1], clamps it, then
// scales the left sample by the table gain at that position and the right
// sample by the table gain at the mirrored position.
inline void tickPan(const float* pan, float* leftBuffer, float* rightBuffer)
{
    const float position = clamp((*pan + 1.0f) * 0.5f, 0.0f, 1.0f);
    *leftBuffer *= panLookup(position);
    *rightBuffer *= panLookup(1 - position);
}

// Applies the pan envelope to `size` frames of a left/right buffer pair.
//
// With NEON enabled, and when willAlign() reports that the three pointers can
// be brought to a 16-byte boundary together, the function runs a scalar
// prologue up to the first aligned frame, a vector loop over whole groups of
// four frames, and a scalar epilogue. Otherwise every frame goes through
// tickPan().
//
// The vector loop uses the same index mapping as panLookup():
// round((pan + 1) / 2 * (panSize - 1)). This keeps centre pan on the middle
// table entry for both channels, so scalar and vector frames agree.
void pan(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
    const auto sentinel = panEnvelope + size;

#if SFIZZ_HAVE_NEON
    const auto firstAligned = prevAligned<ByteAlignment>(panEnvelope + TypeAlignment - 1);

    if (willAlign<ByteAlignment>(panEnvelope, leftBuffer, rightBuffer) && (firstAligned < sentinel)) {
        // Scalar prologue up to the first aligned frame.
        while (panEnvelope < firstAligned) {
            tickPan(panEnvelope, leftBuffer, rightBuffer);
            incrementAll(panEnvelope, leftBuffer, rightBuffer);
        }

        uint32_t indices[TypeAlignment];
        float leftPan[TypeAlignment];
        float rightPan[TypeAlignment];
        const auto lastAligned = prevAligned<ByteAlignment>(sentinel);
        while (panEnvelope < lastAligned) {
            float32x4_t mmPan = vld1q_f32(panEnvelope);
            mmPan = vaddq_f32(mmPan, vdupq_n_f32(1.0f));
            mmPan = vmulq_n_f32(mmPan, 0.5f * (panSize - 1));
            // Adding 0.5 before the truncating conversion rounds to nearest.
            mmPan = vaddq_f32(mmPan, vdupq_n_f32(0.5f));
            // The float -> unsigned conversion saturates, so negative inputs
            // become 0 and only the upper bound needs an explicit clamp.
            uint32x4_t mmIdx = vcvtq_u32_f32(mmPan);
            mmIdx = vminq_u32(mmIdx, vdupq_n_u32(panSize - 1));
            vst1q_u32(indices, mmIdx);

            // Gather the gains; the right channel reads the mirrored entry.
            for (unsigned lane = 0; lane < TypeAlignment; ++lane) {
                leftPan[lane] = panData[indices[lane]];
                rightPan[lane] = panData[panSize - indices[lane] - 1];
            }

            vst1q_f32(leftBuffer, vmulq_f32(vld1q_f32(leftBuffer), vld1q_f32(leftPan)));
            vst1q_f32(rightBuffer, vmulq_f32(vld1q_f32(rightBuffer), vld1q_f32(rightPan)));

            incrementAll<TypeAlignment>(panEnvelope, leftBuffer, rightBuffer);
        }
    }
#endif

    // Scalar path: everything when NEON is unavailable, otherwise the tail.
    while (panEnvelope < sentinel) {
        tickPan(panEnvelope, leftBuffer, rightBuffer);
        incrementAll(panEnvelope, leftBuffer, rightBuffer);
    }
}

// Applies the width control to a single frame: maps *width from [-1, 1] to
// [0, 1], clamps it, looks up the gain at that position and at the mirrored
// position, and writes each output channel as a mix of both input channels
// weighted by those two gains.
inline void tickWidth(const float* width, float* leftBuffer, float* rightBuffer)
{
    const float position = clamp((*width + 1.0f) * 0.5f, 0.0f, 1.0f);
    const auto crossGain = panLookup(position);
    const auto directGain = panLookup(1 - position);
    const auto inLeft = *leftBuffer;
    const auto inRight = *rightBuffer;
    *leftBuffer = inLeft * directGain + inRight * crossGain;
    *rightBuffer = inLeft * crossGain + inRight * directGain;
}

// Applies the width envelope to `size` frames of a left/right buffer pair.
//
// With NEON enabled, and when willAlign() reports that the three pointers can
// be brought to a 16-byte boundary together, the function runs a scalar
// prologue up to the first aligned frame, a vector loop over whole groups of
// four frames, and a scalar epilogue. Otherwise every frame goes through
// tickWidth().
//
// The vector loop uses the same index mapping as panLookup():
// round((width + 1) / 2 * (panSize - 1)), so scalar and vector frames pick the
// same table entries.
void width(const float* widthEnvelope, float* leftBuffer, float* rightBuffer, unsigned size) noexcept
{
    const auto sentinel = widthEnvelope + size;

#if SFIZZ_HAVE_NEON
    const auto firstAligned = prevAligned<ByteAlignment>(widthEnvelope + TypeAlignment - 1);

    if (willAlign<ByteAlignment>(widthEnvelope, leftBuffer, rightBuffer) && firstAligned < sentinel) {
        // Scalar prologue up to the first aligned frame.
        while (widthEnvelope < firstAligned) {
            tickWidth(widthEnvelope, leftBuffer, rightBuffer);
            incrementAll(widthEnvelope, leftBuffer, rightBuffer);
        }

        uint32_t indices[TypeAlignment];
        float coeff1[TypeAlignment];
        float coeff2[TypeAlignment];
        const auto lastAligned = prevAligned<ByteAlignment>(sentinel);
        while (widthEnvelope < lastAligned) {
            float32x4_t mmWidth = vld1q_f32(widthEnvelope);
            mmWidth = vaddq_f32(mmWidth, vdupq_n_f32(1.0f));
            mmWidth = vmulq_n_f32(mmWidth, 0.5f * (panSize - 1));
            // Adding 0.5 before the truncating conversion rounds to nearest.
            mmWidth = vaddq_f32(mmWidth, vdupq_n_f32(0.5f));
            // The float -> unsigned conversion saturates, so negative inputs
            // become 0 and only the upper bound needs an explicit clamp.
            uint32x4_t mmIdx = vcvtq_u32_f32(mmWidth);
            mmIdx = vminq_u32(mmIdx, vdupq_n_u32(panSize - 1));
            vst1q_u32(indices, mmIdx);

            // Gather both gains; coeff2 reads the mirrored table entry.
            for (unsigned lane = 0; lane < TypeAlignment; ++lane) {
                coeff1[lane] = panData[indices[lane]];
                coeff2[lane] = panData[panSize - indices[lane] - 1];
            }

            const float32x4_t mmCoeff1 = vld1q_f32(coeff1);
            const float32x4_t mmCoeff2 = vld1q_f32(coeff2);
            const float32x4_t mmLeft = vld1q_f32(leftBuffer);
            const float32x4_t mmRight = vld1q_f32(rightBuffer);

            vst1q_f32(leftBuffer, vaddq_f32(vmulq_f32(mmCoeff2, mmLeft), vmulq_f32(mmCoeff1, mmRight)));
            vst1q_f32(rightBuffer, vaddq_f32(vmulq_f32(mmCoeff1, mmLeft), vmulq_f32(mmCoeff2, mmRight)));

            incrementAll<TypeAlignment>(widthEnvelope, leftBuffer, rightBuffer);
        }
    }
#endif // SFIZZ_HAVE_NEON

    // Scalar path: everything when NEON is unavailable, otherwise the tail.
    while (widthEnvelope < sentinel) {
        tickWidth(widthEnvelope, leftBuffer, rightBuffer);
        incrementAll(widthEnvelope, leftBuffer, rightBuffer);
    }
}

}
13 changes: 8 additions & 5 deletions src/sfizz/Panning.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@ namespace sfz
{

/**
* @brief Lookup a value from the pan table
*
* @param pan
* @return float
*/
* @brief Lookup a value from the pan table
* The input is not range-checked: callers must clamp it to the
* [0, 1] interval before calling.
*
* @param pan
* @return float
*/
float panLookup(float pan);


/**
* @brief Pans a mono signal left or right
*
Expand Down
2 changes: 1 addition & 1 deletion src/sfizz/effects/Width.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/

#include "Width.h"
#include "Opcode.h"
#include "Panning.h"
#include "Opcode.h"
#include "absl/memory/memory.h"

namespace sfz {
Expand Down
Loading