Skip to content

Commit

Permalink
Add further NEON steps to the codepath
Browse files (browse the repository at this point in the history)
  • Loading branch information
paulfd committed Sep 18, 2020
1 parent 4c639fe commit 3f5d2e2
Showing 1 changed file with 30 additions and 23 deletions.
53 changes: 30 additions & 23 deletions src/sfizz/Panning.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,6 +39,8 @@ void pan(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsign
{
#if SFIZZ_HAVE_NEON
uint32_t indices[4];
float leftPan[4];
float rightPan[4];
const auto sentinel = panEnvelope + size;
while (panEnvelope < sentinel) {
float32x4_t mmPan = vld1q_f32(panEnvelope);
Expand All @@ -50,16 +52,18 @@ void pan(const float* panEnvelope, float* leftBuffer, float* rightBuffer, unsign
uint32x4_t mmIdx = vcvtq_u32_f32(mmPan);
vst1q_u32(indices, mmIdx);

#define UNROLLED_BLOCK(i) \
leftBuffer[i] *= panData[indices[i]]; \
rightBuffer[i] *= panData[panSize - indices[i] - 1];

UNROLLED_BLOCK(0)
UNROLLED_BLOCK(1)
UNROLLED_BLOCK(2)
UNROLLED_BLOCK(3)
leftPan[0] = panData[indices[0]];
rightPan[0] = panData[panSize - indices[0] - 1];
leftPan[1] = panData[indices[1]];
rightPan[1] = panData[panSize - indices[1] - 1];
leftPan[2] = panData[indices[2]];
rightPan[2] = panData[panSize - indices[2] - 1];
leftPan[3] = panData[indices[3]];
rightPan[3] = panData[panSize - indices[3] - 1];

#undef UNROLLED_BLOCK
vst1q_f32(leftBuffer, vmulq_f32(vld1q_f32(leftBuffer), vld1q_f32(leftPan)));
vst1q_f32(rightBuffer, vmulq_f32(vld1q_f32(rightBuffer), vld1q_f32(rightPan)));

incrementAll<4>(panEnvelope, leftBuffer, rightBuffer);
}
Expand All @@ -80,7 +84,8 @@ void width(const float* widthEnvelope, float* leftBuffer, float* rightBuffer, un
{
#if SFIZZ_HAVE_NEON
uint32_t indices[4];
float coeff1, coeff2, l, r;
float coeff1[4];
float coeff2[4];
const auto sentinel = widthEnvelope + size;
while (widthEnvelope < sentinel) {
float32x4_t mmWidth = vld1q_f32(widthEnvelope);
Expand All @@ -92,20 +97,22 @@ void width(const float* widthEnvelope, float* leftBuffer, float* rightBuffer, un
uint32x4_t mmIdx = vcvtq_u32_f32(mmWidth);
vst1q_u32(indices, mmIdx);

#define UNROLLED_BLOCK(i) \
coeff1 = panData[indices[i]]; \
coeff2 = panData[panSize - indices[i] - 1]; \
l = leftBuffer[i]; \
r = rightBuffer[i]; \
rightBuffer[i] = l * coeff2 + r * coeff1; \
leftBuffer[i] = l * coeff1 + r * coeff2;

UNROLLED_BLOCK(0)
UNROLLED_BLOCK(1)
UNROLLED_BLOCK(2)
UNROLLED_BLOCK(3)

#undef UNROLLED_BLOCK
coeff1[0] = panData[indices[0]];
coeff2[0] = panData[panSize - indices[0] - 1];
coeff1[1] = panData[indices[1]];
coeff2[1] = panData[panSize - indices[1] - 1];
coeff1[2] = panData[indices[2]];
coeff2[2] = panData[panSize - indices[2] - 1];
coeff1[3] = panData[indices[3]];
coeff2[3] = panData[panSize - indices[3] - 1];

float32x4_t mmCoeff1 = vld1q_f32(coeff1);
float32x4_t mmCoeff2 = vld1q_f32(coeff2);
float32x4_t mmLeft = vld1q_f32(leftBuffer);
float32x4_t mmRight = vld1q_f32(rightBuffer);

vst1q_f32(leftBuffer, vaddq_f32(vmulq_f32(mmCoeff2, mmLeft), vmulq_f32(mmCoeff1, mmRight));
vst1q_f32(rightBuffer, vaddq_f32(vmulq_f32(mmCoeff1, mmLeft), vmulq_f32(mmCoeff2, mmRight));

incrementAll<4>(widthEnvelope, leftBuffer, rightBuffer);
}
Expand Down

0 comments on commit 3f5d2e2

Please sign in to comment.