From 6e0b494191107f1e5b3afa6981825d314f800a50 Mon Sep 17 00:00:00 2001 From: Manuel Virgilio Date: Tue, 14 Nov 2023 22:42:28 +0100 Subject: [PATCH 1/2] added cpu optimization detection and avx optimization --- Makefile | 1 + common/optimization/avx.cpp | 72 ++++++++++++++++++++ common/optimization/avx.hpp | 13 ++++ common/optimization/default.cpp | 26 +++++++ common/optimization/default.hpp | 14 ++++ common/optimization/optimization.cpp | 65 ++++++++++++++++++ common/optimization/optimization.hpp | 13 ++++ plugins/dragonfly-early-reflections/DSP.cpp | 16 ++--- plugins/dragonfly-early-reflections/DSP.hpp | 3 + plugins/dragonfly-early-reflections/Makefile | 7 +- plugins/dragonfly-hall-reverb/DSP.cpp | 34 ++++----- plugins/dragonfly-hall-reverb/DSP.hpp | 3 + plugins/dragonfly-hall-reverb/Makefile | 7 +- plugins/dragonfly-plate-reverb/DSP.cpp | 16 ++--- plugins/dragonfly-plate-reverb/DSP.hpp | 3 + plugins/dragonfly-plate-reverb/Makefile | 7 +- plugins/dragonfly-room-reverb/DSP.cpp | 34 ++++----- plugins/dragonfly-room-reverb/DSP.hpp | 3 + plugins/dragonfly-room-reverb/Makefile | 7 +- 19 files changed, 286 insertions(+), 58 deletions(-) create mode 100644 common/optimization/avx.cpp create mode 100644 common/optimization/avx.hpp create mode 100644 common/optimization/default.cpp create mode 100644 common/optimization/default.hpp create mode 100644 common/optimization/optimization.cpp create mode 100644 common/optimization/optimization.hpp diff --git a/Makefile b/Makefile index 8694093..272c253 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,7 @@ clean: rm -f common/*.d common/*.o rm -f common/freeverb/*.d common/freeverb/*.o rm -f common/kiss_fft/*.d common/kiss_fft/*.o + rm -f common/optimization/*.d common/optimization/*.o rm -f dpf/utils/lv2_ttl_generator.d # -------------------------------------------------------------- diff --git a/common/optimization/avx.cpp b/common/optimization/avx.cpp new file mode 100644 index 0000000..539097d --- /dev/null +++ 
b/common/optimization/avx.cpp @@ -0,0 +1,72 @@ +#include "avx.hpp" + +#include + +void VSUM32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length) +{ + float extracted[8]; + int32_t processed = 0; + + while ( processed < length ) + { + int processing = ((length - processed) % 8) + 1; + + const __m256 mm_op1 = _mm256_loadu_ps(op1 + processed); + const __m256 mm_op2 = _mm256_loadu_ps(op2 + processed ); + __m256 mm_res = _mm256_add_ps(mm_op1, mm_op2); + + _mm256_storeu_ps(extracted, mm_res); + for ( int i = 0 ; i < processing ; i++ ) + { + result[processed + i] = extracted[i]; + } + + processed += processing; + } +} + +void VMUL32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length) +{ + float extracted[8]; + int32_t processed = 0; + + while ( processed < length ) + { + int processing = ((length - processed) % 8) + 1; + const __m256 mm_op1 = _mm256_loadu_ps(op1 + processed); + const __m256 mm_op2 = _mm256_loadu_ps(op2 + processed); + __m256 mm_res = _mm256_mul_ps(mm_op1, mm_op2); + + _mm256_storeu_ps(extracted, mm_res); + for ( int i = 0 ; i < processing ; i++ ) + { + result[processed + i] = extracted[i]; + } + + processed += processing; + } +} + +void VMUL32FLOAT_V_avx(const float value, const float* vec, float* result, int32_t length) +{ + float extracted[8]; + const __m256 mm_op1 = _mm256_set1_ps(value); + + int processed = 0; + while ( processed < length ) + { + int processing = ((length - processed) % 8) + 1; + + const __m256 mm_op2 = _mm256_loadu_ps(vec + processed); + __m256 mm_res = _mm256_mul_ps(mm_op1, mm_op2); + + _mm256_storeu_ps(extracted, mm_res); + for ( int i = 0 ; i < processing ; i++ ) + { + result[processed + i] = extracted[i]; + } + + processed += processing; + } +} + diff --git a/common/optimization/avx.hpp b/common/optimization/avx.hpp new file mode 100644 index 0000000..6b0d1e3 --- /dev/null +++ b/common/optimization/avx.hpp @@ -0,0 +1,13 @@ + +#ifndef OPTIMIZATION_AVX_H_INCLUDED +#define 
OPTIMIZATION_AVX_H_INCLUDED + +#include + +void VSUM32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length); + +void VMUL32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length); + +void VMUL32FLOAT_V_avx(const float value, const float* vec, float* result, int32_t length); + +#endif \ No newline at end of file diff --git a/common/optimization/default.cpp b/common/optimization/default.cpp new file mode 100644 index 0000000..b12033f --- /dev/null +++ b/common/optimization/default.cpp @@ -0,0 +1,26 @@ +#include "default.hpp" + +void VSUM32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length) +{ + for ( int32_t i = 0 ; i < length ; i++ ) + { + result[i] = op1[i] + op2[i]; + } +} + +void VMUL32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length) +{ + for ( int32_t i = 0 ; i < length ; i++ ) + { + result[i] = op1[i] * op2[i]; + } +} + +void VMUL32FLOAT_V_default(const float value, const float* vec, float* result, int32_t length) +{ + for ( int32_t i = 0 ; i < length ; i++ ) + { + result[i] = value * vec[i]; + } +} + diff --git a/common/optimization/default.hpp b/common/optimization/default.hpp new file mode 100644 index 0000000..ebdd7a8 --- /dev/null +++ b/common/optimization/default.hpp @@ -0,0 +1,14 @@ + +#ifndef OPTIMIZATION_DEFAULT_H_INCLUDED +#define OPTIMIZATION_DEFAULT_H_INCLUDED + +#include + +void VSUM32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length); + +void VMUL32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length); + +void VMUL32FLOAT_V_default(const float value, const float* vec, float* result, int32_t length); + + +#endif \ No newline at end of file diff --git a/common/optimization/optimization.cpp b/common/optimization/optimization.cpp new file mode 100644 index 0000000..b6066f9 --- /dev/null +++ b/common/optimization/optimization.cpp @@ -0,0 +1,65 @@ + +#include "optimization.hpp" +#include + 
+#include "default.hpp" +#include "avx.hpp" + +typedef struct +{ + void (*VSUM32FLOAT)(const float* op1, const float* op2, float* result, int32_t length); + void (*VMUL32FLOAT)(const float* op1, const float* op2, float* result, int32_t length); + void (*VMUL32FLOAT_V)(const float value, const float* vec, float* result, int32_t length); +} _CpuOptimization; + +int8_t OptimizationInitialized = 0; +_CpuOptimization CpuOptimization; + +void SetupOptimization() +{ + __builtin_cpu_init(); + if ( __builtin_cpu_supports("avx") ) + { + CpuOptimization.VSUM32FLOAT = VSUM32FLOAT_avx; + CpuOptimization.VMUL32FLOAT = VMUL32FLOAT_avx; + CpuOptimization.VMUL32FLOAT_V = VMUL32FLOAT_V_avx; + } + else + { + CpuOptimization.VSUM32FLOAT = VSUM32FLOAT_default; + CpuOptimization.VMUL32FLOAT = VMUL32FLOAT_default; + CpuOptimization.VMUL32FLOAT_V = VMUL32FLOAT_V_default; + } + OptimizationInitialized = 1; +} + + +void VSUM32FLOAT(const float* op1, const float* op2, float* result, int32_t length) +{ + if (!OptimizationInitialized) + { + SetupOptimization(); + } + + CpuOptimization.VSUM32FLOAT(op1,op2,result,length); +} + +void VMUL32FLOAT(const float* op1, const float* op2, float* result, int32_t length) +{ + if (!OptimizationInitialized) + { + SetupOptimization(); + } + + CpuOptimization.VMUL32FLOAT(op1,op2,result,length); +} + +void VMUL32FLOAT_V(const float value, const float* vec, float* result, int32_t length) +{ + if (!OptimizationInitialized) + { + SetupOptimization(); + } + + CpuOptimization.VMUL32FLOAT_V(value,vec,result,length); +} \ No newline at end of file diff --git a/common/optimization/optimization.hpp b/common/optimization/optimization.hpp new file mode 100644 index 0000000..a3f1a89 --- /dev/null +++ b/common/optimization/optimization.hpp @@ -0,0 +1,13 @@ + +#ifndef OPTIMIZATION_H_INCLUDED +#define OPTIMIZATION_H_INCLUDED + +#include + +void VSUM32FLOAT(const float* op1, const float* op2, float* result, int32_t length); + +void VMUL32FLOAT(const float* op1, const float* 
op2, float* result, int32_t length); + +void VMUL32FLOAT_V(const float value, const float* vec, float* result, int32_t length); + +#endif \ No newline at end of file diff --git a/plugins/dragonfly-early-reflections/DSP.cpp b/plugins/dragonfly-early-reflections/DSP.cpp index d1f3439..858caa4 100644 --- a/plugins/dragonfly-early-reflections/DSP.cpp +++ b/plugins/dragonfly-early-reflections/DSP.cpp @@ -19,6 +19,7 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include "extra/ScopedDenormalDisable.hpp" +#include "optimization/optimization.hpp" #include "DSP.hpp" @@ -91,16 +92,13 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra buffer_frames ); - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] = - dryLevel * inputs[0][offset + i] + - wetLevel * output_buffer[0][i]; - - outputs[1][offset + i] = - dryLevel * inputs[1][offset + i] + - wetLevel * output_buffer[1][i]; - } + VMUL32FLOAT_V( dryLevel, &inputs[0][offset], dry_buffer, buffer_frames ); + VMUL32FLOAT_V( wetLevel, output_buffer[0], wet_buffer, buffer_frames ); + VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[0][offset], buffer_frames ); + VMUL32FLOAT_V( dryLevel, &inputs[1][offset], dry_buffer, buffer_frames ); + VMUL32FLOAT_V( wetLevel, output_buffer[1], wet_buffer, buffer_frames ); + VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[1][offset], buffer_frames ); } } diff --git a/plugins/dragonfly-early-reflections/DSP.hpp b/plugins/dragonfly-early-reflections/DSP.hpp index 6b43d74..447cf72 100644 --- a/plugins/dragonfly-early-reflections/DSP.hpp +++ b/plugins/dragonfly-early-reflections/DSP.hpp @@ -46,6 +46,9 @@ class DragonflyReverbDSP : public AbstractDSP { float input_buffer[2][BUFFER_SIZE]; float output_buffer[2][BUFFER_SIZE]; + float dry_buffer[BUFFER_SIZE]; + float wet_buffer[BUFFER_SIZE]; + void setInputLPF(float freq); void setInputHPF(float freq); }; diff --git a/plugins/dragonfly-early-reflections/Makefile 
b/plugins/dragonfly-early-reflections/Makefile index 5b28096..f0c1af3 100644 --- a/plugins/dragonfly-early-reflections/Makefile +++ b/plugins/dragonfly-early-reflections/Makefile @@ -14,7 +14,10 @@ NAME = DragonflyEarlyReflections FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c + ../../common/kiss_fft/kiss_fftr.c \ + ../../common/optimization/optimization.cpp \ + ../../common/optimization/default.cpp \ + ../../common/optimization/avx.cpp ifneq ($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -58,7 +61,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT +BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3) diff --git a/plugins/dragonfly-hall-reverb/DSP.cpp b/plugins/dragonfly-hall-reverb/DSP.cpp index 3878b12..dcad8c1 100644 --- a/plugins/dragonfly-hall-reverb/DSP.cpp +++ b/plugins/dragonfly-hall-reverb/DSP.cpp @@ -18,6 +18,7 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include "extra/ScopedDenormalDisable.hpp" +#include "optimization/optimization.hpp" #include "DSP.hpp" @@ -116,10 +117,11 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra early_out_buffer[1], buffer_frames); - for (uint32_t i = 0; i < buffer_frames; i++) { - late_in_buffer[0][i] = early_send * early_out_buffer[0][i] + inputs[0][offset + i]; - late_in_buffer[1][i] = early_send * early_out_buffer[1][i] + inputs[1][offset + i]; - } + VMUL32FLOAT_V(early_send, early_out_buffer[0], early_buffer, buffer_frames); + VSUM32FLOAT(early_buffer, &inputs[0][offset], late_in_buffer[0], buffer_frames ); + + VMUL32FLOAT_V(early_send, early_out_buffer[1], early_buffer, buffer_frames); + VSUM32FLOAT(early_buffer, &inputs[1][offset], late_in_buffer[1], buffer_frames ); 
late.processreplace( const_cast(late_in_buffer[0]), @@ -128,23 +130,23 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra late_out_buffer[1], buffer_frames); - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] = dryLevel * inputs[0][offset + i]; - outputs[1][offset + i] = dryLevel * inputs[1][offset + i]; - } + VMUL32FLOAT_V(dryLevel, &inputs[0][offset], &outputs[0][offset], buffer_frames); + VMUL32FLOAT_V(dryLevel, &inputs[1][offset], &outputs[1][offset], buffer_frames); if( earlyLevel > 0.0 ){ - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] += earlyLevel * early_out_buffer[0][i]; - outputs[1][offset + i] += earlyLevel * early_out_buffer[1][i]; - } + VMUL32FLOAT_V(earlyLevel, early_out_buffer[0], early_buffer, buffer_frames); + VSUM32FLOAT(&outputs[0][offset], early_buffer, &outputs[0][offset], buffer_frames); + + VMUL32FLOAT_V(earlyLevel, early_out_buffer[1], early_buffer, buffer_frames); + VSUM32FLOAT(&outputs[1][offset], early_buffer, &outputs[1][offset], buffer_frames); } if( lateLevel > 0.0 ){ - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] += lateLevel * late_out_buffer[0][i]; - outputs[1][offset + i] += lateLevel * late_out_buffer[1][i]; - } + VMUL32FLOAT_V(lateLevel, late_out_buffer[0], late_buffer, buffer_frames); + VSUM32FLOAT(&outputs[0][offset], late_buffer, &outputs[0][offset], buffer_frames); + + VMUL32FLOAT_V(lateLevel, late_out_buffer[1], late_buffer, buffer_frames); + VSUM32FLOAT(&outputs[1][offset], late_buffer, &outputs[1][offset], buffer_frames); } } } diff --git a/plugins/dragonfly-hall-reverb/DSP.hpp b/plugins/dragonfly-hall-reverb/DSP.hpp index 1271c6e..72102ab 100644 --- a/plugins/dragonfly-hall-reverb/DSP.hpp +++ b/plugins/dragonfly-hall-reverb/DSP.hpp @@ -47,6 +47,9 @@ class DragonflyReverbDSP : public AbstractDSP { float early_out_buffer[2][BUFFER_SIZE]; float late_in_buffer[2][BUFFER_SIZE]; float late_out_buffer[2][BUFFER_SIZE]; + 
+ float early_buffer[BUFFER_SIZE]; + float late_buffer[BUFFER_SIZE]; }; #endif diff --git a/plugins/dragonfly-hall-reverb/Makefile b/plugins/dragonfly-hall-reverb/Makefile index 8c5c246..9170538 100644 --- a/plugins/dragonfly-hall-reverb/Makefile +++ b/plugins/dragonfly-hall-reverb/Makefile @@ -14,7 +14,10 @@ NAME = DragonflyHallReverb FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c + ../../common/kiss_fft/kiss_fftr.c \ + ../../common/optimization/optimization.cpp \ + ../../common/optimization/default.cpp \ + ../../common/optimization/avx.cpp ifneq ($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -59,7 +62,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT +BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3) diff --git a/plugins/dragonfly-plate-reverb/DSP.cpp b/plugins/dragonfly-plate-reverb/DSP.cpp index 375cd39..3a32418 100644 --- a/plugins/dragonfly-plate-reverb/DSP.cpp +++ b/plugins/dragonfly-plate-reverb/DSP.cpp @@ -19,6 +19,7 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include "extra/ScopedDenormalDisable.hpp" +#include "optimization/optimization.hpp" #include "DSP.hpp" @@ -245,16 +246,13 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra buffer_frames ); - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] = - dryLevel * inputs[0][offset + i] + - wetLevel * output_buffer[0][i]; - - outputs[1][offset + i] = - dryLevel * inputs[1][offset + i] + - wetLevel * output_buffer[1][i]; - } + VMUL32FLOAT_V( dryLevel, &inputs[0][offset], dry_buffer, buffer_frames); + VMUL32FLOAT_V( wetLevel, output_buffer[0], wet_buffer, buffer_frames); + VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[0][offset], 
buffer_frames); + VMUL32FLOAT_V( dryLevel, &inputs[1][offset], dry_buffer, buffer_frames); + VMUL32FLOAT_V( wetLevel, output_buffer[1], wet_buffer, buffer_frames); + VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[1][offset], buffer_frames); } } diff --git a/plugins/dragonfly-plate-reverb/DSP.hpp b/plugins/dragonfly-plate-reverb/DSP.hpp index 8907af0..da40d82 100644 --- a/plugins/dragonfly-plate-reverb/DSP.hpp +++ b/plugins/dragonfly-plate-reverb/DSP.hpp @@ -79,6 +79,9 @@ class DragonflyReverbDSP : public AbstractDSP { float filtered_input_buffer[2][BUFFER_SIZE]; float output_buffer[2][BUFFER_SIZE]; + float dry_buffer[BUFFER_SIZE]; + float wet_buffer[BUFFER_SIZE]; + void setInputLPF(float freq); void setInputHPF(float freq); }; diff --git a/plugins/dragonfly-plate-reverb/Makefile b/plugins/dragonfly-plate-reverb/Makefile index 9b12f9c..bddfe0e 100644 --- a/plugins/dragonfly-plate-reverb/Makefile +++ b/plugins/dragonfly-plate-reverb/Makefile @@ -14,7 +14,10 @@ NAME = DragonflyPlateReverb FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c + ../../common/kiss_fft/kiss_fftr.c \ + ../../common/optimization/optimization.cpp \ + ../../common/optimization/default.cpp \ + ../../common/optimization/avx.cpp ifneq ($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -59,7 +62,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT +BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3) diff --git a/plugins/dragonfly-room-reverb/DSP.cpp b/plugins/dragonfly-room-reverb/DSP.cpp index c99e0b6..8c39e5e 100644 --- a/plugins/dragonfly-room-reverb/DSP.cpp +++ b/plugins/dragonfly-room-reverb/DSP.cpp @@ -19,6 +19,7 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include 
"extra/ScopedDenormalDisable.hpp" +#include "optimization/optimization.hpp" #include "DSP.hpp" @@ -130,10 +131,11 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra early_out_buffer[1], buffer_frames); - for (uint32_t i = 0; i < buffer_frames; i++) { - late_in_buffer[0][i] = early_send * early_out_buffer[0][i] + filtered_input_buffer[0][i]; - late_in_buffer[1][i] = early_send * early_out_buffer[1][i] + filtered_input_buffer[1][i]; - } + VMUL32FLOAT_V(early_send, early_out_buffer[0], late_buffer, buffer_frames); + VSUM32FLOAT(late_buffer, filtered_input_buffer[0], late_in_buffer[0], buffer_frames); + + VMUL32FLOAT_V(early_send, early_out_buffer[1], late_buffer, buffer_frames); + VSUM32FLOAT(late_buffer, filtered_input_buffer[1], late_in_buffer[1], buffer_frames); late.processreplace( const_cast(late_in_buffer[0]), @@ -142,23 +144,23 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra late_out_buffer[1], buffer_frames); - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] = dryLevel * inputs[0][offset + i]; - outputs[1][offset + i] = dryLevel * inputs[1][offset + i]; - } + VMUL32FLOAT_V(dryLevel, &inputs[0][offset], &outputs[0][offset], buffer_frames); + VMUL32FLOAT_V(dryLevel, &inputs[1][offset], &outputs[1][offset], buffer_frames); if( earlyLevel > 0.0 ){ - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] += earlyLevel * early_out_buffer[0][i]; - outputs[1][offset + i] += earlyLevel * early_out_buffer[1][i]; - } + VMUL32FLOAT_V(earlyLevel, early_out_buffer[0], early_buffer, buffer_frames); + VSUM32FLOAT(early_buffer, &outputs[0][offset], &outputs[0][offset], buffer_frames); + + VMUL32FLOAT_V(earlyLevel, early_out_buffer[1], early_buffer, buffer_frames); + VSUM32FLOAT(early_buffer, &outputs[1][offset], &outputs[1][offset], buffer_frames); } if( lateLevel > 0.0 ){ - for (uint32_t i = 0; i < buffer_frames; i++) { - outputs[0][offset + i] += lateLevel * 
late_out_buffer[0][i]; - outputs[1][offset + i] += lateLevel * late_out_buffer[1][i]; - } + VMUL32FLOAT_V(lateLevel, late_out_buffer[0], late_buffer, buffer_frames); + VSUM32FLOAT(late_buffer, &outputs[0][offset], &outputs[0][offset], buffer_frames); + + VMUL32FLOAT_V(lateLevel, late_out_buffer[1], late_buffer, buffer_frames); + VSUM32FLOAT(late_buffer, &outputs[1][offset], &outputs[1][offset], buffer_frames); } } } diff --git a/plugins/dragonfly-room-reverb/DSP.hpp b/plugins/dragonfly-room-reverb/DSP.hpp index b1a7aff..92fe2e2 100644 --- a/plugins/dragonfly-room-reverb/DSP.hpp +++ b/plugins/dragonfly-room-reverb/DSP.hpp @@ -52,6 +52,9 @@ class DragonflyReverbDSP : public AbstractDSP { float late_in_buffer[2][BUFFER_SIZE]; float late_out_buffer[2][BUFFER_SIZE]; + float early_buffer[BUFFER_SIZE]; + float late_buffer[BUFFER_SIZE]; + void setInputLPF(float freq); void setInputHPF(float freq); }; diff --git a/plugins/dragonfly-room-reverb/Makefile b/plugins/dragonfly-room-reverb/Makefile index 0fd6537..24f0631 100644 --- a/plugins/dragonfly-room-reverb/Makefile +++ b/plugins/dragonfly-room-reverb/Makefile @@ -14,7 +14,10 @@ NAME = DragonflyRoomReverb FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c + ../../common/kiss_fft/kiss_fftr.c \ + ../../common/optimization/optimization.cpp \ + ../../common/optimization/default.cpp \ + ../../common/optimization/avx.cpp ifneq ($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -59,7 +62,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT +BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3) From edea88ad1f659ebc74b11ac632b67f9a5591a1a7 Mon Sep 17 00:00:00 2001 From: Manuel Virgilio Date: Wed, 15 Nov 2023 19:43:53 +0100 Subject: [PATCH 
2/2] added macro to enable SIMD code, insertion of makefile to handle the optimization files inclusion --- common/optimization/Makefile.optimization.mk | 41 ++++++++++++ common/optimization/avx.cpp | 68 +++++++++++++++++--- common/optimization/avx.hpp | 4 +- common/optimization/default.cpp | 10 ++- common/optimization/default.hpp | 3 +- common/optimization/optimization.cpp | 25 +++++-- common/optimization/optimization.hpp | 4 +- plugins/dragonfly-early-reflections/DSP.cpp | 19 +++++- plugins/dragonfly-early-reflections/DSP.hpp | 2 + plugins/dragonfly-early-reflections/Makefile | 12 ++-- plugins/dragonfly-hall-reverb/DSP.cpp | 55 +++++++++++----- plugins/dragonfly-hall-reverb/DSP.hpp | 3 - plugins/dragonfly-hall-reverb/Makefile | 12 ++-- plugins/dragonfly-plate-reverb/DSP.cpp | 17 ++++- plugins/dragonfly-plate-reverb/DSP.hpp | 2 + plugins/dragonfly-plate-reverb/Makefile | 12 ++-- plugins/dragonfly-room-reverb/DSP.cpp | 51 ++++++++++----- plugins/dragonfly-room-reverb/DSP.hpp | 3 - plugins/dragonfly-room-reverb/Makefile | 12 ++-- 19 files changed, 276 insertions(+), 79 deletions(-) create mode 100644 common/optimization/Makefile.optimization.mk diff --git a/common/optimization/Makefile.optimization.mk b/common/optimization/Makefile.optimization.mk new file mode 100644 index 0000000..99369f6 --- /dev/null +++ b/common/optimization/Makefile.optimization.mk @@ -0,0 +1,41 @@ + +ifeq ($(OS),Windows_NT) + MACHINE = WIN32 + ifeq ($(PROCESSOR_ARCHITECTURE),AMD64) + ARCH = AMD64 + endif + ifeq ($(PROCESSOR_ARCHITECTURE),x86) + ARCH = IA32 + endif +else + UNAME_M := $(shell uname -m) + ifeq ($(UNAME_M),x86_64) + ARCH = AMD64 + endif + ifneq ($(filter %86,$(UNAME_M)),) + ARCH += IA32 + endif + ifneq ($(filter arm%,$(UNAME_M)),) + ARCH += ARM + endif +endif + +ifeq ($(ARCH), AMD64) +USE_PLUGIN_SIMD = true +CXXFLAGS += -mavx -mfma -DUSE_PLUGIN_SIMD +endif + +# -------------------------- +# FIle Inclusion +ifeq ($(USE_PLUGIN_SIMD), true) +FILES_COMMON += \ + 
../../common/optimization/optimization.cpp \
+    ../../common/optimization/default.cpp
+
+ifeq ($(ARCH), AMD64)
+FILES_COMMON += \
+    ../../common/optimization/avx.cpp
+endif
+
+endif
+
diff --git a/common/optimization/avx.cpp b/common/optimization/avx.cpp
index 539097d..7d7480c 100644
--- a/common/optimization/avx.cpp
+++ b/common/optimization/avx.cpp
@@ -2,71 +2,125 @@
 
 #include
 
-void VSUM32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length)
+int8_t AVXInitialized = 0;
+int8_t FMASupported = 0;
+
+void CheckFMASupport()
+{
+    if ( !AVXInitialized )
+    {
+        __builtin_cpu_init();
+        if ( __builtin_cpu_supports("fma") )
+        {
+            FMASupported = 1;
+        }
+
+        AVXInitialized = 1;
+    }
+}
+
+// result[i] = op1[i] + op2[i] for i in [0, length).
+// Whole 8-float blocks use AVX; the last (length % 8) elements are done in
+// scalar code so that nothing is loaded or stored past index length - 1.
+void VADD32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length)
 {
-    float extracted[8];
     int32_t processed = 0;
 
-    while ( processed < length )
+    while ( length - processed >= 8 )
     {
-        int processing = ((length - processed) % 8) + 1;
-
         const __m256 mm_op1 = _mm256_loadu_ps(op1 + processed);
         const __m256 mm_op2 = _mm256_loadu_ps(op2 + processed );
         __m256 mm_res = _mm256_add_ps(mm_op1, mm_op2);
 
-        _mm256_storeu_ps(extracted, mm_res);
-        for ( int i = 0 ; i < processing ; i++ )
-        {
-            result[processed + i] = extracted[i];
-        }
+        _mm256_storeu_ps(result + processed, mm_res);
 
-        processed += processing;
+        processed += 8;
     }
+
+    for ( ; processed < length ; processed++ )
+    {
+        result[processed] = op1[processed] + op2[processed];
+    }
 }
 
+// result[i] = op1[i] * op2[i] for i in [0, length); AVX blocks of 8 plus a scalar tail.
 void VMUL32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length)
 {
-    float extracted[8];
     int32_t processed = 0;
 
-    while ( processed < length )
+    while ( length - processed >= 8 )
     {
-        int processing = ((length - processed) % 8) + 1;
         const __m256 mm_op1 = _mm256_loadu_ps(op1 + processed);
         const __m256 mm_op2 = _mm256_loadu_ps(op2 + processed);
         __m256 mm_res = _mm256_mul_ps(mm_op1, mm_op2);
 
-        _mm256_storeu_ps(extracted, mm_res);
-        for ( int i = 0 ; i < processing ; i++ )
-        {
-            result[processed + i] = extracted[i];
-        }
+        _mm256_storeu_ps(result + processed, mm_res);
 
-        processed += processing;
+        processed += 8;
     }
+
+    for ( ; processed < length ; processed++ )
+    {
+        result[processed] = op1[processed] * op2[processed];
+    }
 }
 
+// result[i] = value * vec[i] for i in [0, length); AVX blocks of 8 plus a scalar tail.
 void VMUL32FLOAT_V_avx(const float value, const float* vec, float* result, int32_t length)
 {
-    float extracted[8];
     const __m256 mm_op1 = _mm256_set1_ps(value);
 
-    int processed = 0;
-    while ( processed < length )
+    int32_t processed = 0;
+    while ( length - processed >= 8 )
     {
-        int processing = ((length - processed) % 8) + 1;
-
         const __m256 mm_op2 = _mm256_loadu_ps(vec + processed);
         __m256 mm_res = _mm256_mul_ps(mm_op1, mm_op2);
 
-        _mm256_storeu_ps(extracted, mm_res);
-        for ( int i = 0 ; i < processing ; i++ )
-        {
-            result[processed + i] = extracted[i];
-        }
+        _mm256_storeu_ps(result + processed, mm_res);
 
-        processed += processing;
+        processed += 8;
     }
+
+    for ( ; processed < length ; processed++ )
+    {
+        result[processed] = value * vec[processed];
+    }
 }
 
+// result[i] = value * vmul[i] + vadd[i] for i in [0, length).
+// The 8-wide blocks use a fused multiply-add when CheckFMASupport() found FMA.
+// NOTE(review): if this file is compiled with FMA enabled, the compiler may emit
+// FMA on its own in the non-FMA paths below; confirm behaviour on AVX-only CPUs.
+void VMADD32FLOAT_V_avx(const float value, const float* vmul, const float* vadd, float* result, int32_t length)
+{
+    const __m256 mm_val = _mm256_set1_ps(value);
+
+    CheckFMASupport();
+
+    int32_t processed = 0;
+    while ( length - processed >= 8 )
+    {
+        const __m256 mm_mul = _mm256_loadu_ps(vmul + processed);
+        const __m256 mm_add = _mm256_loadu_ps(vadd + processed);
+
+        __m256 mm_res;
+        if ( FMASupported )
+        {
+            mm_res = _mm256_fmadd_ps(mm_val, mm_mul, mm_add);
+        }
+        else
+        {
+            mm_res = _mm256_mul_ps(mm_val, mm_mul);
+            mm_res = _mm256_add_ps(mm_res, mm_add);
+        }
+
+        _mm256_storeu_ps(result + processed, mm_res);
+
+        processed += 8;
+    }
+
+    for ( ; processed < length ; processed++ )
+    {
+        result[processed] = (value * vmul[processed]) + vadd[processed];
+    }
+}
diff --git a/common/optimization/avx.hpp b/common/optimization/avx.hpp
index 6b0d1e3..6716f4b 100644
--- a/common/optimization/avx.hpp
+++ b/common/optimization/avx.hpp
@@ -4,10 +4,12 @@
 
 #include
 
-void VSUM32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length);
+void VADD32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length);
 
 void VMUL32FLOAT_avx(const float* op1, const float* op2, float* result, int32_t length);
 
 void VMUL32FLOAT_V_avx(const float value, const float* vec, float* result, int32_t length);
 
+void
VMADD32FLOAT_V_avx(const float value, const float* vmul, const float* vadd, float* result, int32_t length); + #endif \ No newline at end of file diff --git a/common/optimization/default.cpp b/common/optimization/default.cpp index b12033f..dc946a2 100644 --- a/common/optimization/default.cpp +++ b/common/optimization/default.cpp @@ -1,6 +1,6 @@ #include "default.hpp" -void VSUM32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length) +void VADD32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length) { for ( int32_t i = 0 ; i < length ; i++ ) { @@ -24,3 +24,11 @@ void VMUL32FLOAT_V_default(const float value, const float* vec, float* result, i } } +void VMADD32FLOAT_V_default(const float value, const float* vmul, const float* vadd, float* result, int32_t length) +{ + for ( int32_t i = 0 ; i < length ; i++ ) + { + result[i] = (value * vmul[i]) + vadd[i]; + } +} + diff --git a/common/optimization/default.hpp b/common/optimization/default.hpp index ebdd7a8..a351371 100644 --- a/common/optimization/default.hpp +++ b/common/optimization/default.hpp @@ -4,11 +4,12 @@ #include -void VSUM32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length); +void VADD32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length); void VMUL32FLOAT_default(const float* op1, const float* op2, float* result, int32_t length); void VMUL32FLOAT_V_default(const float value, const float* vec, float* result, int32_t length); +void VMADD32FLOAT_V_default(const float value, const float* vmul, const float* vadd, float* result, int32_t length); #endif \ No newline at end of file diff --git a/common/optimization/optimization.cpp b/common/optimization/optimization.cpp index b6066f9..a6f8bd4 100644 --- a/common/optimization/optimization.cpp +++ b/common/optimization/optimization.cpp @@ -7,9 +7,10 @@ typedef struct { - void (*VSUM32FLOAT)(const float* op1, const float* op2, float* result, int32_t length); + 
void (*VADD32FLOAT)(const float* op1, const float* op2, float* result, int32_t length); void (*VMUL32FLOAT)(const float* op1, const float* op2, float* result, int32_t length); void (*VMUL32FLOAT_V)(const float value, const float* vec, float* result, int32_t length); + void (*VMADD32FLOAT_V)(const float value, const float* vmul, const float* vadd, float* result, int32_t length); } _CpuOptimization; int8_t OptimizationInitialized = 0; @@ -18,30 +19,34 @@ _CpuOptimization CpuOptimization; void SetupOptimization() { __builtin_cpu_init(); +#if defined(__x86_64__) if ( __builtin_cpu_supports("avx") ) { - CpuOptimization.VSUM32FLOAT = VSUM32FLOAT_avx; + CpuOptimization.VADD32FLOAT = VADD32FLOAT_avx; CpuOptimization.VMUL32FLOAT = VMUL32FLOAT_avx; CpuOptimization.VMUL32FLOAT_V = VMUL32FLOAT_V_avx; + CpuOptimization.VMADD32FLOAT_V = VMADD32FLOAT_V_avx; } else +#endif { - CpuOptimization.VSUM32FLOAT = VSUM32FLOAT_default; + CpuOptimization.VADD32FLOAT = VADD32FLOAT_default; CpuOptimization.VMUL32FLOAT = VMUL32FLOAT_default; CpuOptimization.VMUL32FLOAT_V = VMUL32FLOAT_V_default; + CpuOptimization.VMADD32FLOAT_V = VMADD32FLOAT_V_default; } OptimizationInitialized = 1; } -void VSUM32FLOAT(const float* op1, const float* op2, float* result, int32_t length) +void VADD32FLOAT(const float* op1, const float* op2, float* result, int32_t length) { if (!OptimizationInitialized) { SetupOptimization(); } - CpuOptimization.VSUM32FLOAT(op1,op2,result,length); + CpuOptimization.VADD32FLOAT(op1,op2,result,length); } void VMUL32FLOAT(const float* op1, const float* op2, float* result, int32_t length) @@ -62,4 +67,14 @@ void VMUL32FLOAT_V(const float value, const float* vec, float* result, int32_t l } CpuOptimization.VMUL32FLOAT_V(value,vec,result,length); +} + +void VMADD32FLOAT_V(const float value, const float* vmul, const float* vadd, float* result, int32_t length) +{ + if (!OptimizationInitialized) + { + SetupOptimization(); + } + + 
CpuOptimization.VMADD32FLOAT_V(value,vmul,vadd,result,length); } \ No newline at end of file diff --git a/common/optimization/optimization.hpp b/common/optimization/optimization.hpp index a3f1a89..47e4d99 100644 --- a/common/optimization/optimization.hpp +++ b/common/optimization/optimization.hpp @@ -4,10 +4,12 @@ #include -void VSUM32FLOAT(const float* op1, const float* op2, float* result, int32_t length); +void VADD32FLOAT(const float* op1, const float* op2, float* result, int32_t length); void VMUL32FLOAT(const float* op1, const float* op2, float* result, int32_t length); void VMUL32FLOAT_V(const float value, const float* vec, float* result, int32_t length); +void VMADD32FLOAT_V(const float value, const float* vmul, const float* vadd, float* result, int32_t length); + #endif \ No newline at end of file diff --git a/plugins/dragonfly-early-reflections/DSP.cpp b/plugins/dragonfly-early-reflections/DSP.cpp index 858caa4..3288bb6 100644 --- a/plugins/dragonfly-early-reflections/DSP.cpp +++ b/plugins/dragonfly-early-reflections/DSP.cpp @@ -19,7 +19,10 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include "extra/ScopedDenormalDisable.hpp" + +#ifdef USE_PLUGIN_SIMD #include "optimization/optimization.hpp" +#endif #include "DSP.hpp" @@ -92,13 +95,25 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra buffer_frames ); +#ifdef USE_PLUGIN_SIMD VMUL32FLOAT_V( dryLevel, &inputs[0][offset], dry_buffer, buffer_frames ); VMUL32FLOAT_V( wetLevel, output_buffer[0], wet_buffer, buffer_frames ); - VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[0][offset], buffer_frames ); + VADD32FLOAT(dry_buffer, wet_buffer, &outputs[0][offset], buffer_frames ); VMUL32FLOAT_V( dryLevel, &inputs[1][offset], dry_buffer, buffer_frames ); VMUL32FLOAT_V( wetLevel, output_buffer[1], wet_buffer, buffer_frames ); - VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[1][offset], buffer_frames ); + VADD32FLOAT(dry_buffer, wet_buffer, &outputs[1][offset], 
buffer_frames ); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + outputs[0][offset + i] = + dryLevel * inputs[0][offset + i] + + wetLevel * output_buffer[0][i]; + + outputs[1][offset + i] = + dryLevel * inputs[1][offset + i] + + wetLevel * output_buffer[1][i]; + } +#endif } } diff --git a/plugins/dragonfly-early-reflections/DSP.hpp b/plugins/dragonfly-early-reflections/DSP.hpp index 447cf72..99a25c3 100644 --- a/plugins/dragonfly-early-reflections/DSP.hpp +++ b/plugins/dragonfly-early-reflections/DSP.hpp @@ -46,8 +46,10 @@ class DragonflyReverbDSP : public AbstractDSP { float input_buffer[2][BUFFER_SIZE]; float output_buffer[2][BUFFER_SIZE]; +#ifdef USE_PLUGIN_SIMD float dry_buffer[BUFFER_SIZE]; float wet_buffer[BUFFER_SIZE]; +#endif void setInputLPF(float freq); void setInputHPF(float freq); diff --git a/plugins/dragonfly-early-reflections/Makefile b/plugins/dragonfly-early-reflections/Makefile index f0c1af3..df78bc6 100644 --- a/plugins/dragonfly-early-reflections/Makefile +++ b/plugins/dragonfly-early-reflections/Makefile @@ -14,10 +14,7 @@ NAME = DragonflyEarlyReflections FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c \ - ../../common/optimization/optimization.cpp \ - ../../common/optimization/default.cpp \ - ../../common/optimization/avx.cpp + ../../common/kiss_fft/kiss_fftr.c ifneq ($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -51,6 +48,11 @@ FILES_UI = $(FILES_COMMON) \ ../../common/Selection.cpp \ ../../common/Bitstream_Vera_Sans_Regular.cpp +# -------------------------------------------------------------- +# Check for optimization support + +include ../../common/optimization/Makefile.optimization.mk + # -------------------------------------------------------------- # Do some magic @@ -61,7 +63,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx +BUILD_CXX_FLAGS += 
-I../../common -DLIBFV3_FLOAT ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3) diff --git a/plugins/dragonfly-hall-reverb/DSP.cpp b/plugins/dragonfly-hall-reverb/DSP.cpp index dcad8c1..1f12522 100644 --- a/plugins/dragonfly-hall-reverb/DSP.cpp +++ b/plugins/dragonfly-hall-reverb/DSP.cpp @@ -18,7 +18,9 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include "extra/ScopedDenormalDisable.hpp" +#ifdef USE_PLUGIN_SIMD #include "optimization/optimization.hpp" +#endif #include "DSP.hpp" @@ -116,12 +118,16 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra early_out_buffer[0], early_out_buffer[1], buffer_frames); - - VMUL32FLOAT_V(early_send, early_out_buffer[0], early_buffer, buffer_frames); - VSUM32FLOAT(early_buffer, &inputs[0][offset], late_in_buffer[0], buffer_frames ); - - VMUL32FLOAT_V(early_send, early_out_buffer[1], early_buffer, buffer_frames); - VSUM32FLOAT(early_buffer, &inputs[1][offset], late_in_buffer[1], buffer_frames ); + +#ifdef USE_PLUGIN_SIMD + VMADD32FLOAT_V(early_send, early_out_buffer[0], &inputs[0][offset], late_in_buffer[0], buffer_frames ); + VMADD32FLOAT_V(early_send, early_out_buffer[1], &inputs[1][offset], late_in_buffer[1], buffer_frames ); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + late_in_buffer[0][i] = early_send * early_out_buffer[0][i] + inputs[0][offset + i]; + late_in_buffer[1][i] = early_send * early_out_buffer[1][i] + inputs[1][offset + i]; + } +#endif late.processreplace( const_cast(late_in_buffer[0]), @@ -129,24 +135,39 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra late_out_buffer[0], late_out_buffer[1], buffer_frames); - + +#ifdef USE_PLUGIN_SIMD VMUL32FLOAT_V(dryLevel, &inputs[0][offset], &outputs[0][offset], buffer_frames); VMUL32FLOAT_V(dryLevel, &inputs[1][offset], &outputs[1][offset], buffer_frames); +#else + for (uint32_t i = 0; i < buffer_frames; 
i++) { + outputs[0][offset + i] = dryLevel * inputs[0][offset + i]; + outputs[1][offset + i] = dryLevel * inputs[1][offset + i]; + } +#endif if( earlyLevel > 0.0 ){ - VMUL32FLOAT_V(earlyLevel, early_out_buffer[0], early_buffer, buffer_frames); - VSUM32FLOAT(&outputs[0][offset], early_buffer, &outputs[0][offset], buffer_frames); - - VMUL32FLOAT_V(earlyLevel, early_out_buffer[1], early_buffer, buffer_frames); - VSUM32FLOAT(&outputs[1][offset], early_buffer, &outputs[1][offset], buffer_frames); +#ifdef USE_PLUGIN_SIMD + VMADD32FLOAT_V(earlyLevel, early_out_buffer[0], &outputs[0][offset], &outputs[0][offset], buffer_frames ); + VMADD32FLOAT_V(earlyLevel, early_out_buffer[1], &outputs[1][offset], &outputs[1][offset], buffer_frames ); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + outputs[0][offset + i] += earlyLevel * early_out_buffer[0][i]; + outputs[1][offset + i] += earlyLevel * early_out_buffer[1][i]; + } +#endif } if( lateLevel > 0.0 ){ - VMUL32FLOAT_V(lateLevel, late_out_buffer[0], late_buffer, buffer_frames); - VSUM32FLOAT(&outputs[0][offset], late_buffer, &outputs[0][offset], buffer_frames); - - VMUL32FLOAT_V(lateLevel, late_out_buffer[1], late_buffer, buffer_frames); - VSUM32FLOAT(&outputs[1][offset], late_buffer, &outputs[1][offset], buffer_frames); +#ifdef USE_PLUGIN_SIMD + VMADD32FLOAT_V(lateLevel, late_out_buffer[0], &outputs[0][offset], &outputs[0][offset], buffer_frames ); + VMADD32FLOAT_V(lateLevel, late_out_buffer[1], &outputs[1][offset], &outputs[1][offset], buffer_frames ); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + outputs[0][offset + i] += lateLevel * late_out_buffer[0][i]; + outputs[1][offset + i] += lateLevel * late_out_buffer[1][i]; + } +#endif } } } diff --git a/plugins/dragonfly-hall-reverb/DSP.hpp b/plugins/dragonfly-hall-reverb/DSP.hpp index 72102ab..1271c6e 100644 --- a/plugins/dragonfly-hall-reverb/DSP.hpp +++ b/plugins/dragonfly-hall-reverb/DSP.hpp @@ -47,9 +47,6 @@ class DragonflyReverbDSP : public AbstractDSP { 
float early_out_buffer[2][BUFFER_SIZE]; float late_in_buffer[2][BUFFER_SIZE]; float late_out_buffer[2][BUFFER_SIZE]; - - float early_buffer[BUFFER_SIZE]; - float late_buffer[BUFFER_SIZE]; }; #endif diff --git a/plugins/dragonfly-hall-reverb/Makefile b/plugins/dragonfly-hall-reverb/Makefile index 9170538..c08ebcf 100644 --- a/plugins/dragonfly-hall-reverb/Makefile +++ b/plugins/dragonfly-hall-reverb/Makefile @@ -14,10 +14,7 @@ NAME = DragonflyHallReverb FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c \ - ../../common/optimization/optimization.cpp \ - ../../common/optimization/default.cpp \ - ../../common/optimization/avx.cpp + ../../common/kiss_fft/kiss_fftr.c ifneq ($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -52,6 +49,11 @@ FILES_UI = $(FILES_COMMON) \ ../../common/Spectrogram.cpp \ ../../common/Bitstream_Vera_Sans_Regular.cpp +# -------------------------------------------------------------- +# Check for optimization support + +include ../../common/optimization/Makefile.optimization.mk + # -------------------------------------------------------------- # Do some magic @@ -62,7 +64,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx +BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3) diff --git a/plugins/dragonfly-plate-reverb/DSP.cpp b/plugins/dragonfly-plate-reverb/DSP.cpp index 3a32418..b44d0cc 100644 --- a/plugins/dragonfly-plate-reverb/DSP.cpp +++ b/plugins/dragonfly-plate-reverb/DSP.cpp @@ -19,7 +19,9 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include "extra/ScopedDenormalDisable.hpp" +#ifdef USE_PLUGIN_SIMD #include "optimization/optimization.hpp" +#endif #include "DSP.hpp" @@ -246,13 +248,24 @@ void DragonflyReverbDSP::run(const 
float** inputs, float** outputs, uint32_t fra buffer_frames ); +#ifdef USE_PLUGIN_SIMD VMUL32FLOAT_V( dryLevel, &inputs[0][offset], dry_buffer, buffer_frames); VMUL32FLOAT_V( wetLevel, output_buffer[0], wet_buffer, buffer_frames); - VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[0][offset], buffer_frames); + VADD32FLOAT(dry_buffer, wet_buffer, &outputs[0][offset], buffer_frames); VMUL32FLOAT_V( dryLevel, &inputs[1][offset], dry_buffer, buffer_frames); VMUL32FLOAT_V( wetLevel, output_buffer[1], wet_buffer, buffer_frames); - VSUM32FLOAT(dry_buffer, wet_buffer, &outputs[1][offset], buffer_frames); + VADD32FLOAT(dry_buffer, wet_buffer, &outputs[1][offset], buffer_frames); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + outputs[0][offset + i] = + dryLevel * inputs[0][offset + i] + + wetLevel * output_buffer[0][i]; + outputs[1][offset + i] = + dryLevel * inputs[1][offset + i] + + wetLevel * output_buffer[1][i]; + } +#endif } } diff --git a/plugins/dragonfly-plate-reverb/DSP.hpp b/plugins/dragonfly-plate-reverb/DSP.hpp index da40d82..07206b2 100644 --- a/plugins/dragonfly-plate-reverb/DSP.hpp +++ b/plugins/dragonfly-plate-reverb/DSP.hpp @@ -79,8 +79,10 @@ class DragonflyReverbDSP : public AbstractDSP { float filtered_input_buffer[2][BUFFER_SIZE]; float output_buffer[2][BUFFER_SIZE]; +#ifdef USE_PLUGIN_SIMD float dry_buffer[BUFFER_SIZE]; float wet_buffer[BUFFER_SIZE]; +#endif void setInputLPF(float freq); void setInputHPF(float freq); diff --git a/plugins/dragonfly-plate-reverb/Makefile b/plugins/dragonfly-plate-reverb/Makefile index bddfe0e..2dfbb7f 100644 --- a/plugins/dragonfly-plate-reverb/Makefile +++ b/plugins/dragonfly-plate-reverb/Makefile @@ -14,10 +14,7 @@ NAME = DragonflyPlateReverb FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c \ - ../../common/optimization/optimization.cpp \ - ../../common/optimization/default.cpp \ - ../../common/optimization/avx.cpp + ../../common/kiss_fft/kiss_fftr.c ifneq 
($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -52,6 +49,11 @@ FILES_UI = $(FILES_COMMON) \ ../../common/Spectrogram.cpp \ ../../common/Bitstream_Vera_Sans_Regular.cpp +# -------------------------------------------------------------- +# Check for optimization support + +include ../../common/optimization/Makefile.optimization.mk + # -------------------------------------------------------------- # Do some magic @@ -62,7 +64,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx +BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3) diff --git a/plugins/dragonfly-room-reverb/DSP.cpp b/plugins/dragonfly-room-reverb/DSP.cpp index 8c39e5e..8cbf6fc 100644 --- a/plugins/dragonfly-room-reverb/DSP.cpp +++ b/plugins/dragonfly-room-reverb/DSP.cpp @@ -19,7 +19,9 @@ #include "DistrhoPlugin.hpp" #include "DistrhoPluginInfo.h" #include "extra/ScopedDenormalDisable.hpp" +#ifdef USE_PLUGIN_SIMD #include "optimization/optimization.hpp" +#endif #include "DSP.hpp" @@ -131,11 +133,15 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra early_out_buffer[1], buffer_frames); - VMUL32FLOAT_V(early_send, early_out_buffer[0], late_buffer, buffer_frames); - VSUM32FLOAT(late_buffer, filtered_input_buffer[0], late_in_buffer[0], buffer_frames); - - VMUL32FLOAT_V(early_send, early_out_buffer[1], late_buffer, buffer_frames); - VSUM32FLOAT(late_buffer, filtered_input_buffer[1], late_in_buffer[1], buffer_frames); +#ifdef USE_PLUGIN_SIMD + VMADD32FLOAT_V(early_send, early_out_buffer[0], filtered_input_buffer[0], late_in_buffer[0], buffer_frames ); + VMADD32FLOAT_V(early_send, early_out_buffer[1], filtered_input_buffer[1], late_in_buffer[1], buffer_frames ); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + 
late_in_buffer[0][i] = early_send * early_out_buffer[0][i] + filtered_input_buffer[0][i]; + late_in_buffer[1][i] = early_send * early_out_buffer[1][i] + filtered_input_buffer[1][i]; + } +#endif late.processreplace( const_cast(late_in_buffer[0]), @@ -144,23 +150,38 @@ void DragonflyReverbDSP::run(const float** inputs, float** outputs, uint32_t fra late_out_buffer[1], buffer_frames); +#ifdef USE_PLUGIN_SIMD VMUL32FLOAT_V(dryLevel, &inputs[0][offset], &outputs[0][offset], buffer_frames); VMUL32FLOAT_V(dryLevel, &inputs[1][offset], &outputs[1][offset], buffer_frames); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + outputs[0][offset + i] = dryLevel * inputs[0][offset + i]; + outputs[1][offset + i] = dryLevel * inputs[1][offset + i]; + } +#endif if( earlyLevel > 0.0 ){ - VMUL32FLOAT_V(earlyLevel, early_out_buffer[0], early_buffer, buffer_frames); - VSUM32FLOAT(early_buffer, &outputs[0][offset], &outputs[0][offset], buffer_frames); - - VMUL32FLOAT_V(earlyLevel, early_out_buffer[1], early_buffer, buffer_frames); - VSUM32FLOAT(early_buffer, &outputs[1][offset], &outputs[1][offset], buffer_frames); +#ifdef USE_PLUGIN_SIMD + VMADD32FLOAT_V(earlyLevel, early_out_buffer[0], &outputs[0][offset], &outputs[0][offset], buffer_frames ); + VMADD32FLOAT_V(earlyLevel, early_out_buffer[1], &outputs[1][offset], &outputs[1][offset], buffer_frames ); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + outputs[0][offset + i] += earlyLevel * early_out_buffer[0][i]; + outputs[1][offset + i] += earlyLevel * early_out_buffer[1][i]; + } +#endif } if( lateLevel > 0.0 ){ - VMUL32FLOAT_V(lateLevel, late_out_buffer[0], late_buffer, buffer_frames); - VSUM32FLOAT(late_buffer, &outputs[0][offset], &outputs[0][offset], buffer_frames); - - VMUL32FLOAT_V(lateLevel, late_out_buffer[1], late_buffer, buffer_frames); - VSUM32FLOAT(late_buffer, &outputs[1][offset], &outputs[1][offset], buffer_frames); +#ifdef USE_PLUGIN_SIMD + VMADD32FLOAT_V(lateLevel, late_out_buffer[0], &outputs[0][offset], 
&outputs[0][offset], buffer_frames ); + VMADD32FLOAT_V(lateLevel, late_out_buffer[1], &outputs[1][offset], &outputs[1][offset], buffer_frames ); +#else + for (uint32_t i = 0; i < buffer_frames; i++) { + outputs[0][offset + i] += lateLevel * late_out_buffer[0][i]; + outputs[1][offset + i] += lateLevel * late_out_buffer[1][i]; + } +#endif } } } diff --git a/plugins/dragonfly-room-reverb/DSP.hpp b/plugins/dragonfly-room-reverb/DSP.hpp index 92fe2e2..b1a7aff 100644 --- a/plugins/dragonfly-room-reverb/DSP.hpp +++ b/plugins/dragonfly-room-reverb/DSP.hpp @@ -52,9 +52,6 @@ class DragonflyReverbDSP : public AbstractDSP { float late_in_buffer[2][BUFFER_SIZE]; float late_out_buffer[2][BUFFER_SIZE]; - float early_buffer[BUFFER_SIZE]; - float late_buffer[BUFFER_SIZE]; - void setInputLPF(float freq); void setInputHPF(float freq); }; diff --git a/plugins/dragonfly-room-reverb/Makefile b/plugins/dragonfly-room-reverb/Makefile index 24f0631..ff0be81 100644 --- a/plugins/dragonfly-room-reverb/Makefile +++ b/plugins/dragonfly-room-reverb/Makefile @@ -14,10 +14,7 @@ NAME = DragonflyRoomReverb FILES_COMMON = DSP.cpp \ ../../common/kiss_fft/kiss_fft.c \ - ../../common/kiss_fft/kiss_fftr.c \ - ../../common/optimization/optimization.cpp \ - ../../common/optimization/default.cpp \ - ../../common/optimization/avx.cpp + ../../common/kiss_fft/kiss_fftr.c ifneq ($(SYSTEM_FREEVERB3),true) FILES_COMMON += \ @@ -52,6 +49,11 @@ FILES_UI = $(FILES_COMMON) \ ../../common/Spectrogram.cpp \ ../../common/Bitstream_Vera_Sans_Regular.cpp +# -------------------------------------------------------------- +# Check for optimization support + +include ../../common/optimization/Makefile.optimization.mk + # -------------------------------------------------------------- # Do some magic @@ -62,7 +64,7 @@ include ../../dpf/Makefile.plugins.mk # -------------------------------------------------------------- # Build dependencies -BUILD_CXX_FLAGS += -I../../common -DLIBFV3_FLOAT -mavx +BUILD_CXX_FLAGS += 
-I../../common -DLIBFV3_FLOAT ifeq ($(SYSTEM_FREEVERB3),true) BUILD_CXX_FLAGS += -DLIBSRATE1 BUILD_CXX_FLAGS += $(shell $(PKG_CONFIG) --cflags freeverb3-3)