From d2d73172ccac336b4b16e0af755dcba5465aa44e Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Mon, 23 Jun 2025 18:44:19 +0200
Subject: [PATCH 01/12] added PoC for DSP AR and fixed point calculation for C3

---
 usermods/audioreactive/audio_reactive.cpp | 236 ++++++++++++++++++----
 usermods/audioreactive/audio_source.h     |  37 +++-
 2 files changed, 228 insertions(+), 45 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 06268560a7..31779793b8 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -153,7 +153,11 @@ static bool useBandPassFilter = false;                    // if true, enables a
 // some prototypes, to ensure consistent interfaces
 static float fftAddAvg(int from, int to);   // average of several FFT result bins
 void FFTcode(void * parameter);      // audio processing task: read samples, run FFT, fill GEQ channels from FFT results
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
 static void runMicFilter(uint16_t numSamples, float *sampleBuffer);          // pre-filtering of raw samples (band-pass)
+#else
+static void runMicFilter(uint16_t numSamples, int16_t *sampleBuffer);
+#endif
 static void postProcessFFTResults(bool noiseGateOpen, int numberOfChannels); // post-processing and post-amp of GEQ channels
 
 static TaskHandle_t FFT_Task = nullptr;
@@ -193,8 +197,16 @@ constexpr uint16_t samplesFFT_2 = 256;          // meaningfull part of FFT resul
 #define LOG_256  5.54517744f                            // log(256)
 
 // These are the input and output vectors.  Input vectors receive computed results from FFT.
-static float* vReal = nullptr;                  // FFT sample inputs / freq output -  these are our raw result bins
-static float* vImag = nullptr;                  // imaginary parts
+static float* valFFT = nullptr;                // FFT sample inputs / freq output -  these are our raw result bins
+//static float* vImag = nullptr;                  // imaginary parts
+
+// pre-computed window function
+#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+__attribute__((aligned(16))) int16_t* windowFFT;
+#else
+__attribute__((aligned(16))) float* windowFFT;
+#endif
+
 
 // Create FFT object
 // lib_deps += https://github.com/kosme/arduinoFFT#develop @ 1.9.2
@@ -204,14 +216,15 @@ static float* vImag = nullptr;                  // imaginary parts
 // Below options are forcing ArduinoFFT to use sqrtf() instead of sqrt()
 // #define sqrt_internal sqrtf          // see https://github.com/kosme/arduinoFFT/pull/83 - since v2.0.0 this must be done in build_flags
 
-#include <arduinoFFT.h>             // FFT object is created in FFTcode
+//#include <arduinoFFT.h>             // FFT object is created in FFTcode
+#include "esp_dsp.h"                // ESP-IDF DSP library for FFT and window functions
 // Helper functions
 
 // compute average of several FFT result bins
-static float fftAddAvg(int from, int to) {
+static float fftAddAvg(int from, int to) { //!!!TODO: need to hanlde integer values
   float result = 0.0f;
   for (int i = from; i <= to; i++) {
-    result += vReal[i];
+    result += valFFT[i];
   }
   return result / float(to - from + 1);
 }
@@ -223,17 +236,37 @@ void FFTcode(void * parameter)
 {
   DEBUGSR_PRINT("FFT started on core: "); DEBUGSR_PRINTLN(xPortGetCoreID());
 
-  // allocate FFT buffers on first call
-  if (vReal == nullptr) vReal = (float*) calloc(sizeof(float), samplesFFT);
-  if (vImag == nullptr) vImag = (float*) calloc(sizeof(float), samplesFFT);
-  if ((vReal == nullptr) || (vImag == nullptr)) {
-    // something went wrong
-    if (vReal) free(vReal); vReal = nullptr;
-    if (vImag) free(vImag); vImag = nullptr;
-    return;
-  }
+  // allocate and initialize FFT buffers on first call
+  if (valFFT == nullptr) valFFT = (float*) calloc(sizeof(float), samplesFFT * 2);
+  if ((valFFT == nullptr)) return; // something went wrong
   // Create FFT object with weighing factor storage
-  ArduinoFFT<float> FFT = ArduinoFFT<float>( vReal, vImag, samplesFFT, SAMPLE_RATE, true);
+  //ArduinoFFT<float> FFT = ArduinoFFT<float>( vReal, vImag, samplesFFT, SAMPLE_RATE, true);
+
+  // create window
+#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+  if (windowFFT == nullptr) windowFFT = (int16_t*) calloc(sizeof(int16_t), samplesFFT);
+  if ((windowFFT == nullptr)) return; // something went wrong
+  if (dsps_fft2r_init_sc16(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
+
+  // create window function for FFT, "abuse" valFFT[] as temporary storage
+  //dsps_wind_hann_f32(valFFT, samplesFFT);
+  //dsps_wind_blackman_harris_f32(valFFT, samplesFFT);
+  dsps_wind_flat_top_f32(valFFT, samplesFFT);
+  // convert float window to 16-bit int
+  for (int i = 0; i < samplesFFT; i++) {
+    windowFFT[i] = (int16_t)(valFFT[i] * 32767.0f);
+  }
+  int16_t* valFFT16 = (int16_t*)valFFT;         // alias to access buffer as int16_t (intermediately)
+#else
+  if (windowFFT == nullptr) windowFFT = (float*) calloc(sizeof(float), samplesFFT);
+  if ((windowFFT == nullptr)) return; // something went wrong
+  if (dsps_fft2r_init_fc32(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
+  // create window function for FFT
+  //dsps_wind_hann_f32(windowFFT, samplesFFT);
+  //dsps_wind_blackman_harris_f32(windowFFT, samplesFFT);
+  dsps_wind_flat_top_f32(windowFFT, samplesFFT);
+#endif
+
 
   // see https://www.freertos.org/vtaskdelayuntil.html
   const TickType_t xFrequency = FFT_MIN_CYCLE * portTICK_PERIOD_MS;  
@@ -255,8 +288,7 @@ void FFTcode(void * parameter)
 #endif
 
     // get a fresh batch of samples from I2S
-    if (audioSource) audioSource->getSamples(vReal, samplesFFT);
-    memset(vImag, 0, samplesFFT * sizeof(float));   // set imaginary parts to 0
+    if (audioSource) audioSource->getSamples(valFFT, samplesFFT); // note: valFFT is used as a int16_t buffer on C3 and S2, could optimize RAM use by only allocating half the size (but makes code harder to read)
 
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
     if (start < esp_timer_get_time()) { // filter out overflows
@@ -270,15 +302,28 @@ void FFTcode(void * parameter)
 
     // band pass filter - can reduce noise floor by a factor of 50
     // downside: frequencies below 100Hz will be ignored
-    if (useBandPassFilter) runMicFilter(samplesFFT, vReal);
+    #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+    if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
+    #else
+    if (useBandPassFilter) runMicFilter(samplesFFT, valFFT16); // TODO: test this function!!!
+    #endif
 
     // find highest sample in the batch
+    #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
     float maxSample = 0.0f;                         // max sample from FFT batch
     for (int i=0; i < samplesFFT; i++) {
 	    // pick our  our current mic sample - we take the max value from all samples that go into FFT
-	    if ((vReal[i] <= (INT16_MAX - 1024)) && (vReal[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
-        if (fabsf((float)vReal[i]) > maxSample) maxSample = fabsf((float)vReal[i]);
+	    if ((valFFT[i] <= (INT16_MAX - 1024)) && (valFFT[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
+        if (fabsf((float)valFFT[i]) > maxSample) maxSample = fabsf((float)valFFT[i]);
+    }
+    #else
+    int32_t maxSample = 0;                         // max sample from FFT batch
+    for (int i=0; i < samplesFFT; i++) {
+	    // pick our  our current mic sample - we take the max value from all samples that go into FFT
+	    if ((valFFT16[i] <= (INT16_MAX - 1024)) && (valFFT16[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
+        if (abs(valFFT16[i]) > maxSample) maxSample = abs(valFFT16[i]);
     }
+    #endif
     // release highest sample to volume reactive effects early - not strictly necessary here - could also be done at the end of the function
     // early release allows the filters (getSample() and agcAvg()) to work with fresh values - we will have matching gain and noise gate values when we want to process the FFT results.
     micDataReal = maxSample;
@@ -289,32 +334,118 @@ void FFTcode(void * parameter)
     if (sampleAvg > 0.25f) { // noise gate open means that FFT results will be used. Don't run FFT if results are not needed.
 #endif
 
-      // run FFT (takes 3-5ms on ESP32, ~12ms on ESP32-S2)
-      FFT.dcRemoval();                                            // remove DC offset
-      FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward); // Weigh data using "Flat Top" function - better amplitude accuracy
-      //FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
-      FFT.compute( FFTDirection::Forward );                       // Compute FFT
-      FFT.complexToMagnitude();                                   // Compute magnitudes
-      vReal[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
+      // run FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~x ms on ESP32-C3)
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+      // remove DC offset
+      float sum = 0;
+      for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
+      float mean = sum / samplesFFT;
+      for (int i = 0; i < samplesFFT; i++) valFFT[i] -= mean;
+      //apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
+      for (int i = samplesFFT - 1; i >= 0 ; i--) {
+        // fill the buffer back to front to avoid overwriting samples
+        float windowed_sample = valFFT[i] * windowFFT[i];
+        valFFT[i * 2] = windowed_sample;
+        valFFT[i * 2 + 1] = 0.0; // set imaginary part to zero
+      }
+#ifdef CONFIG_IDF_TARGET_ESP32S3
+      dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT
+#else
+      dsps_fft2r_fc32_ae32(valFFT, samplesFFT); // ESP32 optimized version of FFT
+#endif
+      dsps_bit_rev_fc32(valFFT, samplesFFT);  // bit reverse
+
+      // convert to magnitude
+      for (int i = 1; i < samplesFFT_2; i++) { // skip [0] as it is DC offset
+        float real_part = valFFT[i * 2];
+        float imag_part = valFFT[i * 2 + 1];
+        valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part); //TODO: would the use of the more accurate sqrt() make a difference?
+      }
+#else
+
+//debug function:  Generate sine wave 
+/*
+  for (int i = 0; i < samplesFFT; i++) {
+    float time = (float)i / 22000;
+    float sample = 0.0;
+    sample =  sin(2.0 * PI * 1000 * time); // 1000 Hz sine wav
+    valFFT16[i] = (int16_t)(sample * 2000.0);  // scale up
+  }
+*/ //!!! remove
+  
+      // remove DC offset
+      int32_t sum = 0;
+      for (int i = 0; i < samplesFFT; i++) sum += valFFT16[i];
+      int32_t mean = sum / samplesFFT;
+      for (int i = 0; i < samplesFFT; i++) valFFT16[i] -= mean;
+      //apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
+      for (int i = samplesFFT - 1; i >= 0 ; i--) {
+        // fill the buffer back to front to avoid overwriting samples
+        int16_t windowed_sample = ((int32_t)valFFT16[i] * (int32_t)windowFFT[i]) >> 15; // both values are ±15bit
+        valFFT16[i * 2] = windowed_sample;
+        valFFT16[i * 2 + 1] = 0; // set imaginary part to zero
+        
+        // debug: no windowing, just copy samples!!!
+        //valFFT16[i * 2] = valFFT16[i];
+        //valFFT16[i * 2 + 1] = 0; // set imaginary part to zero
+        //Serial.println(windowed_sample); 
+      }
+/*
+    for (int i=0; i < samplesFFT; i++) {
+      Serial.println(valFFT16[i*2]); // -> looks correct
+    }*/
+
+      dsps_fft2r_sc16_ansi(valFFT16, samplesFFT); // perform FFT on complex value pairs (Re,Im)
+      dsps_bit_rev_sc16(valFFT16, samplesFFT);    // bit reverse i.e. "unshuffle" the results
+/*
+         for (int i=0; i < samplesFFT; i++) {
+        Serial.println(valFFT16[i]);
+       }*/ //!!! remove
+      // convert to magnitude, FFT returns interleaved complex values [Re,Im,Re,Im,...]
+      for (int i = 1; i < samplesFFT_2; i++) { // skip [0] as it is DC offset
+        int32_t real_part = valFFT16[i * 2];
+        int32_t imag_part = valFFT16[i * 2 + 1];
+        
+        valFFT16[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
+        /*
+        Serial.print(valFFT16[i]);
+        Serial.print(","); Serial.print(real_part);
+        Serial.print(","); Serial.println(imag_part);
+        */ //!!! remove
+      }
+
+//Serial.println("*************************************************"); //debug
+      for(int i = 0; i < samplesFFT_2; i++) {
+ //Serial.println(valFFT16[i]); //debug
+      }
+  
+      // convert to float
+      for (int i = samplesFFT_2-1; i > 0; i--) {
+        float scaledvalue = (float)valFFT16[i] / 32.0f; // scale to match float FFT and convert to float: back to front to avoid overwriting samples, skip [0]
+        valFFT[i] = scaledvalue;
+      }
+
+    //Serial.println("*************************************************"); //debug
+    //  Serial.println("*************************************************"); //debug
 
-      FFT.majorPeak(&FFT_MajorPeak, &FFT_Magnitude);                // let the effects know which freq was most dominant
-      FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects
 
+#endif
+      valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
       haveDoneFFT = true;
 #endif
 
     } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this.
-      memset(vReal, 0, samplesFFT * sizeof(float));
+      memset(valFFT, 0, samplesFFT * sizeof(float)); // only lower half of buffer contains FFT results, so only clear that part
       FFT_MajorPeak = 1;
       FFT_Magnitude = 0.001;
     }
-
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3) // note: for S2 and C3 scaling is done in fftAddAvg to avoid signal loss
     for (int i = 0; i < samplesFFT; i++) {
-      float t = fabsf(vReal[i]);                      // just to be sure - values in fft bins should be positive any way
-      vReal[i] = t / 16.0f;                           // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
+      float t = fabsf(valFFT[i]);                      // just to be sure - values in fft bins should be positive any way
+      valFFT[i] = t / 16.0f;                           // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
     } // for()
-
+#endif
     // mapping of FFT result bins to frequency channels
     if (fabsf(sampleAvg) > 0.5f) { // noise gate open
 #if 0
@@ -407,6 +538,7 @@ void FFTcode(void * parameter)
 // Pre / Postprocessing  //
 ///////////////////////////
 
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
 static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // pre-filtering of raw samples (band-pass)
 {
   // low frequency cutoff parameter - see https://dsp.stackexchange.com/questions/40462/exponential-moving-average-cut-off-frequency
@@ -437,6 +569,40 @@ static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // p
         sampleBuffer[i] = sampleBuffer[i] - lowfilt;
   }
 }
+#else
+static void runMicFilter(uint16_t numSamples, int16_t *sampleBuffer)  // pre-filtering of raw samples (band-pass)
+{
+  // low frequency cutoff parameter 17.15 fixed point format
+  //constexpr int32_t ALPHA_FP = 1311;    // 0.04f * (1<<15) (150Hz)
+  //constexpr int32_t ALPHA_FP = 983;     // 0.03f * (1<<15) (110Hz)
+  constexpr int32_t ALPHA_FP = 737;      // 0.0225f * (1<<15) (80Hz)
+  //constexpr int32_t ALPHA_FP = 555;     // 0.01693f * (1<<15) (60Hz)
+
+  // high frequency cutoff parameters 16.16 fixed point format
+  //constexpr int32_t BETA1_FP = 49152;   // 0.75f * (1<<16) (11KHz)
+  //constexpr int32_t BETA1_FP = 53740;   // 0.82f * (1<<16) (15KHz)
+  //constexpr int32_t BETA1_FP = 54297;   // 0.8285f * (1<<16) (18KHz)
+  constexpr int32_t BETA1_FP = 55706;     // 0.85f * (1<<16) (20KHz)
+  constexpr int32_t BETA2_FP = (65536 - BETA1_FP) / 2;  // ((1.0f - beta1) / 2.0f) * (1<<16)
+
+  static int32_t last_vals[2] = { 0 };    // FIR high freq cutoff filter (scaled by sample range)
+  static int32_t lowfilt_fp = 0;          // IIR low frequency cutoff filter (16.16 fixed point)
+
+  for (int i = 0; i < numSamples; i++) {
+    // FIR lowpass filter to remove high frequency noise
+    int32_t highFilteredSample_fp;
+
+    if (i < (numSamples - 1))
+      highFilteredSample_fp = (BETA1_FP * (int32_t)sampleBuffer[i] + BETA2_FP * last_vals[0] + BETA2_FP * (int32_t)sampleBuffer[i + 1]) >> 16; // smooth out spikes
+    else
+      highFilteredSample_fp = (BETA1_FP * (int32_t)sampleBuffer[i] + BETA2_FP * last_vals[0] + BETA2_FP * last_vals[1]) >> 16; // special handling for last sample in array
+    last_vals[1] = last_vals[0];
+    last_vals[0] = (int32_t)sampleBuffer[i];
+    lowfilt_fp += ALPHA_FP * (highFilteredSample_fp - (lowfilt_fp >> 15)); // low pass filter in 17.15 fixed point format
+    sampleBuffer[i] = highFilteredSample_fp - (lowfilt_fp >> 15);;
+  }
+}
+#endif
 
 static void postProcessFFTResults(bool noiseGateOpen, int numberOfChannels) // post-processing and post-amp of GEQ channels
 {
@@ -524,7 +690,7 @@ static void detectSamplePeak(void) {
   // Poor man's beat detection by seeing if sample > Average + some value.
   // This goes through ALL of the 255 bins - but ignores stupid settings
   // Then we got a peak, else we don't. The peak has to time out on its own in order to support UDP sound sync.
-  if ((sampleAvg > 1) && (maxVol > 0) && (binNum > 4) && (vReal[binNum] > maxVol) && ((millis() - timeOfPeak) > 100)) {
+  if ((sampleAvg > 1) && (maxVol > 0) && (binNum > 4) && (valFFT[binNum] > maxVol) && ((millis() - timeOfPeak) > 100)) {
     havePeak = true;
   }
 
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index a14f8def0b..8d43799c6f 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -134,7 +134,7 @@ class AudioSource {
        Read num_samples from the microphone, and store them in the provided
        buffer
     */
-    virtual void getSamples(float *buffer, uint16_t num_samples) = 0;
+    virtual void getSamples(void *buffer, uint16_t num_samples) = 0;
 
     /* check if the audio source driver was initialized successfully */
     virtual bool isInitialized(void) {return(_initialized);}
@@ -314,7 +314,7 @@ class I2SSource : public AudioSource {
       if (_mclkPin != I2S_PIN_NO_CHANGE) PinManager::deallocatePin(_mclkPin, PinOwner::UM_Audioreactive);
     }
 
-    virtual void getSamples(float *buffer, uint16_t num_samples) {
+    virtual void getSamples(void *buffer, uint16_t num_samples) {
       if (_initialized) {
         esp_err_t err;
         size_t bytes_read = 0;        /* Counter variable to check if we actually got enough data */
@@ -333,18 +333,35 @@ class I2SSource : public AudioSource {
         }
 
         // Store samples in sample buffer and update DC offset
-        for (int i = 0; i < num_samples; i++) {
+#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+        int16_t* _buffer = static_cast<int16_t*>(buffer); // use integer samples on ESP32-S2 and ESP32-C3
+        constexpr int32_t FIXEDSHIFT = 8; // shift by 8 bits for fixed point math (no loss at 24bit input sample resolution)
+        int32_t intSampleScale = _sampleScale * (1<<FIXEDSHIFT); // _sampleScale is <= 1.0f, shift for fixed point math
+#else
+        float* _buffer = static_cast<float*>(buffer);
+#endif
 
+        for (int i = 0; i < num_samples; i++) {
           newSamples[i] = postProcessSample(newSamples[i]);  // perform postprocessing (needed for ADC samples)
-          
-          float currSample = 0.0f;
-#ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
-              currSample = (float) newSamples[i] / 65536.0f;      // 32bit input -> 16bit; keeping lower 16bits as decimal places
+
+#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+  #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
+          int32_t currSample = newSamples[i] >> FIXEDSHIFT;   // shift to avoid overlow in multiplication
+          currSample = (currSample * intSampleScale) >> 16;    // scale samples, shift down to 16bit
+  #else
+          int32_t currSample = newSamples[i];                 // 16bit input -> use as-is
+  #endif
+          _buffer[i] = (int16_t)currSample;
 #else
-              currSample = (float) newSamples[i];                 // 16bit input -> use as-is
+          float currSample = 0.0f;
+  #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
+          currSample = (float) newSamples[i] / 65536.0f;      // 32bit input -> 16bit; keeping lower 16bits as decimal places
+  #else
+          currSample = (float) newSamples[i];                 // 16bit input -> use as-is
+  #endif
+          _buffer[i] = currSample;
+          _buffer[i] *= _sampleScale;                         // scale samples
 #endif
-          buffer[i] = currSample;
-          buffer[i] *= _sampleScale;                              // scale samples
         }
       }
     }

From 9c8f8ef3e5a8f6bec26e32a11c776b561c69f4e1 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Tue, 24 Jun 2025 08:22:11 +0200
Subject: [PATCH 02/12] fixed FFT scaling, first working version, tested only on
 C3

---
 usermods/audioreactive/audio_reactive.cpp | 52 +++--------------------
 usermods/audioreactive/audio_source.h     | 14 +++++-
 2 files changed, 19 insertions(+), 47 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 31779793b8..f1596c751b 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -239,8 +239,6 @@ void FFTcode(void * parameter)
   // allocate and initialize FFT buffers on first call
   if (valFFT == nullptr) valFFT = (float*) calloc(sizeof(float), samplesFFT * 2);
   if ((valFFT == nullptr)) return; // something went wrong
-  // Create FFT object with weighing factor storage
-  //ArduinoFFT<float> FFT = ArduinoFFT<float>( vReal, vImag, samplesFFT, SAMPLE_RATE, true);
 
   // create window
 #if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
@@ -256,7 +254,7 @@ void FFTcode(void * parameter)
   for (int i = 0; i < samplesFFT; i++) {
     windowFFT[i] = (int16_t)(valFFT[i] * 32767.0f);
   }
-  int16_t* valFFT16 = (int16_t*)valFFT;         // alias to access buffer as int16_t (intermediately)
+  int16_t* valFFT16 = (int16_t*)valFFT; // alias to use float buffer as int16_t storage (intermediately during FFT processing)
 #else
   if (windowFFT == nullptr) windowFFT = (float*) calloc(sizeof(float), samplesFFT);
   if ((windowFFT == nullptr)) return; // something went wrong
@@ -362,17 +360,6 @@ void FFTcode(void * parameter)
         valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part); //TODO: would the use of the more accurate sqrt() make a difference?
       }
 #else
-
-//debug function:  Generate sine wave 
-/*
-  for (int i = 0; i < samplesFFT; i++) {
-    float time = (float)i / 22000;
-    float sample = 0.0;
-    sample =  sin(2.0 * PI * 1000 * time); // 1000 Hz sine wav
-    valFFT16[i] = (int16_t)(sample * 2000.0);  // scale up
-  }
-*/ //!!! remove
-  
       // remove DC offset
       int32_t sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT16[i];
@@ -384,51 +371,24 @@ void FFTcode(void * parameter)
         int16_t windowed_sample = ((int32_t)valFFT16[i] * (int32_t)windowFFT[i]) >> 15; // both values are ±15bit
         valFFT16[i * 2] = windowed_sample;
         valFFT16[i * 2 + 1] = 0; // set imaginary part to zero
-        
-        // debug: no windowing, just copy samples!!!
-        //valFFT16[i * 2] = valFFT16[i];
-        //valFFT16[i * 2 + 1] = 0; // set imaginary part to zero
-        //Serial.println(windowed_sample); 
       }
-/*
-    for (int i=0; i < samplesFFT; i++) {
-      Serial.println(valFFT16[i*2]); // -> looks correct
-    }*/
 
       dsps_fft2r_sc16_ansi(valFFT16, samplesFFT); // perform FFT on complex value pairs (Re,Im)
       dsps_bit_rev_sc16(valFFT16, samplesFFT);    // bit reverse i.e. "unshuffle" the results
-/*
-         for (int i=0; i < samplesFFT; i++) {
-        Serial.println(valFFT16[i]);
-       }*/ //!!! remove
+
       // convert to magnitude, FFT returns interleaved complex values [Re,Im,Re,Im,...]
-      for (int i = 1; i < samplesFFT_2; i++) { // skip [0] as it is DC offset
+      for (int i = 1; i < samplesFFT_2; i++) { // skip [0], it is DC offset
         int32_t real_part = valFFT16[i * 2];
         int32_t imag_part = valFFT16[i * 2 + 1];
-        
         valFFT16[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
-        /*
-        Serial.print(valFFT16[i]);
-        Serial.print(","); Serial.print(real_part);
-        Serial.print(","); Serial.println(imag_part);
-        */ //!!! remove
       }
 
-//Serial.println("*************************************************"); //debug
-      for(int i = 0; i < samplesFFT_2; i++) {
- //Serial.println(valFFT16[i]); //debug
-      }
-  
-      // convert to float
+      // convert to float for further processing TODO: continue in integer math?
       for (int i = samplesFFT_2-1; i > 0; i--) {
-        float scaledvalue = (float)valFFT16[i] / 32.0f; // scale to match float FFT and convert to float: back to front to avoid overwriting samples, skip [0]
+        float scaledvalue = (float)valFFT16[i] * 64.0f; // scale to match float FFT and convert to float: back to front to avoid overwriting samples, skip [0]
+        // note: scaling value of 64 was found by using simulated sine waves and comparing the results.
         valFFT[i] = scaledvalue;
       }
-
-    //Serial.println("*************************************************"); //debug
-    //  Serial.println("*************************************************"); //debug
-
-
 #endif
       valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index 8d43799c6f..f3c93bb67b 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -331,8 +331,20 @@ class I2SSource : public AudioSource {
           DEBUGSR_PRINTF("Failed to get enough samples: wanted: %d read: %d\n", sizeof(newSamples), bytes_read);
           return;
         }
+/*
+  //debug function:  Generate sine wave 
+ for (int i = 0; i < num_samples; i++) {
+    float time = (float)i / 22000;
+    float sample = 0.0;
+    sample =  sin(2.0 * PI * 1000 * time); // 1000 Hz sine wav
+    sample +=  sin(2.0 * PI * 250 * time); // 1000 Hz sine wav
+    newSamples[i] = (int32_t)(sample * 990848.0); //(float)(millis()<<7));  // scale up
+  }
+*/ //!!!remove
 
-        // Store samples in sample buffer and update DC offset
+
+
+        // Store samples in sample buffer
 #if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
         int16_t* _buffer = static_cast<int16_t*>(buffer); // use integer samples on ESP32-S2 and ESP32-C3
         constexpr int32_t FIXEDSHIFT = 8; // shift by 8 bits for fixed point math (no loss at 24bit input sample resolution)

From 0131786ac974689993706eb6f7fb2626775e1459 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Tue, 24 Jun 2025 22:36:47 +0200
Subject: [PATCH 03/12] working version, debug of scaling in progress

---
 usermods/audioreactive/audio_reactive.cpp | 135 +++++++++++++---------
 usermods/audioreactive/audio_source.h     |  52 +++++----
 2 files changed, 108 insertions(+), 79 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index f1596c751b..d3bd0b1698 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -24,6 +24,8 @@
  * ....
  */
 
+#define FFT_PREFER_EXACT_PEAKS  // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
+
 #if !defined(FFTTASK_PRIORITY)
 #define FFTTASK_PRIORITY 1 // standard: looptask prio
 //#define FFTTASK_PRIORITY 2 // above looptask, below asyc_tcp
@@ -193,12 +195,20 @@ constexpr uint16_t samplesFFT = 512;            // Samples in an FFT batch - Thi
 constexpr uint16_t samplesFFT_2 = 256;          // meaningfull part of FFT results - only the "lower half" contains useful information.
 // the following are observed values, supported by a bit of "educated guessing"
 //#define FFT_DOWNSCALE 0.65f                             // 20kHz - downscaling factor for FFT results - "Flat-Top" window @20Khz, old freq channels 
+#ifdef FFT_PREFER_EXACT_PEAKS
+#define FFT_DOWNSCALE 0.40f                             // downscaling factor for FFT results, RMS averaging for "Blackman-Harris" Window @22kHz (credit to MM)
+#else
 #define FFT_DOWNSCALE 0.46f                             // downscaling factor for FFT results - for "Flat-Top" window @22Khz, new freq channels
+#endif
 #define LOG_256  5.54517744f                            // log(256)
 
 // These are the input and output vectors.  Input vectors receive computed results from FFT.
-static float* valFFT = nullptr;                // FFT sample inputs / freq output -  these are our raw result bins
-//static float* vImag = nullptr;                  // imaginary parts
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+static float* valFFT = nullptr;
+#else
+static int16_t* valFFT = nullptr;
+#endif
+
 
 // pre-computed window function
 #if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
@@ -216,17 +226,31 @@ __attribute__((aligned(16))) float* windowFFT;
 // Below options are forcing ArduinoFFT to use sqrtf() instead of sqrt()
 // #define sqrt_internal sqrtf          // see https://github.com/kosme/arduinoFFT/pull/83 - since v2.0.0 this must be done in build_flags
 
-//#include <arduinoFFT.h>             // FFT object is created in FFTcode
-#include "esp_dsp.h"                // ESP-IDF DSP library for FFT and window functions
+// ESP-IDF DSP library for FFT and window functions
+#include "dsps_fft2r.h"
+#ifdef FFT_PREFER_EXACT_PEAKS
+#include "dsps_wind_blackman_harris.h"
+#else
+#include "dsps_wind_flat_top.h"
+#endif
+
 // Helper functions
 
 // compute average of several FFT result bins
-static float fftAddAvg(int from, int to) { //!!!TODO: need to hanlde integer values
+static float fftAddAvg(int from, int to) { //!!!TODO: need to hanlde integer values and save as float for S2 and C3
+  #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
   float result = 0.0f;
   for (int i = from; i <= to; i++) {
     result += valFFT[i];
   }
-  return result / float(to - from + 1);
+  #else
+  int32_t result = 0;
+  for (int i = from; i <= to; i++) {
+    result += valFFT[i];
+  }
+  result *= 64; // scale result to match float values. note: scaling value of 64 was found by using simulated sine waves and comparing the results.
+  #endif
+  return float(result) / float(to - from + 1); // return average as float
 }
 
 //
@@ -236,33 +260,41 @@ void FFTcode(void * parameter)
 {
   DEBUGSR_PRINT("FFT started on core: "); DEBUGSR_PRINTLN(xPortGetCoreID());
 
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
   // allocate and initialize FFT buffers on first call
   if (valFFT == nullptr) valFFT = (float*) calloc(sizeof(float), samplesFFT * 2);
   if ((valFFT == nullptr)) return; // something went wrong
-
   // create window
-#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+  if (windowFFT == nullptr) windowFFT = (float*) calloc(sizeof(float), samplesFFT);
+  if ((windowFFT == nullptr)) return; // something went wrong
+  if (dsps_fft2r_init_fc32(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
+  // create window function for FFT
+#ifdef FFT_PREFER_EXACT_PEAKS
+  dsps_wind_blackman_harris_f32(windowFFT, samplesFFT);
+#else
+  dsps_wind_flat_top_f32(windowFFT, samplesFFT);
+#endif
+#else
+// allocate and initialize FFT buffers on first call
+  if (valFFT == nullptr) valFFT = (int16_t*) calloc(sizeof(int16_t), samplesFFT * 2);
+  if ((valFFT == nullptr)) return; // something went wrong
+  // create window
   if (windowFFT == nullptr) windowFFT = (int16_t*) calloc(sizeof(int16_t), samplesFFT);
   if ((windowFFT == nullptr)) return; // something went wrong
   if (dsps_fft2r_init_sc16(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
-
-  // create window function for FFT, "abuse" valFFT[] as temporary storage
-  //dsps_wind_hann_f32(valFFT, samplesFFT);
-  //dsps_wind_blackman_harris_f32(valFFT, samplesFFT);
-  dsps_wind_flat_top_f32(valFFT, samplesFFT);
+  // create window function for FFT
+  float *windowFloat = (float*) calloc(sizeof(float), samplesFFT); // temporary buffer for window function
+  if ((windowFloat == nullptr)) return; // something went wrong
+#ifdef FFT_PREFER_EXACT_PEAKS
+  dsps_wind_blackman_harris_f32(windowFloat, samplesFFT);
+#else
+  dsps_wind_flat_top_f32(windowFloat, samplesFFT);
+#endif
   // convert float window to 16-bit int
   for (int i = 0; i < samplesFFT; i++) {
-    windowFFT[i] = (int16_t)(valFFT[i] * 32767.0f);
+    windowFFT[i] = (int16_t)(windowFloat[i] * 32767.0f);
   }
-  int16_t* valFFT16 = (int16_t*)valFFT; // alias to use float buffer as int16_t storage (intermediately during FFT processing)
-#else
-  if (windowFFT == nullptr) windowFFT = (float*) calloc(sizeof(float), samplesFFT);
-  if ((windowFFT == nullptr)) return; // something went wrong
-  if (dsps_fft2r_init_fc32(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
-  // create window function for FFT
-  //dsps_wind_hann_f32(windowFFT, samplesFFT);
-  //dsps_wind_blackman_harris_f32(windowFFT, samplesFFT);
-  dsps_wind_flat_top_f32(windowFFT, samplesFFT);
+  free(windowFloat); // free temporary buffer
 #endif
 
 
@@ -291,19 +323,19 @@ void FFTcode(void * parameter)
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
     if (start < esp_timer_get_time()) { // filter out overflows
       uint64_t sampleTimeInMillis = (esp_timer_get_time() - start +5ULL) / 10ULL; // "+5" to ensure proper rounding
-      sampleTime = (sampleTimeInMillis*3 + sampleTime*7)/10; // smooth
+      sampleTime = (sampleTimeInMillis + sampleTime*49)/50; // smooth !!! revert change debug only
     }
     start = esp_timer_get_time(); // start measuring FFT time
 #endif
 
     xLastWakeTime = xTaskGetTickCount();       // update "last unblocked time" for vTaskDelay
 
-    // band pass filter - can reduce noise floor by a factor of 50
+    // band pass filter - can reduce noise floor by a factor of 50 and avoid aliasing effects to base & high frequency bands
     // downside: frequencies below 100Hz will be ignored
     #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
-    if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
+   // if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
     #else
-    if (useBandPassFilter) runMicFilter(samplesFFT, valFFT16); // TODO: test this function!!!
+    if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
     #endif
 
     // find highest sample in the batch
@@ -318,8 +350,8 @@ void FFTcode(void * parameter)
     int32_t maxSample = 0;                         // max sample from FFT batch
     for (int i=0; i < samplesFFT; i++) {
 	    // pick our  our current mic sample - we take the max value from all samples that go into FFT
-	    if ((valFFT16[i] <= (INT16_MAX - 1024)) && (valFFT16[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
-        if (abs(valFFT16[i]) > maxSample) maxSample = abs(valFFT16[i]);
+	    if ((valFFT[i] <= (INT16_MAX - 1024)) && (valFFT[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
+        if (abs(valFFT[i]) > maxSample) maxSample = abs(valFFT[i]);
     }
     #endif
     // release highest sample to volume reactive effects early - not strictly necessary here - could also be done at the end of the function
@@ -348,46 +380,42 @@ void FFTcode(void * parameter)
       }
 #ifdef CONFIG_IDF_TARGET_ESP32S3
       dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT
-#else
+#elif defined(CONFIG_IDF_TARGET_ESP32)
       dsps_fft2r_fc32_ae32(valFFT, samplesFFT); // ESP32 optimized version of FFT
+#else
+      dsps_fft2r_fc32_ansi(valFFT, samplesFFT); // perform FFT
 #endif
-      dsps_bit_rev_fc32(valFFT, samplesFFT);  // bit reverse
+      dsps_bit_rev_fc32(valFFT, samplesFFT);    // bit reverse
 
       // convert to magnitude
-      for (int i = 1; i < samplesFFT_2; i++) { // skip [0] as it is DC offset
+      for (int i = 1; i < samplesFFT_2; i++) {  // skip [0] as it is DC offset
         float real_part = valFFT[i * 2];
         float imag_part = valFFT[i * 2 + 1];
-        valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part); //TODO: would the use of the more accurate sqrt() make a difference?
+        valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part);
+        //valFFT[i] = valFFT[i] / 16.0f;          // Reduce magnitude. Want end result to be scaled linear and ~4096 max.  !!! 
       }
 #else
       // remove DC offset
       int32_t sum = 0;
-      for (int i = 0; i < samplesFFT; i++) sum += valFFT16[i];
+      for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
       int32_t mean = sum / samplesFFT;
-      for (int i = 0; i < samplesFFT; i++) valFFT16[i] -= mean;
+      for (int i = 0; i < samplesFFT; i++) valFFT[i] -= mean;
       //apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
       for (int i = samplesFFT - 1; i >= 0 ; i--) {
         // fill the buffer back to front to avoid overwriting samples
-        int16_t windowed_sample = ((int32_t)valFFT16[i] * (int32_t)windowFFT[i]) >> 15; // both values are ±15bit
-        valFFT16[i * 2] = windowed_sample;
-        valFFT16[i * 2 + 1] = 0; // set imaginary part to zero
+        int16_t windowed_sample = ((int32_t)valFFT[i] * (int32_t)windowFFT[i]) >> 15; // both values are ±15bit
+        valFFT[i * 2] = windowed_sample;
+        valFFT[i * 2 + 1] = 0; // set imaginary part to zero
       }
 
-      dsps_fft2r_sc16_ansi(valFFT16, samplesFFT); // perform FFT on complex value pairs (Re,Im)
-      dsps_bit_rev_sc16(valFFT16, samplesFFT);    // bit reverse i.e. "unshuffle" the results
+      dsps_fft2r_sc16_ansi(valFFT, samplesFFT); // perform FFT on complex value pairs (Re,Im)
+      dsps_bit_rev_sc16(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
 
       // convert to magnitude, FFT returns interleaved complex values [Re,Im,Re,Im,...]
       for (int i = 1; i < samplesFFT_2; i++) { // skip [0], it is DC offset
-        int32_t real_part = valFFT16[i * 2];
-        int32_t imag_part = valFFT16[i * 2 + 1];
-        valFFT16[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
-      }
-
-      // convert to float for further processing TODO: continue in integer math?
-      for (int i = samplesFFT_2-1; i > 0; i--) {
-        float scaledvalue = (float)valFFT16[i] * 64.0f; // scale to match float FFT and convert to float: back to front to avoid overwriting samples, skip [0]
-        // note: scaling value of 64 was found by using simulated sine waves and comparing the results.
-        valFFT[i] = scaledvalue;
+        int32_t real_part = valFFT[i * 2];
+        int32_t imag_part = valFFT[i * 2 + 1];
+        valFFT[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
       }
 #endif
       valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
@@ -400,7 +428,7 @@ void FFTcode(void * parameter)
       FFT_MajorPeak = 1;
       FFT_Magnitude = 0.001;
     }
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3) // note: for S2 and C3 scaling is done in fftAddAvg to avoid signal loss
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3) // note: for S2 and C3 scaling is done above
     for (int i = 0; i < samplesFFT; i++) {
       float t = fabsf(valFFT[i]);                      // just to be sure - values in fft bins should be positive any way
       valFFT[i] = t / 16.0f;                           // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
@@ -1295,8 +1323,7 @@ class AudioReactive : public Usermod {
         periph_module_reset(PERIPH_I2S0_MODULE);   // not possible on -C3
       #endif
       delay(100);         // Give that poor microphone some time to setup.
-
-      useBandPassFilter = false;
+      useBandPassFilter = false; //true; !!! // filter fixes aliasing to base & highest frequency bands and reduces noise floor (use for all mic inputs)
 
       #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
         if ((i2sckPin == I2S_PIN_NO_CHANGE) && (i2ssdPin >= 0) && (i2swsPin >= 0) && ((dmType == 1) || (dmType == 4)) ) dmType = 5;   // dummy user support: SCK == -1 --means--> PDM microphone
@@ -1338,7 +1365,6 @@ class AudioReactive : public Usermod {
         case 5:
           DEBUGSR_PRINT(F("AR: I2S PDM Microphone - ")); DEBUGSR_PRINTLN(F(I2S_PDM_MIC_CHANNEL_TEXT));
           audioSource = new I2SSource(SAMPLE_RATE, BLOCK_SIZE, 1.0f/4.0f);
-          useBandPassFilter = true;  // this reduces the noise floor on SPM1423 from 5% Vpp (~380) down to 0.05% Vpp (~5)
           delay(100);
           if (audioSource) audioSource->initialize(i2swsPin, i2ssdPin);
           break;
@@ -1346,6 +1372,7 @@ class AudioReactive : public Usermod {
         case 6:
           DEBUGSR_PRINTLN(F("AR: ES8388 Source"));
           audioSource = new ES8388Source(SAMPLE_RATE, BLOCK_SIZE);
+          useBandPassFilter = false;
           delay(100);
           if (audioSource) audioSource->initialize(i2swsPin, i2ssdPin, i2sckPin, mclkPin);
           break;
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index f3c93bb67b..69ffeddf16 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -331,48 +331,50 @@ class I2SSource : public AudioSource {
           DEBUGSR_PRINTF("Failed to get enough samples: wanted: %d read: %d\n", sizeof(newSamples), bytes_read);
           return;
         }
-/*
-  //debug function:  Generate sine wave 
- for (int i = 0; i < num_samples; i++) {
-    float time = (float)i / 22000;
-    float sample = 0.0;
-    sample =  sin(2.0 * PI * 1000 * time); // 1000 Hz sine wav
-    sample +=  sin(2.0 * PI * 250 * time); // 1000 Hz sine wav
-    newSamples[i] = (int32_t)(sample * 990848.0); //(float)(millis()<<7));  // scale up
-  }
-*/ //!!!remove
 
+        //debug function:  Generate sine wave
+      for (int i = 0; i < num_samples; i++) {
+          float time = (float)i / 22000;
+          float sample = 0.0;
+          sample =  sin(2.0 * PI * 1000 * time); // 1kHz sine wave
+          sample +=  sin(2.0 * PI * 250 * time); // 250 Hz sine wave
+          sample +=  sin(2.0 * PI * 4000 * time); // 4kHz sine wave
+          newSamples[i] = (int32_t)(sample * 990848.0); //(float)(millis()<<7));  // scale up
+        }
+      //!!!remove
 
 
         // Store samples in sample buffer
-#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
-        int16_t* _buffer = static_cast<int16_t*>(buffer); // use integer samples on ESP32-S2 and ESP32-C3
-        constexpr int32_t FIXEDSHIFT = 8; // shift by 8 bits for fixed point math (no loss at 24bit input sample resolution)
-        int32_t intSampleScale = _sampleScale * (1<<FIXEDSHIFT); // _sampleScale is <= 1.0f, shift for fixed point math
-#else
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
         float* _buffer = static_cast<float*>(buffer);
+#else
+        int16_t* _buffer = static_cast<int16_t*>(buffer); // use integer samples on ESP32-S2 and ESP32-C3
+        //constexpr int32_t FIXEDSHIFT = 8; // shift by 8 bits for fixed point math (no loss at 24bit input sample resolution)
+        //int32_t intSampleScale = _sampleScale * (1<<FIXEDSHIFT); // _sampleScale <= 1.0f, shift for fixed point math
 #endif
 
         for (int i = 0; i < num_samples; i++) {
           newSamples[i] = postProcessSample(newSamples[i]);  // perform postprocessing (needed for ADC samples)
 
-#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
   #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
-          int32_t currSample = newSamples[i] >> FIXEDSHIFT;   // shift to avoid overlow in multiplication
-          currSample = (currSample * intSampleScale) >> 16;    // scale samples, shift down to 16bit
+          float currSample = (float) newSamples[i] / 65536.0f;      // 32bit input -> 16bit; keeping lower 16bits as decimal places
   #else
-          int32_t currSample = newSamples[i];                 // 16bit input -> use as-is
+          float currSample = (float) newSamples[i];                 // 16bit input -> use as-is
   #endif
-          _buffer[i] = (int16_t)currSample;
+          _buffer[i] = currSample;
+          _buffer[i] *= _sampleScale;                               // scale samples
 #else
-          float currSample = 0.0f;
   #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
-          currSample = (float) newSamples[i] / 65536.0f;      // 32bit input -> 16bit; keeping lower 16bits as decimal places
+          // note on sample scaling: scaling is only used for inputs with master clock and those are better suited for ESP32 or S3
+          //int32_t currSample = newSamples[i] >> FIXEDSHIFT;   // shift to avoid overflow in multiplication
+          //currSample = (currSample * intSampleScale) >> 16;   // scale samples, shift down to 16bit
+          int16_t currSample = newSamples[i] >> 16;           // no sample scaling, just shift down to 16bit (not scaling saves ~0.4ms on C3)
   #else
-          currSample = (float) newSamples[i];                 // 16bit input -> use as-is
+          //int32_t currSample = (newSamples[i] * intSampleScale) >> FIXEDSHIFT;   // scale samples, shift back down to 16bit
+          int16_t currSample = newSamples[i];                 // 16bit input -> use as-is
   #endif
-          _buffer[i] = currSample;
-          _buffer[i] *= _sampleScale;                         // scale samples
+          _buffer[i] = (int16_t)currSample;
 #endif
         }
       }

From da91a0b63159321d2a0f8f7ecf32b51dfc6a3dc6 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Wed, 25 Jun 2025 20:17:23 +0200
Subject: [PATCH 04/12] debug version, fixed problem on S3: turns out to be mem
 alignment

---
 usermods/audioreactive/audio_reactive.cpp | 66 ++++++++++++++++++-----
 usermods/audioreactive/audio_source.h     | 12 +++--
 2 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index d3bd0b1698..1ca821e895 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -24,7 +24,7 @@
  * ....
  */
 
-#define FFT_PREFER_EXACT_PEAKS  // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
+//#define FFT_PREFER_EXACT_PEAKS  // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
 
 #if !defined(FFTTASK_PRIORITY)
 #define FFTTASK_PRIORITY 1 // standard: looptask prio
@@ -248,7 +248,7 @@ static float fftAddAvg(int from, int to) { //!!!TODO: need to hanlde integer val
   for (int i = from; i <= to; i++) {
     result += valFFT[i];
   }
-  result *= 64; // scale result to match float values. note: scaling value of 64 was found by using simulated sine waves and comparing the results.
+  result *= 128; // scale result to match float values. note: scaling value of was found by using simulated sine waves and comparing the results (the raw factor between float and int FFT is 256) TODO: check  this is correct!!!
   #endif
   return float(result) / float(to - from + 1); // return average as float
 }
@@ -262,11 +262,19 @@ void FFTcode(void * parameter)
 
 #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
   // allocate and initialize FFT buffers on first call
-  if (valFFT == nullptr) valFFT = (float*) calloc(sizeof(float), samplesFFT * 2);
-  if ((valFFT == nullptr)) return; // something went wrong
+  if (valFFT == nullptr) {
+    float* raw_buffer = (float*)heap_caps_malloc((2 * samplesFFT * sizeof(float)) + 16, MALLOC_CAP_8BIT);
+    if ((raw_buffer == nullptr)) return; // something went wrong
+    valFFT = (float*)(((uintptr_t)raw_buffer + 15) & ~15);  // SIMD requires aligned memory to 16-byte boundary. note in IDF5 there is MALLOC_CAP_SIMD available
+  }
+
   // create window
-  if (windowFFT == nullptr) windowFFT = (float*) calloc(sizeof(float), samplesFFT);
-  if ((windowFFT == nullptr)) return; // something went wrong
+  if (windowFFT == nullptr) {
+    float* raw_buffer = (float*)heap_caps_malloc((samplesFFT * sizeof(float)) + 16, MALLOC_CAP_8BIT);
+    if ((raw_buffer == nullptr)) return; // something went wrong
+    windowFFT = (float*)(((uintptr_t)raw_buffer + 15) & ~15);  // SIMD requires aligned memory to 16-byte boundary
+  }
+
   if (dsps_fft2r_init_fc32(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
   // create window function for FFT
 #ifdef FFT_PREFER_EXACT_PEAKS
@@ -323,7 +331,7 @@ void FFTcode(void * parameter)
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
     if (start < esp_timer_get_time()) { // filter out overflows
       uint64_t sampleTimeInMillis = (esp_timer_get_time() - start +5ULL) / 10ULL; // "+5" to ensure proper rounding
-      sampleTime = (sampleTimeInMillis + sampleTime*49)/50; // smooth !!! revert change debug only
+      sampleTime = (sampleTimeInMillis*3 + sampleTime*7)/10; // smooth
     }
     start = esp_timer_get_time(); // start measuring FFT time
 #endif
@@ -332,11 +340,8 @@ void FFTcode(void * parameter)
 
     // band pass filter - can reduce noise floor by a factor of 50 and avoid aliasing effects to base & high frequency bands
     // downside: frequencies below 100Hz will be ignored
-    #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
-   // if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
-    #else
+    useBandPassFilter = false; //!!! debug, remove
     if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
-    #endif
 
     // find highest sample in the batch
     #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
@@ -366,6 +371,8 @@ void FFTcode(void * parameter)
 
       // run FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~x ms on ESP32-C3)
 #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+      Serial.print("raw: ");
+      for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i]); Serial.print(", "); }Serial.println(" ");//!!!
       // remove DC offset
       float sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
@@ -378,8 +385,12 @@ void FFTcode(void * parameter)
         valFFT[i * 2] = windowed_sample;
         valFFT[i * 2 + 1] = 0.0; // set imaginary part to zero
       }
+            //DEBUG:
+     // Serial.print("windowed: ");
+    //  for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i*2]); Serial.print(", "); } Serial.println(" ");//!!!
 #ifdef CONFIG_IDF_TARGET_ESP32S3
-      dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT
+      dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT (requires 16bit aligned buffer!)
+      //dsps_fft2r_fc32_ansi(valFFT, samplesFFT); // perform FFT
 #elif defined(CONFIG_IDF_TARGET_ESP32)
       dsps_fft2r_fc32_ae32(valFFT, samplesFFT); // ESP32 optimized version of FFT
 #else
@@ -394,10 +405,23 @@ void FFTcode(void * parameter)
         valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part);
         //valFFT[i] = valFFT[i] / 16.0f;          // Reduce magnitude. Want end result to be scaled linear and ~4096 max.  !!! 
       }
+            //DEBUG:
+            Serial.println("**");
+      for (int i = 0; i < samplesFFT/16; i++) {
+        sum=0;
+        for (int k = i*8; k < i*8 + 8; k++) {
+          sum += valFFT[k];
+        }
+        Serial.print(sum);
+        Serial.print(", ");
+      }
+      Serial.println("**");
 #else
       // remove DC offset
       int32_t sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
+      Serial.print("raw: ");
+      for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i]); Serial.print(", "); }Serial.println(" ");//!!!
       int32_t mean = sum / samplesFFT;
       for (int i = 0; i < samplesFFT; i++) valFFT[i] -= mean;
       //apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
@@ -407,6 +431,9 @@ void FFTcode(void * parameter)
         valFFT[i * 2] = windowed_sample;
         valFFT[i * 2 + 1] = 0; // set imaginary part to zero
       }
+      //DEBUG:
+      //Serial.print("windowed: ");
+      //for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i*2]); Serial.print(", "); } Serial.println(" ");//!!!
 
       dsps_fft2r_sc16_ansi(valFFT, samplesFFT); // perform FFT on complex value pairs (Re,Im)
       dsps_bit_rev_sc16(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
@@ -417,6 +444,18 @@ void FFTcode(void * parameter)
         int32_t imag_part = valFFT[i * 2 + 1];
         valFFT[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
       }
+
+      //DEBUG:
+      Serial.println("**");
+      for (int i = 0; i < samplesFFT/16; i++) {
+        sum=0;
+        for (int k = i*8; k < i*8 + 8; k++) {
+          sum += valFFT[k];
+        }
+        Serial.print(sum);
+        Serial.print(", ");
+      }
+      Serial.println("**");
 #endif
       valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
@@ -465,6 +504,7 @@ void FFTcode(void * parameter)
 #else
       /* new mapping, optimized for 22050 Hz by softhack007 */
                                                     // bins frequency  range
+      useBandPassFilter = false; //!!! debug, remove
       if (useBandPassFilter) {
         // skip frequencies below 100hz
         fftCalc[ 0] = 0.8f * fftAddAvg(3,4);
@@ -1323,7 +1363,7 @@ class AudioReactive : public Usermod {
         periph_module_reset(PERIPH_I2S0_MODULE);   // not possible on -C3
       #endif
       delay(100);         // Give that poor microphone some time to setup.
-      useBandPassFilter = false; //true; !!! // filter fixes aliasing to base & highest frequency bands and reduces noise floor (use for all mic inputs)
+      useBandPassFilter = true; // filter fixes aliasing to base & highest frequency bands and reduces noise floor (use for all mic inputs)
 
       #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
         if ((i2sckPin == I2S_PIN_NO_CHANGE) && (i2ssdPin >= 0) && (i2swsPin >= 0) && ((dmType == 1) || (dmType == 4)) ) dmType = 5;   // dummy user support: SCK == -1 --means--> PDM microphone
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index 69ffeddf16..65240de561 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -334,12 +334,16 @@ class I2SSource : public AudioSource {
 
         //debug function:  Generate sine wave
       for (int i = 0; i < num_samples; i++) {
+          float frequencies[] = {250.0, 1200.0, 2600.0, 5500.0}; // 4 test frequencies
+          float amplitudes[] = {10000, 8000, 6000, 4000};
           float time = (float)i / 22000;
           float sample = 0.0;
-          sample =  sin(2.0 * PI * 1000 * time); // 1kHz sine wave
-          sample +=  sin(2.0 * PI * 250 * time); // 250 Hz sine wave
-          sample +=  sin(2.0 * PI * 4000 * time); // 4kHz sine wave
-          newSamples[i] = (int32_t)(sample * 990848.0); //(float)(millis()<<7));  // scale up
+          //float scale = 990848.0;
+          sample =  sin(2.0 * PI * frequencies[0] * time)* amplitudes[0] *65536; 
+          sample +=  sin(2.0 * PI * frequencies[1] * time)* amplitudes[1]*65536; 
+          sample +=  sin(2.0 * PI * frequencies[2] * time)* amplitudes[2]*65536; 
+          sample +=  sin(2.0 * PI * frequencies[3] * time)* amplitudes[3]*65536; 
+          newSamples[i] = (int32_t)(sample); //(float)(millis()<<7));  // scale up
         }
       //!!!remove
 

From ac6897c77a04ab2aad0994539e25c73c579c14e4 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Wed, 25 Jun 2025 21:38:41 +0200
Subject: [PATCH 05/12] fixed scaling for C3, removed debug stuff

---
 usermods/audioreactive/audio_reactive.cpp | 55 ++++-------------------
 usermods/audioreactive/audio_source.h     | 17 +------
 2 files changed, 9 insertions(+), 63 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 1ca821e895..9bc4a8ad2f 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -24,7 +24,7 @@
  * ....
  */
 
-//#define FFT_PREFER_EXACT_PEAKS  // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
+#define FFT_PREFER_EXACT_PEAKS  // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
 
 #if !defined(FFTTASK_PRIORITY)
 #define FFTTASK_PRIORITY 1 // standard: looptask prio
@@ -237,7 +237,7 @@ __attribute__((aligned(16))) float* windowFFT;
 // Helper functions
 
 // compute average of several FFT result bins
-static float fftAddAvg(int from, int to) { //!!!TODO: need to hanlde integer values and save as float for S2 and C3
+static float fftAddAvg(int from, int to) {
   #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
   float result = 0.0f;
   for (int i = from; i <= to; i++) {
@@ -248,7 +248,7 @@ static float fftAddAvg(int from, int to) { //!!!TODO: need to hanlde integer val
   for (int i = from; i <= to; i++) {
     result += valFFT[i];
   }
-  result *= 128; // scale result to match float values. note: scaling value of was found by using simulated sine waves and comparing the results (the raw factor between float and int FFT is 256) TODO: check  this is correct!!!
+  result *= 32; // scale result to match float values. note: scaling value between float and int is 512, float version is scaled down by 16
   #endif
   return float(result) / float(to - from + 1); // return average as float
 }
@@ -340,7 +340,6 @@ void FFTcode(void * parameter)
 
     // band pass filter - can reduce noise floor by a factor of 50 and avoid aliasing effects to base & high frequency bands
     // downside: frequencies below 100Hz will be ignored
-    useBandPassFilter = false; //!!! debug, remove
     if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
 
     // find highest sample in the batch
@@ -371,8 +370,6 @@ void FFTcode(void * parameter)
 
       // run FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~x ms on ESP32-C3)
 #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
-      Serial.print("raw: ");
-      for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i]); Serial.print(", "); }Serial.println(" ");//!!!
       // remove DC offset
       float sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
@@ -385,12 +382,8 @@ void FFTcode(void * parameter)
         valFFT[i * 2] = windowed_sample;
         valFFT[i * 2 + 1] = 0.0; // set imaginary part to zero
       }
-            //DEBUG:
-     // Serial.print("windowed: ");
-    //  for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i*2]); Serial.print(", "); } Serial.println(" ");//!!!
 #ifdef CONFIG_IDF_TARGET_ESP32S3
       dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT (requires 16bit aligned buffer!)
-      //dsps_fft2r_fc32_ansi(valFFT, samplesFFT); // perform FFT
 #elif defined(CONFIG_IDF_TARGET_ESP32)
       dsps_fft2r_fc32_ae32(valFFT, samplesFFT); // ESP32 optimized version of FFT
 #else
@@ -403,25 +396,12 @@ void FFTcode(void * parameter)
         float real_part = valFFT[i * 2];
         float imag_part = valFFT[i * 2 + 1];
         valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part);
-        //valFFT[i] = valFFT[i] / 16.0f;          // Reduce magnitude. Want end result to be scaled linear and ~4096 max.  !!! 
+        valFFT[i] = valFFT[i] / 16.0f;          // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
       }
-            //DEBUG:
-            Serial.println("**");
-      for (int i = 0; i < samplesFFT/16; i++) {
-        sum=0;
-        for (int k = i*8; k < i*8 + 8; k++) {
-          sum += valFFT[k];
-        }
-        Serial.print(sum);
-        Serial.print(", ");
-      }
-      Serial.println("**");
 #else
       // remove DC offset
       int32_t sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
-      Serial.print("raw: ");
-      for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i]); Serial.print(", "); }Serial.println(" ");//!!!
       int32_t mean = sum / samplesFFT;
       for (int i = 0; i < samplesFFT; i++) valFFT[i] -= mean;
       //apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
@@ -431,9 +411,6 @@ void FFTcode(void * parameter)
         valFFT[i * 2] = windowed_sample;
         valFFT[i * 2 + 1] = 0; // set imaginary part to zero
       }
-      //DEBUG:
-      //Serial.print("windowed: ");
-      //for (int i = 0; i < samplesFFT/8; i++) {Serial.print(valFFT[i*2]); Serial.print(", "); } Serial.println(" ");//!!!
 
       dsps_fft2r_sc16_ansi(valFFT, samplesFFT); // perform FFT on complex value pairs (Re,Im)
       dsps_bit_rev_sc16(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
@@ -444,35 +421,20 @@ void FFTcode(void * parameter)
         int32_t imag_part = valFFT[i * 2 + 1];
         valFFT[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
       }
-
-      //DEBUG:
-      Serial.println("**");
-      for (int i = 0; i < samplesFFT/16; i++) {
-        sum=0;
-        for (int k = i*8; k < i*8 + 8; k++) {
-          sum += valFFT[k];
-        }
-        Serial.print(sum);
-        Serial.print(", ");
-      }
-      Serial.println("**");
 #endif
       valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
       haveDoneFFT = true;
 #endif
 
+    //TODO calculate FFT_MajorPeak and FFT_Magnitude, that code was removed from here
+
     } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this.
       memset(valFFT, 0, samplesFFT * sizeof(float)); // only lower half of buffer contains FFT results, so only clear that part
       FFT_MajorPeak = 1;
       FFT_Magnitude = 0.001;
     }
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3) // note: for S2 and C3 scaling is done above
-    for (int i = 0; i < samplesFFT; i++) {
-      float t = fabsf(valFFT[i]);                      // just to be sure - values in fft bins should be positive any way
-      valFFT[i] = t / 16.0f;                           // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
-    } // for()
-#endif
+
     // mapping of FFT result bins to frequency channels
     if (fabsf(sampleAvg) > 0.5f) { // noise gate open
 #if 0
@@ -503,8 +465,7 @@ void FFTcode(void * parameter)
       fftCalc[15] = fftAddAvg(194,250);   // 3880 - 5000 // avoid the last 5 bins, which are usually inaccurate
 #else
       /* new mapping, optimized for 22050 Hz by softhack007 */
-                                                    // bins frequency  range
-      useBandPassFilter = false; //!!! debug, remove
+      // bins frequency  range
       if (useBandPassFilter) {
         // skip frequencies below 100hz
         fftCalc[ 0] = 0.8f * fftAddAvg(3,4);
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index 65240de561..87c5fd1d66 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -332,22 +332,6 @@ class I2SSource : public AudioSource {
           return;
         }
 
-        //debug function:  Generate sine wave
-      for (int i = 0; i < num_samples; i++) {
-          float frequencies[] = {250.0, 1200.0, 2600.0, 5500.0}; // 4 test frequencies
-          float amplitudes[] = {10000, 8000, 6000, 4000};
-          float time = (float)i / 22000;
-          float sample = 0.0;
-          //float scale = 990848.0;
-          sample =  sin(2.0 * PI * frequencies[0] * time)* amplitudes[0] *65536; 
-          sample +=  sin(2.0 * PI * frequencies[1] * time)* amplitudes[1]*65536; 
-          sample +=  sin(2.0 * PI * frequencies[2] * time)* amplitudes[2]*65536; 
-          sample +=  sin(2.0 * PI * frequencies[3] * time)* amplitudes[3]*65536; 
-          newSamples[i] = (int32_t)(sample); //(float)(millis()<<7));  // scale up
-        }
-      //!!!remove
-
-
         // Store samples in sample buffer
 #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
         float* _buffer = static_cast<float*>(buffer);
@@ -371,6 +355,7 @@ class I2SSource : public AudioSource {
 #else
   #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
           // note on sample scaling: scaling is only used for inputs with master clock and those are better suited for ESP32 or S3
+          // execution speed is critical on single core MCUs
           //int32_t currSample = newSamples[i] >> FIXEDSHIFT;   // shift to avoid overlow in multiplication
           //currSample = (currSample * intSampleScale) >> 16;   // scale samples, shift down to 16bit
           int16_t currSample = newSamples[i] >> 16;           // no sample scaling, just shift down to 16bit (not scaling saves ~0.4ms on C3)

From 614cb5c5cfcb2b4cb24212cf143f17b5c98678a2 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Thu, 26 Jun 2025 20:35:19 +0200
Subject: [PATCH 06/12] code cleanup, added arduinoFFT back in, added ifdefs &
 description

also added major peak and frequency bin calculation for DSP FFT
---
 platformio.ini                            |   1 +
 usermods/audioreactive/audio_reactive.cpp | 154 +++++++++++++++-------
 usermods/audioreactive/audio_source.h     |   6 +-
 3 files changed, 113 insertions(+), 48 deletions(-)

diff --git a/platformio.ini b/platformio.ini
index 9bdf58d341..36e6d8652e 100644
--- a/platformio.ini
+++ b/platformio.ini
@@ -534,6 +534,7 @@ platform = ${esp32c3.platform}
 framework = arduino
 board = esp32-c3-devkitm-1
 board_build.partitions = ${esp32.default_partitions}
+custom_usermods = audioreactive
 build_flags = ${common.build_flags} ${esp32c3.build_flags} -D WLED_RELEASE_NAME=\"ESP32-C3\"
   -D WLED_WATCHDOG_TIMEOUT=0
   -DLOLIN_WIFI_FIX ; seems to work much better with this
diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 9bc4a8ad2f..1e2380cfa5 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -24,7 +24,64 @@
  * ....
  */
 
-#define FFT_PREFER_EXACT_PEAKS  // use different FFT windowing -> results in "sharper" peaks and less "leaking" into other frequencies
+#define FFT_PREFER_EXACT_PEAKS  // use Blackman-Harris FFT windowing instead of Flat Top -> results in "sharper" peaks and less "leaking" into other frequencies (credits to @softhack)
+
+/*
+ * Note on FFT variants:
+ * - ArduinoFFT: uses floating point calculations, very slow on S2 and C3 (no FPU)
+ * - ESP-IDF DSP library:
+     - faster but uses ~13k of extra flash on ESP32 and S3
+ *   - uses integer math on S2 and C3: slightly less accurate but over 10x faster than ArduinoFFT and uses less flash
+     - not available in IDF < 4.4
+ * - ArduinoFFT is used by default on ESP32 and S3
+ * - ESP-IDF DSP FFT with integer math is used by default on S2 and C3
+ * - defines:
+ *   - UM_AUDIOREACTIVE_USE_ARDUINO_FFT: use ArduinoFFT library for FFT (for S2 and C3)
+ *   - UM_AUDIOREACTIVE_USE_ESPDSP_FFT:  use ESP-IDF DSP for FFT (for ESP32 and S3 on IDF >= 4.4)
+*/
+
+//#define UM_AUDIOREACTIVE_USE_ESPDSP_FFT
+//#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // use integer FFT if using ESP-IDF DSP library, always used on S2 and C3 (UM_AUDIOREACTIVE_USE_ARDUINO_FFT takes priority)
+#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) || ((defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)) && !defined(UM_AUDIOREACTIVE_USE_ARDUINO_FFT))
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default, except ESP32-S2 and ESP32-C3
+#endif
+
+
+#if ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0)
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // DSP FFT library is not available in ESP-IDF < 4.4
+#endif
+
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+#include <arduinoFFT.h> // ArduinoFFT library for FFT and window functions
+#else
+#include "dsps_fft2r.h" // ESP-IDF DSP library for FFT and window functions
+#ifdef FFT_PREFER_EXACT_PEAKS
+#include "dsps_wind_blackman_harris.h"
+#else
+#include "dsps_wind_flat_top.h"
+#endif
+#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // always use integer FFT on ESP32-S2 and ESP32-C3
+#endif
+#endif
+
+// These are the input and output vectors.  Input vectors receive computed results from FFT.
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+static float* valFFT = nullptr;
+#else
+static int16_t* valFFT = nullptr;
+#endif
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+static float* vImag = nullptr; // imaginary part of FFT results
+#endif
+
+// pre-computed window function
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+__attribute__((aligned(16))) float* windowFFT;
+#else
+__attribute__((aligned(16))) int16_t* windowFFT;
+#endif
+
 
 #if !defined(FFTTASK_PRIORITY)
 #define FFTTASK_PRIORITY 1 // standard: looptask prio
@@ -155,7 +212,7 @@ static bool useBandPassFilter = false;                    // if true, enables a
 // some prototypes, to ensure consistent interfaces
 static float fftAddAvg(int from, int to);   // average of several FFT result bins
 void FFTcode(void * parameter);      // audio processing task: read samples, run FFT, fill GEQ channels from FFT results
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
 static void runMicFilter(uint16_t numSamples, float *sampleBuffer);          // pre-filtering of raw samples (band-pass)
 #else
 static void runMicFilter(uint16_t numSamples, int16_t *sampleBuffer);
@@ -202,22 +259,6 @@ constexpr uint16_t samplesFFT_2 = 256;          // meaningfull part of FFT resul
 #endif
 #define LOG_256  5.54517744f                            // log(256)
 
-// These are the input and output vectors.  Input vectors receive computed results from FFT.
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
-static float* valFFT = nullptr;
-#else
-static int16_t* valFFT = nullptr;
-#endif
-
-
-// pre-computed window function
-#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
-__attribute__((aligned(16))) int16_t* windowFFT;
-#else
-__attribute__((aligned(16))) float* windowFFT;
-#endif
-
-
 // Create FFT object
 // lib_deps += https://github.com/kosme/arduinoFFT#develop @ 1.9.2
 // these options actually cause slow-downs on all esp32 processors, don't use them.
@@ -226,19 +267,11 @@ __attribute__((aligned(16))) float* windowFFT;
 // Below options are forcing ArduinoFFT to use sqrtf() instead of sqrt()
 // #define sqrt_internal sqrtf          // see https://github.com/kosme/arduinoFFT/pull/83 - since v2.0.0 this must be done in build_flags
 
-// ESP-IDF DSP library for FFT and window functions
-#include "dsps_fft2r.h"
-#ifdef FFT_PREFER_EXACT_PEAKS
-#include "dsps_wind_blackman_harris.h"
-#else
-#include "dsps_wind_flat_top.h"
-#endif
-
 // Helper functions
 
 // compute average of several FFT result bins
 static float fftAddAvg(int from, int to) {
-  #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+  #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
   float result = 0.0f;
   for (int i = from; i <= to; i++) {
     result += valFFT[i];
@@ -259,22 +292,31 @@ static float fftAddAvg(int from, int to) {
 void FFTcode(void * parameter)
 {
   DEBUGSR_PRINT("FFT started on core: "); DEBUGSR_PRINTLN(xPortGetCoreID());
-
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+  // allocate FFT buffers on first call
+  if (valFFT == nullptr) valFFT = (float*) calloc(sizeof(float), samplesFFT);
+  if (vImag == nullptr) vImag = (float*) calloc(sizeof(float), samplesFFT);
+  if ((valFFT == nullptr) || (vImag == nullptr)) {
+    // something went wrong
+    if (valFFT) free(valFFT); valFFT = nullptr;
+    if (vImag) free(vImag); vImag = nullptr;
+    return;
+  }
+  // Create FFT object with weighing factor storage
+  ArduinoFFT<float> FFT = ArduinoFFT<float>(valFFT, vImag, samplesFFT, SAMPLE_RATE, true);
+#elif !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
   // allocate and initialize FFT buffers on first call
   if (valFFT == nullptr) {
     float* raw_buffer = (float*)heap_caps_malloc((2 * samplesFFT * sizeof(float)) + 16, MALLOC_CAP_8BIT);
     if ((raw_buffer == nullptr)) return; // something went wrong
     valFFT = (float*)(((uintptr_t)raw_buffer + 15) & ~15);  // SIMD requires aligned memory to 16-byte boundary. note in IDF5 there is MALLOC_CAP_SIMD available
   }
-
   // create window
   if (windowFFT == nullptr) {
     float* raw_buffer = (float*)heap_caps_malloc((samplesFFT * sizeof(float)) + 16, MALLOC_CAP_8BIT);
     if ((raw_buffer == nullptr)) return; // something went wrong
     windowFFT = (float*)(((uintptr_t)raw_buffer + 15) & ~15);  // SIMD requires aligned memory to 16-byte boundary
   }
-
   if (dsps_fft2r_init_fc32(NULL, samplesFFT) != ESP_OK) return; // initialize FFT tables
   // create window function for FFT
 #ifdef FFT_PREFER_EXACT_PEAKS
@@ -283,7 +325,7 @@ void FFTcode(void * parameter)
   dsps_wind_flat_top_f32(windowFFT, samplesFFT);
 #endif
 #else
-// allocate and initialize FFT buffers on first call
+  // allocate and initialize integer FFT buffers on first call
   if (valFFT == nullptr) valFFT = (int16_t*) calloc(sizeof(int16_t), samplesFFT * 2);
   if ((valFFT == nullptr)) return; // something went wrong
   // create window
@@ -305,7 +347,6 @@ void FFTcode(void * parameter)
   free(windowFloat); // free temporary buffer
 #endif
 
-
   // see https://www.freertos.org/vtaskdelayuntil.html
   const TickType_t xFrequency = FFT_MIN_CYCLE * portTICK_PERIOD_MS;  
 
@@ -343,7 +384,7 @@ void FFTcode(void * parameter)
     if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
 
     // find highest sample in the batch
-    #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+    #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
     float maxSample = 0.0f;                         // max sample from FFT batch
     for (int i=0; i < samplesFFT; i++) {
 	    // pick our  our current mic sample - we take the max value from all samples that go into FFT
@@ -368,8 +409,19 @@ void FFTcode(void * parameter)
     if (sampleAvg > 0.25f) { // noise gate open means that FFT results will be used. Don't run FFT if results are not needed.
 #endif
 
-      // run FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~x ms on ESP32-C3)
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+      // run Arduino FFT (takes 3-5ms on ESP32, ~12ms on ESP32-S2, ~20ms on ESP32-C3)
+      FFT.dcRemoval();                                            // remove DC offset
+      FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward); // Weigh data using "Flat Top" function - better amplitude accuracy
+      //FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
+      FFT.compute( FFTDirection::Forward );                       // Compute FFT
+      FFT.complexToMagnitude();                                   // Compute magnitudes
+      valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
+
+      FFT.majorPeak(&FFT_MajorPeak, &FFT_Magnitude);                // let the effects know which freq was most dominant
+      FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects
+#elif !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+      // run run float DSP FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~x ms on ESP32-C3) TODO: test and fill in these values
       // remove DC offset
       float sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
@@ -383,22 +435,28 @@ void FFTcode(void * parameter)
         valFFT[i * 2 + 1] = 0.0; // set imaginary part to zero
       }
 #ifdef CONFIG_IDF_TARGET_ESP32S3
-      dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT (requires 16bit aligned buffer!)
+      dsps_fft2r_fc32_aes3(valFFT, samplesFFT); // ESP32 S3 optimized version of FFT
 #elif defined(CONFIG_IDF_TARGET_ESP32)
       dsps_fft2r_fc32_ae32(valFFT, samplesFFT); // ESP32 optimized version of FFT
 #else
-      dsps_fft2r_fc32_ansi(valFFT, samplesFFT); // perform FFT
+      dsps_fft2r_fc32_ansi(valFFT, samplesFFT); // perform FFT using ANSI C implementation
 #endif
       dsps_bit_rev_fc32(valFFT, samplesFFT);    // bit reverse
-
-      // convert to magnitude
+      // convert to magnitude & find FFT_MajorPeak and FFT_Magnitude
+      FFT_MajorPeak = 0;
+      FFT_Magnitude = 0;
       for (int i = 1; i < samplesFFT_2; i++) {  // skip [0] as it is DC offset
         float real_part = valFFT[i * 2];
         float imag_part = valFFT[i * 2 + 1];
         valFFT[i] = sqrtf(real_part * real_part + imag_part * imag_part);
+        if (valFFT[i] > FFT_Magnitude) {
+          FFT_Magnitude = valFFT[i];
+          FFT_MajorPeak = i*(SAMPLE_RATE/samplesFFT);
+        }
         valFFT[i] = valFFT[i] / 16.0f;          // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
       }
 #else
+      // run integer DSP FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~1.5 ms on ESP32-C3) TODO: test and fill in these values
       // remove DC offset
       int32_t sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
@@ -411,24 +469,30 @@ void FFTcode(void * parameter)
         valFFT[i * 2] = windowed_sample;
         valFFT[i * 2 + 1] = 0; // set imaginary part to zero
       }
-
       dsps_fft2r_sc16_ansi(valFFT, samplesFFT); // perform FFT on complex value pairs (Re,Im)
       dsps_bit_rev_sc16(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
 
       // convert to magnitude, FFT returns interleaved complex values [Re,Im,Re,Im,...]
+      int FFT_MajorPeak_int = 0;
+      int FFT_Magnitude_int = 0;
       for (int i = 1; i < samplesFFT_2; i++) { // skip [0], it is DC offset
         int32_t real_part = valFFT[i * 2];
         int32_t imag_part = valFFT[i * 2 + 1];
         valFFT[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
+        if (valFFT[i] > FFT_Magnitude_int) {
+          FFT_Magnitude_int = valFFT[i];
+          FFT_MajorPeak_int = ((i * SAMPLE_RATE)/samplesFFT);
+        }
+        // note: scaling is done when converting to float in fftAddAvg(), so we don't scale here
       }
+      FFT_MajorPeak = FFT_MajorPeak_int;
+      FFT_Magnitude = FFT_Magnitude_int;
+
 #endif
       valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
       haveDoneFFT = true;
 #endif
-
-    //TODO calculate FFT_MajorPeak and FFT_Magnitude, that code was removed from here
-
     } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this.
       memset(valFFT, 0, samplesFFT * sizeof(float)); // only lower half of buffer contains FFT results, so only clear that part
       FFT_MajorPeak = 1;
@@ -527,7 +591,7 @@ void FFTcode(void * parameter)
 // Pre / Postprocessing  //
 ///////////////////////////
 
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
 static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // pre-filtering of raw samples (band-pass)
 {
   // low frequency cutoff parameter - see https://dsp.stackexchange.com/questions/40462/exponential-moving-average-cut-off-frequency
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index 87c5fd1d66..7e9391b371 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -22,7 +22,7 @@
 
 // see https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/hw-reference/chip-series-comparison.html#related-documents
 // and https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/api-reference/peripherals/i2s.html#overview-of-all-modes
-#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265)
+#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C5) || defined(CONFIG_IDF_TARGET_ESP32C6) || defined(CONFIG_IDF_TARGET_ESP32H2) || defined(ESP8266) || defined(ESP8265)
   // there are two things in these MCUs that could lead to problems with audio processing:
   // * no floating point hardware (FPU) support - FFT uses float calculations. If done in software, a strong slow-down can be expected (between 8x and 20x)
   // * single core, so FFT task might slow down other things like LED updates
@@ -333,7 +333,7 @@ class I2SSource : public AudioSource {
         }
 
         // Store samples in sample buffer
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
         float* _buffer = static_cast<float*>(buffer);
 #else
         int16_t* _buffer = static_cast<int16_t*>(buffer); // use integer samples on ESP32-S2 and ESP32-C3
@@ -344,7 +344,7 @@ class I2SSource : public AudioSource {
         for (int i = 0; i < num_samples; i++) {
           newSamples[i] = postProcessSample(newSamples[i]);  // perform postprocessing (needed for ADC samples)
 
-#if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
   #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
           float currSample = (float) newSamples[i] / 65536.0f;      // 32bit input -> 16bit; keeping lower 16bits as decimal places
   #else

From ab69c5ae81904bea7016e505702e8e874cd9434b Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Thu, 26 Jun 2025 22:45:01 +0200
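Subject: [PATCH 07/12] bugfixes

- ArduinoFFT is now the default only on ESP32 and S3 (unless UM_AUDIOREACTIVE_USE_ESPDSP_FFT is defined)
- ArduinoFFT: clear the imaginary buffer before each run, honor FFT_PREFER_EXACT_PEAKS for the window, scale result bins by 1/16
- DSP FFT: zero the DC bin right after bit reversal, use dsps_bit_rev_sc16_ansi() for the integer FFT
- integer FFT: scale FFT_Magnitude by 512 to match the raw float value
- constrain FFT_MajorPeak to 1..11025 for all FFT variants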

---
 usermods/audioreactive/audio_reactive.cpp | 28 +++++++++++++++--------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 1e2380cfa5..eb0e39edc5 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -42,8 +42,10 @@
 
 //#define UM_AUDIOREACTIVE_USE_ESPDSP_FFT
 //#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // use integer FFT if using ESP-IDF DSP library, always used on S2 and C3 (UM_AUDIOREACTIVE_USE_ARDUINO_FFT takes priority)
-#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) || ((defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)) && !defined(UM_AUDIOREACTIVE_USE_ARDUINO_FFT))
-#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default, except ESP32-S2 and ESP32-C3
+//#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // default on ESP32 and S3
+
+#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32))
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default on ESP32 and S3
 #endif
 
 
@@ -411,15 +413,20 @@ void FFTcode(void * parameter)
 
 #ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
       // run Arduino FFT (takes 3-5ms on ESP32, ~12ms on ESP32-S2, ~20ms on ESP32-C3)
+      memset(vImag, 0, samplesFFT * sizeof(float));               // set imaginary parts to 0
       FFT.dcRemoval();                                            // remove DC offset
+#ifdef FFT_PREFER_EXACT_PEAKS
+      FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
+#else
       FFT.windowing( FFTWindow::Flat_top, FFTDirection::Forward); // Weigh data using "Flat Top" function - better amplitude accuracy
-      //FFT.windowing(FFTWindow::Blackman_Harris, FFTDirection::Forward);  // Weigh data using "Blackman- Harris" window - sharp peaks due to excellent sideband rejection
+#endif
       FFT.compute( FFTDirection::Forward );                       // Compute FFT
       FFT.complexToMagnitude();                                   // Compute magnitudes
       valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
-
-      FFT.majorPeak(&FFT_MajorPeak, &FFT_Magnitude);                // let the effects know which freq was most dominant
-      FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects
+      FFT.majorPeak(&FFT_MajorPeak, &FFT_Magnitude);              // let the effects know which freq was most dominant
+      for (int i = 1; i < samplesFFT_2; i++) { // skip [0] as it is DC offset
+        valFFT[i] = valFFT[i] / 16.0f;         // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
+      }
 #elif !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
       // run run float DSP FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~x ms on ESP32-C3) TODO: test and fill in these values
       // remove DC offset
@@ -442,6 +449,7 @@ void FFTcode(void * parameter)
       dsps_fft2r_fc32_ansi(valFFT, samplesFFT); // perform FFT using ANSI C implementation
 #endif
       dsps_bit_rev_fc32(valFFT, samplesFFT);    // bit reverse
+      valFFT[0] = 0;  // set DC bin to 0, as it is not needed and can cause issues
       // convert to magnitude & find FFT_MajorPeak and FFT_Magnitude
       FFT_MajorPeak = 0;
       FFT_Magnitude = 0;
@@ -470,8 +478,8 @@ void FFTcode(void * parameter)
         valFFT[i * 2 + 1] = 0; // set imaginary part to zero
       }
       dsps_fft2r_sc16_ansi(valFFT, samplesFFT); // perform FFT on complex value pairs (Re,Im)
-      dsps_bit_rev_sc16(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
-
+      dsps_bit_rev_sc16_ansi(valFFT, samplesFFT);    // bit reverse i.e. "unshuffle" the results
+      valFFT[0] = 0; // set DC bin to 0, as it is not needed and can cause issues
       // convert to magnitude, FFT returns interleaved complex values [Re,Im,Re,Im,...]
       int FFT_MajorPeak_int = 0;
       int FFT_Magnitude_int = 0;
@@ -480,7 +488,7 @@ void FFTcode(void * parameter)
         int32_t imag_part = valFFT[i * 2 + 1];
         valFFT[i] = sqrt32_bw(real_part * real_part + imag_part * imag_part); // note: this should never overflow as Re and Im form a vector of maximum length 32767
         if (valFFT[i] > FFT_Magnitude_int) {
-          FFT_Magnitude_int = valFFT[i];
+          FFT_Magnitude_int = valFFT[i] * 512; // scale to match raw float value
           FFT_MajorPeak_int = ((i * SAMPLE_RATE)/samplesFFT);
         }
         // note: scaling is done when converting to float in fftAddAvg(), so we don't scale here
@@ -489,7 +497,7 @@ void FFTcode(void * parameter)
       FFT_Magnitude = FFT_Magnitude_int;
 
 #endif
-      valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
+      FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
       haveDoneFFT = true;
 #endif

From 90637eb2a9706aa9d806a73bed1a9c551c049c0e Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Fri, 27 Jun 2025 08:21:06 +0200
Subject: [PATCH 08/12] more bugfixes

now also compiles on ESP8266
---
 usermods/audioreactive/audio_reactive.cpp | 96 ++++++++++++-----------
 usermods/audioreactive/audio_source.h     |  5 +-
 2 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index eb0e39edc5..8bffc1cadb 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -36,55 +36,14 @@
  * - ArduinoFFT is used by default on ESP32 and S3
  * - ESP-IDF DSP FFT with integer math is used by default on S2 and C3
  * - defines:
- *   - UM_AUDIOREACTIVE_USE_ARDUINO_FFT: use ArduinoFFT library for FFT (for S2 and C3)
- *   - UM_AUDIOREACTIVE_USE_ESPDSP_FFT:  use ESP-IDF DSP for FFT (for ESP32 and S3 on IDF >= 4.4)
+ *   - UM_AUDIOREACTIVE_USE_ARDUINO_FFT: use ArduinoFFT library for FFT
+ *   - UM_AUDIOREACTIVE_USE_ESPDSP_FFT:  use ESP-IDF DSP for FFT
 */
 
-//#define UM_AUDIOREACTIVE_USE_ESPDSP_FFT
+//#define UM_AUDIOREACTIVE_USE_ESPDSP_FFT  // default on S2 and C3
 //#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // use integer FFT if using ESP-IDF DSP library, always used on S2 and C3 (UM_AUDIOREACTIVE_USE_ARDUINO_FFT takes priority)
 //#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // default on ESP32 and S3
 
-#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32))
-#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default on ESP32 and S3
-#endif
-
-
-#if ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0)
-#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // DSP FFT library is not available in ESP-IDF < 4.4
-#endif
-
-#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
-#include <arduinoFFT.h> // ArduinoFFT library for FFT and window functions
-#else
-#include "dsps_fft2r.h" // ESP-IDF DSP library for FFT and window functions
-#ifdef FFT_PREFER_EXACT_PEAKS
-#include "dsps_wind_blackman_harris.h"
-#else
-#include "dsps_wind_flat_top.h"
-#endif
-#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
-#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // always use integer FFT on ESP32-S2 and ESP32-C3
-#endif
-#endif
-
-// These are the input and output vectors.  Input vectors receive computed results from FFT.
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-static float* valFFT = nullptr;
-#else
-static int16_t* valFFT = nullptr;
-#endif
-#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
-static float* vImag = nullptr; // imaginary part of FFT results
-#endif
-
-// pre-computed window function
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-__attribute__((aligned(16))) float* windowFFT;
-#else
-__attribute__((aligned(16))) int16_t* windowFFT;
-#endif
-
-
 #if !defined(FFTTASK_PRIORITY)
 #define FFTTASK_PRIORITY 1 // standard: looptask prio
 //#define FFTTASK_PRIORITY 2 // above looptask, below asyc_tcp
@@ -211,6 +170,45 @@ static bool useBandPassFilter = false;                    // if true, enables a
 // Begin FFT Code //
 ////////////////////
 
+#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32))
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default on ESP32 and S3
+#endif
+
+#if ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0)
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // DSP FFT library is not available in ESP-IDF < 4.4
+#endif
+
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+#include <arduinoFFT.h> // ArduinoFFT library for FFT and window functions
+#else
+#include "dsps_fft2r.h" // ESP-IDF DSP library for FFT and window functions
+#ifdef FFT_PREFER_EXACT_PEAKS
+#include "dsps_wind_blackman_harris.h"
+#else
+#include "dsps_wind_flat_top.h"
+#endif
+#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // always use integer FFT on ESP32-S2 and ESP32-C3
+#endif
+#endif
+
+// These are the input and output vectors.  Input vectors receive computed results from FFT.
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+static float* valFFT = nullptr;
+#else
+static int16_t* valFFT = nullptr;
+#endif
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+static float* vImag = nullptr; // imaginary part of FFT results
+#endif
+
+// pre-computed window function
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+__attribute__((aligned(16))) float* windowFFT;
+#else
+__attribute__((aligned(16))) int16_t* windowFFT;
+#endif
+
 // some prototypes, to ensure consistent interfaces
 static float fftAddAvg(int from, int to);   // average of several FFT result bins
 void FFTcode(void * parameter);      // audio processing task: read samples, run FFT, fill GEQ channels from FFT results
@@ -308,6 +306,7 @@ void FFTcode(void * parameter)
   ArduinoFFT<float> FFT = ArduinoFFT<float>(valFFT, vImag, samplesFFT, SAMPLE_RATE, true);
 #elif !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
   // allocate and initialize FFT buffers on first call
+  // note: free() is never used on these pointers. If it ever is implemented, this implementation can cause memory leaks (need to free raw pointers)
   if (valFFT == nullptr) {
     float* raw_buffer = (float*)heap_caps_malloc((2 * samplesFFT * sizeof(float)) + 16, MALLOC_CAP_8BIT);
     if ((raw_buffer == nullptr)) return; // something went wrong
@@ -502,7 +501,12 @@ void FFTcode(void * parameter)
       haveDoneFFT = true;
 #endif
     } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this.
-      memset(valFFT, 0, samplesFFT * sizeof(float)); // only lower half of buffer contains FFT results, so only clear that part
+      // only lower half of buffer contains FFT results, so only clear that part
+      #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+      memset(valFFT, 0, samplesFFT * sizeof(float));
+      #else
+      memset(valFFT, 0, samplesFFT * sizeof(int16_t));
+      #endif
       FFT_MajorPeak = 1;
       FFT_Magnitude = 0.001;
     }
@@ -660,7 +664,7 @@ static void runMicFilter(uint16_t numSamples, int16_t *sampleBuffer)  // pre-fil
     last_vals[1] = last_vals[0];
     last_vals[0] = (int32_t)sampleBuffer[i];
     lowfilt_fp += ALPHA_FP * (highFilteredSample_fp - (lowfilt_fp >> 15)); // low pass filter in 17.15 fixed point format
-    sampleBuffer[i] = highFilteredSample_fp - (lowfilt_fp >> 15);;
+    sampleBuffer[i] = highFilteredSample_fp - (lowfilt_fp >> 15);
   }
 }
 #endif
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index 7e9391b371..63e06c28d8 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -707,7 +707,8 @@ class I2SAdcSource : public I2SSource {
     }
 
 
-    void getSamples(float *buffer, uint16_t num_samples) {
+    void getSamples(void *buffer, uint16_t num_samples) {
+      float *_buffer = static_cast<float*>(buffer);
       /* Enable ADC. This has to be enabled and disabled directly before and
        * after sampling, otherwise Wifi dies
        */
@@ -722,7 +723,7 @@ class I2SAdcSource : public I2SSource {
           }
         #endif
 
-        I2SSource::getSamples(buffer, num_samples);
+        I2SSource::getSamples(_buffer, num_samples);
 
         #if !defined(I2S_GRAB_ADC1_COMPLETELY)
           // old code - works for me without enable/disable, at least on ESP32.

From f8b1af05e105f0f9216fdda30a864c7c8bd133f4 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Sat, 28 Jun 2025 11:23:48 +0200
Subject: [PATCH 09/12] moved ifdefs to correct place, separated sample filter
 & FFT filter application

The post-FFT band-pass and the IIR filter applied to the samples are now separated: in testing I found that applying the sample filter helps with aliasing into the base bands, so there is no need to hard-cut the lowest frequencies after the FFT.
---
 usermods/audioreactive/audio_reactive.cpp | 91 ++++++++++++-----------
 1 file changed, 46 insertions(+), 45 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 8bffc1cadb..6d1422425b 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -123,6 +123,44 @@ static uint8_t maxVol = 31;          // (was 10) Reasonable value for constant v
 static uint8_t binNum = 8;           // Used to select the bin for FFT based beat detection  (deprecated)
 
 #ifdef ARDUINO_ARCH_ESP32
+#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32))
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default on ESP32 and S3
+#endif
+
+#if ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0)
+#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // DSP FFT library is not available in ESP-IDF < 4.4
+#endif
+
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+#include <arduinoFFT.h> // ArduinoFFT library for FFT and window functions
+#else
+#include "dsps_fft2r.h" // ESP-IDF DSP library for FFT and window functions
+#ifdef FFT_PREFER_EXACT_PEAKS
+#include "dsps_wind_blackman_harris.h"
+#else
+#include "dsps_wind_flat_top.h"
+#endif
+#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // always use integer FFT on ESP32-S2 and ESP32-C3
+#endif
+#endif
+
+// These are the input and output vectors.  Input vectors receive computed results from FFT.
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+static float* valFFT = nullptr;
+#else
+static int16_t* valFFT = nullptr;
+#endif
+#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
+static float* vImag = nullptr; // imaginary part of FFT results
+#endif
+
+// pre-computed window function
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+__attribute__((aligned(16))) float* windowFFT;
+#else
+__attribute__((aligned(16))) int16_t* windowFFT;
+#endif
 
 // use audio source class (ESP32 specific)
 #include "audio_source.h"
@@ -164,51 +202,12 @@ const float agcSampleSmooth[AGC_NUM_PRESETS]  = {  1/12.f,   1/6.f,  1/16.f}; //
 // AGC presets end
 
 static AudioSource *audioSource = nullptr;
-static bool useBandPassFilter = false;                    // if true, enables a bandpass filter 80Hz-16Khz to remove noise. Applies before FFT.
-
+static bool useBandPassFilter = false;                    // if true, enables a hard cutoff bandpass filter. Applies after FFT.
+static bool useMicFilter = false;                         // if true, enables a IIR bandpass filter 80Hz-20Khz to remove noise. Applies before FFT.
 ////////////////////
 // Begin FFT Code //
 ////////////////////
 
-#if !defined(UM_AUDIOREACTIVE_USE_ESPDSP_FFT) && (defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32))
-#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // use ArduinoFFT library for FFT instead of ESP-IDF DSP library by default on ESP32 and S3
-#endif
-
-#if ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0)
-#define UM_AUDIOREACTIVE_USE_ARDUINO_FFT // DSP FFT library is not available in ESP-IDF < 4.4
-#endif
-
-#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
-#include <arduinoFFT.h> // ArduinoFFT library for FFT and window functions
-#else
-#include "dsps_fft2r.h" // ESP-IDF DSP library for FFT and window functions
-#ifdef FFT_PREFER_EXACT_PEAKS
-#include "dsps_wind_blackman_harris.h"
-#else
-#include "dsps_wind_flat_top.h"
-#endif
-#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32C3)
-#define UM_AUDIOREACTIVE_USE_INTEGER_FFT // always use integer FFT on ESP32-S2 and ESP32-C3
-#endif
-#endif
-
-// These are the input and output vectors.  Input vectors receive computed results from FFT.
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-static float* valFFT = nullptr;
-#else
-static int16_t* valFFT = nullptr;
-#endif
-#ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
-static float* vImag = nullptr; // imaginary part of FFT results
-#endif
-
-// pre-computed window function
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-__attribute__((aligned(16))) float* windowFFT;
-#else
-__attribute__((aligned(16))) int16_t* windowFFT;
-#endif
-
 // some prototypes, to ensure consistent interfaces
 static float fftAddAvg(int from, int to);   // average of several FFT result bins
 void FFTcode(void * parameter);      // audio processing task: read samples, run FFT, fill GEQ channels from FFT results
@@ -382,8 +381,7 @@ void FFTcode(void * parameter)
 
     // band pass filter - can reduce noise floor by a factor of 50 and avoid aliasing effects to base & high frequency bands
     // downside: frequencies below 100Hz will be ignored
-    if (useBandPassFilter) runMicFilter(samplesFFT, valFFT);
-
+    if (useMicFilter) runMicFilter(samplesFFT, valFFT);
     // find highest sample in the batch
     #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
     float maxSample = 0.0f;                         // max sample from FFT batch
@@ -1400,7 +1398,8 @@ class AudioReactive : public Usermod {
         periph_module_reset(PERIPH_I2S0_MODULE);   // not possible on -C3
       #endif
       delay(100);         // Give that poor microphone some time to setup.
-      useBandPassFilter = true; // filter fixes aliasing to base & highest frequency bands and reduces noise floor (use for all mic inputs)
+      useBandPassFilter = false; // filter cuts lowest and highest frequency bands from FFT result (use on very noisy mic inputs)
+      useMicFilter = true;       // filter fixes aliasing to base & highest frequency bands and reduces noise floor (recommended for all mic inputs)
 
       #if !defined(CONFIG_IDF_TARGET_ESP32S2) && !defined(CONFIG_IDF_TARGET_ESP32C3)
         if ((i2sckPin == I2S_PIN_NO_CHANGE) && (i2ssdPin >= 0) && (i2swsPin >= 0) && ((dmType == 1) || (dmType == 4)) ) dmType = 5;   // dummy user support: SCK == -1 --means--> PDM microphone
@@ -1435,6 +1434,7 @@ class AudioReactive : public Usermod {
         case 4:
           DEBUGSR_PRINT(F("AR: Generic I2S Microphone with Master Clock - ")); DEBUGSR_PRINTLN(F(I2S_MIC_CHANNEL_TEXT));
           audioSource = new I2SSource(SAMPLE_RATE, BLOCK_SIZE, 1.0f/24.0f);
+          useMicFilter = false; // I2S with Master Clock is mostly used for line-in, skip sample filtering
           delay(100);
           if (audioSource) audioSource->initialize(i2swsPin, i2ssdPin, i2sckPin, mclkPin);
           break;
@@ -1442,6 +1442,7 @@ class AudioReactive : public Usermod {
         case 5:
           DEBUGSR_PRINT(F("AR: I2S PDM Microphone - ")); DEBUGSR_PRINTLN(F(I2S_PDM_MIC_CHANNEL_TEXT));
           audioSource = new I2SSource(SAMPLE_RATE, BLOCK_SIZE, 1.0f/4.0f);
+          useBandPassFilter = true;  // this reduces the noise floor on SPM1423 from 5% Vpp (~380) down to 0.05% Vpp (~5)
           delay(100);
           if (audioSource) audioSource->initialize(i2swsPin, i2ssdPin);
           break;
@@ -1449,7 +1450,7 @@ class AudioReactive : public Usermod {
         case 6:
           DEBUGSR_PRINTLN(F("AR: ES8388 Source"));
           audioSource = new ES8388Source(SAMPLE_RATE, BLOCK_SIZE);
-          useBandPassFilter = false;
+          useMicFilter = false;
           delay(100);
           if (audioSource) audioSource->initialize(i2swsPin, i2ssdPin, i2sckPin, mclkPin);
           break;

From 3915b1b20ba68f28d164c46524f11c9b7d867b41 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Sat, 28 Jun 2025 12:42:42 +0200
Subject: [PATCH 10/12] changed sample low pass cutoff from 80Hz to 90Hz

better anti-aliasing at minimal loss of base frequency.
---
 usermods/audioreactive/audio_reactive.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 6d1422425b..350a43258b 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -604,10 +604,12 @@ void FFTcode(void * parameter)
 #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
 static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // pre-filtering of raw samples (band-pass)
 {
-  // low frequency cutoff parameter - see https://dsp.stackexchange.com/questions/40462/exponential-moving-average-cut-off-frequency
+  // low frequency cutoff parameter - see https://dsp.stackexchange.com/questions/40462/exponential-moving-average-cut-off-frequency (alpha = 2π × fc / fs)
   //constexpr float alpha = 0.04f;   // 150Hz
   //constexpr float alpha = 0.03f;   // 110Hz
-  constexpr float alpha = 0.0225f; // 80hz
+  //constexpr float alpha = 0.0285f; //100Hz
+  constexpr float alpha = 0.0256f; //90Hz
+  //constexpr float alpha = 0.0225f; // 80hz
   //constexpr float alpha = 0.01693f;// 60hz
   // high frequency cutoff  parameter
   //constexpr float beta1 = 0.75f;   // 11Khz
@@ -638,7 +640,9 @@ static void runMicFilter(uint16_t numSamples, int16_t *sampleBuffer)  // pre-fil
   // low frequency cutoff parameter 17.15 fixed point format
   //constexpr int32_t ALPHA_FP = 1311;    // 0.04f * (1<<15) (150Hz)
   //constexpr int32_t ALPHA_FP = 983;     // 0.03f * (1<<15) (110Hz)
-  constexpr int32_t ALPHA_FP = 737;      // 0.0225f * (1<<15) (80Hz)
+  //constexpr int32_t ALPHA_FP = 934;     // 0.0285f * (1<<15) (100Hz)
+  constexpr int32_t ALPHA_FP = 840;       // 0.0256f * (1<<15) (90Hz)
+  //constexpr int32_t ALPHA_FP = 737;     // 0.0225f * (1<<15) (80Hz)
   //constexpr int32_t ALPHA_FP = 555;     // 0.01693f * (1<<15) (60Hz)
 
   // high frequency cutoff parameters 16.16 fixed point format

From e032cf7503028b122cfdf05373d013e30f08c00b Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Sat, 28 Jun 2025 15:10:40 +0200
Subject: [PATCH 11/12] code cleanup and minor speed improvement

- moved scaling of FFT values into fftAddAvg() to use fewer operations
- added "using" for math types, removing some ifdefs and duplications
---
 usermods/audioreactive/audio_reactive.cpp | 90 ++++++++---------------
 usermods/audioreactive/audio_source.h     | 20 ++---
 2 files changed, 39 insertions(+), 71 deletions(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 350a43258b..588757e0a9 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -133,6 +133,7 @@ static uint8_t binNum = 8;           // Used to select the bin for FFT based bea
 
 #ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
 #include <arduinoFFT.h> // ArduinoFFT library for FFT and window functions
+#undef UM_AUDIOREACTIVE_USE_INTEGER_FFT // arduinoFFT has not integer support
 #else
 #include "dsps_fft2r.h" // ESP-IDF DSP library for FFT and window functions
 #ifdef FFT_PREFER_EXACT_PEAKS
@@ -145,22 +146,23 @@ static uint8_t binNum = 8;           // Used to select the bin for FFT based bea
 #endif
 #endif
 
-// These are the input and output vectors.  Input vectors receive computed results from FFT.
 #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-static float* valFFT = nullptr;
+using FFTsampleType = float;
+using FFTmathType = float;
+#define FFTabs fabsf
 #else
-static int16_t* valFFT = nullptr;
+using FFTsampleType = int16_t;
+using FFTmathType = int32_t;
+#define FFTabs abs
 #endif
+// These are the input and output vectors.  Input vectors receive computed results from FFT.
+static FFTsampleType* valFFT = nullptr;
 #ifdef UM_AUDIOREACTIVE_USE_ARDUINO_FFT
 static float* vImag = nullptr; // imaginary part of FFT results
 #endif
 
 // pre-computed window function
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-__attribute__((aligned(16))) float* windowFFT;
-#else
-__attribute__((aligned(16))) int16_t* windowFFT;
-#endif
+FFTsampleType* windowFFT;
 
 // use audio source class (ESP32 specific)
 #include "audio_source.h"
@@ -211,11 +213,7 @@ static bool useMicFilter = false;                         // if true, enables a
 // some prototypes, to ensure consistent interfaces
 static float fftAddAvg(int from, int to);   // average of several FFT result bins
 void FFTcode(void * parameter);      // audio processing task: read samples, run FFT, fill GEQ channels from FFT results
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-static void runMicFilter(uint16_t numSamples, float *sampleBuffer);          // pre-filtering of raw samples (band-pass)
-#else
-static void runMicFilter(uint16_t numSamples, int16_t *sampleBuffer);
-#endif
+static void runMicFilter(uint16_t numSamples, FFTsampleType *sampleBuffer);
 static void postProcessFFTResults(bool noiseGateOpen, int numberOfChannels); // post-processing and post-amp of GEQ channels
 
 static TaskHandle_t FFT_Task = nullptr;
@@ -270,18 +268,15 @@ constexpr uint16_t samplesFFT_2 = 256;          // meaningfull part of FFT resul
 
 // compute average of several FFT result bins
 static float fftAddAvg(int from, int to) {
-  #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-  float result = 0.0f;
+  FFTmathType result = 0;
   for (int i = from; i <= to; i++) {
     result += valFFT[i];
   }
-  #else
-  int32_t result = 0;
-  for (int i = from; i <= to; i++) {
-    result += valFFT[i];
-  }
-  result *= 32; // scale result to match float values. note: scaling value between float and int is 512, float version is scaled down by 16
-  #endif
+ #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+  result = result * 0.0625; // divide by 16 to reduce magnitude. Want end result to be scaled linear and ~4096 max.
+ #else
+  result *= 32; // scale result to match float values. note: raw scaling value between float and int is 512, float version is scaled down by 16
+#endif
   return float(result) / float(to - from + 1); // return average as float
 }
 
@@ -383,21 +378,12 @@ void FFTcode(void * parameter)
     // downside: frequencies below 100Hz will be ignored
     if (useMicFilter) runMicFilter(samplesFFT, valFFT);
     // find highest sample in the batch
-    #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-    float maxSample = 0.0f;                         // max sample from FFT batch
-    for (int i=0; i < samplesFFT; i++) {
-	    // pick our  our current mic sample - we take the max value from all samples that go into FFT
-	    if ((valFFT[i] <= (INT16_MAX - 1024)) && (valFFT[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
-        if (fabsf((float)valFFT[i]) > maxSample) maxSample = fabsf((float)valFFT[i]);
-    }
-    #else
-    int32_t maxSample = 0;                         // max sample from FFT batch
+    FFTsampleType maxSample = 0;                         // max sample from FFT batch
     for (int i=0; i < samplesFFT; i++) {
 	    // pick our  our current mic sample - we take the max value from all samples that go into FFT
 	    if ((valFFT[i] <= (INT16_MAX - 1024)) && (valFFT[i] >= (INT16_MIN + 1024)))  //skip extreme values - normally these are artefacts
-        if (abs(valFFT[i]) > maxSample) maxSample = abs(valFFT[i]);
+        if (FFTabs(valFFT[i]) > maxSample) maxSample = FFTabs(valFFT[i]);
     }
-    #endif
     // release highest sample to volume reactive effects early - not strictly necessary here - could also be done at the end of the function
     // early release allows the filters (getSample() and agcAvg()) to work with fresh values - we will have matching gain and noise gate values when we want to process the FFT results.
     micDataReal = maxSample;
@@ -421,16 +407,15 @@ void FFTcode(void * parameter)
       FFT.complexToMagnitude();                                   // Compute magnitudes
       valFFT[0] = 0;   // The remaining DC offset on the signal produces a strong spike on position 0 that should be eliminated to avoid issues.
       FFT.majorPeak(&FFT_MajorPeak, &FFT_Magnitude);              // let the effects know which freq was most dominant
-      for (int i = 1; i < samplesFFT_2; i++) { // skip [0] as it is DC offset
-        valFFT[i] = valFFT[i] / 16.0f;         // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
-      }
-#elif !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
+      // note: scaling is done in fftAddAvg(), so we don't scale here
+#else
       // run run float DSP FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~x ms on ESP32-C3) TODO: test and fill in these values
       // remove DC offset
-      float sum = 0;
+      FFTmathType sum = 0;
       for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
-      float mean = sum / samplesFFT;
+      FFTmathType mean = sum / (FFTmathType)samplesFFT;
       for (int i = 0; i < samplesFFT; i++) valFFT[i] -= mean;
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
       //apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
       for (int i = samplesFFT - 1; i >= 0 ; i--) {
         // fill the buffer back to front to avoid overwriting samples
@@ -458,15 +443,10 @@ void FFTcode(void * parameter)
           FFT_Magnitude = valFFT[i];
           FFT_MajorPeak = i*(SAMPLE_RATE/samplesFFT);
         }
-        valFFT[i] = valFFT[i] / 16.0f;          // Reduce magnitude. Want end result to be scaled linear and ~4096 max.
+        // note: scaling is done in fftAddAvg(), so we don't scale here
       }
 #else
       // run integer DSP FFT (takes ~x ms on ESP32, ~x ms on ESP32-S2, , ~1.5 ms on ESP32-C3) TODO: test and fill in these values
-      // remove DC offset
-      int32_t sum = 0;
-      for (int i = 0; i < samplesFFT; i++) sum += valFFT[i];
-      int32_t mean = sum / samplesFFT;
-      for (int i = 0; i < samplesFFT; i++) valFFT[i] -= mean;
       //apply window function to samples and fill buffer with interleaved complex values [Re,Im,Re,Im,...]
       for (int i = samplesFFT - 1; i >= 0 ; i--) {
         // fill the buffer back to front to avoid overwriting samples
@@ -488,23 +468,18 @@ void FFTcode(void * parameter)
           FFT_Magnitude_int = valFFT[i] * 512; // scale to match raw float value
           FFT_MajorPeak_int = ((i * SAMPLE_RATE)/samplesFFT);
         }
-        // note: scaling is done when converting to float in fftAddAvg(), so we don't scale here
+        // note: scaling is done in fftAddAvg(), so we don't scale here
       }
       FFT_MajorPeak = FFT_MajorPeak_int;
       FFT_Magnitude = FFT_Magnitude_int;
-
+#endif
 #endif
       FFT_MajorPeak = constrain(FFT_MajorPeak, 1.0f, 11025.0f);   // restrict value to range expected by effects
 #if defined(WLED_DEBUG) || defined(SR_DEBUG)
       haveDoneFFT = true;
 #endif
-    } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this.
-      // only lower half of buffer contains FFT results, so only clear that part
-      #if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-      memset(valFFT, 0, samplesFFT * sizeof(float));
-      #else
-      memset(valFFT, 0, samplesFFT * sizeof(int16_t));
-      #endif
+    } else { // noise gate closed - only clear results as FFT was skipped. MIC samples are still valid when we do this -> set all samples to 0
+      memset(valFFT, 0, samplesFFT * sizeof(FFTsampleType));
       FFT_MajorPeak = 1;
       FFT_Magnitude = 0.001;
     }
@@ -601,9 +576,9 @@ void FFTcode(void * parameter)
 // Pre / Postprocessing  //
 ///////////////////////////
 
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // pre-filtering of raw samples (band-pass)
+static void runMicFilter(uint16_t numSamples, FFTsampleType *sampleBuffer)          // pre-filtering of raw samples (band-pass)
 {
+#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
   // low frequency cutoff parameter - see https://dsp.stackexchange.com/questions/40462/exponential-moving-average-cut-off-frequency (alpha = 2π × fc / fs)
   //constexpr float alpha = 0.04f;   // 150Hz
   //constexpr float alpha = 0.03f;   // 110Hz
@@ -633,10 +608,7 @@ static void runMicFilter(uint16_t numSamples, float *sampleBuffer)          // p
         lowfilt += alpha * (sampleBuffer[i] - lowfilt);
         sampleBuffer[i] = sampleBuffer[i] - lowfilt;
   }
-}
 #else
-static void runMicFilter(uint16_t numSamples, int16_t *sampleBuffer)  // pre-filtering of raw samples (band-pass)
-{
   // low frequency cutoff parameter 17.15 fixed point format
   //constexpr int32_t ALPHA_FP = 1311;    // 0.04f * (1<<15) (150Hz)
   //constexpr int32_t ALPHA_FP = 983;     // 0.03f * (1<<15) (110Hz)
diff --git a/usermods/audioreactive/audio_source.h b/usermods/audioreactive/audio_source.h
index 63e06c28d8..4448f82f97 100644
--- a/usermods/audioreactive/audio_source.h
+++ b/usermods/audioreactive/audio_source.h
@@ -134,7 +134,7 @@ class AudioSource {
        Read num_samples from the microphone, and store them in the provided
        buffer
     */
-    virtual void getSamples(void *buffer, uint16_t num_samples) = 0;
+    virtual void getSamples(FFTsampleType *buffer, uint16_t num_samples) = 0;
 
     /* check if the audio source driver was initialized successfully */
     virtual bool isInitialized(void) {return(_initialized);}
@@ -314,7 +314,7 @@ class I2SSource : public AudioSource {
       if (_mclkPin != I2S_PIN_NO_CHANGE) PinManager::deallocatePin(_mclkPin, PinOwner::UM_Audioreactive);
     }
 
-    virtual void getSamples(void *buffer, uint16_t num_samples) {
+    virtual void getSamples(FFTsampleType *buffer, uint16_t num_samples) {
       if (_initialized) {
         esp_err_t err;
         size_t bytes_read = 0;        /* Counter variable to check if we actually got enough data */
@@ -333,10 +333,7 @@ class I2SSource : public AudioSource {
         }
 
         // Store samples in sample buffer
-#if !defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
-        float* _buffer = static_cast<float*>(buffer);
-#else
-        int16_t* _buffer = static_cast<int16_t*>(buffer); // use integer samples on ESP32-S2 and ESP32-C3
+#if defined(UM_AUDIOREACTIVE_USE_INTEGER_FFT)
         //constexpr int32_t FIXEDSHIFT = 8; // shift by 8 bits for fixed point math (no loss at 24bit input sample resolution)
         //int32_t intSampleScale = _sampleScale * (1<<FIXEDSHIFT); // _sampleScale <= 1.0f, shift for fixed point math
 #endif
@@ -350,8 +347,8 @@ class I2SSource : public AudioSource {
   #else
           float currSample = (float) newSamples[i];                 // 16bit input -> use as-is
   #endif
-          _buffer[i] = currSample;
-          _buffer[i] *= _sampleScale;                               // scale samples
+          buffer[i] = currSample;
+          buffer[i] *= _sampleScale;                               // scale samples
 #else
   #ifdef I2S_SAMPLE_DOWNSCALE_TO_16BIT
           // note on sample scaling: scaling is only used for inputs with master clock and those are better suited for ESP32 or S3
@@ -363,7 +360,7 @@ class I2SSource : public AudioSource {
           //int32_t currSample = (newSamples[i] * intSampleScale) >> FIXEDSHIFT;   // scale samples, shift back down to 16bit
           int16_t currSample = newSamples[i];                 // 16bit input -> use as-is
   #endif
-          _buffer[i] = (int16_t)currSample;
+          buffer[i] = (int16_t)currSample;
 #endif
         }
       }
@@ -707,8 +704,7 @@ class I2SAdcSource : public I2SSource {
     }
 
 
-    void getSamples(void *buffer, uint16_t num_samples) {
-      float *_buffer = static_cast<float*>(buffer);
+    void getSamples(FFTsampleType *buffer, uint16_t num_samples) {
       /* Enable ADC. This has to be enabled and disabled directly before and
        * after sampling, otherwise Wifi dies
        */
@@ -723,7 +719,7 @@ class I2SAdcSource : public I2SSource {
           }
         #endif
 
-        I2SSource::getSamples(_buffer, num_samples);
+        I2SSource::getSamples(buffer, num_samples);
 
         #if !defined(I2S_GRAB_ADC1_COMPLETELY)
           // old code - works for me without enable/disable, at least on ESP32.

From 1cc8adf01d1eea55827726bb800a891e360e3843 Mon Sep 17 00:00:00 2001
From: Damian Schneider <daedae@gmx.ch>
Date: Sat, 28 Jun 2025 15:41:21 +0200
Subject: [PATCH 12/12] fixed wrong #endif position

---
 usermods/audioreactive/audio_reactive.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp
index 588757e0a9..b9bfd16ddf 100644
--- a/usermods/audioreactive/audio_reactive.cpp
+++ b/usermods/audioreactive/audio_reactive.cpp
@@ -640,8 +640,8 @@ static void runMicFilter(uint16_t numSamples, FFTsampleType *sampleBuffer)
     lowfilt_fp += ALPHA_FP * (highFilteredSample_fp - (lowfilt_fp >> 15)); // low pass filter in 17.15 fixed point format
     sampleBuffer[i] = highFilteredSample_fp - (lowfilt_fp >> 15);
   }
-}
 #endif
+}
 
 static void postProcessFFTResults(bool noiseGateOpen, int numberOfChannels) // post-processing and post-amp of GEQ channels
 {