@@ -157,6 +157,12 @@ ARM_DSP_ATTRIBUTE void arm_rfft_q31(
157157#include "arm_helium_utils.h"
158158#include "arm_vec_fft.h"
159159
/*
 * GCC-only variants of the Q31 complex-multiply helper macros.
 *
 * This block is compiled only when ARM_DSP_BUILT_WITH_GCC is defined. Under
 * the same guard, arm_split_rfft_q31 and arm_split_rifft_q31 in this file use
 * these macros in place of MVE_CMPLX_MULT_FX_AxB / MVE_CMPLX_MULT_FX_AxConjB
 * (which take an extra q31x4_t type argument).
 *
 * Each macro chains two s32 MVE intrinsics: the inner call accumulates into a
 * vuninitializedq_s32() vector cast to the type of A, and its result becomes
 * the accumulator of the outer call.
 *   - AxB     : inner vqdmlsdhq_s32,  outer vqdmladhxq_s32
 *   - AxConjB : inner vqdmlsdhxq_s32, outer vqdmladhq_s32
 *
 * NOTE(review): presumably the inner and outer intrinsics each write a
 * different half of the lanes, so no lane of the uninitialized accumulator
 * reaches the result - confirm against the Arm MVE intrinsics reference.
 *
 * A and B are expanded more than once and are not parenthesised in the
 * expansion, so pass plain, side-effect-free vector expressions.
 *
 * NOTE(review): as rendered here there is whitespace between each macro name
 * and its "(A ,B )" list; a C preprocessor would then define object-like
 * macros rather than function-like ones. Confirm that the real source has no
 * whitespace there.
 */
160+ #if defined(ARM_DSP_BUILT_WITH_GCC )
161+
162+ #define MVE_CMPLX_MULT_FX_AxB_S32 (A ,B ) vqdmladhxq_s32(vqdmlsdhq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
163+ #define MVE_CMPLX_MULT_FX_AxConjB_S32 (A ,B ) vqdmladhq_s32(vqdmlsdhxq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
164+
165+ #endif
160166
161167ARM_DSP_ATTRIBUTE void arm_split_rfft_q31 (
162168 q31_t * pSrc ,
@@ -193,9 +199,12 @@ ARM_DSP_ATTRIBUTE void arm_split_rfft_q31(
193199 q31x4_t in2 = vldrwq_gather_shifted_offset_s32 (pSrc , offset );
194200 q31x4_t coefA = vldrwq_gather_shifted_offset_s32 (pCoefAb , offsetCoef );
195201 q31x4_t coefB = vldrwq_gather_shifted_offset_s32 (pCoefBb , offsetCoef );
196-
202+ #if defined(ARM_DSP_BUILT_WITH_GCC )
203+ q31x4_t out = vhaddq_s32 (MVE_CMPLX_MULT_FX_AxB_S32 (in1 , coefA ),MVE_CMPLX_MULT_FX_AxConjB_S32 (coefB , in2 ));
204+ #else
197205 q31x4_t out = vhaddq_s32 (MVE_CMPLX_MULT_FX_AxB (in1 , coefA , q31x4_t ),
198206 MVE_CMPLX_MULT_FX_AxConjB (coefB , in2 , q31x4_t ));
207+ #endif
199208 vst1q (pOut1 , out );
200209 pOut1 += 4 ;
201210
@@ -348,9 +357,13 @@ ARM_DSP_ATTRIBUTE void arm_split_rifft_q31(
348357 q31x4_t coefB = vldrwq_gather_shifted_offset_s32 (pCoefBb , offsetCoef );
349358
350359 /* can we avoid the conjugate here ? */
360+ #if defined(ARM_DSP_BUILT_WITH_GCC )
361+ q31x4_t out = vhaddq_s32 (MVE_CMPLX_MULT_FX_AxConjB_S32 (in1 , coefA ),
362+ vmulq_s32 (conj , MVE_CMPLX_MULT_FX_AxB_S32 (in2 , coefB )));
363+ #else
351364 q31x4_t out = vhaddq_s32 (MVE_CMPLX_MULT_FX_AxConjB (in1 , coefA , q31x4_t ),
352365 vmulq_s32 (conj , MVE_CMPLX_MULT_FX_AxB (in2 , coefB , q31x4_t )));
353-
366+ #endif
354367 vst1q_s32 (pDst , out );
355368 pDst += 4 ;
356369
0 commit comments