diff --git a/src/audio/crossover/crossover.c b/src/audio/crossover/crossover.c
index 3a4af0d43314..2511556263c2 100644
--- a/src/audio/crossover/crossover.c
+++ b/src/audio/crossover/crossover.c
@@ -161,6 +161,10 @@ static int crossover_init_coef_lr4(struct sof_eq_iir_biquad *coef,
 {
 	int ret;
 
+	/* Ensure the LR4 can be processed with the simplified 4th order IIR */
+	if (CROSSOVER_LR4_NUM_BIQUADS != SOF_IIR_DF1_4TH_NUM_BIQUADS)
+		return -EINVAL;
+
 	/* Only one set of coefficients is stored in config for both biquads
 	 * in series due to identity. To maintain the structure of
 	 * iir_state_df1, it requires two copies of coefficients in a row.
@@ -190,8 +194,8 @@ static int crossover_init_coef_lr4(struct sof_eq_iir_biquad *coef,
 	if (!lr4->delay)
 		return -ENOMEM;
 
-	lr4->biquads = 2;
-	lr4->biquads_in_series = 2;
+	lr4->biquads = CROSSOVER_LR4_NUM_BIQUADS;
+	lr4->biquads_in_series = CROSSOVER_LR4_NUM_BIQUADS;
 
 	return 0;
 }
diff --git a/src/audio/crossover/crossover.h b/src/audio/crossover/crossover.h
index 64fc86023ad0..2312a1d53857 100644
--- a/src/audio/crossover/crossover.h
+++ b/src/audio/crossover/crossover.h
@@ -15,6 +15,8 @@
 
 #include "crossover_user.h"
 
+#define CROSSOVER_LR4_NUM_BIQUADS 2
+
 struct comp_buffer;
 struct comp_dev;
 
@@ -122,7 +124,7 @@ static inline int32_t crossover_generic_process_lr4(int32_t in,
 						    struct iir_state_df1 *lr4)
 {
 	/* Cascade two biquads with same coefficients in series. */
-	return iir_df1(lr4, in);
+	return iir_df1_4th(lr4, in);
 }
 
 static inline void crossover_free_config(struct sof_crossover_config **config)
diff --git a/src/audio/multiband_drc/multiband_drc.c b/src/audio/multiband_drc/multiband_drc.c
index e76741f004fd..6db99f116c1c 100644
--- a/src/audio/multiband_drc/multiband_drc.c
+++ b/src/audio/multiband_drc/multiband_drc.c
@@ -66,6 +66,10 @@ static int multiband_drc_eq_init_coef_ch(struct sof_eq_iir_biquad *coef,
 {
 	int ret;
 
+	/* Ensure the emp/deemp EQ can be processed with the simplified 4th order IIR */
+	if (SOF_EMP_DEEMP_BIQUADS != SOF_IIR_DF1_4TH_NUM_BIQUADS)
+		return -EINVAL;
+
 	eq->coef = rzalloc(SOF_MEM_ZONE_RUNTIME, 0, SOF_MEM_CAPS_RAM,
 			   sizeof(struct sof_eq_iir_biquad) * SOF_EMP_DEEMP_BIQUADS);
 	if (!eq->coef)
diff --git a/src/audio/multiband_drc/multiband_drc_generic.c b/src/audio/multiband_drc/multiband_drc_generic.c
index bfdb7c6d3381..bd64f5012fc8 100644
--- a/src/audio/multiband_drc/multiband_drc_generic.c
+++ b/src/audio/multiband_drc/multiband_drc_generic.c
@@ -39,7 +39,7 @@ static void multiband_drc_process_emp_crossover(struct multiband_drc_state *stat
 		crossover_s = &state->crossover[ch];
 
 		if (enable_emp)
-			emp_out = iir_df1(emp_s, *buf_src);
+			emp_out = iir_df1_4th(emp_s, *buf_src);
 		else
 			emp_out = *buf_src;
 
@@ -178,7 +178,7 @@ static void multiband_drc_process_deemp(struct multiband_drc_state *state,
 		}
 
 		if (enable_deemp)
-			*buf_sink = iir_df1(deemp_s, mix_out);
+			*buf_sink = iir_df1_4th(deemp_s, mix_out);
 		else
 			*buf_sink = mix_out;
 
diff --git a/src/include/sof/math/iir_df1.h b/src/include/sof/math/iir_df1.h
index fc5e034b3b4b..24653f690ea3 100644
--- a/src/include/sof/math/iir_df1.h
+++ b/src/include/sof/math/iir_df1.h
@@ -13,6 +13,7 @@
 #include
 
 #define IIR_DF1_NUM_STATE 4
+#define SOF_IIR_DF1_4TH_NUM_BIQUADS 2
 
 struct iir_state_df1 {
 	unsigned int biquads; /* Number of IIR 2nd order sections total */
@@ -34,8 +35,24 @@ void iir_init_delay_df1(struct iir_state_df1 *iir, int32_t **state);
 
 void iir_reset_df1(struct iir_state_df1 *iir);
 
+/**
+ * Calculate IIR filter consisting of biquads
+ * @param iir IIR state with configured biquad coefficients and delay lines data
+ * @param x Single s32 Q1.31 format input sample
+ * @return Single s32 Q1.31 format output sample
+ */
 int32_t iir_df1(struct iir_state_df1 *iir, int32_t x);
 
+/**
+ * Calculate IIR filter consisting of biquads, special simplified version for
+ * 4th order filter with two biquads in series. Note: The biquad counts in
+ * the iir struct are not checked, two biquads are always processed.
+ * @param iir IIR state with configured biquad coefficients and delay lines data
+ * @param x Single s32 Q1.31 format input sample
+ * @return Single s32 Q1.31 format output sample
+ */
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x);
+
 /* Inline functions */
 #if SOF_USE_MIN_HIFI(3, FILTER)
 #include "iir_df1_hifi3.h"
diff --git a/src/math/iir_df1_generic.c b/src/math/iir_df1_generic.c
index 6e6482259569..4b069eafb3d7 100644
--- a/src/math/iir_df1_generic.c
+++ b/src/math/iir_df1_generic.c
@@ -109,4 +109,58 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	int32_t in;
+	int32_t tmp;
+	int64_t acc;
+	int i;
+	int d = 0; /* Index to state */
+	int c = 0; /* Index to coefficient a2 */
+	int32_t *coefp = iir->coef;
+	int32_t *delay = iir->delay;
+
+	/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Compute output: Accumulator is Q3.61
+		 * Q2.30 x Q1.31 -> Q3.61
+		 * Shift Q3.61 to Q3.31 with rounding, saturate to Q1.31
+		 */
+		acc = ((int64_t)coefp[c]) * delay[d]; /* a2 * y(n - 2) */
+		acc += ((int64_t)coefp[c + 1]) * delay[d + 1]; /* a1 * y(n - 1) */
+		acc += ((int64_t)coefp[c + 2]) * delay[d + 2]; /* b2 * x(n - 2) */
+		acc += ((int64_t)coefp[c + 3]) * delay[d + 3]; /* b1 * x(n - 1) */
+		acc += ((int64_t)coefp[c + 4]) * in; /* b0 * x */
+		tmp = (int32_t)sat_int32(Q_SHIFT_RND(acc, 61, 31));
+
+		/* update the delay value */
+		delay[d] = delay[d + 1];
+		delay[d + 1] = tmp;
+		delay[d + 2] = delay[d + 3];
+		delay[d + 3] = in;
+
+		/* Apply gain Q2.14 x Q1.31 -> Q3.45 */
+		acc = ((int64_t)coefp[c + 6]) * tmp; /* Gain */
+
+		/* Apply biquad output shift right parameter
+		 * simultaneously with Q3.45 to Q3.31 conversion. Then
+		 * saturate to 32 bits Q1.31 and prepare for next
+		 * biquad.
+		 */
+		acc = Q_SHIFT_RND(acc, 45 + coefp[c + 5], 31);
+		in = sat_int32(acc);
+
+		/* Proceed to next biquad coefficients and delay
+		 * lines.
+		 */
+		c += SOF_EQ_IIR_NBIQUAD;
+		d += IIR_DF1_NUM_STATE;
+	}
+	/* Output of the last biquad is in variable in */
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
diff --git a/src/math/iir_df1_hifi3.c b/src/math/iir_df1_hifi3.c
index 7c1237f55f79..eddcc3f980ea 100644
--- a/src/math/iir_df1_hifi3.c
+++ b/src/math/iir_df1_hifi3.c
@@ -126,4 +126,72 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_int64 acc;
+	ae_valign coef_align;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32 in;
+	ae_int32 tmp;
+	ae_int32x2 *coefp;
+	ae_int32x2 *delayp;
+	int32_t *delay_update;
+	int i;
+
+	/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
+	coefp = (ae_int32x2 *)&iir->coef[0];
+	delayp = (ae_int32x2 *)&iir->delay[0];
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Compute output: The multiply instruction gives
+		 * Q2.30 x Q1.31 -> Q18.46. The sum is shifted left by
+		 * one to Q17.47 and rounded to Q1.31 before it is
+		 * stored to the delay line. The delay line is updated
+		 * with plain 32 bit stores below.
+		 */
+		coef_align = AE_LA64_PP(coefp);
+		AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
+		AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		AE_L32X2_IP(delay_y2y1, delayp, 8);
+		AE_L32X2_IP(delay_x2x1, delayp, 8);
+
+		acc = AE_MULF32R_HH(coef_a2a1, delay_y2y1); /* a2 * y(n - 2) */
+		AE_MULAF32R_LL(acc, coef_a2a1, delay_y2y1); /* a1 * y(n - 1) */
+		AE_MULAF32R_HH(acc, coef_b2b1, delay_x2x1); /* b2 * x(n - 2) */
+		AE_MULAF32R_LL(acc, coef_b2b1, delay_x2x1); /* b1 * x(n - 1) */
+		AE_MULAF32R_HH(acc, coef_b0, in); /* b0 * x */
+		acc = AE_SLAI64S(acc, 1); /* Convert to Q17.47 */
+		tmp = AE_ROUND32F48SSYM(acc); /* Round to Q1.31 */
+
+		/* update the state value */
+		delay_update = (int32_t *)delayp - 4;
+		delay_update[0] = delay_update[1];
+		delay_update[1] = tmp;
+		delay_update[2] = delay_update[3];
+		delay_update[3] = in;
+
+		/* Apply gain Q18.14 x Q1.31 -> Q34.30 */
+		acc = AE_MULF32R_HH(gain, tmp); /* Gain */
+		acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
diff --git a/src/math/iir_df1_hifi4.c b/src/math/iir_df1_hifi4.c
index 07a4a495369d..945fd67af5bc 100644
--- a/src/math/iir_df1_hifi4.c
+++ b/src/math/iir_df1_hifi4.c
@@ -119,4 +119,65 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_valign coef_align;
+	ae_valign data_r_align;
+	ae_valign data_w_align = AE_ZALIGN64();
+	ae_f64 acc;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32 in;
+	ae_int32x2 *coefp = (ae_int32x2 *)iir->coef;
+	ae_int32x2 *delay_r = (ae_int32x2 *)iir->delay;
+	ae_int32x2 *delay_w = delay_r;
+	int i;
+
+	/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	data_r_align = AE_LA64_PP(delay_r);
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Load data */
+		AE_LA32X2_IP(delay_y2y1, data_r_align, delay_r);
+		AE_LA32X2_IP(delay_x2x1, data_r_align, delay_r);
+
+		/* Load coefficients */
+		coef_align = AE_LA64_PP(coefp);
+		AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
+		AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
+		AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
+		AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
+		AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
+		delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */
+
+		/* Store data */
+		AE_SA32X2_IP(delay_y2y1, data_w_align, delay_w);
+		AE_SA32X2_IP(delay_x2x1, data_w_align, delay_w);
+
+		/* Apply gain */
+		acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
+		acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+	AE_SA64POS_FP(data_w_align, delay_w);
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
diff --git a/src/math/iir_df1_hifi5.c b/src/math/iir_df1_hifi5.c
index 262cb5120bff..ca331d28c7fa 100644
--- a/src/math/iir_df1_hifi5.c
+++ b/src/math/iir_df1_hifi5.c
@@ -116,4 +116,63 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_valignx2 coef_align;
+	ae_valignx2 data_r_align;
+	ae_valignx2 data_w_align = AE_ZALIGN128();
+	ae_f64 acc;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32 in;
+	ae_int32x4 *coefp = (ae_int32x4 *)iir->coef;
+	ae_int32x4 *delay_r = (ae_int32x4 *)iir->delay;
+	ae_int32x4 *delay_w = delay_r;
+	int i;
+
+	/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	data_r_align = AE_LA128_PP(delay_r);
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Load data */
+		AE_LA32X2X2_IP(delay_y2y1, delay_x2x1, data_r_align, delay_r);
+
+		/* Load coefficients */
+		coef_align = AE_LA128_PP(coefp);
+		AE_LA32X2X2_IP(coef_a2a1, coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
+		AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
+		AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
+		AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
+		delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */
+
+		/* Store data */
+		AE_SA32X2X2_IP(delay_y2y1, delay_x2x1, data_w_align, delay_w);
+
+		/* Apply gain */
+		acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
+		acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+
+	AE_SA128POS_FP(data_w_align, delay_w);
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif