Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio: Multiband DRC: Use optimized 4th order IIR filter version #9808

Merged
merged 3 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/audio/crossover/crossover.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ static int crossover_init_coef_lr4(struct sof_eq_iir_biquad *coef,
{
int ret;

/* Ensure the LR4 can be processed with the simplified 4th order IIR */
if (CROSSOVER_LR4_NUM_BIQUADS != SOF_IIR_DF1_4TH_NUM_BIQUADS)
return -EINVAL;

/* Only one set of coefficients is stored in config for both biquads
* in series due to identity. To maintain the structure of
* iir_state_df1, it requires two copies of coefficients in a row.
Expand Down Expand Up @@ -190,8 +194,8 @@ static int crossover_init_coef_lr4(struct sof_eq_iir_biquad *coef,
if (!lr4->delay)
return -ENOMEM;

lr4->biquads = 2;
lr4->biquads_in_series = 2;
lr4->biquads = CROSSOVER_LR4_NUM_BIQUADS;
lr4->biquads_in_series = CROSSOVER_LR4_NUM_BIQUADS;

return 0;
}
Expand Down
4 changes: 3 additions & 1 deletion src/audio/crossover/crossover.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include "crossover_user.h"

#define CROSSOVER_LR4_NUM_BIQUADS 2

struct comp_buffer;
struct comp_dev;

Expand Down Expand Up @@ -122,7 +124,7 @@ static inline int32_t crossover_generic_process_lr4(int32_t in,
struct iir_state_df1 *lr4)
{
/* Cascade two biquads with same coefficients in series. */
return iir_df1(lr4, in);
return iir_df1_4th(lr4, in);
}

static inline void crossover_free_config(struct sof_crossover_config **config)
Expand Down
4 changes: 4 additions & 0 deletions src/audio/multiband_drc/multiband_drc.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ static int multiband_drc_eq_init_coef_ch(struct sof_eq_iir_biquad *coef,
{
int ret;

/* Ensure the emphasis/de-emphasis IIR can be processed with the simplified 4th order IIR */
if (SOF_EMP_DEEMP_BIQUADS != SOF_IIR_DF1_4TH_NUM_BIQUADS)
return -EINVAL;

eq->coef = rzalloc(SOF_MEM_ZONE_RUNTIME, 0, SOF_MEM_CAPS_RAM,
sizeof(struct sof_eq_iir_biquad) * SOF_EMP_DEEMP_BIQUADS);
if (!eq->coef)
Expand Down
4 changes: 2 additions & 2 deletions src/audio/multiband_drc/multiband_drc_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ static void multiband_drc_process_emp_crossover(struct multiband_drc_state *stat
crossover_s = &state->crossover[ch];

if (enable_emp)
emp_out = iir_df1(emp_s, *buf_src);
emp_out = iir_df1_4th(emp_s, *buf_src);
else
emp_out = *buf_src;

Expand Down Expand Up @@ -178,7 +178,7 @@ static void multiband_drc_process_deemp(struct multiband_drc_state *state,
}

if (enable_deemp)
*buf_sink = iir_df1(deemp_s, mix_out);
*buf_sink = iir_df1_4th(deemp_s, mix_out);
else
*buf_sink = mix_out;

Expand Down
17 changes: 17 additions & 0 deletions src/include/sof/math/iir_df1.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <sof/common.h>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: @singalsu "The omitting of outer loop for parallel biquads and check for null coefficients and use of fixed loop count of two" very complex language is, comments Yoda.


#define IIR_DF1_NUM_STATE 4
#define SOF_IIR_DF1_4TH_NUM_BIQUADS 2

struct iir_state_df1 {
unsigned int biquads; /* Number of IIR 2nd order sections total */
Expand All @@ -34,8 +35,24 @@ void iir_init_delay_df1(struct iir_state_df1 *iir, int32_t **state);

void iir_reset_df1(struct iir_state_df1 *iir);

/**
* Calculate IIR filter consisting of biquads
* @param iir IIR state with configured biquad coefficients and delay lines data
* @param x Single s32 Q1.31 format input sample
* @return Single s32 Q1.31 format output sample
*/
int32_t iir_df1(struct iir_state_df1 *iir, int32_t x);

/**
* Calculate IIR filter consisting of biquads, special simplified version for
* 4th order filter with two biquads in series. Note: There are no checks for
* iir struct members.
* @param iir IIR state with configured biquad coefficients and delay lines data
* @param x Single s32 Q1.31 format input sample
* @return Single s32 Q1.31 format output sample
*/
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x);

/* Inline functions */
#if SOF_USE_MIN_HIFI(3, FILTER)
#include "iir_df1_hifi3.h"
Expand Down
54 changes: 54 additions & 0 deletions src/math/iir_df1_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,58 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
}
EXPORT_SYMBOL(iir_df1);

/**
 * Run one sample through a fixed cascade of SOF_IIR_DF1_4TH_NUM_BIQUADS
 * direct form 1 biquads connected in series.
 *
 * Only iir->coef and iir->delay are accessed. The biquad count members of
 * the state are not read and no pointer is validated, so the caller must
 * have prepared coefficients and delay lines for exactly that many sections.
 *
 * @param iir IIR state with coefficients and delay lines
 * @param x Input sample
 * @return Output sample of the last section
 */
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
{
	/* Each section owns SOF_EQ_IIR_NBIQUAD coefficient words laid out as
	 * {a2, a1, b2, b1, b0, shift, gain} and IIR_DF1_NUM_STATE delay words
	 * laid out as {y(n - 2), y(n - 1), x(n - 2), x(n - 1)}.
	 */
	int32_t *sec_coef = iir->coef;
	int32_t *sec_state = iir->delay;
	int32_t sample = x;
	int32_t y;
	int64_t sum;
	int section;

	for (section = 0; section < SOF_IIR_DF1_4TH_NUM_BIQUADS; section++) {
		/* Feedback and feed-forward taps: Q2.30 x Q1.31 products
		 * are accumulated as Q3.61.
		 */
		sum = ((int64_t)sec_coef[0]) * sec_state[0]; /* a2 * y(n - 2) */
		sum += ((int64_t)sec_coef[1]) * sec_state[1]; /* a1 * y(n - 1) */
		sum += ((int64_t)sec_coef[2]) * sec_state[2]; /* b2 * x(n - 2) */
		sum += ((int64_t)sec_coef[3]) * sec_state[3]; /* b1 * x(n - 1) */
		sum += ((int64_t)sec_coef[4]) * sample; /* b0 * x(n) */

		/* Round Q3.61 down to 31 fractional bits and saturate to
		 * Q1.31 to get the section output before gain.
		 */
		y = (int32_t)sat_int32(Q_SHIFT_RND(sum, 61, 31));

		/* Age the histories: the newest output and input move in */
		sec_state[0] = sec_state[1];
		sec_state[1] = y;
		sec_state[2] = sec_state[3];
		sec_state[3] = sample;

		/* Gain is Q2.14, so the product with Q1.31 is Q3.45. The
		 * section's shift word is folded into the conversion back
		 * to 31 fractional bits, and the saturated result feeds
		 * the next section.
		 */
		sum = ((int64_t)sec_coef[6]) * y;
		sum = Q_SHIFT_RND(sum, 45 + sec_coef[5], 31);
		sample = sat_int32(sum);

		/* Step to the next section's coefficients and delays */
		sec_coef += SOF_EQ_IIR_NBIQUAD;
		sec_state += IIR_DF1_NUM_STATE;
	}

	/* The last section's output was left in sample */
	return sample;
}
EXPORT_SYMBOL(iir_df1_4th);

#endif
68 changes: 68 additions & 0 deletions src/math/iir_df1_hifi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,72 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
}
EXPORT_SYMBOL(iir_df1);

/**
 * Simplified IIR for a fixed cascade of SOF_IIR_DF1_4TH_NUM_BIQUADS direct
 * form 1 biquads in series, written with Xtensa AE_ intrinsics.
 *
 * Only iir->coef and iir->delay are accessed. The biquad count members of
 * the state are not read and nothing is validated, so the caller must have
 * set up coefficients and delay lines for exactly that many biquads.
 *
 * @param iir IIR state with configured biquad coefficients and delay lines
 * @param x Input sample
 * @return Output sample of the last biquad
 */
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
{
ae_int64 acc;
ae_valign coef_align;
ae_int32x2 coef_a2a1;
ae_int32x2 coef_b2b1;
ae_int32x2 coef_b0;
ae_int32x2 gain;
ae_int32x2 shift;
ae_int32x2 delay_y2y1;
ae_int32x2 delay_x2x1;
ae_int32 in;
ae_int32 tmp;
ae_int32x2 *coefp;
ae_int32x2 *delayp;
int32_t *delay_update;
int i;

/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
coefp = (ae_int32x2 *)&iir->coef[0];
delayp = (ae_int32x2 *)&iir->delay[0];
in = x;
for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
/* Compute output: Delay is kept Q17.47 while multiply
 * instruction gives Q2.30 x Q1.31 -> Q18.46. Need to
 * shift delay line values right by one for same align
 * as MAC. Store to delay line need to be shifted left
 * by one similarly.
 */
/* Load {a2, a1} and {b2, b1} as pairs, then b0, shift and gain as
 * single words. These loads are the only place coefp is advanced,
 * so after them it points to the next biquad's coefficients.
 */
coef_align = AE_LA64_PP(coefp);
AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
AE_L32_IP(shift, (ae_int32 *)coefp, 4);
AE_L32_IP(gain, (ae_int32 *)coefp, 4);

/* Load {y(n - 2), y(n - 1)} and {x(n - 2), x(n - 1)}; delayp moves
 * forward by 16 bytes in total.
 * NOTE(review): unlike the coefficient loads these do not use an
 * alignment register, so iir->delay presumably needs to be 8-byte
 * aligned -- confirm against the allocation of the delay lines.
 */
AE_L32X2_IP(delay_y2y1, delayp, 8);
AE_L32X2_IP(delay_x2x1, delayp, 8);

acc = AE_MULF32R_HH(coef_a2a1, delay_y2y1); /* a2 * y(n - 2) */
AE_MULAF32R_LL(acc, coef_a2a1, delay_y2y1); /* a1 * y(n - 1) */
AE_MULAF32R_HH(acc, coef_b2b1, delay_x2x1); /* b2 * x(n - 2) */
AE_MULAF32R_LL(acc, coef_b2b1, delay_x2x1); /* b1 * x(n - 1) */
AE_MULAF32R_HH(acc, coef_b0, in); /* b0 * x */
acc = AE_SLAI64S(acc, 1); /* Convert to Q17.47 */
tmp = AE_ROUND32F48SSYM(acc); /* Round to Q1.31 */

/* update the state value */
/* delayp already points past this biquad's four 32-bit state words,
 * so step back by four to update them in place.
 */
delay_update = (int32_t *)delayp - 4;
delay_update[0] = delay_update[1];
delay_update[1] = tmp;
delay_update[2] = delay_update[3];
delay_update[3] = in;

/* Apply gain Q18.14 x Q1.31 -> Q34.30 */
acc = AE_MULF32R_HH(gain, tmp); /* Gain */
acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */

/* Apply biquad output shift right parameter and then
 * round and saturate to 32 bits Q1.31.
 */
acc = AE_SRAA64(acc, shift);
/* The rounded result becomes the input of the next biquad */
in = AE_ROUND32F48SSYM(acc);
}
/* Output of the last biquad is in variable in */
return in;
}
EXPORT_SYMBOL(iir_df1_4th);

#endif
61 changes: 61 additions & 0 deletions src/math/iir_df1_hifi4.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,65 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
}
EXPORT_SYMBOL(iir_df1);

/**
 * Simplified IIR for a fixed cascade of SOF_IIR_DF1_4TH_NUM_BIQUADS direct
 * form 1 biquads in series, written with Xtensa AE_ intrinsics.
 *
 * Only iir->coef and iir->delay are accessed. The biquad count members of
 * the state are not read and nothing is validated, so the caller must have
 * set up coefficients and delay lines for exactly that many biquads.
 *
 * @param iir IIR state with configured biquad coefficients and delay lines
 * @param x Input sample
 * @return Output sample of the last biquad
 */
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
{
ae_valign coef_align;
ae_valign data_r_align;
ae_valign data_w_align = AE_ZALIGN64();
ae_f64 acc;
ae_int32x2 delay_y2y1;
ae_int32x2 delay_x2x1;
ae_int32x2 coef_a2a1;
ae_int32x2 coef_b2b1;
ae_int32x2 coef_b0;
ae_int32x2 gain;
ae_int32x2 shift;
ae_int32 in;
ae_int32x2 *coefp = (ae_int32x2 *)iir->coef;
/* Separate read and write pointers walk the same delay buffer: each
 * biquad's state is loaded via delay_r, updated in registers and stored
 * back to the same location via delay_w.
 */
ae_int32x2 *delay_r = (ae_int32x2 *)iir->delay;
ae_int32x2 *delay_w = delay_r;
int i;

/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
data_r_align = AE_LA64_PP(delay_r);
in = x;
for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
/* Load data */
AE_LA32X2_IP(delay_y2y1, data_r_align, delay_r);
AE_LA32X2_IP(delay_x2x1, data_r_align, delay_r);

/* Load coefficients */
/* The alignment register is primed again for every biquad, since the
 * three single-word loads below move coefp without going through it.
 * These loads are the only place coefp is advanced.
 */
coef_align = AE_LA64_PP(coefp);
AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
AE_L32_IP(shift, (ae_int32 *)coefp, 4);
AE_L32_IP(gain, (ae_int32 *)coefp, 4);

acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */

/* Store data */
AE_SA32X2_IP(delay_y2y1, data_w_align, delay_w);
AE_SA32X2_IP(delay_x2x1, data_w_align, delay_w);

/* Apply gain */
/* The low word of delay_y2y1 now holds this biquad's new output */
acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */

/* Apply biquad output shift right parameter and then
 * round and saturate to 32 bits Q1.31.
 */
acc = AE_SRAA64(acc, shift);
/* The rounded result becomes the input of the next biquad */
in = AE_ROUND32F48SSYM(acc);
}
/* NOTE(review): presumably flushes data still pending in the store
 * alignment register to the delay buffer -- confirm against the HiFi
 * ISA reference.
 */
AE_SA64POS_FP(data_w_align, delay_w);
return in;
}
EXPORT_SYMBOL(iir_df1_4th);

#endif
59 changes: 59 additions & 0 deletions src/math/iir_df1_hifi5.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,63 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
}
EXPORT_SYMBOL(iir_df1);

/**
 * Simplified IIR for a fixed cascade of SOF_IIR_DF1_4TH_NUM_BIQUADS direct
 * form 1 biquads in series, written with Xtensa AE_ intrinsics.
 *
 * Only iir->coef and iir->delay are accessed. The biquad count members of
 * the state are not read and nothing is validated, so the caller must have
 * set up coefficients and delay lines for exactly that many biquads.
 *
 * @param iir IIR state with configured biquad coefficients and delay lines
 * @param x Input sample
 * @return Output sample of the last biquad
 */
int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
{
ae_valignx2 coef_align;
ae_valignx2 data_r_align;
ae_valignx2 data_w_align = AE_ZALIGN128();
ae_f64 acc;
ae_int32x2 delay_y2y1;
ae_int32x2 delay_x2x1;
ae_int32x2 coef_a2a1;
ae_int32x2 coef_b2b1;
ae_int32x2 coef_b0;
ae_int32x2 gain;
ae_int32x2 shift;
ae_int32 in;
ae_int32x4 *coefp = (ae_int32x4 *)iir->coef;
/* Separate read and write pointers walk the same delay buffer: each
 * biquad's state is loaded via delay_r, updated in registers and stored
 * back to the same location via delay_w.
 */
ae_int32x4 *delay_r = (ae_int32x4 *)iir->delay;
ae_int32x4 *delay_w = delay_r;
int i;

/* Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain} */
/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
data_r_align = AE_LA128_PP(delay_r);
in = x;
for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
/* Load data */
/* One load fetches all four state words of this biquad */
AE_LA32X2X2_IP(delay_y2y1, delay_x2x1, data_r_align, delay_r);

/* Load coefficients */
/* The alignment register is primed again for every biquad, since the
 * three single-word loads below move coefp without going through it.
 * These loads are the only place coefp is advanced.
 */
coef_align = AE_LA128_PP(coefp);
AE_LA32X2X2_IP(coef_a2a1, coef_b2b1, coef_align, coefp);
AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
AE_L32_IP(shift, (ae_int32 *)coefp, 4);
AE_L32_IP(gain, (ae_int32 *)coefp, 4);

acc = AE_MULF32RA_HH(coef_b0, in); /* acc = b0 * in */
AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
AE_PKSR32(delay_y2y1, acc, 1); /* y2 = y1, y1 = acc(q1.31) */
delay_x2x1 = AE_SEL32_LL(delay_x2x1, in); /* x2 = x1, x1 = in */

/* Store data */
AE_SA32X2X2_IP(delay_y2y1, delay_x2x1, data_w_align, delay_w);

/* Apply gain */
/* The low word of delay_y2y1 now holds this biquad's new output */
acc = AE_MULF32R_LL(gain, delay_y2y1); /* acc = gain * y1 */
acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */

/* Apply biquad output shift right parameter and then
 * round and saturate to 32 bits Q1.31.
 */
acc = AE_SRAA64(acc, shift);
/* The rounded result becomes the input of the next biquad */
in = AE_ROUND32F48SSYM(acc);
}

/* NOTE(review): presumably flushes data still pending in the store
 * alignment register to the delay buffer -- confirm against the HiFi
 * ISA reference.
 */
AE_SA128POS_FP(data_w_align, delay_w);
return in;
}
EXPORT_SYMBOL(iir_df1_4th);

#endif
Loading