thesofproject · kv2019i · Feb 10, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025
@@ -161,6 +161,10 @@ static int crossover_init_coef_lr4(struct sof_eq_iir_biquad *coef,
 {
 	int ret;
 
+	/* Ensure the LR4 can be processed with the simplified 4th order IIR */
+	if (CROSSOVER_LR4_NUM_BIQUADS != SOF_IIR_DF1_4TH_NUM_BIQUADS)
+		return -EINVAL;
+
 	/* Only one set of coefficients is stored in config for both biquads
 	 * in series due to identity. To maintain the structure of
 	 * iir_state_df1, it requires two copies of coefficients in a row.
@@ -190,8 +194,8 @@ static int crossover_init_coef_lr4(struct sof_eq_iir_biquad *coef,
 	if (!lr4->delay)
 		return -ENOMEM;
 
-	lr4->biquads = 2;
-	lr4->biquads_in_series = 2;
+	lr4->biquads = CROSSOVER_LR4_NUM_BIQUADS;
+	lr4->biquads_in_series = CROSSOVER_LR4_NUM_BIQUADS;
 
 	return 0;
 }

@@ -15,6 +15,8 @@
 
 #include "crossover_user.h"
 
+#define CROSSOVER_LR4_NUM_BIQUADS 2
+
 struct comp_buffer;
 struct comp_dev;
 
@@ -122,7 +124,7 @@ static inline int32_t crossover_generic_process_lr4(int32_t in,
 						    struct iir_state_df1 *lr4)
 {
 	/* Cascade two biquads with same coefficients in series. */
-	return iir_df1(lr4, in);
+	return iir_df1_4th(lr4, in);
 }
 
 static inline void crossover_free_config(struct sof_crossover_config **config)

@@ -66,6 +66,10 @@ static int multiband_drc_eq_init_coef_ch(struct sof_eq_iir_biquad *coef,
 {
 	int ret;
 
+	/* Ensure the emp/deemp EQ can be processed with the simplified 4th order IIR */
+	if (SOF_EMP_DEEMP_BIQUADS != SOF_IIR_DF1_4TH_NUM_BIQUADS)
+		return -EINVAL;
+
 	eq->coef = rzalloc(SOF_MEM_ZONE_RUNTIME, 0, SOF_MEM_CAPS_RAM,
 			   sizeof(struct sof_eq_iir_biquad) * SOF_EMP_DEEMP_BIQUADS);
 	if (!eq->coef)

@@ -39,7 +39,7 @@ static void multiband_drc_process_emp_crossover(struct multiband_drc_state *stat
 		crossover_s = &state->crossover[ch];
 
 		if (enable_emp)
-			emp_out = iir_df1(emp_s, *buf_src);
+			emp_out = iir_df1_4th(emp_s, *buf_src);
 		else
 			emp_out = *buf_src;
 
@@ -178,7 +178,7 @@ static void multiband_drc_process_deemp(struct multiband_drc_state *state,
 		}
 
 		if (enable_deemp)
-			*buf_sink = iir_df1(deemp_s, mix_out);
+			*buf_sink = iir_df1_4th(deemp_s, mix_out);
 		else
 			*buf_sink = mix_out;
 

@@ -13,6 +13,7 @@
 #include <sof/common.h>
 
 #define IIR_DF1_NUM_STATE 4
+#define SOF_IIR_DF1_4TH_NUM_BIQUADS 2
 
 struct iir_state_df1 {
 	unsigned int biquads; /* Number of IIR 2nd order sections total */
@@ -34,8 +35,24 @@ void iir_init_delay_df1(struct iir_state_df1 *iir, int32_t **state);
 
 void iir_reset_df1(struct iir_state_df1 *iir);
 
+/**
+ * Calculate IIR filter consisting of biquads
+ * @param iir	IIR state with configured biquad coefficients and delay lines data
+ * @param x	Single s32 Q1.31 format input sample
+ * @return	Single s32 Q1.31 format output sample
+ */
 int32_t iir_df1(struct iir_state_df1 *iir, int32_t x);
 
+/**
+ * Calculate IIR filter consisting of biquads, special simplified version for
+ * 4th order filter with two biquads in series. Note: There are no checks for
+ * iir struct members, so the caller must ensure that iir is set up with
+ * SOF_IIR_DF1_4TH_NUM_BIQUADS biquads in series before using this function.
+ * @param iir	IIR state with configured biquad coefficients and delay lines data
+ * @param x	Single s32 Q1.31 format input sample
+ * @return	Single s32 Q1.31 format output sample
+ */
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x);
+
 /* Inline functions */
 #if SOF_USE_MIN_HIFI(3, FILTER)
 #include "iir_df1_hifi3.h"

@@ -109,4 +109,58 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	int32_t in;
+	int32_t tmp;
+	int64_t acc;
+	int i;
+	int d = 0; /* Index to the first delay value of the current biquad */
+	int c = 0; /* Index to coefficient a2 of the current biquad */
+	int32_t *coefp = iir->coef;
+	int32_t *delay = iir->delay;
+
+	/* Fixed 4th order filter: the loop always runs
+	 * SOF_IIR_DF1_4TH_NUM_BIQUADS sections in series and feeds the output
+	 * of each section to the next one. Only iir->coef and iir->delay are
+	 * used, the biquad counts stored in iir are not read or checked.
+	 *
+	 * Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain}
+	 */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Compute output: Accumulator is Q3.61
+		 * Q2.30 x Q1.31 -> Q3.61
+		 * Shift Q3.61 to Q3.31 with rounding, saturate to Q1.31
+		 */
+		acc = ((int64_t)coefp[c]) * delay[d]; /* a2 * y(n - 2) */
+		acc += ((int64_t)coefp[c + 1]) * delay[d + 1]; /* a1 * y(n - 1) */
+		acc += ((int64_t)coefp[c + 2]) * delay[d + 2]; /* b2 * x(n - 2) */
+		acc += ((int64_t)coefp[c + 3]) * delay[d + 3]; /* b1 * x(n - 1) */
+		acc += ((int64_t)coefp[c + 4]) * in; /* b0 * x */
+		tmp = (int32_t)sat_int32(Q_SHIFT_RND(acc, 61, 31));
+
+		/* Update the delay line: the newest output and input become
+		 * y(n - 1) and x(n - 1), the previous ones move to y(n - 2)
+		 * and x(n - 2). The stored output is the value before the
+		 * gain and shift below are applied.
+		 */
+		delay[d] = delay[d + 1];
+		delay[d + 1] = tmp;
+		delay[d + 2] = delay[d + 3];
+		delay[d + 3] = in;
+
+		/* Apply gain Q2.14 x Q1.31 -> Q3.45 */
+		acc = ((int64_t)coefp[c + 6]) * tmp; /* Gain */
+
+		/* Apply biquad output shift right parameter
+		 * simultaneously with Q3.45 to Q3.31 conversion. Then
+		 * saturate to 32 bits Q1.31 and prepare for next
+		 * biquad.
+		 */
+		acc = Q_SHIFT_RND(acc, 45 + coefp[c + 5], 31);
+		in = sat_int32(acc);
+
+		/* Proceed to next biquad coefficients and delay
+		 * lines.
+		 */
+		c += SOF_EQ_IIR_NBIQUAD;
+		d += IIR_DF1_NUM_STATE;
+	}
+	/* Output of the last section is in variable in */
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
@@ -126,4 +126,72 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_int64 acc;
+	ae_valign coef_align;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32 in;
+	ae_int32 tmp;
+	ae_int32x2 *coefp;
+	ae_int32x2 *delayp;
+	int32_t *delay_update;
+	int i;
+
+	/* Fixed 4th order filter: the loop always runs
+	 * SOF_IIR_DF1_4TH_NUM_BIQUADS sections in series and feeds the output
+	 * of each section to the next one. Only iir->coef and iir->delay are
+	 * used, the biquad counts stored in iir are not read or checked.
+	 *
+	 * NOTE(review): the delay line is read with AE_L32X2_IP() while the
+	 * coefficients use the aligning AE_LA32X2_IP() loads, so iir->delay
+	 * is presumably expected to be 8 byte aligned - confirm.
+	 *
+	 * Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain}
+	 */
+	coefp = (ae_int32x2 *)&iir->coef[0];
+	delayp = (ae_int32x2 *)&iir->delay[0];
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Compute output: Delay is kept Q17.47 while multiply
+		 * instruction gives Q2.30 x Q1.31 -> Q18.46. Need to
+		 * shift delay line values right by one for same align
+		 * as MAC. Store to delay line need to be shifted left
+		 * by one similarly.
+		 */
+		coef_align = AE_LA64_PP(coefp);
+		AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
+		AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		AE_L32X2_IP(delay_y2y1, delayp, 8);
+		AE_L32X2_IP(delay_x2x1, delayp, 8);
+
+		acc = AE_MULF32R_HH(coef_a2a1, delay_y2y1); /* a2 * y(n - 2) */
+		AE_MULAF32R_LL(acc, coef_a2a1, delay_y2y1); /* a1 * y(n - 1) */
+		AE_MULAF32R_HH(acc, coef_b2b1, delay_x2x1); /* b2 * x(n - 2) */
+		AE_MULAF32R_LL(acc, coef_b2b1, delay_x2x1); /* b1 * x(n - 1) */
+		AE_MULAF32R_HH(acc, coef_b0, in); /* b0 * x */
+		acc = AE_SLAI64S(acc, 1); /* Convert to Q17.47 */
+		tmp = AE_ROUND32F48SSYM(acc); /* Round to Q1.31 */
+
+		/* Update the state value. The delay pointer has already been
+		 * advanced past this biquad by the two loads above, so step
+		 * back four 32 bit words to reach its {y(n - 2), y(n - 1),
+		 * x(n - 2), x(n - 1)}. The stored output is the value before
+		 * the gain and shift below are applied.
+		 */
+		delay_update = (int32_t *)delayp - 4;
+		delay_update[0] = delay_update[1];
+		delay_update[1] = tmp;
+		delay_update[2] = delay_update[3];
+		delay_update[3] = in;
+
+		/* Apply gain Q18.14 x Q1.31 -> Q34.30 */
+		acc = AE_MULF32R_HH(gain, tmp); /* Gain */
+		acc = AE_SLAI64S(acc, 17); /* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31. The result is the
+		 * input of the next biquad, or the return value after the
+		 * last one.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
@@ -119,4 +119,65 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_valign coef_align;
+	ae_valign data_r_align;
+	ae_valign data_w_align = AE_ZALIGN64();
+	ae_f64 acc;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32 in;
+	ae_int32x2 *coefp = (ae_int32x2 *)iir->coef;
+	ae_int32x2 *delay_r  = (ae_int32x2 *)iir->delay;
+	ae_int32x2 *delay_w = delay_r;
+	int i;
+
+	/* Fixed 4th order filter: the loop always runs
+	 * SOF_IIR_DF1_4TH_NUM_BIQUADS sections in series and feeds the output
+	 * of each section to the next one. Only iir->coef and iir->delay are
+	 * used, the biquad counts stored in iir are not read or checked.
+	 *
+	 * Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain}
+	 */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	data_r_align = AE_LA64_PP(delay_r);
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Load data: the {y2, y1} and {x2, x1} pairs of this biquad */
+		AE_LA32X2_IP(delay_y2y1, data_r_align, delay_r);
+		AE_LA32X2_IP(delay_x2x1, data_r_align, delay_r);
+
+		/* Load coefficients. The alignment register is primed again
+		 * on every iteration since coefp is also advanced by the
+		 * three single 32 bit loads for b0, shift and gain.
+		 */
+		coef_align = AE_LA64_PP(coefp);
+		AE_LA32X2_IP(coef_a2a1, coef_align, coefp);
+		AE_LA32X2_IP(coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		acc = AE_MULF32RA_HH(coef_b0, in);		  /* acc = b0 * in */
+		AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
+		AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
+		AE_PKSR32(delay_y2y1, acc, 1);		     /* y2 = y1, y1 = acc(q1.31) */
+		delay_x2x1 = AE_SEL32_LL(delay_x2x1, in);    /* x2 = x1, x1 = in */
+
+		/* Store data back through delay_w, which starts from the same
+		 * address as delay_r. The stored y1 is the value before the
+		 * gain and shift below are applied. The aligning store
+		 * sequence is completed with AE_SA64POS_FP() after the loop.
+		 */
+		AE_SA32X2_IP(delay_y2y1, data_w_align, delay_w);
+		AE_SA32X2_IP(delay_x2x1, data_w_align, delay_w);
+
+		/* Apply gain to the new y1 in the low word of delay_y2y1 */
+		acc = AE_MULF32R_LL(gain, delay_y2y1);	/* acc = gain * y1 */
+		acc = AE_SLAI64S(acc, 17);		/* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31. The result is the
+		 * input of the next biquad, or the return value after the
+		 * last one.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+	AE_SA64POS_FP(data_w_align, delay_w);
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif
@@ -116,4 +116,63 @@ int32_t iir_df1(struct iir_state_df1 *iir, int32_t x)
 }
 EXPORT_SYMBOL(iir_df1);
 
+int32_t iir_df1_4th(struct iir_state_df1 *iir, int32_t x)
+{
+	ae_valignx2 coef_align;
+	ae_valignx2 data_r_align;
+	ae_valignx2 data_w_align = AE_ZALIGN128();
+	ae_f64 acc;
+	ae_int32x2 delay_y2y1;
+	ae_int32x2 delay_x2x1;
+	ae_int32x2 coef_a2a1;
+	ae_int32x2 coef_b2b1;
+	ae_int32x2 coef_b0;
+	ae_int32x2 gain;
+	ae_int32x2 shift;
+	ae_int32 in;
+	ae_int32x4 *coefp = (ae_int32x4 *)iir->coef;
+	ae_int32x4 *delay_r  = (ae_int32x4 *)iir->delay;
+	ae_int32x4 *delay_w = delay_r;
+	int i;
+
+	/* Fixed 4th order filter: the loop always runs
+	 * SOF_IIR_DF1_4TH_NUM_BIQUADS sections in series and feeds the output
+	 * of each section to the next one. Only iir->coef and iir->delay are
+	 * used, the biquad counts stored in iir are not read or checked.
+	 *
+	 * Coefficients order in coef[] is {a2, a1, b2, b1, b0, shift, gain}
+	 */
+	/* Delay order in state[] is {y(n - 2), y(n - 1), x(n - 2), x(n - 1)} */
+	data_r_align = AE_LA128_PP(delay_r);
+	in = x;
+	for (i = 0; i < SOF_IIR_DF1_4TH_NUM_BIQUADS; i++) {
+		/* Load data: the {y2, y1} and {x2, x1} pairs of this biquad
+		 * with a single aligning load.
+		 */
+		AE_LA32X2X2_IP(delay_y2y1, delay_x2x1, data_r_align, delay_r);
+
+		/* Load coefficients. The alignment register is primed again
+		 * on every iteration since coefp is also advanced by the
+		 * three single 32 bit loads for b0, shift and gain.
+		 */
+		coef_align = AE_LA128_PP(coefp);
+		AE_LA32X2X2_IP(coef_a2a1, coef_b2b1, coef_align, coefp);
+		AE_L32_IP(coef_b0, (ae_int32 *)coefp, 4);
+		AE_L32_IP(shift, (ae_int32 *)coefp, 4);
+		AE_L32_IP(gain, (ae_int32 *)coefp, 4);
+
+		acc = AE_MULF32RA_HH(coef_b0, in);		  /* acc = b0 * in */
+		AE_MULAAFD32RA_HH_LL(acc, coef_a2a1, delay_y2y1); /* + a2 * y2 + a1 * y1 */
+		AE_MULAAFD32RA_HH_LL(acc, coef_b2b1, delay_x2x1); /* + b2 * x2 + b1 * x1 */
+		AE_PKSR32(delay_y2y1, acc, 1);		     /* y2 = y1, y1 = acc(q1.31) */
+		delay_x2x1 = AE_SEL32_LL(delay_x2x1, in);   /* x2 = x1, x1 = in */
+
+		/* Store data back through delay_w, which starts from the same
+		 * address as delay_r. The stored y1 is the value before the
+		 * gain and shift below are applied. The aligning store
+		 * sequence is completed with AE_SA128POS_FP() after the loop.
+		 */
+		AE_SA32X2X2_IP(delay_y2y1, delay_x2x1, data_w_align, delay_w);
+
+		/* Apply gain to the new y1 in the low word of delay_y2y1 */
+		acc = AE_MULF32R_LL(gain, delay_y2y1);	/* acc = gain * y1 */
+		acc = AE_SLAI64S(acc, 17);		/* Convert to Q17.47 */
+
+		/* Apply biquad output shift right parameter and then
+		 * round and saturate to 32 bits Q1.31. The result is the
+		 * input of the next biquad, or the return value after the
+		 * last one.
+		 */
+		acc = AE_SRAA64(acc, shift);
+		in = AE_ROUND32F48SSYM(acc);
+	}
+
+	AE_SA128POS_FP(data_w_align, delay_w);
+	return in;
+}
+EXPORT_SYMBOL(iir_df1_4th);
+
 #endif