Skip to content

Commit

Permalink
Math: Optimize sofm_exp_fixed() HiFi version
Browse files Browse the repository at this point in the history
The unnecessary shift and multiply helper functions can be removed
by using the normal C left shift and by using the Xtensa multiply,
shift, and round intrinsics directly in the function.

This change saves 1.3 MCPS on the TGL HiFi3 platform in DRC
processing mode.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
  • Loading branch information
singalsu committed Oct 31, 2024
1 parent cf62b65 commit bdd20f9
Showing 1 changed file with 17 additions and 55 deletions.
72 changes: 17 additions & 55 deletions src/math/exp_fcn_hifi.c
Original file line number Diff line number Diff line change
Expand Up @@ -280,52 +280,6 @@ int32_t sofm_exp_int32(int32_t x)
return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
}

/* Fixed-point multiply with Q-format conversion and rounding.
 *
 * a, b: values to multiply; the product is held in a 64-bit accumulator
 * c, d: Q-format arguments for a and b (the q_mult() wrapper names them
 *       qa and qb)
 * e:    Q-format argument for the result (qy in q_mult())
 *
 * The product is first shifted right by (c + d) - (e + 1), which is one
 * bit short of the final position. Then 1 is added and the remaining bit
 * is shifted out, so the result is rounded instead of truncated.
 */
static inline int exp_hifi_q_multsr_32x32(int a, int b, int c, int d, int e)
{
	int pre_shift = XT_SUB(XT_ADD(c, d), XT_ADD(e, 1));
	ae_int64 acc;

	acc = AE_MUL32_LL(a, b);
	acc = AE_SRAA64(acc, pre_shift);

	/* Round: add one at the extra guard bit, then drop that bit */
	acc = AE_ADD64(acc, 1);
	acc = AE_SRAI64(acc, 1);

	return AE_MOVINT32_FROMINT64(acc);
}

/* Right shift with rounding, used for Q-format conversion.
 *
 * a: value to convert
 * b: source Q-format argument
 * c: destination Q-format argument
 *
 * The value is shifted right by b - (c + 1), one bit short of the final
 * position. Then 1 is added and the remaining bit is shifted out, so the
 * result is rounded instead of truncated.
 */
static inline int exp_hifi_q_shift_rnd(int a, int b, int c)
{
	int pre_shift = XT_SUB(b, XT_ADD(c, 1));
	ae_int32 val;

	val = AE_SRAA32(a, pre_shift);

	/* Round: add one at the extra guard bit, then drop that bit */
	val = AE_ADD32(val, 1);
	val = AE_SRAI32(val, 1);

	return val;
}

/* Left shift counterpart of the rounding right shift helper. It exists
 * because the compiler does not allow a negative shift count such
 * as (x >> -1).
 *
 * a: value to convert
 * b: source Q-format argument
 * c: destination Q-format argument
 *
 * The value is shifted left by c - b with AE_SLAA32().
 */
static inline int exp_hifi_q_shift_left(int a, int b, int c)
{
	int up_shift = XT_SUB(c, b);
	ae_int32 shifted;

	shifted = AE_SLAA32(a, up_shift);
	return shifted;
}

/* Convenience wrapper for exp_hifi_q_multsr_32x32(): multiplies a by b,
 * passing qa, qb and qy as the Q-format arguments, and casts the result
 * to int32_t.
 * NOTE(review): the (int64_t) cast on a is converted back to int at the
 * call because the callee declares the parameter as int - confirm the
 * cast is still wanted.
 */
#define q_mult(a, b, qa, qb, qy) ((int32_t)exp_hifi_q_multsr_32x32((int64_t)(a), b, qa, qb, qy))
/* Fixed point exponent function for approximate range -11.5 .. 7.6
* that corresponds to decibels range -100 .. +66 dB.
*
Expand All @@ -341,11 +295,12 @@ static inline int exp_hifi_q_shift_left(int a, int b, int c)

int32_t sofm_exp_fixed(int32_t x)
{
ae_f64 p;
ae_int32 y0;
ae_int32 y;
int32_t xs;
int32_t y;
int32_t y0;
int32_t n = 1;
int i;
int n = 0;

if (x < SOFM_EXP_FIXED_INPUT_MIN)
return 0;
Expand All @@ -357,20 +312,27 @@ int32_t sofm_exp_fixed(int32_t x)
xs = x;
while (xs >= SOFM_EXP_TWO_Q27 || xs <= SOFM_EXP_MINUS_TWO_Q27) {
xs >>= 1;
n++;
n <<= 1;
}

/* sofm_exp_int32() input is Q4.28, while xs is Q5.27
* sofm_exp_int32() output is Q9.23, while y0 is Q12.20
*/
y0 = exp_hifi_q_shift_rnd(sofm_exp_int32(exp_hifi_q_shift_left(xs, 27, 28)),
23, 20);
y0 = AE_SRAI32R(sofm_exp_int32(xs << 1), 3);
y = SOFM_EXP_ONE_Q20;
for (i = 0; i < (1 << n); i++)
y = (int32_t)exp_hifi_q_multsr_32x32((int64_t)y, y0, 20, 20, 20);

return y;
/* AE multiply returns Q41 from Q20 * Q20. To get Q20 it needs to be
* shifted right by 21. Since the round instruction used is aligned
* to the high 32 bits, it is instead shifted left by 32 - 21 = 11:
*/
for (i = 0; i < n; i++) {
p = AE_SLAI64S(AE_MULF32S_LL(y, y0), 11);
y = AE_ROUND32F64SASYM(p);
}

return (int32_t)y;
}

EXPORT_SYMBOL(sofm_exp_fixed);

#endif

0 comments on commit bdd20f9

Please sign in to comment.