Skip to content

Commit

Permalink
Fix NEON optimized loop filter in libtheora
Browse files Browse the repository at this point in the history
Closes #3
  • Loading branch information
linnaea authored and flibitijibibo committed Feb 2, 2023
1 parent 3ca5349 commit 4d36f69
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions lib/theora/arm-intrinsics/armloop.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ static inline uint8x8x2_t loop_filter_neon(uint8x8x4_t pix, uint16_t lim2u) {
int16x8_t r = vreinterpretq_s16_u16(vsubl_u8(pix.val[0], pix.val[3]));
r = vaddq_s16(r, r2);
r = vaddq_s16(r, vshlq_n_s16(r2, 1));
- r = vshrq_n_s16(r, 3);
+ r = vrshrq_n_s16(r, 3);

uint16x8_t absR = vreinterpretq_u16_s16(vabsq_s16(r));
- int16x8_t sgnR = vrshrq_n_s16(r, 15);
+ int16x8_t sgnR = vshrq_n_s16(r, 15);

uint16x8_t absF = vminq_u16(absR, vqsubq_u16(lim2, absR)); // |f| = MIN(|R|,MAX(2L-|R|, 0));
int16x8_t f = veorq_s16(vaddq_s16(sgnR, vreinterpretq_s16_u16(absF)), sgnR);
Expand Down

0 comments on commit 4d36f69

Please sign in to comment.