Skip to content

Commit

Permalink
Provide portable clz implementation
Browse files Browse the repository at this point in the history
This commit implements an efficient clz (count leading zeros) routine for
MSVC and Clang/GCC, and falls back to a generic one when neither is
available.
  • Loading branch information
jserv committed Jul 29, 2024
1 parent e1892ce commit 009f4dc
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 3 deletions.
37 changes: 37 additions & 0 deletions include/twin_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,4 +499,41 @@ void _twin_button_init(twin_button_t *button,
twin_style_t font_style,
twin_dispatch_proc_t dispatch);

/* utility */

#ifdef _MSC_VER
#include <intrin.h>
/* Count the leading zero bits of a 32-bit value (MSVC implementation).
 *
 * Returns the number of zero bits above the most significant set bit
 * (0..31), or 32 when v is zero.
 */
static inline int twin_clz(uint32_t v)
{
    /* _BitScanReverse() writes the bit index through an 'unsigned long *',
     * so the index must not be stored in a uint32_t object.
     */
    unsigned long msb_index = 0;

    /* Search from MSB to LSB for the first set bit.
     * The intrinsic returns zero if no set bit is found (v == 0).
     */
    if (_BitScanReverse(&msb_index, v))
        return 31 - (int) msb_index;
    return 32; /* v == 0: all 32 bits are leading zeros */
}
#elif defined(__GNUC__) || defined(__clang__)
/* Count the leading zero bits of a 32-bit value (GCC/Clang implementation).
 *
 * Returns the number of zero bits above the most significant set bit
 * (0..31), or 32 when v is zero.
 */
static inline int twin_clz(uint32_t v)
{
    /* The result of __builtin_clz(0) is undefined, so zero is handled
     * explicitly and reported as 32 leading zeros.
     */
    if (v == 0)
        return 32;
    return __builtin_clz(v);
}
#else /* generic implementation */
/* Count the leading zero bits of a 32-bit value (portable fallback).
 *
 * Returns the number of zero bits above the most significant set bit
 * (0..31), or 32 when v is zero.
 */
static inline int twin_clz(uint32_t v)
{
    /* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
     *
     * The table maps the De Bruijn hash to floor(log2(v)), i.e. the index of
     * the most significant set bit. It is NOT the leading-zero count, so the
     * lookup result has to be subtracted from 31 below.
     */
    static const uint8_t mul_debruijn[32] = {
        0, 9,  1,  10, 13, 21, 2,  29, 11, 14, 16, 18, 22, 25, 3, 30,
        8, 12, 20, 28, 15, 17, 24, 7,  19, 27, 23, 6,  26, 5,  4, 31};

    /* Zero has no set bit; the lookup would otherwise report bit 0 */
    if (v == 0)
        return 32;

    /* Copy the highest set bit into every lower bit position */
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;

    /* Convert the index of the highest set bit into a leading-zero count */
    return 31 - mul_debruijn[(uint32_t) (v * 0x07C4ACDDU) >> 27];
}
#endif

#endif /* _TWIN_PRIVATE_H_ */
9 changes: 6 additions & 3 deletions src/fixed.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define uint32_lo(i) ((i) & 0xffff)
#define uint32_hi(i) ((i) >> 16)
#define uint32_carry16 ((1) << 16)

/* Check interval
* For any variable interval checking:
* if (x > minx - epsilon && x < minx + epsilon) ...
Expand All @@ -33,17 +34,20 @@ twin_fixed_t twin_fixed_sqrt(twin_fixed_t a)
/* Shift left 'a' to expand more digit for sqrt precision */
offset &= ~1;
a <<= offset;

/* Calculate the digits need to shift back */
offset >>= 1;
offset -= (16 >> 1);

/* Use digit-by-digit calculation to compute square root */
twin_fixed_t z = 0;
for (twin_fixed_t m = 1UL << ((31 - __builtin_clz(a)) & ~1UL); m; m >>= 2) {
for (twin_fixed_t m = 1UL << ((31 - twin_clz(a)) & ~1UL); m; m >>= 2) {
int b = z + m;
z >>= 1;
if (a >= b)
a -= b, z += m;
}

/* Shift back the expanded digits */
return (offset >= 0) ? z >> offset : z << (-offset);
}
Expand All @@ -66,8 +70,7 @@ twin_sfixed_t _twin_sfixed_sqrt(twin_sfixed_t as)
offset -= (4 >> 1);

twin_sfixed_t z = 0;
for (twin_sfixed_t m = 1UL << ((31 - __builtin_clz(as)) & ~1UL); m;
m >>= 2) {
for (twin_sfixed_t m = 1UL << ((31 - twin_clz(as)) & ~1UL); m; m >>= 2) {
int16_t b = z + m;
z >>= 1;
if (as >= b)
Expand Down

0 comments on commit 009f4dc

Please sign in to comment.