@@ -581,18 +581,18 @@ static const uint3 init_fastdiv_values(uint32_t d) {
581581 return make_uint3 (mp, L, d);
582582}
583583
// Divides n by a divisor that was fixed ahead of time, replacing the hardware
// division with one high multiply, one add and one shift.
//
// fastdiv_values must contain <mp, L, divisor> in <x, y, z> (see init_fastdiv_values).
// fastdiv_values.z is unused here and optimized away by the compiler.
//
// Returns the quotient computed as (umulhi(n, mp) + n) >> L.
static __device__ __forceinline__ uint32_t fastdiv(uint32_t n, const uint3 fastdiv_values) {
    // High 32 bits of the 64-bit product n * mp.
    const uint32_t hi = __umulhi(n, fastdiv_values.x);
    // hi + n may need 33 bits, so a 32-bit add would silently wrap for large n.
    // Widening also keeps the shift well-defined should L be 32
    // (NOTE(review): presumably possible for divisors above 2^31 -- confirm
    // against init_fastdiv_values).
    const uint64_t sum = static_cast<uint64_t>(hi) + n;
    // The quotient never exceeds n, so narrowing back to 32 bits is lossless
    // for constants produced by init_fastdiv_values.
    return static_cast<uint32_t>(sum >> fastdiv_values.y);
}
592592
// Remainder of n with respect to the divisor packed into fastdiv_values.
//
// fastdiv_values must contain <mp, L, divisor> in <x, y, z> (see init_fastdiv_values).
static __device__ __forceinline__ uint32_t fastmodulo(uint32_t n, const uint3 fastdiv_values) {
    const uint32_t quotient = fastdiv(n, fastdiv_values);
    const uint32_t divisor  = fastdiv_values.z;
    // remainder = n - quotient * divisor
    return n - quotient * divisor;
}
597597
// Pointer-to-function type for dequantize routines. A callee receives an untyped
// const pointer `vx`, a 64-bit index `ib`, an int index `iqs`, and a float2
// reference `v` (presumably the output pair -- confirm against the implementations).
using dequantize_kernel_t = void (*)(const void * vx, const int64_t ib, const int iqs, float2 & v);
0 commit comments