We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 247348e · commit 75d3df1 · Copy full SHA for 75d3df1
csrc/quickreduce/quick_reduce_impl.cuh
@@ -182,7 +182,7 @@ struct CodecQ4 : public CodecBase {
182
for (int i = 0; i < 4; i++) {
183
if constexpr (std::is_same<T, half>::value) {
184
int32_t q4 = ((qw >> (i * 4)) & kMask000F) | kHalf2_1024;
185
- packed_add<half>(w[i], kHalf2_1032);
+ w[i] = packed_add<half>(w[i], kHalf2_1032);
186
} else {
187
int32_t int16_2 = (qw >> (i * 4)) & kMask000F;
188
int16_t low = static_cast<int16_t>(int16_2 & 0xFFFF);
0 commit comments