We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f647afb commit d8e68e4 (Copy full SHA for d8e68e4)
csrc/quantization/awq/gemm_kernels.cu
@@ -334,7 +334,7 @@ __global__ void __launch_bounds__(64)
334
}
335
336
// TODO: Shang: Hoist loop invariance.
337
- for (int ax1_0_1 = 0; ax1_0_1 < 4; ++ax1_0_1) {
+ for (int ax1_0_1 = 0; ax1_0_1 < (N / 32); ++ax1_0_1) {
338
for (int local_id = 0; local_id < 8; ++local_id) {
339
int row_offset = (((int)blockIdx_y) / j_factors1) * 16 +
340
((int)threadIdx.x) / 4 + (local_id % 4) / 2 * 8;
0 commit comments