bugfix: the output tensor is assumed contiguous, so its row offset must use d, not stride

zcin · zcin · commit 3192d535c0f3 · 2025-10-12T09:53:10.000-07:00
Signed-off-by: Cindy Zhang <cindyzyx9@gmail.com>
diff --git a/include/flashinfer/sampling.cuh b/include/flashinfer/sampling.cuh
@@ -1862,8 +1862,7 @@ __global__ void TopKMaskLogitsKernel(DType* logits, DType* masked_logits, IdType
           (logits_vec[j] > pivot) ? logits_vec[j] : -cuda::std::numeric_limits<float>::infinity();
     }
     if ((i * BLOCK_THREADS + tx) * VEC_SIZE < d) {
-      logits_vec.store(masked_logits + row_idx * stride + i * BLOCK_THREADS * VEC_SIZE +
-                       tx * VEC_SIZE);
+      logits_vec.store(masked_logits + row_idx * d + i * BLOCK_THREADS * VEC_SIZE + tx * VEC_SIZE);
     }
   }
 }
@@ -1987,8 +1986,7 @@ __global__ void TopKRenormProbKernel(DType* probs, DType* renormed_prob, IdType*
       probs_vec[j] = (probs_vec[j] > pivot) ? probs_vec[j] * normalizer : 0;
     }
     if ((i * BLOCK_THREADS + tx) * VEC_SIZE < d) {
-      probs_vec.store(renormed_prob + row_idx * stride + i * BLOCK_THREADS * VEC_SIZE +
-                      tx * VEC_SIZE);
+      probs_vec.store(renormed_prob + row_idx * d + i * BLOCK_THREADS * VEC_SIZE + tx * VEC_SIZE);
     }
   }
 }

Original file line number	Diff line number	Diff line change
`@@ -1862,8 +1862,7 @@ __global__ void TopKMaskLogitsKernel(DType* logits, DType* masked_logits, IdType`
`1862`	`1862`	`(logits_vec[j] > pivot) ? logits_vec[j] : -cuda::std::numeric_limits<float>::infinity();`
`1863`	`1863`	`}`
`1864`	`1864`	`if ((i * BLOCK_THREADS + tx) * VEC_SIZE < d) {`
`1865`		`- logits_vec.store(masked_logits + row_idx * stride + i * BLOCK_THREADS * VEC_SIZE +`
`1866`		`- tx * VEC_SIZE);`
	`1865`	`+ logits_vec.store(masked_logits + row_idx * d + i * BLOCK_THREADS * VEC_SIZE + tx * VEC_SIZE);`
`1867`	`1866`	`}`
`1868`	`1867`	`}`
`1869`	`1868`	`}`
`@@ -1987,8 +1986,7 @@ __global__ void TopKRenormProbKernel(DType* probs, DType* renormed_prob, IdType*`
`1987`	`1986`	`probs_vec[j] = (probs_vec[j] > pivot) ? probs_vec[j] * normalizer : 0;`
`1988`	`1987`	`}`
`1989`	`1988`	`if ((i * BLOCK_THREADS + tx) * VEC_SIZE < d) {`
`1990`		`- probs_vec.store(renormed_prob + row_idx * stride + i * BLOCK_THREADS * VEC_SIZE +`
`1991`		`- tx * VEC_SIZE);`
	`1989`	`+ probs_vec.store(renormed_prob + row_idx * d + i * BLOCK_THREADS * VEC_SIZE + tx * VEC_SIZE);`
`1992`	`1990`	`}`
`1993`	`1991`	`}`
`1994`	`1992`	`}`