Skip to content

Commit 355cb84

Browse files
cleanup
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
1 parent b0d41ae commit 355cb84

File tree

1 file changed

+1
-2
lines changed

1 file changed

+1
-2
lines changed

hopper/flash_fwd_combine_kernel.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ class FlashAttnFwdCombine {
268268
// do actual work is. If the grid is more than 50% sparse, we linearize the M
269269
// and batch. If the grid is more than 50% dense, we use the standard scheduling
270270
// algorithm since it's more efficient at calculating the block coordinates.
271-
// NOTE: in valen case args.seqlen_q is the max seqlen_q across all batches
271+
// NOTE: in varlen case args.seqlen_q is the max seqlen_q across all batches
272272
// if the density is over 50% we use the standard scheduling algo
273273
return cute::ceil_div(args.total_q, args.seqlen_q) >= cute::ceil_div(args.b, 2) ?
274274
SchedulingAlgo::STANDARD :
@@ -290,7 +290,6 @@ class FlashAttnFwdCombine {
290290

291291
switch (choose_scheduling_algo(args)) {
292292
case SchedulingAlgo::STANDARD: {
293-
printf("Using standard scheduling algo for varlen!!!!!!!\n");
294293
unsigned int num_blocks_k = cute::ceil_div(args.dv, kBlockK);
295294
unsigned int num_blocks_m = cute::ceil_div(args.seqlen_q * args.num_heads, kBlockM);
296295
return {num_blocks_m, num_blocks_k, static_cast<unsigned int>(args.b)};

0 commit comments

Comments
 (0)