File tree Expand file tree Collapse file tree 1 file changed +1
-2
lines changed Expand file tree Collapse file tree 1 file changed +1
-2
lines changed Original file line number Diff line number Diff line change @@ -268,7 +268,7 @@ class FlashAttnFwdCombine {
268268 // do actual work is. If the grid is more than 50% sparse, we linearize the M
269269 // and batch. If the grid is more than 50% dense, we use the standard scheduling
270270 // algorithm since it's more efficient at calculating the block coordinates.
271- // NOTE: in valen case args.seqlen_q is the max seqlen_q across all batches
271+ // NOTE: in varlen case args.seqlen_q is the max seqlen_q across all batches
272272 // if the density is over 50% we use the standard scheduling algo
273273 return cute::ceil_div (args.total_q , args.seqlen_q ) >= cute::ceil_div (args.b , 2 ) ?
274274 SchedulingAlgo::STANDARD :
@@ -290,7 +290,6 @@ class FlashAttnFwdCombine {
290290
291291 switch (choose_scheduling_algo (args)) {
292292 case SchedulingAlgo::STANDARD: {
293- printf (" Using standard scheduling algo for varlen!!!!!!!\n " );
294293 unsigned int num_blocks_k = cute::ceil_div (args.dv , kBlockK );
295294 unsigned int num_blocks_m = cute::ceil_div (args.seqlen_q * args.num_heads , kBlockM );
296295 return {num_blocks_m, num_blocks_k, static_cast <unsigned int >(args.b )};
You can’t perform that action at this time.
0 commit comments