Skip to content

Commit

Permalink
bugfix: fix the scheduler behavior of large batch size (#333)
Browse files Browse the repository at this point in the history
When `128 / page_size == 0` (i.e. `page_size > 128`), the minimum chunk size
passed to the binary search becomes 0, which can cause a division-by-zero; clamp it to at least 1.
  • Loading branch information
yzh119 authored Jun 24, 2024
1 parent 947830b commit 4d08c63
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions include/flashinfer/attention/handler.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ cudaError_t BatchDecodeWithPagedKVCacheWorkEstimationDispatched(
}
std::tie(max_num_pages_per_batch, new_batch_size) =
PartitionPagedKVCacheBinarySearchMinNumPagePerBatch(max_grid_size, num_kv_heads, num_pages,
128 / page_size);
std::max(128 / page_size, 1));
if (new_batch_size == batch_size && !enable_cuda_graph) {
// do not use partition-kv kernel for short sequence, when not using CUDAGraph
split_kv = false;
Expand Down Expand Up @@ -564,9 +564,9 @@ cudaError_t PrefillSplitQOKVIndptr(bool& split_kv, uint32_t& split_max_batch_siz
const uint32_t qo_chunk_size = get_num_rows_per_cta(warp_layout);

// step 2: determine kv_chunk_size
std::tie(split_kv, kv_chunk_size, new_batch_size) =
PrefillBinarySearchKVChunkSize(max_grid_size, num_kv_heads, packed_qo_len_arr, kv_len_arr,
qo_chunk_size, /*min_kv_chunk_size=*/(128 / page_size));
std::tie(split_kv, kv_chunk_size, new_batch_size) = PrefillBinarySearchKVChunkSize(
max_grid_size, num_kv_heads, packed_qo_len_arr, kv_len_arr, qo_chunk_size,
/*min_kv_chunk_size=*/std::max((128 / page_size), 1));

// step 3: split qo_indptr and kv_indptr
total_num_tiles_q = 0;
Expand Down

0 comments on commit 4d08c63

Please sign in to comment.