Skip to content

Commit 88b9496

Browse files
authored
[Performance] Another prefill/append parameter tweak (#68)
1 parent 8320ebe commit 88b9496

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

include/flashinfer/prefill.cuh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1385,7 +1385,7 @@ cudaError_t SinglePrefillWithKVCacheWorkEstimation(
1385 1385   min((num_blocks_per_sm * num_sm) /
1386 1386   (num_kv_heads *
1387 1387   ceil_div(qo_len * group_size, num_rows_per_cta)),
1388      - kv_len / 256);
     1388 + kv_len / 128);
1389 1389
1390 1390   max_grid_size = num_blocks_per_sm * num_sm;
1391 1391   if (num_chunks > 1) {
@@ -1466,7 +1466,7 @@ cudaError_t SinglePrefillWithKVCacheDispatched(DTypeIn* q, DTypeIn* k, DTypeIn*
1466 1466   uint32_t num_chunks =
1467 1467   min((num_blocks_per_sm * num_sm) /
1468 1468   (num_kv_heads * ceil_div(qo_len * GROUP_SIZE, num_rows_per_cta)),
1469      - kv_len / 256);
     1469 + kv_len / 128);
1470 1470
1471 1471   if (num_chunks <= 1 || tmp == nullptr) {
1472 1472   // Enough parallelism, do not split-kv

0 commit comments

Comments
 (0)