Commit dd5fa7e
[ROCm][Kernel][V1] Enable AMD Radeon GPU Custom Paged Attention on v1 (#17004)
Signed-off-by: Hosang Yoon <hosang.yoon@amd.com>
1 parent 2b16104 commit dd5fa7e

6 files changed: +1930 −189 lines

benchmarks/kernels/benchmark_paged_attention.py

Lines changed: 5 additions & 1 deletion
@@ -84,7 +84,10 @@ def main(
     if version == "v2":
         if current_platform.is_rocm():
             global PARTITION_SIZE
-            PARTITION_SIZE = 1024 if not args.custom_paged_attn else PARTITION_SIZE_ROCM
+            if not args.custom_paged_attn and not current_platform.is_navi():
+                PARTITION_SIZE = 1024
+            else:
+                PARTITION_SIZE = PARTITION_SIZE_ROCM
     num_partitions = (max_seq_len + PARTITION_SIZE - 1) // PARTITION_SIZE
     tmp_output = torch.empty(
         size=(num_seqs, num_query_heads, num_partitions, head_size),
@@ -159,6 +162,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
             scale,
             block_tables,
             seq_lens,
+            None,
             block_size,
             max_seq_len,
             alibi_slopes,
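For context, here is a minimal standalone sketch of the partition-size selection the first hunk introduces. Only the branch condition mirrors the diff; the helper functions, the example value of PARTITION_SIZE_ROCM, and the sample arguments are assumptions for illustration, not taken from the patch.

# Minimal sketch of the selection logic from the first hunk.
# The helpers and the PARTITION_SIZE_ROCM value are hypothetical;
# only the branch condition mirrors the diff above.
PARTITION_SIZE_ROCM = 256  # assumed value for illustration only


def select_partition_size(custom_paged_attn: bool, is_navi: bool) -> int:
    """Pick the v2 paged-attention partition size on ROCm.

    The stock kernel on non-Navi (CDNA) GPUs keeps the large 1024
    partition; the custom kernel, and any Navi (RDNA) GPU, use the
    ROCm-specific partition size instead.
    """
    if not custom_paged_attn and not is_navi:
        return 1024
    return PARTITION_SIZE_ROCM


def num_partitions(max_seq_len: int, partition_size: int) -> int:
    # Ceiling division, exactly as computed in the benchmark.
    return (max_seq_len + partition_size - 1) // partition_size


if __name__ == "__main__":
    size = select_partition_size(custom_paged_attn=True, is_navi=False)
    print(size, num_partitions(max_seq_len=4096, partition_size=size))

The second hunk simply threads a new None positional argument between seq_lens and block_size in the benchmark's kernel call, presumably filling an optional parameter added to the ROCm paged-attention entry point by this change.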
