Skip to content

Commit 7a418c7

Browse files
author
qqma
committed
Update test coverage for FA2 and FA3
Signed-off-by: qqma <qqma@amazon.com>
1 parent 92709f7 commit 7a418c7

File tree

2 files changed

+16
-4
lines changed

2 files changed

+16
-4
lines changed

tests/compile/piecewise/test_full_cudagraph.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ class BackendConfig:
4646
# FA3 on Hopper
4747
"FA3":
4848
BackendConfig(name="FA3",
49-
env_vars={"VLLM_FLASH_ATTN_VERSION": "3"},
49+
env_vars={
50+
"VLLM_FLASH_ATTN_VERSION": "3",
51+
"VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": 16,
52+
},
5053
comp_config={
5154
"cudagraph_mode": "FULL",
5255
},
@@ -90,7 +93,10 @@ class BackendConfig:
9093
# FA2
9194
"FA2":
9295
BackendConfig(name="FA2",
93-
env_vars={"VLLM_FLASH_ATTN_VERSION": "2"},
96+
env_vars={
97+
"VLLM_FLASH_ATTN_VERSION": "2",
98+
"VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": 16,
99+
},
94100
comp_config={
95101
"cudagraph_mode": "FULL",
96102
}),

tests/v1/cudagraph/test_cudagraph_mode.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,10 @@ class BackendConfig:
4747
# FA3 on Hopper
4848
"FA3":
4949
BackendConfig(name="FA3",
50-
env_vars={"VLLM_FLASH_ATTN_VERSION": "3"},
50+
env_vars={
51+
"VLLM_FLASH_ATTN_VERSION": "3",
52+
"VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": 16,
53+
},
5154
comp_config={
5255
"cudagraph_mode": "FULL",
5356
},
@@ -76,7 +79,10 @@ class BackendConfig:
7679
# FA2
7780
"FA2":
7881
BackendConfig(name="FA2",
79-
env_vars={"VLLM_FLASH_ATTN_VERSION": "2"},
82+
env_vars={
83+
"VLLM_FLASH_ATTN_VERSION": "2",
84+
"VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": 16,
85+
},
8086
comp_config={
8187
"cudagraph_mode": "FULL_AND_PIECEWISE",
8288
}),

0 commit comments

Comments
 (0)