2 files changed: +16 -4 lines changed
lines changed Original file line number Diff line number Diff line change @@ -46,7 +46,10 @@ class BackendConfig:
4646 # FA3 on Hopper
4747 "FA3" :
4848 BackendConfig (name = "FA3" ,
49- env_vars = {"VLLM_FLASH_ATTN_VERSION" : "3" },
49+ env_vars = {
50+ "VLLM_FLASH_ATTN_VERSION" : "3" ,
51+ "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH" : 16 ,
52+ },
5053 comp_config = {
5154 "cudagraph_mode" : "FULL" ,
5255 },
@@ -90,7 +93,10 @@ class BackendConfig:
9093 # FA2
9194 "FA2" :
9295 BackendConfig (name = "FA2" ,
93- env_vars = {"VLLM_FLASH_ATTN_VERSION" : "2" },
96+ env_vars = {
97+ "VLLM_FLASH_ATTN_VERSION" : "2" ,
98+ "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH" : 16 ,
99+ },
94100 comp_config = {
95101 "cudagraph_mode" : "FULL" ,
96102 }),
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,10 @@ class BackendConfig:
4747 # FA3 on Hopper
4848 "FA3" :
4949 BackendConfig (name = "FA3" ,
50- env_vars = {"VLLM_FLASH_ATTN_VERSION" : "3" },
50+ env_vars = {
51+ "VLLM_FLASH_ATTN_VERSION" : "3" ,
52+ "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH" : 16 ,
53+ },
5154 comp_config = {
5255 "cudagraph_mode" : "FULL" ,
5356 },
@@ -76,7 +79,10 @@ class BackendConfig:
7679 # FA2
7780 "FA2" :
7881 BackendConfig (name = "FA2" ,
79- env_vars = {"VLLM_FLASH_ATTN_VERSION" : "2" },
82+ env_vars = {
83+ "VLLM_FLASH_ATTN_VERSION" : "2" ,
84+ "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH" : 16 ,
85+ },
8086 comp_config = {
8187 "cudagraph_mode" : "FULL_AND_PIECEWISE" ,
8288 }),
You can’t perform that action at this time.
0 commit comments