@@ -49,7 +49,13 @@ def use_v0_only(monkeypatch: pytest.MonkeyPatch):
 # NOTE: Increasing this in this suite will fail CI because we currently cannot
 # reset distributed env properly. Use a value > 1 just when you test.
 @pytest.mark.parametrize("tensor_parallel_size", [1])
-@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
+@pytest.mark.parametrize("attention_backend", [
+    pytest.param("FLASHINFER",
+                 marks=pytest.mark.skipif(
+                     current_platform.is_rocm(),
+                     reason="FLASHINFER isn't supported on ROCm")),
+    "FLASH_ATTN"
+])
 def test_models(
     hf_runner: HfRunner,
     vllm_runner: VllmRunner,
@@ -99,7 +105,13 @@ def test_models(
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
 @pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
+@pytest.mark.parametrize("attention_backend", [
+    pytest.param("FLASHINFER",
+                 marks=pytest.mark.skipif(
+                     current_platform.is_rocm(),
+                     reason="FLASHINFER isn't supported on ROCm")),
+    "FLASH_ATTN"
+])
 def test_models_distributed(
     hf_runner: HfRunner,
     vllm_runner: VllmRunner,
@@ -172,6 +184,8 @@ def test_models_distributed(
 # Due to low-precision numerical divergence, this test is too sensitive to
 # the async postprocessor
 @pytest.mark.parametrize("disable_async_output_proc", [True])
+@pytest.mark.skipif(current_platform.is_rocm(),
+                    reason="machete_prepack_B isn't supported on ROCm")
 def test_models_with_fp8_kv_cache(
     vllm_runner: VllmRunner,
     example_prompts,
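
For reference, the first two hunks attach `pytest.mark.skipif` to a single `pytest.param` value, so on ROCm only the FLASHINFER case is skipped while FLASH_ATTN still runs; the last hunk marks the whole test instead. A minimal standalone sketch of the same pattern (the `is_rocm()` helper below is a hypothetical stand-in for vLLM's `current_platform.is_rocm()`, used so the snippet runs without vLLM installed):

```python
import pytest


def is_rocm() -> bool:
    # Hypothetical stand-in for vllm.platforms.current_platform.is_rocm().
    return False


@pytest.mark.parametrize("attention_backend", [
    pytest.param("FLASHINFER",
                 marks=pytest.mark.skipif(
                     is_rocm(),
                     reason="FLASHINFER isn't supported on ROCm")),
    "FLASH_ATTN",
])
def test_backend_selected(attention_backend: str):
    # On ROCm only the FLASHINFER parametrization is reported as skipped;
    # the FLASH_ATTN case still executes.
    assert attention_backend in ("FLASHINFER", "FLASH_ATTN")
```

Marking the individual parameter rather than the whole test keeps the skip report granular: a ROCm run still exercises every backend that the platform supports.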