diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 8f3986270868..34ff145e6ed4 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -89,7 +89,7 @@ steps: - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py - label: Chunked Prefill Test - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] source_file_dependencies: - vllm/ - tests/basic_correctness/test_chunked_prefill diff --git a/tests/basic_correctness/test_chunked_prefill.py b/tests/basic_correctness/test_chunked_prefill.py index eb5b09ff74f6..4a422e8555da 100644 --- a/tests/basic_correctness/test_chunked_prefill.py +++ b/tests/basic_correctness/test_chunked_prefill.py @@ -49,7 +49,13 @@ def use_v0_only(monkeypatch: pytest.MonkeyPatch): # NOTE: Increasing this in this suite will fail CI because we currently cannot # reset distributed env properly. Use a value > 1 just when you test. @pytest.mark.parametrize("tensor_parallel_size", [1]) -@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"]) +@pytest.mark.parametrize("attention_backend", [ + pytest.param("FLASHINFER", + marks=pytest.mark.skipif( + current_platform.is_rocm(), + reason="FLASHINFER isn't supported on ROCm")), + "FLASH_ATTN" +]) def test_models( hf_runner: HfRunner, vllm_runner: VllmRunner, @@ -99,7 +105,13 @@ def test_models( @multi_gpu_test(num_gpus=2) @pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"]) @pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"]) +@pytest.mark.parametrize("attention_backend", [ + pytest.param("FLASHINFER", + marks=pytest.mark.skipif( + current_platform.is_rocm(), + reason="FLASHINFER isn't supported on ROCm")), + "FLASH_ATTN" +]) def test_models_distributed( hf_runner: HfRunner, vllm_runner: VllmRunner, @@ -172,6 +184,8 @@ def test_models_distributed( # Due to low-precision numerical divergence, this test is too sensitive to # the async postprocessor @pytest.mark.parametrize("disable_async_output_proc", [True]) +@pytest.mark.skipif(current_platform.is_rocm(), + reason="machete_prepack_B isn't supported on ROCm") def test_models_with_fp8_kv_cache( vllm_runner: VllmRunner, example_prompts,