.buildkite/test-pipeline.yaml (1 addition, 1 deletion)

@@ -89,7 +89,7 @@ steps:
   - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
 
 - label: Chunked Prefill Test
-  mirror_hardwares: [amdexperimental]
+  mirror_hardwares: [amdexperimental, amdproduction]
   source_file_dependencies:
   - vllm/
   - tests/basic_correctness/test_chunked_prefill
tests/basic_correctness/test_chunked_prefill.py (16 additions, 2 deletions)

@@ -49,7 +49,13 @@ def use_v0_only(monkeypatch: pytest.MonkeyPatch):
 # NOTE: Increasing this in this suite will fail CI because we currently cannot
 # reset distributed env properly. Use a value > 1 just when you test.
 @pytest.mark.parametrize("tensor_parallel_size", [1])
-@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
+@pytest.mark.parametrize("attention_backend", [
+    pytest.param("FLASHINFER",
+                 marks=pytest.mark.skipif(
+                     current_platform.is_rocm(),
+                     reason="FLASHINFER isn't supported on ROCm")),
+    "FLASH_ATTN"
+])
 def test_models(
     hf_runner: HfRunner,
     vllm_runner: VllmRunner,
@@ -99,7 +105,13 @@ def test_models(
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
 @pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("attention_backend", ["FLASHINFER", "FLASH_ATTN"])
+@pytest.mark.parametrize("attention_backend", [
+    pytest.param("FLASHINFER",
+                 marks=pytest.mark.skipif(
+                     current_platform.is_rocm(),
+                     reason="FLASHINFER isn't supported on ROCm")),
+    "FLASH_ATTN"
+])
 def test_models_distributed(
     hf_runner: HfRunner,
     vllm_runner: VllmRunner,
@@ -172,6 +184,8 @@ def test_models_distributed(
 # Due to low-precision numerical divergence, this test is too sensitive to
 # the async postprocessor
 @pytest.mark.parametrize("disable_async_output_proc", [True])
+@pytest.mark.skipif(current_platform.is_rocm(),
+                    reason="machete_prepack_B isn't supported on ROCm")
 def test_models_with_fp8_kv_cache(
     vllm_runner: VllmRunner,
     example_prompts,
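Note on the two skip styles used in this change: wrapping a single value in pytest.param(..., marks=pytest.mark.skipif(...)) skips only that parametrized case (here, FLASHINFER on ROCm) while the other backends still run, whereas a bare @pytest.mark.skipif on the test function skips every parametrization of that test. The snippet below is a minimal, self-contained sketch of both patterns; _is_rocm() is a hypothetical stand-in for vLLM's current_platform.is_rocm(), used only so the example runs without vLLM installed.

import pytest


def _is_rocm() -> bool:
    # Hypothetical stand-in for current_platform.is_rocm(); assume a
    # non-ROCm machine so both backends are exercised.
    return False


# Per-case skip: only the FLASHINFER parametrization is skipped on ROCm;
# FLASH_ATTN keeps running.
@pytest.mark.parametrize("attention_backend", [
    pytest.param("FLASHINFER",
                 marks=pytest.mark.skipif(
                     _is_rocm(),
                     reason="FLASHINFER isn't supported on ROCm")),
    "FLASH_ATTN",
])
def test_backend_param(attention_backend: str) -> None:
    assert attention_backend in ("FLASHINFER", "FLASH_ATTN")


# Whole-test skip: every parametrization is skipped on ROCm, as done above for
# the fp8 KV-cache test that depends on machete_prepack_B.
@pytest.mark.skipif(_is_rocm(),
                    reason="machete_prepack_B isn't supported on ROCm")
def test_whole_test_skip() -> None:
    assert True

Running pytest -v -rs on such a file lists each skipped case together with its skip reason.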