@@ -117,16 +117,14 @@ steps:
   commands:
   - pytest -v -s core
 
-- label: Entrypoints Test # 40min
+- label: Entrypoints Test (LLM) # 40min
   mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/tests"
   fast_check: true
   torch_nightly: true
   source_file_dependencies:
   - vllm/
   - tests/entrypoints/llm
-  - tests/entrypoints/openai
-  - tests/entrypoints/test_chat_utils
   - tests/entrypoints/offline_mode
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
@@ -135,9 +133,21 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
+  - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
+
+- label: Entrypoints Test (API Server) # 40min
+  mirror_hardwares: [amdexperimental]
+  working_dir: "/vllm-workspace/tests"
+  fast_check: true
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/openai
+  - tests/entrypoints/test_chat_utils
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/
   - pytest -v -s entrypoints/test_chat_utils.py
-  - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
 - label: Distributed Tests (4 GPUs) # 10min
   mirror_hardwares: [amdexperimental]
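For readability, here is how the two entrypoints steps read once the split above is applied — a sketch assembled from the kept and added lines (the mirror_hardwares, working_dir, fast_check, and torch_nightly keys carry over unchanged; elided commands are marked with YAML comments):

    - label: Entrypoints Test (LLM) # 40min
      source_file_dependencies:
      - vllm/
      - tests/entrypoints/llm
      - tests/entrypoints/offline_mode
      commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      # ...the entrypoints/llm tests listed in the hunk above...
      - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

    - label: Entrypoints Test (API Server) # 40min
      source_file_dependencies:
      - vllm/
      - tests/entrypoints/openai
      - tests/entrypoints/test_chat_utils
      commands:
      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
      - pytest -v -s entrypoints/openai # plus the --ignore flags listed above
      - pytest -v -s entrypoints/test_chat_utils.py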
@@ -149,7 +159,6 @@ steps:
   - tests/distributed/test_utils
   - tests/distributed/test_pynccl
   - tests/distributed/test_events
-  - tests/spec_decode/e2e/test_integration_dist_tp4
   - tests/compile/test_basic_correctness
   - examples/offline_inference/rlhf.py
   - examples/offline_inference/rlhf_colocate.py
@@ -172,7 +181,6 @@ steps:
   - pytest -v -s compile/test_basic_correctness.py
   - pytest -v -s distributed/test_pynccl.py
   - pytest -v -s distributed/test_events.py
-  - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
   # TODO: create a dedicated test section for multi-GPU example tests
   # when we have multiple distributed example tests
   - pushd ../examples/offline_inference
@@ -256,6 +264,7 @@ steps:
   - pytest -v -s v1/structured_output
   - pytest -v -s v1/spec_decode
   - pytest -v -s v1/kv_connector/unit
+  - pytest -v -s v1/metrics
   - pytest -v -s v1/test_serial_utils.py
   - pytest -v -s v1/test_utils.py
   - pytest -v -s v1/test_oracle.py
@@ -264,7 +273,7 @@ steps:
   # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
   - pytest -v -s v1/e2e
   # Integration test for streaming correctness (requires special branch).
-  - pip install -U git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api
+  - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
   - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
 
 - label: Examples Test # 25min
@@ -282,7 +291,7 @@ steps:
   - python3 offline_inference/llm_engine_example.py
   - python3 offline_inference/audio_language.py --seed 0
   - python3 offline_inference/vision_language.py --seed 0
-  - python3 offline_inference/vision_language_embedding.py --seed 0
+  - python3 offline_inference/vision_language_pooling.py --seed 0
   - python3 offline_inference/vision_language_multi_image.py --seed 0
   - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
   - python3 offline_inference/encoder_decoder.py
@@ -320,17 +329,6 @@ steps:
   - pytest -v -s samplers
   - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
 
-- label: Speculative decoding tests # 40min
-  mirror_hardwares: [amdexperimental]
-  source_file_dependencies:
-  - vllm/spec_decode
-  - tests/spec_decode
-  - vllm/model_executor/models/eagle.py
-  commands:
-  - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
-  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py --ignore=spec_decode/e2e/test_mtp_correctness.py
-  - pytest -v -s spec_decode/e2e/test_eagle_correctness.py
-
 - label: LoRA Test %N # 15min each
   mirror_hardwares: [amdexperimental, amdproduction]
   source_file_dependencies:
@@ -630,6 +628,18 @@ steps:
   # e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
   # *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*
 
+- label: Transformers Nightly Models Test
+  working_dir: "/vllm-workspace/"
+  optional: true
+  commands:
+  - pip install --upgrade git+https://github.com/huggingface/transformers
+  - pytest -v -s tests/models/test_initialization.py
+  - pytest -v -s tests/models/multimodal/processing/
+  - pytest -v -s tests/models/multimodal/test_mapping.py
+  - python3 examples/offline_inference/basic/chat.py
+  - python3 examples/offline_inference/audio_language.py --model-type whisper
+  - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
+
 ##### 1 GPU test #####
 ##### multi gpus test #####
 
@@ -704,7 +714,6 @@ steps:
   - pytest -v -s distributed/test_sequence_parallel.py
   # this test fails consistently.
   # TODO: investigate and fix
-  # - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
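To reproduce the new API-server suite outside CI — a minimal sketch, assuming a vLLM source checkout with the test dependencies installed; the commands mirror the step above, with the CI working_dir /vllm-workspace/tests mapping to the repo's tests/ directory:

    export VLLM_WORKER_MULTIPROC_METHOD=spawn
    cd tests
    pytest -v -s entrypoints/openai \
      --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py \
      --ignore=entrypoints/openai/test_oot_registration.py \
      --ignore=entrypoints/openai/test_tensorizer_entrypoint.py \
      --ignore=entrypoints/openai/correctness/
    pytest -v -s entrypoints/test_chat_utils.py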