@@ -201,7 +201,7 @@ jobs:
201201 max-parallel : 2
202202 matrix :
203203 os : [linux-arm64-npu-1]
204- vllm_version : [main, v0.9.1]
204+ vllm_version : [v0.9.1]
205205 name : singlecard e2e test
206206 runs-on : ${{ matrix.os }}
207207 container :
@@ -257,23 +257,7 @@ jobs:
257257 VLLM_WORKER_MULTIPROC_METHOD : spawn
258258 VLLM_USE_MODELSCOPE : True
259259 run : |
260- pytest -sv tests/e2e/singlecard/test_offline_inference.py
261- pytest -sv tests/e2e/singlecard/test_ilama_lora.py
262- pytest -sv tests/e2e/singlecard/test_guided_decoding.py
263- pytest -sv tests/e2e/singlecard/test_camem.py
264- pytest -sv tests/e2e/singlecard/test_embedding.py
265- pytest -sv tests/e2e/singlecard/ \
266- --ignore=tests/e2e/singlecard/test_offline_inference.py \
267- --ignore=tests/e2e/singlecard/test_ilama_lora.py \
268- --ignore=tests/e2e/singlecard/test_guided_decoding.py \
269- --ignore=tests/e2e/singlecard/test_camem.py \
270- --ignore=tests/e2e/singlecard/test_embedding.py \
271- --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
272- --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
273- # ------------------------------------ v1 spec decode test ------------------------------------ #
274- VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
275- # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
276- VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
260+ pytest -sv tests/e2e/singlecard/test_aclgraph.py
277261
278262 - name : Run e2e test on V0 engine
279263 if : ${{ github.event_name == 'schedule' }}
0 commit comments