@@ -136,9 +136,9 @@ jobs:
     strategy:
       max-parallel: 2
       matrix:
-        os: [linux-aarch64-a2-1, linux-aarch64-a2-2]
+        os: [linux-aarch64-a2-1]
         vllm_version: [main, v0.10.0]
-    name: e2e test (${{ matrix.os }} - ${{ matrix.vllm_version }})
+    name: singlecard e2e test
     runs-on: ${{ matrix.os }}
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
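For context, `strategy.matrix` expands into one job instance per combination of its keys, so after this hunk the singlecard job still runs twice (once per `vllm_version`) while the second runner pool is freed up for the new multicard job below. A minimal sketch of the expansion, with illustrative comments:

strategy:
  max-parallel: 2            # cap concurrent matrix jobs at two
  matrix:
    os: [linux-aarch64-a2-1]
    vllm_version: [main, v0.10.0]
# expands to two jobs:
#   (os=linux-aarch64-a2-1, vllm_version=main)
#   (os=linux-aarch64-a2-1, vllm_version=v0.10.0)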
@@ -191,34 +191,93 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          if [[ "${{ matrix.os }}" == "linux-aarch64-a2-1" ]]; then
-            pytest -sv tests/e2e/singlecard/test_offline_inference.py
-            pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-            pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-            pytest -sv tests/e2e/singlecard/test_camem.py
-            pytest -sv tests/e2e/singlecard/test_embedding.py
-            pytest -sv tests/e2e/singlecard/ \
-              --ignore=tests/e2e/singlecard/test_offline_inference.py \
-              --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-              --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-              --ignore=tests/e2e/singlecard/test_camem.py \
-              --ignore=tests/e2e/singlecard/test_embedding.py \
-              --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
-              --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
-            # ------------------------------------ v1 spec decode test ------------------------------------ #
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-            # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
-          else
-            pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-            # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
-            # To avoid oom, we need to run the test in a single process.
-            pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-            pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-            pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
-            pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
-            pytest -sv tests/e2e/multicard/test_data_parallel.py
-            pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
-              --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
-              --ignore=tests/e2e/multicard/test_data_parallel.py
-          fi
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/test_embedding.py
+          pytest -sv tests/e2e/singlecard/ \
+            --ignore=tests/e2e/singlecard/test_offline_inference.py \
+            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+            --ignore=tests/e2e/singlecard/test_camem.py \
+            --ignore=tests/e2e/singlecard/test_embedding.py \
+            --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
+            --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          # ------------------------------------ v1 spec decode test ------------------------------------ #
+          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
+          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+
+  e2e-4-cards:
+    needs: [e2e]
+    if: ${{ needs.e2e.result == 'success' }}
+    strategy:
+      max-parallel: 2
+      matrix:
+        os: [linux-aarch64-a2-2]
+        vllm_version: [main, v0.10.0]
+    name: multicard e2e test
+    runs-on: ${{ matrix.os }}
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+    env:
+      VLLM_LOGGING_LEVEL: ERROR
+      VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        run: |
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # FIXME: running `VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py` as a whole raises an error.
+          # To avoid OOM, we run each test in its own pytest process.
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
+          pytest -sv tests/e2e/multicard/test_data_parallel.py
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
+            --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
+            --ignore=tests/e2e/multicard/test_data_parallel.py
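The new job is chained to the singlecard one through `needs` plus a job-level `if`: GitHub Actions schedules `e2e-4-cards` only after `e2e` completes, and the `needs.e2e.result == 'success'` check skips it when the singlecard run failed, so the 4-card runners are not tied up by changes that already broke on one card. A minimal standalone sketch of the same gating pattern (job names and steps here are illustrative, not part of this workflow):

jobs:
  quick:
    runs-on: ubuntu-latest
    steps:
      - run: echo "cheap checks first"
  expensive:
    needs: [quick]                             # wait for `quick` to finish
    if: ${{ needs.quick.result == 'success' }} # skip unless it passed
    runs-on: ubuntu-latest
    steps:
      - run: echo "costly checks only on success"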