 # This file is a part of the vllm-ascend project.
 #

-name: 'e2e test / basic'
+name: 'test'

 on:
   schedule:
@@ -114,6 +114,56 @@ jobs:
           echo "::add-matcher::.github/workflows/matchers/mypy.json"
           tools/mypy.sh 1 ${{ matrix.python-version }}

+  ut:
+    needs: [lint]
+    name: unit test
+    if: ${{ needs.lint.result == 'success' }}
+    runs-on: ubuntu-latest
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    strategy:
+      matrix:
+        vllm_version: [main, v0.9.1]
+    steps:
+      - name: Install packages
+        run: |
+          apt-get update -y
+          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip uninstall -y triton
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
+
+      - name: Run unit test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          pytest -sv tests/unit
+
   e2e:
     needs: [lint]
     if: ${{ needs.lint.result == 'success' }}
@@ -122,7 +172,7 @@ jobs:
       matrix:
         os: [linux-arm64-npu-1]
         vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: singlecard e2e test
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -168,53 +218,47 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -v -e .

-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run e2e test for V1 Engine
         env:
           VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
           # TODO: switch hf to modelscope
           VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           # TODO(sss): guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_camem.py
-          pytest -sv tests/singlecard/ \
-          --ignore=tests/singlecard/test_offline_inference.py \
-          --ignore=tests/singlecard/test_ilama_lora.py \
-          --ignore=tests/singlecard/test_guided_decoding.py \
-          --ignore=tests/singlecard/test_ascend_config.py \
-          --ignore=tests/singlecard/test_camem.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/ \
+          --ignore=tests/e2e/singlecard/test_offline_inference.py \
+          --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+          --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+          --ignore=tests/e2e/singlecard/test_camem.py

-      - name: Run vllm-project/vllm-ascend test on V0 engine
+      - name: Run e2e test on V0 engine
         if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
           # TODO: switch hf to modelscope
           VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           # guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          pytest -sv tests/singlecard/test_camem.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_prompt_embedding.py
-          pytest -sv tests/singlecard/ \
-          --ignore=tests/singlecard/test_offline_inference.py \
-          --ignore=tests/singlecard/test_ilama_lora.py \
-          --ignore=tests/singlecard/test_guided_decoding.py \
-          --ignore=tests/singlecard/test_camem.py \
-          --ignore=tests/singlecard/test_ascend_config.py \
-          --ignore=tests/singlecard/test_prompt_embedding.py \
-          --ignore=tests/singlecard/core/test_ascend_scheduler.py \
-          --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
+          pytest -sv tests/e2e/singlecard/ \
+          --ignore=tests/e2e/singlecard/test_offline_inference.py \
+          --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+          --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+          --ignore=tests/e2e/singlecard/test_camem.py \
+          --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
+          --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
+          --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py

   e2e-4-cards:
     needs: [e2e]
@@ -224,7 +268,7 @@ jobs:
       matrix:
         os: [linux-arm64-npu-4]
         vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: multicard e2e test
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -279,14 +323,14 @@ jobs:
         run: |
           # TODO: switch hf to modelscope
           VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
           # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py

       - name: Run vllm-project/vllm-ascend test on V0 engine
         if: ${{ github.event_name == 'schedule' }}
@@ -296,11 +340,11 @@ jobs:
         run: |
           # TODO: switch hf to modelscope
           VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
           # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py