@@ -108,6 +108,47 @@ jobs:
           echo "::add-matcher::.github/workflows/matchers/mypy.json"
           tools/mypy.sh 1 ${{ matrix.python-version }}
 
+  ut:
+    needs: [lint]
+    if: ${{ needs.lint.result == 'success' }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
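+        # exercise both the vLLM development branch and a pinned release tag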
+        vllm_version: [main, v0.9.1]
+        python-version: ["3.10"]
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
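+          # VLLM_TARGET_DEVICE=empty installs vLLM's Python sources without building
+          # device-specific kernels; the Ascend plugin supplies the backend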
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run unit test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
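+          # spawn rather than fork, so worker processes start from a clean interpreter state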
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+        run: |
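+          # the unit suite needs no NPU hardware; it runs on the CPU-only GitHub-hosted runner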
+          pytest -sv tests/unit
+
   e2e:
     needs: [lint]
     if: ${{ needs.lint.result == 'success' }}
@@ -173,65 +214,63 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -v -e .
 
-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run e2e test for V1 Engine
         env:
           VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
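+          # single-NPU runners exercise the singlecard suite; multi-NPU runners the multicard suite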
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
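+            # VLLM_USE_MODELSCOPE=True pulls model weights from ModelScope instead of Hugging Face Hub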
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/test_offline_inference.py
             # guided decoding doesn't work, fix it later
-            # pytest -sv tests/singlecard/test_guided_decoding.py.py
-            # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-            pytest -sv tests/singlecard/test_ascend_config.py
-            pytest -sv tests/singlecard/test_camem.py
-            pytest -sv tests/singlecard/core/test_ascend_scheduler.py
-            pytest -sv tests/singlecard/core/test_ascend_scheduler_e2e.py
-            pytest -sv tests/singlecard/ \
-              --ignore=tests/singlecard/test_offline_inference.py \
-              --ignore=tests/singlecard/test_guided_decoding.py \
-              --ignore=tests/singlecard/test_ascend_config.py \
-              --ignore=tests/singlecard/test_camem.py \
-              --ignore=tests/singlecard/core/test_ascend_scheduler.py \
-              --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+            # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+            pytest -sv tests/e2e/singlecard/test_camem.py
+            pytest -sv tests/e2e/singlecard/core/test_ascend_scheduler.py
+            pytest -sv tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
+            pytest -sv tests/e2e/singlecard/ \
+              --ignore=tests/e2e/singlecard/test_offline_inference.py \
+              --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+              --ignore=tests/e2e/singlecard/test_ascend_config.py \
+              --ignore=tests/e2e/singlecard/test_camem.py \
+              --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
+              --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
           else
-            pytest -sv tests/multicard/test_ilama_lora_tp2.py
+            pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
             # To avoid OOM, run each test in a separate process.
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
           fi
 
-      - name: Run vllm-project/vllm-ascend test on V0 engine
+      - name: Run e2e test on V0 engine
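+        # the V0 engine is only exercised on scheduled runs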
         if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
         run: |
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/test_offline_inference.py
             # guided decoding doesn't work, fix it later
-            # pytest -sv tests/singlecard/test_guided_decoding.py.py
-            pytest -sv tests/singlecard/test_camem.py
+            # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+            pytest -sv tests/e2e/singlecard/test_camem.py
             # test_ascend_config.py should be run separately because it regenerates the global config many times.
-            pytest -sv tests/singlecard/test_ascend_config.py
-            pytest -sv tests/singlecard/test_prompt_embedding.py
-            pytest -sv tests/singlecard/ \
-              --ignore=tests/singlecard/test_offline_inference.py \
-              --ignore=tests/singlecard/test_guided_decoding.py \
-              --ignore=tests/singlecard/test_camem.py \
-              --ignore=tests/singlecard/test_ascend_config.py \
-              --ignore=tests/singlecard/test_prompt_embedding.py \
-              --ignore=tests/singlecard/core/test_ascend_scheduler.py \
-              --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+            pytest -sv tests/e2e/singlecard/test_ascend_config.py
+            pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
+            pytest -sv tests/e2e/singlecard/ \
+              --ignore=tests/e2e/singlecard/test_offline_inference.py \
+              --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+              --ignore=tests/e2e/singlecard/test_camem.py \
+              --ignore=tests/e2e/singlecard/test_ascend_config.py \
+              --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
+              --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
+              --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
           else
-            pytest -sv tests/multicard/test_ilama_lora_tp2.py
-            # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+            pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+            # FIXME: running VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py raises an error.
             # To avoid OOM, run each test in a separate process.
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
           fi