Commit 293c004

[CI/UT][Refactor] Some refactors on UT
* move e2e spec decode and deepseek acc test to the per-PR run
* move test_fused_moe_allgather_ep.py to e2e/multicard
* remove e2e test on deepseek-v2-lite, since its accuracy is already tested

Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent: 99e6855

File tree

7 files changed (+7, -22 lines)

.github/workflows/vllm_ascend_test.yaml

Lines changed: 7 additions & 3 deletions
@@ -266,7 +266,13 @@ jobs:
           --ignore=tests/e2e/singlecard/test_offline_inference.py \
           --ignore=tests/e2e/singlecard/test_ilama_lora.py \
           --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-          --ignore=tests/e2e/singlecard/test_camem.py
+          --ignore=tests/e2e/singlecard/test_camem.py \
+          --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
+          --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          # ------------------------------------ v1 spec decode test ------------------------------------ #
+          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
+          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

       - name: Run e2e test on V0 engine
         if: ${{ github.event_name == 'schedule' }}
@@ -360,7 +366,6 @@ jobs:
           # To avoid oom, we need to run the test in a single process.
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
@@ -382,7 +387,6 @@ jobs:
           # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
           # To avoid oom, we need to run the test in a single process.
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
           pytest -sv tests/e2e/multicard/test_data_parallel.py
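
Note that the relocated spec-decode tests run as separate pytest invocations rather than inside the big --ignore'd run; the long-term workflow below carries a comment that the MTP correctness test "needs a clean process". To reproduce the per-PR run outside CI, a minimal sketch in Python (the file paths and the VLLM_USE_MODELSCOPE flag come from the workflow above; launching one interpreter per file is an assumption made to mirror the clean-process requirement):

import os
import subprocess
import sys

# Paths taken from the workflow step above.
SPEC_DECODE_TESTS = [
    "tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py",
    "tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py",
]

env = {**os.environ, "VLLM_USE_MODELSCOPE": "True"}
for test_file in SPEC_DECODE_TESTS:
    # One fresh interpreter per file mirrors the separate CI invocations,
    # so no state leaks between the two spec-decode suites.
    subprocess.run([sys.executable, "-m", "pytest", "-sv", test_file],
                   env=env, check=True)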

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 0 additions & 4 deletions
@@ -100,10 +100,6 @@ jobs:
           # v0 spec decode test
           VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode_v0/e2e/test_mtp_correctness.py # it needs a clean process
           pytest -sv tests/e2e/long_term/spec_decode_v0 --ignore=tests/e2e/long_term/spec_decode_v0/e2e/test_mtp_correctness.py
-          # v1 spec decode test
-          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode_v1/test_v1_mtp_correctness.py
-          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
-          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode_v1/test_v1_spec_decode.py
           # accuracy test single card
           pytest -sv tests/e2e/long_term/test_accuracy.py
         else

tests/e2e/multicard/test_offline_inference_distributed.py

Lines changed: 0 additions & 15 deletions
@@ -73,21 +73,6 @@ def test_models_distributed_DeepSeek_multistream_moe():
         vllm_model.generate_greedy(example_prompts, max_tokens)


-def test_models_distributed_DeepSeek():
-    example_prompts = [
-        "Hello, my name is",
-    ]
-    dtype = "half"
-    max_tokens = 5
-    with VllmRunner(
-            "deepseek-ai/DeepSeek-V2-Lite",
-            dtype=dtype,
-            tensor_parallel_size=4,
-            distributed_executor_backend="mp",
-    ) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
-
-
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
 def test_models_distributed_topk() -> None:
     example_prompts = [
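
The deleted test above followed the repo's standard VllmRunner pattern; if that DeepSeek-V2-Lite smoke coverage is ever needed ad hoc, here is a self-contained sketch reconstructed from the removed lines (the VllmRunner import path is an assumption about where the test suite's conftest helper lives):

# Reconstruction of the removed test; the import path is an assumption.
from tests.conftest import VllmRunner


def test_models_distributed_DeepSeek():
    example_prompts = [
        "Hello, my name is",
    ]
    # Greedy-decode five tokens on DeepSeek-V2-Lite across four devices,
    # using the multiprocessing distributed executor backend, exactly as
    # the removed test did.
    with VllmRunner(
            "deepseek-ai/DeepSeek-V2-Lite",
            dtype="half",
            tensor_parallel_size=4,
            distributed_executor_backend="mp",
    ) as vllm_model:
        vllm_model.generate_greedy(example_prompts, max_tokens=5)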
