Skip to content

Commit b9aa9ac

Browse files
mend
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
1 parent 2b598fd commit b9aa9ac

File tree

3 files changed

+5
-8
lines changed

3 files changed

+5
-8
lines changed

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ jobs:
       - name: Run vllm-project/vllm-ascend long term test
         run: |
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-            pytest -sv tests/e2e/long_term/test_accuracy.py
-          # else
+            pytest -sv tests/e2e/long_term/accuracy/accuracy_singlecard.py
+          else
             # accuracy test multi card
-            # VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py
+            pytest -sv tests/e2e/long_term/accuracy/accuracy_multicard.py
           fi

tests/e2e/long_term/accuracy/accuracy_multicard.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
 EXPECTED_VALUE = {
     "Qwen/Qwen2.5-0.5B-Instruct": 0.316,
     "Qwen/Qwen3-30B-A3B": 0.888,
-    "deepseek-ai/DeepSeek-V2-Lite": 0.376
+    "deepseek-ai/DeepSeek-V2-Lite": 0.375
 }
 # Maximum context length configuration for each model.
 MAX_MODEL_LEN = {
@@ -98,7 +98,6 @@
 }

 multiprocessing.set_start_method("spawn", force=True)
-os.environ["VLLM_USE_V1"] = "1"


 def run_test(queue, model, max_model_len, model_type, more_args):

tests/e2e/long_term/accuracy/accuracy_singlecard.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,8 @@ def run_test(queue, model, max_model_len, model_type):


 @pytest.mark.parametrize("model", MODEL_NAME)
-@pytest.mark.parametrize("VLLM_USE_V1", ["0", "1"])
-def test_lm_eval_accuracy(monkeypatch: pytest.MonkeyPatch, model, VLLM_USE_V1):
+def test_lm_eval_accuracy(monkeypatch: pytest.MonkeyPatch, model):
     with monkeypatch.context() as m:
-        m.setenv("VLLM_USE_V1", VLLM_USE_V1)
         result_queue: Queue[float] = multiprocessing.Queue()
         p = multiprocessing.Process(target=run_test,
                                     args=(result_queue, model,

0 commit comments

Comments
 (0)