Skip to content

Commit 52bbbf1

Browse files
committed
reduce max-tokens
Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent c0134d2 commit 52bbbf1

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

tests/e2e/multicard/test_offline_inference_distributed.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,14 @@ def test_models_distributed_DeepSeek_W8A8():
125125

126126

127127
def test_models_distributed_DeepSeek_dbo(monkeypatch: pytest.MonkeyPatch):
128+
max_tokens = 50
128129
with monkeypatch.context() as m:
129130
m.setenv("VLLM_ASCEND_ENABLE_DBO", "1")
130131

131132
example_prompts = ["The president of the United States is"] * 41
132133
dtype = "half"
133-
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
134+
sampling_params = SamplingParams(max_tokens=max_tokens,
135+
temperature=0.0)
134136
with VllmRunner(
135137
"deepseek-ai/DeepSeek-V2-Lite",
136138
dtype=dtype,
@@ -139,7 +141,6 @@ def test_models_distributed_DeepSeek_dbo(monkeypatch: pytest.MonkeyPatch):
139141
) as vllm_model:
140142
dpo_output = vllm_model.generate(example_prompts, sampling_params)
141143

142-
with monkeypatch.context() as m:
143144
m.setenv("VLLM_ASCEND_ENABLE_DBO", "0")
144145
with VllmRunner(
145146
"deepseek-ai/DeepSeek-V2-Lite",

0 commit comments

Comments (0)