 os.environ["VLLM_USE_V1"] = "1"

 # Sample prompts.
-prompts = [
-    "Hello, my name is",
-    "The president of the United States is",
-    "The capital of France is",
-    "The future of AI is",
-] * 10
+prompts = ["The president of the United States is"] * 41
 # Create a sampling params object.
-sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+sampling_params = SamplingParams(max_tokens=100, temperature=0.0)

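Note on the sampling change: with temperature=0.0, decoding is greedy and therefore deterministic, so all 41 copies of the repeated prompt should produce identical completions; the example doubles as a quick correctness check for the DBO path. A minimal sketch of such a check, assuming vLLM's standard RequestOutput shape (the helper name is hypothetical, not part of this diff):

    def check_deterministic(outputs):
        # Hypothetical helper, not part of this diff: under greedy decoding,
        # every copy of the repeated prompt should yield identical text.
        texts = [out.outputs[0].text for out in outputs]
        assert all(t == texts[0] for t in texts), "generations diverged under greedy decoding"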
|

 def main():
     # Create an LLM.
     llm = LLM(
-        model="deepseek-ai/DeepSeek-V2-Lite",
+        model="deepseek-ai/DeepSeek-V3-Lite-base-latest-w8a8-dynamic",
         hf_overrides={
             "architectures": ["DeepseekDBOForCausalLM"],
         },  # override the model arch to run the dbo model
         enforce_eager=True,
-        tensor_parallel_size=8,
-        max_num_seqs=16,
-        max_model_len=8192,
-        max_num_batched_tokens=32768,
-        block_size=128,
-        compilation_config=1,
-        gpu_memory_utilization=0.96)
+        tensor_parallel_size=2,
+        max_model_len=4096,
+        trust_remote_code=True,
+        additional_config={
+            "torchair_graph_config": {
+                "enabled": False
+            },
+            "ascend_scheduler_config": {
+                "enabled": True
+            },
+            "expert_tensor_parallel_size": 1
+        })
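The replacement keyword arguments are vllm-ascend extensions rather than core vLLM engine options. A commented restatement of the added additional_config, with the meaning of each key stated as an assumption drawn from the vllm-ascend documentation rather than from this diff:

    # Assumed semantics of the vllm-ascend keys (from its docs, not this diff):
    additional_config = {
        # Leave TorchAir graph capture off; consistent with enforce_eager=True.
        "torchair_graph_config": {"enabled": False},
        # Schedule with the Ascend scheduler instead of the default V1 scheduler.
        "ascend_scheduler_config": {"enabled": True},
        # No expert-level tensor parallelism for the MoE experts.
        "expert_tensor_parallel_size": 1,
    }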

     # Generate texts from the prompts. The output is a list of RequestOutput
     # objects that contain the prompt, generated text, and other information.
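The diff ends at the comment above. In vLLM's offline-inference examples, main() typically continues with a generate call and a print loop; a minimal sketch of that tail, assuming the stock example structure (not shown in this diff), indented as it would appear inside main():

    outputs = llm.generate(prompts, sampling_params)
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")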
|