We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c7f3b7d · commit c0134d2 (copy full SHA for c0134d2)
tests/e2e/multicard/test_offline_inference_distributed.py
@@ -50,6 +50,21 @@ def test_models_distributed_QwQ():
50
vllm_model.generate_greedy(example_prompts, max_tokens)
51
52
53
def test_models_distributed_DeepSeek():
    """Smoke-test distributed greedy generation with DeepSeek-V2-Lite.

    Runs a single short prompt through the model at half precision,
    sharded across 4 cards with the multiprocessing ("mp") distributed
    executor backend, and only checks that generation completes.
    """
    prompts = ["Hello, my name is"]
    # 5 new tokens is enough to exercise the generation path end-to-end.
    with VllmRunner(
            "deepseek-ai/DeepSeek-V2-Lite",
            dtype="half",
            tensor_parallel_size=4,
            distributed_executor_backend="mp",
    ) as runner:
        runner.generate_greedy(prompts, 5)

67
68
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
69
def test_models_distributed_topk() -> None:
70
example_prompts = [
0 commit comments