Skip to content

Commit 52bbbf1

Browse files
committed
reduce max-tokens
Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent c0134d2 commit 52bbbf1

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

tests/e2e/multicard/test_offline_inference_distributed.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,14 @@ def test_models_distributed_DeepSeek_W8A8():
125125

126126

127127
def test_models_distributed_DeepSeek_dbo(monkeypatch: pytest.MonkeyPatch):
128+
max_tokens = 50
128129
with monkeypatch.context() as m:
129130
m.setenv("VLLM_ASCEND_ENABLE_DBO", "1")
130131

131132
example_prompts = ["The president of the United States is"] * 41
132133
dtype = "half"
133-
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
134+
sampling_params = SamplingParams(max_tokens=max_tokens,
135+
temperature=0.0)
134136
with VllmRunner(
135137
"deepseek-ai/DeepSeek-V2-Lite",
136138
dtype=dtype,
@@ -139,7 +141,6 @@ def test_models_distributed_DeepSeek_dbo(monkeypatch: pytest.MonkeyPatch):
139141
) as vllm_model:
140142
dpo_output = vllm_model.generate(example_prompts, sampling_params)
141143

142-
with monkeypatch.context() as m:
143144
m.setenv("VLLM_ASCEND_ENABLE_DBO", "0")
144145
with VllmRunner(
145146
"deepseek-ai/DeepSeek-V2-Lite",

0 commit comments

Comments (0)