
Commit 2a763b8

[Bug] Fix bug in test_chunked.py (#1992)
### What this PR does / why we need it?

1. Remove the `return` statement; it always skipped the logic that follows, so the test never actually ran.
2. Update `deepseek` to `Qwen2.5-Instruct` to avoid OOM in the GitHub e2e test environment.
3. Fix the comparison logic.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Local test.

- vLLM version: v0.10.0
- vLLM main: vllm-project/vllm@0933f9d

Signed-off-by: xleoken <xleoken@163.com>
1 parent 27d038d commit 2a763b8
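
As context for fix 1 above, here is a minimal, self-contained sketch (hypothetical tests, not taken from this repository) of why a stray `return` at the top of a pytest test makes it pass vacuously: everything after it is dead code, so no assertion is ever exercised.

```python
# Hypothetical illustration: pytest marks a test as passed when the function
# returns without raising, so an early `return` silently skips every check
# that follows it.
def test_early_return_is_a_no_op():
    return  # everything below is dead code; this test always "passes"

    assert 1 == 2  # never evaluated


def test_real_check():
    # With the stray `return` removed, the assertion actually runs.
    assert 1 + 1 == 2
```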

File tree

1 file changed: +35 −27 lines


tests/e2e/singlecard/test_chunked.py

Lines changed: 35 additions & 27 deletions
```diff
@@ -19,49 +19,57 @@
 
 Run `pytest tests/compile/test_aclgraph.py`.
 """
-
 import pytest
 import torch
-from vllm import LLM, SamplingParams
+from vllm import SamplingParams
+
+from tests.e2e.conftest import VllmRunner
 
-MODELS = ["deepseek-ai/DeepSeek-V2-Lite"]
+MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]
 
 
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [1])
 def test_models(
     model: str,
     max_tokens: int,
-    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
-    return
-
-    prompts = "The president of the United States is"
+    prompts = ["The president of the United States is"]
 
     sampling_params = SamplingParams(
         max_tokens=max_tokens,
         temperature=0.0,
     )
 
-    vllm_model = LLM(model, long_prefill_token_threshold=4, enforce_eager=True)
-    output_chunked = vllm_model.generate(prompts, sampling_params)
-    logprobs_chunked = output_chunked.outputs[0].logprobs
-    del vllm_model
-    torch.npu.empty_cache()
+    with VllmRunner(model, long_prefill_token_threshold=20,
+                    enforce_eager=True) as vllm_model:
+        output1 = vllm_model.generate(prompts, sampling_params)
+
+    with VllmRunner(model,
+                    enforce_eager=True,
+                    additional_config={
+                        'ascend_scheduler_config': {
+                            'enabled': True
+                        },
+                    }) as vllm_model:
+        output2 = vllm_model.generate(prompts, sampling_params)
+
+    # Extract the generated token IDs for comparison
+    token_ids1 = output1[0][0][0]
+    token_ids2 = output2[0][0][0]
+
+    print(f"Token IDs 1: {token_ids1}")
+    print(f"Token IDs 2: {token_ids2}")
+
+    # Convert token IDs to tensors and calculate cosine similarity
+    # Take the length of a shorter sequence to ensure consistent dimensions
+    min_len = min(len(token_ids1), len(token_ids2))
+
+    tensor1 = torch.tensor(token_ids1[:min_len], dtype=torch.float32)
+    tensor2 = torch.tensor(token_ids2[:min_len], dtype=torch.float32)
 
-    vllm_model = LLM(model,
-                     enforce_eager=True,
-                     additional_config={
-                         'ascend_scheduler_config': {
-                             'enabled': True
-                         },
-                     })
-    output = vllm_model.generate(prompts, sampling_params)
-    logprobs = output.outputs[0].logprobs
-    del vllm_model
-    torch.npu.empty_cache()
+    # Calculate similarity using torch.cosine_similarity
+    similarity = torch.cosine_similarity(tensor1, tensor2, dim=0)
+    print(f"Token IDs cosine similarity: {similarity.item()}")
 
-    logprobs_similarity = torch.cosine_similarity(logprobs_chunked.flatten(),
-                                                  logprobs.flatten(),
-                                                  dim=0)
-    assert logprobs_similarity > 0.95
+    assert similarity > 0.95
```
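
For reference, a standalone sketch of how the comparison step added above behaves. The token-ID lists are invented example values, not real model output, and the snippet only mirrors the truncate-then-compare logic shown in the diff.

```python
# Standalone sketch of the token-ID comparison; the IDs are made-up values.
import torch

token_ids1 = [785, 4872, 315, 279, 3639]      # hypothetical output from run 1
token_ids2 = [785, 4872, 315, 279, 3639, 13]  # hypothetical output from run 2

# Truncate to the shorter sequence so both tensors have the same length.
min_len = min(len(token_ids1), len(token_ids2))
tensor1 = torch.tensor(token_ids1[:min_len], dtype=torch.float32)
tensor2 = torch.tensor(token_ids2[:min_len], dtype=torch.float32)

# cosine_similarity over dim=0 of two 1-D tensors returns a scalar tensor.
similarity = torch.cosine_similarity(tensor1, tensor2, dim=0)
print(similarity.item())  # 1.0 when the shared prefix is identical
assert similarity > 0.95
```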
