Commit bb5799b

Fix dynamic NPU graph batchsize e2e test to release the resources it uses

1 parent bdbd417 commit bb5799b


tests/e2e/multicard/test_dynamic_npugraph_batchsize.py

Lines changed: 18 additions & 17 deletions
@@ -16,7 +16,9 @@
 #
 import pytest
 import torch
-from vllm import LLM, SamplingParams
+from vllm import SamplingParams
+
+from tests.conftest import VllmRunner

 MODELS = [
     "Qwen/Qwen2.5-0.5B-Instruct",
@@ -38,20 +40,19 @@
 def test_models(model: str, tp_size: int, max_tokens: int, temperature: int,
                 ignore_eos: bool) -> None:
     # Create an LLM.
-    llm = LLM(
-        model=model,
-        tensor_parallel_size=tp_size,
-    )
-    # Prepare sampling_params
-    sampling_params = SamplingParams(
-        max_tokens=max_tokens,
-        temperature=temperature,
-        ignore_eos=ignore_eos,
-    )
+    with VllmRunner(model_name=model,
+                    tensor_parallel_size=tp_size,
+                    ) as vllm_model:
+        # Prepare sampling_params
+        sampling_params = SamplingParams(
+            max_tokens=max_tokens,
+            temperature=temperature,
+            ignore_eos=ignore_eos,
+        )

-    # Generate texts from the prompts.
-    # The output is a list of RequestOutput objects
-    outputs = llm.generate(prompts, sampling_params)
-    torch.npu.synchronize()
-    # The output length should be equal to the prompts length.
-    assert len(outputs) == len(prompts)
+        # Generate texts from the prompts.
+        # The output is a list of RequestOutput objects
+        outputs = vllm_model.generate(prompts, sampling_params)
+        torch.npu.synchronize()
+        # The output length should be equal to the prompts length.
+        assert len(outputs) == len(prompts)
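
Why this change matters: the old test constructed a bare LLM(...) that was never torn down, so NPU memory and engine workers could stay allocated across parameterized test runs. Moving to "with VllmRunner(...)" ties cleanup to exit from the with block. The sketch below illustrates that pattern with a hypothetical NPUModelRunner wrapper; the class name, the hasattr guard, and the exact teardown calls are assumptions for illustration, and the real VllmRunner in tests/conftest.py may clean up differently.

import gc

import torch
from vllm import LLM, SamplingParams


class NPUModelRunner:
    """Context-manager wrapper that ties engine teardown to block exit.

    Hypothetical stand-in for tests.conftest.VllmRunner; the real
    helper's construction and teardown details may differ.
    """

    def __init__(self, model_name: str, tensor_parallel_size: int = 1):
        self.model = LLM(model=model_name,
                         tensor_parallel_size=tensor_parallel_size)

    def generate(self, prompts, sampling_params):
        return self.model.generate(prompts, sampling_params)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Drop the engine reference so its workers and weights become
        # unreachable, force a collection pass, and (only if torch_npu
        # is installed) return cached device blocks to the allocator.
        del self.model
        gc.collect()
        if hasattr(torch, "npu") and torch.npu.is_available():
            torch.npu.empty_cache()


if __name__ == "__main__":
    with NPUModelRunner("Qwen/Qwen2.5-0.5B-Instruct") as runner:
        params = SamplingParams(max_tokens=16, temperature=0.0)
        print(runner.generate(["Hello,"], params))
    # Device memory is released here, even if generate() raised.

The design point is that __exit__ runs even when an assertion inside the block fails, so each pytest parameterization starts from a released device, which the original llm = LLM(...) version could not guarantee.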
