11# SPDX-License-Identifier: Apache-2.0
22# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33import pytest
4- import torch
54
65import vllm
76from vllm .compilation .counter import compilation_counter
8- from vllm .config import (CompilationConfig , CompilationLevel , VllmConfig ,
9- set_current_vllm_config )
10-
11- from .piecewise .test_simple import SillyModel
7+ from vllm .config import VllmConfig
128
139
1410def test_use_cudagraphs_dynamic (monkeypatch ):
@@ -22,23 +18,24 @@ def test_use_cudagraphs_dynamic(monkeypatch):
2218
2319
2420@pytest .mark .parametrize ("enabled" , [True , False ])
25- def test_use_cudagraphs (enabled ):
21+ def test_use_cudagraphs (vllm_runner , monkeypatch , enabled ):
2622 assert vllm .envs .VLLM_USE_V1
27- vllm_config = VllmConfig (compilation_config = CompilationConfig (
28- level = CompilationLevel .PIECEWISE ,
29- use_cudagraph = enabled ,
30- cudagraph_capture_sizes = [100 ],
31- ))
32- with set_current_vllm_config (vllm_config ):
33- model = SillyModel (vllm_config = vllm_config , prefix = '' )
34-
35- inputs = torch .randn (100 , device = "cuda" )
36-
37- with compilation_counter .expect (
38- num_graphs_seen = 1 , # one graph for the model
39- num_cudagraph_captured = 1 if enabled else 0 ,
40- ):
41- # first run is warmup
42- model (inputs )
43- # second run does CUDAGraphs recording (if enabled)
44- model (inputs )
23+
24+ # Disable multiprocessing so that the counter is in the same process
25+ monkeypatch .setenv ('VLLM_ENABLE_V1_MULTIPROCESSING' , '0' )
26+
27+ compilation_config = {
28+ "cudagraph_capture_sizes" : [100 ],
29+ "use_cudagraph" : enabled ,
30+ }
31+ with (
32+ compilation_counter .expect (
33+ num_graphs_seen = 1 ,
34+ num_gpu_runner_capture_triggers = 1 if enabled else 0 ,
35+ num_cudagraph_captured = 13 if enabled else 0 ,
36+ ),
37+ # loading the model causes compilation (if enabled) to happen
38+ vllm_runner ('facebook/opt-125m' ,
39+ compilation_config = compilation_config ,
40+ gpu_memory_utilization = 0.4 ) as _ ):
41+ pass
0 commit comments