
Commit 48e0ea7

feat: add SCV mode scaffolding

yuz207 authored and committed

1 parent: 499e424

2 files changed: 10 insertions(+), 0 deletions(-)

vllm/envs.py

Lines changed: 3 additions & 0 deletions

@@ -200,6 +200,7 @@
     VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
     VLLM_DISABLE_NWOR: bool = False
     VLLM_NWOR_MODE: str = "stage"
+    VLLM_SCV_MODE: str = "off"
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
     VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
     VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False

@@ -1315,6 +1316,8 @@ def get_vllm_port() -> int | None:
     "VLLM_DISABLE_NWOR": lambda: bool(int(os.getenv("VLLM_DISABLE_NWOR", "0"))),
     # Select NWOR mode: "stage" (default) or "immediate" to bypass staging.
     "VLLM_NWOR_MODE": lambda: os.getenv("VLLM_NWOR_MODE", "stage"),
+    # Speculative chunk verify mode: "off" (default), "graph", or "adaptive".
+    "VLLM_SCV_MODE": lambda: os.getenv("VLLM_SCV_MODE", "off"),
     # Used to force set up loopback IP
     "VLLM_LOOPBACK_IP": lambda: os.getenv("VLLM_LOOPBACK_IP", ""),
     # Used to set the process name prefix for vLLM processes.
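For context, a minimal usage sketch (not part of this commit) of how the new knob would be picked up through vllm.envs; only the variable name and its allowed values come from the diff above, the rest is illustrative.

import os

# Hypothetical usage: select the SCV mode before vLLM reads its environment.
os.environ["VLLM_SCV_MODE"] = "graph"   # "off" (default), "graph", or "adaptive"

import vllm.envs as envs

# The lambda registered in the diff above supplies the value on attribute access.
print(envs.VLLM_SCV_MODE)   # -> "graph"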

vllm/v1/worker/gpu_model_runner.py

Lines changed: 7 additions & 0 deletions

@@ -509,6 +509,13 @@ def __init__(
         # Cached outputs.
         self._deferred_write_manager = DeferredWriteManager(mode=envs.VLLM_NWOR_MODE)
         self._latest_nwor_window_metrics: dict[str, int | str] | None = None
+        self._scv_mode = envs.VLLM_SCV_MODE.lower()
+
+    def _scv_enabled(self) -> bool:
+        if self._scv_mode not in ("off", "graph", "adaptive"):
+            logger.warning("SCV: unsupported mode '%s', disabling.", self._scv_mode)
+            self._scv_mode = "off"
+        return self._scv_mode != "off"
         self._draft_token_ids: list[list[int]] | torch.Tensor | None = None
         self.transfer_event = torch.cuda.Event()
         self.sampled_token_ids_pinned_cpu = torch.empty(
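A self-contained sketch of the guard behavior the new helper introduces: unsupported modes are reported and fall back to "off". The _ScvModeDemo class is a hypothetical stand-in for GPUModelRunner, not code from this commit.

import logging

logger = logging.getLogger(__name__)

class _ScvModeDemo:
    # Hypothetical stand-in mirroring the _scv_enabled() guard added above.
    def __init__(self, mode: str) -> None:
        self._scv_mode = mode.lower()

    def _scv_enabled(self) -> bool:
        # Unknown modes are reported and then treated as "off".
        if self._scv_mode not in ("off", "graph", "adaptive"):
            logger.warning("SCV: unsupported mode '%s', disabling.", self._scv_mode)
            self._scv_mode = "off"
        return self._scv_mode != "off"

assert _ScvModeDemo("graph")._scv_enabled() is True
assert _ScvModeDemo("adaptive")._scv_enabled() is True
assert _ScvModeDemo("turbo")._scv_enabled() is False   # warns, then falls back to "off"
assert _ScvModeDemo("off")._scv_enabled() is False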
