From bdb7f75899a9cf9f12b0fc8b5b72609fb6c96896 Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Fri, 19 Sep 2025 13:21:46 -0700
Subject: [PATCH 1/2] [BugFix] Ensure appropriate guards in destructors

Signed-off-by: Nick Hill
---
 vllm/compilation/collective_fusion.py                    | 2 +-
 .../distributed/device_communicators/quick_all_reduce.py | 2 +-
 .../kv_transfer/kv_connector/v1/nixl_connector.py        | 9 +++++----
 vllm/executor/executor_base.py                           | 3 ---
 vllm/v1/worker/gpu_worker.py                             | 3 ++-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py
index 71274420c342..0658b59a2e21 100644
--- a/vllm/compilation/collective_fusion.py
+++ b/vllm/compilation/collective_fusion.py
@@ -1183,7 +1183,7 @@ def __call__(self, graph: fx.Graph):
         self.end_and_log()
 
     def __del__(self):
-        if self.disabled:
+        if getattr(self, "disabled", True):
             return
         if flashinfer_comm is not None:
             flashinfer_comm.trtllm_destroy_ipc_workspace_for_all_reduce(
diff --git a/vllm/distributed/device_communicators/quick_all_reduce.py b/vllm/distributed/device_communicators/quick_all_reduce.py
index 836241910e2f..88e1d51209ca 100644
--- a/vllm/distributed/device_communicators/quick_all_reduce.py
+++ b/vllm/distributed/device_communicators/quick_all_reduce.py
@@ -268,7 +268,7 @@ def quick_all_reduce(self, inp: torch.Tensor, *, out: torch.Tensor = None):
         return out
 
     def close(self):
-        if not self.disabled and getattr(self, "_ptr", None):
+        if not self.disabled and hasattr(self, "_ptr"):
             if ops is not None:
                 ops.qr_destroy(self._ptr)
             self._ptr = 0
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
index ff62f60e5a42..d3a08af088c1 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -569,9 +569,10 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
 
     def __del__(self):
         """Cleanup background threads on destruction."""
-        self._handshake_initiation_executor.shutdown(wait=False)
-        if self._nixl_handshake_listener_t:
-            self._nixl_handshake_listener_t.join(timeout=0)
+        if executor := getattr(self, "_handshake_initiation_executor", None):
+            executor.shutdown(wait=False)
+        if listener_t := getattr(self, "_nixl_handshake_listener_t", None):
+            listener_t.join(timeout=0)
 
     @staticmethod
     def _nixl_handshake_listener(metadata: NixlAgentMetadata,
@@ -1379,4 +1380,4 @@ def reduce(self) -> dict[str, Union[int, float]]:
         # TODO: reduce stats to a single value, calculate latency/throughput
         return {
             "num_successful_transfers": self.data["num_successful_transfers"]
-        }
\ No newline at end of file
+        }
diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py
index d18bef1256af..42aa8d14a21e 100644
--- a/vllm/executor/executor_base.py
+++ b/vllm/executor/executor_base.py
@@ -235,9 +235,6 @@ def shutdown(self) -> None:
         """Shutdown the executor."""
         self.collective_rpc("shutdown")
 
-    def __del__(self):
-        self.shutdown()
-
     async def execute_model_async(
             self,
             execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]:
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index 6855526583f0..8b1e1bb8f45c 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -683,7 +683,8 @@ def save_tensorized_model(
             tensorizer_config=tensorizer_config, )
 
     def shutdown(self) -> None:
-        self.model_runner.ensure_kv_transfer_shutdown()
+        if runner := getattr(self, "model_runner", None):
+            runner.ensure_kv_transfer_shutdown()
 
 
 def init_worker_distributed_environment(

From 5b5b51b9ff8284c4b79379cccb3278c499a86b2d Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Fri, 19 Sep 2025 13:54:36 -0700
Subject: [PATCH 2/2] Update
 vllm/distributed/device_communicators/quick_all_reduce.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Nick Hill
---
 vllm/distributed/device_communicators/quick_all_reduce.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/distributed/device_communicators/quick_all_reduce.py b/vllm/distributed/device_communicators/quick_all_reduce.py
index 88e1d51209ca..836241910e2f 100644
--- a/vllm/distributed/device_communicators/quick_all_reduce.py
+++ b/vllm/distributed/device_communicators/quick_all_reduce.py
@@ -268,7 +268,7 @@ def quick_all_reduce(self, inp: torch.Tensor, *, out: torch.Tensor = None):
         return out
 
     def close(self):
-        if not self.disabled and hasattr(self, "_ptr"):
+        if not self.disabled and getattr(self, "_ptr", None):
             if ops is not None:
                 ops.qr_destroy(self._ptr)
             self._ptr = 0