diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py index bfea106bc027d..b572d7224079c 100644 --- a/vllm/distributed/device_communicators/shm_broadcast.py +++ b/vllm/distributed/device_communicators/shm_broadcast.py @@ -108,8 +108,14 @@ def __init__(self, # created by the process. The following patch is a workaround. with patch("multiprocessing.resource_tracker.register", lambda *args, **kwargs: None): - self.shared_memory = shared_memory.SharedMemory(name=name) - assert self.shared_memory.size == self.total_bytes_of_buffer + try: + self.shared_memory = shared_memory.SharedMemory(name=name) + assert self.shared_memory.size == self.total_bytes_of_buffer # noqa + except FileNotFoundError: + # we might deserialize the object in a different node + # in this case, this object is not used, + # and we should suppress the error + pass def __reduce__(self): return (