diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index 8641833e438b..77df9ed54095 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -252,14 +252,18 @@ def __init__(self, vllm_config: VllmConfig, executor_class: Type[Executor],
         outputs_queue = self.outputs_queue
 
         def process_outputs_socket():
-            while True:
-                (frame, ) = output_socket.recv_multipart(copy=False)
-                outputs = decoder.decode(frame.buffer)
-                if outputs.utility_output:
-                    _process_utility_output(outputs.utility_output,
-                                            utility_results)
-                else:
-                    outputs_queue.put_nowait(outputs)
+            try:
+                while True:
+                    (frame, ) = output_socket.recv_multipart(copy=False)
+                    outputs = decoder.decode(frame.buffer)
+                    if outputs.utility_output:
+                        _process_utility_output(outputs.utility_output,
+                                                utility_results)
+                    else:
+                        outputs_queue.put_nowait(outputs)
+            except zmq.error.ContextTerminated:
+                # Expected when the class is GC'd / during process termination.
+                pass
 
         # Process outputs from engine in separate thread.
         Thread(target=process_outputs_socket, daemon=True).start()