2 files changed: +13 -1 lines changed
Original file line number | Diff line number | Diff line change
1313from tqdm import tqdm
1414
1515import vllm .envs as envs
16- from vllm import LLM , SamplingParams
1716from vllm .benchmarks .lib .utils import (convert_to_pytorch_benchmark_format ,
1817 write_to_json )
1918from vllm .engine .arg_utils import EngineArgs
@@ -85,6 +84,9 @@ def main(args: argparse.Namespace):
8584 "Please set it to a valid path to use torch profiler." )
8685 engine_args = EngineArgs .from_cli_args (args )
8786
87+ # Lazy import to avoid importing LLM when the bench command is not selected.
88+ from vllm import LLM , SamplingParams
89+
8890 # NOTE(woosuk): If the request cannot be processed in a single batch,
8991 # the engine will automatically process the request in multiple batches.
9092 llm = LLM (** dataclasses .asdict (engine_args ))
Original file line number | Diff line number | Diff line change
88import inspect
99import json
1010import multiprocessing
11+ import multiprocessing .forkserver as forkserver
1112import os
1213import signal
1314import socket
@@ -155,6 +156,15 @@ async def build_async_engine_client(
155156 client_config : Optional [dict [str , Any ]] = None ,
156157) -> AsyncIterator [EngineClient ]:
157158
159+ if os .getenv ("VLLM_WORKER_MULTIPROC_METHOD" ) == "forkserver" :
160+ # The executor is expected to be mp.
161+ # Pre-import heavy modules in the forkserver process
162+ logger .debug ("Setup forkserver with pre-imports" )
163+ multiprocessing .set_start_method ('forkserver' )
164+ multiprocessing .set_forkserver_preload (["vllm.v1.engine.async_llm" ])
165+ forkserver .ensure_running ()
166+ logger .debug ("Forkserver setup complete!" )
167+
158168 # Context manager to handle engine_client lifecycle
159169 # Ensures everything is shutdown and cleaned up on error/exit
160170 engine_args = AsyncEngineArgs .from_cli_args (args )
You can’t perform that action at this time.
0 commit comments