Skip to content

Commit 44bc46d

Browse files
[Bugfix] Actually disable processing cache when API server is scaled out (#21839)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent b7b23da commit 44bc46d

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

vllm/entrypoints/cli/serve.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -140,11 +140,16 @@ def run_multi_api_server(args: argparse.Namespace):
140140
num_api_servers = args.api_server_count
141141
assert num_api_servers > 0
142142

143+
orig_disable_mm_preprocessor_cache = args.disable_mm_preprocessor_cache
144+
143145
# set_process_title("ProcManager")
144146

145147
if num_api_servers > 1:
146148
setup_multiprocess_prometheus()
147149

150+
# Not compatible with API server scale-out
151+
args.disable_mm_preprocessor_cache = True
152+
148153
listen_address, sock = setup_server(args)
149154

150155
engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
@@ -161,11 +166,9 @@ def run_multi_api_server(args: argparse.Namespace):
161166
"with api_server_count > 1")
162167

163168
if model_config.is_multimodal_model and not (
164-
model_config.disable_mm_preprocessor_cache):
165-
logger.warning(
166-
"Multi-model preprocessor cache will be disabled for"
167-
" api_server_count > 1")
168-
model_config.disable_mm_preprocessor_cache = True
169+
orig_disable_mm_preprocessor_cache):
170+
logger.warning("Multi-model preprocessor cache will be disabled "
171+
"for api_server_count > 1")
169172

170173
executor_class = Executor.get_class(vllm_config)
171174
log_stats = not engine_args.disable_log_stats

0 commit comments

Comments
 (0)