1 file changed, with 10 additions and 10 deletions.
components/backends/vllm/src/dynamo/vllm: 1 file changed, with 10 additions and 10 deletions.
Columns: original file line number, diff line number, diff line change.
@@ -145,16 +145,6 @@ async def init(runtime: DistributedRuntime, config: Config):
145145 .client ()
146146 )
147147
148- if not config .engine_args .data_parallel_rank : # if rank is 0 or None then register
149- await register_llm (
150- ModelType .Backend ,
151- generate_endpoint ,
152- config .model ,
153- config .served_model_name ,
154- kv_cache_block_size = config .engine_args .block_size ,
155- migration_limit = config .migration_limit ,
156- )
157-
158148 factory = StatLoggerFactory (component , config .engine_args .data_parallel_rank or 0 )
159149 engine_client , vllm_config , default_sampling_params = setup_vllm_engine (
160150 config , factory
@@ -190,6 +180,16 @@ async def init(runtime: DistributedRuntime, config: Config):
190180
191181 handler .kv_publisher = kv_publisher
192182
183+ if not config .engine_args .data_parallel_rank : # if rank is 0 or None then register
184+ await register_llm (
185+ ModelType .Backend ,
186+ generate_endpoint ,
187+ config .model ,
188+ config .served_model_name ,
189+ kv_cache_block_size = config .engine_args .block_size ,
190+ migration_limit = config .migration_limit ,
191+ )
192+
193193 try :
194194 await asyncio .gather (
195195 # for decode, we want to transfer the in-flight requests to other decode engines,
You can’t perform that action at this time.
0 commit comments