Skip to content

Commit 8291172

Browse files
authored
fix: Move register_llm_block down (#2316)
1 parent: 12fe355 · commit: 8291172

File tree

1 file changed

+10
-10
lines changed
  • components/backends/vllm/src/dynamo/vllm

1 file changed

+10
-10
lines changed

components/backends/vllm/src/dynamo/vllm/main.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -145,16 +145,6 @@ async def init(runtime: DistributedRuntime, config: Config):
145145
.client()
146146
)
147147

148-
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
149-
await register_llm(
150-
ModelType.Backend,
151-
generate_endpoint,
152-
config.model,
153-
config.served_model_name,
154-
kv_cache_block_size=config.engine_args.block_size,
155-
migration_limit=config.migration_limit,
156-
)
157-
158148
factory = StatLoggerFactory(component, config.engine_args.data_parallel_rank or 0)
159149
engine_client, vllm_config, default_sampling_params = setup_vllm_engine(
160150
config, factory
@@ -190,6 +180,16 @@ async def init(runtime: DistributedRuntime, config: Config):
190180

191181
handler.kv_publisher = kv_publisher
192182

183+
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
184+
await register_llm(
185+
ModelType.Backend,
186+
generate_endpoint,
187+
config.model,
188+
config.served_model_name,
189+
kv_cache_block_size=config.engine_args.block_size,
190+
migration_limit=config.migration_limit,
191+
)
192+
193193
try:
194194
await asyncio.gather(
195195
# for decode, we want to transfer the in-flight requests to other decode engines,

0 commit comments

Comments
 (0)