Skip to content

Commit f57cc24

Browse files
committed
update logging
1 parent 9bb63d7 commit f57cc24

File tree

1 file changed

3 additions and 4 deletions

1 file changed

3 additions and 4 deletions

examples/vllm_v0/components/worker.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,6 @@ async def callback(request: RemotePrefillRequest):
202202

203203
@endpoint()
204204
async def generate(self, request: PreprocessedRequest):
205-
print(request)
206205
request_id = str(uuid.uuid4())
207206

208207
if self.disaggregated_router is not None:
@@ -213,7 +212,7 @@ async def generate(self, request: PreprocessedRequest):
213212
prefill_queue_size = await prefill_queue.get_queue_size()
214213
disagg_router_decision = await self.disaggregated_router.prefill_remote(
215214
len(request.token_ids),
216-
0, # TODO: return prefix hit rate from dynamo-run router
215+
request.estimated_prefix_hit_num_blocks * self.engine_args.block_size,
217216
prefill_queue_size,
218217
)
219218
else:
@@ -226,12 +225,12 @@ async def generate(self, request: PreprocessedRequest):
226225
remote_prefill_request_callback=self.get_remote_prefill_request_callback(),
227226
)
228227
logger.info(
229-
f"Prefilling remotely for request {request_id} with length {len(request.token_ids)}"
228+
f"Prefilling remotely for request {request_id} with length {len(request.token_ids)} (estimated prefix hit length {request.estimated_prefix_hit_num_blocks * self.engine_args.block_size})"
230229
)
231230
else:
232231
remote_prefill_params = None
233232
logger.info(
234-
f"Prefilling locally for request {request_id} with length {len(request.token_ids)}"
233+
f"Prefilling locally for request {request_id} with length {len(request.token_ids)} (estimated prefix hit length {request.estimated_prefix_hit_num_blocks * self.engine_args.block_size})"
235234
)
236235

237236
sampling_params = SamplingParams(**self.default_sampling_params)

0 commit comments

Comments
 (0)