Skip to content

Commit 7731ac1

Browse files
committed
add torch.profiler.record_function to show number of new/cached requests
1 parent 099c046 commit 7731ac1

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

vllm/v1/worker/gpu_worker.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,11 @@ def execute_model(
359359
get_pp_group().recv_tensor_dict(
360360
all_gather_group=get_tp_group()))
361361

362-
output = self.model_runner.execute_model(scheduler_output,
362+
# add trace annotation so that we can easily distinguish new/cached request numbers in each iteration
363+
num_new_reqs = len(scheduler_output.scheduled_new_reqs)
364+
num_cached_reqs = len(scheduler_output.scheduled_cached_reqs.req_ids)
365+
with torch.profiler.record_function(f"execute_{num_new_reqs}_{num_cached_reqs}"):
366+
output = self.model_runner.execute_model(scheduler_output,
363367
intermediate_tensors)
364368

365369
parallel_config = self.vllm_config.parallel_config

0 commit comments

Comments
 (0)