We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 099c046 · commit 7731ac1 — Copy full SHA for 7731ac1
vllm/v1/worker/gpu_worker.py
@@ -359,7 +359,11 @@ def execute_model(
359
get_pp_group().recv_tensor_dict(
360
all_gather_group=get_tp_group()))
361
362
- output = self.model_runner.execute_model(scheduler_output,
+ # add trace annotation so that we can easily distinguish new/cached request numbers in each iteration
363
+ num_new_reqs = len(scheduler_output.scheduled_new_reqs)
364
+ num_cached_reqs = len(scheduler_output.scheduled_cached_reqs.req_ids)
365
+ with torch.profiler.record_function(f"execute_{num_new_reqs}_{num_cached_reqs}"):
366
+ output = self.model_runner.execute_model(scheduler_output,
367
intermediate_tensors)
368
369
parallel_config = self.vllm_config.parallel_config
0 commit comments