We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 099c046 · commit 7731ac1 — Copy full SHA for 7731ac1
vllm/v1/worker/gpu_worker.py
@@ -359,7 +359,11 @@ def execute_model(
359
get_pp_group().recv_tensor_dict(
360
all_gather_group=get_tp_group()))
361
362
- output = self.model_runner.execute_model(scheduler_output,
+ # add trace annotation so that we can easily distinguish new/cached request numbers in each iteration
363
+ num_new_reqs = len(scheduler_output.scheduled_new_reqs)
364
+ num_cached_reqs = len(scheduler_output.scheduled_cached_reqs.req_ids)
365
+ with torch.profiler.record_function(f"execute_{num_new_reqs}_{num_cached_reqs}"):
366
+ output = self.model_runner.execute_model(scheduler_output,
367
intermediate_tensors)
368
369
parallel_config = self.vllm_config.parallel_config
0 commit comments