Add debug vllm-project#1

robinren03 · Sep 10, 2024 · e04838f · e04838f
1 parent 5720e9f
commit e04838f
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 3 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -159,7 +159,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   SET(CUTLASS_ENABLE_HEADERS_ONLY=ON)
   FetchContent_Declare(
         cutlass
-        GIT_REPOSITORY https://github.com/nvidia/cutlass.git
+        GIT_REPOSITORY git@git.tsinghua.edu.cn:ryy23/cutlass.git
         # GIT_REPOSITORY git@github.com:NVIDIA/cutlass.git
         # CUTLASS 3.5.0
         GIT_TAG 7d49e6c7e2f8896c47f586706e67e1fb215529dc

diff --git a/metrics.txt b/metrics.txt
@@ -1 +1 @@
-{"metrics_1": [0.050606489181518555], "metrics_2": []}
+{"metrics_1": [20.784107208251953, 23.877633571624756, 28.07648205757141, 0.8227071762084961, 0.4138922691345215, 0.4153611660003662, 0.4085240364074707, 0.6744792461395264, 0.38916516304016113, 0.40398669242858887, 0.4180002212524414, 0.39832615852355957, 0.4103264808654785], "metrics_2": []}
diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
@@ -825,7 +825,7 @@ def _schedule_default(self, session_id_block:Dict[str, int], session_id_arrived:
             self.swapped, SchedulerSwappedInOutputs.create_empty())
 
         # If any requests are swapped, prioritized swapped requests.
-        if not self.swapped:
+        if not self.swapped and len(remaining_running) < 15:
             remaining_waiting, prefills = self._schedule_prefills(
                 self.waiting, budget, curr_loras, enable_chunking=False)
             for seq_group in prefills.seq_groups:
@@ -882,6 +882,10 @@ def _schedule_default(self, session_id_block:Dict[str, int], session_id_arrived:
         # doesn't allow chunked prefills.
         assert len(running_scheduled.prefill_seq_groups) == 0
         assert len(swapped_in.prefill_seq_groups) == 0
+
+        print("Prefill sequence groups:", [seq_group.seq_group.request_id for seq_group in prefills.seq_groups])
+        print("Decoding sequence groups:", [seq_group.seq_group.request_id for seq_group in running_scheduled.decode_seq_groups])
+
         sched_output = SchedulerOutputs(
             scheduled_seq_groups=(prefills.seq_groups +
                                   running_scheduled.decode_seq_groups +
@@ -899,6 +903,7 @@ def _schedule_default(self, session_id_block:Dict[str, int], session_id_arrived:
             preempted=preempted,
         )
 
+        print("Is empty? is waiting?", sched_output.is_empty(), len(self.waiting))
         if sched_output.is_empty() and len(self.waiting) > 0:
             # print("Lazy detection")
             assert session_id_block or session_id_arrived
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"metrics_1": [0.050606489181518555], "metrics_2": []}
		{"metrics_1": [20.784107208251953, 23.877633571624756, 28.07648205757141, 0.8227071762084961, 0.4138922691345215, 0.4153611660003662, 0.4085240364074707, 0.6744792461395264, 0.38916516304016113, 0.40398669242858887, 0.4180002212524414, 0.39832615852355957, 0.4103264808654785], "metrics_2": []}