Skip to content

Commit

Permalink
Add debug vllm-project#1
Browse files: browse the repository at this point in the history
  • Loading branch information
robinren03 committed Sep 10, 2024
1 parent 5720e9f commit e04838f
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
SET(CUTLASS_ENABLE_HEADERS_ONLY=ON)
FetchContent_Declare(
cutlass
GIT_REPOSITORY https://github.com/nvidia/cutlass.git
GIT_REPOSITORY git@git.tsinghua.edu.cn:ryy23/cutlass.git
# GIT_REPOSITORY git@github.com:NVIDIA/cutlass.git
# CUTLASS 3.5.0
GIT_TAG 7d49e6c7e2f8896c47f586706e67e1fb215529dc
Expand Down
2 changes: 1 addition & 1 deletion metrics.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"metrics_1": [0.050606489181518555], "metrics_2": []}
{"metrics_1": [20.784107208251953, 23.877633571624756, 28.07648205757141, 0.8227071762084961, 0.4138922691345215, 0.4153611660003662, 0.4085240364074707, 0.6744792461395264, 0.38916516304016113, 0.40398669242858887, 0.4180002212524414, 0.39832615852355957, 0.4103264808654785], "metrics_2": []}
7 changes: 6 additions & 1 deletion vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ def _schedule_default(self, session_id_block:Dict[str, int], session_id_arrived:
self.swapped, SchedulerSwappedInOutputs.create_empty())

# If any requests are swapped, prioritize swapped requests.
if not self.swapped:
if not self.swapped and len(remaining_running) < 15:
remaining_waiting, prefills = self._schedule_prefills(
self.waiting, budget, curr_loras, enable_chunking=False)
for seq_group in prefills.seq_groups:
Expand Down Expand Up @@ -882,6 +882,10 @@ def _schedule_default(self, session_id_block:Dict[str, int], session_id_arrived:
# doesn't allow chunked prefills.
assert len(running_scheduled.prefill_seq_groups) == 0
assert len(swapped_in.prefill_seq_groups) == 0

print("Prefill sequence groups:", [seq_group.seq_group.request_id for seq_group in prefills.seq_groups])
print("Decoding sequence groups:", [seq_group.seq_group.request_id for seq_group in running_scheduled.decode_seq_groups])

sched_output = SchedulerOutputs(
scheduled_seq_groups=(prefills.seq_groups +
running_scheduled.decode_seq_groups +
Expand All @@ -899,6 +903,7 @@ def _schedule_default(self, session_id_block:Dict[str, int], session_id_arrived:
preempted=preempted,
)

print("Is empty? is waiting?", sched_output.is_empty(), len(self.waiting))
if sched_output.is_empty() and len(self.waiting) > 0:
# print("Lazy detection")
assert session_id_block or session_id_arrived
Expand Down

0 comments on commit e04838f

Please sign in to comment.