Skip to content

Commit 822c780

Browse files
committed
Move the logging to executor_base.py so V1 can also use it.
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
1 parent 211320f commit 822c780

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

vllm/executor/executor_base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
import asyncio
4+
import time
45
from abc import ABC, abstractmethod
56
from typing import (Any, Awaitable, Callable, Dict, List, Optional, Set, Tuple,
67
Union)
@@ -200,15 +201,23 @@ def sleep(self, level: int = 1):
200201
if self.is_sleeping:
201202
logger.warning("Executor is already sleeping.")
202203
return
204+
time_before_sleep = time.perf_counter()
203205
self.collective_rpc("sleep", kwargs=dict(level=level))
206+
time_after_sleep = time.perf_counter()
204207
self.is_sleeping = True
208+
logger.info("It took %.6f seconds to fall asleep.",
209+
time_after_sleep - time_before_sleep)
205210

206211
def wake_up(self):
207212
if not self.is_sleeping:
208213
logger.warning("Executor is not sleeping.")
209214
return
215+
time_before_wakeup = time.perf_counter()
210216
self.collective_rpc("wake_up")
217+
time_after_wakeup = time.perf_counter()
211218
self.is_sleeping = False
219+
logger.info("It took %.6f seconds to wake up.",
220+
time_after_wakeup - time_before_wakeup)
212221

213222
def save_sharded_state(
214223
self,

vllm/worker/worker.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
"""A GPU worker class."""
33
import gc
44
import os
5-
import time
65
from typing import Dict, List, Optional, Set, Tuple, Type, Union
76

87
import torch
@@ -126,9 +125,7 @@ def stop_profile(self):
126125
def sleep(self, level: int = 1) -> None:
127126
free_bytes_before_sleep = torch.cuda.mem_get_info()[0]
128127
allocator = CuMemAllocator.get_instance()
129-
time_before_sleep = time.perf_counter()
130128
allocator.sleep(offload_tags=("weights", ) if level == 1 else tuple())
131-
time_after_sleep = time.perf_counter()
132129
free_bytes_after_sleep, total = torch.cuda.mem_get_info()
133130
freed_bytes = free_bytes_after_sleep - free_bytes_before_sleep
134131
used_bytes = total - free_bytes_after_sleep
@@ -137,16 +134,10 @@ def sleep(self, level: int = 1) -> None:
137134
"Sleep mode freed %.2f GiB memory, "
138135
"%.2f GiB memory is still in use.", freed_bytes / GiB_bytes,
139136
used_bytes / GiB_bytes)
140-
logger.info("It took %.6f seconds to fall asleep.",
141-
time_after_sleep - time_before_sleep)
142137

143138
def wake_up(self) -> None:
144139
allocator = CuMemAllocator.get_instance()
145-
time_before_wakeup = time.perf_counter()
146140
allocator.wake_up()
147-
time_after_wakeup = time.perf_counter()
148-
logger.info("It took %.6f seconds to wake up.",
149-
time_after_wakeup - time_before_wakeup)
150141

151142
def init_device(self) -> None:
152143
if self.device_config.device.type == "cuda":

0 commit comments

Comments (0)