|
20 | 20 | RPCLoadAdapterRequest, |
21 | 21 | RPCProcessRequest, |
22 | 22 | RPCResetPrefixCacheRequest, |
23 | | - RPCStartupRequest, RPCStartupResponse, |
24 | | - RPCUProfileRequest) |
| 23 | + RPCSleepRequest, RPCStartupRequest, |
| 24 | + RPCStartupResponse, |
| 25 | + RPCUProfileRequest, RPCWakeUpRequest) |
25 | 26 | # yapf: enable |
26 | 27 | from vllm.logger import init_logger |
27 | 28 | from vllm.outputs import RequestOutput |
@@ -242,6 +243,10 @@ def handle_new_input(self): |
242 | 243 | self._handle_load_adapter_request(request) |
243 | 244 | elif isinstance(request, RPCResetPrefixCacheRequest): |
244 | 245 | self.reset_prefix_cache() |
| 246 | + elif isinstance(request, RPCSleepRequest): |
| 247 | + self.sleep(request.value) |
| 248 | + elif isinstance(request, RPCWakeUpRequest): |
| 249 | + self.wake_up() |
245 | 250 | else: |
246 | 251 | raise ValueError("Unknown RPCRequest Type: " |
247 | 252 | f"{type(request)}") |
@@ -369,6 +374,12 @@ def stop_profile(self) -> None: |
def reset_prefix_cache(self) -> bool:
    """Ask the wrapped engine to reset its prefix cache.

    Returns:
        Whatever ``self.engine.reset_prefix_cache()`` returns, unchanged.
    """
    engine = self.engine
    outcome = engine.reset_prefix_cache()
    return outcome
371 | 376 |
|
def sleep(self, level: int = 1) -> None:
    """Forward a sleep request to the wrapped engine.

    Args:
        level: Value passed straight through to ``self.engine.sleep``;
            defaults to 1. Its meaning is defined by the engine, not here.
    """
    engine = self.engine
    engine.sleep(level)
| 379 | + |
def wake_up(self) -> None:
    """Forward a wake-up request to the wrapped engine."""
    engine = self.engine
    engine.wake_up()
| 382 | + |
372 | 383 |
|
def signal_handler(*_) -> None:
    """Raise ``KeyboardInterrupt`` regardless of the arguments received.

    Any positional arguments are accepted and ignored.
    NOTE(review): presumably registered via ``signal.signal`` so that a
    delivered signal unwinds the process like Ctrl-C; confirm at the
    registration site.

    Raises:
        KeyboardInterrupt: always, with the message "MQLLMEngine terminated".
    """
    interrupt = KeyboardInterrupt("MQLLMEngine terminated")
    raise interrupt
|
0 commit comments