vllm-project
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
Lines changed: 28 additions & 28 deletions
diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
Lines changed: 18 additions & 18 deletions
@@ -38,13 +38,13 @@
 class Scheduler(SchedulerInterface):
 
     def __init__(
-            self,
-            vllm_config: VllmConfig,
-            kv_cache_config: KVCacheConfig,
-            structured_output_manager: StructuredOutputManager,
-            mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
-            include_finished_set: bool = False,
-            log_stats: bool = False,
+        self,
+        vllm_config: VllmConfig,
+        kv_cache_config: KVCacheConfig,
+        structured_output_manager: StructuredOutputManager,
+        mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
+        include_finished_set: bool = False,
+        log_stats: bool = False,
     ) -> None:
         self.vllm_config = vllm_config
         self.scheduler_config = vllm_config.scheduler_config
@@ -68,8 +68,8 @@ def __init__(
             self.scheduler_config.max_num_batched_tokens
         self.max_model_len = self.scheduler_config.max_model_len
         self.enable_kv_cache_events = (
-                self.kv_events_config is not None
-                and self.kv_events_config.enable_kv_cache_events)
+            self.kv_events_config is not None
+            and self.kv_events_config.enable_kv_cache_events)
 
         # Create KVConnector for the Scheduler. Note that each Worker
         # will have a corresponding KVConnector with Role=WORKER.
@@ -214,8 +214,8 @@ def schedule(self) -> SchedulerOutput:
             if request.has_encoder_inputs:
                 (encoder_inputs_to_schedule, num_new_tokens,
                  new_encoder_budget) = self._try_schedule_encoder_inputs(
-                    request, request.num_computed_tokens, num_new_tokens,
-                    encoder_budget)
+                     request, request.num_computed_tokens, num_new_tokens,
+                     encoder_budget)
 
             if num_new_tokens == 0:
                 # The request cannot be scheduled because one of the following
@@ -592,7 +592,7 @@ def _make_cached_request_data(
         num_computed_tokens = request.num_computed_tokens
         num_regular_tokens = num_scheduled_tokens - num_scheduled_spec_tokens
         new_token_ids = request.all_token_ids[
-                        num_computed_tokens:num_computed_tokens + num_regular_tokens]
+            num_computed_tokens:num_computed_tokens + num_regular_tokens]
 
         req_data_queue = self._cached_reqs_data.get(request.request_id)
         if req_data_queue:
@@ -611,11 +611,11 @@ def _make_cached_request_data(
         return req_data
 
     def _try_schedule_encoder_inputs(
-            self,
-            request: Request,
-            num_computed_tokens: int,
-            num_new_tokens: int,
-            encoder_budget: int,
+        self,
+        request: Request,
+        num_computed_tokens: int,
+        num_new_tokens: int,
+        encoder_budget: int,
     ) -> tuple[list[int], int, int]:
         """
         Determine which encoder inputs need to be scheduled in the current step,
@@ -810,7 +810,7 @@ def update_from_output(
                         new_prompt_logprobs_tensors=prompt_logprobs_tensors,
                         stop_reason=request.stop_reason,
                         events=request.take_events(),
-                        kv_transfer_params = kv_transfer_params,
+                        kv_transfer_params=kv_transfer_params,
                         trace_headers=request.trace_headers,
                         num_cached_tokens=request.num_cached_tokens,
                     ))
@@ -871,9 +871,9 @@ def add_request(self, request: Request) -> None:
             request.record_event(EngineCoreEventType.QUEUED)
 
     def finish_requests(
-            self,
-            request_ids: Union[str, Iterable[str]],
-            finished_status: RequestStatus,
+        self,
+        request_ids: Union[str, Iterable[str]],
+        finished_status: RequestStatus,
     ) -> None:
         """Handles the finish signal from outside the scheduler.
 
@@ -882,7 +882,7 @@ def finish_requests(
         """
         assert RequestStatus.is_finished(finished_status)
         if isinstance(request_ids, str):
-            request_ids = (request_ids,)
+            request_ids = (request_ids, )
         else:
             request_ids = set(request_ids)
 
@@ -933,8 +933,8 @@ def reset_prefix_cache(self) -> bool:
         return self.kv_cache_manager.reset_prefix_cache()
 
     def make_stats(
-            self,
-            spec_decoding_stats: Optional[SpecDecodingStats] = None,
+        self,
+        spec_decoding_stats: Optional[SpecDecodingStats] = None,
     ) -> Optional[SchedulerStats]:
         if not self.log_stats:
             return None
@@ -949,10 +949,10 @@ def make_stats(
         )
 
     def make_spec_decoding_stats(
-            self,
-            spec_decoding_stats: Optional[SpecDecodingStats],
-            num_draft_tokens: int,
-            num_accepted_tokens: int,
+        self,
+        spec_decoding_stats: Optional[SpecDecodingStats],
+        num_draft_tokens: int,
+        num_accepted_tokens: int,
     ) -> Optional[SpecDecodingStats]:
         if not self.log_stats:
             return None
 
@@ -3,8 +3,8 @@
 
 import enum
 import time
-from collections.abc import Sequence
-from typing import Any, Optional, Union, Mapping
+from collections.abc import Mapping, Sequence
+from typing import Any, Optional, Union
 
 import msgspec
 
@@ -40,10 +40,10 @@ def __str__(self):
 
 
 class EngineCoreRequest(
-    msgspec.Struct,
-    array_like=True,  # type: ignore[call-arg]
-    omit_defaults=True,  # type: ignore[call-arg]
-    gc=False):  # type: ignore[call-arg]
+        msgspec.Struct,
+        array_like=True,  # type: ignore[call-arg]
+        omit_defaults=True,  # type: ignore[call-arg]
+        gc=False):  # type: ignore[call-arg]
 
     request_id: str
     prompt_token_ids: list[int]
@@ -95,10 +95,10 @@ def new_event(cls,
 
 
 class EngineCoreOutput(
-    msgspec.Struct,
-    array_like=True,  # type: ignore[call-arg]
-    omit_defaults=True,  # type: ignore[call-arg]
-    gc=False):  # type: ignore[call-arg]
+        msgspec.Struct,
+        array_like=True,  # type: ignore[call-arg]
+        omit_defaults=True,  # type: ignore[call-arg]
+        gc=False):  # type: ignore[call-arg]
 
     request_id: str
     new_token_ids: list[int]
@@ -110,7 +110,7 @@ class EngineCoreOutput(
     stop_reason: Union[int, str, None] = None
     events: Optional[list[EngineCoreEvent]] = None
     kv_transfer_params: Optional[dict[str, Any]] = None
-      
+
     trace_headers: Optional[Mapping[str, str]] = None
     # The number of tokens with prefix cache hits.
     num_cached_tokens: int = 0
@@ -121,9 +121,9 @@ def finished(self) -> bool:
 
 
 class UtilityOutput(
-    msgspec.Struct,
-    array_like=True,  # type: ignore[call-arg]
-    gc=False):  # type: ignore[call-arg]
+        msgspec.Struct,
+        array_like=True,  # type: ignore[call-arg]
+        gc=False):  # type: ignore[call-arg]
 
     call_id: int
 
@@ -133,10 +133,10 @@ class UtilityOutput(
 
 
 class EngineCoreOutputs(
-    msgspec.Struct,
-    array_like=True,  # type: ignore[call-arg]
-    omit_defaults=True,  # type: ignore[call-arg]
-    gc=False):  # type: ignore[call-arg]
+        msgspec.Struct,
+        array_like=True,  # type: ignore[call-arg]
+        omit_defaults=True,  # type: ignore[call-arg]
+        gc=False):  # type: ignore[call-arg]
 
     # NOTE(Nick): We could consider ways to make this more compact,
     # e.g. columnwise layout