@@ -221,8 +221,7 @@ def __post_init__(self, is_encoder_decoder: bool) -> None:
         if self.enable_chunked_prefill:
             logger.info(
                 "Chunked prefill is enabled with max_num_batched_tokens=%d.",
-                self.max_num_batched_tokens,
-            )
+                self.max_num_batched_tokens)
 
         self.chunked_prefill_enabled = self.enable_chunked_prefill
         if self.max_num_partial_prefills > 1:
@@ -234,10 +233,8 @@ def __post_init__(self, is_encoder_decoder: bool) -> None:
234233 "Concurrent partial prefills enabled with "
235234 "max_num_partial_prefills=%d, max_long_partial_prefills=%d, "
236235 "long_prefill_token_threshold=%d" ,
237- self .max_num_partial_prefills ,
238- self .max_long_partial_prefills ,
239- self .long_prefill_token_threshold ,
240- )
236+ self .max_num_partial_prefills , self .max_long_partial_prefills ,
237+ self .long_prefill_token_threshold )
241238
242239 # NOTE: Default set cuda_graph_sizes to [min(max_num_seqs * 2, 512)].
243240 # This avoids OOM in tight memory scenarios with small max_num_seqs,
@@ -250,7 +247,7 @@ def __post_init__(self, is_encoder_decoder: bool) -> None:
             self.scheduler_cls = (
                 "vllm.v1.core.sched.async_scheduler.AsyncScheduler")
 
-    @model_validator(mode="after")
+    @model_validator(mode='after')
     def _verify_args(self) -> Self:
         if (self.max_num_batched_tokens < self.max_model_len
                 and not self.chunked_prefill_enabled):
@@ -273,8 +270,7 @@ def _verify_args(self) -> Self:
273270 "max_num_batched_tokens (%d) exceeds max_num_seqs "
274271 "* max_model_len (%d). This may lead to unexpected behavior." ,
275272 self .max_num_batched_tokens ,
276- self .max_num_seqs * self .max_model_len ,
277- )
273+ self .max_num_seqs * self .max_model_len )
278274
279275 if self .num_lookahead_slots < 0 :
280276 raise ValueError (