1 file changed: 14 additions and 0 deletions

@@ -2050,6 +2050,13 @@ def __post_init__(self) -> None:
                 _MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS,
             )
 
+        # When using the default settings, ensure that
+        # max_num_batched_tokens does not exceed the model limit.
+        # Some models (e.g., Whisper) have embeddings tied to max length.
+        self.max_num_batched_tokens = min(
+            self.max_num_seqs * self.max_model_len,
+            self.max_num_batched_tokens)
+
         self.max_num_encoder_input_tokens = self.max_num_batched_tokens
         self.encoder_cache_size = self.max_num_batched_tokens
 
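For illustration only (not part of the patch): a minimal sketch of how the clamp in __post_init__ behaves, assuming hypothetical values for a Whisper-like model (max_model_len = 448, max_num_seqs = 8) and an assumed default budget of 5120 batched tokens.

# All values below are assumptions chosen for illustration, not taken from the PR.
max_num_seqs = 8
max_model_len = 448             # e.g., a Whisper-style maximum sequence length
max_num_batched_tokens = 5120   # assumed default multimodal token budget

# The same clamp as the added code: never budget more batched tokens than the
# scheduler could place across max_num_seqs full-length sequences.
max_num_batched_tokens = min(max_num_seqs * max_model_len,
                             max_num_batched_tokens)
print(max_num_batched_tokens)   # 3584 (= 8 * 448), down from 5120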
@@ -2090,6 +2097,13 @@ def _verify_args(self) -> None:
                     "be greater than or equal to max_num_seqs "
                     f"({self.max_num_seqs}).")
 
+        if self.max_num_batched_tokens > self.max_num_seqs * self.max_model_len:
+            logger.warning(
+                "max_num_batched_tokens (%d) exceeds max_num_seqs "
+                "* max_model_len (%d). This may lead to unexpected behavior.",
+                self.max_num_batched_tokens,
+                self.max_num_seqs * self.max_model_len)
+
         if self.num_lookahead_slots < 0:
             raise ValueError(
                 "num_lookahead_slots "
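Likewise, a small standalone sketch of the new warning in _verify_args, with assumed user-supplied values (none of these numbers come from the PR): the warning fires only when max_num_batched_tokens is explicitly set above max_num_seqs * max_model_len.

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Assumed explicit user settings, for illustration only.
max_num_seqs = 2
max_model_len = 448
max_num_batched_tokens = 2048   # exceeds 2 * 448 = 896

if max_num_batched_tokens > max_num_seqs * max_model_len:
    logger.warning(
        "max_num_batched_tokens (%d) exceeds max_num_seqs "
        "* max_model_len (%d). This may lead to unexpected behavior.",
        max_num_batched_tokens, max_num_seqs * max_model_len)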