@@ -542,8 +542,10 @@ def __post_init__(self) -> None:
542542 sliding_window = getattr (self .hf_text_config , "sliding_window" , None )
543543 sliding_window_pattern = getattr (self .hf_text_config ,
544544 "sliding_window_pattern" , None )
545+ has_interleaved_attention = sliding_window_pattern is not None or (
546+ isinstance (sliding_window , list ))
545547
546- if not ( self .disable_sliding_window or sliding_window_pattern is None ) :
548+ if not self .disable_sliding_window and has_interleaved_attention :
547549 if (backend :=
548550 envs .VLLM_ATTENTION_BACKEND ) in ("XFORMERS" , "FLASHINFER" ):
549551 sliding_window_len_min = get_min_sliding_window (
@@ -563,7 +565,10 @@ def __post_init__(self) -> None:
563565 # only the attention layer itself is aware of the sliding
564566 # window, and use the window size to compute the attention.
565567 self .hf_text_config .interleaved_sliding_window = sliding_window
566- delattr (self .hf_text_config , "sliding_window" )
568+
569+ if hasattr (self .hf_text_config , "sliding_window" ):
570+ delattr (self .hf_text_config , "sliding_window" )
571+
567572 sliding_window = None
568573
569574 self .max_model_len = _get_and_verify_max_len (
@@ -1041,7 +1046,8 @@ def verify_with_parallel_config(
10411046 if self .use_async_output_proc :
10421047 self .use_async_output_proc = False
10431048
1044- def get_hf_config_sliding_window (self ) -> Optional [int ]:
1049+ def get_hf_config_sliding_window (
1050+ self ) -> Union [Optional [int ], list [Optional [int ]]]:
10451051 """Get the sliding window size, or None if disabled."""
10461052
10471053 # Some models, like Qwen2 and Qwen1.5, use `use_sliding_window` in
@@ -1052,7 +1058,7 @@ def get_hf_config_sliding_window(self) -> Optional[int]:
10521058 return None
10531059 return getattr (self .hf_text_config , "sliding_window" , None )
10541060
1055- def get_sliding_window (self ) -> Optional [int ]:
1061+ def get_sliding_window (self ) -> Optional [Union [ int , list [ Optional [ int ]]] ]:
10561062 """Get the sliding window size, or None if disabled.
10571063 """
10581064 # If user disables sliding window, return None.
0 commit comments