File tree Expand file tree Collapse file tree 2 files changed +14
-4
lines changed Expand file tree Collapse file tree 2 files changed +14
-4
lines changed Original file line number Diff line number Diff line change @@ -723,11 +723,16 @@ def _task_to_convert(task: TaskOption) -> ConvertType:
723723 )
724724
725725 # Workaround for Gemma 2 which uses interleaved sliding window
726- # attention, but it's not specified in its config. TODO: remove this
727- # when Gemma 2 is fixed in Transformers .
726+ # attention, but it's not specified in its config.
727+ # TODO: remove this when the Gemma 2 config is updated in HuggingFace.
728728 if self .hf_text_config .model_type == "gemma2" :
729729 self .hf_text_config .sliding_window_pattern = 2
730730
731+ # TODO: remove this when the Gemma 3n config is updated in HuggingFace.
732+ if self .hf_text_config .model_type == "gemma3n_text" :
733+ # 4 sliding-window attention layers ("L") followed by 1 full-attention layer ("G")
734+ self .hf_text_config .sliding_window_pattern = "LLLLG"
735+
731736 sliding_window = getattr (self .hf_text_config , "sliding_window" , None )
732737 sliding_window_pattern = getattr (self .hf_text_config ,
733738 "sliding_window_pattern" , None )
Original file line number Diff line number Diff line change @@ -297,8 +297,13 @@ def __init__(self,
297297 has_weight = False )
298298
299299 layer_idx = extract_layer_index (prefix )
300- if config .layer_types [layer_idx ] == "sliding_attention" :
301- self .sliding_window = config .sliding_window
300+
301+ is_sliding_window = (
302+ getattr (config , "interleaved_sliding_window" , None ) is not None
303+ and config .layer_types [layer_idx ] == "sliding_attention" )
304+
305+ if is_sliding_window :
306+ self .sliding_window = config .interleaved_sliding_window
302307 rope_theta = config .rope_local_base_freq
303308 rope_scaling = {"rope_type" : "default" }
304309 else :
You can’t perform that action at this time.
0 commit comments