@@ -6647,12 +6647,6 @@ def set_gguf_parameters(self):
     def modify_tensors(
         self, data_torch: Tensor, name: str, bid: int | None
     ) -> Iterable[tuple[str, Tensor]]:
-        # Handle layer 46 tensors - preserve all for future MTP support
-        if bid is not None and bid == 46:
-            # Convert layer 46 tensors to GGUF naming but don't try to map them
-            new_name = name.replace("model.layers.", "blk.")
-            return [(new_name, data_torch)]
-
         if name.startswith("model.visual."):  # ignore visual part
             return []
         elif name.startswith("model.language_model."):
@@ -6662,14 +6656,18 @@ def modify_tensors(
         if name == "model.embed_tokens.weight":
            return [(self.map_tensor_name("token_embd.weight"), data_torch)]

-        # Handle routed experts (skip for NextN layer 46)
-        if name.find("mlp.experts") != -1 and "shared_experts" not in name and bid != 46:
+        # Handle routed experts
+        if name.find("mlp.experts") != -1 and "shared_experts" not in name:
             n_experts = self.hparams["n_routed_experts"]
             assert bid is not None

             if self._experts is None:
                 self._experts = [{} for _ in range(self.block_count)]

+            # Extend experts array if needed (for models where actual layers > num_hidden_layers)
+            while len(self._experts) <= bid:
+                self._experts.append({})
+
             self._experts[bid][name] = data_torch

             if len(self._experts[bid]) >= n_experts * 3:
@@ -6705,11 +6703,22 @@ def modify_tensors(
             new_name = name.replace("model.layers.", "blk.").replace(
                 ".mlp.gate.e_score_correction_bias", ".ffn_gate_inp.bias"
             )
-            return [(self.map_tensor_name(new_name), data_torch)]
+            return [(new_name, data_torch)]
+        elif ".mlp.gate.weight" in name:
+            new_name = name.replace("model.layers.", "blk.").replace(
+                ".mlp.gate.weight", ".ffn_gate_inp.weight"
+            )
+            return [(new_name, data_torch)]

         # Handle shared expert tensors
-        if ".mlp.ffn_" in name and "_shexp" in name:
-            new_name = name.replace("model.layers.", "blk.")
+        if ".mlp.shared_experts." in name:
+            new_name = name.replace("model.layers.", "blk.").replace(".mlp.shared_experts.", ".ffn_")
+            if "gate_proj" in new_name:
+                new_name = new_name.replace("gate_proj", "gate_shexp")
+            elif "down_proj" in new_name:
+                new_name = new_name.replace("down_proj", "down_shexp")
+            elif "up_proj" in new_name:
+                new_name = new_name.replace("up_proj", "up_shexp")
             return [(new_name, data_torch)]

         # Handle regular dense FFN layers (for hybrid dense/MoE architecture)
@@ -6738,8 +6747,27 @@ def modify_tensors(
             or ".enorm." in name
             or ".hnorm." in name
         ):
-            # For NextN tensors, convert to GGUF naming convention
-            new_name = name.replace("model.layers.", "blk.").replace("model.", "")
+            new_name = name.replace("model.layers.", "blk.").replace("model.", "").replace(".weight", "")
+            return [(new_name, data_torch)]
+
+        # GLM tensor mapping - handle directly without map_tensor_name
+        if ".input_layernorm." in name:
+            new_name = name.replace("model.layers.", "blk.").replace(".input_layernorm.", ".attn_norm.")
+            return [(new_name, data_torch)]
+        elif ".post_attention_layernorm." in name:
+            new_name = name.replace("model.layers.", "blk.").replace(".post_attention_layernorm.", ".ffn_norm.")
+            return [(new_name, data_torch)]
+        elif ".self_attn." in name:
+            # Map GLM self_attn to standard attention naming
+            new_name = name.replace("model.layers.", "blk.").replace(".self_attn.", ".attn_")
+            if "q_proj" in new_name:
+                new_name = new_name.replace("q_proj", "q")
+            elif "k_proj" in new_name:
+                new_name = new_name.replace("k_proj", "k")
+            elif "v_proj" in new_name:
+                new_name = new_name.replace("v_proj", "v")
+            elif "o_proj" in new_name:
+                new_name = new_name.replace("o_proj", "output")
             return [(new_name, data_torch)]

         return super().modify_tensors(data_torch, name, bid)