@@ -6578,6 +6578,179 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return super().modify_tensors(data_torch, name, bid)


+@ModelBase.register("Glm4MoeForCausalLM")
+class Glm4MoeModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.dir_model, trust_remote_code=True
+        )
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab._set_special_token(
+            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token("eog", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token("eog", tokenizer.get_added_vocab()["<|observation|>"])
+        special_vocab._set_special_token(
+            "unk", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token(
+            "bos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = (
+                self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+            )
+        self.gguf_writer.add_rope_dimension_count(
+            int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))
+        )
+
+        # MoE parameters
+        if (n_experts := self.hparams.get("n_routed_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        # Note: expert_used_count is already set by parent class using num_experts_per_tok
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+        if (n_shared_experts := self.hparams.get("n_shared_experts")) is not None:
+            self.gguf_writer.add_expert_shared_count(n_shared_experts)
+        if (first_k_dense_replace := self.hparams.get("first_k_dense_replace")) is not None:
+            self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
+
+        # Expert gating function (sigmoid for GLM4_MOE)
+        self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+
+        # Routed scaling factor
+        if (routed_scaling_factor := self.hparams.get("routed_scaling_factor")) is not None:
+            self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
+
+        # Normalise topk probabilities
+        if (norm_topk_prob := self.hparams.get("norm_topk_prob")) is not None:
+            self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
+
+    _experts: list[dict[str, Tensor]] | None = None
+    _shared_experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(
+        self, data_torch: Tensor, name: str, bid: int | None
+    ) -> Iterable[tuple[str, Tensor]]:
+        # Handle layer 46 tensors - preserve all for future MTP support
+        if bid is not None and bid == 46:
+            # Convert layer 46 tensors to GGUF naming but don't try to map them
+            new_name = name.replace("model.layers.", "blk.")
+            return [(new_name, data_torch)]
+
+        if name.startswith("model.visual."):  # ignore visual part
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for multimodal variants
+
+        # Handle main token embedding
+        if name == "model.embed_tokens.weight":
+            return [(self.map_tensor_name("token_embd.weight"), data_torch)]
+
+        # Handle routed experts (skip for NextN layer 46)
+        if name.find("mlp.experts") != -1 and "shared_experts" not in name and bid != 46:
+            n_experts = self.hparams["n_routed_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+                    # Generate GGUF tensor names for merged experts
+                    if w_name == "down_proj":
+                        new_name = f"blk.{bid}.ffn_down_exps.weight"
+                    elif w_name == "gate_proj":
+                        new_name = f"blk.{bid}.ffn_gate_exps.weight"
+                    elif w_name == "up_proj":
+                        new_name = f"blk.{bid}.ffn_up_exps.weight"
+                    else:
+                        merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+                        new_name = self.map_tensor_name(merged_name)
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        # Handle expert gating input (routing gate)
+        if ".mlp.gate.e_score_correction_bias" in name:
+            new_name = name.replace("model.layers.", "blk.").replace(
+                ".mlp.gate.e_score_correction_bias", ".ffn_gate_inp.bias"
+            )
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Handle shared expert tensors
+        if ".mlp.ffn_" in name and "_shexp" in name:
+            new_name = name.replace("model.layers.", "blk.")
+            return [(new_name, data_torch)]
+
+        # Handle regular dense FFN layers (for hybrid dense/MoE architecture)
+        if ".mlp." in name and "experts" not in name and "_shexp" not in name:
+            if "gate_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.gate_proj.weight", ".ffn_gate.weight"
+                )
+            elif "up_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.up_proj.weight", ".ffn_up.weight"
+                )
+            elif "down_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.down_proj.weight", ".ffn_down.weight"
+                )
+            else:
+                new_name = name
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Skip NextN (MTP) tensors
+        if (
+            ".embed_tokens." in name
+            or ".shared_head." in name
+            or ".eh_proj." in name
+            or ".enorm." in name
+            or ".hnorm." in name
+        ):
+            return []
+
+        return super().modify_tensors(data_torch, name, bid)
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CHATGLM
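Note on the expert merge in modify_tensors above: once all of a layer's routed-expert weights have been collected, the per-expert 2-D matrices are stacked into a single 3-D tensor (expert index first) and emitted as blk.{bid}.ffn_{down,gate,up}_exps.weight. A minimal sketch of that shape transformation, using made-up dimensions rather than anything from the real GLM-4.5 config:

import torch

n_experts, n_ff, n_embd = 4, 16, 8  # illustrative sizes only, not the model's real hyperparameters

# one 2-D weight matrix per routed expert, as stored in the HF checkpoint
per_expert = [torch.randn(n_ff, n_embd) for _ in range(n_experts)]

# torch.stack(dim=0) yields the single [n_experts, n_ff, n_embd] tensor
# that modify_tensors writes out for the merged experts
merged = torch.stack(per_expert, dim=0)
assert merged.shape == (n_experts, n_ff, n_embd)

Similarly, the RoPE dimension count written in set_gguf_parameters is int(rope_dim * partial_rotary_factor); for example, a hypothetical head_dim of 128 with the default factor of 0.5 would give 64.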