@@ -6578,6 +6578,117 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return super().modify_tensors(data_torch, name, bid)
 
 
+@ModelBase.register("Glm4MoeForCausalLM")
+class Glm4MoeModel(TextModel):
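+    # Converter for GLM-4 MoE checkpoints (e.g. GLM-4.5): a few leading dense
+    # FFN layers followed by MoE layers with routed and shared experts.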
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
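+        # Map GLM's added vocab entries onto the standard special-token slots.
+        # Using <|user|> for EOT is an assumption based on the GLM-4 chat
+        # template: generation should stop before the next user turn.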
+        special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
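+        # GLM4-MoE applies RoPE to only part of each head's dimensions
+        # (partial rotary embeddings); 0.5 is the fallback used when the
+        # config omits partial_rotary_factor.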
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+        self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
+
+        # MoE parameters
+        if (n_experts := self.hparams.get("n_routed_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
+            self.gguf_writer.add_expert_used_count(n_experts_used)
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+        if (n_shared_experts := self.hparams.get("n_shared_experts")) is not None:
+            self.gguf_writer.add_expert_shared_count(n_shared_experts)
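+        # first_k_dense_replace counts the leading layers that keep a dense
+        # FFN instead of MoE, hence "leading dense block count" in GGUF.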
+        if (first_k_dense_replace := self.hparams.get("first_k_dense_replace")) is not None:
+            self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
+
+        # Expert gating function (sigmoid for GLM4_MOE)
+        self.gguf_writer.add_expert_gating_func(2)  # LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID
+
+        # Routed scaling factor
+        if (routed_scaling_factor := self.hparams.get("routed_scaling_factor")) is not None:
+            self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
+
+        # Normalise topk probabilities
+        if (norm_topk_prob := self.hparams.get("norm_topk_prob")) is not None:
+            self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
+
+    _experts: list[dict[str, Tensor]] | None = None
+    _shared_experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("model.visual."):  # ignore visual part
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for multimodal variants
+
+        # Handle routed experts
+        if name.find("mlp.experts") != -1 and "shared_experts" not in name:
+            n_experts = self.hparams["n_routed_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
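+            # Flush once all three projections (gate/up/down) have been
+            # collected for every expert in this layer; until then, emit
+            # nothing and keep buffering.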
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
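+                    # torch.stack along dim 0 yields a single 3-D tensor of
+                    # shape (n_experts, rows, cols), the merged layout used
+                    # for expert weights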
+                    data_torch = torch.stack(datas, dim=0)
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+                    new_name = self.map_tensor_name(merged_name)
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        # Handle shared experts: rename the HF shared_experts projections to
+        # the *_shexp tensor names
+        if "shared_experts" in name:
+            if "gate_proj" in name:
+                new_name = name.replace("shared_experts.gate_proj.weight", "ffn_gate_shexp.weight")
+            elif "up_proj" in name:
+                new_name = name.replace("shared_experts.up_proj.weight", "ffn_up_shexp.weight")
+            elif "down_proj" in name:
+                new_name = name.replace("shared_experts.down_proj.weight", "ffn_down_shexp.weight")
+            else:
+                new_name = name
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CHATGLM