@@ -111,7 +111,6 @@ class BertEncoder(nn.Module):
     def __init__(self,
                  vllm_config: VllmConfig,
                  bias: bool = True,
-                 gate_up_proj_bias: bool = True,
                  rotary_kwargs: Optional[dict] = None,
                  prefix: str = ""):
         super().__init__()
@@ -123,7 +122,6 @@ def __init__(self,
                       cache_config=cache_config,
                       quant_config=quant_config,
                       bias=bias,
-                      gate_up_proj_bias=gate_up_proj_bias,
                       rotary_kwargs=rotary_kwargs,
                       prefix=f"{prefix}.layer.{layer_idx}")
             for layer_idx in range(config.num_hidden_layers)
@@ -146,7 +144,6 @@ def __init__(self,
                  cache_config: Optional[CacheConfig] = None,
                  quant_config: Optional[QuantizationConfig] = None,
                  bias: bool = True,
-                 gate_up_proj_bias: bool = True,
                  rotary_kwargs: Optional[dict] = None,
                  prefix: str = ""):
         super().__init__()
@@ -166,7 +163,7 @@ def __init__(self,
                 hidden_size=config.hidden_size,
                 intermediate_size=config.intermediate_size,
                 hidden_act=config.hidden_act,
-                gate_up_proj_bias=gate_up_proj_bias,
+                bias=bias,
                 quant_config=quant_config,
                 prefix=f"{prefix}.intermediate")
         else:
@@ -350,15 +347,15 @@ def __init__(self,
                  hidden_size: int,
                  intermediate_size: int,
                  hidden_act: str,
-                 gate_up_proj_bias: bool = True,
+                 bias: bool = True,
                  quant_config: Optional[QuantizationConfig] = None,
                  prefix: str = ""):
         super().__init__()
         self.act_fn = get_act_and_mul_fn(hidden_act)
         self.gate_up_proj = MergedColumnParallelLinear(
             hidden_size,
             [intermediate_size] * 2,
-            bias=gate_up_proj_bias,
+            bias=bias,
             quant_config=quant_config,
             prefix=f"{prefix}.gate_up_proj",
         )
@@ -410,24 +407,18 @@ def __init__(self,
                  prefix: str = "",
                  embedding_class: type = BertEmbedding,
                  bias: bool = True,
-                 gate_up_proj_bias: bool = True,
                  rotary_kwargs: Optional[dict] = None,
                  add_pooling_layer: bool = False):
         super().__init__()
         """
         For BertModel, all linear layers have bias.
-        For NomicBertModel, all linear layers do not have bias,
-        the bias parameter intended to control all linear layers.
-        For GteModel, only up_gate_proj layer does not have bias,
-        so the gate_up_proj_bias parameter must be added.
-        see #16649
+        For NomicBertModel, all linear layers do not have bias.
         """

         config = vllm_config.model_config.hf_config
         self.embeddings = embedding_class(config)
         self.encoder = BertEncoder(vllm_config=vllm_config,
                                    bias=bias,
-                                   gate_up_proj_bias=gate_up_proj_bias,
                                    rotary_kwargs=rotary_kwargs,
                                    prefix=f"{prefix}.encoder")
         self.pooler = BertPooler(config) if add_pooling_layer else None
@@ -672,7 +663,6 @@ def _build_model(self,
         return BertModel(vllm_config=vllm_config,
                          prefix=prefix,
                          bias=False,
-                         gate_up_proj_bias=False,
                          rotary_kwargs=rotary_kwargs,
                          embedding_class=BertEmbedding)

@@ -694,6 +684,7 @@ def _build_model(self,

         assert config.__class__.__name__ == "GteConfig"
         assert config.position_embedding_type == "rope"
+        assert config.hidden_act == "gelu"

         config.position_embedding_type = "rotary"
         config.hidden_act = "gelu_and_mul"
@@ -706,11 +697,21 @@ def _build_model(self,
706697 "base" : config .rope_theta ,
707698 }
708699
709- return BertModel (vllm_config = vllm_config ,
710- prefix = prefix ,
711- gate_up_proj_bias = False ,
712- rotary_kwargs = rotary_kwargs ,
713- embedding_class = BertEmbedding )
700+ model = BertModel (vllm_config = vllm_config ,
701+ prefix = prefix ,
702+ rotary_kwargs = rotary_kwargs ,
703+ embedding_class = BertEmbedding )
704+
705+ # GteModel only gate_up_proj does not have bias.
706+ for layer in model .encoder .layer :
707+ layer .intermediate .gate_up_proj = MergedColumnParallelLinear (
708+ config .hidden_size ,
709+ [config .intermediate_size ] * 2 ,
710+ bias = False ,
711+ quant_config = vllm_config .quant_config ,
712+ prefix = f"{ prefix } .gate_up_proj" ,
713+ )
714+ return model
714715
715716 def split_up_gate_proj (self , weights : Iterable [Tuple [str , torch .Tensor ]]):
716717 n = "mlp.up_gate_proj"