                                                QKVParallelLinear,
                                                RowParallelLinear)
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.layers.quantization.gptq import GPTQConfig
-from vllm.model_executor.layers.quantization.gptq_marlin import (
-    GPTQMarlinConfig)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.multimodal import MULTIMODAL_REGISTRY
@@ -179,20 +176,20 @@ def __init__(
         hidden_features: int,
         bias: bool = False,
         quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
     ):
         super().__init__()
         self.gate_up_proj = MergedColumnParallelLinear(
             input_size=in_features,
             output_sizes=[hidden_features] * 2,
             bias=bias,
             quant_config=quant_config,
-        )
-        self.down_proj = RowParallelLinear(
-            hidden_features,
-            in_features,
-            bias=bias,
-            quant_config=quant_config,
-        )
+            prefix=f"{prefix}.gate_up_proj")
+        self.down_proj = RowParallelLinear(hidden_features,
+                                           in_features,
+                                           bias=bias,
+                                           quant_config=quant_config,
+                                           prefix=f"{prefix}.down_proj")
         self.act_fn = SiluAndMul()
 
     def forward(self, x: torch.Tensor):
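
Note (not part of the diff): vLLM's quantization configs can make per-layer decisions keyed on a module's full dotted name, which is why this hunk threads `prefix` down into each parallel linear layer. Below is a minimal runnable sketch of that pattern; `ToyQuantConfig`, `ToyLinear`, `ToyMLP`, and the prefix strings are illustrative assumptions, not vLLM's actual classes:

```python
# Illustrative sketch of prefix-threading (not vLLM's real classes).
from typing import Optional


class ToyQuantConfig:
    """Hypothetical config that skips modules matched by an ignore list."""

    def __init__(self, ignored_prefixes: list[str]):
        self.ignored_prefixes = ignored_prefixes

    def is_quantized(self, prefix: str) -> bool:
        # A layer is quantized unless its dotted name matches an ignore entry.
        return not any(prefix.startswith(p) for p in self.ignored_prefixes)


class ToyLinear:
    """Hypothetical stand-in for a parallel linear layer."""

    def __init__(self, quant_config: Optional[ToyQuantConfig],
                 prefix: str = ""):
        self.prefix = prefix
        self.quantized = (quant_config is not None
                          and quant_config.is_quantized(prefix))


class ToyMLP:
    """Same pattern as the diff: every child gets '<parent>.<child>'."""

    def __init__(self, quant_config, prefix: str = ""):
        self.gate_up_proj = ToyLinear(quant_config,
                                      prefix=f"{prefix}.gate_up_proj")
        self.down_proj = ToyLinear(quant_config,
                                   prefix=f"{prefix}.down_proj")


cfg = ToyQuantConfig(ignored_prefixes=["visual."])
mlp = ToyMLP(cfg, prefix="visual.blocks.0.mlp")
assert not mlp.gate_up_proj.quantized  # vision-tower layer is skipped
assert ToyLinear(cfg, prefix="model.layers.0.mlp.down_proj").quantized
```

Without the prefix, every layer would report an empty name and such per-module matching could not distinguish the vision tower from the language model. The same pattern applies to the `prefix=f"{prefix}.mlp"` hunk below.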
@@ -407,6 +404,7 @@ def __init__(
             mlp_hidden_dim,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
         )
 
     def forward(
@@ -1278,7 +1276,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.visual = Glm4vVisionTransformer(
             config.vision_config,
             norm_eps=getattr(config, "rms_norm_eps", 1e-5),
-            quant_config=self._maybe_ignore_quant_config(quant_config),
+            quant_config=quant_config,
             prefix=maybe_prefix(prefix, "visual"),
         )
 
@@ -1291,13 +1289,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors)
 
-    def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
-        # GPTQ configs do not have a list of ignored modules, however AutoGPTQ
-        # seems to avoid vision encoder sections for some models.
-        if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
-            return None
-        return quant_config
-
     def _validate_and_reshape_mm_tensor(self, mm_input: object,
                                         name: str) -> torch.Tensor:
         if not isinstance(mm_input, (torch.Tensor, list)):
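
Note (not part of the diff): the removed `_maybe_ignore_quant_config` helper disabled quantization for the entire vision tower whenever a GPTQ-family config was in use. With accurate prefixes now threaded through the vision modules (see the hunks above), that all-or-nothing override is no longer needed, since skipping can happen per layer. A hedged sketch of the contrast; `GPTQLikeConfig` and its `is_quantized` hook are illustrative assumptions, not vLLM's actual GPTQ classes:

```python
# Hedged sketch contrasting the removed all-or-nothing override with
# per-layer prefix matching (illustrative types, not vLLM's GPTQ classes).


class GPTQLikeConfig:
    """Hypothetical GPTQ-style config with no built-in ignore list."""

    def is_quantized(self, prefix: str) -> bool:
        # Assumed per-layer hook: with accurate module names available, the
        # decision can key on the prefix (or on which quantized shards exist
        # in the checkpoint) instead of on the config's type.
        return not prefix.startswith("visual.")


def maybe_ignore_quant_config(quant_config):
    """Old approach (removed by this diff): blanket-drop quantization for
    the whole vision tower when a GPTQ-family config is detected."""
    if isinstance(quant_config, GPTQLikeConfig):
        return None  # every vision layer falls back to unquantized weights
    return quant_config


cfg = GPTQLikeConfig()
assert maybe_ignore_quant_config(cfg) is None  # old: all-or-nothing
assert not cfg.is_quantized("visual.blocks.0.mlp.down_proj")  # new: per layer
assert cfg.is_quantized("model.layers.0.mlp.down_proj")
```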