diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py
index bb8d9bf8a03c..8912affe36fe 100644
--- a/vllm/model_executor/models/commandr.py
+++ b/vllm/model_executor/models/commandr.py
@@ -89,6 +89,7 @@ def __init__(
         self,
         config: CohereConfig,
         quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
     ):
         super().__init__()
         self.config = config
@@ -99,12 +100,14 @@ def __init__(
             [self.intermediate_size] * 2,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.gate_up_proj",
         )
         self.down_proj = RowParallelLinear(
             self.intermediate_size,
             self.hidden_size,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.down_proj",
         )
         self.act_fn = SiluAndMul()
 
@@ -158,12 +161,14 @@ def __init__(
             self.total_num_kv_heads,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.qkv_proj",
         )
         self.o_proj = RowParallelLinear(
             self.total_num_heads * self.head_dim,
             self.hidden_size,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.o_proj",
         )
         self.rotary_emb = get_rope(
             self.head_dim,
@@ -244,7 +249,9 @@ def __init__(self,
                                          quant_config=quant_config,
                                          prefix=f"{prefix}.self_attn")
 
-        self.mlp = CohereMLP(config, quant_config=quant_config)
+        self.mlp = CohereMLP(config,
+                             quant_config=quant_config,
+                             prefix=f"{prefix}.mlp")
 
         self.input_layernorm = LayerNorm(param_shape=(config.hidden_size),
                                          eps=config.layer_norm_eps)
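
Note on the change (illustration, not part of the patch): the new `prefix` argument threads each decoder layer's dotted module path down into its MLP and attention projections, so every quantizable linear layer receives a fully qualified name such as "model.layers.0.mlp.down_proj", which per-layer quantization logic can key on. Below is a minimal sketch of how the prefixes compose; the Toy* classes are hypothetical stand-ins, not vLLM's parallel-linear layers.

    # Hypothetical stand-ins used only to show how dotted prefixes compose as
    # they are threaded through nested submodules.
    class ToyLinear:
        def __init__(self, prefix: str = ""):
            # A quantization method could look up per-layer settings by this name.
            self.prefix = prefix

    class ToyMLP:
        def __init__(self, prefix: str = ""):
            # Mirrors the diff: each child gets "<parent prefix>.<attribute name>".
            self.gate_up_proj = ToyLinear(prefix=f"{prefix}.gate_up_proj")
            self.down_proj = ToyLinear(prefix=f"{prefix}.down_proj")

    class ToyDecoderLayer:
        def __init__(self, prefix: str = ""):
            self.mlp = ToyMLP(prefix=f"{prefix}.mlp")

    layer = ToyDecoderLayer(prefix="model.layers.0")
    assert layer.mlp.down_proj.prefix == "model.layers.0.mlp.down_proj"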