
Commit 6d46a97

kylesayrs authored and Alex4210987 committed
[SupportsQuant] Chameleon, Chatglm, Commandr (vllm-project#15952)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
Signed-off-by: xinyuxiao <xinyuxiao2024@gmail.com>
1 parent 9ab484e commit 6d46a97

File tree (3 files changed, +17 −8 lines):

vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/commandr.py

vllm/model_executor/models/chameleon.py

7 additions, 2 deletions

@@ -38,7 +38,8 @@
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
 
-from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
+from .interfaces import (MultiModalEmbeddings, SupportsMultiModal, SupportsPP,
+                         SupportsQuant)
 from .utils import (flatten_bn, is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
                     maybe_prefix, merge_multimodal_embeddings)
@@ -927,7 +928,11 @@ def forward(
                                         info=ChameleonProcessingInfo,
                                         dummy_inputs=ChameleonDummyInputsBuilder)
 class ChameleonForConditionalGeneration(nn.Module, SupportsMultiModal,
-                                        SupportsPP):
+                                        SupportsPP, SupportsQuant):
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"]
+    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
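
The new packed_modules_mapping matters because vLLM fuses the checkpoint's separate projection weights into single modules; the mapping records which original names each fused module absorbs, so anything keyed by checkpoint name (such as per-module quantization parameters) can still be resolved. A toy sketch of that fusion (illustrative shapes and dict only, not vLLM's loader code):

import torch

hidden = 8
# Stand-in for Hugging Face checkpoint tensors (toy shapes).
ckpt = {
    "q_proj.weight": torch.randn(hidden, hidden),
    "k_proj.weight": torch.randn(hidden, hidden),
    "v_proj.weight": torch.randn(hidden, hidden),
}
# Mirrors the mapping added in this commit.
packed = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}

# Fuse the shards the way a merged linear layer stores them; lookups keyed
# by the original names must now go through the mapping.
qkv_weight = torch.cat([ckpt[f"{s}.weight"] for s in packed["qkv_proj"]],
                       dim=0)
print(qkv_weight.shape)  # torch.Size([24, 8])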

vllm/model_executor/models/chatglm.py

8 additions, 4 deletions

@@ -29,7 +29,7 @@
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.configs import ChatGLMConfig
 
-from .interfaces import SupportsLoRA, SupportsPP
+from .interfaces import SupportsLoRA, SupportsPP, SupportsQuant
 from .utils import (AutoWeightsLoader, WeightsMapper, is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
                     maybe_prefix)
@@ -295,7 +295,11 @@ def forward(
 
 
 @support_torch_compile
-class ChatGLMModel(nn.Module):
+class ChatGLMModel(nn.Module, SupportsQuant):
+    packed_modules_mapping = {
+        "linear_proj.merged_proj":
+        ["linear_proj.gate_proj", "linear_proj.dense_h_to_4h"]
+    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -395,7 +399,6 @@ def load_weights(self, weights: Iterable[Tuple[str,
 
 
 class ChatGLMBaseModel(nn.Module):
-
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_substr={".word_embeddings": ""}, )
 
@@ -452,7 +455,8 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
 
 
-class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP):
+class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP,
+                         SupportsQuant):
     packed_modules_mapping = {
         "query_key_value": ["query_key_value"],
         "dense_h_to_4h": ["dense_h_to_4h"]

vllm/model_executor/models/commandr.py

2 additions, 2 deletions

@@ -49,7 +49,7 @@
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
 
-from .interfaces import SupportsLoRA, SupportsPP
+from .interfaces import SupportsLoRA, SupportsPP, SupportsQuant
 from .utils import (extract_layer_index, is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
                     maybe_prefix)
@@ -332,7 +332,7 @@ def forward(
         return hidden_states
 
 
-class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
+class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
