@@ -52,7 +52,8 @@
 from vllm.sequence import IntermediateTensors
 from vllm.utils import JSONTree, json_map_leaves
 
-from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsPP
+from .interfaces import (SupportsLoRA, SupportsMultiModal, SupportsPP,
+                         SupportsQuant)
 from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
                     is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
@@ -633,7 +634,8 @@ def forward(
         return hidden_states, residual
 
 
-class MolmoVisionBackbone(nn.Module):
+class MolmoVisionBackbone(nn.Module, SupportsQuant):
+    packed_modules_mapping = {"merged_linear": ["gate_proj", "up_proj"]}
 
     def __init__(
         self,
@@ -794,7 +796,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
 
 
 @support_torch_compile
-class MolmoModel(nn.Module):
+class MolmoModel(nn.Module, SupportsQuant):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -1402,8 +1404,8 @@ def get_replacement_molmo(item_idx: int):
 @MULTIMODAL_REGISTRY.register_processor(MolmoMultiModalProcessor,
                                         info=MolmoProcessingInfo,
                                         dummy_inputs=MolmoDummyInputsBuilder)
-class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP,
-                       SupportsLoRA):
+class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA,
+                       SupportsQuant):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_substr={
             # vision backbone mapping
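For context, a minimal sketch of the pattern this diff relies on: SupportsQuant marks a model class as quantization-aware, and packed_modules_mapping records that the fused merged_linear module packs the checkpoint's separate gate_proj and up_proj weights, so a quantization config written against the original module names can be resolved onto the fused module. The names ToyQuantConfig, SupportsQuantSketch, and is_module_quantized below are illustrative stand-ins, not vLLM's actual API.

from typing import Dict, List, Optional

import torch.nn as nn


class ToyQuantConfig:
    """Hypothetical stand-in for a real quantization config."""

    def __init__(self, quantized_modules: List[str]):
        # Module names as they appear in the original checkpoint.
        self.quantized_modules = quantized_modules


class SupportsQuantSketch:
    """Sketch of a SupportsQuant-style mixin (not vLLM's implementation)."""

    # Maps a fused module name to the unfused checkpoint modules it packs.
    packed_modules_mapping: Dict[str, List[str]] = {}
    quant_config: Optional[ToyQuantConfig] = None

    def is_module_quantized(self, fused_name: str) -> bool:
        """Treat a fused module as quantized iff all of its sources are."""
        if self.quant_config is None:
            return False
        sources = self.packed_modules_mapping.get(fused_name, [fused_name])
        return all(s in self.quant_config.quantized_modules for s in sources)


class VisionBackboneSketch(nn.Module, SupportsQuantSketch):
    # Mirrors the mapping added in this diff: merged_linear packs the
    # checkpoint's gate_proj and up_proj projections.
    packed_modules_mapping = {"merged_linear": ["gate_proj", "up_proj"]}


backbone = VisionBackboneSketch()
backbone.quant_config = ToyQuantConfig(["gate_proj", "up_proj"])
assert backbone.is_module_quantized("merged_linear")

Keying the mapping on the fused name keeps both sides consistent: the live module tree and quantization lookup see merged_linear, while checkpoints and quant configs keep the unfused names.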