Skip to content

Commit 36d1ccb

Browse files
Authored by Kyle Sayers
[Quant] BartModel SupportsQuant (#14699)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 1bc3b73 commit 36d1ccb

File tree

1 file changed

+4
-3
lines changed

1 file changed: +4 additions, -3 deletions

vllm/model_executor/models/bart.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from vllm.model_executor.sampling_metadata import SamplingMetadata
4545
from vllm.sequence import IntermediateTensors
4646

47-
from .interfaces import SupportsV0Only
47+
from .interfaces import SupportsQuant, SupportsV0Only
4848
from .utils import maybe_prefix
4949

5050
logger = logging.get_logger(__name__)
@@ -697,7 +697,7 @@ def forward(
697697
return hidden_states
698698

699699

700-
class BartModel(nn.Module):
700+
class BartModel(nn.Module, SupportsQuant):
701701
_tied_weights_keys = [
702702
"encoder.embed_tokens.weight", "decoder.embed_tokens.weight"
703703
]
@@ -763,7 +763,8 @@ def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
763763
return decoder_outputs
764764

765765

766-
class BartForConditionalGeneration(nn.Module, SupportsV0Only):
766+
class BartForConditionalGeneration(nn.Module, SupportsV0Only, SupportsQuant):
767+
packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
767768
base_model_prefix = "model"
768769

769770
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):

0 commit comments

Comments (0)