Skip to content

Commit 36d1ccb

Browse files
Authored by Kyle Sayers
[Quant] BartModel SupportsQuant (#14699)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 1bc3b73 commit 36d1ccb

File tree

1 file changed

+4
-3
lines changed

1 file changed: +4 additions, -3 deletions

vllm/model_executor/models/bart.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from vllm.model_executor.sampling_metadata import SamplingMetadata
4545
from vllm.sequence import IntermediateTensors
4646

47-
from .interfaces import SupportsV0Only
47+
from .interfaces import SupportsQuant, SupportsV0Only
4848
from .utils import maybe_prefix
4949

5050
logger = logging.get_logger(__name__)
@@ -697,7 +697,7 @@ def forward(
697697
return hidden_states
698698

699699

700-
class BartModel(nn.Module):
700+
class BartModel(nn.Module, SupportsQuant):
701701
_tied_weights_keys = [
702702
"encoder.embed_tokens.weight", "decoder.embed_tokens.weight"
703703
]
@@ -763,7 +763,8 @@ def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
763763
return decoder_outputs
764764

765765

766-
class BartForConditionalGeneration(nn.Module, SupportsV0Only):
766+
class BartForConditionalGeneration(nn.Module, SupportsV0Only, SupportsQuant):
767+
packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
767768
base_model_prefix = "model"
768769

769770
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):

0 commit comments

Comments (0)