 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors

-from .interfaces import SupportsLoRA, SupportsQuant
+from .interfaces import SupportsQuant
 from .utils import maybe_prefix

 logger = init_logger(__name__)
@@ -108,7 +108,7 @@ def replace_linear_class(
         "rowwise": RowParallelLinear,
     }.get(style, ReplicatedLinear)

-    lora_cls_map = {
+    lora_linear_cls = {
         ColumnParallelLinear: {
             True: ColumnParallelLinearWithShardedLoRA,  # fully sharded
             False: ColumnParallelLinearWithLoRA  # not fully sharded
@@ -117,7 +117,7 @@ def replace_linear_class(
             True: RowParallelLinearWithShardedLoRA,
             False: RowParallelLinearWithLoRA
         },
-        # ReplicatedLinear doesn't supoort fully sharded LoRA yet,
+        # ReplicatedLinear doesn't support fully sharded LoRA yet,
         # so we use the same class for both cases.
         ReplicatedLinear: {
             True: ReplicatedLinearWithLoRA,
@@ -144,7 +144,7 @@ def get_lora_class(cls, fully_sharded: bool = False):
             that supports fully sharded LoRA. Defaults to False.

             """
-            return lora_cls_map[vllm_linear_cls][fully_sharded]
+            return lora_linear_cls[vllm_linear_cls][fully_sharded]

     return HFCompatibleLinear(
         input_size=linear.in_features,
@@ -154,7 +154,7 @@ def get_lora_class(cls, fully_sharded: bool = False):
     )


-class TransformersModel(nn.Module, SupportsQuant, SupportsLoRA):
+class TransformersModel(nn.Module, SupportsQuant):
     embedding_padding_modules = ["lm_head"]
     embedding_modules = ["embed_tokens"
                          ]  # TODO transformers will have a util to get it
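For context, the renamed lora_linear_cls mapping is a two-level lookup: the outer key is the vLLM parallel linear class chosen for a layer, and the inner boolean selects whether the fully sharded LoRA variant is returned. Below is a minimal, self-contained sketch of that dispatch pattern only; the classes are empty placeholders standing in for the real vLLM linear/LoRA layers, and the free-standing get_lora_class helper mirrors the classmethod in the diff purely for illustration.

# Sketch of the two-level LoRA class dispatch; placeholder classes,
# not the actual vLLM layer implementations.

class ColumnParallelLinear: ...
class RowParallelLinear: ...
class ReplicatedLinear: ...

class ColumnParallelLinearWithLoRA: ...
class ColumnParallelLinearWithShardedLoRA: ...
class RowParallelLinearWithLoRA: ...
class RowParallelLinearWithShardedLoRA: ...
class ReplicatedLinearWithLoRA: ...

lora_linear_cls = {
    ColumnParallelLinear: {
        True: ColumnParallelLinearWithShardedLoRA,   # fully sharded
        False: ColumnParallelLinearWithLoRA,         # not fully sharded
    },
    RowParallelLinear: {
        True: RowParallelLinearWithShardedLoRA,
        False: RowParallelLinearWithLoRA,
    },
    # No fully sharded variant for ReplicatedLinear yet, so both
    # entries resolve to the same class.
    ReplicatedLinear: {
        True: ReplicatedLinearWithLoRA,
        False: ReplicatedLinearWithLoRA,
    },
}

def get_lora_class(vllm_linear_cls, fully_sharded: bool = False):
    # Outer lookup: which parallel linear class; inner lookup: sharded or not.
    return lora_linear_cls[vllm_linear_cls][fully_sharded]

assert get_lora_class(RowParallelLinear, True) is RowParallelLinearWithShardedLoRA
assert get_lora_class(ReplicatedLinear, False) is ReplicatedLinearWithLoRA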