diff --git a/vllm/lora/models.py b/vllm/lora/models.py
index 81e0741a03cf..9f9d808679d7 100644
--- a/vllm/lora/models.py
+++ b/vllm/lora/models.py
@@ -344,7 +344,7 @@ def __init__(
 
         self.supported_lora_modules = get_supported_lora_modules(self.model)
         assert self.supported_lora_modules, "No supported LoRA modules found in"
-        f"{self.model.__class__.__name__}."
+        f" {self.model.__class__.__name__}."
         if lora_config.long_lora_scaling_factors:
             # We need to replace rotary emb layer to do batch computation
             # for long lora.
diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py
index c48cb157084d..6f56eb2d5e38 100644
--- a/vllm/model_executor/models/grok1.py
+++ b/vllm/model_executor/models/grok1.py
@@ -504,9 +504,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.unpadded_vocab_size,
             config.hidden_size,
             org_num_embeddings=config.vocab_size,
-            padding_size=DEFAULT_VOCAB_PADDING_SIZE
-            # We need bigger padding if using lora for kernel compatibility
-            if not lora_config else lora_config.lora_vocab_padding_size,
+            padding_size=DEFAULT_VOCAB_PADDING_SIZE,
             quant_config=quant_config,
             prefix=maybe_prefix(prefix, "lm_head"),
         )
diff --git a/vllm/model_executor/models/nemotron_nas.py b/vllm/model_executor/models/nemotron_nas.py
index 264999496876..988b994b7689 100644
--- a/vllm/model_executor/models/nemotron_nas.py
+++ b/vllm/model_executor/models/nemotron_nas.py
@@ -334,14 +334,6 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps):
     }
 
     # LoRA specific attributes
-    supported_lora_modules = [
-        "qkv_proj",
-        "o_proj",
-        "gate_up_proj",
-        "down_proj",
-        "embed_tokens",
-        "lm_head",
-    ]
     embedding_modules = {
         "embed_tokens": "input_embeddings",
         "lm_head": "output_embeddings",
diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py
index 6035994f4336..e5ff9ceddef7 100644
--- a/vllm/model_executor/models/phi4mm.py
+++ b/vllm/model_executor/models/phi4mm.py
@@ -955,11 +955,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.unpadded_vocab_size,
             config.hidden_size,
             org_num_embeddings=config.vocab_size,
-            padding_size=(
-                DEFAULT_VOCAB_PADDING_SIZE
-                # We need bigger padding if using lora for kernel
-                # compatibility
-                if not lora_config else lora_config.lora_vocab_padding_size),
+            padding_size=DEFAULT_VOCAB_PADDING_SIZE,
             quant_config=quant_config,
         )
         if config.tie_word_embeddings: