Merged
2 changes: 1 addition & 1 deletion vllm/lora/models.py
@@ -344,7 +344,7 @@ def __init__(

     self.supported_lora_modules = get_supported_lora_modules(self.model)
     assert self.supported_lora_modules, "No supported LoRA modules found in"
-    f"{self.model.__class__.__name__}."
+    f" {self.model.__class__.__name__}."
     if lora_config.long_lora_scaling_factors:
         # We need to replace rotary emb layer to do batch computation
         # for long lora.
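For context, the one-character fix above hinges on Python's implicit string-literal concatenation: adjacent literals are joined with no separator, so the space between "in" and the class name must be written explicitly. A minimal sketch (DummyModel is a stand-in, not from the PR):

```python
class DummyModel:
    pass

model = DummyModel()

# Adjacent string literals concatenate at compile time with no separator.
# Without the leading space in the f-string, the message would read
# "...found inDummyModel."; the fix adds the space back.
message = ("No supported LoRA modules found in"
           f" {model.__class__.__name__}.")
print(message)  # No supported LoRA modules found in DummyModel.
```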
4 changes: 1 addition & 3 deletions vllm/model_executor/models/grok1.py
@@ -504,9 +504,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.unpadded_vocab_size,
         config.hidden_size,
         org_num_embeddings=config.vocab_size,
-        padding_size=DEFAULT_VOCAB_PADDING_SIZE
-        # We need bigger padding if using lora for kernel compatibility
-        if not lora_config else lora_config.lora_vocab_padding_size,
Collaborator (PR author): Grok's LoRA usage is unlikely to involve an expanded vocabulary, so this code path is unnecessary.
+        padding_size=DEFAULT_VOCAB_PADDING_SIZE,
         quant_config=quant_config,
         prefix=maybe_prefix(prefix, "lm_head"),
     )
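For context on what the removed branch controlled: the lm_head vocab size is rounded up to a multiple of a padding value, and LoRA historically demanded a larger multiple for kernel alignment. The helper below is only a sketch of that rounding, not the vLLM implementation; the constants 64 and 256 are vLLM's usual defaults for DEFAULT_VOCAB_PADDING_SIZE and LoRAConfig.lora_vocab_padding_size, assumed here for illustration:

```python
def pad_vocab_size(vocab_size: int, pad_to: int) -> int:
    # Round vocab_size up to the next multiple of pad_to.
    return -(-vocab_size // pad_to) * pad_to

# Without LoRA, the default padding applies.
print(pad_vocab_size(131_075, 64))   # 131136
# The removed branch padded further when LoRA was enabled, so the LoRA
# kernels could address (possibly expanded) vocab rows with aligned strides.
print(pad_vocab_size(131_075, 256))  # 131328
```

Since Grok's LoRA adapters are not expected to expand the vocabulary, the larger padding buys nothing here and the unconditional default is used instead.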
8 changes: 0 additions & 8 deletions vllm/model_executor/models/nemotron_nas.py
@@ -334,14 +334,6 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps):
     }

     # LoRA specific attributes
-    supported_lora_modules = [
-        "qkv_proj",
-        "o_proj",
-        "gate_up_proj",
-        "down_proj",
-        "embed_tokens",
-        "lm_head",
-    ]
     embedding_modules = {
Collaborator (PR author): The static variable supported_lora_modules has been deprecated for some time, so it is removed outright.

"embed_tokens": "input_embeddings",
"lm_head": "output_embeddings",
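The static list is redundant because vLLM now derives the supported LoRA modules from the model instance itself; the models.py hunk above calls get_supported_lora_modules(self.model) for exactly this. As a rough illustration only (the real helper in vLLM is more involved and matches vLLM's own layer types rather than raw torch ones), discovery could look like:

```python
import torch.nn as nn

def discover_lora_modules(model: nn.Module) -> list[str]:
    # Walk the module tree and collect leaf names of layers that could
    # accept LoRA adapters (hypothetical criterion for this sketch).
    supported = []
    for name, module in model.named_modules():
        if not name:  # skip the root module itself
            continue
        if isinstance(module, (nn.Linear, nn.Embedding)):
            supported.append(name.split(".")[-1])  # e.g. "qkv_proj", "lm_head"
    return sorted(set(supported))
```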
6 changes: 1 addition & 5 deletions vllm/model_executor/models/phi4mm.py
@@ -955,11 +955,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.unpadded_vocab_size,
         config.hidden_size,
         org_num_embeddings=config.vocab_size,
-        padding_size=(
-            DEFAULT_VOCAB_PADDING_SIZE
-            # We need bigger padding if using lora for kernel
-            # compatibility
-            if not lora_config else lora_config.lora_vocab_padding_size),
Collaborator (PR author): Ditto, same reasoning as the grok1.py change above.

+        padding_size=DEFAULT_VOCAB_PADDING_SIZE,
         quant_config=quant_config,
     )
     if config.tie_word_embeddings: