fixes

Varun Sundar Rabindranath · Varun Sundar Rabindranath · commit b8e4387e57a1 · 2025-03-13T10:13:01.000-04:00
Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
@@ -242,12 +242,12 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         embeddings_indices = torch.narrow(
             self.punica_wrapper._embeddings_indices, 1, 0, x.size(0))
 
-        indices = embeddings_indices[1].view_as(x)
+        indices = embeddings_indices[1]
         full_lora_a_embeddings = F.embedding(
             x + indices,
             self.lora_a_stacked_2d,
         )
-        indices = embeddings_indices[0].view_as(x)
+        indices = embeddings_indices[0]
         full_output = self.base_layer.forward(x +
                                               (indices * added_tokens_mask))