
Commit 6ee0b57

Skipped matmuls where no loras are needed
Signed-off-by: Akshat Tripathi <akshat@krai.ai>
1 parent: 2aacb34

1 file changed (+9, −4 lines)


vllm/lora/ops/xla_ops/pallas.py

Lines changed: 9 additions & 4 deletions
@@ -20,15 +20,20 @@ def _():
 
     for i in range(max_num_loras):
         mask_ref[...] = jnp.zeros_like(mask_ref[...], dtype=jnp.float32)
+        valid = False
         for j in range(bT):
+            valid |= idx_ref[j + bT * t] == i
+
             @pl.when(idx_ref[j + bT * t] == i)
             def _():
                 mask_ref[j, :] = jnp.ones((bL, ), dtype=jnp.float32)
 
-        acc_ref[...] += jax.lax.dot_general(
-            inp_ref[...],
-            lora_ref[i, ...], (((1, ), (1, )), ((), ())),
-            preferred_element_type=jnp.float32) * mask_ref[...]
+        @pl.when(valid)
+        def _():
+            acc_ref[...] += jax.lax.dot_general(
+                inp_ref[...],
+                lora_ref[i, ...], (((1, ), (1, )), ((), ())),
+                preferred_element_type=jnp.float32) * mask_ref[...]
 
     @pl.when(pl.program_id(2) == pl.num_programs(2) - 1)
     def _():
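
For context: the change wraps the per-LoRA matmul in @pl.when(valid), where valid is a traced boolean that becomes true only if some row of the current bT-token tile maps to LoRA i, so the dot_general is skipped at run time for adapters no token in the tile uses. Below is a minimal plain-JAX sketch of the masked computation one such step performs; the function name bgmv_reference and the shapes are assumptions made for illustration, not the Pallas kernel itself:

import jax
import jax.numpy as jnp

def bgmv_reference(idx, inp, lora):
    """Dense reference for the masked bgmv computation (assumed shapes).

    idx:  (T,)       per-token LoRA index
    inp:  (T, D)     token activations
    lora: (N, L, D)  stacked LoRA matrices
    """
    T, _ = inp.shape
    N, L, _ = lora.shape
    acc = jnp.zeros((T, L), dtype=jnp.float32)
    for i in range(N):
        # Zero out rows whose token does not use LoRA i.
        mask = (idx == i).astype(jnp.float32)[:, None]
        # Same contraction as the kernel: contract dim 1 of inp with
        # dim 1 of lora[i] (i.e. inp @ lora[i].T), accumulated in fp32.
        acc += jax.lax.dot_general(
            inp, lora[i], (((1, ), (1, )), ((), ())),
            preferred_element_type=jnp.float32) * mask
    return acc

This dense reference always pays for all N matmuls even when a mask is all zeros; the kernel's @pl.when(valid) guard instead makes the cost proportional to the number of adapters actually present in the tile, which is the point of the commit.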
