From b9930c06faf9fafb90ebcbc64d4d63178daf6a39 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Thu, 25 Sep 2025 15:49:01 +0000
Subject: [PATCH] Done

Signed-off-by: Jee Jee Li
---
 vllm/lora/punica_wrapper/punica_gpu.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/vllm/lora/punica_wrapper/punica_gpu.py b/vllm/lora/punica_wrapper/punica_gpu.py
index 2db0e9fee142..467f50050eb2 100644
--- a/vllm/lora/punica_wrapper/punica_gpu.py
+++ b/vllm/lora/punica_wrapper/punica_gpu.py
@@ -11,7 +11,6 @@
 
 import torch
 
-import vllm.envs as envs
 from vllm.lora.layers import LoRAMapping
 from vllm.triton_utils import HAS_TRITON
 
@@ -41,14 +40,8 @@ def __init__(self, max_num_batched_tokens: int, max_batches: int,
                                                       max_num_batched_tokens,
                                                       device=device)
 
-        # When cudagraph capture size is greater than max_num_seqs (max_batches,
-        # here), V0 captures the graph as if max_num_seqs is set to
-        # the capture size.
-        # V1 doesn't have this problem and always respects max_num_seqs.
-        max_num_prompts = (max_batches
-                           if envs.VLLM_USE_V1 else max_num_batched_tokens)
         self.prompt_mapping_meta = LoRAKernelMeta.make(self.max_loras,
-                                                       max_num_prompts,
+                                                       max_batches,
                                                        device=device)
 
     def update_metadata(self, mapping: LoRAMapping,
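
For context, a minimal illustrative sketch of the sizing decision this patch removes. The helper name prompt_mapping_rows and the use_v1 flag are invented for illustration only; they stand in for the envs.VLLM_USE_V1 branch that the diff deletes and are not part of vLLM's API.

# Hypothetical sketch, not vLLM code: shows why the V0 path over-allocated the
# prompt-mapping metadata and why the V1-only path can size it by max_batches.
def prompt_mapping_rows(max_batches: int,
                        max_num_batched_tokens: int,
                        use_v1: bool) -> int:
    if use_v1:
        # V1 always respects max_num_seqs (max_batches here), so the
        # prompt-mapping metadata never needs more than max_batches rows.
        return max_batches
    # V0 could capture CUDA graphs at a batch size larger than max_num_seqs,
    # so the metadata had to be over-allocated to max_num_batched_tokens.
    return max_num_batched_tokens


# After this patch only the V1 path remains, so the constructor simply passes
# max_batches to LoRAKernelMeta.make for the prompt mapping.
print(prompt_mapping_rows(max_batches=256,
                          max_num_batched_tokens=8192,
                          use_v1=True))  # -> 256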