
Commit 8038595

jeejeelee authored and yewentao256 committed
[V0 deprecation] Clean up LoRA (#25686)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
1 parent a355561 commit 8038595

1 file changed (+1, -8 lines)


vllm/lora/punica_wrapper/punica_gpu.py

Lines changed: 1 addition & 8 deletions
@@ -11,7 +11,6 @@
 
 import torch
 
-import vllm.envs as envs
 from vllm.lora.layers import LoRAMapping
 from vllm.triton_utils import HAS_TRITON
 
@@ -41,14 +40,8 @@ def __init__(self, max_num_batched_tokens: int, max_batches: int,
                                                        max_num_batched_tokens,
                                                        device=device)
 
-        # When cudagraph capture size is greater than max_num_seqs (max_batches,
-        # here), V0 captures the graph as if max_num_seqs is set to
-        # the capture size.
-        # V1 doesn't have this problem and always respects max_num_seqs.
-        max_num_prompts = (max_batches
-                           if envs.VLLM_USE_V1 else max_num_batched_tokens)
         self.prompt_mapping_meta = LoRAKernelMeta.make(self.max_loras,
-                                                       max_num_prompts,
+                                                       max_batches,
                                                        device=device)
 
     def update_metadata(self, mapping: LoRAMapping,
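
For reference, a minimal standalone sketch of the sizing decision this commit removes: before the V0 deprecation, the number of prompt-mapping metadata rows depended on VLLM_USE_V1; with V0 gone, max_batches always suffices. Only max_batches, max_num_batched_tokens, and VLLM_USE_V1 come from the diff; the helper functions below are illustrative assumptions, not part of vLLM's API.

# Standalone illustration (assumption: not vLLM code) of why the
# VLLM_USE_V1 branch could be dropped once V0 was deprecated.

def prompt_mapping_rows_before(max_batches: int,
                               max_num_batched_tokens: int,
                               use_v1: bool) -> int:
    # V0 could capture CUDA graphs as if max_num_seqs equalled the capture
    # size, so the prompt-mapping metadata had to be padded up to the
    # token budget whenever V0 was in use.
    return max_batches if use_v1 else max_num_batched_tokens

def prompt_mapping_rows_after(max_batches: int) -> int:
    # V1 always respects max_num_seqs (max_batches here), so that bound suffices.
    return max_batches

if __name__ == "__main__":
    assert prompt_mapping_rows_before(8, 2048, use_v1=True) == 8
    assert prompt_mapping_rows_before(8, 2048, use_v1=False) == 2048
    assert prompt_mapping_rows_after(8) == 8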
