This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit 80e117e

Fix lora specific conditions in profile-run (#319)
HabanaAI#256 breaks the LoRA-specific flow, which was handled through the `is_profile_run` flag to distinguish the warmup and profile-run phases. This change introduces a new flag, `is_lora_profile_run`, to handle the LoRA-specific flow in profile-run.
2 parents 84b2490 + f477f85 commit 80e117e
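
In effect, the old `is_profile_run` flag carried two unrelated meanings: "this is a PyTorch profiler run" (repeat the scenario, start the profiler on the driver) and "this is the memory profile run" (build dummy LoRA requests). Below is a minimal standalone sketch of the split, assuming only what the diff further down shows; it is illustrative, not the actual habana_model_runner code.

# A minimal sketch of the flag split introduced by this commit.
# Names mirror the diff below; the logic is simplified and standalone.

def scenario_knobs(use_graphs: bool, has_lora: bool,
                   is_pt_profiler_run: bool = False,
                   is_lora_profile_run: bool = False) -> dict:
    return {
        # PyTorch-profiler concern: repeat the scenario for stable traces.
        "times": 3 if use_graphs or is_pt_profiler_run else 1,
        # LoRA concern: dummy LoRA requests only during the profile run.
        "build_lora_dummies": has_lora and is_lora_profile_run,
    }

# With a single shared flag these two knobs always moved together; after
# the split, profile_run can request dummy LoRA setup in a single pass:
assert scenario_knobs(False, True, is_lora_profile_run=True) == \
    {"times": 1, "build_lora_dummies": True}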

File tree

1 file changed (+8, -6 lines)


vllm/worker/habana_model_runner.py

Lines changed: 8 additions & 6 deletions
@@ -1292,15 +1292,17 @@ def profile_run(self) -> None:
         max_seq_len = min(self.prompt_seq_bucket_cfg[-1],
                           self.max_num_batched_tokens // max_batch_size)
 
-        self.warmup_scenario(max_batch_size, max_seq_len, True, kv_caches)
+        self.warmup_scenario(max_batch_size, max_seq_len, True, kv_caches,
+                             False, True)
         return
 
     def warmup_scenario(self,
                         batch_size,
                         seq_len,
                         is_prompt,
                         kv_caches,
-                        is_profile_run=False) -> None:
+                        is_pt_profiler_run=False,
+                        is_lora_profile_run=False) -> None:
         use_graphs = self._use_graphs(batch_size, seq_len, is_prompt)
         scenario_name = ("warmup_"
                         f"{'prompt' if is_prompt else 'decode'}_"
@@ -1314,7 +1316,7 @@ def warmup_scenario(self,
         # passed in, which contains a lora from the lora warmup path.
         dummy_lora_requests: List[LoRARequest] = []
         dummy_lora_requests_per_seq: List[LoRARequest] = []
-        if self.lora_config and is_profile_run:
+        if self.lora_config and is_lora_profile_run:
             assert self.lora_manager is not None
             with self.lora_manager.dummy_lora_cache():
                 for idx in range(self.lora_config.max_loras):
@@ -1332,8 +1334,8 @@
                 for idx in range(max_num_seqs)
             ]
         self.profiler.start('internal', scenario_name)
-        times = 3 if use_graphs or is_profile_run else 1
-        if self.lora_config and not is_profile_run:
+        times = 3 if use_graphs or is_pt_profiler_run else 1
+        if self.lora_config and not is_lora_profile_run:
             lora_mapping = LoRAMapping(
                 [0] * batch_size * seq_len,
                 [0] * batch_size * seq_len,
@@ -1364,7 +1366,7 @@
             ]
             torch.hpu.synchronize()
         profiler = None
-        if is_profile_run and self.is_driver_worker:
+        if is_pt_profiler_run and self.is_driver_worker:
             profiler = setup_profiler()
             profiler.start()
         for _ in range(times):
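
Putting the hunks together, each mode of `warmup_scenario` now toggles an independent set of behaviors. The sketch below summarizes what the diff above implies for the three call patterns; the mode names and the helper are descriptive inventions, not identifiers from the code, and the profiler additionally requires the driver worker.

# Inferred behavior per mode, from the diff above (illustrative only).
def mode_summary(is_pt_profiler_run: bool, is_lora_profile_run: bool,
                 has_lora: bool = True, use_graphs: bool = False) -> dict:
    return {
        "times": 3 if use_graphs or is_pt_profiler_run else 1,
        "dummy_lora_requests": has_lora and is_lora_profile_run,
        # A zeroed LoRAMapping is set up only in non-profile LoRA passes.
        "zero_lora_mapping": has_lora and not is_lora_profile_run,
        # setup_profiler() also requires self.is_driver_worker in the diff.
        "start_pt_profiler": is_pt_profiler_run,
    }

print("warmup:     ", mode_summary(False, False))
print("pt profiler:", mode_summary(True, False))
print("profile_run:", mode_summary(False, True))  # the new (False, True) call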
