
Commit 41e8b56

Authored and committed by rshaw@neuralmagic.com
Revert "[Bugfix] Make the device profiler include LoRA memory. (#14469)"
This reverts commit b8b0ccb.
1 parent 9513290 commit 41e8b56
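For context on what this revert changes: DeviceMemoryProfiler is used as a context manager, so only allocations made between its __enter__ and __exit__ are reflected in m.consumed_memory. The toy profiler below is a hypothetical stand-in, not vLLM's actual implementation; it mimics the interface seen in the diff below using torch.cuda.memory_allocated, and illustrates why code moved after the with block (as this revert does with the LoRA setup) is excluded from the reported load size.

# Hypothetical stand-in for vLLM's DeviceMemoryProfiler, for illustration only.
import torch

class ToyDeviceMemoryProfiler:
    """Records GPU memory allocated between __enter__ and __exit__."""

    def __enter__(self):
        self._start = torch.cuda.memory_allocated()
        return self

    def __exit__(self, *exc):
        # Only allocations made while the block was active are counted.
        self.consumed_memory = torch.cuda.memory_allocated() - self._start
        return False  # do not suppress exceptions

if torch.cuda.is_available():
    with ToyDeviceMemoryProfiler() as m:
        weights = torch.empty(1 << 20, device="cuda")   # inside the block: counted
    lora_tables = torch.empty(1 << 18, device="cuda")   # after the block: NOT counted
    print(f"profiled bytes: {m.consumed_memory}")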

File tree

1 file changed (+29 -29 lines changed)

vllm/worker/model_runner.py

Lines changed: 29 additions & 29 deletions

@@ -1111,41 +1111,41 @@ def load_model(self) -> None:
         with DeviceMemoryProfiler(self.device) as m:
             time_before_load = time.perf_counter()
             self.model = get_model(vllm_config=self.vllm_config)
-            if self.lora_config:
-                assert supports_lora(
-                    self.model
-                ), f"{self.model.__class__.__name__} does not support LoRA yet."
-
-                if supports_multimodal(self.model):
-                    logger.warning(
-                        "Regarding multimodal models, vLLM currently "
-                        "only supports adding LoRA to language model.")
-                # It's necessary to distinguish between the
-                # max_position_embeddings of VLMs and LLMs.
-                if hasattr(self.model.config, "max_position_embeddings"):
-                    max_pos_embeddings = (
-                        self.model.config.max_position_embeddings)
-                else:
-                    max_pos_embeddings = (
-                        self.model.config.text_config.max_position_embeddings)
-
-                self.lora_manager = LRUCacheWorkerLoRAManager(
-                    self.scheduler_config.max_num_seqs,
-                    self.scheduler_config.max_num_batched_tokens,
-                    self.vocab_size,
-                    self.lora_config,
-                    self.device,
-                    self.model.embedding_modules,
-                    self.model.embedding_padding_modules,
-                    max_position_embeddings=max_pos_embeddings,
-                )
-                self.model = self.lora_manager.create_lora_manager(self.model)
             time_after_load = time.perf_counter()

         self.model_memory_usage = m.consumed_memory
         logger.info("Model loading took %.4f GB and %.6f seconds",
                     self.model_memory_usage / float(2**30),
                     time_after_load - time_before_load)
+
+        if self.lora_config:
+            assert supports_lora(
+                self.model
+            ), f"{self.model.__class__.__name__} does not support LoRA yet."
+
+            if supports_multimodal(self.model):
+                logger.warning("Regarding multimodal models, vLLM currently "
+                               "only supports adding LoRA to language model.")
+            # It's necessary to distinguish between the max_position_embeddings
+            # of VLMs and LLMs.
+            if hasattr(self.model.config, "max_position_embeddings"):
+                max_pos_embeddings = self.model.config.max_position_embeddings
+            else:
+                max_pos_embeddings = (
+                    self.model.config.text_config.max_position_embeddings)
+
+            self.lora_manager = LRUCacheWorkerLoRAManager(
+                self.scheduler_config.max_num_seqs,
+                self.scheduler_config.max_num_batched_tokens,
+                self.vocab_size,
+                self.lora_config,
+                self.device,
+                self.model.embedding_modules,
+                self.model.embedding_padding_modules,
+                max_position_embeddings=max_pos_embeddings,
+            )
+            self.model = self.lora_manager.create_lora_manager(self.model)
+
         if self.prompt_adapter_config:
             self.prompt_adapter_manager = LRUCacheWorkerPromptAdapterManager(
                 self.scheduler_config.max_num_seqs,
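Net effect of the revert: the LoRA manager is once again created after the DeviceMemoryProfiler block has exited, so self.model_memory_usage and the "Model loading took ..." log line cover only the base model weights and exclude LoRA allocations, restoring the behavior that #14469 had changed.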
