From ccb8ef881c8c09f08c509c1912c4fa0f37d7c133 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Tue, 31 Dec 2024 10:40:11 +0000 Subject: [PATCH 1/4] Init Signed-off-by: Jee Jee Li --- vllm/model_executor/models/qwen2_vl.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 6181fe3dd13d8..f18cf0e60dec1 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -53,6 +53,7 @@ GPTQMarlinConfig) from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (ImageItem, ModalityData, MultiModalFieldConfig, MultiModalKwargs, @@ -925,12 +926,19 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal, } # LoRA specific attributes - # TODO Support LoRA for the visual encoder in the future. supported_lora_modules = [ "qkv_proj", "o_proj", "gate_up_proj", "down_proj", + # vison tower + "qkv", + "attn.proj", # Distinguish patch_embed.proj + "fc1", + "fc2", + # projector + "mlp.0", + "mlp.2" ] embedding_modules = {} embedding_padding_modules = [] @@ -1230,3 +1238,12 @@ def load_weights(self, weights: Iterable[Tuple[str, loader = AutoWeightsLoader(self) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) + + def get_mm_mapping(self) -> MultiModelKeys: + """ + Get the module prefix in multimodal models + """ + return MultiModelKeys.from_string_field( + language_model="language_model", + connector="visual.", + tower_model="visual.merger.") From 16e8e281b76c12aaa825a2220fdc2dc9922ce459 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Tue, 31 Dec 2024 10:42:06 +0000 Subject: [PATCH 2/4] format Signed-off-by: Jee Jee Li --- vllm/model_executor/models/qwen2_vl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index f18cf0e60dec1..d43efba83510b 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -942,6 +942,7 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal, ] embedding_modules = {} embedding_padding_modules = [] + # To ensure correct weight loading and mapping. hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={ "lm_head.": "language_model.lm_head.", From 2ac660b843fcd40e79a71d7d8920f015fe14f72f Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Wed, 1 Jan 2025 01:35:23 +0000 Subject: [PATCH 3/4] Done Signed-off-by: Jee Jee Li --- tests/lora/test_qwen2vl.py | 5 ++--- vllm/model_executor/models/qwen2_vl.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/lora/test_qwen2vl.py b/tests/lora/test_qwen2vl.py index c9f48402b0268..ebdd129db5f6a 100644 --- a/tests/lora/test_qwen2vl.py +++ b/tests/lora/test_qwen2vl.py @@ -7,7 +7,7 @@ from vllm.lora.request import LoRARequest from vllm.platforms import current_platform -MODEL_PATH = "Qwen/Qwen2-VL-7B-Instruct" +MODEL_PATH = "Qwen/Qwen2-VL-2B-Instruct" PROMPT_TEMPLATE = ( "<|im_start|>system\nYou are a helpful assistant.<|im_end|>" @@ -49,10 +49,9 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]: # Print the outputs. generated_texts: List[str] = [] for output in outputs: - prompt = output.prompt generated_text = output.outputs[0].text.strip() generated_texts.append(generated_text) - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + print(f"Generated text: {generated_text!r}") return generated_texts diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index d43efba83510b..d8d9ad81f9c51 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -942,7 +942,7 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal, ] embedding_modules = {} embedding_padding_modules = [] - + # To ensure correct weight loading and mapping. hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={ "lm_head.": "language_model.lm_head.", From 46a082ce2c4671da9b23a1284f0b892fe30fc476 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Wed, 1 Jan 2025 01:54:37 +0000 Subject: [PATCH 4/4] Fix typo Signed-off-by: Jee Jee Li --- vllm/model_executor/models/qwen2_vl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index d8d9ad81f9c51..a5053346b1ac6 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -931,7 +931,7 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal, "o_proj", "gate_up_proj", "down_proj", - # vison tower + # vision tower "qkv", "attn.proj", # Distinguish patch_embed.proj "fc1",