From 86e01f950d53aa3f207d36bd56ff50e751eec0d1 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Fri, 11 Oct 2024 22:37:09 +0800
Subject: [PATCH 1/4] Modify MiniCPM lm_head

---
 vllm/model_executor/models/minicpm.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py
index 6bba1594c270..41c2877194bb 100644
--- a/vllm/model_executor/models/minicpm.py
+++ b/vllm/model_executor/models/minicpm.py
@@ -474,17 +474,18 @@ def __init__(
         unpadded_vocab_size = config.vocab_size
         if lora_config:
             unpadded_vocab_size += lora_config.lora_extra_vocab_size
-        if not self.config.tie_word_embeddings:
-            self.lm_head = ParallelLMHead(
-                unpadded_vocab_size,
-                config.hidden_size,
-                org_num_embeddings=config.vocab_size,
-                padding_size=DEFAULT_VOCAB_PADDING_SIZE
-                # We need bigger padding if using lora for kernel
-                # compatibility
-                if not lora_config else lora_config.lora_vocab_padding_size,
-                quant_config=quant_config,
-            )
+        self.lm_head = ParallelLMHead(
+            unpadded_vocab_size,
+            config.hidden_size,
+            org_num_embeddings=config.vocab_size,
+            padding_size=DEFAULT_VOCAB_PADDING_SIZE
+            # We need bigger padding if using lora for kernel
+            # compatibility
+            if not lora_config else lora_config.lora_vocab_padding_size,
+            quant_config=quant_config,
+        )
+        if config.tie_word_embeddings:
+            self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens)
         self.scale_width = self.config.hidden_size / self.config.dim_model_base

         self.logits_processor = LogitsProcessor(unpadded_vocab_size,
@@ -517,11 +518,7 @@ def compute_logits(
         sampling_metadata: SamplingMetadata,
     ) -> Optional[torch.Tensor]:
         hidden_states = hidden_states / self.scale_width
-        if self.config.tie_word_embeddings:
-            lm_head = self.model.embed_tokens
-        else:
-            lm_head = self.lm_head
-        logits = self.logits_processor(lm_head, hidden_states,
+        logits = self.logits_processor(self.lm_head, hidden_states,
                                        sampling_metadata)
         return logits

From 4ccacc30bd80b494454064c72d171ff5768485d6 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Sat, 12 Oct 2024 13:19:11 +0800
Subject: [PATCH 2/4] Fix MiniCPM3 LoRA mapping bug

---
 vllm/model_executor/models/minicpm3.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/vllm/model_executor/models/minicpm3.py b/vllm/model_executor/models/minicpm3.py
index c37bc5ad7c38..f81c3a650baa 100644
--- a/vllm/model_executor/models/minicpm3.py
+++ b/vllm/model_executor/models/minicpm3.py
@@ -216,6 +216,28 @@ def _init_layers(


 class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
+    packed_modules_mapping = {
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # LoRA specific attributes
+    supported_lora_modules = [
+        "kv_a_proj_with_mqa",
+        "q_a_proj",
+        "q_b_proj",
+        "kv_b_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+        "embed_tokens",
+        "lm_head",
+    ]
+
+    # `embedding_modules`` and `embedding_padding_modules`
+    # are inherited from MiniCPMForCausalLM

     def _init_model(self):
         self.model = MiniCPM3Model(config=self.config,

From 03ff616b09212fab2333132578caa8cdd2876f7e Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Sat, 12 Oct 2024 13:23:07 +0800
Subject: [PATCH 3/4] Fix typo

---
 vllm/model_executor/models/minicpm3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/minicpm3.py b/vllm/model_executor/models/minicpm3.py
index f81c3a650baa..3b5fd95328d7 100644
--- a/vllm/model_executor/models/minicpm3.py
+++ b/vllm/model_executor/models/minicpm3.py
@@ -236,7 +236,7 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
         "lm_head",
     ]

-    # `embedding_modules`` and `embedding_padding_modules`
+    # `embedding_modules` and `embedding_padding_modules`
     # are inherited from MiniCPMForCausalLM

     def _init_model(self):
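Note on PATCH 1: the constructor now always builds `self.lm_head` and, when
`config.tie_word_embeddings` is set, ties it to `self.model.embed_tokens`
through `tie_weights`, so `compute_logits` can always pass `self.lm_head`
instead of branching on the config. A minimal sketch of the weight-tying
idea in plain PyTorch (`TiedLMHead` is a hypothetical stand-in, not vLLM's
`ParallelLMHead`):

    import torch
    import torch.nn as nn

    class TiedLMHead(nn.Module):
        """LM head whose projection matrix can be shared with an embedding."""

        def __init__(self, vocab_size: int, hidden_size: int):
            super().__init__()
            self.weight = nn.Parameter(torch.empty(vocab_size, hidden_size))

        def tie_weights(self, embed_tokens: nn.Embedding) -> "TiedLMHead":
            # Reuse the embedding's parameter object so both modules always
            # see the same weights.
            self.weight = embed_tokens.weight
            return self

        def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
            # Project hidden states back onto the vocabulary.
            return hidden_states @ self.weight.t()

    embed = nn.Embedding(100, 16)
    head = TiedLMHead(100, 16).tie_weights(embed)
    assert head.weight is embed.weight  # a single shared parameter

Note on PATCH 2: `packed_modules_mapping` tells the LoRA loader that the
checkpoint weights `gate_proj` and `up_proj` fold into the single fused
`gate_up_proj` module. MiniCPM3 overrides the mapping inherited from
MiniCPMForCausalLM because its MLA attention uses separate low-rank
projections (`q_a_proj`, `q_b_proj`, `kv_a_proj_with_mqa`, `kv_b_proj`)
rather than a fused `qkv_proj`, so the inherited packing does not match its
attention modules.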
From 09ed30695a4f57931f512d15b24536c36a07fe12 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Sat, 12 Oct 2024 23:12:14 +0800
Subject: [PATCH 4/4] Fix ChatGLM LoRA bug

---
 vllm/lora/models.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/lora/models.py b/vllm/lora/models.py
index 0dc54516f867..aaadca9a4d16 100644
--- a/vllm/lora/models.py
+++ b/vllm/lora/models.py
@@ -337,7 +337,11 @@ def __init__(
         self.packed_modules_mapping = copy.deepcopy(
             self.model.packed_modules_mapping)
         # Used to indicate whether the model is a multimodal model
-        self.supports_mm: bool = supports_multimodal(self.model)
+        self.supports_mm: bool = (
+            supports_multimodal(self.model)
+            # In case the model only supports LoRA for
+            # text modules (e.g. ChatGLM)
+            and hasattr(self.model, "get_mm_mapping"))
         self.packed_modules: Dict[str, List[str]] = {}
         self.modules: Dict[str, "BaseLayerWithLoRA"] = {}
         # Dict instead of a Set for compatibility with LRUCache.
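Note on PATCH 4: `supports_multimodal()` is true for ChatGLM-style models
even though their LoRA support only covers the text modules, so the LoRA
manager now additionally requires a `get_mm_mapping` method before treating
the model as multimodal. A self-contained sketch of the guard's effect;
`DummyVLM`, `DummyChatGLM`, and the simplified `supports_multimodal` below
are illustrative stand-ins for vLLM's real interfaces:

    def supports_multimodal(model) -> bool:
        # Simplified placeholder for vLLM's interface check.
        return getattr(model, "is_multimodal", False)

    class DummyVLM:
        is_multimodal = True

        def get_mm_mapping(self):
            # Maps multimodal components to module prefixes.
            return {"language_model": "model"}

    class DummyChatGLM:
        # Advertises multimodal support, but exposes no module mapping
        # because LoRA only applies to its text modules.
        is_multimodal = True

    for model in (DummyVLM(), DummyChatGLM()):
        supports_mm = supports_multimodal(model) and hasattr(
            model, "get_mm_mapping")
        print(type(model).__name__, "->", supports_mm)

    # Output:
    # DummyVLM -> True
    # DummyChatGLM -> False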