From ede21547d1559710ddba39bcf74a99ed4e73e572 Mon Sep 17 00:00:00 2001
From: LRL-ModelCloud <165116337+LRL-ModelCloud@users.noreply.github.com>
Date: Sat, 29 Jun 2024 03:56:40 +0800
Subject: [PATCH] init method is executed after from_quantized(), so it cannot
 obtain the correct layer_modules (#112)

Co-authored-by: LRL-ModelCloud
---
 gptqmodel/models/deepseek_v2.py | 36 ++++++------------------------------
 1 file changed, 6 insertions(+), 30 deletions(-)

diff --git a/gptqmodel/models/deepseek_v2.py b/gptqmodel/models/deepseek_v2.py
index 59a3858c..d92ffbab 100644
--- a/gptqmodel/models/deepseek_v2.py
+++ b/gptqmodel/models/deepseek_v2.py
@@ -18,6 +18,11 @@ class DeepSeekV2GPTQ(BaseGPTQModel):
 
     # DeepSeek-V2 uses 160 experts, v2-lite is auto-switched during __init__
     layer_modules = [
+        # DeepSeek-V2 and DeepSeek-V2-Lite use the same model_type but different self_attn,
+        # so we provide both layer_modules variants here.
+        # DeepSeek-V2-Lite usage
+        ["self_attn.q_proj", "self_attn.kv_a_proj_with_mqa", "self_attn.kv_b_proj"],
+
         # DeepSeek-V2 usage, included in layer 0-59
         ["self_attn.q_a_proj", "self_attn.q_b_proj", "self_attn.kv_a_proj_with_mqa", "self_attn.kv_b_proj"],
 
@@ -34,33 +39,4 @@ class DeepSeekV2GPTQ(BaseGPTQModel):
         # included in layer 1-59
         ["mlp.shared_experts.gate_proj", "mlp.shared_experts.up_proj"],
         ["mlp.shared_experts.down_proj"],
-    ]
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        num_experts = getattr(self.model.config, self.dynamic_expert_index)
-
-        # DeepSeek-V2 and DeepSeek-V2-Lite use same model_type, but different self_attn, expert count, etc
-        # so we need to adjust the layer_modules based on the expert count
-        # DeepSeek-V2-Lite uses 64
-        if num_experts == 64:
-            self.layer_modules = [
-                # DeepSeek-V2-Lite usage
-                ["self_attn.q_proj", "self_attn.kv_a_proj_with_mqa", "self_attn.kv_b_proj"],
-
-                ["self_attn.o_proj"],
-
-                # included in layer 0
-                ["mlp.gate_proj", "mlp.up_proj"],
-                ["mlp.down_proj"],
-
-                # included in layer 1-59, uses dynamic_expert_index
-                [f"mlp.experts.{EXPERT_INDEX_PLACEHOLDER}.gate_proj",
-                 f"mlp.experts.{EXPERT_INDEX_PLACEHOLDER}.up_proj"],
-                [f"mlp.experts.{EXPERT_INDEX_PLACEHOLDER}.down_proj"],
-
-                # included in layer 1-59
-                ["mlp.shared_experts.gate_proj", "mlp.shared_experts.up_proj"],
-                ["mlp.shared_experts.down_proj"],
-            ]
+    ]
\ No newline at end of file
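
Note on why the merged static list above works: an entry in layer_modules whose attribute path does not exist on a given decoder layer can simply be skipped when modules are resolved, so listing both the DeepSeek-V2 ("self_attn.q_a_proj"/"self_attn.q_b_proj") and DeepSeek-V2-Lite ("self_attn.q_proj") attention projections side by side is harmless. Below is a minimal sketch of that resolution step; resolve_layer_modules and get_submodule_or_none are hypothetical helper names for illustration, not GPTQModel's actual API.

    # Hypothetical sketch -- helper names are illustrative, not GPTQModel's real API.
    import torch.nn as nn

    def get_submodule_or_none(layer: nn.Module, path: str):
        # Walk a dotted path like "self_attn.q_proj"; return None if any hop is missing.
        module = layer
        for attr in path.split("."):
            module = getattr(module, attr, None)
            if module is None:
                return None
        return module

    def resolve_layer_modules(layer: nn.Module, layer_modules):
        # Keep only the entries that actually exist on this decoder layer.
        # Because missing paths are dropped, layer_modules can safely carry
        # both the DeepSeek-V2 and DeepSeek-V2-Lite attention projections.
        resolved = []
        for group in layer_modules:
            present = {name: get_submodule_or_none(layer, name) for name in group}
            present = {name: mod for name, mod in present.items() if mod is not None}
            if present:
                resolved.append(present)
        return resolved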