From ede21547d1559710ddba39bcf74a99ed4e73e572 Mon Sep 17 00:00:00 2001
From: LRL-ModelCloud <165116337+LRL-ModelCloud@users.noreply.github.com>
Date: Sat, 29 Jun 2024 03:56:40 +0800
Subject: [PATCH] init method is executed after from_quantized(), so it cannot
 obtain the correct layer_modules (#112)

Co-authored-by: LRL-ModelCloud
---
 gptqmodel/models/deepseek_v2.py | 36 ++++++------------------------------
 1 file changed, 6 insertions(+), 30 deletions(-)

diff --git a/gptqmodel/models/deepseek_v2.py b/gptqmodel/models/deepseek_v2.py
index 59a3858c..d92ffbab 100644
--- a/gptqmodel/models/deepseek_v2.py
+++ b/gptqmodel/models/deepseek_v2.py
@@ -18,6 +18,11 @@ class DeepSeekV2GPTQ(BaseGPTQModel):
 
     # DeepSeek-V2 uses 160 experts, v2-lite is auto-switched during __init__
     layer_modules = [
+        # DeepSeek-V2 and DeepSeek-V2-Lite use the same model_type but different self_attn,
+        # so we provide both layer_modules variants here.
+        # DeepSeek-V2-Lite usage
+        ["self_attn.q_proj", "self_attn.kv_a_proj_with_mqa", "self_attn.kv_b_proj"],
+
         # DeepSeek-V2 usage, included in layer 0-59
         ["self_attn.q_a_proj", "self_attn.q_b_proj", "self_attn.kv_a_proj_with_mqa", "self_attn.kv_b_proj"],
 
@@ -34,33 +39,4 @@ class DeepSeekV2GPTQ(BaseGPTQModel):
         # included in layer 1-59
         ["mlp.shared_experts.gate_proj", "mlp.shared_experts.up_proj"],
         ["mlp.shared_experts.down_proj"],
-    ]
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        num_experts = getattr(self.model.config, self.dynamic_expert_index)
-
-        # DeepSeek-V2 and DeepSeek-V2-Lite use same model_type, but different self_attn, expert count, etc
-        # so we need to adjust the layer_modules based on the expert count
-        # DeepSeek-V2-Lite uses 64
-        if num_experts == 64:
-            self.layer_modules = [
-                # DeepSeek-V2-Lite usage
-                ["self_attn.q_proj", "self_attn.kv_a_proj_with_mqa", "self_attn.kv_b_proj"],
-
-                ["self_attn.o_proj"],
-
-                # included in layer 0
-                ["mlp.gate_proj", "mlp.up_proj"],
-                ["mlp.down_proj"],
-
-                # included in layer 1-59, uses dynamic_expert_index
-                [f"mlp.experts.{EXPERT_INDEX_PLACEHOLDER}.gate_proj",
-                 f"mlp.experts.{EXPERT_INDEX_PLACEHOLDER}.up_proj"],
-                [f"mlp.experts.{EXPERT_INDEX_PLACEHOLDER}.down_proj"],
-
-                # included in layer 1-59
-                ["mlp.shared_experts.gate_proj", "mlp.shared_experts.up_proj"],
-                ["mlp.shared_experts.down_proj"],
-            ]
+    ]
\ No newline at end of file
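
Note on why the merged static list above works: an entry in layer_modules whose attribute path does not exist on a given decoder layer can simply be skipped when modules are resolved, so listing both the DeepSeek-V2 ("self_attn.q_a_proj"/"self_attn.q_b_proj") and DeepSeek-V2-Lite ("self_attn.q_proj") attention projections side by side is harmless. Below is a minimal sketch of that resolution step; resolve_layer_modules and get_submodule_or_none are hypothetical helper names for illustration, not GPTQModel's actual API.

    # Hypothetical sketch -- helper names are illustrative, not GPTQModel's real API.
    import torch.nn as nn

    def get_submodule_or_none(layer: nn.Module, path: str):
        # Walk a dotted path like "self_attn.q_proj"; return None if any hop is missing.
        module = layer
        for attr in path.split("."):
            module = getattr(module, attr, None)
            if module is None:
                return None
        return module

    def resolve_layer_modules(layer: nn.Module, layer_modules):
        # Keep only the entries that actually exist on this decoder layer.
        # Because missing paths are dropped, layer_modules can safely carry
        # both the DeepSeek-V2 and DeepSeek-V2-Lite attention projections.
        resolved = []
        for group in layer_modules:
            present = {name: get_submodule_or_none(layer, name) for name in group}
            present = {name: mod for name, mod in present.items() if mod is not None}
            if present:
                resolved.append(present)
        return resolved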