From a259fb06bb772f80c4da3c11391f8bb99822cdb1 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 11:54:29 +0800
Subject: [PATCH 1/7] add support to MOSS model

---
 auto_gptq/modeling/_const.py |  2 +-
 auto_gptq/modeling/auto.py   |  4 +++-
 auto_gptq/modeling/moss.py   | 12 ++++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 auto_gptq/modeling/moss.py

diff --git a/auto_gptq/modeling/_const.py b/auto_gptq/modeling/_const.py
index 6a427600cef065..3bd6be439dc35c 100644
--- a/auto_gptq/modeling/_const.py
+++ b/auto_gptq/modeling/_const.py
@@ -6,7 +6,7 @@
 CPU = device("cpu")
 CUDA = device("cuda:0")
 
-SUPPORTED_MODELS = ["bloom", "gptj", "gpt_neox", "opt"]
+SUPPORTED_MODELS = ["bloom", "gptj", "gpt_neox", "opt", "moss"]
 if parse_version(transformers_version) >= parse_version("v4.28.0"):
     SUPPORTED_MODELS.append("llama")
 
diff --git a/auto_gptq/modeling/auto.py b/auto_gptq/modeling/auto.py
index f7b393294f436a..2ddafd274393dd 100644
--- a/auto_gptq/modeling/auto.py
+++ b/auto_gptq/modeling/auto.py
@@ -4,6 +4,7 @@
 from .gpt_neox import GPTNeoXGPTQForCausalLM
 from .gptj import GPTJGPTQForCausalLM
 from .llama import LlamaGPTQForCausalLM
+from .moss import MOSSGPTQForCausalLM
 from .opt import OPTGPTQForCausalLM
 
 
@@ -12,7 +13,8 @@
     "gpt_neox": GPTNeoXGPTQForCausalLM,
     "gptj": GPTJGPTQForCausalLM,
     "llama": LlamaGPTQForCausalLM,
-    "opt": OPTGPTQForCausalLM
+    "opt": OPTGPTQForCausalLM,
+    "moss": MOSSGPTQForCausalLM
 }
 
 
diff --git a/auto_gptq/modeling/moss.py b/auto_gptq/modeling/moss.py
new file mode 100644
index 00000000000000..26ffb73e92fa5e
--- /dev/null
+++ b/auto_gptq/modeling/moss.py
@@ -0,0 +1,12 @@
+from ._base import *
+
+
+class MOSSGPTQForCausalLM(BaseGPTQForCausalLM):
+    layers_block_name = "transformer.h"
+    outside_layer_modules = ["transformer.wte", "transformer.drop", "transformer.ln_f"]
+    inside_layer_modules = [
+        ["attn.qkv_proj"],
+        ["attn.out_proj"],
+        ["mlp.fc_in"],
+        ["mlp.fc_out"]
+    ]
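Editor's note: Patch 1 wires MOSS, a GPT-J-style architecture (fused `attn.qkv_proj` attention, `mlp.fc_in`/`mlp.fc_out` MLP), into `SUPPORTED_MODELS`, the model dispatch map, and a new `moss.py` quantizer class. A minimal sketch of what this enables follows; the checkpoint id `fnlp/moss-moon-003-sft` and the `quantize()`/`save_quantized()` calls are assumptions modeled on the project's example scripts, not taken from this patch itself.

```python
# A minimal sketch, assuming AutoGPTQ's quantize()/save_quantized() API and
# an illustrative MOSS checkpoint id ("fnlp/moss-moon-003-sft").
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

pretrained_model_dir = "fnlp/moss-moon-003-sft"  # assumed checkpoint id

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, trust_remote_code=True)
# A single tokenized calibration example; real quantization needs many more.
examples = [tokenizer("auto_gptq is an easy-to-use model quantization package.")]

# config.model_type == "moss" now resolves to MOSSGPTQForCausalLM through
# the model map extended in auto.py above.
model = AutoGPTQForCausalLM.from_pretrained(
    pretrained_model_dir,
    quantize_config=BaseQuantizeConfig(bits=4, group_size=128),
)
model.quantize(examples)
model.save_quantized("moss-4bit-128g")
```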
From f748dad2e14794bd68ec0c8ac9494d1d9cbae1da Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:13:46 +0800
Subject: [PATCH 2/7] always trust remote code

---
 auto_gptq/modeling/_base.py                | 5 +++--
 examples/quantization/quant_with_alpaca.py | 6 +++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/auto_gptq/modeling/_base.py b/auto_gptq/modeling/_base.py
index 78509f773eb783..c9ef1dbb996126 100644
--- a/auto_gptq/modeling/_base.py
+++ b/auto_gptq/modeling/_base.py
@@ -303,7 +303,7 @@ def skip(*args, **kwargs):
         torch.nn.init.uniform_ = skip
         torch.nn.init.normal_ = skip
 
-        config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
+        config = AutoConfig.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
         if config.model_type not in SUPPORTED_MODELS:
             raise TypeError(f"{config.model_type} isn't supported yet.")
 
@@ -311,6 +311,7 @@ def skip(*args, **kwargs):
         model_init_kwargs["device_map"] = None
         model_init_kwargs["torch_dtype"] = torch.bfloat16 if bf16 else torch.float16
         model_init_kwargs["low_cpu_mem_usage"] = False
+        model_init_kwargs["trust_remote_code"] = True
 
         model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, **model_init_kwargs)
         model_config = model.config.to_dict()
@@ -335,7 +336,7 @@ def from_quantized(
         use_safetensors: bool = False
     ):
         """load quantized model from local disk"""
-        config = AutoConfig.from_pretrained(save_dir)
+        config = AutoConfig.from_pretrained(save_dir, trust_remote_code=True)
         if config.model_type not in SUPPORTED_MODELS:
             raise TypeError(f"{config.model_type} isn't supported yet.")
 
diff --git a/examples/quantization/quant_with_alpaca.py b/examples/quantization/quant_with_alpaca.py
index 8f81e391d71661..fff409e4cf4047 100644
--- a/examples/quantization/quant_with_alpaca.py
+++ b/examples/quantization/quant_with_alpaca.py
@@ -82,7 +82,11 @@ def main():
     parser.add_argument("--fast_tokenizer", action="store_true")
     args = parser.parse_args()
 
-    tokenizer = AutoTokenizer.from_pretrained(args.pretrained_model_dir, use_fast=args.fast_tokenizer)
+    tokenizer = AutoTokenizer.from_pretrained(
+        args.pretrained_model_dir,
+        use_fast=args.fast_tokenizer,
+        trust_remote_code=True
+    )
     model = AutoGPTQForCausalLM.from_pretrained(
         args.pretrained_model_dir,
         quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=args.group_size)

From 6b6dd3e1e37dc38e8052bbc25955703403c7d31a Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:15:32 +0800
Subject: [PATCH 3/7] always trust remote code

---
 auto_gptq/modeling/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_gptq/modeling/_utils.py b/auto_gptq/modeling/_utils.py
index b92a72d4b462e0..404734ea3898f5 100644
--- a/auto_gptq/modeling/_utils.py
+++ b/auto_gptq/modeling/_utils.py
@@ -39,7 +39,7 @@ def pack_model(model, quantizers, bits, group_size):
 
 
 def check_and_get_model_type(model_dir):
-    config = AutoConfig.from_pretrained(model_dir)
+    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
     if config.model_type not in SUPPORTED_MODELS:
         raise TypeError(f"{config.model_type} isn't supported yet.")
     model_type = config.model_type

From 262416e7ff5f928e7f7b954e5ecc679fdef05480 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:16:05 +0800
Subject: [PATCH 4/7] set package version to v0.0.4-dev

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e9a7365c985d5c..95d27ccd22e429 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@
 setup(
     name="auto_gptq",
     packages=find_packages(),
-    version="v0.0.3",
+    version="v0.0.4-dev",
     install_requires=requirements,
     extras_require=extras_require,
     ext_modules=extensions,
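Editor's note: Patches 2-4 hardcode `trust_remote_code=True` in every `AutoConfig`, `AutoModelForCausalLM`, and `AutoTokenizer` call. This is what makes MOSS loadable at all: its config and modeling classes ship inside the model repository rather than inside `transformers`, so loading the model executes Python fetched from the Hub, which `transformers` only does when the caller opts in. The flip side is that the library now runs repo-provided code unconditionally, which is worth keeping in mind for untrusted checkpoints. A sketch of the behavior, with an assumed checkpoint id:

```python
# Sketch of why the flag matters; "fnlp/moss-moon-003-sft" is an assumed id.
from transformers import AutoConfig, AutoModelForCausalLM

repo = "fnlp/moss-moon-003-sft"

# MOSS's modeling code lives in the model repo itself (referenced via
# config.json), so without trust_remote_code=True transformers refuses
# (raising or prompting) rather than importing downloaded Python.
config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
assert config.model_type == "moss"  # now present in SUPPORTED_MODELS

model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)
```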
From d78685aadef3d452f4eba1cd3e8f3700c86d7e22 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:31:00 +0800
Subject: [PATCH 5/7] update README.md

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9bbf66bfc4f1d8..6d93c878dec4e1 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
 An easy-to-use model quantization package with user-friendly apis, based on GPTQ algorithm.
 
 ## News or Update
+- 2023-04-25 - (News&Update) - [MOSS](https://github.com/OpenLMLab/MOSS) is an open-sourced Chinese Large Language Model, quantization is now supported in AutoGPTQ.
 - 2023-04-23 - (Update) - Support evaluation on multiple (down-stream) tasks such as: language-modeling, text-classification, text-summarization.
 - 2023-04-22 - (News) - qwopqwop200's [AutoGPTQ-triton](https://github.com/qwopqwop200/AutoGPTQ-triton) provides faster speed to integrate with quantized model, for everyone who can access to triton, try and enjoy yourself!
 - 2023-04-20 - (News) - AutoGPTQ is automatically compatible with Stability-AI's newly released `gpt_neox` type model family [StableLM](https://github.com/Stability-AI/StableLM).
@@ -25,7 +26,7 @@ pip install .[llama]
 ```
 
 ## Supported Models
-Currently, `auto_gptq` supports: `bloom`, `gpt_neox`, `gptj`, `llama` and `opt`; more CausalLMs will come soon!
+Currently, `auto_gptq` supports: `bloom`, `gpt_neox`, `gptj`, `llama`, `moss` and `opt`; more CausalLMs will come soon!
 
 ## Supported Evaluation Tasks
 Currently, `auto_gptq` supports: `LanguageModelingTask`, `SequenceClassificationTask` and `TextSummarizationTask`; more Tasks will come soon!

From cfcba7d1167be7d3a86d895ffc97974e9e58781e Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:34:04 +0800
Subject: [PATCH 6/7] update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6d93c878dec4e1..2862859ceddf9c 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 An easy-to-use model quantization package with user-friendly apis, based on GPTQ algorithm.
 
 ## News or Update
-- 2023-04-25 - (News&Update) - [MOSS](https://github.com/OpenLMLab/MOSS) is an open-sourced Chinese Large Language Model, quantization is now supported in AutoGPTQ.
+- 2023-04-25 - (News&Update) - [MOSS](https://github.com/OpenLMLab/MOSS) is an open-source tool-augmented conversational language model from Fudan University; its quantization is now supported in AutoGPTQ.
 - 2023-04-23 - (Update) - Support evaluation on multiple (down-stream) tasks such as: language-modeling, text-classification, text-summarization.
 - 2023-04-22 - (News) - qwopqwop200's [AutoGPTQ-triton](https://github.com/qwopqwop200/AutoGPTQ-triton) provides faster speed to integrate with quantized model, for everyone who can access to triton, try and enjoy yourself!
 - 2023-04-20 - (News) - AutoGPTQ is automatically compatible with Stability-AI's newly released `gpt_neox` type model family [StableLM](https://github.com/Stability-AI/StableLM).

From 419160b73399fceb3ae2c19d449fd24da0e0cc05 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:52:49 +0800
Subject: [PATCH 7/7] always trust remote code

---
 auto_gptq/modeling/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_gptq/modeling/_base.py b/auto_gptq/modeling/_base.py
index c9ef1dbb996126..b11856eed880c4 100644
--- a/auto_gptq/modeling/_base.py
+++ b/auto_gptq/modeling/_base.py
@@ -357,7 +357,7 @@ def skip(*args, **kwargs):
         transformers.modeling_utils._init_weights = False
         torch.set_default_dtype(torch.half)
-        model = AutoModelForCausalLM.from_config(config)
+        model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
         torch.set_default_dtype(torch.float)
         model = model.eval()
         layers = find_layers(model)
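Editor's note: Patch 7 covers the last loading path: `from_quantized` rebuilds the model skeleton with `AutoModelForCausalLM.from_config`, which needs the same flag for remote-code architectures before the quantized weights are loaded into it. A closing sketch of the round trip, assuming the `from_quantized(save_dir, ...)` signature visible in `_base.py` above and the save directory produced in the first sketch:

```python
# A minimal sketch, assuming from_quantized(save_dir, ...) as shown in
# _base.py and that BaseGPTQForCausalLM forwards generate() to the wrapped
# model; the save dir and checkpoint id come from the earlier sketches.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True)
model = AutoGPTQForCausalLM.from_quantized("moss-4bit-128g")

inputs = tokenizer("auto_gptq is", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))
```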