From a259fb06bb772f80c4da3c11391f8bb99822cdb1 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 11:54:29 +0800
Subject: [PATCH 1/7] add support to MOSS model

---
 auto_gptq/modeling/_const.py |  2 +-
 auto_gptq/modeling/auto.py   |  4 +++-
 auto_gptq/modeling/moss.py   | 12 ++++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 auto_gptq/modeling/moss.py

diff --git a/auto_gptq/modeling/_const.py b/auto_gptq/modeling/_const.py
index 6a427600cef065..3bd6be439dc35c 100644
--- a/auto_gptq/modeling/_const.py
+++ b/auto_gptq/modeling/_const.py
@@ -6,7 +6,7 @@
 CPU = device("cpu")
 CUDA = device("cuda:0")
 
-SUPPORTED_MODELS = ["bloom", "gptj", "gpt_neox", "opt"]
+SUPPORTED_MODELS = ["bloom", "gptj", "gpt_neox", "opt", "moss"]
 if parse_version(transformers_version) >= parse_version("v4.28.0"):
     SUPPORTED_MODELS.append("llama")
 
diff --git a/auto_gptq/modeling/auto.py b/auto_gptq/modeling/auto.py
index f7b393294f436a..2ddafd274393dd 100644
--- a/auto_gptq/modeling/auto.py
+++ b/auto_gptq/modeling/auto.py
@@ -4,6 +4,7 @@
 from .gpt_neox import GPTNeoXGPTQForCausalLM
 from .gptj import GPTJGPTQForCausalLM
 from .llama import LlamaGPTQForCausalLM
+from .moss import MOSSGPTQForCausalLM
 from .opt import OPTGPTQForCausalLM
 
 
@@ -12,7 +13,8 @@
     "gpt_neox": GPTNeoXGPTQForCausalLM,
     "gptj": GPTJGPTQForCausalLM,
     "llama": LlamaGPTQForCausalLM,
-    "opt": OPTGPTQForCausalLM
+    "opt": OPTGPTQForCausalLM,
+    "moss": MOSSGPTQForCausalLM
 }
 
 
diff --git a/auto_gptq/modeling/moss.py b/auto_gptq/modeling/moss.py
new file mode 100644
index 00000000000000..26ffb73e92fa5e
--- /dev/null
+++ b/auto_gptq/modeling/moss.py
@@ -0,0 +1,12 @@
+from ._base import *
+
+
+class MOSSGPTQForCausalLM(BaseGPTQForCausalLM):
+    layers_block_name = "transformer.h"
+    outside_layer_modules = ["transformer.wte", "transformer.drop", "transformer.ln_f"]
+    inside_layer_modules = [
+        ["attn.qkv_proj"],
+        ["attn.out_proj"],
+        ["mlp.fc_in"],
+        ["mlp.fc_out"]
+    ]
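Editor's note: Patch 1 wires MOSS, a GPT-J-style architecture (fused `attn.qkv_proj` attention, `mlp.fc_in`/`mlp.fc_out` MLP), into `SUPPORTED_MODELS`, the model dispatch map, and a new `moss.py` quantizer class. A minimal sketch of what this enables follows; the checkpoint id `fnlp/moss-moon-003-sft` and the `quantize()`/`save_quantized()` calls are assumptions modeled on the project's example scripts, not taken from this patch itself.

```python
# A minimal sketch, assuming AutoGPTQ's quantize()/save_quantized() API and
# an illustrative MOSS checkpoint id ("fnlp/moss-moon-003-sft").
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

pretrained_model_dir = "fnlp/moss-moon-003-sft"  # assumed checkpoint id

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, trust_remote_code=True)
# A single tokenized calibration example; real quantization needs many more.
examples = [tokenizer("auto_gptq is an easy-to-use model quantization package.")]

# config.model_type == "moss" now resolves to MOSSGPTQForCausalLM through
# the model map extended in auto.py above.
model = AutoGPTQForCausalLM.from_pretrained(
    pretrained_model_dir,
    quantize_config=BaseQuantizeConfig(bits=4, group_size=128),
)
model.quantize(examples)
model.save_quantized("moss-4bit-128g")
```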
From f748dad2e14794bd68ec0c8ac9494d1d9cbae1da Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:13:46 +0800
Subject: [PATCH 2/7] always trust remote code

---
 auto_gptq/modeling/_base.py                | 5 +++--
 examples/quantization/quant_with_alpaca.py | 6 +++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/auto_gptq/modeling/_base.py b/auto_gptq/modeling/_base.py
index 78509f773eb783..c9ef1dbb996126 100644
--- a/auto_gptq/modeling/_base.py
+++ b/auto_gptq/modeling/_base.py
@@ -303,7 +303,7 @@ def skip(*args, **kwargs):
         torch.nn.init.uniform_ = skip
         torch.nn.init.normal_ = skip
 
-        config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
+        config = AutoConfig.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
         if config.model_type not in SUPPORTED_MODELS:
             raise TypeError(f"{config.model_type} isn't supported yet.")
 
@@ -311,6 +311,7 @@ def skip(*args, **kwargs):
         model_init_kwargs["device_map"] = None
         model_init_kwargs["torch_dtype"] = torch.bfloat16 if bf16 else torch.float16
         model_init_kwargs["low_cpu_mem_usage"] = False
+        model_init_kwargs["trust_remote_code"] = True
 
         model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, **model_init_kwargs)
         model_config = model.config.to_dict()
@@ -335,7 +336,7 @@ def from_quantized(
         use_safetensors: bool = False
     ):
         """load quantized model from local disk"""
-        config = AutoConfig.from_pretrained(save_dir)
+        config = AutoConfig.from_pretrained(save_dir, trust_remote_code=True)
         if config.model_type not in SUPPORTED_MODELS:
             raise TypeError(f"{config.model_type} isn't supported yet.")
 
diff --git a/examples/quantization/quant_with_alpaca.py b/examples/quantization/quant_with_alpaca.py
index 8f81e391d71661..fff409e4cf4047 100644
--- a/examples/quantization/quant_with_alpaca.py
+++ b/examples/quantization/quant_with_alpaca.py
@@ -82,7 +82,11 @@ def main():
     parser.add_argument("--fast_tokenizer", action="store_true")
     args = parser.parse_args()
 
-    tokenizer = AutoTokenizer.from_pretrained(args.pretrained_model_dir, use_fast=args.fast_tokenizer)
+    tokenizer = AutoTokenizer.from_pretrained(
+        args.pretrained_model_dir,
+        use_fast=args.fast_tokenizer,
+        trust_remote_code=True
+    )
     model = AutoGPTQForCausalLM.from_pretrained(
         args.pretrained_model_dir,
         quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=args.group_size)

From 6b6dd3e1e37dc38e8052bbc25955703403c7d31a Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:15:32 +0800
Subject: [PATCH 3/7] always trust remote code

---
 auto_gptq/modeling/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_gptq/modeling/_utils.py b/auto_gptq/modeling/_utils.py
index b92a72d4b462e0..404734ea3898f5 100644
--- a/auto_gptq/modeling/_utils.py
+++ b/auto_gptq/modeling/_utils.py
@@ -39,7 +39,7 @@ def pack_model(model, quantizers, bits, group_size):
 
 
 def check_and_get_model_type(model_dir):
-    config = AutoConfig.from_pretrained(model_dir)
+    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
     if config.model_type not in SUPPORTED_MODELS:
         raise TypeError(f"{config.model_type} isn't supported yet.")
     model_type = config.model_type

From 262416e7ff5f928e7f7b954e5ecc679fdef05480 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:16:05 +0800
Subject: [PATCH 4/7] set package version to v0.0.4-dev

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e9a7365c985d5c..95d27ccd22e429 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@
 setup(
     name="auto_gptq",
     packages=find_packages(),
-    version="v0.0.3",
+    version="v0.0.4-dev",
     install_requires=requirements,
     extras_require=extras_require,
     ext_modules=extensions,
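Editor's note: Patches 2-4 hardcode `trust_remote_code=True` in every `AutoConfig`, `AutoModelForCausalLM`, and `AutoTokenizer` call. This is what makes MOSS loadable at all: its config and modeling classes ship inside the model repository rather than inside `transformers`, so loading the model executes Python fetched from the Hub, which `transformers` only does when the caller opts in. The flip side is that the library now runs repo-provided code unconditionally, which is worth keeping in mind for untrusted checkpoints. A sketch of the behavior, with an assumed checkpoint id:

```python
# Sketch of why the flag matters; "fnlp/moss-moon-003-sft" is an assumed id.
from transformers import AutoConfig, AutoModelForCausalLM

repo = "fnlp/moss-moon-003-sft"

# MOSS's modeling code lives in the model repo itself (referenced via
# config.json), so without trust_remote_code=True transformers refuses
# (raising or prompting) rather than importing downloaded Python.
config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
assert config.model_type == "moss"  # now present in SUPPORTED_MODELS

model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)
```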
From d78685aadef3d452f4eba1cd3e8f3700c86d7e22 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:31:00 +0800
Subject: [PATCH 5/7] update README.md

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9bbf66bfc4f1d8..6d93c878dec4e1 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
 An easy-to-use model quantization package with user-friendly apis, based on GPTQ algorithm.
 
 ## News or Update
+- 2023-04-25 - (News&Update) - [MOSS](https://github.com/OpenLMLab/MOSS) is an open-sourced Chinese Large Language Model, quantization is now supported in AutoGPTQ.
 - 2023-04-23 - (Update) - Support evaluation on multiple (down-stream) tasks such as: language-modeling, text-classification, text-summarization.
 - 2023-04-22 - (News) - qwopqwop200's [AutoGPTQ-triton](https://github.com/qwopqwop200/AutoGPTQ-triton) provides faster speed to integrate with quantized model, for everyone who can access to triton, try and enjoy yourself!
 - 2023-04-20 - (News) - AutoGPTQ is automatically compatible with Stability-AI's newly released `gpt_neox` type model family [StableLM](https://github.com/Stability-AI/StableLM).
@@ -25,7 +26,7 @@ pip install .[llama]
 ```
 
 ## Supported Models
-Currently, `auto_gptq` supports: `bloom`, `gpt_neox`, `gptj`, `llama` and `opt`; more CausalLMs will come soon!
+Currently, `auto_gptq` supports: `bloom`, `gpt_neox`, `gptj`, `llama`, `moss` and `opt`; more CausalLMs will come soon!
 
 ## Supported Evaluation Tasks
 Currently, `auto_gptq` supports: `LanguageModelingTask`, `SequenceClassificationTask` and `TextSummarizationTask`; more Tasks will come soon!

From cfcba7d1167be7d3a86d895ffc97974e9e58781e Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:34:04 +0800
Subject: [PATCH 6/7] update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6d93c878dec4e1..2862859ceddf9c 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 An easy-to-use model quantization package with user-friendly apis, based on GPTQ algorithm.
 
 ## News or Update
-- 2023-04-25 - (News&Update) - [MOSS](https://github.com/OpenLMLab/MOSS) is an open-sourced Chinese Large Language Model, quantization is now supported in AutoGPTQ.
+- 2023-04-25 - (News&Update) - [MOSS](https://github.com/OpenLMLab/MOSS) is an open-source tool-augmented conversational language model from Fudan University; its quantization is now supported in AutoGPTQ.
 - 2023-04-23 - (Update) - Support evaluation on multiple (down-stream) tasks such as: language-modeling, text-classification, text-summarization.
 - 2023-04-22 - (News) - qwopqwop200's [AutoGPTQ-triton](https://github.com/qwopqwop200/AutoGPTQ-triton) provides faster speed to integrate with quantized model, for everyone who can access to triton, try and enjoy yourself!
 - 2023-04-20 - (News) - AutoGPTQ is automatically compatible with Stability-AI's newly released `gpt_neox` type model family [StableLM](https://github.com/Stability-AI/StableLM).

From 419160b73399fceb3ae2c19d449fd24da0e0cc05 Mon Sep 17 00:00:00 2001
From: PanQiWei <594557445@qq.com>
Date: Tue, 25 Apr 2023 12:52:49 +0800
Subject: [PATCH 7/7] always trust remote code

---
 auto_gptq/modeling/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_gptq/modeling/_base.py b/auto_gptq/modeling/_base.py
index c9ef1dbb996126..b11856eed880c4 100644
--- a/auto_gptq/modeling/_base.py
+++ b/auto_gptq/modeling/_base.py
@@ -357,7 +357,7 @@ def skip(*args, **kwargs):
         transformers.modeling_utils._init_weights = False
         torch.set_default_dtype(torch.half)
-        model = AutoModelForCausalLM.from_config(config)
+        model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
         torch.set_default_dtype(torch.float)
         model = model.eval()
         layers = find_layers(model)
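Editor's note: Patch 7 covers the last loading path: `from_quantized` rebuilds the model skeleton with `AutoModelForCausalLM.from_config`, which needs the same flag for remote-code architectures before the quantized weights are loaded into it. A closing sketch of the round trip, assuming the `from_quantized(save_dir, ...)` signature visible in `_base.py` above and the save directory produced in the first sketch:

```python
# A minimal sketch, assuming from_quantized(save_dir, ...) as shown in
# _base.py and that BaseGPTQForCausalLM forwards generate() to the wrapped
# model; the save dir and checkpoint id come from the earlier sketches.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True)
model = AutoGPTQForCausalLM.from_quantized("moss-4bit-128g")

inputs = tokenizer("auto_gptq is", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))
```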