From 877508f73bfcabf6ac3723a8d61729e432072950 Mon Sep 17 00:00:00 2001
From: jinghanhu
Date: Sat, 20 Apr 2024 12:09:45 +0800
Subject: [PATCH] [WIP] Support Atom model (#741)

---
 README.md                                     |  2 ++
 README_CN.md                                  |  2 ++
 ...14\346\225\260\346\215\256\351\233\206.md" |  2 ++
 .../LLM/Supported-models-datasets.md          |  2 ++
 .../llm/scripts/atom_7b_chat/lora/infer.sh    | 11 +++++++
 .../llm/scripts/atom_7b_chat/lora/sft.sh      | 31 +++++++++++++++++++
 swift/llm/utils/model.py                      | 19 ++++++++++++
 swift/llm/utils/template.py                   |  6 ++++
 8 files changed, 75 insertions(+)
 create mode 100644 examples/pytorch/llm/scripts/atom_7b_chat/lora/infer.sh
 create mode 100644 examples/pytorch/llm/scripts/atom_7b_chat/lora/sft.sh

diff --git a/README.md b/README.md
index e77f8ced78..e12aeb9e58 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ To facilitate use by users unfamiliar with deep learning, we provide a Gradio we
 Additionally, we are expanding capabilities for other modalities. Currently, we support full-parameter training and LoRA training for AnimateDiff.

 ## 🎉 News
+- 2024.04.20: Support for inference, fine-tuning, and deployment of **Atom** series models. This includes: Atom-7B and Atom-7B-Chat. Use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/atom_7b_chat/lora/sft.sh) to start training.
 - 2024.04.19: Support for single-card, DDP, ZeRO2, and ZeRO3 training and inference with NPU, please refer to [NPU Inference and Fine-tuning Best Practices](docs/source_en/LLM/NPU-best-practice.md).
 - 2024.04.19: Support for inference, fine-tuning, and deployment of **Llama3** series models. This includes: Llama-3-8B, Llama-3-8B-Instruct, Llama-3-70B, and Llama-3-70B-Instruct. use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/llama3_8b_instruct/lora/sft.sh) to train.
 - 2024.04.18: Supported models: wizardlm2-7b-awq, wizardlm2-8x22b, yi-6b-chat-awq, yi-6b-chat-int8, yi-34b-chat-awq, yi-34b-chat-int8. Supported `--deepspeed zero3-offload` and provided default zero3-offload configuration file for zero3+cpu offload usage.
@@ -436,6 +437,7 @@ CUDA_VISIBLE_DEVICES=0 swift deploy \
 | mengzi3 | [Langboat](https://github.com/Langboat/Mengzi3) | Chinese<br>English | 13B | base model |
 | c4ai-command-r | [c4ai](https://cohere.com/command) | Multilingual | 35B-104B | chat model |
 | WizardLM2 | [WizardLM2 series models](https://github.com/nlpxucan/WizardLM) | English | 7B-8x22B<br>including quantized versions | chat model<br>MoE model |
+| Atom | [Atom](https://github.com/LlamaFamily/Llama-Chinese) | Chinese | 7B | base model<br>chat model |

 #### MLLMs
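For reference, the new `atom-7b-chat` registration can be exercised end-to-end with the same `swift.llm` quick-start pattern the README already uses for other chat models. A minimal sketch, assuming the documented helpers (`get_model_tokenizer`, `get_template`, `inference`); the query and the `max_new_tokens` value are illustrative, not part of this patch:

```python
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import (ModelType, get_default_template_type,
                       get_model_tokenizer, get_template, inference)

model_type = ModelType.atom_7b_chat
template_type = get_default_template_type(model_type)  # 'atom', per this patch

# Downloads FlagAlpha/Atom-7B-Chat from ModelScope on first use.
model, tokenizer = get_model_tokenizer(model_type, model_kwargs={'device_map': 'auto'})
model.generation_config.max_new_tokens = 128  # illustrative

template = get_template(template_type, tokenizer)
response, history = inference(model, template, '你好, 请介绍一下你自己.')
print(response)
```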
diff --git a/README_CN.md b/README_CN.md
index 7df3108886..ef9a64b473 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -40,6 +40,7 @@ SWIFT支持近**200种LLM和MLLM**(多模态大模型)的训练、推理、
 此外,我们也在拓展其他模态的能力,目前我们支持了AnimateDiff的全参数训练和LoRA训练。

 ## 🎉 新闻
+- 2024.04.20: 支持**Atom**系列模型的推理, 微调和部署等. 包括: Atom-7B, Atom-7B-Chat. 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/atom_7b_chat/lora/sft.sh)来开始训练!
 - 2024.04.19: 支持NPU的单卡、DDP、ZeRO2和ZeRO3的训练与推理, 可以查看[NPU推理与微调最佳实践](docs/source/LLM/NPU推理与微调最佳实践.md).
 - 2024.04.19: 支持**Llama3**系列模型的推理, 微调和部署等. 包括: Llama-3-8B, Llama-3-8B-Instruct, Llama-3-70B, Llama-3-70B-Instruct. 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/llama3_8b_instruct/lora/sft.sh)开始训练叭!
 - 2024.04.18: 支持模型: wizardlm2-7b-awq, wizardlm2-8x22b, yi-6b-chat-awq, yi-6b-chat-int8, yi-34b-chat-awq, yi-34b-chat-int8. 支持`--deepspeed zero3-offload`, 提供了默认zero3-offload配置文件来使用zero3+cpu offload.
@@ -433,6 +434,7 @@ CUDA_VISIBLE_DEVICES=0 swift deploy \
 | mengzi3 | [Langboat](https://github.com/Langboat/Mengzi3) | 中文<br>英文 | 13B | base模型 |
 | c4ai-command-r | [c4ai](https://cohere.com/command) | 多语种 | 35B-104B | chat模型 |
 | WizardLM2 | [WizardLM2系列模型](https://github.com/nlpxucan/WizardLM) | 多语种 | 7B-8x22B<br>包含量化版本 | chat模型<br>MoE模型 |
+| Atom | [Atom](https://github.com/LlamaFamily/Llama-Chinese) | 中文 | 7B | base模型<br>chat模型 |

 #### 多模态大模型

diff --git "a/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
index 5be0dfad7e..959d4faa6f 100644
--- "a/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
+++ "b/docs/source/LLM/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
@@ -92,6 +92,8 @@
 |llama3-8b-instruct|[LLM-Research/Meta-Llama-3-8B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3-8B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔||-|[meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)|
 |llama3-70b|[LLM-Research/Meta-Llama-3-70B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3-70B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[meta-llama/Meta-Llama-3-70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B)|
 |llama3-70b-instruct|[LLM-Research/Meta-Llama-3-70B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3-70B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔||-|[meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct)|
+|atom-7b|[FlagAlpha/Atom-7B](https://modelscope.cn/models/FlagAlpha/Atom-7B/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔||-|[FlagAlpha/Atom-7B](https://huggingface.co/FlagAlpha/Atom-7B)|
+|atom-7b-chat|[FlagAlpha/Atom-7B-Chat](https://modelscope.cn/models/FlagAlpha/Atom-7B-Chat/summary)|q_proj, k_proj, v_proj|atom|✔|✔||-|[FlagAlpha/Atom-7B-Chat](https://huggingface.co/FlagAlpha/Atom-7B-Chat)|
 |llava1d6-mistral-7b-instruct|[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b/summary)|q_proj, k_proj, v_proj|llava-mistral-instruct|✔|✘|transformers>=4.34|multi-modal, vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |llava1d6-yi-34b-instruct|[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b/summary)|q_proj, k_proj, v_proj|llava-yi-instruct|✔|✘||multi-modal, vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
 |yi-6b|[01ai/Yi-6B](https://modelscope.cn/models/01ai/Yi-6B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B)|

diff --git a/docs/source_en/LLM/Supported-models-datasets.md b/docs/source_en/LLM/Supported-models-datasets.md
index 1e7fbfe870..ae800538dc 100644
--- a/docs/source_en/LLM/Supported-models-datasets.md
+++ b/docs/source_en/LLM/Supported-models-datasets.md
@@ -92,6 +92,8 @@ The table below introduces all models supported by SWIFT:
 |llama3-8b-instruct|[LLM-Research/Meta-Llama-3-8B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3-8B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔||-|[meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)|
 |llama3-70b|[LLM-Research/Meta-Llama-3-70B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3-70B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[meta-llama/Meta-Llama-3-70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B)|
 |llama3-70b-instruct|[LLM-Research/Meta-Llama-3-70B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3-70B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔||-|[meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct)|
+|atom-7b|[FlagAlpha/Atom-7B](https://modelscope.cn/models/FlagAlpha/Atom-7B/summary)|q_proj, k_proj, v_proj|default-generation-bos|✔|✔||-|[FlagAlpha/Atom-7B](https://huggingface.co/FlagAlpha/Atom-7B)|
+|atom-7b-chat|[FlagAlpha/Atom-7B-Chat](https://modelscope.cn/models/FlagAlpha/Atom-7B-Chat/summary)|q_proj, k_proj, v_proj|atom|✔|✔||-|[FlagAlpha/Atom-7B-Chat](https://huggingface.co/FlagAlpha/Atom-7B-Chat)|
 |llava1d6-mistral-7b-instruct|[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b/summary)|q_proj, k_proj, v_proj|llava-mistral-instruct|✔|✘|transformers>=4.34|multi-modal, vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |llava1d6-yi-34b-instruct|[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b/summary)|q_proj, k_proj, v_proj|llava-yi-instruct|✔|✘||multi-modal, vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
 |yi-6b|[01ai/Yi-6B](https://modelscope.cn/models/01ai/Yi-6B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B)|
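Both new rows mark vLLM as supported, matching `support_vllm=True` in the registrations further down, so the Atom models should also run through swift's vLLM acceleration path. A sketch, assuming swift's documented vLLM helpers (`get_vllm_engine`, `inference_vllm`) apply to Atom as they do to the other llama-architecture models; the queries and `max_new_tokens` are illustrative:

```python
from swift.llm import (ModelType, get_default_template_type, get_template,
                       get_vllm_engine, inference_vllm)

model_type = ModelType.atom_7b
llm_engine = get_vllm_engine(model_type)
# 'default-generation-bos' for atom-7b, per the table above.
template = get_template(get_default_template_type(model_type),
                        llm_engine.hf_tokenizer)
llm_engine.generation_config.max_new_tokens = 256  # illustrative

request_list = [{'query': '浙江的省会在哪里?'}, {'query': '续写: 春天来了,'}]
resp_list = inference_vllm(llm_engine, template, request_list)
for request, resp in zip(request_list, resp_list):
    print(request['query'], '->', resp['response'])
```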
diff --git a/examples/pytorch/llm/scripts/atom_7b_chat/lora/infer.sh b/examples/pytorch/llm/scripts/atom_7b_chat/lora/infer.sh
new file mode 100644
index 0000000000..ad8224060a
--- /dev/null
+++ b/examples/pytorch/llm/scripts/atom_7b_chat/lora/infer.sh
@@ -0,0 +1,11 @@
+# Experimental environment: 3090
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --ckpt_dir "output/atom-7b-chat/vx-xxx/checkpoint-xxx" \
+    --load_dataset_config true \
+    --max_new_tokens 2048 \
+    --temperature 0.1 \
+    --top_p 0.7 \
+    --repetition_penalty 1. \
+    --do_sample true \
+    --merge_lora false
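`infer.sh` loads a LoRA checkpoint produced by the `sft.sh` script below; `vx-xxx/checkpoint-xxx` is a placeholder to replace with a real run directory. The same inference can be done from Python: a sketch following swift's documented pattern for inference after fine-tuning, where `Swift.from_pretrained` comes from swift's existing API rather than from this patch:

```python
from swift.llm import (ModelType, get_default_template_type,
                       get_model_tokenizer, get_template, inference)
from swift.tuners import Swift

ckpt_dir = 'output/atom-7b-chat/vx-xxx/checkpoint-xxx'  # placeholder, as in infer.sh
model_type = ModelType.atom_7b_chat

model, tokenizer = get_model_tokenizer(model_type, model_kwargs={'device_map': 'auto'})
model = Swift.from_pretrained(model, ckpt_dir, inference_mode=True)  # attach LoRA weights
template = get_template(get_default_template_type(model_type), tokenizer)

response, _ = inference(model, template, '你是谁?')  # illustrative query
print(response)
```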
diff --git a/examples/pytorch/llm/scripts/atom_7b_chat/lora/sft.sh b/examples/pytorch/llm/scripts/atom_7b_chat/lora/sft.sh
new file mode 100644
index 0000000000..59ec07dec9
--- /dev/null
+++ b/examples/pytorch/llm/scripts/atom_7b_chat/lora/sft.sh
@@ -0,0 +1,31 @@
+# Experimental environment: 3090, A10, V100...
+# 20GB GPU memory
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model_type atom-7b-chat \
+    --model_revision master \
+    --sft_type lora \
+    --tuner_backend peft \
+    --dtype AUTO \
+    --output_dir output \
+    --ddp_backend nccl \
+    --dataset ms-bench \
+    --train_dataset_sample -1 \
+    --num_train_epochs 3 \
+    --max_length 2048 \
+    --check_dataset_strategy warning \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0.05 \
+    --lora_target_modules DEFAULT \
+    --gradient_checkpointing true \
+    --batch_size 1 \
+    --weight_decay 0.1 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 10

diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
index e1cea66321..4f7e063446 100644
--- a/swift/llm/utils/model.py
+++ b/swift/llm/utils/model.py
@@ -129,6 +129,9 @@ class ModelType:
     llama3_8b_instruct = 'llama3-8b-instruct'
     llama3_70b = 'llama3-70b'
     llama3_70b_instruct = 'llama3-70b-instruct'
+    # atom
+    atom_7b = 'atom-7b'
+    atom_7b_chat = 'atom-7b-chat'
     # llava
     llava1d6_mistral_7b_instruct = 'llava1d6-mistral-7b-instruct'
     llava1d6_yi_34b_instruct = 'llava1d6-yi-34b-instruct'
@@ -463,6 +466,22 @@ def _new_forward(self, x):
     QuantLinear.forward = _new_forward


+@register_model(
+    ModelType.atom_7b,
+    'FlagAlpha/Atom-7B',
+    LoRATM.llama2,
+    TemplateType.default_generation_bos,
+    support_flash_attn=True,
+    support_vllm=True,
+    hf_model_id='FlagAlpha/Atom-7B')
+@register_model(
+    ModelType.atom_7b_chat,
+    'FlagAlpha/Atom-7B-Chat',
+    LoRATM.llama2,
+    TemplateType.atom,
+    support_flash_attn=True,
+    support_vllm=True,
+    hf_model_id='FlagAlpha/Atom-7B-Chat')
 @register_model(
     ModelType.internlm_20b,
     'Shanghai_AI_Laboratory/internlm-20b',

diff --git a/swift/llm/utils/template.py b/swift/llm/utils/template.py
index 780a4ee948..55478218a6 100644
--- a/swift/llm/utils/template.py
+++ b/swift/llm/utils/template.py
@@ -65,6 +65,7 @@ class TemplateType:
     mplug_owl2 = 'mplug-owl2'
     wizardlm2_awq = 'wizardlm2-awq'
     wizardlm2 = 'wizardlm2'
+    atom = 'atom'
     # compatibility. (Deprecated)
     chatml = 'chatml'
     telechat = 'telechat'
@@ -1403,6 +1404,11 @@ def data_collator(self,
     Template(['{{SYSTEM}}'], ['USER: {{QUERY}} ASSISTANT:'], ['</s>'],
              ['</s>'], _wizardlm2_system))

+register_template(
+    TemplateType.atom,
+    Template(['{{SYSTEM}}'], ['<s>Human: {{QUERY}}\n</s><s>Assistant: '], ['</s>'],
+             ['</s>']))
+

 def get_template(
     template_type: str,
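The `atom` template registered above encodes the Llama-Chinese conversation layout. A hand-rolled, illustration-only rendering of a single turn that mirrors the `Template(...)` arguments instead of calling any swift internals:

```python
# Mirrors the registered atom Template: prefix '{{SYSTEM}}', prompt
# '<s>Human: {{QUERY}}\n</s><s>Assistant: ', with '</s>' closing each reply.
def render_atom_turn(query: str, system: str = '') -> str:
    return f'{system}<s>Human: {query}\n</s><s>Assistant: '

print(render_atom_turn('你好'))
# prints: <s>Human: 你好
#         </s><s>Assistant:
```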