diff --git a/applications/DeepSpeed-Chat/.gitignore b/applications/DeepSpeed-Chat/.gitignore new file mode 100644 index 000000000..cbc922f8f --- /dev/null +++ b/applications/DeepSpeed-Chat/.gitignore @@ -0,0 +1,137 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +results/ +outputs/ + +.amltconfig +.test_output +*.hdf5 +*.h5 diff --git a/applications/DeepSpeed-Chat/README.md b/applications/DeepSpeed-Chat/README.md index c0a456823..ac9f3ab6a 100644 --- a/applications/DeepSpeed-Chat/README.md +++ b/applications/DeepSpeed-Chat/README.md @@ -33,20 +33,24 @@ A fast, affordable, scalable and open system framework for enabling end-to-end R ## Table of Contents +- [🐕DeepSpeed-Chat: Easy, Fast and Affordable RLHF Training of ChatGPT-like Models at All Scales🐕](#deepspeed-chat-easy-fast-and-affordable-rlhf-training-of-chatgpt-like-models-at-all-scales) +- [Table of Contents](#table-of-contents) - [📰 Latest News 📰](#-latest-news-) -- [🚀 What is DeepSpeed Chat 🚀️](#-what-is-deepspeed-chat-) +- [🚀 What is DeepSpeed Chat 🚀](#-what-is-deepspeed-chat-) - [🧨 Capabilities 🧨](#-capabilities-) - [☕ Quick Start ☕](#-quick-start-) - [🐼 Installation](#-installation) - - [🐼 Single Script for Training 3-Step RLHF Pipeline](#-one-single-script-completes-all-three-stages-of-rlhf-training-and-generate-your-first-chatgpt-model) + - [🐼 One Single Script Completes All Three Steps of RLHF Training and Generate Your First ChatGPT Model](#-one-single-script-completes-all-three-steps-of-rlhf-training-and-generate-your-first-chatgpt-model) - [🐼 Demonstration: Individual Step Fine-Tuning](#-demonstration-individual-step-fine-tuning) - [🕐 Step 1 - Supervised Fine-Tuning](#-step-1---supervised-fine-tuning) - [🕑 Step 2 - Reward Model](#-step-2---reward-model) - [🕒 Step 3 - Reinforcement Learning with Human 
Feedback](#-step-3---reinforcement-learning-with-human-feedback) - - [🐼 Adding and using your own datasets in DeepSpeed-Chat](#-adding-and-using-your-own-datasets-in-deepspeed-chat) - - [🐼 Customizing RLHF training pipeline via DeepSpeed-Chat’s APIs](#-customizing-your-own-rlhf-training-pipeline-using-deepspeed-chats-rlhf-apis) - - [🐼 Serving Your Model: Plug-in and Test!](#-serving-plug-in-your-final-model-trained-by-deepspeed-chat-and-test-it-out) + - [🐼 Adding and using your own datasets in DeepSpeed-Chat](#-adding-and-using-your-own-datasets-in-deepspeed-chat) + - [🐼 Customizing your own RLHF training pipeline using DeepSpeed-Chat’s RLHF APIs](#-customizing-your-own-rlhf-training-pipeline-using-deepspeed-chats-rlhf-apis) + - [🐼 Serving: Plug-in your final model trained by DeepSpeed-Chat and test it out!](#-serving-plug-in-your-final-model-trained-by-deepspeed-chat-and-test-it-out) - [🔥 Training Performance Evaluation 🔥](#-training-performance-evaluation-) + - [🐲 Superior Model Scale and Low Training Cost](#-superior-model-scale-and-low-training-cost) + - [🐲 Throughput and Model Size Scalability Comparisons with Existing RLHF Systems](#-throughput-and-model-size-scalability-comparisons-with-existing-rlhf-systems) - [😽 Supported Models 😽](#-supported-models-) - [🔬 Build Pipeline Status 🔬](#-build-pipeline-status-) - [⚓ Documentation and Tutorial ⚓](#-documentation-and-tutorial-) @@ -119,6 +123,7 @@ pip install deepspeed>=0.9.0 git clone https://github.com/microsoft/DeepSpeedExamples.git cd DeepSpeedExamples/applications/DeepSpeed-Chat/ pip install -r requirements.txt +pip install -e . 
``` ### 🐼 One Single Script Completes All Three Steps of RLHF Training and Generate Your First ChatGPT Model diff --git a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/ppo_trainer.py b/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py similarity index 99% rename from applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/ppo_trainer.py rename to applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py index 2a5056cfe..22cba6be0 100644 --- a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/ppo_trainer.py +++ b/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py @@ -4,17 +4,12 @@ # DeepSpeed Team import torch import torch.nn.functional as F -import sys -import os import time import deepspeed from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus from deepspeed.accelerator import get_accelerator -sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) - -from utils.utils import print_rank_0 +from dschat.utils.utils import print_rank_0 def print_all_ranks(tag, value, rank): diff --git a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/rlhf_engine.py b/applications/DeepSpeed-Chat/dschat/rlhf/rlhf_engine.py similarity index 97% rename from applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/rlhf_engine.py rename to applications/DeepSpeed-Chat/dschat/rlhf/rlhf_engine.py index 3a192d017..5b6778cc2 100755 --- a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/rlhf_engine.py +++ b/applications/DeepSpeed-Chat/dschat/rlhf/rlhf_engine.py @@ -9,10 +9,10 @@ from deepspeed.ops.adam import DeepSpeedCPUAdam from transformers import AutoModelForCausalLM, get_scheduler -from utils.ds_utils import get_train_ds_config, get_eval_ds_config -from utils.module.lora import convert_linear_layer_to_lora, only_optimize_lora_parameters, make_model_gradient_checkpointing_compatible -from utils.model.model_utils import create_hf_model, create_critic_model -from utils.utils import 
get_optimizer_grouped_parameters +from dschat.utils.ds_utils import get_train_ds_config, get_eval_ds_config +from dschat.utils.module.lora import convert_linear_layer_to_lora, only_optimize_lora_parameters, make_model_gradient_checkpointing_compatible +from dschat.utils.model.model_utils import create_hf_model, create_critic_model +from dschat.utils.utils import get_optimizer_grouped_parameters """ TODOs: * support HF models for critic (for debugging), must be a previously saved ckpt from step-2 diff --git a/applications/DeepSpeed-Chat/training/utils/data/data_utils.py b/applications/DeepSpeed-Chat/dschat/utils/data/data_utils.py similarity index 99% rename from applications/DeepSpeed-Chat/training/utils/data/data_utils.py rename to applications/DeepSpeed-Chat/dschat/utils/data/data_utils.py index 277ebd730..744ad7927 100644 --- a/applications/DeepSpeed-Chat/training/utils/data/data_utils.py +++ b/applications/DeepSpeed-Chat/dschat/utils/data/data_utils.py @@ -14,7 +14,7 @@ import os import hashlib from itertools import chain -from . 
import raw_datasets +from dschat.utils.data import raw_datasets from deepspeed.accelerator import get_accelerator diff --git a/applications/DeepSpeed-Chat/training/utils/data/raw_datasets.py b/applications/DeepSpeed-Chat/dschat/utils/data/raw_datasets.py similarity index 100% rename from applications/DeepSpeed-Chat/training/utils/data/raw_datasets.py rename to applications/DeepSpeed-Chat/dschat/utils/data/raw_datasets.py diff --git a/applications/DeepSpeed-Chat/training/utils/ds_utils.py b/applications/DeepSpeed-Chat/dschat/utils/ds_utils.py similarity index 100% rename from applications/DeepSpeed-Chat/training/utils/ds_utils.py rename to applications/DeepSpeed-Chat/dschat/utils/ds_utils.py diff --git a/applications/DeepSpeed-Chat/training/utils/model/model_utils.py b/applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py similarity index 97% rename from applications/DeepSpeed-Chat/training/utils/model/model_utils.py rename to applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py index 27e5720ca..97d3bff15 100644 --- a/applications/DeepSpeed-Chat/training/utils/model/model_utils.py +++ b/applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py @@ -12,9 +12,8 @@ from huggingface_hub import snapshot_download from transformers.deepspeed import HfDeepSpeedConfig -from .reward_model import RewardModel -from ..utils import load_state_dict_into_model -from ..utils import print_rank_0 +from dschat.utils.model.reward_model import RewardModel +from dschat.utils.utils import load_state_dict_into_model, print_rank_0 def configure_dropout(model_config, dropout): diff --git a/applications/DeepSpeed-Chat/training/utils/model/reward_model.py b/applications/DeepSpeed-Chat/dschat/utils/model/reward_model.py similarity index 100% rename from applications/DeepSpeed-Chat/training/utils/model/reward_model.py rename to applications/DeepSpeed-Chat/dschat/utils/model/reward_model.py diff --git a/applications/DeepSpeed-Chat/training/utils/module/lora.py 
b/applications/DeepSpeed-Chat/dschat/utils/module/lora.py similarity index 100% rename from applications/DeepSpeed-Chat/training/utils/module/lora.py rename to applications/DeepSpeed-Chat/dschat/utils/module/lora.py diff --git a/applications/DeepSpeed-Chat/training/utils/perf.py b/applications/DeepSpeed-Chat/dschat/utils/perf.py similarity index 100% rename from applications/DeepSpeed-Chat/training/utils/perf.py rename to applications/DeepSpeed-Chat/dschat/utils/perf.py diff --git a/applications/DeepSpeed-Chat/training/utils/utils.py b/applications/DeepSpeed-Chat/dschat/utils/utils.py similarity index 100% rename from applications/DeepSpeed-Chat/training/utils/utils.py rename to applications/DeepSpeed-Chat/dschat/utils/utils.py diff --git a/applications/DeepSpeed-Chat/train.py b/applications/DeepSpeed-Chat/e2e_rlhf.py similarity index 100% rename from applications/DeepSpeed-Chat/train.py rename to applications/DeepSpeed-Chat/e2e_rlhf.py diff --git a/applications/DeepSpeed-Chat/setup.py b/applications/DeepSpeed-Chat/setup.py new file mode 100644 index 000000000..343f5b476 --- /dev/null +++ b/applications/DeepSpeed-Chat/setup.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# setup.py: install script for deepspeed_chat +""" +to install deepspeed_chat and its dependencies for development work, +run this cmd from the root directory: + pip install -e . 
+""" +import setuptools + +setuptools.setup( + name="deepspeed-chat", + version="0.1", + url= + "https://github.com/microsoft/DeepSpeedExamples/tree/master/applications/DeepSpeed-Chat", + include_package_data=True, + packages=setuptools.find_packages(include=['dschat']), + install_requires=[ + "datasets>=2.8.0", "sentencepiece>=0.1.97", "protobuf==3.20.3", + "accelerate>=0.15.0", "torch>=1.12.0", "deepspeed>=0.9.2", + "transformers", "tensorboard" + ], + extras_require={ + "azureml": [ + "azure-ml-component", + "azureml-core", + ], + }) diff --git a/applications/DeepSpeed-Chat/training/tests/test_training.py b/applications/DeepSpeed-Chat/tests/test_training.py similarity index 97% rename from applications/DeepSpeed-Chat/training/tests/test_training.py rename to applications/DeepSpeed-Chat/tests/test_training.py index 3be4f6ff6..7ffe02972 100644 --- a/applications/DeepSpeed-Chat/training/tests/test_training.py +++ b/applications/DeepSpeed-Chat/tests/test_training.py @@ -66,7 +66,7 @@ def test_ds_chat(zero_stage, hybrid_engine, offload, lora): # cd into execution dir wd = os.getcwd() - os.chdir("../step3_rlhf_finetuning") + os.chdir("../training/step3_rlhf_finetuning") sweep_script = "training_scripts/opt/single_node/sweep/run_single.sh" # Run bash script @@ -85,3 +85,5 @@ def test_ds_chat(zero_stage, hybrid_engine, offload, lora): ), "Actor model was not saved during step 3 training." assert file_exists(f"{output_path}/critic/", "pytorch_model.bin" ), "Critic model was not saved during step 3 training." 
+ + os.chdir(wd) diff --git a/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/main.py b/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/main.py index 0332e2e3f..c37d1f4cd 100755 --- a/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/main.py +++ b/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/main.py @@ -4,9 +4,7 @@ # DeepSpeed Team import argparse -import os import math -import sys import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler @@ -23,14 +21,12 @@ from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam from deepspeed import get_accelerator -sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) -from utils.data.data_utils import create_prompt_dataset -from utils.utils import print_rank_0, to_device, save_hf_format, set_random_seed, get_all_reduce_mean, get_optimizer_grouped_parameters, save_zero_three_model, load_hf_tokenizer -from utils.ds_utils import get_train_ds_config -from utils.module.lora import convert_linear_layer_to_lora, convert_lora_to_linear_layer, only_optimize_lora_parameters, make_model_gradient_checkpointing_compatible -from utils.model.model_utils import create_hf_model, causal_lm_model_to_fp32_loss -from utils.perf import print_throughput +from dschat.utils.data.data_utils import create_prompt_dataset +from dschat.utils.utils import print_rank_0, to_device, save_hf_format, set_random_seed, get_all_reduce_mean, get_optimizer_grouped_parameters, save_zero_three_model, load_hf_tokenizer +from dschat.utils.ds_utils import get_train_ds_config +from dschat.utils.module.lora import convert_linear_layer_to_lora, convert_lora_to_linear_layer, only_optimize_lora_parameters, make_model_gradient_checkpointing_compatible +from dschat.utils.model.model_utils import create_hf_model, causal_lm_model_to_fp32_loss +from dschat.utils.perf import print_throughput def parse_args(): diff --git 
a/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/prompt_eval.py b/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/prompt_eval.py index 21f79d63a..a25b0edea 100644 --- a/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/prompt_eval.py +++ b/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/prompt_eval.py @@ -5,16 +5,12 @@ import argparse import logging import torch -import sys -import os from transformers import ( AutoModelForCausalLM, ) -sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) -from utils.model.model_utils import create_hf_model -from utils.utils import load_hf_tokenizer +from dschat.utils.model.model_utils import create_hf_model +from dschat.utils.utils import load_hf_tokenizer from deepspeed import get_accelerator logger = logging.getLogger(__name__) diff --git a/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/main.py b/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/main.py index dae906173..265c1caf4 100644 --- a/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/main.py +++ b/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/main.py @@ -4,9 +4,7 @@ # DeepSpeed Team import argparse -import os import math -import sys import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler @@ -21,13 +19,11 @@ from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam from deepspeed.accelerator import get_accelerator -sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) -from utils.model.model_utils import create_critic_model -from utils.data.data_utils import create_prompt_dataset, DataCollatorReward -from utils.utils import print_rank_0, to_device, save_hf_format, set_random_seed, get_all_reduce_mean, get_optimizer_grouped_parameters, save_zero_three_model, load_hf_tokenizer -from utils.ds_utils import get_train_ds_config 
-from utils.module.lora import convert_linear_layer_to_lora, convert_lora_to_linear_layer, only_optimize_lora_parameters, make_model_gradient_checkpointing_compatible +from dschat.utils.model.model_utils import create_critic_model +from dschat.utils.data.data_utils import create_prompt_dataset, DataCollatorReward +from dschat.utils.utils import print_rank_0, to_device, save_hf_format, set_random_seed, get_all_reduce_mean, get_optimizer_grouped_parameters, save_zero_three_model, load_hf_tokenizer +from dschat.utils.ds_utils import get_train_ds_config +from dschat.utils.module.lora import convert_linear_layer_to_lora, convert_lora_to_linear_layer, only_optimize_lora_parameters, make_model_gradient_checkpointing_compatible def parse_args(): diff --git a/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/rw_eval.py b/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/rw_eval.py index 28ee87d1c..23f9a66af 100644 --- a/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/rw_eval.py +++ b/applications/DeepSpeed-Chat/training/step2_reward_model_finetuning/rw_eval.py @@ -4,16 +4,10 @@ # DeepSpeed Team import argparse -import os import torch -import sys - -sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) -from utils.model.model_utils import create_critic_model -from utils.utils import to_device -from utils.utils import load_hf_tokenizer +from dschat.utils.model.model_utils import create_critic_model +from dschat.utils.utils import to_device, load_hf_tokenizer from deepspeed import get_accelerator diff --git a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py index 78f3249c0..a5be5671b 100644 --- a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py +++ b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py @@ -33,18 +33,13 @@ import deepspeed -from ppo_trainer import 
DeepSpeedPPOTrainer, DeepSpeedPPOTrainerUnsupervised -from rlhf_engine import DeepSpeedRLHFEngine - -import sys - -sys.path.append( - os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) -from utils.data.data_utils import create_prompt_dataset, MiniDataset, DataCollatorRLHF, get_unsupervised_data -from utils.utils import print_rank_0, to_device, save_hf_format, set_random_seed, get_all_reduce_mean, \ - moving_average, save_zero_three_model, load_hf_tokenizer, ExponentialMovingAverage -from utils.module.lora import convert_lora_to_linear_layer -from utils.perf import print_throughput_step3 +from dschat.rlhf.ppo_trainer import DeepSpeedPPOTrainer, DeepSpeedPPOTrainerUnsupervised +from dschat.rlhf.rlhf_engine import DeepSpeedRLHFEngine +from dschat.utils.data.data_utils import create_prompt_dataset, MiniDataset, DataCollatorRLHF, get_unsupervised_data +from dschat.utils.utils import print_rank_0, to_device, save_hf_format, set_random_seed, get_all_reduce_mean, moving_average, save_zero_three_model, load_hf_tokenizer, \ + ExponentialMovingAverage +from dschat.utils.module.lora import convert_lora_to_linear_layer +from dschat.utils.perf import print_throughput_step3 from deepspeed.accelerator import get_accelerator writer = None