Skip to content

Commit

Permalink
Add deepspeed experiment (huggingface#795)
Browse files: browse the repository at this point in the history
* Add deepspeed experiment

* add deepspeed pip install

* update hello world.sh

* update comments

* remove cleanup
  • Loading branch information
vwxyzjn authored and Andrew Lapp committed May 10, 2024
1 parent e808ea6 commit b245af2
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 10 deletions.
2 changes: 1 addition & 1 deletion benchmark/benchmark_level1_plot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ python -m openrlbenchmark.rlops_multi_metrics \
--no-check-empty-runs \
--pc.ncols 2 \
--pc.ncols-legend 1 \
--output-filename benchmark/trl/$FOLDER_STRING/different_models \
--output-filename benchmark/trl/$FOLDER_STRING/hello_world \
--scan-history

python benchmark/upload_benchmark.py \
Expand Down
16 changes: 14 additions & 2 deletions benchmark/benchmark_level2.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# compound
# compound experiments: gpt2xl + grad_accu
python benchmark/benchmark.py \
--command "python examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_gpt2xl_grad_accu --ppo_config.model_name gpt2-xl --ppo_config.mini_batch_size 16 --ppo_config.gradient_accumulation_steps 8 --ppo_config.log_with wandb" \
--num-seeds 3 \
Expand All @@ -8,4 +8,16 @@ python benchmark/benchmark.py \
--slurm-gpus-per-task 1 \
--slurm-ntasks 1 \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template
--slurm-template-path benchmark/trl.slurm_template

# compound experiments: Cerebras-GPT-6.7B + deepspeed zero2 + grad_accu
python benchmark/benchmark.py \
--command "accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2 --ppo_config.batch_size 32 --ppo_config.mini_batch_size 32 --ppo_config.log_with wandb --ppo_config.model_name cerebras/Cerebras-GPT-6.7B --ppo_config.reward_model sentiment-analysis:cerebras/Cerebras-GPT-6.7B" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
--slurm-gpus-per-task 8 \
--slurm-ntasks 1 \
--slurm-total-cpus 90 \
--slurm-template-path benchmark/trl.slurm_template
10 changes: 10 additions & 0 deletions benchmark/benchmark_level2_plot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ python -m openrlbenchmark.rlops_multi_metrics \
--output-filename benchmark/trl/$FOLDER_STRING/different_models \
--scan-history

python -m openrlbenchmark.rlops_multi_metrics \
--filters '?we=huggingface&wpn=trl&xaxis=_step&ceik=trl_ppo_trainer_config.value.reward_model&cen=trl_ppo_trainer_config.value.exp_name&metrics=env/reward_mean&metrics=objective/kl' \
"sentiment_tuning_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2$TAGS_STRING" \
--env-ids sentiment-analysis:cerebras/Cerebras-GPT-6.7B \
--no-check-empty-runs \
--pc.ncols 2 \
--pc.ncols-legend 1 \
--output-filename benchmark/trl/$FOLDER_STRING/deepspeed \
--scan-history

python benchmark/upload_benchmark.py \
--folder_path="benchmark/trl/$FOLDER_STRING" \
--path_in_repo="images/benchmark/$FOLDER_STRING" \
Expand Down
6 changes: 0 additions & 6 deletions benchmark/post_github_comment.sbatch
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@
#SBATCH --ntasks=1
#SBATCH --output=slurm/logs/%x_%j.out

cleanup () {
rm -rf "$WORK_DIR"
echo "Deleted temp working directory $WORK_DIR"
}
trap cleanup EXIT

sleep 2m
bash $BENCHMARK_PLOT_SCRIPT
srun python benchmark/post_github_comment.py
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
"diffusers": ["diffusers>=0.18.0"],
"deepspeed": ["deepspeed>=0.9.5"],
"dev": ["parameterized", "pytest", "pytest-xdist", "pre-commit", "peft>=0.4.0", "diffusers>=0.18.0"],
"benchmark": ["wandb", "ghapi", "openrlbenchmark==0.2.1a5", "requests"],
"benchmark": ["wandb", "ghapi", "openrlbenchmark==0.2.1a5", "requests", "deepspeed"],
}

setup(
Expand Down

0 comments on commit b245af2

Please sign in to comment.