Add deepspeed experiment #795

Merged: 5 commits, Sep 20, 2023

Changes from all commits

benchmark/benchmark_level1_plot.sh: 2 changes (1 addition, 1 deletion)
@@ -9,7 +9,7 @@ python -m openrlbenchmark.rlops_multi_metrics \
     --no-check-empty-runs \
     --pc.ncols 2 \
     --pc.ncols-legend 1 \
-    --output-filename benchmark/trl/$FOLDER_STRING/different_models \
+    --output-filename benchmark/trl/$FOLDER_STRING/hello_world \
     --scan-history

 python benchmark/upload_benchmark.py \

benchmark/benchmark_level2.sh: 16 changes (14 additions, 2 deletions)
@@ -1,4 +1,4 @@
-# compound
+# compound experiments: gpt2xl + grad_accu
 python benchmark/benchmark.py \
     --command "python examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_gpt2xl_grad_accu --ppo_config.model_name gpt2-xl --ppo_config.mini_batch_size 16 --ppo_config.gradient_accumulation_steps 8 --ppo_config.log_with wandb" \
     --num-seeds 3 \
@@ -8,4 +8,16 @@ python benchmark/benchmark.py \
     --slurm-gpus-per-task 1 \
     --slurm-ntasks 1 \
     --slurm-total-cpus 12 \
-    --slurm-template-path benchmark/trl.slurm_template
+    --slurm-template-path benchmark/trl.slurm_template
+
+# compound experiments: Cerebras-GPT-6.7B + deepspeed zero2 + grad_accu
+python benchmark/benchmark.py \
+    --command "accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2 --ppo_config.batch_size 32 --ppo_config.mini_batch_size 32 --ppo_config.log_with wandb --ppo_config.model_name cerebras/Cerebras-GPT-6.7B --ppo_config.reward_model sentiment-analysis:cerebras/Cerebras-GPT-6.7B" \

Member:
Eventually I think we should do the "proper" thing and fine-tune these models on IMDB so we have a genuinely good policy / reward model. Of course, not necessary for this PR, but perhaps good to be as realistic as possible for the benchmark.

Contributor (Author):
I think that sounds good. Perhaps we can set up an end-to-end example where we train the reward model and then the policy model at the same time.

+    --num-seeds 3 \
+    --start-seed 1 \
+    --workers 10 \
+    --slurm-nodes 1 \
+    --slurm-gpus-per-task 8 \
+    --slurm-ntasks 1 \
+    --slurm-total-cpus 90 \
+    --slurm-template-path benchmark/trl.slurm_template
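
For context, the new launch command reads its DeepSpeed settings from examples/accelerate_configs/deepspeed_zero2.yaml. A minimal accelerate config for ZeRO-2 looks roughly like the sketch below; the concrete values (mixed precision, process count, gradient accumulation) are assumptions for illustration, not necessarily what the repo's file sets:

compute_environment: LOCAL_MACHINE
deepspeed_config:
  gradient_accumulation_steps: 1  # assumed default
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: false
  zero_stage: 2                   # ZeRO stage 2 shards optimizer states and gradients
distributed_type: DEEPSPEED
machine_rank: 0
main_training_function: main
mixed_precision: bf16             # assumed; fp16 also works on GPUs without bf16
num_machines: 1
num_processes: 8                  # one process per GPU, matching --slurm-gpus-per-task 8
use_cpu: false
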
benchmark/benchmark_level2_plot.sh: 10 changes (10 additions, 0 deletions)
@@ -13,6 +13,16 @@ python -m openrlbenchmark.rlops_multi_metrics \
     --output-filename benchmark/trl/$FOLDER_STRING/different_models \
     --scan-history

+python -m openrlbenchmark.rlops_multi_metrics \
+    --filters '?we=huggingface&wpn=trl&xaxis=_step&ceik=trl_ppo_trainer_config.value.reward_model&cen=trl_ppo_trainer_config.value.exp_name&metrics=env/reward_mean&metrics=objective/kl' \
+    "sentiment_tuning_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2$TAGS_STRING" \
+    --env-ids sentiment-analysis:cerebras/Cerebras-GPT-6.7B \
+    --no-check-empty-runs \
+    --pc.ncols 2 \
+    --pc.ncols-legend 1 \
+    --output-filename benchmark/trl/$FOLDER_STRING/deepspeed \
+    --scan-history
+
 python benchmark/upload_benchmark.py \
     --folder_path="benchmark/trl/$FOLDER_STRING" \
     --path_in_repo="images/benchmark/$FOLDER_STRING" \

benchmark/post_github_comment.sbatch: 6 changes (0 additions, 6 deletions)
@@ -4,12 +4,6 @@
 #SBATCH --ntasks=1
 #SBATCH --output=slurm/logs/%x_%j.out

-cleanup () {
-    rm -rf "$WORK_DIR"
-    echo "Deleted temp working directory $WORK_DIR"
-}
-trap cleanup EXIT
-
 sleep 2m
 bash $BENCHMARK_PLOT_SCRIPT
 srun python benchmark/post_github_comment.py

setup.py: 2 changes (1 addition, 1 deletion)
@@ -73,7 +73,7 @@
     "diffusers": ["diffusers>=0.18.0"],
     "deepspeed": ["deepspeed>=0.9.5"],
     "dev": ["parameterized", "pytest", "pytest-xdist", "pre-commit", "peft>=0.4.0", "diffusers>=0.18.0"],
-    "benchmark": ["wandb", "ghapi", "openrlbenchmark==0.2.1a5", "requests"],
+    "benchmark": ["wandb", "ghapi", "openrlbenchmark==0.2.1a5", "requests", "deepspeed"],
 }

 setup(
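
With this change, installing the benchmark extras also pulls in DeepSpeed, e.g.:

pip install "trl[benchmark]"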