Skip to content

Commit

Permalink
Temp benchmark ci dir (#765)
Browse files Browse the repository at this point in the history
* Support fork in benchmark CI

* use temporary dir for benchmark CI

* debug

* revert back

* dependency fix

* refactor script
  • Loading branch information
vwxyzjn authored Sep 18, 2023
1 parent 9a8d52c commit 4697523
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 36 deletions.
17 changes: 14 additions & 3 deletions benchmark/benchmark_and_report.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
#### Step 1: create a work directory:
# this is necessary because another github action job will remove
# the entire directory, which slurm depends on.
# https://stackoverflow.com/questions/4632028/how-to-create-a-temporary-directory
MY_SLURM_TMP_DIR=/fsx/costa/slurm_tmpdir
mkdir -p "$MY_SLURM_TMP_DIR"
WORK_DIR=$(mktemp -d -p "$MY_SLURM_TMP_DIR")
# Copy the checkout into the temp dir and work from there, so slurm jobs
# survive the CI workspace being cleaned up.
cp -r "$PWD" "$WORK_DIR"
cd "$WORK_DIR/$(basename "$PWD")" || exit 1
echo "WORK_DIR: $WORK_DIR"

#### Step 2: actual work starts:
echo "PATH is $PATH"
echo "PYTHONPATH is $PYTHONPATH"
# fixed typo in the echoed message: "whcih" -> "which"
echo "which python is $(which python)"

export WANDB_ENTITY=huggingface

# Run the benchmark script selected by the CI environment. The old hardcoded
# `bash benchmark/benchmark_core.sh > output.txt` line was removed: that file
# is deleted in this same change, and running both would clobber output.txt.
bash "$BENCHMARK_SCRIPT" > output.txt

# Extract Job IDs into an array (intentional word-splitting of grep|awk output)
job_ids=($(grep "Job ID:" output.txt | awk '{print $3}'))
Expand All @@ -27,4 +38,4 @@ done
echo "TAGS_STRING: $TAGS_STRING"
echo "FOLDER_STRING: $FOLDER_STRING"

# Submit the comment-posting job once, gated on all benchmark jobs finishing.
# The stale duplicate sbatch line (missing BENCHMARK_PLOT_SCRIPT) was removed:
# keeping both would submit the job twice.
# NOTE(review): $job_ids is an array; unsubscripted it expands to only its
# first element, so the dependency may cover just one job — confirm whether
# the loop above (hidden hunk) collapses the IDs, otherwise join them with
# ':' before passing to --dependency.
TAGS_STRING="$TAGS_STRING" FOLDER_STRING="$FOLDER_STRING" BENCHMARK_PLOT_SCRIPT="$BENCHMARK_PLOT_SCRIPT" sbatch --dependency=afterany:$job_ids benchmark/post_github_comment.sbatch
24 changes: 0 additions & 24 deletions benchmark/benchmark_core.sh

This file was deleted.

11 changes: 11 additions & 0 deletions benchmark/benchmark_level1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# hello world experiment: smoke-test PPO sentiment tuning end to end.
# Launches 3 seeds (starting at 1) as slurm jobs: 1 node, 1 GPU, 1 task,
# 12 CPUs each, via the shared trl slurm template.
bench_args=(
  --command "python examples/scripts/sentiment_tuning.py --ppo_config.log_with wandb"
  --num-seeds 3
  --start-seed 1
  --workers 10
  --slurm-nodes 1
  --slurm-gpus-per-task 1
  --slurm-ntasks 1
  --slurm-total-cpus 12
  --slurm-template-path benchmark/trl.slurm_template
)
python benchmark/benchmark.py "${bench_args[@]}"
3 changes: 0 additions & 3 deletions benchmark/plot_core.sh → benchmark/benchmark_level1_plot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
# see https://github.com/openrlbenchmark/openrlbenchmark#get-started for documentation
echo "we deal with $TAGS_STRING"

# "sentiment_tuning_gpt2xl_grad_accu$TAGS_STRING" \

python -m openrlbenchmark.rlops_multi_metrics \
--filters '?we=huggingface&wpn=trl&xaxis=_step&ceik=trl_ppo_trainer_config.value.reward_model&cen=trl_ppo_trainer_config.value.exp_name&metrics=env/reward_mean&metrics=objective/kl' \
"sentiment_tuning$TAGS_STRING" \
Expand All @@ -14,7 +12,6 @@ python -m openrlbenchmark.rlops_multi_metrics \
--output-filename benchmark/trl/$FOLDER_STRING/different_models \
--scan-history


python benchmark/upload_benchmark.py \
--folder_path="benchmark/trl/$FOLDER_STRING" \
--path_in_repo="images/benchmark/$FOLDER_STRING" \
Expand Down
11 changes: 11 additions & 0 deletions benchmark/benchmark_level2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# compound experiment: gpt2-xl with gradient accumulation
# (mini_batch_size 16 x 8 accumulation steps), 3 seeds starting at 1,
# submitted as slurm jobs through the shared trl slurm template.
bench_args=(
  --command "python examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_gpt2xl_grad_accu --ppo_config.model_name gpt2-xl --ppo_config.mini_batch_size 16 --ppo_config.gradient_accumulation_steps 8 --ppo_config.log_with wandb"
  --num-seeds 3
  --start-seed 1
  --workers 10
  --slurm-nodes 1
  --slurm-gpus-per-task 1
  --slurm-ntasks 1
  --slurm-total-cpus 12
  --slurm-template-path benchmark/trl.slurm_template
)
python benchmark/benchmark.py "${bench_args[@]}"
21 changes: 21 additions & 0 deletions benchmark/benchmark_level2_plot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# pip install openrlbenchmark==0.2.1a5
# see https://github.com/openrlbenchmark/openrlbenchmark#get-started for documentation
echo "we deal with $TAGS_STRING"

# wandb query shared by the plots below: group runs by exp_name / reward model
# and pull the reward-mean and KL metrics.
wandb_filter='?we=huggingface&wpn=trl&xaxis=_step&ceik=trl_ppo_trainer_config.value.reward_model&cen=trl_ppo_trainer_config.value.exp_name&metrics=env/reward_mean&metrics=objective/kl'

# Plot baseline vs gpt2-xl grad-accu runs for the current tag set.
python -m openrlbenchmark.rlops_multi_metrics \
  --filters "$wandb_filter" \
  "sentiment_tuning$TAGS_STRING" \
  "sentiment_tuning_gpt2xl_grad_accu$TAGS_STRING" \
  --env-ids sentiment-analysis:lvwerra/distilbert-imdb \
  --no-check-empty-runs \
  --pc.ncols 2 \
  --pc.ncols-legend 1 \
  --output-filename "benchmark/trl/$FOLDER_STRING/different_models" \
  --scan-history

# Upload the generated images to the shared example-images dataset repo.
python benchmark/upload_benchmark.py \
  --folder_path="benchmark/trl/$FOLDER_STRING" \
  --path_in_repo="images/benchmark/$FOLDER_STRING" \
  --repo_id="trl-internal-testing/example-images" \
  --repo_type="dataset"

8 changes: 4 additions & 4 deletions benchmark/benchmark_aux.sh → benchmark/benchmark_level3.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## w/ and w/o gradient accumulation
python benchmark/benchmark.py \
--command "python examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_step_grad_accu --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 128 --ppo_config.log_with wandb" \
--num-seeds 5 \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
Expand All @@ -13,7 +13,7 @@ python benchmark/benchmark.py \
## w/ different models (gpt2, gpt2-xl, falcon, llama2)
python benchmark/benchmark.py \
--command "python examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_gpt2 --ppo_config.log_with wandb" \
--num-seeds 5 \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
Expand All @@ -23,7 +23,7 @@ python benchmark/benchmark.py \
--slurm-template-path benchmark/trl.slurm_template
python benchmark/benchmark.py \
--command "python examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_falcon_rw_1b --ppo_config.model_name tiiuae/falcon-rw-1b --ppo_config.log_with wandb" \
--num-seeds 5 \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
Expand All @@ -36,7 +36,7 @@ python benchmark/benchmark.py \
## w/ and w/o PEFT
python benchmark/benchmark.py \
--command "python examples/scripts/sentiment_tuning.py --ppo_config.exp_name sentiment_tuning_peft --use_peft --ppo_config.log_with wandb" \
--num-seeds 5 \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
Expand Down
8 changes: 7 additions & 1 deletion benchmark/post_github_comment.sbatch
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
#SBATCH --ntasks=1
#SBATCH --output=slurm/logs/%x_%j.out

# Remove the temp work dir created by benchmark_and_report.sh on any exit path.
cleanup () {
    # Guard against an unset/empty WORK_DIR so we never run `rm -rf ""`.
    if [ -n "${WORK_DIR:-}" ]; then
        rm -rf -- "$WORK_DIR"
        echo "Deleted temp working directory $WORK_DIR"
    fi
}
trap cleanup EXIT

# Give the benchmark runs time to finish syncing to wandb before plotting.
sleep 2m
# BENCHMARK_PLOT_SCRIPT is passed in via the sbatch environment by
# benchmark_and_report.sh. The stale hardcoded `bash benchmark/plot_core.sh`
# line was removed: that script is renamed in this same change, and running
# both would execute the plotting step twice.
bash "$BENCHMARK_PLOT_SCRIPT"
srun python benchmark/post_github_comment.py
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
"diffusers": ["diffusers>=0.18.0"],
"deepspeed": ["deepspeed>=0.9.5"],
"dev": ["parameterized", "pytest", "pytest-xdist", "pre-commit", "peft>=0.4.0", "diffusers>=0.18.0"],
"benchmark": ["wandb", "ghapi", "openrlbenchmark==0.2.1a5"],
"benchmark": ["wandb", "ghapi", "openrlbenchmark==0.2.1a5", "requests"],
}

setup(
Expand Down

0 comments on commit 4697523

Please sign in to comment.