# TorchBench Userbenchmark on AI Cluster (#211)

# Workflow identity, triggers, and the environment shared by all steps.
name: TorchBench Userbenchmark on AI Cluster
on:
  schedule:
    # The cron job on the AI cluster itself runs at 8 PM UTC and its result
    # is expected to be available at 10 PM UTC, so this workflow is scheduled
    # for 10 PM UTC.
    - cron: '0 22 * * *'  # every day at 10 PM UTC
  # Manual trigger; no inputs are declared.
  workflow_dispatch:
env:
  # Quoted so the version stays a string instead of being parsed as a float.
  PYTHON_VERSION: "3.8"
  CONDA_ENV_NAME: "ai-cluster-ci"
  # Passed to schedule-benchmarks.py through its --platform option.
  PLATFORM_NAME: "ai_cluster"
  # Credentials are read from repository secrets; never inline them here.
  TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
jobs:
  # Sets up a conda environment, invokes schedule-benchmarks.py for the
  # configured platform, and uploads the resulting .userbenchmark directory
  # as a workflow artifact.
  collect-userbenchmark-result:
    runs-on: [linux.2xlarge]  # a small instance is sufficient
    timeout-minutes: 60  # 1 hour
    steps:
      - name: Checkout TorchBench
        uses: actions/checkout@v3
        with:
          # Check out into ./benchmark so later steps can address it by path.
          path: benchmark
      - name: Install conda
        run: |
          bash benchmark/scripts/install_conda.sh
      - name: Create conda environment
        run: |
          . "${HOME}/miniconda3/etc/profile.d/conda.sh"
          conda create -y -q --name "${CONDA_ENV_NAME}" python="${PYTHON_VERSION}"
          conda activate "${CONDA_ENV_NAME}"
          conda install -y pyyaml numpy boto3 requests
      - name: Run AI Cluster benchmark
        run: |
          . "${HOME}/miniconda3/etc/profile.d/conda.sh" && conda activate "${CONDA_ENV_NAME}"
          # remove old results
          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
          pushd benchmark
          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
          # Figure out what userbenchmarks we should run, and run it
          python ./.github/scripts/userbenchmark/schedule-benchmarks.py --platform "${PLATFORM_NAME}"
          # Copy the results next to the checkout so the upload step finds them.
          cp -r ./.userbenchmark ../benchmark-output
      - name: Upload artifact
        # NOTE(review): v3 of upload-artifact is deprecated on github.com;
        # consider moving to v4 after verifying its behavior changes.
        uses: actions/upload-artifact@v3
        with:
          name: TorchBench result
          path: benchmark-output/
      - name: Remove conda environment
        # Run the cleanup even when an earlier step failed, so a broken run
        # does not leave the environment behind on the runner.
        if: always()
        run: |
          . "${HOME}/miniconda3/etc/profile.d/conda.sh"
          conda env remove --name "${CONDA_ENV_NAME}"