Skip to content

[Fixbug] Fix a performance bug in auto-scheduler #17

[Fixbug] Fix a performance bug in auto-scheduler

[Fixbug] Fix a performance bug in auto-scheduler #17

Workflow file for this run

# Regression workflow: started manually, or by a comment on an issue / pull request.
name: Regression

on:
  # Manual trigger; the single boolean input is consulted by the stop_instances job.
  workflow_dispatch:
    inputs:
      shutdown_instances:
        description: 'Shut down GPU instances when finished.'
        type: boolean
        required: true
        default: true
  # Every newly created issue / PR comment triggers the workflow;
  # the individual jobs narrow this down further through their `if:` conditions.
  issue_comment:
    types:
      - created

# Workflow-level environment: these variables are visible to every job below.
env:
  # CI_DB_* settings, read from repository secrets.
  CI_DB_HOSTNAME: ${{ secrets.CI_DB_HOSTNAME }}
  CI_DB_PORT: ${{ secrets.CI_DB_PORT }}
  CI_DB_USERNAME: ${{ secrets.CI_DB_USERNAME }}
  CI_DB_PASSWORD: ${{ secrets.CI_DB_PASSWORD }}
  # CI_CS_* settings, read from repository secrets.
  CI_CS_HOSTNAME: ${{ secrets.CI_CS_HOSTNAME }}
  CI_CS_PORT: ${{ secrets.CI_CS_PORT }}
  CI_CS_USERNAME: ${{ secrets.CI_CS_USERNAME }}
  CI_CS_PASSWORD: ${{ secrets.CI_CS_PASSWORD }}
  # AWS credentials and the fixed default region.
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  AWS_DEFAULT_REGION: us-east-1
  # Hugging Face token (presumably for model downloads during the tests; verify).
  HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
# Launches the instances for this run and publishes their identifiers and
# hardware configs as job outputs for the downstream jobs.
start_instances:
  runs-on: ubuntu-latest
  # Runs on a manual dispatch, or on a pull-request comment that contains
  # '$hidet-ci launch' and was written by a MEMBER, OWNER or COLLABORATOR.
  if: |
    github.event_name == 'workflow_dispatch' ||
    github.event_name == 'issue_comment' && github.event.issue.pull_request != '' &&
    contains(fromJSON('["MEMBER", "OWNER", "COLLABORATOR"]'), github.event.comment.author_association) &&
    contains(github.event.comment.body, '$hidet-ci launch')
  outputs:
    # Both values are step outputs of the `run_py_script` step below.
    hw_configs: ${{ steps.run_py_script.outputs.hw_configs }}
    started_instances: ${{ steps.run_py_script.outputs.started_instances }}
  steps:
    # No `ref` is given, so the checkout action's default ref is used.
    - uses: actions/checkout@v4
      name: Checkout repo

    - name: Install dependencies
      run: pip install mysql-connector-python

    # `timeout 900` aborts the script if it has not finished after 15 minutes.
    - id: run_py_script
      name: Run main Python script
      env:
        REPO_NAME: ${{ github.repository }}
        # TODO: Allow launching only specified GPU instances
        HW_CONFIG: all
      run: timeout 900 python ./.github/scripts/start_instances.py

    # run_configs.json is presumably written by start_instances.py (verify);
    # the run_tests job downloads this artifact by the same name.
    # NOTE(review): the @v3 pin must stay in sync with the download-artifact
    # version used by the jobs that consume this artifact.
    - name: Upload run configs
      uses: actions/upload-artifact@v3
      with:
        path: run_configs.json
        name: run_configs
        retention-days: 1
# Builds hidet from the ref under test and runs the regression tests once per
# hardware config reported by start_instances, inside an NVIDIA PyTorch container.
run_tests:
  needs: start_instances
  # 2880 minutes = 48 hours.
  timeout-minutes: 2880
  strategy:
    matrix:
      # hw_configs is a JSON string produced by start_instances; fromJSON turns
      # it into the matrix values (one job per entry).
      hw_configs: ${{ fromJSON(needs.start_instances.outputs.hw_configs) }}
  # Each matrix value doubles as the runner label for that job.
  runs-on: ${{ matrix.hw_configs }}
  container:
    image: nvcr.io/nvidia/pytorch:23.10-py3
    options: --gpus all
  steps:
    # Manual runs test the dispatched ref; comment-triggered runs test the
    # head of the pull request the comment was posted on.
    - name: Checkout repo
      uses: actions/checkout@v4
      with:
        ref: |
          ${{ github.event_name == 'workflow_dispatch' && github.ref_name ||
              format('refs/pull/{0}/head', github.event.issue.number) }}
        path: hidet

    # Separate repository (name and access token come from secrets), branch `ci`.
    - name: Checkout models
      uses: actions/checkout@v4
      with:
        repository: ${{ secrets.MODEL_REPO }}
        token: ${{ secrets.GH_PAT }}
        path: models
        ref: ci

    - name: Install dependencies
      run: |
        pip install -r hidet/requirements.txt
        pip install -r hidet/requirements-dev.txt
        pip install -r hidet/.github/requirements-ci.txt

    # Builds the wheel and exports its path as WHEEL_NAME for the next step.
    # The script is kept POSIX-sh compatible because no `shell:` is set for
    # this container job.
    - name: Build hidet
      run: |
        bash hidet/scripts/wheel/build_wheel.sh
        WHEEL=$(find hidet/scripts/wheel/built_wheel -maxdepth 1 -name '*.whl')
        # Fail here with a clear message instead of letting the install step
        # run `pip install` without a wheel argument.
        if [ -z "$WHEEL" ]; then
          echo "No wheel found in hidet/scripts/wheel/built_wheel" >&2
          exit 1
        fi
        echo "WHEEL_NAME=$WHEEL" >> "$GITHUB_ENV"
        # Values written to GITHUB_ENV only become visible in *later* steps, and
        # ${{ env.* }} is expanded before this script starts, so the freshly
        # built path has to be printed from the shell variable.
        echo "Built wheel: $WHEEL"

    # WHEEL_NAME was exported by the previous step, so env.WHEEL_NAME is set here.
    - name: Install hidet
      run: |
        pip install --no-deps --force-reinstall ${{ env.WHEEL_NAME }}

    - name: Install models
      run: |
        pip install -e models

    # Fetches the `run_configs` artifact uploaded by the start_instances job.
    - name: Download run configs
      uses: actions/download-artifact@v3
      with:
        name: run_configs

    - name: Run tests
      timeout-minutes: 2880
      run: |
        python hidet/.github/scripts/run_tests.py
      env:
        HW_CONFIG: ${{ matrix.hw_configs }}
        REPO_NAME: ${{ github.repository }}
        # NOTE(review): the literal block scalar (`|`) leaves a trailing newline
        # on this value; confirm run_tests.py strips it.
        REPO_BRANCH: |
          ${{ github.event_name == 'workflow_dispatch' && github.ref_name ||
              format('pull/{0}', github.event.issue.number) }}

    # Re-uploads run_configs.json under a per-hardware-config artifact name
    # (presumably updated by run_tests.py; verify).
    # NOTE(review): the @v3 pin must stay in sync with the artifact actions
    # used by the jobs that produce / consume these artifacts.
    - name: Upload run configs
      uses: actions/upload-artifact@v3
      with:
        name: run_configs_${{ matrix.hw_configs }}
        path: run_configs.json
        retention-days: 1
# Collects the run-config artifacts and hands them, together with metadata
# about the tested commit, to the upload script.
upload_results:
  needs: [start_instances, run_tests]
  runs-on: ubuntu-latest
  steps:
    # Same ref selection as in run_tests: the dispatched ref for manual runs,
    # otherwise the head of the commented pull request.
    - uses: actions/checkout@v4
      name: Checkout repo
      with:
        ref: |
          ${{ github.event_name == 'workflow_dispatch' && github.ref_name ||
              format('refs/pull/{0}/head', github.event.issue.number) }}

    - name: Install dependencies
      run: pip install mysql-connector-python

    # No artifact name is given here.
    - name: Download run configs
      uses: actions/download-artifact@v3

    # Reads time, author and SHA of the checked-out commit and exports them
    # through GITHUB_ENV so the following step can see them.
    - name: Setup ENV
      run: |
        commit_time=$(git log -1 --format=%cd --date=format:'%Y-%m-%d %H:%M:%S')
        commit_author=$(git log -1 --format=%an)
        commit_sha=$(git log -1 --format=%H)
        {
          echo "COMMIT_TIME=$commit_time"
          echo "COMMIT_AUTHOR=$commit_author"
          echo "COMMIT_SHA=$commit_sha"
        } >> $GITHUB_ENV

    - name: Run main Python script
      env:
        # Commit metadata exported by the "Setup ENV" step.
        COMMIT_SHA: ${{ env.COMMIT_SHA }}
        COMMIT_TIME: ${{ env.COMMIT_TIME }}
        COMMIT_AUTHOR: ${{ env.COMMIT_AUTHOR }}
        REPO_NAME: ${{ github.repository }}
        REPO_BRANCH: |
          ${{ github.event_name == 'workflow_dispatch' && github.ref_name ||
              format('pull/{0}', github.event.issue.number) }}
        # Raw JSON string of hardware configs, as emitted by start_instances.
        HW_CONFIGS: ${{ needs.start_instances.outputs.hw_configs }}
      run: python ./.github/scripts/upload_results.py
# Shuts down the instances started by start_instances once the tests are over.
stop_instances:
  needs: [start_instances, run_tests]
  runs-on: ubuntu-latest
  # always(): without a status-check function this job would be skipped as soon
  # as run_tests fails or is cancelled, leaving the started instances running.
  # The explicit start_instances check keeps the job from running when nothing
  # was launched (start_instances skipped or failed).
  # The remaining condition is unchanged: manual runs honour the
  # `shutdown_instances` input, PR-comment runs shut down unless the comment
  # contains '--keep'.
  if: |
    always() && needs.start_instances.result == 'success' && (
      github.event_name == 'workflow_dispatch' && inputs.shutdown_instances ||
      github.event_name == 'issue_comment' && github.event.issue.pull_request != '' &&
      !contains(github.event.comment.body, '--keep')
    )
  steps:
    # No `ref` is given, so the checkout action's default ref is used.
    - name: Checkout repo
      uses: actions/checkout@v4

    # `timeout 900` aborts the script if it has not finished after 15 minutes.
    - name: Run main Python script
      run: timeout 900 python ./.github/scripts/stop_instances.py
      env:
        # Identifiers of the instances to stop, as reported by start_instances.
        STARTED_INSTANCES: ${{ needs.start_instances.outputs.started_instances }}