-
Notifications
You must be signed in to change notification settings - Fork 293
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add A10g workflow to gain access to A10G GPU (#2558)
Summary: As the title says. To gain SSH access: 1. Add the label `with-ssh` to the PR. 2. Log in by following the instructions (Corp-VPN connection required). Pull Request resolved: #2558 Reviewed By: davidberard98 Differential Revision: D67227271 Pulled By: xuzhao9 fbshipit-source-id: d385ffbcf23580ca664451b0ecc31ec666664e7c
- Loading branch information
1 parent
6f191e9
commit 285fb28
Showing
2 changed files
with
55 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash
# Keep the CI runner alive while somebody is still logged in to it.
#
# Polls `who` every POLL_INTERVAL_SECONDS seconds, at most MAX_POLLS times, and
# returns as soon as `who` prints nothing. If nobody is logged in when the
# script starts, it returns immediately after printing the banner.
set -euo pipefail

# 1440 polls x 5 seconds = 7200 seconds = the 2 hours announced below.
readonly MAX_POLLS=1440
readonly POLL_INTERVAL_SECONDS=5

echo "Holding runner for 2 hours until all ssh sessions have logged out"
for ((poll = 0; poll < MAX_POLLS; poll++)); do
  # Break if no ssh session exists anymore.
  # NOTE(review): `who` lists every login session, not only SSH ones; this
  # assumes SSH logins are the only sessions on the runner -- confirm.
  if [[ -z "$(who)" ]]; then
    break
  fi
  # Progress marker so the job log shows the runner is still being held.
  echo "."
  sleep "${POLL_INTERVAL_SECONDS}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Sets up TorchBench on an A10G GPU runner for a pull request and keeps the
# runner alive while an SSH session is attached, so the PR can be debugged
# interactively on the GPU machine.
name: TorchBench PR Test on A10G
on:
  workflow_dispatch:
  pull_request:

jobs:
  linux-test-a10g:
    # Don't run on forked repos
    # Only run on PR labeled 'with-ssh'
    # NOTE(review): a workflow_dispatch event carries no pull_request payload,
    # so this condition presumably also skips manually dispatched runs --
    # confirm that is intended.
    if: github.repository_owner == 'pytorch' && contains(github.event.pull_request.labels.*.name, 'with-ssh')
    runs-on: linux.g5.4xlarge.nvidia.gpu
    # Upper bound for the whole job, including the wait in check-ssh.sh.
    timeout-minutes: 240
    environment: docker-s3-upload
    env:
      CONDA_ENV: "pr-test-cuda"
      TEST_CONFIG: "cuda"
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    steps:
      - name: Checkout TorchBench
        uses: actions/checkout@v3
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        with:
          github-secret: ${{ secrets.TORCHBENCH_ACCESS_TOKEN }}
      - name: Install Conda
        run: |
          bash ./.ci/torchbench/install-conda.sh
      - name: Install TorchBench
        run: |
          bash ./.ci/torchbench/install.sh
      # always(): still hold the runner for the SSH user when an install step failed.
      - name: Wait for SSH session to end
        if: always()
        run: |
          bash ./.ci/torchbench/check-ssh.sh
      # always(): remove the environment even after a failed or cancelled run.
      - name: Clean up Conda env
        if: always()
        run: |
          . "${HOME}/miniconda3/etc/profile.d/conda.sh"
          # -y: the job has no interactive terminal to answer conda's confirmation prompt.
          conda remove -y -n "${CONDA_ENV}" --all

# One active run per PR (or per commit when there is no PR); a newer run
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true