Skip to content

Create an Azure VM

Create an Azure VM #35

name: Create an Azure VM
defaults:
run:
shell: bash
on:
workflow_dispatch:
inputs:
action:
description: Action
required: true
type: choice
options:
- start
- terminate all
size:
description: Instance
required: true
type: choice
# All numbers are for spot instances in East US as of 2023-06-03.
# Standard_HB120rs_v3:
# - https://learn.microsoft.com/en-us/azure/virtual-machines/hbv3-series
# - 120 AMD EPYC™ 7V73X (Milan-X) CPU cores
# - 448 GB of RAM
# - 2 * 960 GB SSD
# - $0.37/hour
# Standard_NC8as_T4_v3:
# - https://learn.microsoft.com/en-us/azure/virtual-machines/nct4-v3-series
# - 1x NVIDIA Tesla T4 GPU with 16 GB of RAM
# - 8 AMD EPYC 7V12 (Rome) CPU cores
# - 56 GB of RAM
# - 1 * 360 GB SSD
# - $0.14/hour
# Standard_NV36ads_A10_v5:
# - https://learn.microsoft.com/en-us/azure/virtual-machines/nva10v5-series
# - 1x NVIDIA A10 GPU with 24 GB of RAM
# - 36 AMD EPYC 74F3V(Milan) CPU cores
# - 440 GB of RAM
# - 1 * 1440 GB SSD
# - $1.28/hour
options:
- Standard_HB120rs_v3
# - Standard_NC8as_T4_v3
# - Standard_NV36ads_A10_v5
jobs:
github-check-for-runner:
name: Check for runner (GitHub)
runs-on: ubuntu-latest
if: ${{ inputs.action == 'start' }}
outputs:
# NOTE: Returns the ID of the matching runner, or -1 if no matching runner is found.
id: ${{ steps.id.outputs.value }}
steps:
- name: Query GitHub for runners
id: runners
env:
# We require admin read access to the repository to query the runners.
# TODO: Expires on Thu, Jun 13 2024.
GH_TOKEN: ${{ secrets.NIX_CUDA_TEST_GH_TOKEN_ADMIN_READ }}
run: |
gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
/repos/ConnorBaker/nix-cuda-test/actions/runners \
| jq -crS '.runners' \
| xargs -0 printf "json=%s" \
| tee -a "$GITHUB_OUTPUT"
- name: Filter the runners by size and status
id: matching-runners
run: |
jq -crS \
--arg size "${{ inputs.size }}" \
'map(select(.name == $size and .status == "online"))' \
<<< '${{ steps.runners.outputs.json }}' \
| xargs -0 printf "json=%s" \
| tee -a "$GITHUB_OUTPUT"
- name: Get the ID of the matching runners
id: id
run: |
declare -ri length=$(jq -crS 'length' <<< '${{ steps.matching-runners.outputs.json }}')
declare -i id=-1
if (( length == 0 )); then
echo "No matching runner found."
elif (( length == 1 )); then
echo "Matching runner found."
id=$(jq -crS '.[0].id' <<< '${{ steps.matching-runners.outputs.json }}')
else
echo "Multiple matching runners found!"
exit 1
fi
echo "value=$id" | tee -a "$GITHUB_OUTPUT"
azure-check-for-instance:
name: Check for instance (Azure)
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
outputs:
ip: ${{ steps.ip.outputs.value }}
steps:
- name: Log in to Azure
uses: Azure/login@v2
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
- name: Check for instance
id: instance
uses: azure/CLI@v1
with:
azcliversion: latest
inlineScript: |
if
az vm show \
--show-details \
--resource-group NixCudaTest \
--name "${{ inputs.size }}" \
| jq -crS . \
| xargs -0 printf "json=%s" \
| tee -a "$GITHUB_OUTPUT"
then
echo "Instance ${{ inputs.size }} exists"
else
echo "Instance ${{ inputs.size }} does not exist"
fi
# IP is "" if the instance does not exist.
- name: Get instance IP
id: ip
run: |
jq -crS \
'.publicIps' \
<<< '${{ steps.instance.outputs.json }}' \
| xargs -0 printf "value=%s" \
| tee -a "$GITHUB_OUTPUT"
create-running-instance:
name: Create instance
runs-on: ubuntu-latest
needs:
- azure-check-for-instance
- github-check-for-runner
if: ${{ inputs.action == 'start' && needs.github-check-for-runner.outputs.id == -1 || needs.azure-check-for-instance.outputs.ip == 'null' }}
permissions:
contents: read
id-token: write
steps:
- name: Test pre-conditions
run: |
declare -ri id=${{ needs.github-check-for-runner.outputs.id }}
declare -r ip=${{ needs.azure-check-for-instance.outputs.ip }}
declare GH_RUNNER_STATUS
if (( id == -1 )) ; then
GH_RUNNER_STATUS="does not exist"
else
GH_RUNNER_STATUS="exists"
fi
declare AZ_INSTANCE_STATUS
if [[ -z "$ip" ]]; then
AZ_INSTANCE_STATUS="does not exist"
else
AZ_INSTANCE_STATUS="exists"
fi
declare PRECONDITIONS_STATUS
if [[ "$id" == "-1" && -z "$ip" ]]; then
PRECONDITIONS_STATUS="met"
else
PRECONDITIONS_STATUS="not met"
fi
echo "Preconditions $PRECONDITIONS_STATUS: GitHub runner $GH_RUNNER_STATUS and Azure instance $AZ_INSTANCE_STATUS"
if [[ "$PRECONDITIONS_STATUS" != "met" ]]; then
exit 1
fi
- uses: actions/checkout@v4
- name: Set up SSH keys
run: |
mkdir -p "$HOME/.ssh"
echo "${{ secrets.AZURE_SSH_PRIVATE_KEY }}" > "$HOME/.ssh/id_rsa"
chmod 600 "$HOME/.ssh/id_rsa"
echo "${{ secrets.AZURE_SSH_PUBLIC_KEY }}" > "$HOME/.ssh/id_rsa.pub"
chmod 644 "$HOME/.ssh/id_rsa.pub"
- name: Log in to Azure
uses: Azure/login@v2
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
- name: Provision the instance
id: provision
uses: azure/CLI@v1
with:
azcliversion: latest
# TODO: This likely runs inside a docker image, which is why we need to provision keys twice.
# However, it is odd that accessing GITHUB_WORKSPACE works.
inlineScript: |
declare -rA images=(
[Standard_HB120rs_v3]="Canonical:0001-com-ubuntu-minimal-mantic:minimal-23_10-gen2:latest"
)
if [[ -z "${images[${{ inputs.size }}]}" ]] ; then
echo "No image available for size: ${{ inputs.size }}"
exit 1
fi
mkdir -p "$HOME/.ssh"
echo "${{ secrets.AZURE_SSH_PRIVATE_KEY }}" > "$HOME/.ssh/id_rsa"
chmod 600 "$HOME/.ssh/id_rsa"
echo "${{ secrets.AZURE_SSH_PUBLIC_KEY }}" > "$HOME/.ssh/id_rsa.pub"
chmod 644 "$HOME/.ssh/id_rsa.pub"
az vm create \
--admin-username runner \
--enable-hibernation false \
--enable-hotpatching false \
--eviction-policy Delete \
--image "Canonical:0001-com-ubuntu-minimal-mantic:minimal-23_10-gen2:latest" \
--location eastus \
--max-price 1.0 \
--name "${{ inputs.size }}" \
--nic-delete-option Delete \
--priority Spot \
--resource-group NixCudaTest \
--security-type Standard \
--size "${{ inputs.size }}" \
--ssh-key-values "$HOME/.ssh/id_rsa.pub" \
--user-data "$GITHUB_WORKSPACE/ci/user-data-${{ inputs.size }}.sh" \
| jq -crS . \
| xargs -0 printf "json=%s" \
| tee -a "$GITHUB_OUTPUT"
- name: Get the instance IP
id: ip
run: |
declare -r ip=$(jq -crS '.publicIpAddress' <<< '${{ steps.provision.outputs.json }}')
if [[ -z "$ip" ]]; then
echo "Instance creation failed: no IP address"
exit 1
fi
echo "value=$ip" | tee -a "$GITHUB_OUTPUT"
- name: Add instance to SSH config
run: |
cat >> "$HOME/.ssh/config" <<EOF
Host ${{ inputs.size }}
HostName ${{ steps.ip.outputs.value }}
User runner
IdentityFile ~/.ssh/id_rsa
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
EOF
chmod 644 "$HOME/.ssh/config"
- name: Ensure instance is reachable via ssh
run: timeout 60s bash -c "until ssh ${{ inputs.size }} true; do sleep 15; done"
- name: Generate registration token
env:
# We require admin read access to the repository to query the runners.
# TODO: Expires on Thu, Jun 13 2024.
GH_TOKEN: ${{ secrets.NIX_CUDA_TEST_GH_TOKEN_ADMIN_READ }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
/repos/ConnorBaker/nix-cuda-test/actions/runners/registration-token \
| jq -crS '.token' \
> "$GITHUB_WORKSPACE/.runner-token"
- name: Copy the token to the runner
run: scp "$GITHUB_WORKSPACE/.runner-token" "${{ inputs.size }}:~/.runner-token"
- name: Set up self-hosted GitHub runner
# NOTE: Must execute ./config.sh from the actions-runner directory as it uses relative paths.
run: |
ssh "${{ inputs.size }}" bash -c \
'cd "$HOME/actions-runner" \
&& ./config.sh \
--unattended \
--url "https://github.com/ConnorBaker/nix-cuda-test" \
--token "$(cat ~/.runner-token)" \
--name "${{ inputs.size }}" \
--labels "${{ inputs.size }}"'
sleep 15
build-nix-cuda-test:
name: Build nix-cuda-test
runs-on:
# - Standard_HB120rs_v3
- self-hosted
steps:
- uses: actions/checkout@v4
- name: Set up Nix
uses: ./.github/actions/setup-nix
- name: Set up Attic
uses: ./.github/actions/setup-attic
with:
cache-key: ${{ secrets.NIX_CUDA_TEST_ATTIC_CACHE_KEY }}
- name: Build nix-cuda-test
run: nix build --print-build-logs --no-link .#nix-cuda-test