Skip to content

Commit

Permalink
Test automatic stop mechanism
Browse files (browse the repository at this point in the history)
Also:
  * Add sleep on test polling loops
  * Fix unsafe access to event time on Azure
  * Wait for data to be synchronized before exiting
  * Fix testing inconsistencies
  • Loading branch information
0x2b3bfa0 authored Nov 24, 2021
1 parent 5271863 commit c90e8d2
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 69 deletions.
106 changes: 50 additions & 56 deletions .github/workflows/smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,58 +10,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- run: echo ✓
start:
needs: authorize
runs-on: ubuntu-latest
timeout-minutes: 30
outputs:
kubeconfig: ${{ steps.cluster.outputs.kubeconfig }}
steps:
- id: cluster
run: |
az login \
--service-principal \
--user="$AZURE_CLIENT_ID" \
--password="$AZURE_CLIENT_SECRET" \
--tenant="$AZURE_TENANT_ID"
az account set \
--subscription="$AZURE_SUBSCRIPTION_ID"
az extension add \
--name=aks-preview
az provider register \
--namespace=Microsoft.ContainerService
az feature register \
--namespace=Microsoft.ContainerService \
--name=GPUDedicatedVHDPreview
az group create \
--name="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--location=eastus
az aks create \
--resource-group="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--name="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--node-vm-size=Standard_NC6 \
--node-count=1 \
--aks-custom-headers=UseGPUDedicatedVHD=true \
--generate-ssh-keys
az aks get-credentials \
--resource-group="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--name="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--file - |
perl -p -e 's/%/%25/g;' -e 's/\n/%0A/g;' -e 's/\r/%0D/g;' |
xargs -0 printf "::set-output name=kubeconfig::%s\n"
env:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
test:
needs: start
needs: authorize
runs-on: ubuntu-latest
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
provider: [AWS, AZ, GCP, K8S]
provider: [AWS, AZ, GCP]
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
Expand All @@ -71,8 +27,6 @@ jobs:
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
GOOGLE_APPLICATION_CREDENTIALS_DATA: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS_DATA }}
KUBECONFIG_DATA: ${{ needs.start.outputs.kubeconfig }}
SMOKE_TEST_IDENTIFIER: smoke test ${{ github.run_id }}
SMOKE_TEST_ENABLE_${{ matrix.provider }}: true
steps:
- uses: actions/checkout@v2
Expand All @@ -88,12 +42,21 @@ jobs:
run: go test ./task -v -timeout=30m -count=1
env:
SMOKE_TEST_SWEEP: true
stop:
if: always()
needs: test
test-k8s:
name: test (K8S)
needs: authorize
runs-on: ubuntu-latest
timeout-minutes: 30
env:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: ^1.17
- run: |
az login \
--service-principal \
Expand All @@ -102,11 +65,42 @@ jobs:
--tenant="$AZURE_TENANT_ID"
az account set \
--subscription="$AZURE_SUBSCRIPTION_ID"
- id: cluster
run: |
az extension add \
--name=aks-preview
az provider register \
--namespace=Microsoft.ContainerService
az feature register \
--namespace=Microsoft.ContainerService \
--name=GPUDedicatedVHDPreview
az group create \
--name="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--location=eastus
az aks create \
--resource-group="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--name="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--node-vm-size=Standard_NC6 \
--node-count=1 \
--aks-custom-headers=UseGPUDedicatedVHD=true \
--generate-ssh-keys
az aks get-credentials \
--resource-group="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--name="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--file - |
perl -0777p \
-e 's/%/%25/g;' \
-e 's/\n/%0A/g;' \
-e 's/\r/%0D/g;' \
-e 's/(.+)/::add-mask::\1\n::set-output name=kubeconfig::\1\n/g'
- run: go test ./task -v -timeout=30m -count=1
env:
KUBECONFIG_DATA: ${{ steps.cluster.outputs.kubeconfig }}
SMOKE_TEST_ENABLE_K8S: true
- if: always()
run: |
az group delete \
--name="tpiSmokeTestCluster$GITHUB_RUN_ID" \
--yes
env:
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
env:
SMOKE_TEST_IDENTIFIER: smoke test ${{ github.run_id }}
7 changes: 6 additions & 1 deletion task/az/resources/resource_virtual_machine_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net"
"regexp"
"time"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-06-30/compute"

Expand Down Expand Up @@ -240,8 +241,12 @@ func (v *VirtualMachineScaleSet) Read(ctx context.Context) error {
}
if scaleSetView.Statuses != nil {
for _, status := range *scaleSetView.Statuses {
statusTime := time.Unix(0, 0)
if status.Time != nil {
statusTime = status.Time.Time
}
v.Attributes.Events = append(v.Attributes.Events, common.Event{
Time: status.Time.Time,
Time: statusTime,
Code: to.String(status.Code),
Description: []string{
string(status.Level),
Expand Down
33 changes: 21 additions & 12 deletions task/task_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package task
import (
"context"
"os"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -48,8 +49,8 @@ func TestTask(t *testing.T) {
}

t.Run(string(provider), func(t *testing.T) {
oldData := gofakeit.Phrase()
newData := gofakeit.Phrase()
oldData := gofakeit.UUID()
newData := gofakeit.UUID()

dataDirectory := t.TempDir()
dataFile := filepath.Join(dataDirectory, "data")
Expand All @@ -75,8 +76,10 @@ func TestTask(t *testing.T) {
Environment: common.Environment{
Image: "ubuntu",
Script: `#!/bin/bash
cat data
echo "$ENVIRONMENT_VARIABLE_DATA" | tee data
mv data data.old
echo "$ENVIRONMENT_VARIABLE_DATA" > data
sleep 60
cat data data.old
`,
Variables: map[string]*string{
"ENVIRONMENT_VARIABLE_DATA": &newData,
Expand All @@ -88,7 +91,6 @@ func TestTask(t *testing.T) {
Ingress: common.FirewallRule{
Ports: &[]uint16{22},
},
// Egress: everything open.
},
Spot: common.SpotEnabled,
Parallelism: 1,
Expand Down Expand Up @@ -120,31 +122,38 @@ func TestTask(t *testing.T) {
require.Nil(t, err)

for _, log := range logs {
if assert.Contains(t, log, oldData) &&
assert.Contains(t, log, newData) {
if strings.Contains(log, oldData) &&
strings.Contains(log, newData) {
break loop
}
}

time.Sleep(10 * time.Second)
}

if provider == common.ProviderK8S {
require.Equal(t, newTask.Start(ctx), common.NotImplementedError)
require.Equal(t, newTask.Stop(ctx), common.NotImplementedError)
} else {
require.Nil(t, newTask.Stop(ctx))
require.Nil(t, newTask.Stop(ctx))

for assert.Nil(t, newTask.Read(ctx)) &&
newTask.Status(ctx)[common.StatusCodeRunning] > 0 {
continue
time.Sleep(10 * time.Second)
}

require.Nil(t, newTask.Start(ctx))
require.Nil(t, newTask.Start(ctx))

for assert.Nil(t, newTask.Read(ctx)) &&
newTask.Status(ctx)[common.StatusCodeRunning] == 0 {
continue
time.Sleep(10 * time.Second)
}

require.Nil(t, newTask.Stop(ctx))
require.Nil(t, newTask.Stop(ctx))

for assert.Nil(t, newTask.Read(ctx)) &&
newTask.Status(ctx)[common.StatusCodeRunning] > 0 {
time.Sleep(10 * time.Second)
}
}

Expand Down

0 comments on commit c90e8d2

Please sign in to comment.