From 2993607bcdbcce4e3b4b6f8174e115ca976b665e Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Tue, 17 Aug 2021 17:01:44 -0400 Subject: [PATCH 1/3] [stress testing] Add stress test docs for deploy script --- tools/stress-cluster/chaos/README.md | 124 +++++++++++++-------------- 1 file changed, 58 insertions(+), 66 deletions(-) diff --git a/tools/stress-cluster/chaos/README.md b/tools/stress-cluster/chaos/README.md index f98d112387d..fca2f93f6aa 100644 --- a/tools/stress-cluster/chaos/README.md +++ b/tools/stress-cluster/chaos/README.md @@ -35,8 +35,9 @@ You will need the following tools to create and run tests: 1. [Docker](https://docs.docker.com/get-docker/) 1. [Kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) -1. [Helm](https://helm.sh/) +1. [Helm](https://helm.sh/docs/intro/install/) 1. [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) +1. [Powershell Core](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell-core-on-linux?view=powershell-7.1#ubuntu-2004) (if using Linux) ## Access @@ -56,7 +57,7 @@ kubectl get namespaces ## Quick Testing with no Dependencies -This section details how to deploy a simple job, without any dependencies on the cluster (e.g. azure credentials, app insights keys). +This section details how to deploy a simple job, without any dependencies on the cluster (e.g. azure credentials, app insights keys) or stress test scripts. It is used to illustrate how kubernetes and the tools work only. Stress test development should be done using the [deploy script](https://github.com/Azure/azure-sdk-tools/blob/main/eng/common/scripts/stress-testing/deploy-stress-tests.ps1). To get started, you will need to create a container image containing your long-running test, and a manifest to execute that image as a [kubernetes job](https://kubernetes.io/docs/concepts/workloads/controllers/job/). 
@@ -161,6 +162,18 @@ The basic layout for a stress test is the following (see `examples/stress_deploy # Any additional bicep module files/directories referenced by test-resources.bicep ``` +### Stress Test Metadata + +A stress test package should follow a few conventions that are used by the automation to auto-discover behavior. + +Fields in `Chart.yaml` +1. The `name` field will get used as the helm release name. To deploy instances of the same stress test release in parallel, update this field. +1. The `annotations.stressTest` field must be set to true for the script to discover the test. +1. The `annotations.namespace` field must be set, and governs which kubernetes namespace the stress test package will be + installed into as a helm release. +1. Extra fields in `annotations` can be set arbitrarily, and used via the `-Filters` argument to the [stress test deploy + script](https://github.com/Azure/azure-sdk-tools/blob/main/eng/common/scripts/stress-testing/deploy-stress-tests.ps1). + ### Stress Test Secrets For ease of implementation regarding merging secrets from various Keyvault sources, secret values injected into the stress @@ -192,31 +205,33 @@ a `chart/test-resources.json` file in place before running `helm install`. The stress test cluster and config boilerplate will handle running ARM deployments in an init container before stress test container startup. -If using Azure Bicep files, they should be declared at the subscription `targetScope`, as opposed to the default -resource group scope. Additionally, they should create a resource group for the test, along with tags marking deletion -for the group after the intended duration of the stress test. - The bicep file should output at least the resource group name, which will be injected into the stress test env file. 
``` -targetScope = 'subscription' +// Dummy parameter to handle defaults the script passes in +param testApplicationOid string = '' -param groupName string -param location string -param now string = utcNow('u') - -resource group 'Microsoft.Resources/resourceGroups@2020-10-01' = { - name: 'rg-stress-${groupName}-${uniqueString(now)}' - location: location - tags: { - DeleteAfter: dateTimeAdd(now, 'PT8H') - } +resource config 'Microsoft.AppConfiguration/configurationStores@2020-07-01-preview' = { + name: 'config-${resourceGroup().name}' + location: resourceGroup().location + sku: { + name: 'Standard' + } } -output RESOURCE_GROUP string = group.name +output RESOURCE_GROUP string = resourceGroup().name +output AZURE_CLIENT_OID string = testApplicationOid ``` -See the [Job Manifest section](#job-manifest) for an example spec containing config template includes for resource auto-deployment. +A stress test package must include a `parameters.json` file as well, which can either be empty or contain parameters: + +``` +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { } +} +``` ### Helm Chart Dependencies @@ -261,7 +276,6 @@ spec: containers: - name: deployment-example image: mcr.microsoft.com/azure-cli - {{- include "stress-test-addons.container-env" . | nindent 10 }} command: ['bash', '-c'] args: - | @@ -269,6 +283,7 @@ spec: az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_CLIENT_SECRET --tenant $AZURE_TENANT_ID && az account set -s $AZURE_SUBSCRIPTION_ID && az group show -g $RESOURCE_GROUP -o json + {{- include "stress-test-addons.container-env" . 
| nindent 6 }} {{- end -}} ``` @@ -342,52 +357,32 @@ The underlying `stress-test-addons` helm library will handle a scenarios list au ## Deploying a Stress Test -To build and deploy the stress test, first log in to access the cluster resources if not already set up: +The stress test deployment is best run via the [stress test deploy +script](https://github.com/Azure/azure-sdk-tools/blob/main/eng/common/scripts/stress-testing/deploy-stress-tests.ps1). +This script handles: cluster and container registry access, building the stress test helm package, installing helm +package dependencies, and building and pushing docker images. The script must be run via PowerShell or PowerShell Core. -``` -az login -# Log in to the container registry for Docker access -az acr login -n stresstestregistry -# Download the kubeconfig for the cluster -az aks get-credentials -g rg-stress-test-cluster- -n stress-test --subscription 'Azure SDK Test Resources' -``` +If using bash or another Linux terminal, a [PowerShell Core](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell-core-on-linux?view=powershell-7.1#ubuntu-2004) shell can be invoked via `pwsh`. -Then register the helm repository (this only needs to be done once): +The first invocation of the script must be run with the `-Login` flag to set up cluster and container registry access. ``` -helm repo add stress-test-charts https://stresstestcharts.blob.core.windows.net/helm/ -helm repo update -``` - -Then build/publish images and build ARM templates. Make sure the docker image matches what's referenced in the helm templates. +cd -``` -# Build and publish image -docker build . 
-t stresstestregistry.azurecr.io//: -docker push stresstestregistry.azurecr.io//: - -# Compile ARM template (if using Bicep files) -az bicep build -f ./test-resources.bicep - -# Install helm dependencies -helm dependency update +/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 ` + -Login ` + -PushImages ` + -Repository ` + -DeployId ``` -Then install the stress test into the cluster: +To re-deploy more quickly, the script can be run without `-Login` and/or without `-PushImages` (if no code changes were +made). ``` -kubectl create namespace -kubectl label namespace owners= -helm install -n . -``` - -To install into a different cluster (test, prod, or dev): - -``` -az aks get-credentials --subscription '' -g rg-stress-test-cluster- -n stress-test -kubectl create namespace -kubectl label namespace owners= -helm install -n . --set stress-test-addons.env= +/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 ` + -Repository ` + -DeployId ``` You can check the progress/status of your installation via: @@ -396,13 +391,7 @@ You can check the progress/status of your installation via: helm list -n ``` -To update/re-deploy the test with changes: - -``` -helm upgrade . -``` - -To debug the yaml built by `helm install`, run: +To debug the kubernetes manifests installed by the stress test, run the following from the stress test directory: ``` helm template . @@ -419,13 +408,16 @@ To check the status of the stress test job resources: ``` # List stress test pods kubectl get pods -n -l release= -# Get logs from azure-deployer init container + +# Get logs from azure-deployer init container, if deploying resources. Omit `-c azure-deployer` to get main container +# logs. 
kubectl logs -n -c azure-deployer + # If empty, there may have been startup failures kubectl describe pod -n ``` -Once the `azure-deployer` init container is completed and the stress test pod is in a `Running` state, +If deploying resources, once the `azure-deployer` init container is completed and the stress test pod is in a `Running` state, you can quick check the local logs: ``` From f07d0585ff198633002dd633a9b0de53a784f260 Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Tue, 17 Aug 2021 17:48:59 -0400 Subject: [PATCH 2/3] [stress testing] Stress test deployment script resiliency fixes --- .../stress-testing/deploy-stress-tests.ps1 | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 b/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 index 25af8c9edd7..d34e3cba6e1 100644 --- a/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 +++ b/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 @@ -20,7 +20,8 @@ $ErrorActionPreference = 'Stop' . $PSScriptRoot/find-all-stress-packages.ps1 $FailedCommands = New-Object Collections.Generic.List[hashtable] -if (!(Get-Module powershell-yaml)) { +if (!(Get-Module -ListAvailable powershell-yaml)) { + Write-Host "Installing powershell-yaml module..." 
Install-Module -Name powershell-yaml -RequiredVersion 0.4.1 -Force -Scope CurrentUser } @@ -51,7 +52,10 @@ function Login([string]$subscription, [string]$clusterGroup, [boolean]$pushImage RunOrExitOnFailure az login --allow-no-subscriptions } - $clusterName = (az aks list -g $clusterGroup -o json| ConvertFrom-Json).name + # Discover cluster name, only one cluster per group is expected + Write-Host "Listing AKS cluster in $subscription/$clusterGroup" + $cluster = RunOrExitOnFailure az aks list -g $clusterGroup --subscription $subscription -o json + $clusterName = ($cluster | ConvertFrom-Json).name RunOrExitOnFailure az aks get-credentials ` -n "$clusterName" ` @@ -60,8 +64,9 @@ function Login([string]$subscription, [string]$clusterGroup, [boolean]$pushImage --overwrite-existing if ($pushImages) { - $registry = (az acr list -g $clusterGroup -o json | ConvertFrom-Json).name - RunOrExitOnFailure az acr login -n $registry + $registry = RunOrExitOnFailure az acr list -g $clusterGroup --subscription $subscription -o json + $registryName = ($registry | ConvertFrom-Json).name + RunOrExitOnFailure az acr login -n $registryName } } @@ -110,11 +115,8 @@ function DeployStressPackage( [string]$repository, [boolean]$pushImages ) { - $registry = (az acr list -g $clusterGroup -o json | ConvertFrom-Json).name - if (!$registry) { - Write-Host "Could not find container registry in resource group $clusterGroup" - exit 1 - } + $registry = RunOrExitOnFailure az acr list -g $clusterGroup --subscription $subscription -o json + $registryName = ($registry | ConvertFrom-Json).name Run helm dependency update $pkg.Directory if ($LASTEXITCODE) { return } @@ -133,7 +135,7 @@ function DeployStressPackage( if (!$imageName) { $imageName = $dockerFile.Directory.Name } - $imageTag = "${registry}.azurecr.io/$($repository.ToLower())/$($imageName):$deployId" + $imageTag = "${registryName}.azurecr.io/$($repository.ToLower())/$($imageName):$deployId" Write-Host "Building and pushing stress test docker 
image '$imageTag'" Run docker build -t $imageTag -f $dockerFile.FullName $dockerFile.DirectoryName if ($LASTEXITCODE) { return } @@ -154,7 +156,7 @@ function DeployStressPackage( Run helm upgrade $pkg.ReleaseName $pkg.Directory ` -n $pkg.Namespace ` --install ` - --set repository=$registry.azurecr.io/$repository ` + --set repository=$registryName.azurecr.io/$repository ` --set tag=$deployId ` --set stress-test-addons.env=$environment if ($LASTEXITCODE) { @@ -176,4 +178,7 @@ function DeployStressPackage( Run kubectl label secret -n $pkg.Namespace --overwrite $helmReleaseConfig deployId=$deployId } -DeployStressTests @PSBoundParameters +# Don't call functions when the script is being dot sourced +if ($MyInvocation.InvocationName -ne ".") { + DeployStressTests @PSBoundParameters +} From 8215fd8153919e9f6cbbb2e4d0277f6b0b86b7b8 Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Wed, 18 Aug 2021 14:14:43 -0400 Subject: [PATCH 3/3] Use PSModule-Helpers to install powershell-yaml --- eng/common/scripts/stress-testing/deploy-stress-tests.ps1 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 b/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 index d34e3cba6e1..a95ce62ff02 100644 --- a/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 +++ b/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 @@ -20,10 +20,8 @@ $ErrorActionPreference = 'Stop' . $PSScriptRoot/find-all-stress-packages.ps1 $FailedCommands = New-Object Collections.Generic.List[hashtable] -if (!(Get-Module -ListAvailable powershell-yaml)) { - Write-Host "Installing powershell-yaml module..." - Install-Module -Name powershell-yaml -RequiredVersion 0.4.1 -Force -Scope CurrentUser -} +. 
(Join-Path $PSScriptRoot "../Helpers" PSModule-Helpers.ps1) +Install-ModuleIfNotInstalled "powershell-yaml" "0.4.1" | Import-Module # Powershell does not (at time of writing) treat exit codes from external binaries # as cause for stopping execution, so do this via a wrapper function.