From a27d99a1f54b1fc0165670857b7a3041a64f77e1 Mon Sep 17 00:00:00 2001 From: Jordan Hoey Date: Tue, 15 Oct 2024 15:57:54 +0100 Subject: [PATCH] Adding new AKS status script --- .github/workflows/aks-auto-shutdown.yaml | 2 +- .github/workflows/aks-auto-start.yaml | 2 +- scripts/aks/auto-shutdown-status.sh | 75 ++++++++++++++++++++---- scripts/aks/common-functions.sh | 2 +- 4 files changed, 65 insertions(+), 16 deletions(-) diff --git a/.github/workflows/aks-auto-shutdown.yaml b/.github/workflows/aks-auto-shutdown.yaml index debee598..83a4afb7 100644 --- a/.github/workflows/aks-auto-shutdown.yaml +++ b/.github/workflows/aks-auto-shutdown.yaml @@ -65,4 +65,4 @@ jobs: - name: AKS Auto Shutdown status check if: env.DEV_ENV != 'true' - run: ./scripts/aks/auto-shutdown-status.sh stop ${{ secrets.REGISTRYSLACKWEBHOOK }} + run: ./scripts/aks/auto-shutdown-status.sh stop ${{ secrets.SHUTDOWN_NOTIFICATIONS_WEBHOOK }} diff --git a/.github/workflows/aks-auto-start.yaml b/.github/workflows/aks-auto-start.yaml index 053acc85..8907568a 100644 --- a/.github/workflows/aks-auto-start.yaml +++ b/.github/workflows/aks-auto-start.yaml @@ -65,4 +65,4 @@ jobs: - name: AKS Auto Shutdown status check #if: env.DEV_ENV != 'true' - run: ./scripts/aks/auto-shutdown-status.sh start ${{ secrets.REGISTRYSLACKWEBHOOK }} + run: ./scripts/aks/auto-shutdown-status.sh start ${{ secrets.SHUTDOWN_NOTIFICATIONS_WEBHOOK }} diff --git a/scripts/aks/auto-shutdown-status.sh b/scripts/aks/auto-shutdown-status.sh index 9f2e7cf3..4c00a605 100755 --- a/scripts/aks/auto-shutdown-status.sh +++ b/scripts/aks/auto-shutdown-status.sh @@ -1,29 +1,78 @@ #!/usr/bin/env bash -#set -x +# set -x shopt -s nocasematch -AMBER='\033[1;33m' -GREEN='\033[0;32m' +# Source shared function scripts source scripts/aks/common-functions.sh source scripts/common/common-functions.sh +# Set variables for later use, MODE has a default but can be overridden at usage time +# notificationSlackWebhook is used during the function call
`auto_shutdown_notification` MODE=${1:-start} -registrySlackWebhook=$2 +notificationSlackWebhook=$2 +SKIP="false" + +# Catch problems with MODE input, must be one of Start/Stop +if [[ "$MODE" != "start" && "$MODE" != "stop" ]]; then + echo "Invalid MODE. Please use 'start' or 'stop'." + exit 1 +fi CLUSTERS=$(get_clusters) clusters_count=$(jq -c -r '.count' <<<$CLUSTERS) -log "$clusters_count AKS Clusters found" -log "----------------------------------------------" +ts_echo "$clusters_count AKS Clusters found" +# For each AKS Cluster found in the function `get_clusters` start another loop jq -c '.data[]' <<<$CLUSTERS | while read cluster; do + # Function that returns the Resource Group, Id and Name of the AKS Cluster and its current state as variables get_cluster_details - if [[ $cluster_status == "Stopped" ]]; then - echo -e "${GREEN}$CLUSTER_NAME is $cluster_status" - elif [[ $cluster_status == "Running" ]]; then - echo -e "${AMBER}$CLUSTER_NAME is $cluster_status" + # Set variables based on inputs which are used to decide when to SKIP an environment + if [[ $cluster_env == "sbox" ]]; then + cluster_env=${cluster_env/#sbox/Sandbox} + elif [[ $cluster_env == "ptlsbox" ]]; then + cluster_env=${cluster_env/ptlsbox/Sandbox} + elif [[ $cluster_env == "stg" ]]; then + cluster_env=${cluster_env/stg/Staging} fi - if [[ $MODE == "start" ]]; then - check_cluster_status + + cluster_business_area=$(echo $CLUSTER_NAME | cut -d'-' -f1) + cluster_business_area=${cluster_business_area/ss/cross-cutting} + + # SKIP variable updated based on the output of the `should_skip_start_stop` function which calculates its value + # based on the issues_list.json file which contains user requests to keep environments online after normal hours + SKIP=$(should_skip_start_stop $cluster_env $cluster_business_area $MODE) + + # Setup message output templates for later use + logMessage="Cluster: $CLUSTER_NAME in Subscription: $SUBSCRIPTION ResourceGroup: $RESOURCE_GROUP is in $CLUSTER_STATUS state
after $MODE action" + slackMessage="Cluster: *$CLUSTER_NAME* in Subscription: *$SUBSCRIPTION* is in *$CLUSTER_STATUS* state after *$MODE* action" + + # If SKIP is false then we progress with the status check for the particular AKS Cluster in this loop run, if SKIP is true then do nothing + if [[ $SKIP == "false" ]]; then + # Check state of the AKS Cluster and print output as required + # Depending on the value of MODE a notification will also be sent + # - If MODE = Start then a stopped AKS Cluster is incorrect and we should notify + # - If MODE = Stop then a running AKS Cluster is incorrect and we should notify + # - If neither Running or Stopped is found then something else is going on and we should notify + case "$CLUSTER_STATUS" in + *"Running"*) + ts_echo_color $([[ $MODE == "start" ]] && echo GREEN || echo RED) "$logMessage" + if [[ $MODE == "stop" ]]; then + auto_shutdown_notification ":red_circle: $slackMessage" + fi + ;; + *"Stopped"*) + ts_echo_color $([[ $MODE == "start" ]] && echo RED || echo GREEN) "$logMessage" + if [[ $MODE == "start" ]]; then + auto_shutdown_notification ":red_circle: $slackMessage" + fi + ;; + *) + ts_echo_color AMBER "$logMessage" + auto_shutdown_notification ":yellow_circle: $slackMessage" + ;; + esac + else + ts_echo_color AMBER "Cluster: $CLUSTER_NAME in ResourceGroup: $RESOURCE_GROUP has been skipped from today's $MODE operation schedule" fi -done +done \ No newline at end of file diff --git a/scripts/aks/common-functions.sh b/scripts/aks/common-functions.sh index ff531295..9d170a9e 100644 --- a/scripts/aks/common-functions.sh +++ b/scripts/aks/common-functions.sh @@ -27,7 +27,7 @@ function get_cluster_details() { RESOURCE_GROUP=$(jq -r '.resourceGroup' <<<$cluster) CLUSTER_NAME=$(jq -r '.name' <<<$cluster) STARTUP_MODE=$(jq -r '.tags.startupMode' <<<$cluster) - CLUSTER_STATUS=$(jq -r '.powerState.code' <<<$cluster) + CLUSTER_STATUS=$(jq -r '.properties.powerState.code' <<<$cluster) SUBSCRIPTION=$(jq -r '.subscriptionId'
<<<$cluster) }