Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a script to remove orphaned K8s disks #745

Merged
merged 4 commits into from
Aug 14, 2019
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions hack/gcp-rm-unused-disks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env bash
#
# Delete unused (PD) disks.
# It is relatively easy to orphan cloud disk in GKE: just delete the K8s cluster.
#
# Assumption:
# * If you have any inactive (not mounted) PD disks they are represented by a PV in K8s.
# * Your disks are single AZ disks (multi-AZ disk will be ignored)

# Warning:
# * If this script does not work correctly, you will suffer data loss.
# * Only use this in a non-production environment.
#
# Extra Safety:
# * If a disk is mounted in an instance, gcloud refuses to delete it.

set -euo pipefail

REGIONS=$(gcloud compute disks list | awk '{print $2}' | grep -v LOCATION | sort | uniq | cut -d "-" -f 1-2 | uniq)
disk_zones=$(gcloud compute disks list | awk '{print $2}' | grep -v LOCATION | sort | uniq | xargs echo -n)

PROJECT="${PROJECT:-$(gcloud config get-value project)}"
echo "using PROJECT: $PROJECT"
echo "looking at REGIONS: $REGIONS"

pv_file="pvs.txt"
node_file=nodes.txt
instance_file=instances.txt
trap "rm -f $pv_file $node_file" "EXIT"

export KUBECONFIG=./kubeconfig
touch "$KUBECONFIG"
chmod 0600 "$KUBECONFIG"
trap "rm -f $KUBECONFIG" "EXIT"

declare -A disks

echo "$REGIONS" | while read -r region ; do
zones=$(gcloud compute zones list | grep "$region" | awk '{print $1}')

# Get the disk listings first. Otherwise we might think the instance using the disk does not exist if it was just created
echo "$zones" | while read -r zone ; do
gcloud compute instances list --filter="zone:($zone)" > "$zone-$instance_file"
trap "rm -f $zone-$instance_file" "EXIT"
gcloud compute disks list --filter="zone:($zone)" > "$zone-disks"
trap "rm -f $zone-disks" "EXIT"
done

rm -f $pv_file $node_file
clusters=$(gcloud container clusters list --filter="location:($region)" --format json | jq -r -c '.[] | .name + " " + .location')
echo "$clusters"
echo "$clusters" | while read -r name location ; do
location_type="--zone"
if [[ -z "$(echo "$location" | cut -d '-' -f 3)" ]] ; then
location_type="--region"
fi
gcloud container clusters get-credentials "$name" "$location_type" $location
kubectl describe pv | awk '/PDName:/ {print $2}' | sort >> $pv_file
kubectl get node --no-headers | awk '{print $1}' | sort >> $node_file
done
rm -f "$KUBECONFIG"

echo "$zones" | while read -r zone ; do
if disks=$(cat "$zone-disks" | grep gke | awk '{print $1}') ; then
echo "ZONE: $zone"
echo "$disks" | while read -r disk ; do
if grep "$disk" $pv_file > /dev/null ; then
echo "KEEP disk $disk used by pv"
else
disk_json=$(gcloud compute disks describe "$disk" --zone "$zone" --format json)
node="$(echo "$disk_json" | jq -r '.selfLink')"
if instance=$(grep "$node" $zone-$instance_file) >/dev/null ; then
echo "KEEP disk $disk $zone used by instance $instance"
else
disk_gb="$(echo "$disk_json" | jq -r '.sizeGb')"
users_null="$(echo "$disk_json" | jq -r '.users')"
if [[ "$users_null" == null ]] ; then
echo "Deleting disk $disk $zone ${disk_gb}GB"
echo "Y" | gcloud compute disks delete "$disk" --zone "$zone"
else
users="$(echo "$disk_json" | jq -r '.users[]')"
if [[ "$(echo "$users" | wc -l)" -eq 0 ]] ; then
echo "expected null but go an empty users field for disk" >&2
exit 1
fi

echo "$users" | while read -r user ; do
if ! echo "$user" | grep 'instances' >/dev/null ; then
echo "didn't understand user: $user for disk: $disk" >&2
exit 1
fi
done

if [[ "$(echo "$users" | wc -l)" -gt 1 ]] ; then
echo "KEEP disk $disk with multiple users $users" >&2
else
user="$(echo "$users" | head -1)"
node=$(echo "$user" | awk -F '/' '{print $NF}')
if grep "$node" $node_file >/dev/null ; then
echo "KEEP disk $disk $zone used by node $node"
else
if instance=$(grep "$node" $zone-$instance_file) >/dev/null ; then
echo "KEEP disk $disk $zone used by instance $instance"
else
echo "Deleting disk $disk $zone ${disk_gb}GB"
echo "Y" | gcloud compute disks delete "$disk" --zone "$zone"
fi
fi
fi
fi
fi
fi
done
fi
done
done

# Just in case the original listing was paged.
other_zones=$(gcloud compute disks list --filter="NOT zone:($disk_zones)" | wc -l)
if [[ $other_zones -gt 0 ]] ; then
echo "Seeing new regions now. Run this script again"
exit 1
fi