Skip to content

Commit

Permalink
Add script to clean-up unused AWS EC2 Instances and VPCs
Browse files Browse the repository at this point in the history
Signed-off-by: shiva kumar <shivaku@nvidia.com>
  • Loading branch information
shivakunv committed Dec 20, 2024
1 parent 014a6e0 commit aa1d57e
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 124 deletions.
36 changes: 3 additions & 33 deletions .github/workflows/awscleanup.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,42 +38,12 @@ jobs:
echo "AWS_VPC_IDS=$vpcs" >> $GITHUB_ENV
- name: Clean up VPCs
if: env.vpcs != ''
if: env.AWS_VPC_IDS != ''
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
get_tag_value(){
local vpc_id=$1
local key=$2
aws ec2 describe-tags --filters "Name=resource-id,Values=$vpc_id" "Name=key,Values=$key" \
--query "Tags[0].Value" --output text
}
for vpc in $vpcs; do
github_repository=$(get_tag_value $vpc "GitHubRepository")
run_id=$(get_tag_value $vpc "GitHubRunId")
job_name=$(get_tag_value $vpc "GitHubJob")
response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
"https://api.github.com/repos/${github_repository}/actions/runs/${run_id}/jobs")
if [[ -z "$response" || "$response" == "null" ]]; then
continue
fi
# 1. make sure .jobs exist in response
# e.g. { "message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404" }
# 2. check if all jobs completed
if ! echo "$response" | jq -e '.jobs != null' >/dev/null 2>&1; then
continue
fi
is_jobs_not_completed=$(echo "$response" | jq -r ".jobs? // [] |
map(select(.status != \"completed\")) |
length")
if [[ "$is_jobs_not_completed" -eq 0 ]]; then
echo "Holodeck e2e Job status is not in running stage , Delete the vpc $vpc and dependent resources"
scripts/awscleanup.sh $vpc
fi
for vpcid in $AWS_VPC_IDS; do
scripts/awscleanup.sh $vpcid
done
- name: Post cleanup
Expand Down
224 changes: 133 additions & 91 deletions scripts/awscleanup.sh
Original file line number Diff line number Diff line change
@@ -1,103 +1,145 @@
#!/bin/bash

if [[ $# -ne 1 ]]; then
echo " vpcid required for deletion"
exit 1
echo " vpcid required for deletion"
exit 1
fi
export vpc=$1

echo "Start cleanup of resources in VPC: $vpc"

# Delete Instance
instances=$(aws ec2 describe-instances \
--filters "Name=vpc-id,Values=$vpc" \
--query "Reservations[].Instances[].InstanceId" \
--output text | tr -d '\r' | tr '\n' ' ')
for instance in $instances; do
aws ec2 terminate-instances --instance-ids "$instance"
done

# Detach and Delete Security Groups
security_groups=$(aws ec2 describe-security-groups \
--filters Name=vpc-id,Values=$vpc \
--query "SecurityGroups[?GroupName!='default'].GroupId" \
--output text | tr -d '\r' | tr '\n' ' ')
for sg in $security_groups; do
enis=$(aws ec2 describe-network-interfaces \
--filters Name=group-id,Values=$sg \
--query "NetworkInterfaces[].NetworkInterfaceId" \

export vpcid=$1

#######################################
# Look up the value of one tag on an AWS resource via `aws ec2 describe-tags`.
# Arguments:
#   $1 - id of the resource whose tag is queried (callers pass the VPC id)
#   $2 - tag key to look up
# Outputs:
#   Writes the first matching tag's value to stdout, in the AWS CLI
#   "text" output format.
# Returns:
#   Exit status of the aws command.
#######################################
get_tag_value() {
  # Use the argument rather than the global $vpcid so the function queries
  # the resource it was actually asked about.
  local resource_id=$1
  local key=$2
  aws ec2 describe-tags \
    --filters "Name=resource-id,Values=${resource_id}" "Name=key,Values=${key}" \
    --query "Tags[0].Value" --output text
}

delete_vpc_resources() {
if [[ $# -ne 1 ]]; then
echo " vpcid required for deletion"
exit 1
fi
export vpcid=$1

echo "Start cleanup of resources in VPC: $vpcid"

# Delete Instance
instances=$(aws ec2 describe-instances \
--filters "Name=vpc-id,Values=$vpcid" \
--query "Reservations[].Instances[].InstanceId" \
--output text | tr -d '\r' | tr '\n' ' ')
for eni in $enis; do
aws ec2 modify-network-interface-attribute \
--network-interface-id "$eni" \
--groups "$(aws ec2 describe-security-groups \
--query 'SecurityGroups[?GroupName==`default`].GroupId' \
--output text)"
for instance in $instances; do
aws ec2 terminate-instances --instance-ids "$instance"
done
aws ec2 delete-security-group --group-id "$sg"
done

# Delete Subnets
subnets=$(aws ec2 describe-subnets \
--filters Name=vpc-id,Values=$vpc \
--query "Subnets[].SubnetId" \
--output text | tr -d '\r' | tr '\n' ' ')
for subnet in $subnets; do
aws ec2 delete-subnet --subnet-id "$subnet"
done

# Delete Route Tables
# 1. Make first rt as Main , as we cannot delete vpcs attached with main
# 2. replace all rt with first rt
# 3. delete rt
# 4. Main table(first_rt) will be deleted once vpc deleted
first_rt=""
route_tables=$(aws ec2 describe-route-tables \
--filters Name=vpc-id,Values=$vpc \
--query "RouteTables[].RouteTableId" \
--output text | tr -d '\r' | tr '\n' ' ')
for rt in $route_tables; do
associations=$(aws ec2 describe-route-tables \
--route-table-ids "$rt" \
--query "RouteTables[].Associations[].RouteTableAssociationId" \

# Detach and Delete Security Groups
security_groups=$(aws ec2 describe-security-groups \
--filters Name=vpc-id,Values=$vpcid \
--query "SecurityGroups[?GroupName!='default'].GroupId" \
--output text | tr -d '\r' | tr '\n' ' ')
for sg in $security_groups; do
enis=$(aws ec2 describe-network-interfaces \
--filters Name=group-id,Values=$sg \
--query "NetworkInterfaces[].NetworkInterfaceId" \
--output text | tr -d '\r' | tr '\n' ' ')
for assoc_id in $associations; do
if [ -z "$first_rt" ]; then
aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $rt
first_rt=$rt
for eni in $enis; do
aws ec2 modify-network-interface-attribute \
--network-interface-id "$eni" \
--groups "$(aws ec2 describe-security-groups \
--query 'SecurityGroups[?GroupName==`default`].GroupId' \
--output text)"
done
aws ec2 delete-security-group --group-id "$sg"
done

# Delete Subnets
subnets=$(aws ec2 describe-subnets \
--filters Name=vpc-id,Values=$vpcid \
--query "Subnets[].SubnetId" \
--output text | tr -d '\r' | tr '\n' ' ')
for subnet in $subnets; do
aws ec2 delete-subnet --subnet-id "$subnet"
done

# Delete Route Tables
# 1. Make first rt as Main , as we cannot delete vpcs attached with main
# 2. replace all rt with first rt
# 3. delete rt
# 4. Main table(first_rt) will be deleted once vpc deleted
first_rt=""
route_tables=$(aws ec2 describe-route-tables \
--filters Name=vpc-id,Values=$vpcid \
--query "RouteTables[].RouteTableId" \
--output text | tr -d '\r' | tr '\n' ' ')
for rt in $route_tables; do
associations=$(aws ec2 describe-route-tables \
--route-table-ids "$rt" \
--query "RouteTables[].Associations[].RouteTableAssociationId" \
--output text | tr -d '\r' | tr '\n' ' ')
for assoc_id in $associations; do
if [ -z "$first_rt" ]; then
aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $rt
first_rt=$rt
else
aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $first_rt
fi
done
aws ec2 delete-route-table --route-table-id "$rt" 2>>/dev/null
done

# Delete Internet Gateway
internet_gateways=$(aws ec2 describe-internet-gateways \
--filters Name=attachment.vpc-id,Values=$vpcid \
--query "InternetGateways[].InternetGatewayId" \
--output text | tr -d '\r' | tr '\n' ' ')
for igw in $internet_gateways; do
aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpcid"
aws ec2 delete-internet-gateway --internet-gateway-id "$igw"
done

# Delete vpc
# try 3 times with 30 seconds interval
attempts=0
echo "All resource Deleted for VPC: $vpcid , now delete vpc"
while [ $attempts -lt 3 ]; do
if aws ec2 delete-vpc --vpc-id $vpcid; then
echo "Successfully deleted VPC: $vpcid"
break
else
aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $first_rt
attempts=$((attempts + 1))
if [ $attempts -lt 3 ]; then
echo "Failed to delete VPC: $vpcid. Retrying in 30 seconds..."
sleep 30
fi
fi
done
aws ec2 delete-route-table --route-table-id "$rt" 2>>/dev/null
done

# Delete Internet Gateway
internet_gateways=$(aws ec2 describe-internet-gateways \
--filters Name=attachment.vpc-id,Values=$vpc \
--query "InternetGateways[].InternetGatewayId" \
--output text | tr -d '\r' | tr '\n' ' ')
for igw in $internet_gateways; do
aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpc"
aws ec2 delete-internet-gateway --internet-gateway-id "$igw"
done

# Delete vpc
# try 3 times with 30 seconds interval
attempts=0
echo "All resource Deleted for VPC: $vpc , now delete vpc"
while [ $attempts -lt 3 ]; do
if aws ec2 delete-vpc --vpc-id $vpc; then
echo "Successfully deleted VPC: $vpc"
break
else
attempts=$((attempts + 1))
if [ $attempts -lt 3 ]; then
echo "Failed to delete VPC: $vpc. Retrying in 30 seconds..."
sleep 30
fi
if [ $attempts -eq 3 ]; then
echo "Failed to delete VPC: $vpcid after 3 attempts. Continue the loop to delete other vpc"
fi
done
if [ $attempts -eq 3 ]; then
echo "Failed to delete VPC: $vpc after 3 attempts. Continue the loop to delete other vpc"
}

# Decide whether the VPC may be deleted. The GitHubRepository and GitHubRunId
# tags on the VPC are used to build the GitHub Actions "jobs for a run" URL;
# the VPC is deleted only when no job in the response is still unfinished.
github_repository=$(get_tag_value "$vpcid" "GitHubRepository")
run_id=$(get_tag_value "$vpcid" "GitHubRunId")
response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
  "https://api.github.com/repos/${github_repository}/actions/runs/${run_id}/jobs")

# No usable answer from the API: leave the VPC untouched.
if [[ -z "$response" || "$response" == "null" ]]; then
  exit 0
fi

# The response must contain a .jobs field. An error payload does not, e.g.
# { "message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404" }
if ! jq -e '.jobs != null' <<<"$response" >/dev/null 2>&1; then
  exit 0
fi

# Number of jobs whose status is anything other than "completed".
# NOTE(review): only the jobs present in this single response are counted;
# confirm whether runs with more jobs than one API page need pagination.
pending_jobs=$(jq -r '.jobs? // [] | map(select(.status != "completed")) | length' <<<"$response")

# Compare as a string on purpose: an arithmetic `-eq 0` test treats an empty
# value (e.g. jq failed) as 0 and would wrongly trigger the deletion.
if [[ "$pending_jobs" == "0" ]]; then
  echo "Holodeck e2e Job status is not in running stage , Delete the vpc $vpcid and dependent resources"
  delete_vpc_resources "$vpcid"
fi

0 comments on commit aa1d57e

Please sign in to comment.