diff --git a/.github/workflows/awscleanup.yaml b/.github/workflows/awscleanup.yaml index 0cb55c21..841ba2d7 100644 --- a/.github/workflows/awscleanup.yaml +++ b/.github/workflows/awscleanup.yaml @@ -38,42 +38,12 @@ jobs: echo "AWS_VPC_IDS=$vpcs" >> $GITHUB_ENV - name: Clean up VPCs - if: env.vpcs != '' + if: env.AWS_VPC_IDS != '' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - get_tag_value(){ - local vpc_id=$1 - local key=$2 - aws ec2 describe-tags --filters "Name=resource-id,Values=$vpc_id" "Name=key,Values=$key" \ - --query "Tags[0].Value" --output text - } - for vpc in $vpcs; do - github_repository=$(get_tag_value $vpc "GitHubRepository") - run_id=$(get_tag_value $vpc "GitHubRunId") - job_name=$(get_tag_value $vpc "GitHubJob") - response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ - "https://api.github.com/repos/${github_repository}/actions/runs/${run_id}/jobs") - if [[ -z "$response" || "$response" == "null" ]]; then - continue - fi - - # 1. make sure .jobs exist in response - # e.g. { "message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404" } - # 2. check if all jobs completed - - if ! echo "$response" | jq -e '.jobs != null' >/dev/null 2>&1; then - continue - fi - - is_jobs_not_completed=$(echo "$response" | jq -r ".jobs? // [] | - map(select(.status != \"completed\")) | - length") - - if [[ "$is_jobs_not_completed" -eq 0 ]]; then - echo "Holodeck e2e Job status is not in running stage , Delete the vpc $vpc and dependent resources" - scripts/awscleanup.sh $vpc - fi + for vpcid in $AWS_VPC_IDS; do + scripts/awscleanup.sh $vpcid done - name: Post cleanup diff --git a/scripts/awscleanup.sh b/scripts/awscleanup.sh index 1e137ade..3e0b3bcb 100755 --- a/scripts/awscleanup.sh +++ b/scripts/awscleanup.sh @@ -1,103 +1,145 @@ #!/bin/bash if [[ $# -ne 1 ]]; then - echo " vpcid required for deletion" - exit 1 + echo " vpcid required for deletion" + exit 1 fi -export vpc=$1 - -echo "Start cleanup of resources in VPC: $vpc" - -# Delete Instance -instances=$(aws ec2 describe-instances \ - --filters "Name=vpc-id,Values=$vpc" \ - --query "Reservations[].Instances[].InstanceId" \ - --output text | tr -d '\r' | tr '\n' ' ') -for instance in $instances; do - aws ec2 terminate-instances --instance-ids "$instance" -done - -# Detach and Delete Security Groups -security_groups=$(aws ec2 describe-security-groups \ - --filters Name=vpc-id,Values=$vpc \ - --query "SecurityGroups[?GroupName!='default'].GroupId" \ - --output text | tr -d '\r' | tr '\n' ' ') -for sg in $security_groups; do - enis=$(aws ec2 describe-network-interfaces \ - --filters Name=group-id,Values=$sg \ - --query "NetworkInterfaces[].NetworkInterfaceId" \ + +export vpcid=$1 + +get_tag_value(){ + local vpc=$1 + local key=$2 + aws ec2 describe-tags --filters "Name=resource-id,Values=$vpcid" "Name=key,Values=$key" \ + --query "Tags[0].Value" --output text +} + +delete_vpc_resources() { + if [[ $# -ne 1 ]]; then + echo " vpcid required for deletion" + exit 1 + fi + export vpcid=$1 + + echo "Start cleanup of resources in VPC: $vpcid" + + # Delete Instance + instances=$(aws ec2 describe-instances \ + --filters "Name=vpc-id,Values=$vpcid" \ + --query "Reservations[].Instances[].InstanceId" \ --output text | tr -d '\r' | tr '\n' ' ') - for eni in $enis; do - aws ec2 modify-network-interface-attribute \ - --network-interface-id "$eni" \ - --groups "$(aws ec2 describe-security-groups \ - --query 'SecurityGroups[?GroupName==`default`].GroupId' \ - --output text)" + for instance in $instances; do + aws ec2 terminate-instances --instance-ids "$instance" done - aws ec2 delete-security-group --group-id "$sg" -done - -# Delete Subnets -subnets=$(aws ec2 describe-subnets \ - --filters Name=vpc-id,Values=$vpc \ - --query "Subnets[].SubnetId" \ - --output text | tr -d '\r' | tr '\n' ' ') -for subnet in $subnets; do - aws ec2 delete-subnet --subnet-id "$subnet" -done - -# Delete Route Tables -# 1. Make first rt as Main , as we cannot delete vpcs attached with main -# 2. replace all rt with first rt -# 3. delete rt -# 4. Main table(first_rt) will be deleted once vpc deleted -first_rt="" -route_tables=$(aws ec2 describe-route-tables \ - --filters Name=vpc-id,Values=$vpc \ - --query "RouteTables[].RouteTableId" \ - --output text | tr -d '\r' | tr '\n' ' ') -for rt in $route_tables; do - associations=$(aws ec2 describe-route-tables \ - --route-table-ids "$rt" \ - --query "RouteTables[].Associations[].RouteTableAssociationId" \ + + # Detach and Delete Security Groups + security_groups=$(aws ec2 describe-security-groups \ + --filters Name=vpc-id,Values=$vpcid \ + --query "SecurityGroups[?GroupName!='default'].GroupId" \ + --output text | tr -d '\r' | tr '\n' ' ') + for sg in $security_groups; do + enis=$(aws ec2 describe-network-interfaces \ + --filters Name=group-id,Values=$sg \ + --query "NetworkInterfaces[].NetworkInterfaceId" \ --output text | tr -d '\r' | tr '\n' ' ') - for assoc_id in $associations; do - if [ -z "$first_rt" ]; then - aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $rt - first_rt=$rt + for eni in $enis; do + aws ec2 modify-network-interface-attribute \ + --network-interface-id "$eni" \ + --groups "$(aws ec2 describe-security-groups \ + --query 'SecurityGroups[?GroupName==`default`].GroupId' \ + --output text)" + done + aws ec2 delete-security-group --group-id "$sg" + done + + # Delete Subnets + subnets=$(aws ec2 describe-subnets \ + --filters Name=vpc-id,Values=$vpcid \ + --query "Subnets[].SubnetId" \ + --output text | tr -d '\r' | tr '\n' ' ') + for subnet in $subnets; do + aws ec2 delete-subnet --subnet-id "$subnet" + done + + # Delete Route Tables + # 1. Make first rt as Main , as we cannot delete vpcs attached with main + # 2. replace all rt with first rt + # 3. delete rt + # 4. Main table(first_rt) will be deleted once vpc deleted + first_rt="" + route_tables=$(aws ec2 describe-route-tables \ + --filters Name=vpc-id,Values=$vpcid \ + --query "RouteTables[].RouteTableId" \ + --output text | tr -d '\r' | tr '\n' ' ') + for rt in $route_tables; do + associations=$(aws ec2 describe-route-tables \ + --route-table-ids "$rt" \ + --query "RouteTables[].Associations[].RouteTableAssociationId" \ + --output text | tr -d '\r' | tr '\n' ' ') + for assoc_id in $associations; do + if [ -z "$first_rt" ]; then + aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $rt + first_rt=$rt + else + aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $first_rt + fi + done + aws ec2 delete-route-table --route-table-id "$rt" 2>>/dev/null + done + + # Delete Internet Gateway + internet_gateways=$(aws ec2 describe-internet-gateways \ + --filters Name=attachment.vpc-id,Values=$vpcid \ + --query "InternetGateways[].InternetGatewayId" \ + --output text | tr -d '\r' | tr '\n' ' ') + for igw in $internet_gateways; do + aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpcid" + aws ec2 delete-internet-gateway --internet-gateway-id "$igw" + done + + # Delete vpc + # try 3 times with 30 seconds interval + attempts=0 + echo "All resource Deleted for VPC: $vpcid , now delete vpc" + while [ $attempts -lt 3 ]; do + if aws ec2 delete-vpc --vpc-id $vpcid; then + echo "Successfully deleted VPC: $vpcid" + break else - aws ec2 replace-route-table-association --association-id $assoc_id --route-table-id $first_rt + attempts=$((attempts + 1)) + if [ $attempts -lt 3 ]; then + echo "Failed to delete VPC: $vpcid. Retrying in 30 seconds..." + sleep 30 + fi fi done - aws ec2 delete-route-table --route-table-id "$rt" 2>>/dev/null -done - -# Delete Internet Gateway -internet_gateways=$(aws ec2 describe-internet-gateways \ - --filters Name=attachment.vpc-id,Values=$vpc \ - --query "InternetGateways[].InternetGatewayId" \ - --output text | tr -d '\r' | tr '\n' ' ') -for igw in $internet_gateways; do - aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpc" - aws ec2 delete-internet-gateway --internet-gateway-id "$igw" -done - -# Delete vpc -# try 3 times with 30 seconds interval -attempts=0 -echo "All resource Deleted for VPC: $vpc , now delete vpc" -while [ $attempts -lt 3 ]; do - if aws ec2 delete-vpc --vpc-id $vpc; then - echo "Successfully deleted VPC: $vpc" - break - else - attempts=$((attempts + 1)) - if [ $attempts -lt 3 ]; then - echo "Failed to delete VPC: $vpc. Retrying in 30 seconds..." - sleep 30 - fi + if [ $attempts -eq 3 ]; then + echo "Failed to delete VPC: $vpcid after 3 attempts. Continue the loop to delete other vpc" fi -done -if [ $attempts -eq 3 ]; then - echo "Failed to delete VPC: $vpc after 3 attempts. Continue the loop to delete other vpc" +} + +github_repository=$(get_tag_value $vpcid "GitHubRepository") +run_id=$(get_tag_value $vpcid "GitHubRunId") +job_name=$(get_tag_value $vpcid "GitHubJob") +response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ + "https://api.github.com/repos/${github_repository}/actions/runs/${run_id}/jobs") +if [[ -z "$response" || "$response" == "null" ]]; then + exit 0 +fi + +# 1. make sure .jobs exist in response +# e.g. { "message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404" } +# 2. check if all jobs completed + +if ! echo "$response" | jq -e '.jobs != null' >/dev/null 2>&1; then + exit 0 +fi + +is_jobs_not_completed=$(echo "$response" | jq -r ".jobs? // [] | + map(select(.status != \"completed\")) | + length") + +if [[ "$is_jobs_not_completed" -eq 0 ]]; then + echo "Holodeck e2e Job status is not in running stage , Delete the vpc $vpcid and dependent resources" + delete_vpc_resources $vpcid fi