From b62752b31415100e4ae0917e80f1e05970875671 Mon Sep 17 00:00:00 2001 From: Yecheng Fu Date: Thu, 19 Mar 2020 20:44:18 +0800 Subject: [PATCH] fix clean bug (#1991) --- ci/aws-clean-eks.sh | 85 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/ci/aws-clean-eks.sh b/ci/aws-clean-eks.sh index 69f11ec570..fc9c94eb81 100755 --- a/ci/aws-clean-eks.sh +++ b/ci/aws-clean-eks.sh @@ -24,25 +24,52 @@ function get_stacks() { aws cloudformation list-stacks --stack-status-filter CREATE_COMPLETE DELETE_FAILED --query 'StackSummaries[*].StackName' --output text } +function delete_security_group() { + local sgId="$1" + echo "info: deleting security group '$sgId'" + for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$sgId" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do + echo "info: clear leaked network interfaces '$eni'" + aws ec2 delete-network-interface --network-interface-id "$eni" + done + aws ec2 delete-security-group --group-id "$sgId" + if [ $? -eq 0 ]; then + echo "info: succesfully deleted security group '$sgId'" + else + echo "error: failed to deleted security group '$sgId'" + fi +} + +function delete_vpc() { + local vpcId="$1" + echo "info: deleting vpc '$vpcId'" + for sgId in $(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$vpcId" --query "SecurityGroups[?GroupName != 'default'].GroupId" --output text); do + delete_security_group "$sgId" + done + aws ec2 delete-vpc --vpc-id "$vpcId" + if [ $? -eq 0 ]; then + echo "info: succesfully deleted vpc '$vpcId'" + else + echo "error: failed to deleted vpc '$vpcId'" + fi +} + function fix_eks_mng_deletion_issues() { local cluster="$1" local mng="$2" while IFS=$'\n' read -r line; do read -r code resourceIds <<< $line if [ "$code" == "Ec2SecurityGroupDeletionFailure" ]; then - echo "info: clear security group '$resourceIds'" - for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$resourceIds" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do - echo "info: clear leaked network interfaces '$eni'" - aws ec2 delete-network-interface --network-interface-id "$eni" + IFS=',' read -ra sgIds <<< "$resourceIds" + for sgId in ${sgIds[@]}; do + delete_security_group "$sgId" done - aws ec2 delete-security-group --group-id $resourceIds fi done <<< $(aws eks describe-nodegroup --cluster-name "$cluster" --nodegroup-name "$mng" --query 'nodegroup.health.issues' --output json | jq -r '.[].resourceIds |= join(",") | .[] | "\(.code)\t\(.resourceIds)"') } function clean_eks() { local CLUSTER="$1" - echo "info: deleting mng stack" + echo "info: searching mng stack" local regex='^'$CLUSTER'-mng-[0-9]+$' local mngStack= for stackName in $(get_stacks); do @@ -53,24 +80,15 @@ function clean_eks() { break done if [ -n "$mngStack" ]; then - echo "info: mng stack found '$mngStack', deleting it" - aws cloudformation delete-stack --stack-name $mngStack - aws cloudformation wait stack-delete-complete --stack-name $mngStack - if [ $? -ne 0 ]; then - echo "error: failed to delete mng stack '$mngStack', delete related resource first" - for mngName in $(aws eks list-nodegroups --cluster-name jenkins-tidb-operator-e2e2 --query 'nodegroups[*]' --output text); do - fix_eks_mng_deletion_issues "$CLUSTER" $mngName - done - aws cloudformation delete-stack --stack-name $mngStack - aws cloudformation wait stack-delete-complete --stack-name $mngStack - fi + echo "info: mng stack found '$mngStack'" else - echo "info: mng stack not found, skipped" + echo "info: mng stack not found" fi - echo "info: deleting cluster/cluster-role/mng-role/vpc stacks" + echo "info: deleting mng/cluster/cluster-role/mng-role/vpc stacks" local stacks=( - $CLUSTER-cluster + $mngStack + $CLUSTER-cluster $CLUSTER-role-cluster $CLUSTER-role-mng $CLUSTER-vpc @@ -79,6 +97,33 @@ function clean_eks() { echo "info: deleting stack $stack" aws cloudformation delete-stack --stack-name $stack aws cloudformation wait stack-delete-complete --stack-name $stack + if [ $? -ne 0 ]; then + echo "error: failed to delete stack '$stack'" + if [ "$stack" == "$mngStack" ]; then + echo "info: try to fix mng stack '$stack'" + for mngName in $(aws eks list-nodegroups --cluster-name "$CLUSTER" --query 'nodegroups[*]' --output text); do + fix_eks_mng_deletion_issues "$CLUSTER" $mngName + done + elif [ "$stack" == "$CLUSTER-vpc" ]; then + echo "info: try to fix vpc stack '$stack'" + while IFS=$'\n' read -r sgId; do + delete_security_group "$sgId" + done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "ControlPlaneSecurityGroupID") | .OutputValue') + while IFS=$'\n' read -r vpcId; do + delete_vpc "$vpcId" + done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "VPCID") | .OutputValue') + else + echo "fatal: unable to delete stack $stack" + exit 1 + fi + echo "info: try to delete the stack '$stack' again" + aws cloudformation delete-stack --stack-name $stack + aws cloudformation wait stack-delete-complete --stack-name $stack + if [ $? -ne 0 ]; then + echo "fatal: unable to delete stack $stack" + exit 1 + fi + fi done }