Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete API Gateway if cluster up fails #1172

Merged
merged 6 commits into from
Jun 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 48 additions & 10 deletions cli/cmd/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ var _upCmd = &cobra.Command{
exit.Error(err)
}

err = CreateBucketIfNotFound(awsClient, clusterConfig.Bucket)
err = createBucketIfNotFound(awsClient, clusterConfig.Bucket)
if err != nil {
exit.Error(err)
}
Expand All @@ -147,7 +147,7 @@ var _upCmd = &cobra.Command{
exit.Error(err)
}

err = CreateLogGroupIfNotFound(awsClient, clusterConfig.LogGroup)
err = createLogGroupIfNotFound(awsClient, clusterConfig.LogGroup)
if err != nil {
exit.Error(err)
}
Expand All @@ -156,16 +156,25 @@ var _upCmd = &cobra.Command{
exit.Error(err)
}

err = createDashboard(awsClient, clusterConfig.ClusterName)
err = createOrClearDashboard(awsClient, clusterConfig.ClusterName)
if err != nil {
exit.Error(err)
}

err = createOrReplaceAPIGateway(awsClient, clusterConfig.ClusterName, clusterConfig.Tags)
if err != nil {
exit.Error(err)
}

out, exitCode, err := runManagerUpdateCommand("/root/install.sh", clusterConfig, awsCreds, _flagClusterEnv)
if err != nil {
awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
exit.Error(err)
}
if exitCode == nil || *exitCode != 0 {
awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion
helpStr := "\nDebugging tips (may or may not apply to this error):"
helpStr += fmt.Sprintf("\n* if your cluster started spinning up but was unable to provision instances, additional error information may be found in the activity history of your cluster's autoscaling groups (select each autoscaling group and click the \"Activity History\" tab): https://console.aws.amazon.com/ec2/autoscaling/home?region=%s#AutoScalingGroups:", *clusterConfig.Region)
helpStr += fmt.Sprintf("\n* if your cluster started spinning up, please ensure that your CloudFormation stacks for this cluster have been fully deleted before trying to spin up this cluster again (you can delete your CloudFormation stacks from the AWS console: %s)", getCloudFormationURL(clusterConfig.ClusterName, *clusterConfig.Region))
Expand Down Expand Up @@ -323,11 +332,11 @@ var _downCmd = &cobra.Command{
_, errAPIGateway := awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
_, errVPCLink := awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
if errAPIGateway != nil {
fmt.Print("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/apis\n")
fmt.Printf("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/apis\n", *accessConfig.Region)
errors.PrintError(errAPIGateway)
}
if errVPCLink != nil {
fmt.Print("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/vpc-links\n")
fmt.Printf("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/vpc-links\n", *accessConfig.Region)
errors.PrintError(errVPCLink)
}
if errAPIGateway == nil && errVPCLink == nil {
Expand All @@ -339,7 +348,7 @@ var _downCmd = &cobra.Command{
fmt.Print("○ deleting dashboard ")
err = awsClient.DeleteDashboard(*accessConfig.ClusterName)
if err != nil {
fmt.Print("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it manually via the cloudwatch console: https://console.aws.amazon.com/cloudwatch/home#dashboards:\n")
fmt.Printf("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it via the cloudwatch console: https://%s.console.aws.amazon.com/cloudwatch/home#dashboards:\n", *accessConfig.Region)
errors.PrintError(err)
fmt.Println()
} else {
Expand Down Expand Up @@ -716,7 +725,7 @@ func getCloudFormationURLWithAccessConfig(accessConfig *clusterconfig.AccessConf
return getCloudFormationURL(*accessConfig.ClusterName, *accessConfig.Region)
}

func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error {
func createBucketIfNotFound(awsClient *aws.Client, bucket string) error {
bucketFound, err := awsClient.DoesBucketExist(bucket)
if err != nil {
return err
Expand All @@ -725,6 +734,7 @@ func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error {
fmt.Print("○ creating a new s3 bucket: ", bucket)
err = awsClient.CreateBucket(bucket)
if err != nil {
fmt.Print("\n\n")
return err
}
fmt.Println(" ✓")
Expand All @@ -734,7 +744,7 @@ func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error {
return nil
}

func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
func createLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
logGroupFound, err := awsClient.DoesLogGroupExist(logGroup)
if err != nil {
return err
Expand All @@ -743,6 +753,7 @@ func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
fmt.Print("○ creating a new cloudwatch log group: ", logGroup)
err = awsClient.CreateLogGroup(logGroup)
if err != nil {
fmt.Print("\n\n")
return err
}
fmt.Println(" ✓")
Expand All @@ -753,8 +764,8 @@ func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error {
return nil
}

// createDashboard creates a new dashboard (or clears an existing one if it already exists)
func createDashboard(awsClient *aws.Client, dashboardName string) error {
// createOrClearDashboard creates a new dashboard (or clears an existing one if it already exists)
func createOrClearDashboard(awsClient *aws.Client, dashboardName string) error {
dashboardFound, err := awsClient.DoesDashboardExist(dashboardName)
if err != nil {
return err
Expand All @@ -768,10 +779,37 @@ func createDashboard(awsClient *aws.Client, dashboardName string) error {

err = awsClient.CreateDashboard(dashboardName, consts.DashboardTitle)
if err != nil {
fmt.Print("\n\n")
return err
}

fmt.Println(" ✓")

return nil
}

// createOrReplaceAPIGateway provisions a fresh API gateway for the cluster.
//
// It first asks the AWS client to delete any VPC link and then any API gateway
// tagged with clusterconfig.ClusterNameTag=clusterName, so that leftovers from a
// previous attempt are replaced rather than reused. It then creates a new API
// gateway named after the cluster and tagged with tags.
//
// Progress is written to stdout: the "creating" line is printed up front, a
// check mark is printed on success, and a blank gap is printed before returning
// any error so the error starts on its own line. When one of the deletions
// fails, the returned error is extended with a hint pointing at the API gateway
// console for the client's region.
func createOrReplaceAPIGateway(awsClient *aws.Client, clusterName string, tags map[string]string) error {
	fmt.Print("○ creating api gateway: " + clusterName)

	tagName := clusterconfig.ClusterNameTag

	// The VPC link is removed before the API gateway, same order as on cluster teardown.
	if _, err := awsClient.DeleteVPCLinkByTag(tagName, clusterName); err != nil {
		fmt.Print("\n\n")
		hint := fmt.Sprintf("\n\nunable to delete existing vpc link with tag %s=%s; please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/vpc-links", tagName, clusterName, awsClient.Region)
		return errors.Append(err, hint)
	}

	if _, err := awsClient.DeleteAPIGatewayByTag(tagName, clusterName); err != nil {
		fmt.Print("\n\n")
		hint := fmt.Sprintf("\n\nunable to delete existing api gateway with tag %s=%s; please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/apis", tagName, clusterName, awsClient.Region)
		return errors.Append(err, hint)
	}

	if _, err := awsClient.CreateAPIGateway(clusterName, tags); err != nil {
		fmt.Print("\n\n")
		return err
	}

	fmt.Println(" ✓")
	return nil
}
36 changes: 36 additions & 0 deletions manager/get_api_gateway_endpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2020 Cortex Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import boto3
import os


def get_api_gateway_endpoint():
cluster_name = os.environ["CORTEX_CLUSTER_NAME"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since there are only 2 variables, it may be easier to follow the code that calls this python file if these values are accepted as args

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other python files, e.g. create_gateway_integration.py, also pull from the environment variables, so I think for now I lean towards leaving it as is. I'm actually leaning toward moving more of the api gateway code from install.sh to python, so at that point we can convert to only using args, does that sound ok?

region = os.environ["CORTEX_REGION"]
client_apigateway = boto3.client("apigatewayv2", region_name=region)

paginator = client_apigateway.get_paginator("get_apis")
for api_gateway_page in paginator.paginate():
for api_gateway in api_gateway_page["Items"]:
if api_gateway["Tags"].get("cortex.dev/cluster-name") == cluster_name:
return api_gateway["ApiEndpoint"]

raise Exception(
f"your cluster's api gateway (in {region} with tag cortex.dev/cluster-name={cluster_name}) does not exist"
)


if __name__ == "__main__":
print(get_api_gateway_endpoint(), end="")
36 changes: 36 additions & 0 deletions manager/get_api_gateway_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2020 Cortex Labs, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import boto3
import os


def get_api_gateway_id():
cluster_name = os.environ["CORTEX_CLUSTER_NAME"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since there are only 2 variables, it may be easier to follow the code that calls this python file if these values are accepted as args

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(see comment above)

region = os.environ["CORTEX_REGION"]
client_apigateway = boto3.client("apigatewayv2", region_name=region)

paginator = client_apigateway.get_paginator("get_apis")
for api_gateway_page in paginator.paginate():
for api_gateway in api_gateway_page["Items"]:
if api_gateway["Tags"].get("cortex.dev/cluster-name") == cluster_name:
return api_gateway["ApiId"]

raise Exception(
f"your cluster's api gateway (in {region} with tag cortex.dev/cluster-name={cluster_name}) does not exist"
)


if __name__ == "__main__":
print(get_api_gateway_id(), end="")
2 changes: 1 addition & 1 deletion manager/info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ function get_api_load_balancer_endpoint() {
}

function get_api_gateway_endpoint() {
aws apigatewayv2 get-apis --region $CORTEX_REGION | jq ".Items[] | select(.Name == \"${CORTEX_CLUSTER_NAME}\") | .ApiEndpoint" | tr -d '"'
python get_api_gateway_endpoint.py
}

if ! eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION >/dev/null 2>&1; then
Expand Down
13 changes: 1 addition & 12 deletions manager/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -163,18 +163,6 @@ function ensure_eks() {
function main() {
mkdir -p $CORTEX_CLUSTER_WORKSPACE

# create API Gateway
if [ "$arg1" != "--update" ]; then
create_api_output=$(aws apigatewayv2 create-api --tags $CORTEX_TAGS --region $CORTEX_REGION --name $CORTEX_CLUSTER_NAME --protocol-type HTTP)
api_id=$(echo $create_api_output | jq .ApiId | tr -d '"')
if [ "$api_id" = "" ] || [ "$api_id" = "null" ]; then
echo -e "unable to extract api gateway ID from create-api output:\n$create_api_output"
exit 1
fi
# create default stage; ignore error because default stage is supposed to be already created, but currently it isn't because of a possible bug in create-api
aws apigatewayv2 create-stage --region $CORTEX_REGION --tags $CORTEX_TAGS --api-id $api_id --auto-deploy --stage-name \$default &>/dev/null || true
fi

# create cluster (if it doesn't already exist)
ensure_eks

Expand Down Expand Up @@ -261,6 +249,7 @@ function main() {
# add VPC Link integration to API Gateway
if [ "$arg1" != "--update" ] && [ "$CORTEX_API_LOAD_BALANCER_SCHEME" == "internal" ]; then
echo -n "○ creating api gateway vpc link integration "
api_id=$(python get_api_gateway_id.py)
python create_gateway_integration.py $api_id $vpc_link_id
echo "✓"
echo -n "○ waiting for api gateway vpc link integration "
Expand Down
48 changes: 43 additions & 5 deletions pkg/lib/aws/apigateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,34 @@ import (
"github.com/cortexlabs/cortex/pkg/lib/errors"
)

// CreateAPIGateway creates an HTTP-protocol API Gateway called name, tagged with
// tags, and then adds an auto-deploying "$default" stage (carrying the same tags)
// to it.
//
// On success it returns the ID of the new API Gateway. If the API cannot be
// created, or the response carries no ID, an error is returned and nothing else
// is attempted. If the stage cannot be created, the freshly created API Gateway
// is deleted on a best-effort basis (the deletion error, if any, is ignored) and
// the stage error is returned.
func (c *Client) CreateAPIGateway(name string, tags map[string]string) (string, error) {
	apiInput := &apigatewayv2.CreateApiInput{
		Name:         aws.String(name),
		ProtocolType: aws.String(apigatewayv2.ProtocolTypeHttp),
		Tags:         aws.StringMap(tags),
	}

	api, err := c.APIGatewayV2().CreateApi(apiInput)
	switch {
	case err != nil:
		return "", errors.Wrap(err, "failed to create api gateway")
	case api.ApiId == nil:
		// A successful call without an ID leaves nothing to attach a stage to.
		return "", errors.ErrorUnexpected("failed to create api gateway")
	}
	apiID := *api.ApiId

	stageInput := &apigatewayv2.CreateStageInput{
		ApiId:      api.ApiId,
		AutoDeploy: aws.Bool(true),
		StageName:  aws.String("$default"),
		Tags:       aws.StringMap(tags),
	}

	if _, err := c.APIGatewayV2().CreateStage(stageInput); err != nil {
		c.DeleteAPIGateway(apiID) // best effort cleanup
		return "", errors.Wrap(err, "failed to create $default api gateway stage")
	}

	return apiID, nil
}

// GetVPCLinkByTag Gets a VPC Link by tag (returns nil if there are no matches)
func (c *Client) GetVPCLinkByTag(tagName string, tagValue string) (*apigatewayv2.VpcLink, error) {
var nextToken *string
Expand Down Expand Up @@ -111,20 +139,30 @@ func (c *Client) DeleteAPIGatewayByTag(tagName string, tagValue string) (*apigat
return nil, nil
}

// Delete mappings in case user added a custom domain name (otherwise this will block API Gateway deletion)
err = c.DeleteAPIGatewayMappings(*apiGateway.ApiId)
err = c.DeleteAPIGateway(*apiGateway.ApiId)
if err != nil {
return nil, err
}

return apiGateway, nil
}

// DeleteAPIGateway Deletes an API Gateway by ID (returns an error if the API Gateway does not exist)
func (c *Client) DeleteAPIGateway(apiGatewayID string) error {
// Delete mappings in case user added a custom domain name (otherwise this will block API Gateway deletion)
err := c.DeleteAPIGatewayMappings(apiGatewayID)
if err != nil {
return err
}

_, err = c.APIGatewayV2().DeleteApi(&apigatewayv2.DeleteApiInput{
ApiId: apiGateway.ApiId,
ApiId: aws.String(apiGatewayID),
})
if err != nil {
return nil, errors.Wrap(err, "failed to delete api gateway "+*apiGateway.ApiId)
return errors.Wrap(err, "failed to delete api gateway "+apiGatewayID)
}

return apiGateway, nil
return nil
}

// DeleteAPIGatewayMappingsForDomainName deletes all API mappings that point to the provided api gateway from the provided domain name
Expand Down