diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go index a90209a9a6..8342fb5478 100644 --- a/cli/cmd/cluster.go +++ b/cli/cmd/cluster.go @@ -138,7 +138,7 @@ var _upCmd = &cobra.Command{ exit.Error(err) } - err = CreateBucketIfNotFound(awsClient, clusterConfig.Bucket) + err = createBucketIfNotFound(awsClient, clusterConfig.Bucket) if err != nil { exit.Error(err) } @@ -147,7 +147,7 @@ var _upCmd = &cobra.Command{ exit.Error(err) } - err = CreateLogGroupIfNotFound(awsClient, clusterConfig.LogGroup) + err = createLogGroupIfNotFound(awsClient, clusterConfig.LogGroup) if err != nil { exit.Error(err) } @@ -156,16 +156,25 @@ var _upCmd = &cobra.Command{ exit.Error(err) } - err = createDashboard(awsClient, clusterConfig.ClusterName) + err = createOrClearDashboard(awsClient, clusterConfig.ClusterName) + if err != nil { + exit.Error(err) + } + + err = createOrReplaceAPIGateway(awsClient, clusterConfig.ClusterName, clusterConfig.Tags) if err != nil { exit.Error(err) } out, exitCode, err := runManagerUpdateCommand("/root/install.sh", clusterConfig, awsCreds, _flagClusterEnv) if err != nil { + awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion + awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion exit.Error(err) } if exitCode == nil || *exitCode != 0 { + awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion + awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterConfig.ClusterName) // best effort deletion helpStr := "\nDebugging tips (may or may not apply to this error):" helpStr += fmt.Sprintf("\n* if your cluster started spinning up but was unable to provision instances, additional error information may be found in the activity history of your cluster's autoscaling groups (select each autoscaling group and click the \"Activity History\" tab): 
https://console.aws.amazon.com/ec2/autoscaling/home?region=%s#AutoScalingGroups:", *clusterConfig.Region) helpStr += fmt.Sprintf("\n* if your cluster started spinning up, please ensure that your CloudFormation stacks for this cluster have been fully deleted before trying to spin up this cluster again (you can delete your CloudFormation stacks from the AWS console: %s)", getCloudFormationURL(clusterConfig.ClusterName, *clusterConfig.Region)) @@ -323,11 +332,11 @@ var _downCmd = &cobra.Command{ _, errAPIGateway := awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName) _, errVPCLink := awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName) if errAPIGateway != nil { - fmt.Print("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/apis\n") + fmt.Printf("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/apis\n", *accessConfig.Region) errors.PrintError(errAPIGateway) } if errVPCLink != nil { - fmt.Print("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/vpc-links\n") + fmt.Printf("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/vpc-links\n", *accessConfig.Region) errors.PrintError(errVPCLink) } if errAPIGateway == nil && errVPCLink == nil { @@ -339,7 +348,7 @@ var _downCmd = &cobra.Command{ fmt.Print("○ deleting dashboard ") err = 
awsClient.DeleteDashboard(*accessConfig.ClusterName) if err != nil { - fmt.Print("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it manually via the cloudwatch console: https://console.aws.amazon.com/cloudwatch/home#dashboards:\n") + fmt.Printf("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it via the cloudwatch console: https://%s.console.aws.amazon.com/cloudwatch/home#dashboards:\n", *accessConfig.Region) errors.PrintError(err) fmt.Println() } else { @@ -716,7 +725,7 @@ func getCloudFormationURLWithAccessConfig(accessConfig *clusterconfig.AccessConf return getCloudFormationURL(*accessConfig.ClusterName, *accessConfig.Region) } -func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error { +func createBucketIfNotFound(awsClient *aws.Client, bucket string) error { bucketFound, err := awsClient.DoesBucketExist(bucket) if err != nil { return err @@ -725,6 +734,7 @@ func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error { fmt.Print("○ creating a new s3 bucket: ", bucket) err = awsClient.CreateBucket(bucket) if err != nil { + fmt.Print("\n\n") return err } fmt.Println(" ✓") @@ -734,7 +744,7 @@ func CreateBucketIfNotFound(awsClient *aws.Client, bucket string) error { return nil } -func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error { +func createLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error { logGroupFound, err := awsClient.DoesLogGroupExist(logGroup) if err != nil { return err @@ -743,6 +753,7 @@ func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) error { fmt.Print("○ creating a new cloudwatch log group: ", logGroup) err = awsClient.CreateLogGroup(logGroup) if err != nil { + fmt.Print("\n\n") return err } fmt.Println(" ✓") @@ -753,8 +764,8 @@ func CreateLogGroupIfNotFound(awsClient *aws.Client, logGroup string) 
error {
 	return nil
 }
 
-// createDashboard creates a new dashboard (or clears an existing one if it already exists)
-func createDashboard(awsClient *aws.Client, dashboardName string) error {
+// createOrClearDashboard creates a new dashboard (or clears an existing one if it already exists)
+func createOrClearDashboard(awsClient *aws.Client, dashboardName string) error {
 	dashboardFound, err := awsClient.DoesDashboardExist(dashboardName)
 	if err != nil {
 		return err
@@ -768,6 +779,7 @@ func createDashboard(awsClient *aws.Client, dashboardName string) error {
 
 	err = awsClient.CreateDashboard(dashboardName, consts.DashboardTitle)
 	if err != nil {
+		fmt.Print("\n\n")
 		return err
 	}
 
@@ -775,3 +787,29 @@ func createDashboard(awsClient *aws.Client, dashboardName string) error {
 
 	return nil
 }
+
+// createOrReplaceAPIGateway creates an API gateway for the cluster (any existing VPC link and API gateway tagged with the cluster name are deleted first)
+func createOrReplaceAPIGateway(awsClient *aws.Client, clusterName string, tags map[string]string) error {
+	fmt.Print("○ creating api gateway: ", clusterName)
+
+	_, err := awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, clusterName)
+	if err != nil {
+		fmt.Print("\n\n")
+		return errors.Append(err, fmt.Sprintf("\n\nunable to delete existing vpc link with tag %s=%s; please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/vpc-links", clusterconfig.ClusterNameTag, clusterName, awsClient.Region))
+	}
+
+	_, err = awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, clusterName)
+	if err != nil {
+		fmt.Print("\n\n")
+		return errors.Append(err, fmt.Sprintf("\n\nunable to delete existing api gateway with tag %s=%s; please delete it via the api gateway console: https://%s.console.aws.amazon.com/apigateway/main/apis", clusterconfig.ClusterNameTag, clusterName, awsClient.Region))
+	}
+
+	_, err = awsClient.CreateAPIGateway(clusterName, tags)
+	if err != nil {
+		fmt.Print("\n\n")
+		return err
+	}
+
+	fmt.Println(" ✓")
+	return nil
+}
diff --git a/manager/get_api_gateway_endpoint.py b/manager/get_api_gateway_endpoint.py
new file mode 100644
index 0000000000..4a185704be
--- /dev/null
+++ b/manager/get_api_gateway_endpoint.py
@@ -0,0 +1,36 @@
+# Copyright 2020 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import boto3
+import os
+
+
+def get_api_gateway_endpoint():
+    cluster_name = os.environ["CORTEX_CLUSTER_NAME"]
+    region = os.environ["CORTEX_REGION"]
+    client_apigateway = boto3.client("apigatewayv2", region_name=region)
+
+    paginator = client_apigateway.get_paginator("get_apis")
+    for api_gateway_page in paginator.paginate():
+        for api_gateway in api_gateway_page["Items"]:
+            if api_gateway.get("Tags", {}).get("cortex.dev/cluster-name") == cluster_name:
+                return api_gateway["ApiEndpoint"]
+
+    raise Exception(
+        f"your cluster's api gateway (in {region} with tag cortex.dev/cluster-name={cluster_name}) does not exist"
+    )
+
+
+if __name__ == "__main__":
+    print(get_api_gateway_endpoint(), end="")
diff --git a/manager/get_api_gateway_id.py b/manager/get_api_gateway_id.py
new file mode 100644
index 0000000000..ba25790ddb
--- /dev/null
+++ b/manager/get_api_gateway_id.py
@@ -0,0 +1,36 @@
+# Copyright 2020 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import boto3
+import os
+
+
+def get_api_gateway_id():
+    cluster_name = os.environ["CORTEX_CLUSTER_NAME"]
+    region = os.environ["CORTEX_REGION"]
+    client_apigateway = boto3.client("apigatewayv2", region_name=region)
+
+    paginator = client_apigateway.get_paginator("get_apis")
+    for api_gateway_page in paginator.paginate():
+        for api_gateway in api_gateway_page["Items"]:
+            if api_gateway.get("Tags", {}).get("cortex.dev/cluster-name") == cluster_name:
+                return api_gateway["ApiId"]
+
+    raise Exception(
+        f"your cluster's api gateway (in {region} with tag cortex.dev/cluster-name={cluster_name}) does not exist"
+    )
+
+
+if __name__ == "__main__":
+    print(get_api_gateway_id(), end="")
diff --git a/manager/info.sh b/manager/info.sh
index cc7d55dcca..67a54f35b3 100755
--- a/manager/info.sh
+++ b/manager/info.sh
@@ -25,7 +25,7 @@ function get_api_load_balancer_endpoint() {
 }
 
 function get_api_gateway_endpoint() {
-  aws apigatewayv2 get-apis --region $CORTEX_REGION | jq ".Items[] | select(.Name == \"${CORTEX_CLUSTER_NAME}\") | .ApiEndpoint" | tr -d '"'
+  python get_api_gateway_endpoint.py
 }
 
 if ! 
eksctl utils describe-stacks --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION >/dev/null 2>&1; then
diff --git a/manager/install.sh b/manager/install.sh
index 35d8ce4e4c..71b2377ceb 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -163,18 +163,6 @@ function ensure_eks() {
 function main() {
   mkdir -p $CORTEX_CLUSTER_WORKSPACE
 
-  # create API Gateway
-  if [ "$arg1" != "--update" ]; then
-    create_api_output=$(aws apigatewayv2 create-api --tags $CORTEX_TAGS --region $CORTEX_REGION --name $CORTEX_CLUSTER_NAME --protocol-type HTTP)
-    api_id=$(echo $create_api_output | jq .ApiId | tr -d '"')
-    if [ "$api_id" = "" ] || [ "$api_id" = "null" ]; then
-      echo -e "unable to extract api gateway ID from create-api output:\n$create_api_output"
-      exit 1
-    fi
-    # create default stage; ignore error because default stage is supposed to be already created, but currently it isn't because of a possible bug in create-api
-    aws apigatewayv2 create-stage --region $CORTEX_REGION --tags $CORTEX_TAGS --api-id $api_id --auto-deploy --stage-name \$default &>/dev/null || true
-  fi
-
   # create cluster (if it doesn't already exist)
   ensure_eks
 
@@ -261,6 +249,7 @@ function main() {
   # add VPC Link integration to API Gateway
   if [ "$arg1" != "--update" ] && [ "$CORTEX_API_LOAD_BALANCER_SCHEME" == "internal" ]; then
     echo -n "○ creating api gateway vpc link integration "
+    api_id=$(python get_api_gateway_id.py)
     python create_gateway_integration.py $api_id $vpc_link_id
     echo "✓"
     echo -n "○ waiting for api gateway vpc link integration "
diff --git a/pkg/lib/aws/apigateway.go b/pkg/lib/aws/apigateway.go
index 9e7a67e511..648cf7c1e1 100644
--- a/pkg/lib/aws/apigateway.go
+++ b/pkg/lib/aws/apigateway.go
@@ -25,6 +25,34 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 )
 
+// CreateAPIGateway Creates a new HTTP API Gateway with an auto-deployed $default stage (both tagged with the provided tags) and returns the new API Gateway's ID
+func (c *Client) CreateAPIGateway(name string, tags map[string]string) (string, error) {
+	createAPIResponse, err := c.APIGatewayV2().CreateApi(&apigatewayv2.CreateApiInput{
+		Name:         aws.String(name),
+		ProtocolType: aws.String(apigatewayv2.ProtocolTypeHttp),
+		Tags:         aws.StringMap(tags),
+	})
+	if err != nil {
+		return "", errors.Wrap(err, "failed to create api gateway")
+	}
+	if createAPIResponse.ApiId == nil {
+		return "", errors.ErrorUnexpected("failed to create api gateway")
+	}
+
+	_, err = c.APIGatewayV2().CreateStage(&apigatewayv2.CreateStageInput{
+		ApiId:      createAPIResponse.ApiId,
+		AutoDeploy: aws.Bool(true),
+		StageName:  aws.String("$default"),
+		Tags:       aws.StringMap(tags),
+	})
+	if err != nil {
+		c.DeleteAPIGateway(*createAPIResponse.ApiId) // best effort cleanup of the api gateway that was just created; the deletion error is intentionally ignored so the stage creation error is the one returned
+		return "", errors.Wrap(err, "failed to create $default api gateway stage")
+	}
+
+	return *createAPIResponse.ApiId, nil
+}
+
 // GetVPCLinkByTag Gets a VPC Link by tag (returns nil if there are no matches)
 func (c *Client) GetVPCLinkByTag(tagName string, tagValue string) (*apigatewayv2.VpcLink, error) {
 	var nextToken *string
@@ -111,20 +139,30 @@ func (c *Client) DeleteAPIGatewayByTag(tagName string, tagValue string) (*apigat
 		return nil, nil
 	}
 
-	// Delete mappings in case user added a custom domain name (otherwise this will block API Gateway deletion)
-	err = c.DeleteAPIGatewayMappings(*apiGateway.ApiId)
+	err = c.DeleteAPIGateway(*apiGateway.ApiId)
 	if err != nil {
 		return nil, err
 	}
 
+	return apiGateway, nil
+}
+
+// DeleteAPIGateway Deletes an API Gateway by ID, removing its API mappings first (returns an error if the API Gateway does not exist)
+func (c *Client) DeleteAPIGateway(apiGatewayID string) error {
+	// Delete mappings in case user added a custom domain name (otherwise this will block API Gateway deletion)
+	err := c.DeleteAPIGatewayMappings(apiGatewayID)
+	if err != nil {
+		return err
+	}
+
 	_, err = c.APIGatewayV2().DeleteApi(&apigatewayv2.DeleteApiInput{
-		ApiId: apiGateway.ApiId,
+		ApiId: aws.String(apiGatewayID),
 	})
 	if err != nil {
-		return nil, errors.Wrap(err, "failed to delete api gateway "+*apiGateway.ApiId)
+		return errors.Wrap(err, "failed to delete api gateway "+apiGatewayID)
 	}
 
-	return apiGateway, nil
+	return nil
 }
 
 // DeleteAPIGatewayMappingsForDomainName deletes all API mappings that point to the provided api gateway from the provided domain name