cortexlabs · tthebst · Jun 15, 2020 · May 28, 2020 · May 30, 2020 · May 30, 2020
diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
@@ -319,11 +319,34 @@ var _downCmd = &cobra.Command{
 			prompt.YesOrExit(fmt.Sprintf("your cluster named \"%s\" in %s will be spun down and all apis will be deleted, are you sure you want to continue?", *accessConfig.ClusterName, *accessConfig.Region), "", "")
 		}
 
+		fmt.Print("￮ deleting api gateway ")
+		_, errAPIGateway := awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
+		_, errVPCLink := awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
+		if errAPIGateway != nil {
+			fmt.Print("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/apis\n")
+			errors.PrintError(errAPIGateway)
+		}
+		if errVPCLink != nil {
+			fmt.Print("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/vpc-links\n")
+			errors.PrintError(errVPCLink)
+		}
+		if errAPIGateway == nil && errVPCLink == nil {
+			fmt.Println("✓")
+		} else {
+			fmt.Println()
+		}
+
+		fmt.Print("￮ deleting dashboard ")
 		err = awsClient.DeleteDashboard(*accessConfig.ClusterName)
 		if err != nil {
-			exit.Error(err)
+			fmt.Print("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it manually via the cloudwatch console: https://console.aws.amazon.com/cloudwatch/home#dashboards:\n")
+			errors.PrintError(err)
+			fmt.Println()
+		} else {
+			fmt.Println("✓")
 		}
 
+		fmt.Println("￮ spinning down the cluster ...")
 		out, exitCode, err := runManagerAccessCommand("/root/uninstall.sh", *accessConfig, awsCreds, _flagClusterEnv)
 		if err != nil {
 			exit.Error(err)

diff --git a/cli/cmd/get.go b/cli/cmd/get.go
@@ -319,7 +319,10 @@ func getAPI(env cliconfig.Environment, apiName string) (string, error) {
 
 	apiEndpoint := apiRes.BaseURL
 	if env.Provider == types.AWSProviderType {
-		apiEndpoint = strings.Replace(urls.Join(apiRes.BaseURL, *api.Endpoint), "https://", "http://", 1)
+		apiEndpoint = urls.Join(apiRes.BaseURL, *api.Endpoint)
+		if api.Networking.APIGateway == userconfig.NoneAPIGatewayType {
+			apiEndpoint = strings.Replace(apiEndpoint, "https://", "http://", 1)
+		}
 	}
 
 	if apiRes.DashboardURL != "" {

diff --git a/cli/cmd/lib_cluster_config.go b/cli/cmd/lib_cluster_config.go
@@ -457,9 +457,6 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsCreds A
 	}
 	fmt.Printf("cortex will also create an s3 bucket (%s) and a cloudwatch log group (%s)%s\n\n", clusterConfig.Bucket, clusterConfig.LogGroup, privateSubnetMsg)
 
-	if clusterConfig.APILoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme {
-		fmt.Print("warning: you've configured the API load balancer to be internal; you must configure VPC Peering or an API Gateway VPC Link to connect to your APIs (see https://docs.cortex.dev/guides/vpc-peering or https://docs.cortex.dev/guides/api-gateway)\n\n")
-	}
 	if clusterConfig.OperatorLoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme {
 		fmt.Print("warning: you've configured the operator load balancer to be internal; you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/guides/vpc-peering)\n\n")
 	}

diff --git a/docs/cluster-management/config.md b/docs/cluster-management/config.md
@@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t
 
 The Cortex cluster may be configured by providing a configuration file to `cortex cluster up` or `cortex cluster configure` via the `--config` flag (e.g. `cortex cluster up --config cluster.yaml`). Below is the schema for the cluster configuration file, with default values shown (unless otherwise specified):
 
-<!-- CORTEX_VERSION_MINOR x2 -->
+<!-- CORTEX_VERSION_MINOR x6 -->
 ```yaml
 # cluster.yaml
 
@@ -49,18 +49,21 @@ instance_volume_type: gp2
 
 # whether the subnets used for EC2 instances should be public or private (default: "public")
 # if "public", instances will be assigned public IP addresses; if "private", instances won't have public IPs and a NAT gateway will be created to allow outgoing network requests
+# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information
 subnet_visibility: public  # must be "public" or "private"
 
 # whether to include a NAT gateway with the cluster (a NAT gateway is necessary when using private subnets)
 # default value is "none" if subnet_visibility is set to "public"; "single" if subnet_visibility is "private"
 nat_gateway: none  # must be "none", "single", or "highly_available" (highly_available means one NAT gateway per availability zone)
 
 # whether the API load balancer should be internet-facing or internal (default: "internet-facing")
-# note: if using "internal", you must configure VPC Peering or an API Gateway VPC Link to connect to your APIs (see https://docs.cortex.dev/guides/vpc-peering or https://docs.cortex.dev/guides/api-gateway)
+# note: if using "internal", APIs will still be accessible via the public API Gateway endpoint unless you also disable API Gateway in your API's configuration (if you do that, you must configure VPC Peering to connect to your APIs)
+# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information
 api_load_balancer_scheme: internet-facing  # must be "internet-facing" or "internal"
 
 # whether the operator load balancer should be internet-facing or internal (default: "internet-facing")
-# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/guides/vpc-peering)
+# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (https://docs.cortex.dev/v/master/guides/vpc-peering)
+# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information
 operator_load_balancer_scheme: internet-facing  # must be "internet-facing" or "internal"
 
 # CloudWatch log group for cortex (default: <cluster_name>)
@@ -73,8 +76,8 @@ tags:  # <string>: <string> map of key/value pairs
 # see https://docs.cortex.dev/v/master/cluster-management/spot-instances for additional details on spot configuration
 spot: false
 
-# see https://docs.cortex.dev/v/master/guides/subdomain-https-setup for instructions on how to set up HTTPS for APIs
-ssl_certificate_arn:  # if empty, APIs will still be accessible via HTTPS (in addition to HTTP), but will not use a trusted certificate
+# see https://docs.cortex.dev/v/master/guides/custom-domain for instructions on how to set up a custom domain
+ssl_certificate_arn:
 ```
 
 The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [API configuration](../deployments/api-configuration.md).

diff --git a/docs/cluster-management/uninstall.md b/docs/cluster-management/uninstall.md
@@ -43,6 +43,4 @@ aws s3 rb --force s3://<bucket>
 aws logs describe-log-groups --log-group-name-prefix=<log_group> --query logGroups[*].[logGroupName] --output text | xargs -I {} aws logs delete-log-group --log-group-name {}
 ```
 
-If you've setup API gateway and want to delete it, please follow these [instructions](../guides/api-gateway.md#cleanup).
-
-If you've configured HTTPS by specifying an SSL Certificate for a subdomain in your cluster configuration, you may wish to remove the SSL Certificate and Hosted Zone for the domain by following these [instructions](../guides/subdomain-https-setup.md#cleanup).
+If you've configured a custom domain for your APIs, you may wish to remove the SSL Certificate and Hosted Zone for the domain by following these [instructions](../guides/custom-domain.md#cleanup).
diff --git a/docs/cluster-management/update.md b/docs/cluster-management/update.md
@@ -33,6 +33,6 @@ cortex version
 cortex cluster up
 ```
 
-In production environments, you can upgrade your cluster without downtime if you have a service in front of your Cortex cluster (for example, you can [configure API Gateway as a proxy service](../guides/api-gateway.md)): first spin up your new cluster, then update your client-facing service to route traffic to your new cluster, and then spin down your old cluster.
+In production environments, you can upgrade your cluster without downtime if you have a service in front of your Cortex cluster (for example, a backend server or an external API Gateway): first spin up your new cluster, then update your client-facing service to route traffic to your new cluster, and then spin down your old cluster.
 
 If you've set up HTTPS by specifying an SSL Certificate for a subdomain in your cluster configuration, you can upgrade your cluster with minimal downtime: first spin up a new cluster, then update the A record in your subdomain hosted zone to point to the API loadbalancer of your new cluster. Wait at least a 24 to 48 hours before spinning down your old cluster to allow old DNS cache to be flushed.
diff --git a/docs/deployments/api-configuration.md b/docs/deployments/api-configuration.md
@@ -26,6 +26,8 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     cpu: <string | int | float>  # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int>  # GPU request per replica (default: 0)
     mem: <string>  # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
+  networking:
+    api_gateway: public | none  # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
   autoscaling:  # (aws only)
     min_replicas: <int>  # minimum number of replicas (default: 1)
     max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -46,7 +48,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [networking](networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## TensorFlow Predictor
 
@@ -76,6 +78,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     cpu: <string | int | float>  # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int>  # GPU request per replica (default: 0)
     mem: <string>  # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
+  networking:
+    api_gateway: public | none  # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
   autoscaling:  # (aws only)
     min_replicas: <int>  # minimum number of replicas (default: 1)
     max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -96,7 +100,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [networking](networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## ONNX Predictor
 
@@ -124,6 +128,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     cpu: <string | int | float>  # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int>  # GPU request per replica (default: 0)
     mem: <string>  # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
+  networking:
+    api_gateway: public | none  # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
   autoscaling:  # (aws only)
     min_replicas: <int>  # minimum number of replicas (default: 1)
     max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -144,4 +150,4 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [networking](networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
diff --git a/docs/deployments/networking.md b/docs/deployments/networking.md
@@ -0,0 +1,116 @@
+# Networking
+
+_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
+
+APIs are deployed with an internet-facing API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [API configuration](api-configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29-second request timeout, or if you are keeping your APIs internal to your VPC. See below for common configurations.
+
+By default, the API load balancer is internet-facing. You can configure your API load balancer to be internal by setting `api_load_balancer_scheme: internal` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). This will force external traffic to go through your API Gateway endpoint, or if you disabled API Gateway for your API, it will make your API only accessible through VPC Peering. Note that if API Gateway is used, endpoints will be internet-facing regardless of `api_load_balancer_scheme`. See below for common configurations.
+
+## Common API networking configurations
+
+### Public https endpoint (with API Gateway)
+
+This is the most common configuration for public APIs. [Custom domains](../guides/custom-domain.md) can be used with this setup, but are not required.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internal  # optional: forces external traffic to go through API Gateway (the default is internet-facing)
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: public  # this is the default, so can be omitted
+```
+
+### Internal https endpoint
+
+You can configure your API to be internal (i.e. not internet-facing). If you do this, you must use [VPC Peering](../guides/vpc-peering.md) to connect to your APIs.
+
+The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](../guides/custom-domain.md), which will use ACM to provision SSL certs for your domain.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internal  # must be set explicitly (the default is internet-facing)
+
+# use this to configure a custom domain
+# if you don't use a custom domain, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`)
+ssl_certificate_arn: arn:aws:acm:us-west-2:***:certificate/***
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```
+
+### Internal http endpoint
+
+You can configure your API to be internal (i.e. not internet-facing). If you do this, you must use [VPC Peering](../guides/vpc-peering.md) to connect to your APIs.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internal  # must be set explicitly (the default is internet-facing)
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```
+
+### Public https endpoint (without API Gateway)
+
+API Gateway is generally recommended for internet-facing https APIs, but there may be a situation where you don't wish to use it (e.g. requests take longer than 29 seconds to complete, which is the max for API Gateway). In this case, clients can connect directly to the API load balancer.
+
+The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](../guides/custom-domain.md), which will use ACM to provision SSL certs for your domain.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internet-facing  # this is the default, so can be omitted
+
+# use this to configure a custom domain
+# if you don't use a custom domain, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`)
+ssl_certificate_arn: arn:aws:acm:us-west-2:***:certificate/***
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```
+
+### Public http endpoint
+
+If you don't wish to use https for your public API, you can simply disable API Gateway (your API will be accessed directly via the API load balancer):
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internet-facing  # this is the default, so can be omitted
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```