diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index f7ebe89cae..56763ad71f 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -319,11 +319,34 @@ var _downCmd = &cobra.Command{
 			prompt.YesOrExit(fmt.Sprintf("your cluster named \"%s\" in %s will be spun down and all apis will be deleted, are you sure you want to continue?", *accessConfig.ClusterName, *accessConfig.Region), "", "")
 		}
 
+		fmt.Print("￮ deleting api gateway ")
+		_, errAPIGateway := awsClient.DeleteAPIGatewayByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
+		_, errVPCLink := awsClient.DeleteVPCLinkByTag(clusterconfig.ClusterNameTag, *accessConfig.ClusterName)
+		if errAPIGateway != nil {
+			fmt.Print("\n\nunable to delete cortex's api gateway (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/apis\n")
+			errors.PrintError(errAPIGateway)
+		}
+		if errVPCLink != nil {
+			fmt.Print("\n\nunable to delete cortex's vpc link (see error below); if it still exists after the cluster has been deleted, please delete it manually via the api gateway console: https://console.aws.amazon.com/apigateway/main/vpc-links\n")
+			errors.PrintError(errVPCLink)
+		}
+		if errAPIGateway == nil && errVPCLink == nil {
+			fmt.Println("✓")
+		} else {
+			fmt.Println()
+		}
+
+		fmt.Print("￮ deleting dashboard ")
 		err = awsClient.DeleteDashboard(*accessConfig.ClusterName)
 		if err != nil {
-			exit.Error(err)
+			fmt.Print("\n\nunable to delete cortex's api dashboard (see error below); if it still exists after the cluster has been deleted, please delete it manually via the cloudwatch console: https://console.aws.amazon.com/cloudwatch/home#dashboards:\n")
+			errors.PrintError(err)
+			fmt.Println()
+		} else {
+			fmt.Println("✓")
 		}
 
+		fmt.Println("￮ spinning down the cluster ...")
 		out, exitCode, err := runManagerAccessCommand("/root/uninstall.sh", *accessConfig, awsCreds, _flagClusterEnv)
 		if err != nil {
 			exit.Error(err)
diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index 0a36f33116..864dfe214e 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -319,7 +319,10 @@ func getAPI(env cliconfig.Environment, apiName string) (string, error) {
 
 	apiEndpoint := apiRes.BaseURL
 	if env.Provider == types.AWSProviderType {
-		apiEndpoint = strings.Replace(urls.Join(apiRes.BaseURL, *api.Endpoint), "https://", "http://", 1)
+		apiEndpoint = urls.Join(apiRes.BaseURL, *api.Endpoint)
+		if api.Networking.APIGateway == userconfig.NoneAPIGatewayType {
+			apiEndpoint = strings.Replace(apiEndpoint, "https://", "http://", 1)
+		}
 	}
 
 	if apiRes.DashboardURL != "" {
diff --git a/cli/cmd/lib_cluster_config.go b/cli/cmd/lib_cluster_config.go
index 430915429d..9238ebab5c 100644
--- a/cli/cmd/lib_cluster_config.go
+++ b/cli/cmd/lib_cluster_config.go
@@ -457,9 +457,6 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsCreds A
 	}
 	fmt.Printf("cortex will also create an s3 bucket (%s) and a cloudwatch log group (%s)%s\n\n", clusterConfig.Bucket, clusterConfig.LogGroup, privateSubnetMsg)
 
-	if clusterConfig.APILoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme {
-		fmt.Print("warning: you've configured the API load balancer to be internal; you must configure VPC Peering or an API Gateway VPC Link to connect to your APIs (see https://docs.cortex.dev/guides/vpc-peering or https://docs.cortex.dev/guides/api-gateway)\n\n")
-	}
 	if clusterConfig.OperatorLoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme {
 		fmt.Print("warning: you've configured the operator load balancer to be internal; you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/guides/vpc-peering)\n\n")
 	}
diff --git a/docs/cluster-management/config.md b/docs/cluster-management/config.md
index 7e7bba9563..692281a2ff 100644
--- a/docs/cluster-management/config.md
+++ b/docs/cluster-management/config.md
@@ -4,7 +4,7 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t
 
 The Cortex cluster may be configured by providing a configuration file to `cortex cluster up` or `cortex cluster configure` via the `--config` flag (e.g. `cortex cluster up --config cluster.yaml`). Below is the schema for the cluster configuration file, with default values shown (unless otherwise specified):
 
-<!-- CORTEX_VERSION_MINOR x2 -->
+<!-- CORTEX_VERSION_MINOR x6 -->
 ```yaml
 # cluster.yaml
 
@@ -49,6 +49,7 @@ instance_volume_type: gp2
 
 # whether the subnets used for EC2 instances should be public or private (default: "public")
 # if "public", instances will be assigned public IP addresses; if "private", instances won't have public IPs and a NAT gateway will be created to allow outgoing network requests
+# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information
 subnet_visibility: public  # must be "public" or "private"
 
 # whether to include a NAT gateway with the cluster (a NAT gateway is necessary when using private subnets)
@@ -56,11 +57,13 @@ subnet_visibility: public  # must be "public" or "private"
 nat_gateway: none  # must be "none", "single", or "highly_available" (highly_available means one NAT gateway per availability zone)
 
 # whether the API load balancer should be internet-facing or internal (default: "internet-facing")
-# note: if using "internal", you must configure VPC Peering or an API Gateway VPC Link to connect to your APIs (see https://docs.cortex.dev/guides/vpc-peering or https://docs.cortex.dev/guides/api-gateway)
+# note: if using "internal", APIs will still be accessible via the public API Gateway endpoint unless you also disable API Gateway in your API's configuration (if you do that, you must configure VPC Peering to connect to your APIs)
+# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information
 api_load_balancer_scheme: internet-facing  # must be "internet-facing" or "internal"
 
 # whether the operator load balancer should be internet-facing or internal (default: "internet-facing")
-# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (see https://docs.cortex.dev/guides/vpc-peering)
+# note: if using "internal", you must configure VPC Peering to connect your CLI to your cluster operator (https://docs.cortex.dev/v/master/guides/vpc-peering)
+# see https://docs.cortex.dev/v/master/miscellaneous/security#private-cluster for more information
 operator_load_balancer_scheme: internet-facing  # must be "internet-facing" or "internal"
 
 # CloudWatch log group for cortex (default: <cluster_name>)
@@ -73,8 +76,8 @@ tags:  # <string>: <string> map of key/value pairs
 # see https://docs.cortex.dev/v/master/cluster-management/spot-instances for additional details on spot configuration
 spot: false
 
-# see https://docs.cortex.dev/v/master/guides/subdomain-https-setup for instructions on how to set up HTTPS for APIs
-ssl_certificate_arn:  # if empty, APIs will still be accessible via HTTPS (in addition to HTTP), but will not use a trusted certificate
+# see https://docs.cortex.dev/v/master/guides/custom-domain for instructions on how to set up a custom domain
+ssl_certificate_arn:
 ```
 
 The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [API configuration](../deployments/api-configuration.md).
diff --git a/docs/cluster-management/uninstall.md b/docs/cluster-management/uninstall.md
index bf84bf94f1..5ef66dde54 100644
--- a/docs/cluster-management/uninstall.md
+++ b/docs/cluster-management/uninstall.md
@@ -43,6 +43,4 @@ aws s3 rb --force s3://<bucket>
 aws logs describe-log-groups --log-group-name-prefix=<log_group> --query logGroups[*].[logGroupName] --output text | xargs -I {} aws logs delete-log-group --log-group-name {}
 ```
 
-If you've setup API gateway and want to delete it, please follow these [instructions](../guides/api-gateway.md#cleanup).
-
-If you've configured HTTPS by specifying an SSL Certificate for a subdomain in your cluster configuration, you may wish to remove the SSL Certificate and Hosted Zone for the domain by following these [instructions](../guides/subdomain-https-setup.md#cleanup).
+If you've configured a custom domain for your APIs, you may wish to remove the SSL Certificate and Hosted Zone for the domain by following these [instructions](../guides/custom-domain.md#cleanup).
diff --git a/docs/cluster-management/update.md b/docs/cluster-management/update.md
index 5e2b5a9793..915045fabc 100644
--- a/docs/cluster-management/update.md
+++ b/docs/cluster-management/update.md
@@ -33,6 +33,6 @@ cortex version
 cortex cluster up
 ```
 
-In production environments, you can upgrade your cluster without downtime if you have a service in front of your Cortex cluster (for example, you can [configure API Gateway as a proxy service](../guides/api-gateway.md)): first spin up your new cluster, then update your client-facing service to route traffic to your new cluster, and then spin down your old cluster.
+In production environments, you can upgrade your cluster without downtime if you have a service in front of your Cortex cluster (for example, a backend server or an external API Gateway): first spin up your new cluster, then update your client-facing service to route traffic to your new cluster, and then spin down your old cluster.
 
 If you've set up HTTPS by specifying an SSL Certificate for a subdomain in your cluster configuration, you can upgrade your cluster with minimal downtime: first spin up a new cluster, then update the A record in your subdomain hosted zone to point to the API loadbalancer of your new cluster. Wait at least a 24 to 48 hours before spinning down your old cluster to allow old DNS cache to be flushed.
diff --git a/docs/deployments/api-configuration.md b/docs/deployments/api-configuration.md
index 6c3067975d..3f1a50f3cd 100644
--- a/docs/deployments/api-configuration.md
+++ b/docs/deployments/api-configuration.md
@@ -26,6 +26,8 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     cpu: <string | int | float>  # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int>  # GPU request per replica (default: 0)
     mem: <string>  # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
+  networking:
+    api_gateway: public | none  # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
   autoscaling:  # (aws only)
     min_replicas: <int>  # minimum number of replicas (default: 1)
     max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -46,7 +48,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [networking](networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## TensorFlow Predictor
 
@@ -76,6 +78,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     cpu: <string | int | float>  # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int>  # GPU request per replica (default: 0)
     mem: <string>  # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
+  networking:
+    api_gateway: public | none  # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
   autoscaling:  # (aws only)
     min_replicas: <int>  # minimum number of replicas (default: 1)
     max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -96,7 +100,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [networking](networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## ONNX Predictor
 
@@ -124,6 +128,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     cpu: <string | int | float>  # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int>  # GPU request per replica (default: 0)
     mem: <string>  # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
+  networking:
+    api_gateway: public | none  # whether to create a public API Gateway endpoint for this API (if not, the load balancer will be accessed directly) (default: public)
   autoscaling:  # (aws only)
     min_replicas: <int>  # minimum number of replicas (default: 1)
     max_replicas: <int>  # maximum number of replicas (default: 100)
@@ -144,4 +150,4 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [networking](networking.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
diff --git a/docs/deployments/networking.md b/docs/deployments/networking.md
new file mode 100644
index 0000000000..7d8a08e286
--- /dev/null
+++ b/docs/deployments/networking.md
@@ -0,0 +1,116 @@
+# Networking
+
+_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
+
+APIs are deployed with an internet-facing API Gateway by default (the API Gateway forwards requests to the API load balancer). Each API can be independently configured to not create the API Gateway endpoint by setting `api_gateway: none` in the `networking` field of the [api configuration](api-configuration.md). If the API Gateway endpoint is not created, your API can still be accessed via the API load balancer; `cortex get API_NAME` will show the load balancer endpoint if API Gateway is disabled. API Gateway is enabled by default, and is generally recommended unless it doesn't support your use case due to limitations such as the 29 second request timeout, or if you are keeping your APIs internal to your VPC. See below for common configurations.
+
+By default, the API load balancer is internet-facing. You can configure your API load balancer to be internal by setting `api_load_balancer_scheme: internal` in your [cluster configuration](../cluster-management/config.md) file (before creating your cluster). This will force external traffic to go through your API Gateway endpoint, or if you disabled API Gateway for your API, it will make your API only accessible through VPC Peering. Note that if API Gateway is used, endpoints will be internet-facing regardless of `api_load_balancer_scheme`. See below for common configurations.
+
+## Common API networking configurations
+
+### Public https endpoint (with API Gateway)
+
+This is the most common configuration for public APIs. [Custom domains](../guides/custom-domain.md) can be used with this setup, but are not required.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internal
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: public  # this is the default, so can be omitted
+```
+
+### Internal https endpoint
+
+You can configure your API to be internal (i.e. not internet-facing). If you do this, you must use [VPC Peering](../guides/vpc-peering.md) to connect to your APIs.
+
+The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](../guides/custom-domain.md), which will use ACM to provision SSL certs for your domain.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internal  # this is the default, so can be omitted
+
+# use this to configure a custom domain
+# if you don't use a custom domain, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`)
+ssl_certificate_arn: arn:aws:acm:us-west-2:***:certificate/***
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```
+
+### Internal http endpoint
+
+You can configure your API to be internal (i.e. not internet-facing). If you do this, you must use [VPC Peering](../guides/vpc-peering.md) to connect to your APIs.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internal  # this is the default, so can be omitted
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```
+
+### Public https endpoint (without API Gateway)
+
+API gateway is generally recommended for internet-facing https APIs, but there may be a situation where you don't wish to use it (e.g. requests take longer than 29 seconds to complete, which is the max for API Gateway). In this case, clients can connect directly to the API load balancer.
+
+The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`). Alternatively, you can set up a [custom domain](../guides/custom-domain.md), which will use ACM to provision SSL certs for your domain.
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internet-facing  # this is the default, so can be omitted
+
+# use this to configure a custom domain
+# if you don't use a custom domain, clients will need to skip certificate verification when making HTTPS requests (e.g. `curl -k`)
+ssl_certificate_arn: arn:aws:acm:us-west-2:***:certificate/***
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```
+
+### Public http endpoint
+
+If you don't wish to use https for your public API, you can simply disable API gateway (your API will be accessed directly via the API load balancer):
+
+```yaml
+# cluster.yaml
+
+api_load_balancer_scheme: internet-facing  # this is the default, so can be omitted
+```
+
+```yaml
+# cortex.yaml
+
+- name: my-api
+  ...
+  networking:
+    api_gateway: none
+```
diff --git a/docs/guides/api-gateway.md b/docs/guides/api-gateway.md
deleted file mode 100644
index f2cdc82a8e..0000000000
--- a/docs/guides/api-gateway.md
+++ /dev/null
@@ -1,181 +0,0 @@
-# Set up AWS API Gateway
-
-_WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
-
-We have plans to automatically configure API gateway when creating a Cortex API ([#326](https://github.com/cortexlabs/cortex/issues/326)), but until that's implemented, it's fairly straightforward to set it up manually.
-
-One reason to use API Gateway is to get HTTPS working with valid certificates (either by using AWS's built-in certificates, or using your own via custom domains and the AWS Certificate Manager). Another reason could be to expose your APIs to the internet when configuring Cortex to use an internal load balancer.
-
-If your API load balancer is internet-facing (which is the default, or you explicitly set `api_load_balancer_scheme: internet-facing` in your cluster configuration file before creating your cluster), use the [first section](#if-your-api-load-balancer-is-internet-facing) of this guide.
-
-If your API load balancer is internal (i.e. you set `api_load_balancer_scheme: internal` in your cluster configuration file before creating your cluster), use the [second section](#if-your-api-load-balancer-is-internal) of this guide.
-
-## If your API load balancer is internet-facing
-
-### Step 1
-
-Go to the [API Gateway console](https://console.aws.amazon.com/apigateway/home), select "REST API" under "Choose an API type", and click "Build"
-
-![step 1](https://user-images.githubusercontent.com/808475/78293216-18269e80-74dd-11ea-9e68-86922c2cbc7c.png)
-
-### Step 2
-
-Select "REST" and "New API", name your API (e.g. "cortex"), select either "Regional" or "Edge optimized" (depending on your preference), and click "Create API"
-
-![step 2](https://user-images.githubusercontent.com/808475/78293434-66d43880-74dd-11ea-92d6-692158171a3f.png)
-
-### Step 3
-
-Select "Actions" > "Create Resource"
-
-![step 3](https://user-images.githubusercontent.com/808475/80154502-8b6b7f80-8574-11ea-9c78-7d9f277bf55b.png)
-
-### Step 4
-
-Select "Configure as proxy resource" and "Enable API Gateway CORS", and click "Create Resource"
-
-![step 4](https://user-images.githubusercontent.com/808475/80154565-ad650200-8574-11ea-8753-808cd35902e2.png)
-
-### Step 5
-
-Select "HTTP Proxy" and set "Endpoint URL" to "http://<BASE_API_ENDPOINT>/{proxy}". You can get your base API endpoint via `cortex cluster info`; make sure to prepend `http://` and append `/{proxy}`. For example, mine is: `http://a9eaf69fd125947abb1065f62de59047-81cdebc0275f7d96.elb.us-west-2.amazonaws.com/{proxy}`.
-
-Leave "Content Handling" set to "Passthrough" and Click "Save".
-
-![step 5](https://user-images.githubusercontent.com/808475/80154735-13ea2000-8575-11ea-83ca-58f182df83c6.png)
-
-### Step 6
-
-Select "Actions" > "Deploy API"
-
-![step 6](https://user-images.githubusercontent.com/808475/80154802-2c5a3a80-8575-11ea-9ab3-de89885fd658.png)
-
-### Step 7
-
-Create a new stage (e.g. "dev") and click "Deploy"
-
-![step 7](https://user-images.githubusercontent.com/808475/80154859-4431be80-8575-11ea-9305-50384b1f9847.png)
-
-### Step 8
-
-Copy your "Invoke URL"
-
-![step 8](https://user-images.githubusercontent.com/808475/80154911-5dd30600-8575-11ea-9682-1a7328783011.png)
-
-### Using your new endpoint
-
-You may now use the "Invoke URL" in place of your APIs endpoint in your client. For example, this curl request:
-
-```bash
-curl http://a9eaf69fd125947abb1065f62de59047-81cdebc0275f7d96.elb.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
-```
-
-Would become:
-
-```bash
-curl https://31qjv48rs6.execute-api.us-west-2.amazonaws.com/dev/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
-```
-
-### Cleanup
-
-Delete the API Gateway before spinning down your Cortex cluster:
-
-![delete api gateway](https://user-images.githubusercontent.com/808475/80155073-bdc9ac80-8575-11ea-99a1-95c0579da79e.png)
-
-## If your API load balancer is internal
-
-### Step 1
-
-Navigate to AWS's EC2 Load Balancer dashboard and locate the Cortex API load balancer. You can determine which is the API load balancer by inspecting the `kubernetes.io/service-name` tag:
-
-![step 1](https://user-images.githubusercontent.com/808475/80142777-961c1980-8560-11ea-9202-40964dbff5e9.png)
-
-Take note of the load balancer's name.
-
-### Step 2
-
-Go to the [API Gateway console](https://console.aws.amazon.com/apigateway/home), click "VPC Links" on the left sidebar, and click "Create"
-
-![step 2](https://user-images.githubusercontent.com/808475/80142466-0c6c4c00-8560-11ea-8293-eb5e5572b797.png)
-
-### Step 3
-
-Select "VPC link for REST APIs", name your VPC link (e.g. "cortex"), select the API load balancer (identified in Step 1), and click "Create"
-
-![step 3](https://user-images.githubusercontent.com/808475/80143027-03c84580-8561-11ea-92de-9ed0a5dfa593.png)
-
-### Step 4
-
-Wait for the VPC link to be created (it will take a few minutes)
-
-![step 4](https://user-images.githubusercontent.com/808475/80144088-bbaa2280-8562-11ea-901b-8520eb253df7.png)
-
-### Step 5
-
-Go to the [API Gateway console](https://console.aws.amazon.com/apigateway/home), select "REST API" under "Choose an API type", and click "Build"
-
-![step 5](https://user-images.githubusercontent.com/808475/78293216-18269e80-74dd-11ea-9e68-86922c2cbc7c.png)
-
-### Step 6
-
-Select "REST" and "New API", name your API (e.g. "cortex"), select either "Regional" or "Edge optimized" (depending on your preference), and click "Create API"
-
-![step 6](https://user-images.githubusercontent.com/808475/78293434-66d43880-74dd-11ea-92d6-692158171a3f.png)
-
-### Step 7
-
-Select "Actions" > "Create Resource"
-
-![step 7](https://user-images.githubusercontent.com/808475/80141938-3cffb600-855f-11ea-9c1c-132ca4503b7a.png)
-
-### Step 8
-
-Select "Configure as proxy resource" and "Enable API Gateway CORS", and click "Create Resource"
-
-![step 8](https://user-images.githubusercontent.com/808475/80142124-80f2bb00-855f-11ea-8e4e-9413146e0815.png)
-
-### Step 9
-
-Select "VPC Link", select "Use Proxy Integration", choose your newly-created VPC Link, and set "Endpoint URL" to "http://<BASE_API_ENDPOINT>/{proxy}". You can get your base API endpoint via `cortex cluster info`; make sure to prepend `http://` and append `/{proxy}`. For example, mine is: `http://a5044e34a352d44b0945adcd455c7fa3-32fa161d3e5bcbf9.elb.us-west-2.amazonaws.com/{proxy}`. Click "Save"
-
-![step 9](https://user-images.githubusercontent.com/808475/80147407-4f322200-8568-11ea-8ef5-df5164c1375f.png)
-
-### Step 10
-
-Select "Actions" > "Deploy API"
-
-![step 10](https://user-images.githubusercontent.com/808475/80147555-86083800-8568-11ea-86af-1b1e38c9d322.png)
-
-### Step 11
-
-Create a new stage (e.g. "dev") and click "Deploy"
-
-![step 11](https://user-images.githubusercontent.com/808475/80147631-a7692400-8568-11ea-8a09-13dbd50b17b9.png)
-
-### Step 12
-
-Copy your "Invoke URL"
-
-![step 12](https://user-images.githubusercontent.com/808475/80147716-c798e300-8568-11ea-9aef-7dd6fdf4a68a.png)
-
-### Using your new endpoint
-
-You may now use the "Invoke URL" in place of your APIs endpoint in your client. For example, this curl request:
-
-```bash
-curl http://a5044e34a352d44b0945adcd455c7fa3-32fa161d3e5bcbf9.elb.us-west-2.amazonaws.com/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
-```
-
-Would become:
-
-```bash
-curl https://lrivodooqh.execute-api.us-west-2.amazonaws.com/dev/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
-```
-
-### Cleanup
-
-Delete the API Gateway and VPC Link before spinning down your Cortex cluster:
-
-![delete api](https://user-images.githubusercontent.com/808475/80149163-05970680-856b-11ea-9f82-61f4061a3321.png)
-
-![delete vpc link](https://user-images.githubusercontent.com/808475/80149204-1ba4c700-856b-11ea-83f7-9741c78b6b95.png)
diff --git a/docs/guides/subdomain-https-setup.md b/docs/guides/custom-domain.md
similarity index 59%
rename from docs/guides/subdomain-https-setup.md
rename to docs/guides/custom-domain.md
index 7a27817488..411a5f8ea1 100644
--- a/docs/guides/subdomain-https-setup.md
+++ b/docs/guides/custom-domain.md
@@ -1,38 +1,42 @@
-# Set up HTTPS on a subdomain
+# Set up a custom domain
 
 _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
 
-The recommended way to set up HTTPS with trusted certificates is by using [API Gateway](../api-gateway.md) because it's simpler and enables you to use API Gateway features such as rate limiting (it also supports custom domains). This guide is only recommended if HTTPS is required and you don't wish to use API Gateway (e.g. it doesn't support your use case due to limitations such as the 29 second request timeout).
+You can use any custom domain (that you own) for your prediction endpoints. For example, you can make your API accessible via `api.example.com/iris-classifier`. This guide will demonstrate how to create a dedicated subdomain in AWS Route 53 and use an SSL certificate provisioned by AWS Certificate Manager (ACM).
 
-This guide will demonstrate how to create a dedicated subdomain in AWS Route 53 and use an SSL certificate provisioned by AWS Certificate Manager (ACM) to support HTTPS traffic to Cortex APIs. By the end of this guide, you will have a Cortex cluster with APIs accessible via `https://<your-subdomain>/<api-endpoint>`.
+There are two methods for achieving this, and which method to use depends on whether you're using API Gateway or not (without API Gateway, requests are sent directly to the API load balancer instead). API Gateway is enabled by default.
 
-You must own a domain and be able to modify its DNS records.
+The first set of steps are the same whether or not you're using API Gateway. If you aren't using API gateway, follow this guide before creating your Cortex cluster.
 
-## Step 1
+_note: you must own a domain and be able to modify its DNS records to complete this guide._
+
+## Generate a certificate for your domain
+
+### Step 1
 
 Decide on a subdomain that you want to dedicate to Cortex APIs. For example if your domain is `example.com`, a valid subdomain can be `api.example.com`.
 
 This guide will use `cortexlabs.dev` as the example domain and `api.cortexlabs.dev` as the subdomain.
 
-## Step 2
+### Step 2
 
 We will set up a hosted zone on Route 53 to manage the DNS records for the subdomain. Go to the [Route 53 console](https://console.aws.amazon.com/route53/home) and click "Hosted Zones".
 
 ![step 2](https://user-images.githubusercontent.com/4365343/82210754-a6b07d00-98dd-11ea-9cec-9f6b07282aa8.png)
 
-## Step 3
+### Step 3
 
 Click "Create Hosted Zone" and then enter your subdomain as the domain name for your hosted zone and click "Create".
 
 ![step 3](https://user-images.githubusercontent.com/4365343/82211091-4968fb80-98de-11ea-8ec4-8d26d1aea77a.png)
 
-## Step 4
+### Step 4
 
 Take note of the values in the NS record.
 
 ![step 4](https://user-images.githubusercontent.com/4365343/82211656-386cba00-98df-11ea-8c86-4961082b5f49.png)
 
-## Step 5
+### Step 5
 
 Navigate to your root DNS service provider (e.g. Google Domains, AWS Route 53, Go Daddy). Your root DNS service provider is typically the registrar where you purchased your domain (unless you have transferred DNS management elsewhere). The procedure for adding DNS records may vary based on your service provider.
 
@@ -42,61 +46,105 @@ We are going to add an NS (name server) record that specifies that any traffic t
 
 ![step 5](https://user-images.githubusercontent.com/4365343/82211959-bcbf3d00-98df-11ea-834d-692b3bcf9332.png)
 
-## Step 6
+### Step 6
 
 We are now going to create an SSL certificate for your subdomain. Go to the [ACM console](https://us-west-2.console.aws.amazon.com/acm/home) and click "Get Started" under the "Provision certificates" section.
 
 ![step 6](https://user-images.githubusercontent.com/4365343/82202340-c04ac800-98cf-11ea-9472-89dd6d67eb0d.png)
 
-## Step 7
+### Step 7
 
 Select "Request a public certificate" and then "Request a certificate".
 
 ![step 7](https://user-images.githubusercontent.com/4365343/82202654-3e0ed380-98d0-11ea-8c57-025f0b69c54f.png)
 
-## Step 8
+### Step 8
 
 Enter your subdomain and then click "Next".
 
 ![step 8](https://user-images.githubusercontent.com/4365343/82224652-1cbedf00-98f2-11ea-912b-466cee2f6e25.png)
 
-## Step 9
+### Step 9
 
 Select "DNS validation" and then click "Next".
 
 ![step 9](https://user-images.githubusercontent.com/4365343/82205311-66003600-98d4-11ea-90e3-da7e8b0b2b9c.png)
 
-## Step 10
+### Step 10
 
 Add tags for searchability (optional) then click "Review".
 
 ![step 10](https://user-images.githubusercontent.com/4365343/82206485-52ee6580-98d6-11ea-95a9-1d0ebafc178a.png)
 
-## Step 11
+### Step 11
 
 Click "Confirm and request".
 
 ![step 11](https://user-images.githubusercontent.com/4365343/82206602-84ffc780-98d6-11ea-9f2f-ce383404ec67.png)
 
-## Step 12
+### Step 12
 
 Click "Create record in Route 53". A popup will appear indicating that a Record is going to be added to Route 53. Click "Create" to automatically add the DNS record to your subdomain's hosted zone. Then click "Continue".
 
 ![step 12](https://user-images.githubusercontent.com/4365343/82223539-c8ffc600-98f0-11ea-93a2-044aa0c9670d.png)
 
-## Step 13
+### Step 13
 
 Wait for the Certificate Status to be "issued". This might take a few minutes.
 
 ![step 13](https://user-images.githubusercontent.com/4365343/82209663-a616e700-98db-11ea-95cb-c6efedadb942.png)
 
-## Step 14
+### Step 14
 
 Take note of the certificate's ARN. The certificate is ineligible for renewal because it is currently not being used. It will be eligible for renewal after it is used in Cortex.
 
 ![step 14](https://user-images.githubusercontent.com/4365343/82222684-9e613d80-98ef-11ea-98c0-5a20b457f062.png)
 
-## Step 15
+If you are using API Gateway, continue to the next section. Otherwise (i.e. clients connect directly to your API load balancer) proceed to [configure the API load balancer](#configure-the-api-load-balancer).
+
+## Configure API Gateway
+
+### Step 1
+
+Navigate to the [API Gateway console](https://us-west-2.console.aws.amazon.com/apigateway) (make sure that the region in top right matches your Cortex region). Click "Custom domain names" and then click "Create".
+
+![step 1](https://user-images.githubusercontent.com/808475/84082403-a105fe80-a994-11ea-8015-0df9aeef07b1.png)
+
+### Step 2
+
+Type in the name of your domain and choose the Regional endpoint type, TLS 1.2, and the ACM certificate that you created earlier. Then click "Create".
+
+![step 2](https://user-images.githubusercontent.com/808475/84082448-b8dd8280-a994-11ea-9ef7-6bb33b3a403b.png)
+
+### Step 3
+
+This should take you back to the custom domains page, and you should see your new custom domain. Make sure your API is selected. Take note of the API Gateway domain name (we will use this later), and click "Configure API mappings".
+
+![step 3](https://user-images.githubusercontent.com/808475/84084960-62267780-a999-11ea-8a6b-4be9cfca2a9c.png)
+
+### Step 4
+
+Click "Add new mapping".
+
+![step 4](https://user-images.githubusercontent.com/808475/84082516-d7dc1480-a994-11ea-9cae-d4fb1ac1e767.png)
+
+### Step 5
+
+Select your API Gateway, choose the "$default" stage, and lave "Path" blank. Click Save.
+
+![step 5](https://user-images.githubusercontent.com/808475/84082553-e5919a00-a994-11ea-9bc3-cf5f9eb869eb.png)
+
+### Step 6
+
+Go back to the [Route 53 console](https://console.aws.amazon.com/route53/home#hosted-zones:) and select the hosted zone you created earlier. Click "Create Record Set", and add an Alias record that routes traffic to your Cortex cluster's API Gateway (the target name should match the "API Gateway domain name" show in step 3). Leave "Name" blank.
+
+![step 6](https://user-images.githubusercontent.com/808475/84083366-54232780-a996-11ea-9bc6-2c9945a160d4.png)
+
+Proceed to [using your new endpoint](#using-your-new-endpoint)
+
+## Configure the API load balancer
+
+### Step 1
 
 Add the following field to your cluster configuration:
 
@@ -114,21 +162,21 @@ and then create a Cortex cluster.
 $ cortex cluster up --config cluster.yaml
 ```
 
-## Step 16
+### Step 2
 
 After your cluster has been created, navigate to your [EC2 Load Balancer console](https://us-west-2.console.aws.amazon.com/ec2/v2/home#LoadBalancers:sort=loadBalancerName) and locate the Cortex API load balancer. You can determine which is the API load balancer by inspecting the `kubernetes.io/service-name` tag.
 
 Take note of the load balancer's name.
 
-![step 16](https://user-images.githubusercontent.com/808475/80142777-961c1980-8560-11ea-9202-40964dbff5e9.png)
+![step 2](https://user-images.githubusercontent.com/808475/80142777-961c1980-8560-11ea-9202-40964dbff5e9.png)
 
-## Step 17
+### Step 3
 
-Go to the hosted zone you created in the [Route 53 console](https://console.aws.amazon.com/route53/home#hosted-zones:) and add an Alias record that routes traffic to your Cortex cluster's API load balancer (leave "Name" blank).
+Go back to the [Route 53 console](https://console.aws.amazon.com/route53/home#hosted-zones:) and select the hosted zone you created earlier. Click "Create Record Set", and add an Alias record that routes traffic to your Cortex cluster's API load balancer (leave "Name" blank).
 
-![step 17](https://user-images.githubusercontent.com/4365343/82228372-08311580-98f7-11ea-9faa-24050fc432d8.png)
+![step 3](https://user-images.githubusercontent.com/808475/84083422-6ac97e80-a996-11ea-9679-be37268a2133.png)
 
-### Using your new endpoint
+## Using your new endpoint
 
 Wait a few minutes to allow the DNS changes to propagate. You may now use your subdomain in place of your API load balancer endpoint in your client. For example, this curl request:
 
@@ -143,22 +191,21 @@ Would become:
 curl https://api.cortexlabs.dev/iris-classifier -X POST -H "Content-Type: application/json" -d @sample.json
 ```
 
-### Debugging connectivity issues
+## Debugging connectivity issues
 
-You may encounter connectivity issues due to cached DNS records that haven't expired yet. It could take anywhere from a few minutes to 48 hours for DNS cache to completely refresh.
+You could run into connectivity issues if you make a request to your API without waiting long enough for your DNS records to propagate after creating them (it usually takes 5-10 mintues). If you are updating existing DNS records, it could take anywhere from a few minutes to 48 hours for the DNS cache to expire (until then, your previous DNS configuration will be used).
 
-You could run into connectivity issues if you make a request to your API without waiting long enough after step 17.
+To test connectivity, try the following steps:
 
-To test connectivity try the following steps:
 1. Deploy any api (e.g. iris-classifier).
 1. Make an HTTPS GET request to the your api e.g. `curl https://api.cortexlabs.dev/iris-classifier` or enter the url in your browser.
 1. If you run into an error such as `curl: (6) Could not resolve host: api.cortexlabs.dev` wait a few minutes and make the HTTPS Get request from another device that hasn't made a request to that url in a while. A successful request looks like this:
-```
+
+```text
 {"message":"make a prediction by sending a post request to this endpoint with a json payload",...}
 ```
 
-
-### Cleanup
+## Cleanup
 
 Spin down your Cortex cluster.
 
diff --git a/docs/guides/vpc-peering.md b/docs/guides/vpc-peering.md
index 3997914c6b..1a44d538dc 100644
--- a/docs/guides/vpc-peering.md
+++ b/docs/guides/vpc-peering.md
@@ -2,9 +2,9 @@
 
 _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
 
-If you are using an internal operator load balancer (i.e. you set `operator_load_balancer_scheme: internal` in your cluster configuration file before creating your cluster), you can use VPC Peering to enable your Cortex CLI to connect to your cluster operator from another VPC so that you may run `cortex` commands.
+If you are using an internal operator load balancer (i.e. you set `operator_load_balancer_scheme: internal` in your cluster configuration file before creating your cluster), you can use VPC Peering to enable your Cortex CLI to connect to your cluster operator from another VPC so that you may run `cortex` commands. Note that because the operator validates that the CLI user is an active IAM user in the same AWS account as the Cortex cluster, it is usually unnecessary to configure the operator's load balancer to be internal.
 
-If you are using an internal API load balancer (i.e. you set `api_load_balancer_scheme: internal` in your cluster configuration file before creating your cluster), you can use VPC Peering to enable prediction requests from another VPC. _Note: if you intend to create a public endpoint for your internal API load balancer, see our [API Gateway guide](api-gateway.md)._
+If you are using an internal API load balancer (i.e. you set `api_load_balancer_scheme: internal` in your cluster configuration file before creating your cluster) and you disabled API Gateway for your API (i.e. you set `api_gateway: none` in the `networking` field of your api configuration), you can use VPC Peering to enable prediction requests from another VPC.
 
 This guide illustrates how to create a VPC Peering connection between a VPC of your choice and the Cortex load balancers.
 
diff --git a/docs/miscellaneous/architecture.md b/docs/miscellaneous/architecture.md
index 80348100ec..88940898ec 100644
--- a/docs/miscellaneous/architecture.md
+++ b/docs/miscellaneous/architecture.md
@@ -2,6 +2,6 @@
 
 _WARNING: you are on the master branch, please refer to the docs on the branch that matches your `cortex version`_
 
-![architecture diagram](https://user-images.githubusercontent.com/808475/81362760-7293bb80-9096-11ea-92e3-475c673b3dbc.png)
+![architecture diagram](https://user-images.githubusercontent.com/808475/83995909-92c1cf00-a90f-11ea-983f-c96117e42aa3.png)
 
 _note: this diagram is simplified for illustrative purposes_
diff --git a/docs/miscellaneous/security.md b/docs/miscellaneous/security.md
index 10ebecf327..a387c2b31a 100644
--- a/docs/miscellaneous/security.md
+++ b/docs/miscellaneous/security.md
@@ -4,13 +4,17 @@ _WARNING: you are on the master branch, please refer to the docs on the branch t
 
 _The information on this page assumes you are running Cortex on AWS. If you're only deploying locally, this information does not apply (although AWS credentials can still be passed into your APIs, and can be specified with `cortex env configure local`)_
 
-## Private cluster
+## Private cluster subnets
 
-By default, instances are created in public subnets and are assigned public IP addresses. You can configure all instances in your cluster to use private subnets by setting `subnet_visibility: private` in your [cluster configuration](config.md) file before creating your cluster (e.g. via `cortex cluster up -c cluster.yaml`). If private subnets are used, instances will not have public IP addresses, and Cortex will create a NAT gateway to allow outgoing network requests.
+By default, instances are created in public subnets and are assigned public IP addresses. You can configure all instances in your cluster to use private subnets by setting `subnet_visibility: private` in your [cluster configuration](../cluster-management/config.md) file before creating your cluster. If private subnets are used, instances will not have public IP addresses, and Cortex will create a NAT gateway to allow outgoing network requests.
 
-By default, the API load balancer is internet-facing, and therefore APIs are publicly accessible. You can configure your API load balancer to be internal by setting `api_load_balancer_scheme: internal` in your cluster configuration file (before creating your cluster). If you do this, you will need to configure [VPC Peering](../guides/vpc-peering.md) or an [API Gateway with VPC Link](../guides/api-gateway.md) to make prediction requests to your APIs.
+## Private APIs
 
-By default, the Cortex cluster operator's load balancer is internet-facing, and therefore publicly accessible (the operator is what the `cortex` CLI connects to). The operator validates that the requester is an active IAM user in the same AWS account as the Cortex cluster (see [below](#cli)). Therefore it is usually unnecessary to configure the operator's load balancer to be internal, but this can be done by by setting `operator_load_balancer_scheme: internal` in your cluster configuration file. If you do this, you will need to configure [VPC Peering](../guides/vpc-peering.md) to allow your CLI to connect to the Cortex operator (this will be necessary to run any `cortex` commands).
+See [networking](../deployments/networking.md) for a discussion of API visibility.
+
+## Private operator
+
+By default, the Cortex cluster operator's load balancer is internet-facing, and therefore publicly accessible (the operator is what the `cortex` CLI connects to). The operator validates that the CLI user is an active IAM user in the same AWS account as the Cortex cluster (see [below](#cli)). Therefore it is usually unnecessary to configure the operator's load balancer to be internal, but this can be done by by setting `operator_load_balancer_scheme: internal` in your [cluster configuration](../cluster-management/config.md) file. If you do this, you will need to configure [VPC Peering](../guides/vpc-peering.md) to allow your CLI to connect to the Cortex operator (this will be necessary to run any `cortex` commands).
 
 ## IAM permissions
 
@@ -30,20 +34,12 @@ It is recommended to use an IAM user with the `AdministratorAccess` policy to cr
 
 ### Operator
 
-The operator requires read permissions for any S3 bucket containing exported models, read and write permissions for the Cortex S3 bucket, read and write permissions for the Cortex CloudWatch log group, read and write permissions for CloudWatch metrics, and read permissions for ECR. The policy below may be used to restrict the Operator's access:
+The operator requires read permissions for any S3 bucket containing exported models, read/write permissions for the Cortex S3 bucket, read permissions for ECR, read permissions for ELB, read/write permissions for API Gateway, read/write permissions for CloudWatch metrics, and read/write permissions for the Cortex CloudWatch log group. The policy below may be used to restrict the Operator's access:
 
 ```json
 {
     "Version": "2012-10-17",
     "Statement": [
-        {
-            "Sid": "VisualEditor0",
-            "Effect": "Allow",
-            "Action": [
-                "sts:GetCallerIdentity"
-            ],
-            "Resource": "*"
-        },
         {
             "Effect": "Allow",
             "Action": "s3:*",
@@ -51,9 +47,12 @@ The operator requires read permissions for any S3 bucket containing exported mod
         },
         {
             "Action": [
+                "sts:GetCallerIdentity",
+                "ecr:GetAuthorizationToken",
+                "elasticloadbalancing:Describe*",
+                "apigateway:*",
                 "cloudwatch:*",
-                "logs:*",
-                "ecr:GetAuthorizationToken"
+                "logs:*"
             ],
             "Effect": "Allow",
             "Resource": "*"
@@ -67,11 +66,3 @@ It is possible to further restrict access by limiting access to particular resou
 ### CLI
 
 In order to connect to the operator via the CLI, you must provide valid AWS credentials for any user with access to the account. No special permissions are required. The CLI can be configured using the `cortex env configure ENVIRONMENT_NAME` command (e.g. `cortex env configure aws`).
-
-## HTTPS
-
-All APIs are accessible via HTTPS (in addition to HTTP). The SSL certificate is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification (e.g. `curl -k`) when using HTTPS.
-
-To use AWS's default (trusted) certificate or your own certificate, you can [set up API Gateway](../guides/api-gateway.md) to be a proxy to your Cortex cluster. Since the API load balancer created by Cortex is internet-facing by default, in order to force traffic to go through your API Gateway endpoint, you will also need to set `api_load_balancer_scheme: internal` in your [cluster configuration](config.md) file (before creating your cluster).
-
-Alternatively, you can create an SSL certificate for your custom domain, and use this certificate in the API load balancer (see our instructions for [setting up HTTPS on a subdomain](../guides/subdomain-https-setup.md)).
diff --git a/docs/summary.md b/docs/summary.md
index ad4488318f..86ff599566 100644
--- a/docs/summary.md
+++ b/docs/summary.md
@@ -16,6 +16,7 @@
 * [API configuration](deployments/api-configuration.md)
 * [API deployment](deployments/deployment.md)
 * [Autoscaling](deployments/autoscaling.md)
+* [Networking](deployments/networking.md)
 * [Compute](deployments/compute.md)
 * [Using GPUs](deployments/gpus.md)
 * [Prediction monitoring](deployments/prediction-monitoring.md)
@@ -49,8 +50,7 @@
 
 * [Multi-model endpoints](guides/multi-model.md)
 * [View API metrics](guides/metrics.md)
-* [Set up AWS API gateway](guides/api-gateway.md)
-* [Set up HTTPS on a subdomain](guides/subdomain-https-setup.md)
+* [Set up a custom domain](guides/custom-domain.md)
 * [Set up VPC peering](guides/vpc-peering.md)
 * [Add a batch runner API](guides/batch-runner.md)
 * [SSH into worker instance](guides/ssh-instance.md)
diff --git a/examples/sklearn/iris-classifier/README.md b/examples/sklearn/iris-classifier/README.md
index 98cb3d46fe..ee23be336b 100644
--- a/examples/sklearn/iris-classifier/README.md
+++ b/examples/sklearn/iris-classifier/README.md
@@ -124,7 +124,7 @@ Create a `cortex.yaml` file and add the configuration below and replace `cortex-
       key: sklearn/iris-classifier/model.pkl
 ```
 
-Here are the complete [API configuration docs](../../../docs/deployments/api-configuration).
+Here are the complete [API configuration docs](../../../docs/deployments/api-configuration.md).
 
 <br>
 
diff --git a/manager/create_gateway_integration.py b/manager/create_gateway_integration.py
new file mode 100644
index 0000000000..90af0061a7
--- /dev/null
+++ b/manager/create_gateway_integration.py
@@ -0,0 +1,68 @@
+# Copyright 2020 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import boto3
+import sys
+import traceback
+import os
+
+
+def get_istio_api_gateway_elb_arn(client_elb):
+    paginator = client_elb.get_paginator("describe_load_balancers")
+    for elb_page in paginator.paginate():
+        for elb in elb_page["LoadBalancers"]:
+            elb_arn = elb["LoadBalancerArn"]
+            elb_tags = client_elb.describe_tags(ResourceArns=[elb_arn])["TagDescriptions"][0][
+                "Tags"
+            ]
+            for tag in elb_tags:
+                if (
+                    tag["Key"] == "kubernetes.io/service-name"
+                    and tag["Value"] == "istio-system/ingressgateway-apis"
+                ):
+                    return elb_arn
+    raise Exception("Could not find ingressgateway-apis ELB")
+
+
+def get_listener_arn(elb_arn, client_elb):
+    paginator = client_elb.get_paginator("describe_listeners")
+    for listener_page in paginator.paginate(LoadBalancerArn=elb_arn):
+        for listener in listener_page["Listeners"]:
+            if listener["Port"] == 80:
+                return listener["ListenerArn"]
+    raise Exception("Could not find ELB port 80 listener")
+
+
+def create_gateway_intregration(api_id, vpc_link_id):
+    client_elb = boto3.client("elbv2", region_name=os.environ["CORTEX_REGION"])
+    client_apigateway = boto3.client("apigatewayv2", region_name=os.environ["CORTEX_REGION"])
+
+    elb_arn = get_istio_api_gateway_elb_arn(client_elb)
+    listener_arn = get_listener_arn(elb_arn, client_elb)
+
+    client_apigateway.create_integration(
+        ApiId=api_id,
+        ConnectionId=vpc_link_id,
+        ConnectionType="VPC_LINK",
+        IntegrationType="HTTP_PROXY",
+        IntegrationUri=listener_arn,
+        PayloadFormatVersion="1.0",
+        IntegrationMethod="ANY",
+    )
+
+
+if __name__ == "__main__":
+    api_id = str(sys.argv[1])
+    vpc_link_id = str(sys.argv[2])
+    create_gateway_intregration(api_id, vpc_link_id)
diff --git a/manager/install.sh b/manager/install.sh
index dd48eaf1d3..2c107ff8d3 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-set -e
+set -eo pipefail
 
 export CORTEX_VERSION=master
 EKSCTL_TIMEOUT=45m
@@ -44,6 +44,10 @@ function ensure_eks() {
     if [ "$CORTEX_SPOT" == "True" ]; then
       asg_info=$(aws autoscaling describe-auto-scaling-groups --region $CORTEX_REGION --query "AutoScalingGroups[?contains(Tags[?Key==\`alpha.eksctl.io/cluster-name\`].Value, \`$CORTEX_CLUSTER_NAME\`)]|[?contains(Tags[?Key==\`alpha.eksctl.io/nodegroup-name\`].Value, \`ng-cortex-worker-spot\`)]")
       asg_name=$(echo "$asg_info" | jq -r 'first | .AutoScalingGroupName')
+      if [ "$asg_name" = "" ] || [ "$asg_name" = "null" ]; then
+        echo -e "unable to find autoscaling group name from info:\n$asg_info"
+        exit 1
+      fi
       aws autoscaling suspend-processes --region $CORTEX_REGION --auto-scaling-group-name $asg_name --scaling-processes AZRebalance
     fi
 
@@ -90,17 +94,25 @@ function ensure_eks() {
     asg_spot_name=$(echo "$asg_spot_info" | jq -r 'first | .AutoScalingGroupName')
   fi
 
+  if [[ -z "$asg_spot_name" ]] && [[ -z "$asg_on_demand_name" ]]; then
+    echo "error: unable to find valid autoscaling groups"
+    exit 1
+  fi
+
   if [[ -z $asg_spot_name ]]; then
     asg_min_size=$(echo "$asg_on_demand_info" | jq -r 'first | .MinSize')
     asg_max_size=$(echo "$asg_on_demand_info" | jq -r 'first | .MaxSize')
+    if [ "$asg_min_size" = "" ] || [ "$asg_min_size" = "null" ] || [ "$asg_max_size" = "" ] || [ "$asg_max_size" = "null" ]; then
+      echo -e "unable to find on-demand autoscaling group size from info:\n$asg_on_demand_info"
+      exit 1
+    fi
   else
     asg_min_size=$(echo "$asg_spot_info" | jq -r 'first | .MinSize')
     asg_max_size=$(echo "$asg_spot_info" | jq -r 'first | .MaxSize')
-  fi
-
-  if [[ -z "$asg_spot_name" ]] && [[ -z "$asg_on_demand_name" ]]; then
-    echo "error: unable to find valid autoscaling groups"
-    exit 1
+    if [ "$asg_min_size" = "" ] || [ "$asg_min_size" = "null" ] || [ "$asg_max_size" = "" ] || [ "$asg_max_size" = "null" ]; then
+      echo -e "unable to find spot autoscaling group size from info:\n$asg_spot_info"
+      exit 1
+    fi
   fi
 
   asg_on_demand_resize_flags=""
@@ -152,8 +164,52 @@ function ensure_eks() {
 function main() {
   mkdir -p $CORTEX_CLUSTER_WORKSPACE
 
+  # create API Gateway
+  if [ "$arg1" != "--update" ]; then
+    create_api_output=$(aws apigatewayv2 create-api --tags $CORTEX_TAGS --region $CORTEX_REGION --name $CORTEX_CLUSTER_NAME --protocol-type HTTP)
+    api_id=$(echo $create_api_output | jq .ApiId | tr -d '"')
+    if [ "$api_id" = "" ] || [ "$api_id" = "null" ]; then
+      echo -e "unable to extract api gateway ID from create-api output:\n$create_api_output"
+      exit 1
+    fi
+    # create default stage; ignore error because default stage is supposed to be already created, but currently it isn't because of a possible bug in create-api
+    aws apigatewayv2 create-stage --region $CORTEX_REGION --tags $CORTEX_TAGS --api-id $api_id --auto-deploy --stage-name \$default &>/dev/null || true
+  fi
+
+  # create cluster (if it doesn't already exist)
   ensure_eks
 
+  # create VPC Link for API Gateway
+  if [ "$arg1" != "--update" ] && [ "$CORTEX_API_LOAD_BALANCER_SCHEME" == "internal" ]; then
+    vpc_id=$(aws ec2 describe-vpcs --region $CORTEX_REGION --filters Name=tag:eksctl.cluster.k8s.io/v1alpha1/cluster-name,Values=$CORTEX_CLUSTER_NAME | jq .Vpcs[0].VpcId | tr -d '"')
+    if [ "$vpc_id" = "" ] || [ "$vpc_id" = "null" ]; then
+      echo "unable to find cortex vpc"
+      exit 1
+    fi
+
+    # filter all private subnets belonging to cortex cluster
+    private_subnets=$(aws ec2 describe-subnets --region $CORTEX_REGION --filters Name=vpc-id,Values=$vpc_id Name=tag:Name,Values=*Private* | jq -s '.[].Subnets[].SubnetId' | tr -d '"')
+    if [ "$private_subnets" = "" ] || [ "$private_subnets" = "null" ]; then
+      echo "unable to find cortex private subnets"
+      exit 1
+    fi
+
+    # get default security group for cortex VPC
+    default_security_group=$(aws ec2 describe-security-groups --region $CORTEX_REGION --filters Name=vpc-id,Values=$vpc_id Name=group-name,Values=default | jq -c .SecurityGroups[].GroupId | tr -d '"')
+    if [ "$default_security_group" = "" ] || [ "$default_security_group" = "null" ]; then
+      echo "unable to find cortex default security group"
+      exit 1
+    fi
+
+    # create VPC Link
+    create_vpc_link_output=$(aws apigatewayv2 create-vpc-link --region $CORTEX_REGION --tags $CORTEX_TAGS --name $CORTEX_CLUSTER_NAME --subnet-ids $private_subnets --security-group-ids $default_security_group)
+    vpc_link_id=$(echo $create_vpc_link_output | jq .VpcLinkId | tr -d '"')
+    if [ "$vpc_link_id" = "" ] || [ "$vpc_link_id" = "null" ]; then
+      echo -e "unable to extract vpc link ID from create-vpc-link output:\n$create_vpc_link_output"
+      exit 1
+    fi
+  fi
+
   eksctl utils write-kubeconfig --cluster=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION | grep -v "saved kubeconfig as" | grep -v "using region" | grep -v "eksctl version" || true
 
   # pre-download images on cortex cluster up
@@ -195,6 +251,13 @@ function main() {
     echo "✓"
   fi
 
+  # add VPC Link integration to API Gateway
+  if [ "$arg1" != "--update" ] && [ "$CORTEX_API_LOAD_BALANCER_SCHEME" == "internal" ]; then
+    echo -n "￮ creating api gateway vpc link integration "
+    python create_gateway_integration.py $api_id $vpc_link_id
+    echo "✓"
+  fi
+
   echo -n "￮ starting operator "
   kubectl -n=default delete --ignore-not-found=true --grace-period=10 deployment operator >/dev/null 2>&1
   printed_dot="false"
@@ -258,14 +321,14 @@ function setup_istio() {
   echo -n "."
   envsubst < manifests/istio-namespace.yaml | kubectl apply -f - >/dev/null
 
-  if ! kubectl get secret -n istio-system | grep -q istio-customgateway-certs; then
+  if ! grep -q "istio-customgateway-certs" <<< $(kubectl get secret -n istio-system); then
     WEBSITE=localhost
     openssl req -subj "/C=US/CN=$WEBSITE" -newkey rsa:2048 -nodes -keyout $WEBSITE.key -x509 -days 3650 -out $WEBSITE.crt >/dev/null 2>&1
     kubectl create -n istio-system secret tls istio-customgateway-certs --key $WEBSITE.key --cert $WEBSITE.crt >/dev/null
   fi
 
   helm template istio-manifests/istio-init --name istio-init --namespace istio-system | kubectl apply -f - >/dev/null
-  until kubectl api-resources | grep -q virtualservice; do
+  until grep -q "virtualservice" <<< $(kubectl api-resources); do
     echo -n "."
     sleep 3
   done
@@ -320,7 +383,7 @@ function validate_cortex() {
     echo -n "."
     sleep 3
 
-    operator_pod_name=$(kubectl -n=default get pods -o=name --sort-by=.metadata.creationTimestamp | grep "^pod/operator-" | tail -1)
+    operator_pod_name=$(kubectl -n=default get pods -o=name --sort-by=.metadata.creationTimestamp | (grep "^pod/operator-" || true) | tail -1)
     if [ "$operator_pod_name" == "" ]; then
       operator_pod_ready_cycles=0
     else
@@ -364,8 +427,12 @@ function validate_cortex() {
     if [ "$operator_pod_ready_cycles" == "0" ] && [ "$operator_pod_name" != "" ]; then
       num_restart=$(kubectl -n=default get "$operator_pod_name" -o jsonpath='{.status.containerStatuses[0].restartCount}')
       if [[ $num_restart -ge 2 ]]; then
-        echo -e "\n\nan error occurred when starting the cortex operator. View the logs with:"
-        echo "  kubectl logs $operator_pod_name"
+        echo -e "\n\nan error occurred when starting the cortex operator"
+        echo -e "\noperator logs (currently running container):\n"
+        kubectl -n=default logs "$operator_pod_name"
+        echo -e "\noperator logs (previous container):\n"
+        kubectl -n=default logs "$operator_pod_name" --previous
+        echo
         exit 1
       fi
       continue
diff --git a/manager/uninstall.sh b/manager/uninstall.sh
index 860e871ea3..d5e9bb9e3a 100755
--- a/manager/uninstall.sh
+++ b/manager/uninstall.sh
@@ -26,7 +26,7 @@ function get_operator_endpoint() {
 }
 operator_endpoint=$(get_operator_endpoint)
 
-echo -e "spinning down the cluster ...\n"
+echo
 
 eksctl delete cluster --wait --name=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION --timeout=$EKSCTL_TIMEOUT
 
diff --git a/pkg/lib/aws/apigateway.go b/pkg/lib/aws/apigateway.go
new file mode 100644
index 0000000000..9e7a67e511
--- /dev/null
+++ b/pkg/lib/aws/apigateway.go
@@ -0,0 +1,335 @@
+/*
+Copyright 2020 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package aws
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/service/apigatewayv2"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+)
+
+// GetVPCLinkByTag Gets a VPC Link by tag (returns nil if there are no matches)
+func (c *Client) GetVPCLinkByTag(tagName string, tagValue string) (*apigatewayv2.VpcLink, error) {
+	var nextToken *string
+
+	for {
+		vpcLinks, err := c.APIGatewayV2().GetVpcLinks(&apigatewayv2.GetVpcLinksInput{
+			NextToken: nextToken,
+		})
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to get vpc links")
+		}
+
+		for _, vpcLink := range vpcLinks.Items {
+			for tag, value := range vpcLink.Tags {
+				if tag == tagName && *value == tagValue {
+					return vpcLink, nil
+				}
+			}
+		}
+
+		nextToken = vpcLinks.NextToken
+		if nextToken == nil {
+			break
+		}
+	}
+
+	return nil, nil
+}
+
+// GetAPIGatewayByTag Gets an API Gateway by tag (returns nil if there are no matches)
+func (c *Client) GetAPIGatewayByTag(tagName string, tagValue string) (*apigatewayv2.Api, error) {
+	var nextToken *string
+
+	for {
+		apis, err := c.APIGatewayV2().GetApis(&apigatewayv2.GetApisInput{
+			NextToken: nextToken,
+		})
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to get api gateways")
+		}
+
+		for _, api := range apis.Items {
+			for tag, value := range api.Tags {
+				if tag == tagName && *value == tagValue {
+					return api, nil
+				}
+			}
+		}
+
+		nextToken = apis.NextToken
+		if nextToken == nil {
+			break
+		}
+	}
+
+	return nil, nil
+}
+
+// DeleteVPCLinkByTag Deletes a VPC Link by tag (returns the deleted VPC Link, or nil if it was not found )
+func (c *Client) DeleteVPCLinkByTag(tagName string, tagValue string) (*apigatewayv2.VpcLink, error) {
+	vpcLink, err := c.GetVPCLinkByTag(tagName, tagValue)
+	if err != nil {
+		return nil, err
+	} else if vpcLink == nil {
+		return nil, nil
+	}
+
+	_, err = c.APIGatewayV2().DeleteVpcLink(&apigatewayv2.DeleteVpcLinkInput{
+		VpcLinkId: vpcLink.VpcLinkId,
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to delete vpc link "+*vpcLink.VpcLinkId)
+	}
+
+	return vpcLink, nil
+}
+
+// DeleteAPIGatewayByTag Deletes an API Gateway by tag (returns whether or not the API Gateway existed)
+func (c *Client) DeleteAPIGatewayByTag(tagName string, tagValue string) (*apigatewayv2.Api, error) {
+	apiGateway, err := c.GetAPIGatewayByTag(tagName, tagValue)
+	if err != nil {
+		return nil, err
+	} else if apiGateway == nil {
+		return nil, nil
+	}
+
+	// Delete mappings in case user added a custom domain name (otherwise this will block API Gateway deletion)
+	err = c.DeleteAPIGatewayMappings(*apiGateway.ApiId)
+	if err != nil {
+		return nil, err
+	}
+
+	_, err = c.APIGatewayV2().DeleteApi(&apigatewayv2.DeleteApiInput{
+		ApiId: apiGateway.ApiId,
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to delete api gateway "+*apiGateway.ApiId)
+	}
+
+	return apiGateway, nil
+}
+
+// DeleteAPIGatewayMappingsForDomainName deletes all API mappings that point to the provided api gateway from the provided domain name
+func (c *Client) DeleteAPIGatewayMappingsForDomainName(apiGatewayID string, domainName string) error {
+	var nextToken *string
+
+	for {
+		apiMappings, err := c.APIGatewayV2().GetApiMappings(&apigatewayv2.GetApiMappingsInput{
+			DomainName: aws.String(domainName),
+			NextToken:  nextToken,
+		})
+		if err != nil {
+			return errors.Wrap(err, "failed to get api mappings")
+		}
+
+		for _, apiMapping := range apiMappings.Items {
+			if *apiMapping.ApiId != apiGatewayID {
+				continue
+			}
+
+			_, err := c.APIGatewayV2().DeleteApiMapping(&apigatewayv2.DeleteApiMappingInput{
+				DomainName:   aws.String(domainName),
+				ApiMappingId: apiMapping.ApiMappingId,
+			})
+			if err != nil {
+				return errors.Wrap(err, fmt.Sprintf("failed to delete api mapping %s in domain %s", *apiMapping.ApiMappingId, domainName))
+			}
+		}
+
+		nextToken = apiMappings.NextToken
+		if nextToken == nil {
+			break
+		}
+	}
+
+	return nil
+}
+
+// DeleteAPIGatewayMappings deletes all API mappings that point to the provided api gateway
+func (c *Client) DeleteAPIGatewayMappings(apiGatewayID string) error {
+	var nextToken *string
+
+	for {
+		domainNames, err := c.APIGatewayV2().GetDomainNames(&apigatewayv2.GetDomainNamesInput{
+			NextToken: nextToken,
+		})
+		if err != nil {
+			return errors.Wrap(err, "failed to get domain names")
+		}
+
+		for _, domainName := range domainNames.Items {
+			err := c.DeleteAPIGatewayMappingsForDomainName(apiGatewayID, *domainName.DomainName)
+			if err != nil {
+				return err
+			}
+		}
+
+		nextToken = domainNames.NextToken
+		if nextToken == nil {
+			break
+		}
+	}
+
+	return nil
+}
+
+// GetVPCLinkIntegration gets the VPC Link integration in an API Gateway, or nil if unable to find it
+func (c *Client) GetVPCLinkIntegration(apiGatewayID string, vpcLinkID string) (*apigatewayv2.Integration, error) {
+	var nextToken *string
+
+	for {
+		integrations, err := c.APIGatewayV2().GetIntegrations(&apigatewayv2.GetIntegrationsInput{
+			ApiId:     &apiGatewayID,
+			NextToken: nextToken,
+		})
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to get api gateway integrations for api gateway "+apiGatewayID)
+		}
+
+		// find integration which is connected to the VPC link
+		for _, integration := range integrations.Items {
+			if *integration.ConnectionId == vpcLinkID {
+				return integration, nil
+			}
+		}
+
+		nextToken = integrations.NextToken
+		if nextToken == nil {
+			break
+		}
+	}
+
+	return nil, nil
+}
+
+// GetRouteIntegrationID returns the integration which is attached to a endpoint route, or empty string if unable to find it
+func (c *Client) GetRouteIntegrationID(apiGatewayID string, endpoint string) (string, error) {
+	route, err := c.GetRoute(apiGatewayID, endpoint)
+	if err != nil {
+		return "", err
+	}
+	if route == nil {
+		return "", nil
+	}
+
+	return ExtractRouteIntegrationID(route), nil
+}
+
+// ExtractRouteIntegrationID extracts the integration ID which is attached to a route, or "" if no route is attached
+func ExtractRouteIntegrationID(route *apigatewayv2.Route) string {
+	if route == nil || route.Target == nil {
+		return ""
+	}
+
+	// trim of prefix of integrationID.
+	// Note: Integrations get attached to routes via a target of the format integrations/<integrationID>
+	integrationID := strings.TrimPrefix(*route.Target, "integrations/")
+	return integrationID
+}
+
+// GetRoute retrieves the route matching an endpoint, or nil if unable to find it
+func (c *Client) GetRoute(apiGatewayID string, endpoint string) (*apigatewayv2.Route, error) {
+	var nextToken *string
+
+	for {
+		routes, err := c.APIGatewayV2().GetRoutes(&apigatewayv2.GetRoutesInput{
+			ApiId:     &apiGatewayID,
+			NextToken: nextToken,
+		})
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to get api gateway routes for api gateway "+apiGatewayID)
+		}
+
+		// find route which matches the endpoint
+		for _, route := range routes.Items {
+			if *route.RouteKey == "ANY "+endpoint {
+				return route, nil
+			}
+		}
+
+		nextToken = routes.NextToken
+		if nextToken == nil {
+			break
+		}
+	}
+
+	return nil, nil
+}
+
+// CreateRoute creates a new route and attaches the route to the integration
+func (c *Client) CreateRoute(apiGatewayID string, integrationID string, endpoint string) error {
+	_, err := c.APIGatewayV2().CreateRoute(&apigatewayv2.CreateRouteInput{
+		ApiId:    &apiGatewayID,
+		RouteKey: aws.String("ANY " + endpoint),
+		Target:   aws.String("integrations/" + integrationID),
+	})
+	if err != nil {
+		return errors.Wrap(err, fmt.Sprintf("failed to create %s route for api gateway %s with integration %s", endpoint, apiGatewayID, integrationID))
+	}
+	return nil
+}
+
+// CreateHTTPIntegration creates new HTTP integration for API Gateway, returns integration ID
+func (c *Client) CreateHTTPIntegration(apiGatewayID string, targetEndpoint string) (string, error) {
+	integrationResponse, err := c.APIGatewayV2().CreateIntegration(&apigatewayv2.CreateIntegrationInput{
+		ApiId:                &apiGatewayID,
+		IntegrationType:      aws.String("HTTP_PROXY"),
+		IntegrationUri:       &targetEndpoint,
+		PayloadFormatVersion: aws.String("1.0"),
+		IntegrationMethod:    aws.String("ANY"),
+	})
+	if err != nil {
+		return "", errors.Wrap(err, fmt.Sprintf("failed to create api gateway integration for endpoint %s in api gateway %s", targetEndpoint, apiGatewayID))
+	}
+	return *integrationResponse.IntegrationId, nil
+}
+
+// DeleteIntegration deletes an integration from API Gateway
+func (c *Client) DeleteIntegration(apiGatewayID string, integrationID string) error {
+	_, err := c.APIGatewayV2().DeleteIntegration(&apigatewayv2.DeleteIntegrationInput{
+		ApiId:         &apiGatewayID,
+		IntegrationId: &integrationID,
+	})
+	if err != nil {
+		return errors.Wrap(err, fmt.Sprintf("failed to delete api gateway integration %s in api gateway %s", integrationID, apiGatewayID))
+	}
+	return nil
+}
+
+// DeleteRoute deletes a route from API Gateway, and returns the deleted route (or nil if it wasn't found)
+func (c *Client) DeleteRoute(apiGatewayID string, endpoint string) (*apigatewayv2.Route, error) {
+	route, err := c.GetRoute(apiGatewayID, endpoint)
+	if err != nil {
+		return nil, err
+	} else if route == nil {
+		return nil, nil
+	}
+
+	_, err = c.APIGatewayV2().DeleteRoute(&apigatewayv2.DeleteRouteInput{
+		ApiId:   &apiGatewayID,
+		RouteId: route.RouteId,
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, fmt.Sprintf("failed to delete api gateway route %s with endpoint %s in api gateway %s", *route.RouteId, endpoint, apiGatewayID))
+	}
+
+	return route, nil
+}
diff --git a/pkg/lib/aws/clients.go b/pkg/lib/aws/clients.go
index efbdad0549..850f06c5ea 100644
--- a/pkg/lib/aws/clients.go
+++ b/pkg/lib/aws/clients.go
@@ -18,6 +18,7 @@ package aws
 
 import (
 	"github.com/aws/aws-sdk-go/service/acm"
+	"github.com/aws/aws-sdk-go/service/apigatewayv2"
 	"github.com/aws/aws-sdk-go/service/autoscaling"
 	"github.com/aws/aws-sdk-go/service/cloudformation"
 	"github.com/aws/aws-sdk-go/service/cloudwatch"
@@ -42,6 +43,7 @@ type clients struct {
 	autoscaling    *autoscaling.AutoScaling
 	cloudWatchLogs *cloudwatchlogs.CloudWatchLogs
 	cloudWatch     *cloudwatch.CloudWatch
+	apiGatewayV2   *apigatewayv2.ApiGatewayV2
 	serviceQuotas  *servicequotas.ServiceQuotas
 	cloudFormation *cloudformation.CloudFormation
 	iam            *iam.IAM
@@ -124,6 +126,13 @@ func (c *Client) CloudWatch() *cloudwatch.CloudWatch {
 	return c.clients.cloudWatch
 }
 
+func (c *Client) APIGatewayV2() *apigatewayv2.ApiGatewayV2 {
+	if c.clients.apiGatewayV2 == nil {
+		c.clients.apiGatewayV2 = apigatewayv2.New(c.sess)
+	}
+	return c.clients.apiGatewayV2
+}
+
 func (c *Client) ServiceQuotas() *servicequotas.ServiceQuotas {
 	if c.clients.serviceQuotas == nil {
 		c.clients.serviceQuotas = servicequotas.New(c.sess)
diff --git a/pkg/lib/errors/error.go b/pkg/lib/errors/error.go
new file mode 100644
index 0000000000..f64e36b3b5
--- /dev/null
+++ b/pkg/lib/errors/error.go
@@ -0,0 +1,196 @@
+/*
+Copyright 2020 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package errors
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	pkgerrors "github.com/pkg/errors"
+)
+
+const ErrNotCortexError = "errors.not_cortex_error"
+
+type Error struct {
+	Kind        string
+	Message     string
+	NoTelemetry bool
+	NoPrint     bool
+	Cause       error
+	stack       *stack
+}
+
+func (cortexError *Error) Error() string {
+	return cortexError.Message
+}
+
+func (cortexError *Error) StackTrace() pkgerrors.StackTrace {
+	stackTrace := make([]pkgerrors.Frame, len(*cortexError.stack))
+	for i := 0; i < len(stackTrace); i++ {
+		stackTrace[i] = pkgerrors.Frame((*cortexError.stack)[i])
+	}
+	return stackTrace
+}
+
+func WithStack(err error) error {
+	if err == nil {
+		return nil
+	}
+
+	cortexError := getCortexError(err)
+
+	if cortexError == nil {
+		cortexError = &Error{
+			Kind:    ErrNotCortexError,
+			Message: strings.TrimSpace(err.Error()),
+			Cause:   err,
+		}
+	}
+
+	if cortexError.stack == nil {
+		cortexError.stack = callers()
+	}
+
+	return cortexError
+}
+
+func Wrap(err error, strs ...string) error {
+	if err == nil {
+		return nil
+	}
+
+	cortexError := WithStack(err).(*Error)
+
+	strs = removeEmptyStrs(strs)
+	strs = append(strs, cortexError.Message)
+	cortexError.Message = strings.Join(strs, ": ")
+
+	return cortexError
+}
+
+// adds to the end of the error message (without adding any whitespace or punctuation)
+func Append(err error, str string) error {
+	if err == nil {
+		return nil
+	}
+
+	cortexError := WithStack(err).(*Error)
+	cortexError.Message = cortexError.Message + str
+	return cortexError
+}
+
+func getCortexError(err error) *Error {
+	if cortexError, ok := err.(*Error); ok {
+		return cortexError
+	}
+	return nil
+}
+
+func GetKind(err error) string {
+	if cortexError, ok := err.(*Error); ok {
+		return cortexError.Kind
+	}
+	return ErrNotCortexError
+}
+
+func IsNoTelemetry(err error) bool {
+	if cortexError, ok := err.(*Error); ok {
+		return cortexError.NoTelemetry
+	}
+	return false
+}
+
+func SetNoTelemetry(err error) error {
+	cortexError := WithStack(err).(*Error)
+	cortexError.NoTelemetry = true
+	return cortexError
+}
+
+func IsNoPrint(err error) bool {
+	if cortexError, ok := err.(*Error); ok {
+		return cortexError.NoPrint
+	}
+	return false
+}
+
+func SetNoPrint(err error) error {
+	cortexError := WithStack(err).(*Error)
+	cortexError.NoPrint = true
+	return cortexError
+}
+
+// Returns nil if no cause
+func Cause(err error) error {
+	if cortexError, ok := err.(*Error); ok {
+		return cortexError.Cause
+	}
+	return nil
+}
+
+func CauseOrSelf(err error) error {
+	if cortexError, ok := err.(*Error); ok {
+		cause := cortexError.Cause
+		if cause != nil {
+			return cause
+		}
+	}
+	return err
+}
+
+func PrintStacktrace(err error) {
+	fmt.Printf("%+v\n", err)
+}
+
+func (cortexError *Error) Format(s fmt.State, verb rune) {
+	switch verb {
+	case 'v':
+		if s.Flag('+') {
+			io.WriteString(s, cortexError.Message)
+			cortexError.stack.Format(s, verb)
+			return
+		}
+		fallthrough
+	case 's':
+		io.WriteString(s, cortexError.Message)
+	case 'q':
+		fmt.Fprintf(s, "%q", cortexError.Message)
+	}
+}
+
+func CastRecoverError(errInterface interface{}, strs ...string) error {
+	var err error
+	var ok bool
+	err, ok = errInterface.(error)
+	if !ok {
+		err = &Error{
+			Kind:    ErrNotCortexError,
+			Message: fmt.Sprint(errInterface),
+		}
+	}
+	return Wrap(err, strs...)
+}
+
+func removeEmptyStrs(strs []string) []string {
+	var cleanStrs []string
+	for _, str := range strs {
+		if str != "" {
+			cleanStrs = append(cleanStrs, str)
+		}
+	}
+	return cleanStrs
+}
diff --git a/pkg/lib/errors/errors.go b/pkg/lib/errors/errors.go
index f64e36b3b5..c94c911edb 100644
--- a/pkg/lib/errors/errors.go
+++ b/pkg/lib/errors/errors.go
@@ -17,180 +17,23 @@ limitations under the License.
 package errors
 
 import (
-	"fmt"
-	"io"
 	"strings"
 
-	pkgerrors "github.com/pkg/errors"
+	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 )
 
-const ErrNotCortexError = "errors.not_cortex_error"
-
-type Error struct {
-	Kind        string
-	Message     string
-	NoTelemetry bool
-	NoPrint     bool
-	Cause       error
-	stack       *stack
-}
-
-func (cortexError *Error) Error() string {
-	return cortexError.Message
-}
-
-func (cortexError *Error) StackTrace() pkgerrors.StackTrace {
-	stackTrace := make([]pkgerrors.Frame, len(*cortexError.stack))
-	for i := 0; i < len(stackTrace); i++ {
-		stackTrace[i] = pkgerrors.Frame((*cortexError.stack)[i])
-	}
-	return stackTrace
-}
-
-func WithStack(err error) error {
-	if err == nil {
-		return nil
-	}
-
-	cortexError := getCortexError(err)
-
-	if cortexError == nil {
-		cortexError = &Error{
-			Kind:    ErrNotCortexError,
-			Message: strings.TrimSpace(err.Error()),
-			Cause:   err,
-		}
-	}
-
-	if cortexError.stack == nil {
-		cortexError.stack = callers()
-	}
-
-	return cortexError
-}
-
-func Wrap(err error, strs ...string) error {
-	if err == nil {
-		return nil
-	}
-
-	cortexError := WithStack(err).(*Error)
-
-	strs = removeEmptyStrs(strs)
-	strs = append(strs, cortexError.Message)
-	cortexError.Message = strings.Join(strs, ": ")
-
-	return cortexError
-}
-
-// adds to the end of the error message (without adding any whitespace or punctuation)
-func Append(err error, str string) error {
-	if err == nil {
-		return nil
-	}
-
-	cortexError := WithStack(err).(*Error)
-	cortexError.Message = cortexError.Message + str
-	return cortexError
-}
-
-func getCortexError(err error) *Error {
-	if cortexError, ok := err.(*Error); ok {
-		return cortexError
-	}
-	return nil
-}
-
-func GetKind(err error) string {
-	if cortexError, ok := err.(*Error); ok {
-		return cortexError.Kind
-	}
-	return ErrNotCortexError
-}
-
-func IsNoTelemetry(err error) bool {
-	if cortexError, ok := err.(*Error); ok {
-		return cortexError.NoTelemetry
-	}
-	return false
-}
-
-func SetNoTelemetry(err error) error {
-	cortexError := WithStack(err).(*Error)
-	cortexError.NoTelemetry = true
-	return cortexError
-}
-
-func IsNoPrint(err error) bool {
-	if cortexError, ok := err.(*Error); ok {
-		return cortexError.NoPrint
-	}
-	return false
-}
-
-func SetNoPrint(err error) error {
-	cortexError := WithStack(err).(*Error)
-	cortexError.NoPrint = true
-	return cortexError
-}
-
-// Returns nil if no cause
-func Cause(err error) error {
-	if cortexError, ok := err.(*Error); ok {
-		return cortexError.Cause
-	}
-	return nil
-}
-
-func CauseOrSelf(err error) error {
-	if cortexError, ok := err.(*Error); ok {
-		cause := cortexError.Cause
-		if cause != nil {
-			return cause
-		}
-	}
-	return err
-}
-
-func PrintStacktrace(err error) {
-	fmt.Printf("%+v\n", err)
-}
-
-func (cortexError *Error) Format(s fmt.State, verb rune) {
-	switch verb {
-	case 'v':
-		if s.Flag('+') {
-			io.WriteString(s, cortexError.Message)
-			cortexError.stack.Format(s, verb)
-			return
-		}
-		fallthrough
-	case 's':
-		io.WriteString(s, cortexError.Message)
-	case 'q':
-		fmt.Fprintf(s, "%q", cortexError.Message)
-	}
-}
+const (
+	ErrUnexpected = "errors.unexpected"
+)
 
-func CastRecoverError(errInterface interface{}, strs ...string) error {
-	var err error
-	var ok bool
-	err, ok = errInterface.(error)
-	if !ok {
-		err = &Error{
-			Kind:    ErrNotCortexError,
-			Message: fmt.Sprint(errInterface),
-		}
+func ErrorUnexpected(msgs ...interface{}) error {
+	strs := make([]string, len(msgs))
+	for i, msg := range msgs {
+		strs[i] = s.ObjFlatNoQuotes(msg)
 	}
-	return Wrap(err, strs...)
-}
 
-func removeEmptyStrs(strs []string) []string {
-	var cleanStrs []string
-	for _, str := range strs {
-		if str != "" {
-			cleanStrs = append(cleanStrs, str)
-		}
-	}
-	return cleanStrs
+	return WithStack(&Error{
+		Kind:    ErrUnexpected,
+		Message: strings.Join(strs, ": "),
+	})
 }
diff --git a/pkg/lib/sets/strset/strset.go b/pkg/lib/sets/strset/strset.go
index f9a9b74385..e78465c866 100644
--- a/pkg/lib/sets/strset/strset.go
+++ b/pkg/lib/sets/strset/strset.go
@@ -62,6 +62,23 @@ func (s Set) Remove(items ...string) {
 	}
 }
 
+// GetOne returns an item from the set or "" if the set is empty.
+func (s Set) GetOne() string {
+	for item := range s {
+		return item
+	}
+	return ""
+}
+
+// GetOne2 returns an item from the set. The second value is a bool that is
+// true if an item exists in the set, or false if the set is empty.
+func (s Set) GetOne2() (string, bool) {
+	for item := range s {
+		return item, true
+	}
+	return "", false
+}
+
 // Pop deletes and returns an item from the Set. The underlying Set s is
 // modified. If Set is empty, the zero value is returned.
 func (s Set) Pop() string {
diff --git a/pkg/operator/config/config.go b/pkg/operator/config/config.go
index 5fe7902c97..d46908d50b 100644
--- a/pkg/operator/config/config.go
+++ b/pkg/operator/config/config.go
@@ -86,6 +86,32 @@ func Init() error {
 		fmt.Println(errors.Message(err))
 	}
 
+	apiGateway, err := AWS.GetAPIGatewayByTag(clusterconfig.ClusterNameTag, Cluster.ClusterName)
+	if err != nil {
+		return err
+	} else if apiGateway == nil {
+		return ErrorNoAPIGateway()
+	}
+	Cluster.APIGateway = *apiGateway
+
+	if Cluster.APILoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme {
+		vpcLink, err := AWS.GetVPCLinkByTag(clusterconfig.ClusterNameTag, Cluster.ClusterName)
+		if err != nil {
+			return err
+		} else if vpcLink == nil {
+			return ErrorNoVPCLink()
+		}
+		Cluster.VPCLink = vpcLink
+
+		integration, err := AWS.GetVPCLinkIntegration(*Cluster.APIGateway.ApiId, *Cluster.VPCLink.VpcLinkId)
+		if err != nil {
+			return err
+		} else if integration == nil {
+			return ErrorNoVPCLinkIntegration()
+		}
+		Cluster.VPCLinkIntegration = integration
+	}
+
 	Cluster.InstanceMetadata = aws.InstanceMetadatas[*Cluster.Region][*Cluster.InstanceType]
 
 	if K8s, err = k8s.New("default", Cluster.OperatorInCluster); err != nil {
diff --git a/pkg/operator/config/errors.go b/pkg/operator/config/errors.go
new file mode 100644
index 0000000000..67fc3cb35b
--- /dev/null
+++ b/pkg/operator/config/errors.go
@@ -0,0 +1,48 @@
+/*
+Copyright 2020 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package config
+
+import (
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+)
+
+const (
+	ErrNoAPIGateway         = "config.no_api_gateway"
+	ErrNoVPCLink            = "config.no_vpc_link"
+	ErrNoVPCLinkIntegration = "config.no_vpc_link_integration"
+)
+
+func ErrorNoAPIGateway() error {
+	return errors.WithStack(&errors.Error{
+		Kind:    ErrNoAPIGateway,
+		Message: "unable to locate cortex's api gateway",
+	})
+}
+
+func ErrorNoVPCLink() error {
+	return errors.WithStack(&errors.Error{
+		Kind:    ErrNoVPCLink,
+		Message: "unable to locate cortex's vpc link",
+	})
+}
+
+func ErrorNoVPCLinkIntegration() error {
+	return errors.WithStack(&errors.Error{
+		Kind:    ErrNoVPCLinkIntegration,
+		Message: "unable to locate cortex's api gateway vpc link integration",
+	})
+}
diff --git a/pkg/operator/endpoints/deploy.go b/pkg/operator/endpoints/deploy.go
index a29518c4b6..b1345e0613 100644
--- a/pkg/operator/endpoints/deploy.go
+++ b/pkg/operator/endpoints/deploy.go
@@ -48,12 +48,6 @@ func Deploy(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	baseURL, err := operator.APIsBaseURL()
-	if err != nil {
-		respondError(w, r, err)
-		return
-	}
-
 	projectBytes, err := files.ReadReqFile(r, "project.zip")
 	if err != nil {
 		respondError(w, r, err)
@@ -108,6 +102,5 @@ func Deploy(w http.ResponseWriter, r *http.Request) {
 
 	respond(w, schema.DeployResponse{
 		Results: results,
-		BaseURL: baseURL,
 	})
 }
diff --git a/pkg/operator/endpoints/get.go b/pkg/operator/endpoints/get.go
index 5bcafc3fd7..af2e583a72 100644
--- a/pkg/operator/endpoints/get.go
+++ b/pkg/operator/endpoints/get.go
@@ -45,17 +45,10 @@ func GetAPIs(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	baseURL, err := operator.APIsBaseURL()
-	if err != nil {
-		respondError(w, r, err)
-		return
-	}
-
 	respond(w, schema.GetAPIsResponse{
 		APIs:       apis,
 		Statuses:   statuses,
 		AllMetrics: allMetrics,
-		BaseURL:    baseURL,
 	})
 }
 
@@ -80,7 +73,7 @@ func GetAPI(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	baseURL, err := operator.APIsBaseURL()
+	baseURL, err := operator.APIBaseURL(api)
 	if err != nil {
 		respondError(w, r, err)
 		return
diff --git a/pkg/operator/operator/api.go b/pkg/operator/operator/api.go
index aa252ee8f5..127c39fc0b 100644
--- a/pkg/operator/operator/api.go
+++ b/pkg/operator/operator/api.go
@@ -58,6 +58,11 @@ func UpdateAPI(apiConfig *userconfig.API, projectID string, force bool) (*spec.A
 			go deleteK8sResources(api.Name)
 			return nil, "", err
 		}
+		err = addAPIToAPIGateway(*api.Endpoint, api.Networking.APIGateway)
+		if err != nil {
+			go deleteK8sResources(api.Name)
+			return nil, "", err
+		}
 		err = addAPIToDashboard(config.Cluster.ClusterName, api.Name)
 		if err != nil {
 			errors.PrintError(err)
@@ -79,6 +84,9 @@ func UpdateAPI(apiConfig *userconfig.API, projectID string, force bool) (*spec.A
 		if err := applyK8sResources(api, prevDeployment, prevService, prevVirtualService); err != nil {
 			return nil, "", err
 		}
+		if err := updateAPIGatewayK8s(prevVirtualService, api); err != nil {
+			return nil, "", err
+		}
 		return api, fmt.Sprintf("updating %s", api.Name), nil
 	}
 
@@ -134,7 +142,13 @@ func RefreshAPI(apiName string, force bool) (string, error) {
 }
 
 func DeleteAPI(apiName string, keepCache bool) error {
+	// best effort deletion, so don't handle error yet
+	virtualService, vsErr := config.K8s.GetVirtualService(k8sName(apiName))
+
 	err := parallel.RunFirstErr(
+		func() error {
+			return vsErr
+		},
 		func() error {
 			return deleteK8sResources(apiName)
 		},
@@ -146,12 +160,19 @@ func DeleteAPI(apiName string, keepCache bool) error {
 			deleteS3Resources(apiName)
 			return nil
 		},
+		// delete API from API Gateway
+		func() error {
+			err := removeAPIFromAPIGatewayK8s(virtualService)
+			if err != nil {
+				return err
+			}
+			return nil
+		},
 		// delete api from cloudwatch
 		func() error {
 			statuses, err := GetAllStatuses()
 			if err != nil {
-				errors.PrintError(err, "failed to get API Statuses")
-				return nil
+				return errors.Wrap(err, "failed to get API Statuses")
 			}
 			//extract all api names from statuses
 			allAPINames := make([]string, len(statuses))
@@ -160,8 +181,7 @@ func DeleteAPI(apiName string, keepCache bool) error {
 			}
 			err = removeAPIFromDashboard(allAPINames, config.Cluster.ClusterName, apiName)
 			if err != nil {
-				errors.PrintError(err)
-				return nil
+				return errors.Wrap(err, "failed to delete API from dashboard")
 			}
 			return nil
 		},
@@ -372,7 +392,16 @@ func IsAPIDeployed(apiName string) (bool, error) {
 	return deployment != nil, nil
 }
 
-func APIsBaseURL() (string, error) {
+// APIBaseURL returns BaseURL of the API without resource endpoint
+func APIBaseURL(api *spec.API) (string, error) {
+	if api.Networking.APIGateway == userconfig.PublicAPIGatewayType {
+		return *config.Cluster.APIGateway.ApiEndpoint, nil
+	}
+	return APILoadBalancerURL()
+}
+
+// APILoadBalancerURL returns http endpoint of cluster ingress elb
+func APILoadBalancerURL() (string, error) {
 	service, err := config.K8sIstio.GetService("ingressgateway-apis")
 	if err != nil {
 		return "", err
@@ -422,3 +451,16 @@ func DownloadAPISpecs(apiNames []string, apiIDs []string) ([]spec.API, error) {
 
 	return apis, nil
 }
+
+func GetEndpointFromVirtualService(virtualService *kunstructured.Unstructured) (string, error) {
+	endpoints, err := k8s.ExtractVirtualServiceEndpoints(virtualService)
+	if err != nil {
+		return "", err
+	}
+
+	if len(endpoints) != 1 {
+		return "", errors.ErrorUnexpected("expected 1 endpoint, but got", endpoints)
+	}
+
+	return endpoints.GetOne(), nil
+}
diff --git a/pkg/operator/operator/gateway.go b/pkg/operator/operator/gateway.go
new file mode 100644
index 0000000000..8b4977b817
--- /dev/null
+++ b/pkg/operator/operator/gateway.go
@@ -0,0 +1,162 @@
+/*
+Copyright 2020 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package operator
+
+import (
+	"github.com/cortexlabs/cortex/pkg/lib/aws"
+	"github.com/cortexlabs/cortex/pkg/lib/urls"
+	"github.com/cortexlabs/cortex/pkg/operator/config"
+	"github.com/cortexlabs/cortex/pkg/types/clusterconfig"
+	"github.com/cortexlabs/cortex/pkg/types/spec"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	kunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+)
+
+func addAPIToAPIGateway(endpoint string, apiGatewayType userconfig.APIGatewayType) error {
+	if apiGatewayType == userconfig.NoneAPIGatewayType {
+		return nil
+	}
+
+	apiGatewayID := *config.Cluster.APIGateway.ApiId
+
+	// check if API Gateway route already exists
+	existingRoute, err := config.AWS.GetRoute(apiGatewayID, endpoint)
+	if err != nil {
+		return err
+	} else if existingRoute != nil {
+		return nil
+	}
+
+	if config.Cluster.APILoadBalancerScheme == clusterconfig.InternalLoadBalancerScheme {
+		err = config.AWS.CreateRoute(apiGatewayID, *config.Cluster.VPCLinkIntegration.IntegrationId, endpoint)
+		if err != nil {
+			return err
+		}
+	}
+
+	if config.Cluster.APILoadBalancerScheme == clusterconfig.InternetFacingLoadBalancerScheme {
+		loadBalancerURL, err := APILoadBalancerURL()
+		if err != nil {
+			return err
+		}
+
+		targetEndpoint := urls.Join(loadBalancerURL, endpoint)
+
+		integrationID, err := config.AWS.CreateHTTPIntegration(apiGatewayID, targetEndpoint)
+		if err != nil {
+			return err
+		}
+
+		err = config.AWS.CreateRoute(apiGatewayID, integrationID, endpoint)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func removeAPIFromAPIGateway(endpoint string, apiGatewayType userconfig.APIGatewayType) error {
+	if apiGatewayType == userconfig.NoneAPIGatewayType {
+		return nil
+	}
+
+	apiGatewayID := *config.Cluster.APIGateway.ApiId
+
+	route, err := config.AWS.DeleteRoute(apiGatewayID, endpoint)
+	if err != nil {
+		return err
+	}
+
+	if config.Cluster.APILoadBalancerScheme == clusterconfig.InternetFacingLoadBalancerScheme && route != nil {
+		integrationID := aws.ExtractRouteIntegrationID(route)
+		if integrationID != "" {
+			err = config.AWS.DeleteIntegration(apiGatewayID, integrationID)
+			if err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+func updateAPIGateway(
+	prevEndpoint string,
+	prevAPIGatewayType userconfig.APIGatewayType,
+	newEndpoint string,
+	newAPIGatewayType userconfig.APIGatewayType,
+) error {
+
+	if prevAPIGatewayType == userconfig.NoneAPIGatewayType && newAPIGatewayType == userconfig.NoneAPIGatewayType {
+		return nil
+	}
+
+	if prevAPIGatewayType == userconfig.PublicAPIGatewayType && newAPIGatewayType == userconfig.NoneAPIGatewayType {
+		return removeAPIFromAPIGateway(prevEndpoint, prevAPIGatewayType)
+	}
+
+	if prevAPIGatewayType == userconfig.NoneAPIGatewayType && newAPIGatewayType == userconfig.PublicAPIGatewayType {
+		return addAPIToAPIGateway(newEndpoint, newAPIGatewayType)
+	}
+
+	if prevEndpoint == newEndpoint {
+		return nil
+	}
+
+	// the endpoint has changed
+	if err := addAPIToAPIGateway(newEndpoint, newAPIGatewayType); err != nil {
+		return err
+	}
+	if err := removeAPIFromAPIGateway(prevEndpoint, prevAPIGatewayType); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func removeAPIFromAPIGatewayK8s(virtualService *kunstructured.Unstructured) error {
+	if virtualService == nil {
+		return nil // API is not running
+	}
+
+	apiGatewayType, err := userconfig.APIGatewayFromAnnotations(virtualService)
+	if err != nil {
+		return err
+	}
+
+	endpoint, err := GetEndpointFromVirtualService(virtualService)
+	if err != nil {
+		return err
+	}
+
+	return removeAPIFromAPIGateway(endpoint, apiGatewayType)
+}
+
+func updateAPIGatewayK8s(prevVirtualService *kunstructured.Unstructured, newAPI *spec.API) error {
+	prevAPIGatewayType, err := userconfig.APIGatewayFromAnnotations(prevVirtualService)
+	if err != nil {
+		return err
+	}
+
+	prevEndpoint, err := GetEndpointFromVirtualService(prevVirtualService)
+	if err != nil {
+		return err
+	}
+
+	return updateAPIGateway(prevEndpoint, prevAPIGatewayType, *newAPI.Endpoint, newAPI.Networking.APIGateway)
+}
diff --git a/pkg/operator/operator/k8s_specs.go b/pkg/operator/operator/k8s_specs.go
index 1f915989b2..562a0e6a59 100644
--- a/pkg/operator/operator/k8s_specs.go
+++ b/pkg/operator/operator/k8s_specs.go
@@ -127,7 +127,7 @@ func tensorflowPredictorSpec(api *spec.API, prevDeployment *kapps.Deployment) *k
 			"apiID":        api.ID,
 			"deploymentID": api.DeploymentID,
 		},
-		Annotations: api.Autoscaling.ToK8sAnnotations(),
+		Annotations: api.ToK8sAnnotations(),
 		Selector: map[string]string{
 			"apiName": api.Name,
 		},
@@ -305,7 +305,7 @@ func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deplo
 			"apiID":        api.ID,
 			"deploymentID": api.DeploymentID,
 		},
-		Annotations: api.Autoscaling.ToK8sAnnotations(),
+		Annotations: api.ToK8sAnnotations(),
 		Selector: map[string]string{
 			"apiName": api.Name,
 		},
@@ -414,7 +414,7 @@ func onnxAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deploym
 			"apiID":        api.ID,
 			"deploymentID": api.DeploymentID,
 		},
-		Annotations: api.Autoscaling.ToK8sAnnotations(),
+		Annotations: api.ToK8sAnnotations(),
 		Selector: map[string]string{
 			"apiName": api.Name,
 		},
@@ -518,9 +518,10 @@ func onnxDownloadArgs(api *spec.API) string {
 
 func serviceSpec(api *spec.API) *kcore.Service {
 	return k8s.Service(&k8s.ServiceSpec{
-		Name:       k8sName(api.Name),
-		Port:       _defaultPortInt32,
-		TargetPort: _defaultPortInt32,
+		Name:        k8sName(api.Name),
+		Port:        _defaultPortInt32,
+		TargetPort:  _defaultPortInt32,
+		Annotations: api.ToK8sAnnotations(),
 		Labels: map[string]string{
 			"apiName": api.Name,
 		},
@@ -538,6 +539,7 @@ func virtualServiceSpec(api *spec.API) *kunstructured.Unstructured {
 		ServicePort: _defaultPortInt32,
 		Path:        *api.Endpoint,
 		Rewrite:     pointer.String("predict"),
+		Annotations: api.ToK8sAnnotations(),
 		Labels: map[string]string{
 			"apiName": api.Name,
 		},
diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index e50c3b83c4..3674b4f448 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -43,7 +43,6 @@ type NodeInfo struct {
 
 type DeployResponse struct {
 	Results []DeployResult `json:"results"`
-	BaseURL string         `json:"base_url"`
 }
 
 type DeployResult struct {
@@ -56,7 +55,6 @@ type GetAPIsResponse struct {
 	APIs       []spec.API        `json:"apis"`
 	Statuses   []status.Status   `json:"statuses"`
 	AllMetrics []metrics.Metrics `json:"all_metrics"`
-	BaseURL    string            `json:"base_url"`
 }
 
 type GetAPIResponse struct {
diff --git a/pkg/types/clusterconfig/clusterconfig.go b/pkg/types/clusterconfig/clusterconfig.go
index 1a70306e3a..1bd41afe1f 100644
--- a/pkg/types/clusterconfig/clusterconfig.go
+++ b/pkg/types/clusterconfig/clusterconfig.go
@@ -23,6 +23,7 @@ import (
 
 	"github.com/aws/amazon-vpc-cni-k8s/pkg/awsutils"
 	"github.com/aws/aws-sdk-go/aws/awserr"
+	"github.com/aws/aws-sdk-go/service/apigatewayv2"
 	"github.com/cortexlabs/cortex/pkg/consts"
 	"github.com/cortexlabs/cortex/pkg/lib/aws"
 	cr "github.com/cortexlabs/cortex/pkg/lib/configreader"
@@ -93,10 +94,13 @@ type InternalConfig struct {
 	Config
 
 	// Populated by operator
-	ID                string               `json:"id"`
-	APIVersion        string               `json:"api_version"`
-	OperatorInCluster bool                 `json:"operator_in_cluster"`
-	InstanceMetadata  aws.InstanceMetadata `json:"instance_metadata"`
+	ID                 string                    `json:"id"`
+	APIVersion         string                    `json:"api_version"`
+	OperatorInCluster  bool                      `json:"operator_in_cluster"`
+	InstanceMetadata   aws.InstanceMetadata      `json:"instance_metadata"`
+	APIGateway         apigatewayv2.Api          `json:"api_gateway"`
+	VPCLink            *apigatewayv2.VpcLink     `json:"vpc_link"`
+	VPCLinkIntegration *apigatewayv2.Integration `json:"vpc_link_integration"`
 }
 
 // The bare minimum to identify a cluster
diff --git a/pkg/types/spec/validations.go b/pkg/types/spec/validations.go
index 5e949fde4d..7f6d6d67c6 100644
--- a/pkg/types/spec/validations.go
+++ b/pkg/types/spec/validations.go
@@ -73,6 +73,7 @@ func apiValidation(provider types.ProviderType) *cr.StructValidation {
 			},
 			predictorValidation(),
 			monitoringValidation(),
+			networkingValidation(),
 			computeValidation(provider),
 			autoscalingValidation(provider),
 			updateStrategyValidation(provider),
@@ -185,6 +186,26 @@ func monitoringValidation() *cr.StructFieldValidation {
 	}
 }
 
+func networkingValidation() *cr.StructFieldValidation {
+	return &cr.StructFieldValidation{
+		StructField: "Networking",
+		StructValidation: &cr.StructValidation{
+			StructFieldValidations: []*cr.StructFieldValidation{
+				{
+					StructField: "APIGateway",
+					StringValidation: &cr.StringValidation{
+						AllowedValues: userconfig.APIGatewayTypeStrings(),
+						Default:       userconfig.PublicAPIGatewayType.String(),
+					},
+					Parser: func(str string) (interface{}, error) {
+						return userconfig.APIGatewayTypeFromString(str), nil
+					},
+				},
+			},
+		},
+	}
+}
+
 func computeValidation(provider types.ProviderType) *cr.StructFieldValidation {
 	cpuDefault := pointer.String("200m")
 	if provider == types.LocalProviderType {
diff --git a/pkg/types/userconfig/api.go b/pkg/types/userconfig/api.go
index f4fa9974ed..273e8d2a13 100644
--- a/pkg/types/userconfig/api.go
+++ b/pkg/types/userconfig/api.go
@@ -35,6 +35,7 @@ type API struct {
 	LocalPort      *int            `json:"local_port" yaml:"local_port"`
 	Predictor      *Predictor      `json:"predictor" yaml:"predictor"`
 	Monitoring     *Monitoring     `json:"monitoring" yaml:"monitoring"`
+	Networking     *Networking     `json:"networking" yaml:"networking"`
 	Compute        *Compute        `json:"compute" yaml:"compute"`
 	Autoscaling    *Autoscaling    `json:"autoscaling" yaml:"autoscaling"`
 	UpdateStrategy *UpdateStrategy `json:"update_strategy" yaml:"update_strategy"`
@@ -67,6 +68,10 @@ type Monitoring struct {
 	ModelType ModelType `json:"model_type" yaml:"model_type"`
 }
 
+type Networking struct {
+	APIGateway APIGatewayType `json:"api_gateway" yaml:"api_gateway"`
+}
+
 type Compute struct {
 	CPU *k8s.Quantity `json:"cpu" yaml:"cpu"`
 	Mem *k8s.Quantity `json:"mem" yaml:"mem"`
@@ -164,100 +169,109 @@ func IdentifyAPI(filePath string, name string, index int) string {
 }
 
 // InitReplicas was left out deliberately
-func (autoscaling *Autoscaling) ToK8sAnnotations() map[string]string {
+func (api *API) ToK8sAnnotations() map[string]string {
 	return map[string]string{
-		MinReplicasAnnotationKey:                  s.Int32(autoscaling.MinReplicas),
-		MaxReplicasAnnotationKey:                  s.Int32(autoscaling.MaxReplicas),
-		WorkersPerReplicaAnnotationKey:            s.Int32(autoscaling.WorkersPerReplica),
-		ThreadsPerWorkerAnnotationKey:             s.Int32(autoscaling.ThreadsPerWorker),
-		TargetReplicaConcurrencyAnnotationKey:     s.Float64(*autoscaling.TargetReplicaConcurrency),
-		MaxReplicaConcurrencyAnnotationKey:        s.Int64(autoscaling.MaxReplicaConcurrency),
-		WindowAnnotationKey:                       autoscaling.Window.String(),
-		DownscaleStabilizationPeriodAnnotationKey: autoscaling.DownscaleStabilizationPeriod.String(),
-		UpscaleStabilizationPeriodAnnotationKey:   autoscaling.UpscaleStabilizationPeriod.String(),
-		MaxDownscaleFactorAnnotationKey:           s.Float64(autoscaling.MaxDownscaleFactor),
-		MaxUpscaleFactorAnnotationKey:             s.Float64(autoscaling.MaxUpscaleFactor),
-		DownscaleToleranceAnnotationKey:           s.Float64(autoscaling.DownscaleTolerance),
-		UpscaleToleranceAnnotationKey:             s.Float64(autoscaling.UpscaleTolerance),
+		APIGatewayAnnotationKey:                   api.Networking.APIGateway.String(),
+		MinReplicasAnnotationKey:                  s.Int32(api.Autoscaling.MinReplicas),
+		MaxReplicasAnnotationKey:                  s.Int32(api.Autoscaling.MaxReplicas),
+		WorkersPerReplicaAnnotationKey:            s.Int32(api.Autoscaling.WorkersPerReplica),
+		ThreadsPerWorkerAnnotationKey:             s.Int32(api.Autoscaling.ThreadsPerWorker),
+		TargetReplicaConcurrencyAnnotationKey:     s.Float64(*api.Autoscaling.TargetReplicaConcurrency),
+		MaxReplicaConcurrencyAnnotationKey:        s.Int64(api.Autoscaling.MaxReplicaConcurrency),
+		WindowAnnotationKey:                       api.Autoscaling.Window.String(),
+		DownscaleStabilizationPeriodAnnotationKey: api.Autoscaling.DownscaleStabilizationPeriod.String(),
+		UpscaleStabilizationPeriodAnnotationKey:   api.Autoscaling.UpscaleStabilizationPeriod.String(),
+		MaxDownscaleFactorAnnotationKey:           s.Float64(api.Autoscaling.MaxDownscaleFactor),
+		MaxUpscaleFactorAnnotationKey:             s.Float64(api.Autoscaling.MaxUpscaleFactor),
+		DownscaleToleranceAnnotationKey:           s.Float64(api.Autoscaling.DownscaleTolerance),
+		UpscaleToleranceAnnotationKey:             s.Float64(api.Autoscaling.UpscaleTolerance),
+	}
+}
+
+func APIGatewayFromAnnotations(k8sObj kmeta.Object) (APIGatewayType, error) {
+	apiGatewayType := APIGatewayTypeFromString(k8sObj.GetAnnotations()[APIGatewayAnnotationKey])
+	if apiGatewayType == UnknownAPIGatewayType {
+		return UnknownAPIGatewayType, ErrorUnknownAPIGatewayType()
 	}
+	return apiGatewayType, nil
 }
 
-func AutoscalingFromAnnotations(deployment kmeta.Object) (*Autoscaling, error) {
+func AutoscalingFromAnnotations(k8sObj kmeta.Object) (*Autoscaling, error) {
 	a := Autoscaling{}
 
-	minReplicas, err := k8s.ParseInt32Annotation(deployment, MinReplicasAnnotationKey)
+	minReplicas, err := k8s.ParseInt32Annotation(k8sObj, MinReplicasAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.MinReplicas = minReplicas
 
-	maxReplicas, err := k8s.ParseInt32Annotation(deployment, MaxReplicasAnnotationKey)
+	maxReplicas, err := k8s.ParseInt32Annotation(k8sObj, MaxReplicasAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.MaxReplicas = maxReplicas
 
-	workersPerReplica, err := k8s.ParseInt32Annotation(deployment, WorkersPerReplicaAnnotationKey)
+	workersPerReplica, err := k8s.ParseInt32Annotation(k8sObj, WorkersPerReplicaAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.WorkersPerReplica = workersPerReplica
 
-	threadsPerWorker, err := k8s.ParseInt32Annotation(deployment, ThreadsPerWorkerAnnotationKey)
+	threadsPerWorker, err := k8s.ParseInt32Annotation(k8sObj, ThreadsPerWorkerAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.ThreadsPerWorker = threadsPerWorker
 
-	targetReplicaConcurrency, err := k8s.ParseFloat64Annotation(deployment, TargetReplicaConcurrencyAnnotationKey)
+	targetReplicaConcurrency, err := k8s.ParseFloat64Annotation(k8sObj, TargetReplicaConcurrencyAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.TargetReplicaConcurrency = &targetReplicaConcurrency
 
-	maxReplicaConcurrency, err := k8s.ParseInt64Annotation(deployment, MaxReplicaConcurrencyAnnotationKey)
+	maxReplicaConcurrency, err := k8s.ParseInt64Annotation(k8sObj, MaxReplicaConcurrencyAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.MaxReplicaConcurrency = maxReplicaConcurrency
 
-	window, err := k8s.ParseDurationAnnotation(deployment, WindowAnnotationKey)
+	window, err := k8s.ParseDurationAnnotation(k8sObj, WindowAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.Window = window
 
-	downscaleStabilizationPeriod, err := k8s.ParseDurationAnnotation(deployment, DownscaleStabilizationPeriodAnnotationKey)
+	downscaleStabilizationPeriod, err := k8s.ParseDurationAnnotation(k8sObj, DownscaleStabilizationPeriodAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.DownscaleStabilizationPeriod = downscaleStabilizationPeriod
 
-	upscaleStabilizationPeriod, err := k8s.ParseDurationAnnotation(deployment, UpscaleStabilizationPeriodAnnotationKey)
+	upscaleStabilizationPeriod, err := k8s.ParseDurationAnnotation(k8sObj, UpscaleStabilizationPeriodAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.UpscaleStabilizationPeriod = upscaleStabilizationPeriod
 
-	maxDownscaleFactor, err := k8s.ParseFloat64Annotation(deployment, MaxDownscaleFactorAnnotationKey)
+	maxDownscaleFactor, err := k8s.ParseFloat64Annotation(k8sObj, MaxDownscaleFactorAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.MaxDownscaleFactor = maxDownscaleFactor
 
-	maxUpscaleFactor, err := k8s.ParseFloat64Annotation(deployment, MaxUpscaleFactorAnnotationKey)
+	maxUpscaleFactor, err := k8s.ParseFloat64Annotation(k8sObj, MaxUpscaleFactorAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.MaxUpscaleFactor = maxUpscaleFactor
 
-	downscaleTolerance, err := k8s.ParseFloat64Annotation(deployment, DownscaleToleranceAnnotationKey)
+	downscaleTolerance, err := k8s.ParseFloat64Annotation(k8sObj, DownscaleToleranceAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
 	a.DownscaleTolerance = downscaleTolerance
 
-	upscaleTolerance, err := k8s.ParseFloat64Annotation(deployment, UpscaleToleranceAnnotationKey)
+	upscaleTolerance, err := k8s.ParseFloat64Annotation(k8sObj, UpscaleToleranceAnnotationKey)
 	if err != nil {
 		return nil, err
 	}
@@ -292,6 +306,11 @@ func (api *API) UserStr(provider types.ProviderType) string {
 			sb.WriteString(s.Indent(api.Monitoring.UserStr(), "  "))
 		}
 
+		if api.Networking != nil {
+			sb.WriteString(fmt.Sprintf("%s:\n", NetworkingKey))
+			sb.WriteString(s.Indent(api.Networking.UserStr(), "  "))
+		}
+
 		if api.Autoscaling != nil {
 			sb.WriteString(fmt.Sprintf("%s:\n", AutoscalingKey))
 			sb.WriteString(s.Indent(api.Autoscaling.UserStr(), "  "))
@@ -360,6 +379,12 @@ func (monitoring *Monitoring) UserStr() string {
 	return sb.String()
 }
 
+func (networking *Networking) UserStr() string {
+	var sb strings.Builder
+	sb.WriteString(fmt.Sprintf("%s: %s\n", APIGatewayKey, networking.APIGateway))
+	return sb.String()
+}
+
 func (compute *Compute) UserStr() string {
 	var sb strings.Builder
 	if compute.CPU == nil {
diff --git a/pkg/types/userconfig/api_gateway_type.go b/pkg/types/userconfig/api_gateway_type.go
new file mode 100644
index 0000000000..6b3cdacf29
--- /dev/null
+++ b/pkg/types/userconfig/api_gateway_type.go
@@ -0,0 +1,78 @@
+/*
+Copyright 2020 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package userconfig
+
+type APIGatewayType int
+
+const (
+	UnknownAPIGatewayType APIGatewayType = iota
+	PublicAPIGatewayType
+	NoneAPIGatewayType
+)
+
+var _apiGatewayTypes = []string{
+	"unknown",
+	"public",
+	"none",
+}
+
+func APIGatewayTypeFromString(s string) APIGatewayType {
+	for i := 0; i < len(_apiGatewayTypes); i++ {
+		if s == _apiGatewayTypes[i] {
+			return APIGatewayType(i)
+		}
+	}
+	return UnknownAPIGatewayType
+}
+
+func APIGatewayTypeStrings() []string {
+	return _apiGatewayTypes[1:]
+}
+
+func (t APIGatewayType) String() string {
+	return _apiGatewayTypes[t]
+}
+
+// MarshalText satisfies TextMarshaler
+func (t APIGatewayType) MarshalText() ([]byte, error) {
+	return []byte(t.String()), nil
+}
+
+// UnmarshalText satisfies TextUnmarshaler
+func (t *APIGatewayType) UnmarshalText(text []byte) error {
+	enum := string(text)
+	for i := 0; i < len(_apiGatewayTypes); i++ {
+		if enum == _apiGatewayTypes[i] {
+			*t = APIGatewayType(i)
+			return nil
+		}
+	}
+
+	*t = UnknownAPIGatewayType
+	return nil
+}
+
+// UnmarshalBinary satisfies BinaryUnmarshaler
+// Needed for msgpack
+func (t *APIGatewayType) UnmarshalBinary(data []byte) error {
+	return t.UnmarshalText(data)
+}
+
+// MarshalBinary satisfies BinaryMarshaler
+func (t APIGatewayType) MarshalBinary() ([]byte, error) {
+	return []byte(t.String()), nil
+}
diff --git a/pkg/types/userconfig/config_key.go b/pkg/types/userconfig/config_key.go
index 04765aaaf6..86da169d6f 100644
--- a/pkg/types/userconfig/config_key.go
+++ b/pkg/types/userconfig/config_key.go
@@ -23,6 +23,7 @@ const (
 	LocalPortKey      = "local_port"
 	PredictorKey      = "predictor"
 	MonitoringKey     = "monitoring"
+	NetworkingKey     = "networking"
 	ComputeKey        = "compute"
 	AutoscalingKey    = "autoscaling"
 	UpdateStrategyKey = "update_strategy"
@@ -46,6 +47,9 @@ const (
 	KeyKey       = "key"
 	ModelTypeKey = "model_type"
 
+	// Networking
+	APIGatewayKey = "api_gateway"
+
 	// Compute
 	CPUKey = "cpu"
 	MemKey = "mem"
@@ -72,6 +76,7 @@ const (
 	MaxUnavailableKey = "max_unavailable"
 
 	// K8s annotation
+	APIGatewayAnnotationKey                   = "networking.cortex.dev/api-gateway"
 	MinReplicasAnnotationKey                  = "autoscaling.cortex.dev/min-replicas"
 	MaxReplicasAnnotationKey                  = "autoscaling.cortex.dev/max-replicas"
 	WorkersPerReplicaAnnotationKey            = "autoscaling.cortex.dev/workers-per-replica"
diff --git a/pkg/types/userconfig/errors.go b/pkg/types/userconfig/errors.go
new file mode 100644
index 0000000000..2e4a57e5e6
--- /dev/null
+++ b/pkg/types/userconfig/errors.go
@@ -0,0 +1,32 @@
+/*
+Copyright 2020 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package userconfig
+
+import (
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+)
+
+const (
+	ErrUnknownAPIGatewayType = "errors.unknown_api_gateway_type"
+)
+
+func ErrorUnknownAPIGatewayType() error {
+	return errors.WithStack(&errors.Error{
+		Kind:    ErrUnknownAPIGatewayType,
+		Message: "unknown api gateway type",
+	})
+}