cortexlabs
diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
+55-13
diff --git a/cli/cmd/lib_cluster_config.go b/cli/cmd/lib_cluster_config.go
+16-2
diff --git a/docs/clusters/management/create.md b/docs/clusters/management/create.md
+3
diff --git a/docs/clusters/management/production.md b/docs/clusters/management/production.md
+4
diff --git a/docs/clusters/networking/load-balancers.md b/docs/clusters/networking/load-balancers.md
+2
diff --git a/docs/overview.md b/docs/overview.md
+1-1
diff --git a/manager/get_api_load_balancer_state.py b/manager/get_api_load_balancer_state.py
+11-6
diff --git a/manager/get_operator_load_balancer_state.py b/manager/get_operator_load_balancer_state.py
+2-2
diff --git a/manager/get_operator_target_group_status.py b/manager/get_operator_target_group_status.py
+2-2
diff --git a/manager/helpers.py b/manager/helpers.py
+43-5
diff --git a/manager/manifests/istio.yaml.j2 b/manager/manifests/istio.yaml.j2
+1-1
@@ -29,6 +29,7 @@ import (
 	"github.com/aws/aws-sdk-go/service/autoscaling"
 	"github.com/aws/aws-sdk-go/service/ec2"
 	"github.com/aws/aws-sdk-go/service/eks"
+	"github.com/aws/aws-sdk-go/service/elb"
 	"github.com/aws/aws-sdk-go/service/elbv2"
 	"github.com/aws/aws-sdk-go/service/s3"
 	"github.com/cortexlabs/cortex/cli/cluster"
@@ -302,7 +303,7 @@ var _clusterUpCmd = &cobra.Command{
 			exit.Error(ErrorClusterUp(out + helpStr))
 		}
 
-		loadBalancer, err := getLoadBalancer(clusterConfig.ClusterName, OperatorLoadBalancer, awsClient)
+		loadBalancer, err := getNLBLoadBalancer(clusterConfig.ClusterName, OperatorLoadBalancer, awsClient)
 		if err != nil {
 			exit.Error(errors.Append(err, fmt.Sprintf("\n\nyou can attempt to resolve this issue and configure your cli environment by running `cortex cluster info --configure-env %s`", envName)))
 		}
@@ -522,7 +523,7 @@ var _clusterDownCmd = &cobra.Command{
 		}
 
 		// updating CLI env is best-effort, so ignore errors
-		loadBalancer, _ := getLoadBalancer(accessConfig.ClusterName, OperatorLoadBalancer, awsClient)
+		loadBalancer, _ := getNLBLoadBalancer(accessConfig.ClusterName, OperatorLoadBalancer, awsClient)
 
 		fmt.Print("￮ deleting sqs queues ... ")
 		numDeleted, err := awsClient.DeleteQueuesWithPrefix(clusterconfig.SQSNamePrefix(accessConfig.ClusterName))
@@ -735,7 +736,7 @@ var _clusterExportCmd = &cobra.Command{
 			exit.Error(err)
 		}
 
-		loadBalancer, err := getLoadBalancer(accessConfig.ClusterName, OperatorLoadBalancer, awsClient)
+		loadBalancer, err := getNLBLoadBalancer(accessConfig.ClusterName, OperatorLoadBalancer, awsClient)
 		if err != nil {
 			exit.Error(err)
 		}
@@ -881,17 +882,27 @@ func cmdPrintConfig(awsClient *awslib.Client, accessConfig *clusterconfig.Access
 func cmdInfo(awsClient *awslib.Client, accessConfig *clusterconfig.AccessConfig, stacks clusterstate.ClusterStacks, outputType flags.OutputType, disallowPrompt bool) {
 	clusterConfig := refreshCachedClusterConfig(awsClient, accessConfig, outputType == flags.PrettyOutputType)
 
-	operatorLoadBalancer, err := getLoadBalancer(accessConfig.ClusterName, OperatorLoadBalancer, awsClient)
+	operatorLoadBalancer, err := getNLBLoadBalancer(accessConfig.ClusterName, OperatorLoadBalancer, awsClient)
 	if err != nil {
 		exit.Error(err)
 	}
-	apiLoadBalancer, err := getLoadBalancer(accessConfig.ClusterName, APILoadBalancer, awsClient)
-	if err != nil {
-		exit.Error(err)
-	}
-
 	operatorEndpoint := s.EnsurePrefix(*operatorLoadBalancer.DNSName, "https://")
-	apiEndpoint := *apiLoadBalancer.DNSName
+
+	var apiEndpoint string
+	if clusterConfig.APILoadBalancerType == clusterconfig.NLBLoadBalancerType {
+		apiLoadBalancer, err := getNLBLoadBalancer(accessConfig.ClusterName, APILoadBalancer, awsClient)
+		if err != nil {
+			exit.Error(err)
+		}
+		apiEndpoint = *apiLoadBalancer.DNSName
+	}
+	if clusterConfig.APILoadBalancerType == clusterconfig.ELBLoadBalancerType {
+		apiLoadBalancer, err := getELBLoadBalancer(accessConfig.ClusterName, APILoadBalancer, awsClient)
+		if err != nil {
+			exit.Error(err)
+		}
+		apiEndpoint = *apiLoadBalancer.DNSName
+	}
 
 	if outputType == flags.JSONOutputType || outputType == flags.YAMLOutputType {
 		infoResponse, err := getInfoOperatorResponse(operatorEndpoint)
@@ -983,8 +994,17 @@ func printInfoPricing(infoResponse *schema.InfoResponse, clusterConfig clusterco
 	prometheusEBSPrice := awslib.EBSMetadatas[clusterConfig.Region]["gp3"].PriceGB * 20 / 30 / 24
 	metricsEBSPrice := awslib.EBSMetadatas[clusterConfig.Region]["gp2"].PriceGB * (40 + 2) / 30 / 24
 	nlbPrice := awslib.NLBMetadatas[clusterConfig.Region].Price
+	elbPrice := awslib.ELBMetadatas[clusterConfig.Region].Price
 	natUnitPrice := awslib.NATMetadatas[clusterConfig.Region].Price
 
+	var loadBalancersPrice float64
+	usesELBForAPILoadBalancer := clusterConfig.APILoadBalancerType == clusterconfig.ELBLoadBalancerType
+	if usesELBForAPILoadBalancer {
+		loadBalancersPrice = nlbPrice + elbPrice
+	} else {
+		loadBalancersPrice = 2 * nlbPrice
+	}
+
 	headers := []table.Header{
 		{Title: "aws resource"},
 		{Title: "cost per hour"},
@@ -1033,12 +1053,17 @@ func printInfoPricing(infoResponse *schema.InfoResponse, clusterConfig clusterco
 	} else if clusterConfig.NATGateway == clusterconfig.HighlyAvailableNATGateway {
 		natTotalPrice = natUnitPrice * float64(len(clusterConfig.AvailabilityZones))
 	}
-	totalPrice := eksPrice + totalNodeGroupsPrice + operatorNodeGroupPrice + prometheusNodeGroupPrice + nlbPrice*2 + natTotalPrice
+	totalPrice := eksPrice + totalNodeGroupsPrice + operatorNodeGroupPrice + prometheusNodeGroupPrice + loadBalancersPrice + natTotalPrice
 	fmt.Printf(console.Bold("\nyour cluster currently costs %s per hour\n\n"), s.DollarsAndCents(totalPrice))
 
 	rows = append(rows, []interface{}{fmt.Sprintf("%d t3.medium %s (cortex system)", len(infoResponse.OperatorNodeInfos), s.PluralS("instance", len(infoResponse.OperatorNodeInfos))), s.DollarsAndTenthsOfCents(operatorNodeGroupPrice) + " total"})
 	rows = append(rows, []interface{}{fmt.Sprintf("1 %s instance (prometheus)", clusterConfig.PrometheusInstanceType), s.DollarsAndTenthsOfCents(prometheusNodeGroupPrice)})
-	rows = append(rows, []interface{}{"2 network load balancers", s.DollarsMaxPrecision(nlbPrice*2) + " total"})
+	if usesELBForAPILoadBalancer {
+		rows = append(rows, []interface{}{"1 network load balancer", s.DollarsMaxPrecision(nlbPrice)})
+		rows = append(rows, []interface{}{"1 classic load balancer", s.DollarsMaxPrecision(elbPrice)})
+	} else {
+		rows = append(rows, []interface{}{"2 network load balancers", s.DollarsMaxPrecision(loadBalancersPrice) + " total"})
+	}
 
 	if clusterConfig.NATGateway == clusterconfig.SingleNATGateway {
 		rows = append(rows, []interface{}{"1 nat gateway", s.DollarsMaxPrecision(natUnitPrice)})
@@ -1366,7 +1391,24 @@ func (lb LoadBalancer) String() string {
 }
 
 // Will return error if the load balancer can't be found
-func getLoadBalancer(clusterName string, whichLB LoadBalancer, awsClient *awslib.Client) (*elbv2.LoadBalancer, error) {
+func getNLBLoadBalancer(clusterName string, whichLB LoadBalancer, awsClient *awslib.Client) (*elbv2.LoadBalancer, error) {
+	loadBalancer, err := awsClient.FindLoadBalancerV2(map[string]string{
+		clusterconfig.ClusterNameTag: clusterName,
+		"cortex.dev/load-balancer":   whichLB.String(),
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, fmt.Sprintf("unable to locate %s load balancer", whichLB.String()))
+	}
+
+	if loadBalancer == nil {
+		return nil, ErrorNoOperatorLoadBalancer(whichLB.String())
+	}
+
+	return loadBalancer, nil
+}
+
+// Looks up the classic load balancer (ELB) by its cluster tags; will return error if the load balancer can't be found
+func getELBLoadBalancer(clusterName string, whichLB LoadBalancer, awsClient *awslib.Client) (*elb.LoadBalancerDescription, error) {
 	loadBalancer, err := awsClient.FindLoadBalancer(map[string]string{
 		clusterconfig.ClusterNameTag: clusterName,
 		"cortex.dev/load-balancer":   whichLB.String(),
 
@@ -169,8 +169,17 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsClient
 	prometheusEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp3"].PriceGB * 20 / 30 / 24
 	metricsEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp2"].PriceGB * (40 + 2) / 30 / 24
 	nlbPrice := aws.NLBMetadatas[clusterConfig.Region].Price
+	elbPrice := aws.ELBMetadatas[clusterConfig.Region].Price
 	natUnitPrice := aws.NATMetadatas[clusterConfig.Region].Price
 
+	var loadBalancersPrice float64
+	usesELBForAPILoadBalancer := clusterConfig.APILoadBalancerType == clusterconfig.ELBLoadBalancerType
+	if usesELBForAPILoadBalancer {
+		loadBalancersPrice = nlbPrice + elbPrice
+	} else {
+		loadBalancersPrice = 2 * nlbPrice
+	}
+
 	var natTotalPrice float64
 	if clusterConfig.NATGateway == clusterconfig.SingleNATGateway {
 		natTotalPrice = natUnitPrice
@@ -187,7 +196,7 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsClient
 	rows = append(rows, []interface{}{"1 eks cluster", s.DollarsMaxPrecision(eksPrice)})
 
 	ngNameToSpotInstancesUsed := map[string]int{}
-	fixedPrice := eksPrice + 2*(operatorInstancePrice+operatorEBSPrice) + prometheusInstancePrice + prometheusEBSPrice + metricsEBSPrice + 2*nlbPrice + natTotalPrice
+	fixedPrice := eksPrice + 2*(operatorInstancePrice+operatorEBSPrice) + prometheusInstancePrice + prometheusEBSPrice + metricsEBSPrice + loadBalancersPrice + natTotalPrice
 	totalMinPrice := fixedPrice
 	totalMaxPrice := fixedPrice
 	for _, ng := range clusterConfig.NodeGroups {
@@ -236,7 +245,12 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsClient
 	prometheusNodeGroupPrice := prometheusInstancePrice + prometheusEBSPrice + metricsEBSPrice
 	rows = append(rows, []interface{}{"2 t3.medium instances (cortex system)", s.DollarsAndTenthsOfCents(operatorNodeGroupPrice) + " total"})
 	rows = append(rows, []interface{}{fmt.Sprintf("1 %s instance (prometheus)", clusterConfig.PrometheusInstanceType), s.DollarsAndTenthsOfCents(prometheusNodeGroupPrice)})
-	rows = append(rows, []interface{}{"2 network load balancers", s.DollarsMaxPrecision(nlbPrice) + " each"})
+	if usesELBForAPILoadBalancer {
+		rows = append(rows, []interface{}{"1 network load balancer", s.DollarsMaxPrecision(nlbPrice)})
+		rows = append(rows, []interface{}{"1 classic load balancer", s.DollarsMaxPrecision(elbPrice)})
+	} else {
+		rows = append(rows, []interface{}{"2 network load balancers", s.DollarsMaxPrecision(loadBalancersPrice) + " total"})
+	}
 
 	if clusterConfig.NATGateway == clusterconfig.SingleNATGateway {
 		rows = append(rows, []interface{}{"1 nat gateway", s.DollarsMaxPrecision(natUnitPrice)})
 
@@ -58,6 +58,9 @@ subnet_visibility: public
 # NAT gateway (required when using private subnets) [none | single | highly_available (a NAT gateway per availability zone)]
 nat_gateway: none
 
+# API load balancer type [nlb | elb (classic load balancer)]
+api_load_balancer_type: nlb
+
 # API load balancer scheme [internet-facing | internal]
 api_load_balancer_scheme: internet-facing
 
 
@@ -45,6 +45,10 @@ operator_load_balancer_cidr_white_list: [0.0.0.0/0]
 
 See [here](../networking/load-balancers.md) for more information about the load balancers.
 
+### Workload load-balancing
+
+Depending on your application's requirements, you might have different needs from the cluster's API load balancer. By default, the API load balancer is a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html) (NLB). In some situations, a [Classic Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/introduction.html) (ELB) may be preferred; you can select it by setting `api_load_balancer_type: elb` in your cluster configuration file. The load balancer type can only be chosen before creating your cluster.
+
 ### Ensure node provisioning
 
 You can take advantage of the cost savings of spot instances and the reliability of on-demand instances by utilizing the `priority` field in node groups. You can deploy two node groups, one that is spot and another that is on-demand. Set the priority of the spot node group to be higher than the priority of the on-demand node group. This encourages the cluster-autoscaler to try to spin up instances from the spot node group first. If there are no more spot instances available, the on-demand node group will be used instead.
 
@@ -7,3 +7,5 @@ All APIs share a single API load balancer. By default, the API load balancer is
 The SSL certificate on the API load balancer is autogenerated during installation using `localhost` as the Common Name (CN). Therefore, clients will need to skip certificate verification when making HTTPS requests to your APIs (e.g. `curl -k https://***`), or make HTTP requests instead (e.g. `curl http://***`). Alternatively, you can enable HTTPS by using a [custom domain](custom-domain.md) and setting up [https](https.md) or by [creating an API Gateway](api-gateway.md) to forward requests to your API load balancer.
 
 There is a separate load balancer for the Cortex operator. By default, the operator load balancer is public. You can configure your operator load balancer to be private by setting `operator_load_balancer_scheme: internal` in your cluster configuration file (before creating your cluster). You can use [VPC Peering](vpc-peering.md) to enable your Cortex CLI to connect to your cluster operator from another VPC. You can enforce that incoming requests to the Cortex operator must originate from specific ip address ranges by specifying `operator_load_balancer_cidr_white_list: [<CIDR list>]` in your cluster configuration.
+
+By default, the API load balancer and the operator load balancer are both [Network Load Balancers](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html) (NLB). The API load balancer can instead be configured as a [Classic Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/introduction.html) (ELB) by setting `api_load_balancer_type: elb` in your cluster configuration file. The API load balancer type must be specified before creating your cluster.
@@ -14,7 +14,7 @@ Cortex uses the Kubernetes Cluster Autoscaler to scale the appropriate node grou
 
 By default, a new dedicated VPC is created for the cluster during installation.
 
-Two network load balancers (NLBs) are created to route traffic to the cluster. One load balancer is dedicated for traffic to your APIs, and the other load balancer is dedicated for API management requests to Cortex from your CLI or Python client. Traffic to the load balancers can be secured and restricted based on your cluster configuration.
+Two AWS load balancers (network load balancers by default) are created to route traffic to the cluster. One load balancer is dedicated for traffic to your APIs, and the other load balancer is dedicated for API management requests to Cortex from your CLI or Python client. Traffic to the load balancers can be secured and restricted based on your cluster configuration.
 
 ### Observability
 
 
@@ -15,17 +15,22 @@
 import boto3
 import os
 
-from helpers import get_api_load_balancer
+from helpers import get_api_load_balancer_v2, get_api_load_balancer, get_api_load_balancer_health
 
 
 def get_api_load_balancer_state():
     cluster_name = os.environ["CORTEX_CLUSTER_NAME"]
     region = os.environ["CORTEX_REGION"]
-
-    client_elbv2 = boto3.client("elbv2", region_name=region)
-
-    load_balancer = get_api_load_balancer(cluster_name, client_elbv2)
-    return load_balancer["State"]["Code"]
+    load_balancer_type = os.environ["CORTEX_API_LOAD_BALANCER_TYPE"]
+
+    if load_balancer_type == "nlb":
+        client_elbv2 = boto3.client("elbv2", region_name=region)
+        load_balancer = get_api_load_balancer_v2(cluster_name, client_elbv2)
+        return load_balancer["State"]["Code"]
+    else:
+        client_elb = boto3.client("elb", region_name=region)
+        load_balancer = get_api_load_balancer(cluster_name, client_elb)
+        return get_api_load_balancer_health(load_balancer["LoadBalancerName"], client_elb)
 
 
 if __name__ == "__main__":
 
@@ -15,7 +15,7 @@
 import boto3
 import os
 
-from helpers import get_operator_load_balancer
+from helpers import get_operator_load_balancer_v2
 
 
 def get_operator_load_balancer_state():
@@ -24,7 +24,7 @@ def get_operator_load_balancer_state():
 
     client_elbv2 = boto3.client("elbv2", region_name=region)
 
-    load_balancer = get_operator_load_balancer(cluster_name, client_elbv2)
+    load_balancer = get_operator_load_balancer_v2(cluster_name, client_elbv2)
     return load_balancer["State"]["Code"]
 
 
 
@@ -16,7 +16,7 @@
 import os
 import json
 
-from helpers import get_operator_load_balancer
+from helpers import get_operator_load_balancer_v2
 
 
 def get_operator_target_group_status():
@@ -25,7 +25,7 @@ def get_operator_target_group_status():
 
     client_elbv2 = boto3.client("elbv2", region_name=region)
 
-    load_balancer_arn = get_operator_load_balancer(cluster_name, client_elbv2)["LoadBalancerArn"]
+    load_balancer_arn = get_operator_load_balancer_v2(cluster_name, client_elbv2)["LoadBalancerArn"]
     target_group_arn = get_load_balancer_https_target_group_arn(load_balancer_arn, client_elbv2)
     return get_target_health(target_group_arn, client_elbv2)
 
 
@@ -13,15 +13,29 @@
 # limitations under the License.
 
 
-def get_operator_load_balancer(cluster_name, client_elbv2):
-    return _get_load_balancer("operator", cluster_name, client_elbv2)
+def get_operator_load_balancer_v2(cluster_name, client_elbv2):
+    return _get_load_balancer_v2("operator", cluster_name, client_elbv2)
 
 
-def get_api_load_balancer(cluster_name, client_elbv2):
-    return _get_load_balancer("api", cluster_name, client_elbv2)
+def get_api_load_balancer_v2(cluster_name, client_elbv2):
+    return _get_load_balancer_v2("api", cluster_name, client_elbv2)
 
 
-def _get_load_balancer(load_balancer_tag, cluster_name, client_elbv2):
+def get_api_load_balancer(cluster_name, client_elb):
+    return _get_load_balancer("api", cluster_name, client_elb)
+
+
+def get_api_load_balancer_health(load_balancer_name, client_elb):
+    instance_health = client_elb.describe_instance_health(
+        LoadBalancerName=load_balancer_name,
+    )
+    for instance_state in instance_health["InstanceStates"]:
+        if instance_state["State"] != "InService":
+            return "inactive"
+    return "active"
+
+
+def _get_load_balancer_v2(load_balancer_tag, cluster_name, client_elbv2):
     paginator = client_elbv2.get_paginator("describe_load_balancers")
     for load_balancer_page in paginator.paginate(PaginationConfig={"PageSize": 20}):
         load_balancers = {
@@ -43,3 +57,27 @@ def _get_load_balancer(load_balancer_tag, cluster_name, client_elbv2):
                 return load_balancers[tag_description["ResourceArn"]]
 
     raise Exception(f"unable to find {load_balancer_tag} load balancer")
+
+
+def _get_load_balancer(load_balancer_tag, cluster_name, client_elb):
+    paginator = client_elb.get_paginator("describe_load_balancers")
+    for load_balancer_page in paginator.paginate(PaginationConfig={"PageSize": 20}):
+        load_balancers = {
+            load_balancer["LoadBalancerName"]: load_balancer
+            for load_balancer in load_balancer_page["LoadBalancerDescriptions"]
+        }
+        tag_descriptions = client_elb.describe_tags(LoadBalancerNames=list(load_balancers.keys()))[
+            "TagDescriptions"
+        ]
+        for tag_description in tag_descriptions:
+            foundClusterNameTag = False
+            foundLoadBalancerTag = False
+            for tags in tag_description["Tags"]:
+                if tags["Key"] == "cortex.dev/cluster-name" and tags["Value"] == cluster_name:
+                    foundClusterNameTag = True
+                if tags["Key"] == "cortex.dev/load-balancer" and tags["Value"] == load_balancer_tag:
+                    foundLoadBalancerTag = True
+            if foundClusterNameTag and foundLoadBalancerTag:
+                return load_balancers[tag_description["LoadBalancerName"]]
+
+    raise Exception(f"unable to find {load_balancer_tag} load balancer")
@@ -109,7 +109,7 @@ spec:
           istio: ingressgateway-apis
         k8s:
           serviceAnnotations:
-            service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
+            service.beta.kubernetes.io/aws-load-balancer-type: "{{ env['CORTEX_API_LOAD_BALANCER_TYPE'] }}"
             service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true"
             service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags: "{{ env['CORTEX_API_LOAD_BALANCER_TAGS'] }}"
             service.beta.kubernetes.io/aws-load-balancer-backend-protocol: "tcp"