Skip to content

Improve availability zone selection and validation #885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 21, 2020
4 changes: 0 additions & 4 deletions cli/cmd/lib_cluster_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,6 @@ func getInstallClusterConfig(awsCreds AWSCredentials) (*clusterconfig.Config, er
return nil, err
}

if clusterConfig.Spot != nil && *clusterConfig.Spot {
clusterConfig.AutoFillSpot(awsClient)
}

err = clusterConfig.Validate(awsClient)
if err != nil {
if _flagClusterConfig != "" {
Expand Down
78 changes: 73 additions & 5 deletions pkg/lib/aws/ec2.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/cortexlabs/cortex/pkg/lib/errors"
"github.com/cortexlabs/cortex/pkg/lib/parallel"
"github.com/cortexlabs/cortex/pkg/lib/sets/strset"
s "github.com/cortexlabs/cortex/pkg/lib/strings"
)

Expand Down Expand Up @@ -64,19 +66,85 @@ func (c *Client) SpotInstancePrice(region string, instanceType string) (float64,
return min, nil
}

func (c *Client) GetAvailabilityZones() ([]string, error) {
input := &ec2.DescribeAvailabilityZonesInput{}
func (c *Client) ListAvailabilityZones() (strset.Set, error) {
input := &ec2.DescribeAvailabilityZonesInput{
Filters: []*ec2.Filter{
{
Name: aws.String("region-name"),
Values: []*string{aws.String(c.Region)},
},
{
Name: aws.String("state"),
Values: []*string{aws.String(ec2.AvailabilityZoneStateAvailable)},
},
},
}

result, err := c.EC2().DescribeAvailabilityZones(input)
if err != nil {
return nil, errors.WithStack(err)
}

availabilityZones := []string{}
zones := strset.New()
for _, az := range result.AvailabilityZones {
if az.ZoneName != nil {
availabilityZones = append(availabilityZones, *az.ZoneName)
zones.Add(*az.ZoneName)
}
}

return availabilityZones, nil
return zones, nil
}

// listSupportedAvailabilityZonesSingle returns the set of availability zones
// for which EC2 reports at least one zone-scoped, non-marketplace reserved
// instance offering of the given instance type. Every page of results is
// consumed. Any API error is returned with a stack trace attached.
func (c *Client) listSupportedAvailabilityZonesSingle(instanceType string) (strset.Set, error) {
	scopeFilter := &ec2.Filter{
		Name:   aws.String("scope"),
		Values: []*string{aws.String(ec2.ScopeAvailabilityZone)},
	}
	request := &ec2.DescribeReservedInstancesOfferingsInput{
		InstanceType:       aws.String(instanceType),
		IncludeMarketplace: aws.Bool(false),
		Filters:            []*ec2.Filter{scopeFilter},
	}

	supported := strset.New()
	collectPage := func(page *ec2.DescribeReservedInstancesOfferingsOutput, _ bool) bool {
		for _, offering := range page.ReservedInstancesOfferings {
			if offering.AvailabilityZone == nil {
				continue
			}
			supported.Add(*offering.AvailabilityZone)
		}
		// Always ask for the next page; the zone set is only complete once
		// all offerings have been seen.
		return true
	}

	if err := c.EC2().DescribeReservedInstancesOfferingsPages(request, collectPage); err != nil {
		return nil, errors.WithStack(err)
	}

	return supported, nil
}

// ListSupportedAvailabilityZones returns the availability zones that support
// instanceType and every entry of instanceTypes, i.e. the intersection of the
// per-instance-type zone sets. The per-type lookups are dispatched through
// parallel.RunFirstErr; if any lookup fails, that error is returned and the
// zone set is nil.
func (c *Client) ListSupportedAvailabilityZones(instanceType string, instanceTypes ...string) (strset.Set, error) {
	// Build a fresh slice instead of appending to the variadic argument:
	// when the caller passes `slice...`, appending in place could write into
	// the spare capacity of the caller's backing array.
	allInstanceTypes := make([]string, 0, len(instanceTypes)+1)
	allInstanceTypes = append(allInstanceTypes, instanceTypes...)
	allInstanceTypes = append(allInstanceTypes, instanceType)

	zoneSets := make([]strset.Set, len(allInstanceTypes))
	fns := make([]func() error, len(allInstanceTypes))

	for i := range allInstanceTypes {
		// Capture a per-iteration copy of the index for the closure; each
		// closure writes only to its own slot of zoneSets.
		localIdx := i
		fns[i] = func() error {
			zones, err := c.listSupportedAvailabilityZonesSingle(allInstanceTypes[localIdx])
			if err != nil {
				return err
			}
			zoneSets[localIdx] = zones
			return nil
		}
	}

	// allInstanceTypes always contains at least instanceType, so fns[0] exists.
	if err := parallel.RunFirstErr(fns[0], fns[1:]...); err != nil {
		return nil, err
	}

	return strset.Intersection(zoneSets...), nil
}
19 changes: 17 additions & 2 deletions pkg/lib/sets/strset/strset.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package strset
import (
"fmt"
"math"
"sort"
"strings"
)

Expand Down Expand Up @@ -159,7 +160,7 @@ func (s Set) String() string {
for item := range s {
v = append(v, fmt.Sprintf("%v", item))
}
return fmt.Sprintf("[\"%s\"]", strings.Join(v, ", "))
return fmt.Sprintf("[%s]", strings.Join(v, ", "))
}

// Slice returns a slice of all items.
Expand All @@ -171,6 +172,13 @@ func (s Set) Slice() []string {
return v
}

// SliceSorted returns all items of the set as a slice sorted in increasing
// string order.
func (s Set) SliceSorted() []string {
	items := s.Slice()
	sort.Strings(items)
	return items
}

// Merge is like Union, however it modifies the current Set it's applied on
// with the given t Set.
func (s Set) Merge(sets ...Set) {
Expand All @@ -181,7 +189,7 @@ func (s Set) Merge(sets ...Set) {
}
}

// Subtract removes the Set items containing in sets from Set s
// Subtract removes the Set items contained in sets from Set s
func (s Set) Subtract(sets ...Set) {
for _, set := range sets {
for item := range set {
Expand All @@ -190,6 +198,13 @@ func (s Set) Subtract(sets ...Set) {
}
}

// Shrink removes items from s, one Pop at a time, until len(s) <= targetLen.
// Which items are removed is decided by Pop. A negative targetLen is treated
// as 0: len(s) can never drop below zero, so without the clamp the loop
// condition would stay true forever.
func (s Set) Shrink(targetLen int) {
	if targetLen < 0 {
		targetLen = 0
	}
	for len(s) > targetLen {
		s.Pop()
	}
}

// Union is the merger of multiple sets. It returns a new set with all the
// elements present in all the sets that are passed.
func Union(sets ...Set) Set {
Expand Down
103 changes: 103 additions & 0 deletions pkg/types/clusterconfig/availability_zones.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
Copyright 2020 Cortex Labs, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package clusterconfig

import (
"github.com/cortexlabs/cortex/pkg/lib/aws"
"github.com/cortexlabs/cortex/pkg/lib/sets/strset"
)

// _azBlacklist holds availability zones that are never picked as defaults and
// that are removed from the fallback "supported" set when the per-instance-type
// zone lookup fails during validation of user-specified zones.
// NOTE(review): the reason us-east-1e is excluded is not stated in this file — confirm.
var _azBlacklist = strset.New("us-east-1e")

// validateAvailabilityZones fills in or checks cc.AvailabilityZones.
//
// When spot is enabled, every instance type in the spot instance distribution
// other than the primary instance type is also taken into account. If no
// zones are configured, defaults are chosen via setDefaultAvailabilityZones;
// otherwise the configured zones are checked via validateUserAvailabilityZones.
// The error of whichever path runs is returned unchanged.
func (cc *Config) validateAvailabilityZones(awsClient *aws.Client) error {
	var extraInstances []string
	// The previous `len(...) >= 0` clause was always true and has been
	// dropped; ranging over an empty distribution is simply a no-op.
	if cc.Spot != nil && *cc.Spot {
		for _, instanceType := range cc.SpotConfig.InstanceDistribution {
			if instanceType != *cc.InstanceType {
				extraInstances = append(extraInstances, instanceType)
			}
		}
	}

	if len(cc.AvailabilityZones) == 0 {
		return cc.setDefaultAvailabilityZones(awsClient, extraInstances...)
	}

	return cc.validateUserAvailabilityZones(awsClient, extraInstances...)
}

// setDefaultAvailabilityZones chooses cc.AvailabilityZones when none were
// configured.
//
// Candidates are the zones supporting the primary instance type and all
// extraInstances. If that lookup fails, every available zone in the region is
// used instead; if that also fails, the field is left untouched and nil is
// returned so that eksctl can pick the zones itself. Blacklisted zones are
// removed, an error is returned if fewer than two candidates remain, and the
// set is then shrunk to at most 2 zones in us-east-1 or 3 zones elsewhere.
// The result is stored in sorted order.
func (cc *Config) setDefaultAvailabilityZones(awsClient *aws.Client, extraInstances ...string) error {
	candidates, err := awsClient.ListSupportedAvailabilityZones(*cc.InstanceType, extraInstances...)
	if err != nil {
		// Fall back to all zones, ignoring instance type support.
		candidates, err = awsClient.ListAvailabilityZones()
		if err != nil {
			// Leave the choice of availability zones to eksctl.
			return nil
		}
	}

	candidates.Subtract(_azBlacklist)
	if len(candidates) < 2 {
		return ErrorNotEnoughDefaultSupportedZones(awsClient.Region, candidates, *cc.InstanceType, extraInstances...)
	}

	// See https://github.com/weaveworks/eksctl/blob/master/pkg/eks/api.go
	maxZones := 3
	if awsClient.Region == "us-east-1" {
		maxZones = 2
	}
	candidates.Shrink(maxZones)

	cc.AvailabilityZones = candidates.SliceSorted()
	return nil
}

// validateUserAvailabilityZones checks the zones the user configured in
// cc.AvailabilityZones.
//
// Pass one: every zone must be among the region's available zones, otherwise
// ErrorInvalidAvailabilityZone is returned. Pass two: every zone must support
// the primary instance type and all extraInstances, otherwise
// ErrorUnsupportedAvailabilityZone is returned. If the region's zones cannot
// be listed, validation is skipped entirely. If the instance-based lookup
// fails, the region's zones minus the blacklist are used as the supported set.
func (cc *Config) validateUserAvailabilityZones(awsClient *aws.Client, extraInstances ...string) error {
	regionZones, err := awsClient.ListAvailabilityZones()
	if err != nil {
		// Nothing to validate against; skip validation.
		return nil
	}

	for _, zone := range cc.AvailabilityZones {
		if regionZones.Has(zone) {
			continue
		}
		return ErrorInvalidAvailabilityZone(zone, regionZones)
	}

	usableZones, err := awsClient.ListSupportedAvailabilityZones(*cc.InstanceType, extraInstances...)
	if err != nil {
		// Skip instance-based validation; only exclude blacklisted zones.
		usableZones = strset.Difference(regionZones, _azBlacklist)
	}

	for _, zone := range cc.AvailabilityZones {
		if usableZones.Has(zone) {
			continue
		}
		return ErrorUnsupportedAvailabilityZone(zone, *cc.InstanceType, extraInstances...)
	}

	return nil
}
36 changes: 18 additions & 18 deletions pkg/types/clusterconfig/clusterconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package clusterconfig

import (
"fmt"
"regexp"
"sort"
"strings"
Expand All @@ -30,7 +31,6 @@ import (
"github.com/cortexlabs/cortex/pkg/lib/hash"
"github.com/cortexlabs/cortex/pkg/lib/pointer"
"github.com/cortexlabs/cortex/pkg/lib/prompt"
"github.com/cortexlabs/cortex/pkg/lib/sets/strset"
s "github.com/cortexlabs/cortex/pkg/lib/strings"
"github.com/cortexlabs/cortex/pkg/lib/table"
)
Expand Down Expand Up @@ -461,6 +461,8 @@ func (cc *Config) ToAccessConfig() AccessConfig {
}

func (cc *Config) Validate(awsClient *aws.Client) error {
fmt.Print("verifying your configuration...\n\n")

if *cc.MinInstances > *cc.MaxInstances {
return ErrorMinInstancesGreaterThanMax(*cc.MinInstances, *cc.MaxInstances)
}
Expand All @@ -481,21 +483,24 @@ func (cc *Config) Validate(awsClient *aws.Client) error {
}
}

if len(cc.AvailabilityZones) > 0 {
zones, err := awsClient.GetAvailabilityZones()
if err != nil {
return err
}
zoneSet := strset.New(zones...)

for _, az := range cc.AvailabilityZones {
if !zoneSet.Has(az) {
return errors.Wrap(ErrorInvalidAvailabilityZone(az, zones), AvailabilityZonesKey)
// instance_distribution cleanup must be performed before availability_zone cleanup
if cc.Spot != nil && *cc.Spot && len(cc.SpotConfig.InstanceDistribution) >= 0 {
cleanedDistribution := []string{*cc.InstanceType}
for _, instanceType := range cc.SpotConfig.InstanceDistribution {
if instanceType != *cc.InstanceType {
cleanedDistribution = append(cleanedDistribution, instanceType)
}
}
cc.SpotConfig.InstanceDistribution = cleanedDistribution
}

if err := cc.validateAvailabilityZones(awsClient); err != nil {
return errors.Wrap(err, AvailabilityZonesKey)
}

if cc.Spot != nil && *cc.Spot {
cc.AutoFillSpot(awsClient)

chosenInstance := aws.InstanceMetadatas[*cc.Region][*cc.InstanceType]
compatibleSpots := CompatibleSpotInstances(awsClient, chosenInstance, cc.SpotConfig.MaxPrice, _spotInstanceDistributionLength)
if len(compatibleSpots) == 0 {
Expand Down Expand Up @@ -635,9 +640,7 @@ func CompatibleSpotInstances(awsClient *aws.Client, targetInstance aws.InstanceM

func AutoGenerateSpotConfig(awsClient *aws.Client, spotConfig *SpotConfig, region string, instanceType string) error {
chosenInstance := aws.InstanceMetadatas[region][instanceType]
if len(spotConfig.InstanceDistribution) == 0 {
spotConfig.InstanceDistribution = append(spotConfig.InstanceDistribution, chosenInstance.Type)

if len(spotConfig.InstanceDistribution) == 1 {
compatibleSpots := CompatibleSpotInstances(awsClient, chosenInstance, spotConfig.MaxPrice, _spotInstanceDistributionLength)
if len(compatibleSpots) == 0 {
return errors.Wrap(ErrorNoCompatibleSpotInstanceFound(chosenInstance.Type), InstanceTypeKey)
Expand All @@ -646,11 +649,8 @@ func AutoGenerateSpotConfig(awsClient *aws.Client, spotConfig *SpotConfig, regio
for _, instance := range compatibleSpots {
spotConfig.InstanceDistribution = append(spotConfig.InstanceDistribution, instance.Type)
}
} else {
instanceDistributionSet := strset.New(spotConfig.InstanceDistribution...)
instanceDistributionSet.Remove(instanceType)
spotConfig.InstanceDistribution = append([]string{instanceType}, instanceDistributionSet.Slice()...)
}

if spotConfig.MaxPrice == nil {
spotConfig.MaxPrice = &chosenInstance.Price
}
Expand Down
Loading