Skip to content

Commit

Permalink
Add support for EKS accelerated AMIs based on AL2023 (#7996)
Browse files (browse the repository at this point in the history)
Add support for EKS accelerated AMIs based on AL2023
  • Loading branch information
TiberiuGC authored Oct 11, 2024
1 parent d70fff1 commit 486acdf
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 77 deletions.
6 changes: 4 additions & 2 deletions pkg/ami/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@ import (
// Variations of image classes
const (
ImageClassGeneral = iota
ImageClassGPU
ImageClassNvidia
ImageClassNeuron
ImageClassARM
)

// ImageClasses is a list of image class names
var ImageClasses = []string{
"ImageClassGeneral",
"ImageClassGPU",
"ImageClassNvidia",
"ImageClassNeuron",
"ImageClassARM",
}

Expand Down
23 changes: 16 additions & 7 deletions pkg/ami/auto_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@ func MakeImageSearchPatterns(version string) map[string]map[int]string {
return map[string]map[int]string{
api.NodeImageFamilyAmazonLinux2023: {
ImageClassGeneral: fmt.Sprintf("amazon-eks-node-al2023-x86_64-standard-%s-v*", version),
ImageClassNvidia: fmt.Sprintf("amazon-eks-node-al2023-x86_64-nvidia-*-%s-v*", version),
ImageClassNeuron: fmt.Sprintf("amazon-eks-node-al2023-x86_64-neuron-%s-v*", version),
ImageClassARM: fmt.Sprintf("amazon-eks-node-al2023-arm64-standard-%s-v*", version),
},
api.NodeImageFamilyAmazonLinux2: {
ImageClassGeneral: fmt.Sprintf("amazon-eks-node-%s-v*", version),
ImageClassGPU: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
ImageClassNvidia: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
ImageClassNeuron: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
ImageClassARM: fmt.Sprintf("amazon-eks-arm64-node-%s-*", version),
},
api.NodeImageFamilyUbuntuPro2204: {
Expand Down Expand Up @@ -90,16 +93,22 @@ func (r *AutoResolver) Resolve(ctx context.Context, region, version, instanceTyp

imageClasses := MakeImageSearchPatterns(version)[imageFamily]
namePattern := imageClasses[ImageClassGeneral]
if instanceutils.IsGPUInstanceType(instanceType) {
var ok bool
switch {
case instanceutils.IsNvidiaInstanceType(instanceType):
namePattern, ok = imageClasses[ImageClassNvidia]
if !ok {
logger.Critical("image family %s doesn't support Nvidia GPU image class", imageFamily)
return "", NewErrFailedResolution(region, version, instanceType, imageFamily)
}
case instanceutils.IsNeuronInstanceType(instanceType):
var ok bool
namePattern, ok = imageClasses[ImageClassGPU]
namePattern, ok = imageClasses[ImageClassNeuron]
if !ok {
logger.Critical("image family %s doesn't support GPU image class", imageFamily)
logger.Critical("image family %s doesn't support Neuron GPU image class", imageFamily)
return "", NewErrFailedResolution(region, version, instanceType, imageFamily)
}
}

if instanceutils.IsARMInstanceType(instanceType) {
case instanceutils.IsARMInstanceType(instanceType):
var ok bool
namePattern, ok = imageClasses[ImageClassARM]
if !ok {
Expand Down
16 changes: 14 additions & 2 deletions pkg/ami/ssm_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ func MakeSSMParameterName(version, instanceType, imageFamily string) (string, er

switch imageFamily {
case api.NodeImageFamilyAmazonLinux2023:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/%s/standard/recommended/%s",
version, utils.ToKebabCase(imageFamily), instanceEC2ArchName(instanceType), fieldName), nil
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/%s/%s/recommended/%s",
version, utils.ToKebabCase(imageFamily), instanceEC2ArchName(instanceType), imageType(imageFamily, instanceType, version), fieldName), nil
case api.NodeImageFamilyAmazonLinux2:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/recommended/%s", version, imageType(imageFamily, instanceType, version), fieldName), nil
case api.NodeImageFamilyWindowsServer2019CoreContainer,
Expand Down Expand Up @@ -102,6 +102,10 @@ func MakeManagedSSMParameterName(version string, amiType ekstypes.AMITypes) stri
switch amiType {
case ekstypes.AMITypesAl2023X8664Standard:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/standard/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2023X8664Nvidia:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/nvidia/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2023X8664Neuron:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/neuron/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2023Arm64Standard:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/arm64/standard/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2X8664:
Expand Down Expand Up @@ -138,6 +142,14 @@ func ubuntuArchName(instanceType string) string {
func imageType(imageFamily, instanceType, version string) string {
family := utils.ToKebabCase(imageFamily)
switch imageFamily {
case api.NodeImageFamilyAmazonLinux2023:
if instanceutils.IsNvidiaInstanceType(instanceType) {
return "nvidia"
}
if instanceutils.IsNeuronInstanceType(instanceType) {
return "neuron"
}
return "standard"
case api.NodeImageFamilyBottlerocket:
if instanceutils.IsNvidiaInstanceType(instanceType) {
return fmt.Sprintf("%s-%s", version, "nvidia")
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/eksctl.io/v1alpha5/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func SetManagedNodeGroupDefaults(ng *ManagedNodeGroup, meta *ClusterMeta, contro
// When using custom AMIs, we want the user to explicitly specify AMI family.
// Thus, we only set up default AMI family when no custom AMI is being used.
if ng.AMIFamily == "" && ng.AMI == "" {
if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer && !instanceutils.IsGPUInstanceType(ng.InstanceType) &&
if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer &&
!instanceutils.IsARMGPUInstanceType(ng.InstanceType) {
ng.AMIFamily = NodeImageFamilyAmazonLinux2023
} else {
Expand Down
36 changes: 12 additions & 24 deletions pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,16 @@ var _ = Describe("GPU instance support", func() {
assertValidationError(e, api.ValidateManagedNodeGroup(0, mng))
},
Entry("AL2023 INF", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Inferentia",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
}),
Entry("AL2023 TRN", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Trainium",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
}),
Entry("AL2023 NVIDIA", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "GPU",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
}),
Entry("AL2", gpuInstanceEntry{
gpuInstanceType: "asdf",
Expand Down Expand Up @@ -107,22 +101,16 @@ var _ = Describe("GPU instance support", func() {

},
Entry("AL2023 INF", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Inferentia",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
}),
Entry("AL2023 TRN", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Trainium",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
}),
Entry("AL2023 NVIDIA", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "GPU",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
}),
Entry("AL2", gpuInstanceEntry{
gpuInstanceType: "g4dn.xlarge",
Expand Down
18 changes: 9 additions & 9 deletions pkg/apis/eksctl.io/v1alpha5/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,12 +661,10 @@ func validateNodeGroupBase(np NodePool, path string, controlPlaneOnOutposts bool

instanceType := SelectInstanceType(np)

if ng.AMIFamily == NodeImageFamilyAmazonLinux2023 && instanceutils.IsNvidiaInstanceType(instanceType) {
return ErrUnsupportedInstanceTypes("GPU", NodeImageFamilyAmazonLinux2023,
fmt.Sprintf("EKS accelerated AMIs based on %s will be available at a later date", NodeImageFamilyAmazonLinux2023))
}

if ng.AMIFamily != NodeImageFamilyAmazonLinux2 && ng.AMIFamily != NodeImageFamilyBottlerocket && ng.AMIFamily != "" {
if ng.AMIFamily != NodeImageFamilyAmazonLinux2023 &&
ng.AMIFamily != NodeImageFamilyAmazonLinux2 &&
ng.AMIFamily != NodeImageFamilyBottlerocket &&
ng.AMIFamily != "" {
if instanceutils.IsNvidiaInstanceType(instanceType) {
logger.Warning(GPUDriversWarning(ng.AMIFamily))
}
Expand All @@ -676,12 +674,14 @@ func validateNodeGroupBase(np NodePool, path string, controlPlaneOnOutposts bool
}
}

if ng.AMIFamily != NodeImageFamilyAmazonLinux2 && ng.AMIFamily != "" {
// Only AL2 supports Inferentia hosts.
if ng.AMIFamily != NodeImageFamilyAmazonLinux2 &&
ng.AMIFamily != NodeImageFamilyAmazonLinux2023 &&
ng.AMIFamily != "" {
// Only AL2 and AL2023 support Inferentia hosts.
if instanceutils.IsInferentiaInstanceType(instanceType) {
return ErrUnsupportedInstanceTypes("Inferentia", ng.AMIFamily, fmt.Sprintf("please use %s instead", NodeImageFamilyAmazonLinux2))
}
// Only AL2 supports Trainium hosts.
// Only AL2 and AL2023 support Trainium hosts.
if instanceutils.IsTrainiumInstanceType(instanceType) {
return ErrUnsupportedInstanceTypes("Trainium", ng.AMIFamily, fmt.Sprintf("please use %s instead", NodeImageFamilyAmazonLinux2))
}
Expand Down
57 changes: 31 additions & 26 deletions pkg/cfn/builder/managed_nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,41 +263,45 @@ func validateLaunchTemplate(launchTemplateData *ec2types.ResponseLaunchTemplateD

func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes {
amiTypeMapping := map[string]struct {
X86x64 ekstypes.AMITypes
X86GPU ekstypes.AMITypes
ARM ekstypes.AMITypes
ARMGPU ekstypes.AMITypes
X86x64 ekstypes.AMITypes
X86Nvidia ekstypes.AMITypes
X86Neuron ekstypes.AMITypes
ARM ekstypes.AMITypes
ARMGPU ekstypes.AMITypes
}{
api.NodeImageFamilyAmazonLinux2023: {
X86x64: ekstypes.AMITypesAl2023X8664Standard,
ARM: ekstypes.AMITypesAl2023Arm64Standard,
X86x64: ekstypes.AMITypesAl2023X8664Standard,
X86Nvidia: ekstypes.AMITypesAl2023X8664Nvidia,
X86Neuron: ekstypes.AMITypesAl2023X8664Neuron,
ARM: ekstypes.AMITypesAl2023Arm64Standard,
},
api.NodeImageFamilyAmazonLinux2: {
X86x64: ekstypes.AMITypesAl2X8664,
X86GPU: ekstypes.AMITypesAl2X8664Gpu,
ARM: ekstypes.AMITypesAl2Arm64,
X86x64: ekstypes.AMITypesAl2X8664,
X86Nvidia: ekstypes.AMITypesAl2X8664Gpu,
X86Neuron: ekstypes.AMITypesAl2X8664Gpu,
ARM: ekstypes.AMITypesAl2Arm64,
},
api.NodeImageFamilyBottlerocket: {
X86x64: ekstypes.AMITypesBottlerocketX8664,
X86GPU: ekstypes.AMITypesBottlerocketX8664Nvidia,
ARM: ekstypes.AMITypesBottlerocketArm64,
ARMGPU: ekstypes.AMITypesBottlerocketArm64Nvidia,
X86x64: ekstypes.AMITypesBottlerocketX8664,
X86Nvidia: ekstypes.AMITypesBottlerocketX8664Nvidia,
ARM: ekstypes.AMITypesBottlerocketArm64,
ARMGPU: ekstypes.AMITypesBottlerocketArm64Nvidia,
},
api.NodeImageFamilyWindowsServer2019FullContainer: {
X86x64: ekstypes.AMITypesWindowsFull2019X8664,
X86GPU: ekstypes.AMITypesWindowsFull2019X8664,
X86x64: ekstypes.AMITypesWindowsFull2019X8664,
X86Nvidia: ekstypes.AMITypesWindowsFull2019X8664,
},
api.NodeImageFamilyWindowsServer2019CoreContainer: {
X86x64: ekstypes.AMITypesWindowsCore2019X8664,
X86GPU: ekstypes.AMITypesWindowsCore2019X8664,
X86x64: ekstypes.AMITypesWindowsCore2019X8664,
X86Nvidia: ekstypes.AMITypesWindowsCore2019X8664,
},
api.NodeImageFamilyWindowsServer2022FullContainer: {
X86x64: ekstypes.AMITypesWindowsFull2022X8664,
X86GPU: ekstypes.AMITypesWindowsFull2022X8664,
X86x64: ekstypes.AMITypesWindowsFull2022X8664,
X86Nvidia: ekstypes.AMITypesWindowsFull2022X8664,
},
api.NodeImageFamilyWindowsServer2022CoreContainer: {
X86x64: ekstypes.AMITypesWindowsCore2022X8664,
X86GPU: ekstypes.AMITypesWindowsCore2022X8664,
X86x64: ekstypes.AMITypesWindowsCore2022X8664,
X86Nvidia: ekstypes.AMITypesWindowsCore2022X8664,
},
}

Expand All @@ -307,13 +311,14 @@ func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes
}

switch {
case instanceutils.IsGPUInstanceType(instanceType):
if instanceutils.IsARMInstanceType(instanceType) {
return amiType.ARMGPU
}
return amiType.X86GPU
case instanceutils.IsARMGPUInstanceType(instanceType):
return amiType.ARMGPU
case instanceutils.IsARMInstanceType(instanceType):
return amiType.ARM
case instanceutils.IsNvidiaInstanceType(instanceType):
return amiType.X86Nvidia
case instanceutils.IsNeuronInstanceType(instanceType):
return amiType.X86Neuron
default:
return amiType.X86x64
}
Expand Down
23 changes: 17 additions & 6 deletions pkg/cfn/builder/managed_nodegroup_ami_type_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,23 +77,24 @@ var _ = DescribeTable("Managed Nodegroup AMI type", func(e amiTypeEntry) {
expectedAMIType: "AL2_x86_64",
}),

Entry("AMI type", amiTypeEntry{
Entry("default Nvidia GPU instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Name: "test",
Name: "test",
InstanceType: "p2.xlarge",
},
},
expectedAMIType: "AL2023_x86_64_STANDARD",
expectedAMIType: "AL2023_x86_64_NVIDIA",
}),

Entry("default GPU instance type", amiTypeEntry{
Entry("default Neuron GPU instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Name: "test",
InstanceType: "p2.xlarge",
InstanceType: "inf1.2xlarge",
},
},
expectedAMIType: "AL2_x86_64_GPU",
expectedAMIType: "AL2023_x86_64_NEURON",
}),

Entry("AL2 GPU instance type", amiTypeEntry{
Expand All @@ -107,6 +108,16 @@ var _ = DescribeTable("Managed Nodegroup AMI type", func(e amiTypeEntry) {
expectedAMIType: "AL2_x86_64_GPU",
}),

Entry("default ARM instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Name: "test",
InstanceType: "a1.2xlarge",
},
},
expectedAMIType: "AL2023_ARM_64_STANDARD",
}),

Entry("AL2 ARM instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Expand Down

0 comments on commit 486acdf

Please sign in to comment.