Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added eks node auto-repair #65

Merged
merged 1 commit into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ module "managed_node_group_addons" {
managed_ng_ebs_volume_size = 50
managed_ng_instance_types = ["t3a.large", "t2.large", "t2.xlarge", "t3.large", "m5.large"]
managed_ng_kms_policy_arn = module.eks.kms_policy_arn
managed_ng_node_autorepair = {
enabled = false
enable_node_monitoring_agent_addon = true
}
eks_cluster_name = module.eks.eks_cluster_name
worker_iam_role_name = module.eks.worker_iam_role_name
worker_iam_role_arn = module.eks.worker_iam_role_arn
Expand Down
4 changes: 2 additions & 2 deletions examples/complete/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ This directory contains a complete example that demonstrates the usage of the Te
| <a name="module_key_pair_vpn"></a> [key\_pair\_vpn](#module\_key\_pair\_vpn) | squareops/keypair/aws | 1.0.2 |
| <a name="module_key_pair_eks"></a> [key\_pair\_eks](#module\_key\_pair\_eks) | squareops/keypair/aws | 1.0.2 |
| <a name="module_vpc"></a> [vpc](#module\_vpc) | squareops/vpc/aws | 3.4.1 |
| <a name="module_eks"></a> [eks](#module\_eks) | squareops/eks/aws | 5.1.1 |
| <a name="module_managed_node_group_addons"></a> [managed\_node\_group\_addons](#module\_managed\_node\_group\_addons) | squareops/eks/aws//modules/managed-nodegroup | 5.1.1 |
| <a name="module_eks"></a> [eks](#module\_eks) | squareops/eks/aws | 5.3.0 |
| <a name="module_managed_node_group_addons"></a> [managed\_node\_group\_addons](#module\_managed\_node\_group\_addons) | squareops/eks/aws//modules/managed-nodegroup | 5.3.0 |
| <a name="module_fargate_profle"></a> [fargate\_profle](#module\_fargate\_profle) | squareops/eks/aws//modules/fargate-profile | n/a |

## Resources
Expand Down
54 changes: 29 additions & 25 deletions examples/complete/main.tf
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
locals {
region = "us-west-1"
region = "us-east-2"
kms_deletion_window_in_days = 7
kms_key_rotation_enabled = true
is_enabled = true
multi_region = false
environment = "stage"
name = "sqops"
auto_assign_public_ip = true
vpc_availability_zones = ["us-west-1a", "us-west-1b"]
vpc_availability_zones = ["us-east-2a", "us-east-2b"]
vpc_public_subnet_enabled = true
vpc_private_subnet_enabled = true
vpc_database_subnet_enabled = true
vpc_intra_subnet_enabled = true
vpc_one_nat_gateway_per_az = true
vpc_one_nat_gateway_per_az = false
vpn_server_instance_type = "t3a.small"
vpc_flow_log_enabled = false
kms_user = null
vpc_cidr = "10.10.0.0/16"
vpn_server_enabled = true
cluster_version = "1.30"
vpn_server_enabled = false
cluster_version = "1.31"
cluster_log_types = ["api", "audit", "authenticator", "controllerManager", "scheduler"]
cluster_log_retention_in_days = 30
managed_ng_capacity_type = "SPOT" # Choose the capacity type ("SPOT" or "ON_DEMAND")
cluster_endpoint_private_access = false
cluster_endpoint_private_access = true
cluster_endpoint_public_access = true
cluster_endpoint_public_access_cidrs = ["0.0.0.0/0"]
ebs_volume_size = 50
fargate_profile_name = "app"
vpc_s3_endpoint_enabled = true
vpc_s3_endpoint_enabled = false
vpc_ecr_endpoint_enabled = false
vpc_public_subnets_counts = 2
vpc_private_subnets_counts = 2
Expand Down Expand Up @@ -139,7 +139,7 @@ module "vpc" {

module "eks" {
source = "squareops/eks/aws"
version = "5.2.1"
version = "5.3.0"
access_entry_enabled = true
access_entries = {
"example" = {
Expand Down Expand Up @@ -185,22 +185,26 @@ module "eks" {
}

module "managed_node_group_addons" {
source = "squareops/eks/aws//modules/managed-nodegroup"
version = "5.2.1"
depends_on = [module.vpc, module.eks]
managed_ng_name = "Infra"
managed_ng_min_size = 2
managed_ng_max_size = 5
managed_ng_desired_size = 2
vpc_subnet_ids = [module.vpc.private_subnets[0]]
environment = local.environment
managed_ng_kms_key_arn = module.kms.key_arn
managed_ng_capacity_type = local.managed_ng_capacity_type
managed_ng_ebs_volume_size = local.ebs_volume_size
managed_ng_ebs_volume_type = "gp3"
managed_ng_ebs_encrypted = true
managed_ng_instance_types = ["t3a.large", "t2.large", "t2.xlarge", "t3.large", "m5.large"] # Pass instance type according to the ami architecture.
managed_ng_kms_policy_arn = module.eks.kms_policy_arn
source = "squareops/eks/aws//modules/managed-nodegroup"
version = "5.3.0"
depends_on = [module.vpc, module.eks]
managed_ng_name = "Infra"
managed_ng_min_size = 2
managed_ng_max_size = 5
managed_ng_desired_size = 2
vpc_subnet_ids = [module.vpc.private_subnets[0]]
environment = local.environment
managed_ng_kms_key_arn = module.kms.key_arn
managed_ng_capacity_type = local.managed_ng_capacity_type
managed_ng_ebs_volume_size = local.ebs_volume_size
managed_ng_ebs_volume_type = "gp3"
managed_ng_ebs_encrypted = true
managed_ng_instance_types = ["t3a.large", "t3.large", "t3.medium"] # Pass instance type according to the ami architecture.
managed_ng_kms_policy_arn = module.eks.kms_policy_arn
managed_ng_node_autorepair = {
enabled = false
enable_node_monitoring_agent_addon = true
}
eks_cluster_name = module.eks.cluster_name
worker_iam_role_name = module.eks.worker_iam_role_name
worker_iam_role_arn = module.eks.worker_iam_role_arn
Expand All @@ -212,7 +216,7 @@ module "managed_node_group_addons" {
"Addons-Services" = "true"
}
tags = local.additional_aws_tags
custom_ami_id = "" # Optional, if not passed terraform will automatically select the latest supported ami id
custom_ami_id = "" # Optional, if not passed terraform will automatically select the latest supported ami id
aws_managed_node_group_arch = local.aws_managed_node_group_arch # optional if "custom_ami_id" is passed
enable_bottlerocket_ami = local.enable_bottlerocket_ami # Set it to false if using Amazon Linux AMIs
bottlerocket_node_config = {
Expand Down
2 changes: 2 additions & 0 deletions modules/managed-nodegroup/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ No modules.

| Name | Type |
|------|------|
| [aws_eks_addon.node_monitoring_addon](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_addon) | resource |
| [aws_eks_node_group.managed_ng](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_node_group) | resource |
| [aws_launch_template.eks_template](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template) | resource |
| [aws_ami.launch_template_ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ami) | data source |
Expand Down Expand Up @@ -75,6 +76,7 @@ No modules.
| <a name="input_enable_bottlerocket_ami"></a> [enable\_bottlerocket\_ami](#input\_enable\_bottlerocket\_ami) | Set to true to enable the use of Bottlerocket AMIs for instances. | `bool` | `false` | no |
| <a name="input_bottlerocket_node_config"></a> [bottlerocket\_node\_config](#input\_bottlerocket\_node\_config) | Bottlerocket Node configurations for EKS. | `map(any)` | <pre>{<br> "bottlerocket_eks_enable_control_container": true,<br> "bottlerocket_eks_node_admin_container_enabled": false<br>}</pre> | no |
| <a name="input_custom_ami_id"></a> [custom\_ami\_id](#input\_custom\_ami\_id) | worker node AMI id to be created | `string` | `""` | no |
| <a name="input_managed_ng_node_autorepair"></a> [managed\_ng\_node\_autorepair](#input\_managed\_ng\_node\_autorepair) | Choose whether to enable managed nodegroup auto repair feature | <pre>object({<br> enabled = bool<br> enable_node_monitoring_agent_addon = bool<br> })</pre> | <pre>{<br> "enable_node_monitoring_agent_addon": false,<br> "enabled": false<br>}</pre> | no |

## Outputs

Expand Down
37 changes: 37 additions & 0 deletions modules/managed-nodegroup/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ resource "aws_eks_node_group" "managed_ng" {
capacity_type = var.managed_ng_capacity_type
instance_types = var.managed_ng_instance_types
force_update_version = true
node_repair_config {
enabled = var.managed_ng_node_autorepair.enabled
}
launch_template {
id = aws_launch_template.eks_template.id
version = aws_launch_template.eks_template.latest_version
Expand All @@ -128,3 +131,37 @@ resource "aws_eks_node_group" "managed_ng" {
var.tags
)
}

# Installs the "eks-node-monitoring-agent" EKS add-on on the cluster named by
# var.eks_cluster_name. The add-on is created only when BOTH flags of
# var.managed_ng_node_autorepair are true:
#   - enabled                             (node auto-repair switched on)
#   - enable_node_monitoring_agent_addon  (caller wants this module to manage the add-on)
# In every other combination count evaluates to 0 and nothing is created.
resource "aws_eks_addon" "node_monitoring_addon" {
  count = (
    var.managed_ng_node_autorepair.enabled == true &&
    var.managed_ng_node_autorepair.enable_node_monitoring_agent_addon == true
  ) ? 1 : 0

  addon_name    = "eks-node-monitoring-agent"
  addon_version = "v1.0.1-eksbuild.2" # pinned add-on build
  cluster_name  = var.eks_cluster_name

  # NOTE(review): "PRESERVE" is expected to keep existing in-cluster settings
  # when the add-on is updated -- confirm against the aws_eks_addon docs.
  resolve_conflicts_on_update = "PRESERVE"

  # Add-on configuration, serialized to JSON. Both agent sections receive the
  # same CPU/memory requests and limits.
  configuration_values = jsonencode({
    dcgmAgent = {
      resources = {
        limits = {
          cpu    = "50m"
          memory = "100Mi"
        }
        requests = {
          cpu    = "10m"
          memory = "30Mi"
        }
      }
    }
    monitoringAgent = {
      resources = {
        limits = {
          cpu    = "50m"
          memory = "100Mi"
        }
        requests = {
          cpu    = "10m"
          memory = "30Mi"
        }
      }
    }
  })
}
12 changes: 12 additions & 0 deletions modules/managed-nodegroup/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,15 @@ variable "custom_ami_id" {
description = "worker node AMI id to be created"
default = ""
}

# Settings for the managed node group auto-repair feature.
#   enabled                            - value passed to the node group's
#                                        node_repair_config block.
#   enable_node_monitoring_agent_addon - together with `enabled`, decides
#                                        whether the eks-node-monitoring-agent
#                                        add-on is created by this module.
# Both attributes are required whenever the object is supplied; when the
# variable is omitted, both default to false.
variable "managed_ng_node_autorepair" {
  description = "Choose whether to enable managed nodegroup auto repair feature"

  type = object({
    enabled                            = bool
    enable_node_monitoring_agent_addon = bool
  })

  default = {
    enabled                            = false
    enable_node_monitoring_agent_addon = false
  }
}