Troubleshooting module for VPC CNI by rimaulana #4

Merged
55 changes: 55 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/cleanup.sh
@@ -0,0 +1,55 @@
# VPC_CNI_IAM_ROLE_NAME="eksctl-eks-workshop-addon-vpc-cni-Role1-n85u3l0IhDSv"

kubectl delete namespace cni-tshoot
attached_policies=$(aws iam list-attached-role-policies --role-name $VPC_CNI_IAM_ROLE_NAME --query 'AttachedPolicies[*].PolicyArn' --output text)

is_policy_exist=0

for policy in $attached_policies; do
if [ "$policy" == "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" ]; then
is_policy_exist=1
else
aws iam detach-role-policy --role-name $VPC_CNI_IAM_ROLE_NAME --policy-arn $policy
fi
done

if [ $is_policy_exist -eq 0 ]; then
logmessage "Re-attaching the AmazonEKS_CNI_Policy managed policy to the VPC CNI addon role"

aws iam attach-role-policy --role-name $VPC_CNI_IAM_ROLE_NAME --policy-arn arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
fi

nodes=$(aws eks list-nodegroups --cluster-name $EKS_CLUSTER_NAME --query 'nodegroups' --output text)
deleted_nodes=()

logmessage "Reverting EKS managed nodegroup configuration"
for node in $nodes; do
if [[ "$node" != "default" && "$node" != "cni_troubleshooting_nodes" ]]; then
logmessage "Deleting nodegroup $node"
aws eks delete-nodegroup --cluster-name $EKS_CLUSTER_NAME --nodegroup-name $node
deleted_nodes+=("$node")
fi
done

logmessage "Waiting for EKS managed nodegroups to be deleted"
for deleted_node in "${deleted_nodes[@]}"; do
logmessage "waiting for deletion of $deleted_node"
aws eks wait nodegroup-deleted --cluster-name $EKS_CLUSTER_NAME --nodegroup-name $deleted_node
done

DEFAULT_CONFIG='{"enableNetworkPolicy":"true","env":{"ENABLE_POD_ENI":"true","ENABLE_PREFIX_DELEGATION":"true","POD_SECURITY_GROUP_ENFORCING_MODE":"standard"},"nodeAgent":{"enablePolicyEventLogs":"true"}}'
CURRENT_CONFIG=$(aws eks describe-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --query addon.configurationValues --output text | jq --sort-keys -c .)

if [ "$DEFAULT_CONFIG" != "$CURRENT_CONFIG" ]; then
logmessage "Reverting VPC CNI config to default"
addons_status=$(aws eks describe-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --query addon.status --output text)
while [ "$addons_status" == "UPDATING" ]; do
logmessage "Waiting for the VPC CNI addon status to leave UPDATING"
sleep 60
addons_status=$(aws eks describe-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --query addon.status --output text)
done

aws eks update-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --service-account-role-arn $VPC_CNI_IAM_ROLE_ARN --configuration-values "$DEFAULT_CONFIG"
fi
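
For reference, the cleanup script relies on the VPC_CNI_IAM_ROLE_NAME, VPC_CNI_IAM_ROLE_ARN, and EKS_CLUSTER_NAME variables exported by the workshop environment. A minimal sketch of a manual run, with placeholder role and cluster names standing in for the real values:

# Hypothetical manual invocation; the names below are placeholders, not real resources.
export EKS_CLUSTER_NAME="eks-workshop"
export VPC_CNI_IAM_ROLE_NAME="eksctl-eks-workshop-addon-vpc-cni-Role1-EXAMPLE"
export VPC_CNI_IAM_ROLE_ARN="arn:aws:iam::111122223333:role/${VPC_CNI_IAM_ROLE_NAME}"
bash manifests/modules/troubleshooting/cni/.workshop/cleanup.sh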


23 changes: 23 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/ssm.sh
@@ -0,0 +1,23 @@
#!/bin/bash
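# Runs a shell command on a worker node via SSM and prints its output.
# Positional arguments (as assumed by the workshop lab scripts):
#   $1 - EC2 instance ID of the target node
#   $2 - log file name under /var/log/aws-routed-eni/ (without the .log extension)
#   $3 - number of lines to tail from the log
#   $4 - pattern passed to grep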
COMMAND_ID=$(aws ssm send-command \
--instance-ids $1 \
--document-name "AWS-RunShellScript" \
--comment "Demo run shell script on Linux Instances" \
--parameters '{"commands":["sudo -Hiu root bash << END","tail -n '$3' /var/log/aws-routed-eni/'$2'.log | grep '$4'", "END"]}' \
--output text \
--query "Command.CommandId")

STATUS=Pending
while [ "$STATUS" == "Pending" ] || [ "$STATUS" == "InProgress" ]; do
sleep 5
STATUS=$(aws ssm get-command-invocation \
--command-id "$COMMAND_ID" \
--instance-id "$1" \
--output text \
--query "Status")
done

aws ssm list-command-invocations \
--command-id "$COMMAND_ID" \
--details \
--output text \
--query "CommandInvocations[].CommandPlugins[].Output"
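
A hypothetical invocation, assuming the instance ID of one of the troubleshooting nodes has already been looked up, would search the last 200 lines of ipamd.log on that node for errors:

# Example only: the instance ID is a placeholder.
NODE_INSTANCE_ID=i-0123456789abcdef0
bash manifests/modules/troubleshooting/cni/.workshop/ssm.sh $NODE_INSTANCE_ID ipamd 200 ERROR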
197 changes: 197 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/terraform/main.tf
@@ -0,0 +1,197 @@
locals {
tags = {
module = "troubleshooting"
}
secondary_cidr = "100.64.0.0/22"
}

data "aws_vpc" "selected" {
tags = {
created-by = "eks-workshop-v2"
env = var.addon_context.eks_cluster_id
}
}

data "aws_subnets" "private" {
tags = {
created-by = "eks-workshop-v2"
env = var.addon_context.eks_cluster_id
}

filter {
name = "tag:Name"
values = ["*Private*"]
}
}

resource "aws_vpc_ipv4_cidr_block_association" "secondary_cidr" {
vpc_id = data.aws_vpc.selected.id
cidr_block = local.secondary_cidr
}

data "aws_subnet" "selected" {
count = length(data.aws_subnets.private.ids)

id = data.aws_subnets.private.ids[count.index]
}

resource "aws_subnet" "large_subnet" {
count = length(data.aws_subnets.private.ids)

vpc_id = aws_vpc_ipv4_cidr_block_association.secondary_cidr.vpc_id
cidr_block = cidrsubnet(local.secondary_cidr, 2, count.index)
availability_zone = data.aws_subnet.selected[count.index].availability_zone

tags = merge(local.tags, var.tags, {
AdditionalSubnet = "true"
Size = "large"
})

depends_on = [
aws_vpc_ipv4_cidr_block_association.secondary_cidr
]
}

resource "aws_subnet" "small_subnet" {
count = length(data.aws_subnets.private.ids)

vpc_id = aws_vpc_ipv4_cidr_block_association.secondary_cidr.vpc_id
cidr_block = cidrsubnet(local.secondary_cidr, 6, count.index + 48)
availability_zone = data.aws_subnet.selected[count.index].availability_zone

tags = merge(local.tags, {
AdditionalSubnet = "true"
Size = "small"
})

depends_on = [
aws_vpc_ipv4_cidr_block_association.secondary_cidr
]
}

data "aws_route_table" "private" {
count = length(data.aws_subnets.private.ids)

vpc_id = data.aws_vpc.selected.id
subnet_id = data.aws_subnets.private.ids[count.index]
}

resource "aws_route_table_association" "small_subnet" {
count = length(data.aws_subnets.private.ids)

subnet_id = aws_subnet.small_subnet[count.index].id
route_table_id = data.aws_route_table.private[count.index].route_table_id
}

resource "aws_route_table_association" "large_subnet" {
count = length(data.aws_subnets.private.ids)

subnet_id = aws_subnet.large_subnet[count.index].id
route_table_id = data.aws_route_table.private[count.index].route_table_id
}

resource "aws_iam_role" "node_role" {
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Principal = {
Service = [
"ec2.amazonaws.com"
]
}
Action = "sts:AssumeRole"
}
]
})
managed_policy_arns = [
"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
]
}

resource "aws_eks_access_entry" "cni_troubleshooting_nodes" {
cluster_name = var.eks_cluster_id
principal_arn = aws_iam_role.node_role.arn
type = "EC2_LINUX"
}

resource "aws_eks_node_group" "cni_troubleshooting_nodes" {

cluster_name = var.eks_cluster_id
node_group_name = "cni_troubleshooting_nodes"
node_role_arn = aws_iam_role.node_role.arn
subnet_ids = aws_subnet.small_subnet[*].id
instance_types = ["m5.large"]

scaling_config {
desired_size = 0
max_size = 6
min_size = 0
}

labels = {
app = "cni_troubleshooting"
}

taint {
key = "purpose"
value = "cni_troubleshooting"
effect = "NO_SCHEDULE"
}

update_config {
max_unavailable = 1
}

tags = merge(local.tags, var.tags)

}


data "aws_eks_addon" "vpc_cni" {
addon_name = "vpc-cni"
cluster_name = var.addon_context.eks_cluster_id
}

resource "null_resource" "change_config" {
triggers = {
config = data.aws_eks_addon.vpc_cni.configuration_values,
cluster_name = var.addon_context.eks_cluster_id,
role_arn = data.aws_eks_addon.vpc_cni.service_account_role_arn,
node_group_name = aws_eks_node_group.cni_troubleshooting_nodes.node_group_name,
role_name = split("/", data.aws_eks_addon.vpc_cni.service_account_role_arn)[1],
timestamp = timestamp()
}

provisioner "local-exec" {
command = <<EOF
mkdir -p /eks-workshop/temp
CURRENT_CONFIG='${jsonencode(self.triggers.config)}'
NEW_CONFIG=$(echo "$CURRENT_CONFIG" | jq -r . | jq -c '. += {"resources":{"requests":{"memory":"2G"}}}')
aws eks update-addon --addon-name vpc-cni --cluster-name ${self.triggers.cluster_name} --service-account-role-arn ${self.triggers.role_arn} --configuration-values "$NEW_CONFIG"
addons_status="UPDATING"
while [ "$addons_status" == "UPDATING" ]; do
sleep 60
addons_status=$(aws eks describe-addon --addon-name vpc-cni --cluster-name ${self.triggers.cluster_name} --query addon.status --output text)
done
aws iam detach-role-policy --role-name ${self.triggers.role_name} --policy-arn arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
aws eks update-nodegroup-config --cluster-name ${self.triggers.cluster_name} --nodegroup-name ${self.triggers.node_group_name} --scaling-config minSize=0,maxSize=6,desiredSize=1
EOF
}

}

resource "null_resource" "kustomize_app" {
triggers = {
always_run = timestamp()
}

provisioner "local-exec" {
command = "kubectl apply -k ~/environment/eks-workshop/modules/troubleshooting/cni/workload"
when = create
}

}
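
The change_config provisioner intentionally degrades the VPC CNI setup for the lab: it raises the addon's memory request to 2G, detaches AmazonEKS_CNI_Policy from the addon role, and scales the tainted node group up to one node. A rough sketch of how the injected state could be verified afterwards, using the same AWS CLI calls the provisioner itself relies on:

# Check the addon status and the overridden configuration values.
aws eks describe-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME \
--query 'addon.[status,configurationValues]' --output text
# Confirm AmazonEKS_CNI_Policy is no longer attached to the addon role.
aws iam list-attached-role-policies --role-name $VPC_CNI_IAM_ROLE_NAME \
--query 'AttachedPolicies[].PolicyArn' --output text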
17 changes: 17 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/terraform/outputs.tf
@@ -0,0 +1,17 @@
output "environment_variables" {
description = "Environment variables to be added to the IDE shell"
value = {
# VPC_ID = data.aws_vpc.selected.id,
# LOAD_BALANCER_CONTROLLER_ROLE_NAME = module.eks_blueprints_addons.aws_load_balancer_controller.iam_role_name,
# LOAD_BALANCER_CONTROLLER_POLICY_ARN_FIX = module.eks_blueprints_addons.aws_load_balancer_controller.iam_policy_arn,
# LOAD_BALANCER_CONTROLLER_POLICY_ARN_ISSUE = aws_iam_policy.issue.arn,
# LOAD_BALANCER_CONTROLLER_ROLE_ARN = module.eks_blueprints_addons.aws_load_balancer_controller.iam_role_arn
VPC_CNI_IAM_ROLE_NAME = split("/", data.aws_eks_addon.vpc_cni.service_account_role_arn)[1],
VPC_CNI_IAM_ROLE_ARN = data.aws_eks_addon.vpc_cni.service_account_role_arn,
ADDITIONAL_SUBNET_1 = aws_subnet.large_subnet[0].id,
ADDITIONAL_SUBNET_2 = aws_subnet.large_subnet[1].id,
ADDITIONAL_SUBNET_3 = aws_subnet.large_subnet[2].id,
NODEGROUP_IAM_ROLE = aws_iam_role.node_role.arn,
AWS_NODE_ADDON_CONFIG = jsonencode(data.aws_eks_addon.vpc_cni.configuration_values)
}
}
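
These outputs are surfaced as environment variables in the workshop IDE shell, so the lab instructions can refer to the injected subnets and roles directly. A quick sanity check from that shell might look like:

# Print the variables this module is expected to populate.
echo "$VPC_CNI_IAM_ROLE_NAME ($VPC_CNI_IAM_ROLE_ARN)"
echo "$ADDITIONAL_SUBNET_1 $ADDITIONAL_SUBNET_2 $ADDITIONAL_SUBNET_3"
echo "$NODEGROUP_IAM_ROLE"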
35 changes: 35 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/terraform/vars.tf
@@ -0,0 +1,35 @@
# tflint-ignore: terraform_unused_declarations
variable "eks_cluster_id" {
description = "EKS cluster name"
type = string
}

# tflint-ignore: terraform_unused_declarations
variable "eks_cluster_version" {
description = "EKS cluster version"
type = string
}

# tflint-ignore: terraform_unused_declarations
variable "cluster_security_group_id" {
description = "EKS cluster security group ID"
type = any
}

# tflint-ignore: terraform_unused_declarations
variable "addon_context" {
description = "Addon context that can be passed directly to blueprints addon modules"
type = any
}

# tflint-ignore: terraform_unused_declarations
variable "tags" {
description = "Tags to apply to AWS resources"
type = any
}

# tflint-ignore: terraform_unused_declarations
variable "resources_precreated" {
description = "Have expensive resources been created already"
type = bool
}
42 changes: 42 additions & 0 deletions manifests/modules/troubleshooting/cni/workload/deployment.yaml
@@ -0,0 +1,42 @@
---
apiVersion: v1
kind: Namespace
metadata:
  name: cni-tshoot

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-app
  namespace: cni-tshoot
  labels:
    app: nginx
spec:
  replicas: 15
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: app
                    operator: In
                    values:
                      - cni_troubleshooting
      containers:
        - name: nginx
          image: nginx:1.14.2
          ports:
            - containerPort: 80
      tolerations:
        - key: "purpose"
          operator: "Exists"
          effect: "NoSchedule"
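
The 15 nginx replicas are pinned to the tainted troubleshooting node group, which starts with a single node in the small secondary-CIDR subnets and a deliberately broken CNI configuration, so many of them are expected to fail to obtain pod IPs. One way to observe the symptom once the workload is applied:

# Pods stuck in Pending or ContainerCreating usually indicate IP allocation / CNI problems.
kubectl get pods -n cni-tshoot -o wide
kubectl get events -n cni-tshoot --sort-by=.lastTimestamp | tail -n 20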
5 changes: 5 additions & 0 deletions manifests/modules/troubleshooting/cni/workload/kustomization.yaml
@@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../../../base-application/ui
- deployment.yaml
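
The kustomization bundles the shared base ui application with the nginx deployment above; the Terraform provisioner applies it with kubectl apply -k. If desired, the rendered manifests can be previewed without applying them, for example:

# Render the kustomization locally (same path the provisioner uses) without applying it.
kubectl kustomize ~/environment/eks-workshop/modules/troubleshooting/cni/workload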