diff --git a/.github/workflows/bats.yml b/.github/workflows/bats.yml
index 4915cdace..798006459 100644
--- a/.github/workflows/bats.yml
+++ b/.github/workflows/bats.yml
@@ -1,7 +1,7 @@
name: bats
on:
- pull_request_target:
+ pull_request:
types: [labeled, opened, synchronize, unlabeled]
jobs:
@@ -19,10 +19,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
- repository: ${{ github.event.pull_request.head.repo.full_name }}
- # Check out the PR commit, not the merge commit
- # Use `ref` instead of `sha` to enable pushing back to `ref`
- ref: ${{ github.event.pull_request.head.ref }}
- name: Run tests on modified modules
id: get-modified-files
diff --git a/modules/eks/actions-runner-controller/README.md b/modules/eks/actions-runner-controller/README.md
index 7ea596356..b9adc5f1a 100644
--- a/modules/eks/actions-runner-controller/README.md
+++ b/modules/eks/actions-runner-controller/README.md
@@ -26,7 +26,7 @@ components:
name: "actions-runner" # avoids hitting name length limit on IAM role
chart: "actions-runner-controller"
chart_repository: "https://actions-runner-controller.github.io/actions-runner-controller"
- chart_version: "0.22.0"
+ chart_version: "0.23.7"
kubernetes_namespace: "actions-runner-system"
create_namespace: true
kubeconfig_exec_auth_api_version: "client.authentication.k8s.io/v1beta1"
@@ -79,12 +79,11 @@ components:
image: summerwind/actions-runner-dind
# `scope` is org name for Organization runners, repo name for Repository runners
scope: "org/infra"
- # We can trade the fast-start behavior of min_replicas > 0 for the better guarantee
- # that Karpenter will not terminate the runner while it is running a job.
- # # Tell Karpenter not to evict this pod. This is only safe when min_replicas is 0.
- # # If we do not set this, Karpenter will feel free to terminate the runner while it is running a job.
- # pod_annotations:
- # karpenter.sh/do-not-evict: "true"
+ # Tell Karpenter not to evict this pod while it is running a job.
+ # If we do not set this, Karpenter will feel free to terminate the runner while it is running a job,
+ # as part of its consolidation efforts, even when using "on demand" instances.
+ running_pod_annotations:
+ karpenter.sh/do-not-disrupt: "true"
min_replicas: 1
max_replicas: 20
scale_down_delay_seconds: 100
@@ -96,7 +95,14 @@ components:
cpu: 100m
memory: 128Mi
webhook_driven_scaling_enabled: true
- webhook_startup_timeout: "30m"
+ # The name `webhook_startup_timeout` is misleading.
+ # It is actually the duration after which a job will be considered completed
+ # (and the runner killed), even if the webhook has not received a "job completed" event.
+ # This is to ensure that if an event is missed, it does not leave the runner running forever.
+ # Set it long enough to cover the longest job you expect to run and then some.
+ # See https://github.com/actions/actions-runner-controller/blob/9afd93065fa8b1f87296f0dcdf0c2753a0548cb7/docs/automatically-scaling-runners.md?plain=1#L264-L268
+ webhook_startup_timeout: "90m"
+ # Pull-driven scaling is obsolete and should not be used.
pull_driven_scaling_enabled: false
# Labels are not case-sensitive to GitHub, but *are* case-sensitive
# to the webhook based autoscaler, which requires exact matches
@@ -134,11 +140,12 @@ components:
# # `scope` is org name for Organization runners, repo name for Repository runners
# scope: "org/infra"
# group: "ArmRunners"
- # # Tell Karpenter not to evict this pod. This is only safe when min_replicas is 0.
- # # If we do not set this, Karpenter will feel free to terminate the runner while it is running a job.
- # pod_annotations:
- # karpenter.sh/do-not-evict: "true"
- # min_replicas: 0
+ # # Tell Karpenter not to evict this pod while it is running a job.
+ # # If we do not set this, Karpenter will feel free to terminate the runner while it is running a job,
+ # # as part of its consolidation efforts, even when using "on demand" instances.
+ # running_pod_annotations:
+ # karpenter.sh/do-not-disrupt: "true"
+ # min_replicas: 0 # Set to 0 so that no ARM instance is running idle, set to 1 for faster startups
# max_replicas: 20
# scale_down_delay_seconds: 100
# resources:
@@ -149,7 +156,7 @@ components:
# cpu: 100m
# memory: 128Mi
# webhook_driven_scaling_enabled: true
- # webhook_startup_timeout: "30m"
+ # webhook_startup_timeout: "90m"
# pull_driven_scaling_enabled: false
# # Labels are not case-sensitive to GitHub, but *are* case-sensitive
# # to the webhook based autoscaler, which requires exact matches
@@ -315,8 +322,10 @@ can assign one or more Runner pools (from the `runners` map) to groups (only one
### Using Webhook Driven Autoscaling (recommended)
-We recommend using Webhook Driven Autoscaling until GitHub releases their own autoscaling solution (said to be "in the
-works" as of April 2023).
+We recommend using Webhook Driven Autoscaling until GitHub's own autoscaling solution is as capable as the Summerwind
+solution this component deploys. See
+[this discussion](https://github.com/actions/actions-runner-controller/discussions/3340) for some perspective on why the
+Summerwind solution is currently (summer 2024) considered superior.
To use the Webhook Driven Autoscaling, in addition to setting `webhook_driven_scaling_enabled` to `true`, you must also
install the GitHub organization-level webhook after deploying the component (specifically, the webhook server). The URL
@@ -424,7 +433,7 @@ spec:
template:
metadata:
annotations:
- karpenter.sh/do-not-evict: "true"
+ karpenter.sh/do-not-disrupt: "true"
```
When you set this annotation on the Pod, Karpenter will not evict it. This means that the Pod will stay on the Node it
@@ -437,14 +446,14 @@ Since the Runner Pods terminate at the end of the job, this is not a problem for
However, if you have set `minReplicas > 0`, then you have some Pods that are just idling, waiting for jobs to be
assigned to them. These Pods are exactly the kind of Pods you want terminated and moved when the cluster is
underutilized. Therefore, when you set `minReplicas > 0`, you should **NOT** set `karpenter.sh/do-not-evict: "true"` on
-the Pod.
+the Pod via the `pod_annotations` attribute of the `runners` input. (**But wait**, _there is good news_!)
We have [requested a feature](https://github.com/actions/actions-runner-controller/issues/2562) that will allow you to
-set `karpenter.sh/do-not-evict: "true"` and `minReplicas > 0` at the same time by only annotating Pods running jobs.
-Meanwhile, another option is to set `minReplicas = 0` on a schedule using an ARC Autoscaler
-[scheduled override](https://github.com/actions/actions-runner-controller/blob/master/docs/automatically-scaling-runners.md#scheduled-overrides).
-At present, this component does not support that option, but it could be added in the future if our preferred solution
-is not implemented.
+set `karpenter.sh/do-not-disrupt: "true"` and `minReplicas > 0` at the same time by only annotating Pods running jobs.
+Meanwhile, **we have implemented this for you** using a job startup hook. This hook will set annotations on the Pod when
+the job starts. When the job finishes, the Pod will be deleted by the controller, so the annotations will not need to be
+removed. Configure annotations that apply only to Pods running jobs in the `running_pod_annotations` attribute of the
+`runners` input.
### Updating CRDs
@@ -485,8 +494,8 @@ documentation for further details.
| Name | Source | Version |
|------|--------|---------|
-| [actions\_runner](#module\_actions\_runner) | cloudposse/helm-release/aws | 0.10.0 |
-| [actions\_runner\_controller](#module\_actions\_runner\_controller) | cloudposse/helm-release/aws | 0.10.0 |
+| [actions\_runner](#module\_actions\_runner) | cloudposse/helm-release/aws | 0.10.1 |
+| [actions\_runner\_controller](#module\_actions\_runner\_controller) | cloudposse/helm-release/aws | 0.10.1 |
| [eks](#module\_eks) | cloudposse/stack-config/yaml//modules/remote-state | 1.5.0 |
| [iam\_roles](#module\_iam\_roles) | ../../account-map/modules/iam-roles | n/a |
| [this](#module\_this) | cloudposse/label/null | 0.25.0 |
@@ -515,6 +524,7 @@ documentation for further details.
| [cleanup\_on\_fail](#input\_cleanup\_on\_fail) | Allow deletion of new resources created in this upgrade when upgrade fails. | `bool` | `true` | no |
| [context](#input\_context) | Single object for setting entire context at once.
See description of individual variables for details.
Leave string and numeric variables as `null` to use default value.
Individual variable settings (non-null) override settings in context object,
except for attributes, tags, and additional\_tag\_map, which are merged. | `any` |
{| no | | [context\_tags\_enabled](#input\_context\_tags\_enabled) | Whether or not to include all context tags as labels for each runner | `bool` | `false` | no | +| [controller\_replica\_count](#input\_controller\_replica\_count) | The number of replicas of the runner-controller to run. | `number` | `2` | no | | [create\_namespace](#input\_create\_namespace) | Create the namespace if it does not yet exist. Defaults to `false`. | `bool` | `null` | no | | [delimiter](#input\_delimiter) | Delimiter to be used between ID elements.
"additional_tag_map": {},
"attributes": [],
"delimiter": null,
"descriptor_formats": {},
"enabled": true,
"environment": null,
"id_length_limit": null,
"label_key_case": null,
"label_order": [],
"label_value_case": null,
"labels_as_tags": [
"unset"
],
"name": null,
"namespace": null,
"regex_replace_chars": null,
"stage": null,
"tags": {},
"tenant": null
}
object({| n/a | yes | -| [runners](#input\_runners) | Map of Action Runner configurations, with the key being the name of the runner. Please note that the name must be in
limits = object({
cpu = string
memory = string
})
requests = object({
cpu = string
memory = string
})
})
hcl|
organization_runner = {
type = "organization" # can be either 'organization' or 'repository'
dind_enabled: false # A Docker sidecar container will be deployed
image: summerwind/actions-runner # If dind_enabled=true, set this to 'summerwind/actions-runner-dind'
scope = "ACME" # org name for Organization runners, repo name for Repository runners
group = "core-automation" # Optional. Assigns the runners to a runner group, for access control.
scale_down_delay_seconds = 300
min_replicas = 1
max_replicas = 5
busy_metrics = {
scale_up_threshold = 0.75
scale_down_threshold = 0.25
scale_up_factor = 2
scale_down_factor = 0.5
}
labels = [
"Ubuntu",
"core-automation",
]
}
map(object({| n/a | yes | +| [runners](#input\_runners) | Map of Action Runner configurations, with the key being the name of the runner. Please note that the name must be in
type = string
scope = string
group = optional(string, null)
image = optional(string, "")
dind_enabled = bool
node_selector = optional(map(string), {})
pod_annotations = optional(map(string), {})
tolerations = optional(list(object({
key = string
operator = string
value = optional(string, null)
effect = string
})), [])
scale_down_delay_seconds = number
min_replicas = number
max_replicas = number
busy_metrics = optional(object({
scale_up_threshold = string
scale_down_threshold = string
scale_up_adjustment = optional(string)
scale_down_adjustment = optional(string)
scale_up_factor = optional(string)
scale_down_factor = optional(string)
}))
webhook_driven_scaling_enabled = bool
webhook_startup_timeout = optional(string, null)
pull_driven_scaling_enabled = bool
labels = list(string)
storage = optional(string, null)
pvc_enabled = optional(bool, false)
resources = object({
limits = object({
cpu = string
memory = string
ephemeral_storage = optional(string, null)
})
requests = object({
cpu = string
memory = string
})
})
}))
hcl|
organization_runner = {
type = "organization" # can be either 'organization' or 'repository'
dind_enabled: true # A Docker daemon will be started in the runner Pod
image: summerwind/actions-runner-dind # If dind_enabled=false, set this to 'summerwind/actions-runner'
scope = "ACME" # org name for Organization runners, repo name for Repository runners
group = "core-automation" # Optional. Assigns the runners to a runner group, for access control.
scale_down_delay_seconds = 300
min_replicas = 1
max_replicas = 5
labels = [
"Ubuntu",
"core-automation",
]
}
map(object({| n/a | yes | | [s3\_bucket\_arns](#input\_s3\_bucket\_arns) | List of ARNs of S3 Buckets to which the runners will have read-write access to. | `list(string)` | `[]` | no | | [ssm\_docker\_config\_json\_path](#input\_ssm\_docker\_config\_json\_path) | SSM path to the Docker config JSON | `string` | `null` | no | | [ssm\_github\_secret\_path](#input\_ssm\_github\_secret\_path) | The path in SSM to the GitHub app private key file contents or GitHub PAT token. | `string` | `""` | no | @@ -559,7 +569,7 @@ documentation for further details. | [tenant](#input\_tenant) | ID element \_(Rarely used, not included by default)\_. A customer identifier, indicating who this instance of a resource is for | `string` | `null` | no | | [timeout](#input\_timeout) | Time in seconds to wait for any individual kubernetes operation (like Jobs for hooks). Defaults to `300` seconds | `number` | `null` | no | | [wait](#input\_wait) | Will wait until all resources are in a ready state before marking the release as successful. It will wait for as long as `timeout`. Defaults to `true`. | `bool` | `null` | no | -| [webhook](#input\_webhook) | Configuration for the GitHub Webhook Server.
type = string
scope = string
group = optional(string, null)
image = optional(string, "summerwind/actions-runner-dind")
dind_enabled = optional(bool, true)
node_selector = optional(map(string), {})
pod_annotations = optional(map(string), {})
# running_pod_annotations are only applied to the pods once they start running a job
running_pod_annotations = optional(map(string), {})
# affinity is too complex to model. Whatever you assigned affinity will be copied
# to the runner Pod spec.
affinity = optional(any)
tolerations = optional(list(object({
key = string
operator = string
value = optional(string, null)
effect = string
})), [])
scale_down_delay_seconds = optional(number, 300)
min_replicas = number
max_replicas = number
busy_metrics = optional(object({
scale_up_threshold = string
scale_down_threshold = string
scale_up_adjustment = optional(string)
scale_down_adjustment = optional(string)
scale_up_factor = optional(string)
scale_down_factor = optional(string)
}))
webhook_driven_scaling_enabled = optional(bool, true)
# The name `webhook_startup_timeout` is misleading.
# It is actually the duration after which a job will be considered completed
# (and the runner killed), even if the webhook has not received a "job completed" event.
# This is to ensure that if an event is missed, it does not leave the runner running forever.
# Set it long enough to cover the longest job you expect to run and then some.
# See https://github.com/actions/actions-runner-controller/blob/9afd93065fa8b1f87296f0dcdf0c2753a0548cb7/docs/automatically-scaling-runners.md?plain=1#L264-L268
webhook_startup_timeout = optional(string, "1h")
pull_driven_scaling_enabled = optional(bool, false)
labels = optional(list(string), [])
docker_storage = optional(string, null)
# storage is deprecated in favor of docker_storage, since it is only storage for the Docker daemon
storage = optional(string, null)
pvc_enabled = optional(bool, false)
resources = optional(object({
limits = optional(object({
cpu = optional(string, "1")
memory = optional(string, "1Gi")
ephemeral_storage = optional(string, "10Gi")
}), {})
requests = optional(object({
cpu = optional(string, "500m")
memory = optional(string, "256Mi")
ephemeral_storage = optional(string, "1Gi")
}), {})
}), {})
}))
object({|
enabled = bool
hostname_template = string
queue_limit = optional(number, 100)
})
{| no | +| [webhook](#input\_webhook) | Configuration for the GitHub Webhook Server.
"enabled": false,
"hostname_template": null,
"queue_limit": 100
}
object({|
enabled = bool
hostname_template = string
queue_limit = optional(number, 1000)
})
{| no | ## Outputs diff --git a/modules/eks/actions-runner-controller/charts/actions-runner/Chart.yaml b/modules/eks/actions-runner-controller/charts/actions-runner/Chart.yaml index b5c10525b..1ec5333d2 100644 --- a/modules/eks/actions-runner-controller/charts/actions-runner/Chart.yaml +++ b/modules/eks/actions-runner-controller/charts/actions-runner/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.2 +version: 0.2.0 # This chart only deploys Resources for actions-runner-controller, so app version does not really apply. # We use Resource API version instead. diff --git a/modules/eks/actions-runner-controller/charts/actions-runner/templates/runnerdeployment.yaml b/modules/eks/actions-runner-controller/charts/actions-runner/templates/runnerdeployment.yaml index a44658dec..1321f22c8 100644 --- a/modules/eks/actions-runner-controller/charts/actions-runner/templates/runnerdeployment.yaml +++ b/modules/eks/actions-runner-controller/charts/actions-runner/templates/runnerdeployment.yaml @@ -1,34 +1,3 @@ -{{- if .Values.pvc_enabled }} ---- -# Persistent Volumes can be used for image caching -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ .Values.release_name }} -spec: - accessModes: - - ReadWriteMany - # StorageClassName comes from efs-controller and must be deployed first. - storageClassName: efs-sc - resources: - requests: - # EFS is not actually storage constrained, but this storage request is - # required. 100Gi is a ballpark for how much we initially request, but this - # may grow. We are responsible for docker pruning this periodically to - # save space. 
- storage: 100Gi -{{- end }} -{{- if .Values.docker_config_json_enabled }} ---- -apiVersion: v1 -kind: Secret -metadata: - name: {{ .Values.release_name }}-regcred -type: kubernetes.io/dockerconfigjson -data: - .dockerconfigjson: {{ .Values.docker_config_json }} -{{- end }} ---- apiVersion: actions.summerwind.dev/v1alpha1 kind: RunnerDeployment metadata: @@ -38,13 +7,13 @@ spec: # See https://github.com/actions-runner-controller/actions-runner-controller/issues/206#issuecomment-748601907 # replicas: 1 template: - {{- with index .Values "pod_annotations" }} + {{- with .Values.pod_annotations }} metadata: annotations: {{- toYaml . | nindent 8 }} {{- end }} spec: - {{- if .Values.docker_config_json_enabled }} + {{- if .Values.docker_config_json_enabled }} # secrets volumeMount are always mounted readOnly so config.json has to be copied to the correct directory # https://github.com/kubernetes/kubernetes/issues/62099 # https://github.com/actions/actions-runner-controller/issues/2123#issuecomment-1527077517 @@ -82,14 +51,41 @@ spec: # - effect: NoSchedule # key: node-role.kubernetes.io/actions-runner # operator: Exists + {{- with .Values.node_selector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + + {{- with .Values.running_pod_annotations }} + # Run a pre-run hook to set pod annotations + # See https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job#triggering-the-scripts + containers: + - name: runner + # ARC (Summerwind) has its own pre-run hook, so we do not want to set + # env: + # - name: ACTIONS_RUNNER_HOOK_JOB_STARTED + # value: /hooks/pre-run.sh # triggers when a job is started, and sets the pod to NOT safe-to-evict + # Instead, its pre-run hook runs scripts in /etc/arc/hooks/job-started.d/ + volumeMounts: + - name: hooks + mountPath: /etc/arc/hooks/job-started.d/ + {{- end }} - {{ if eq .Values.type "organization" }} + {{- if eq .Values.type "organization" }} organization: {{ .Values.scope }} {{- end }} - {{ if eq .Values.type "repository" }} + {{- if eq .Values.type "repository" }} repository: {{ .Values.scope }} {{- end }} - {{ if index .Values "group" }} + {{- if index .Values "group" }} group: {{ .Values.group }} {{- end }} # You can use labels to create subsets of runners. @@ -103,14 +99,6 @@ spec: {{- range .Values.labels }} - {{ . | quote }} {{- end }} - {{- if gt ( len (index .Values "node_selector") ) 0 }} - nodeSelector: - {{- toYaml .Values.node_selector | nindent 8 }} - {{- end }} - {{- if gt ( len (index .Values "tolerations") ) 0 }} - tolerations: - {{- toYaml .Values.tolerations | nindent 8 }} - {{- end }} # dockerdWithinRunnerContainer = false means access to a Docker daemon is provided by a sidecar container. 
dockerdWithinRunnerContainer: {{ .Values.dind_enabled }} image: {{ .Values.image | quote }} @@ -133,7 +121,7 @@ spec: {{- if index .Values.resources.requests "ephemeral_storage" }} ephemeral-storage: {{ .Values.resources.requests.ephemeral_storage }} {{- end }} - {{- if and .Values.dind_enabled .Values.storage }} + {{- if and .Values.dind_enabled .Values.docker_storage }} dockerVolumeMounts: - mountPath: /var/lib/docker name: docker-volume @@ -150,10 +138,10 @@ spec: - mountPath: /home/runner/.docker name: docker-config-volume {{- end }} - {{- end }} - {{- if or (and .Values.dind_enabled .Values.storage) (.Values.pvc_enabled) (.Values.docker_config_json_enabled) }} + {{- end }}{{/* End of volumeMounts */}} + {{- if or (and .Values.dind_enabled .Values.docker_storage) (.Values.pvc_enabled) (.Values.docker_config_json_enabled) (not (empty .Values.running_pod_annotations)) }} volumes: - {{- if and .Values.dind_enabled .Values.storage }} + {{- if and .Values.dind_enabled .Values.docker_storage }} - name: docker-volume ephemeral: volumeClaimTemplate: @@ -161,13 +149,13 @@ spec: accessModes: [ "ReadWriteOnce" ] # Only 1 pod can connect at a time resources: requests: - storage: {{ .Values.storage }} - {{- end }} - {{- if .Values.pvc_enabled }} + storage: {{ .Values.docker_storage }} + {{- end }} + {{- if .Values.pvc_enabled }} - name: shared-volume persistentVolumeClaim: claimName: {{ .Values.release_name }} - {{- end }} + {{- end }} {{- if .Values.docker_config_json_enabled }} - name: docker-secret secret: @@ -178,4 +166,88 @@ spec: - name: docker-config-volume emptyDir: {{- end }} - {{- end }} + {{- with .Values.running_pod_annotations }} + - name: hooks + configMap: + name: runner-hooks + defaultMode: 0755 # Set execute permissions for all files + {{- end }} + {{- end }}{{/* End of volumes */}} +{{- if .Values.pvc_enabled }} +--- +# Persistent Volumes can be used for image caching +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Values.release_name 
}} +spec: + accessModes: + - ReadWriteMany + # StorageClassName comes from efs-controller and must be deployed first. + storageClassName: efs-sc + resources: + requests: + # EFS is not actually storage constrained, but this storage request is + # required. 100Gi is a ballpark for how much we initially request, but this + # may grow. We are responsible for docker pruning this periodically to + # save space. + storage: 100Gi +{{- end }} +{{- if .Values.docker_config_json_enabled }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.release_name }}-regcred +type: kubernetes.io/dockerconfigjson +data: + .dockerconfigjson: {{ .Values.docker_config_json }} +{{- end }} +{{- with .Values.running_pod_annotations }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: runner-hooks +data: + annotate.sh: | + #!/bin/bash + + # If we had kubectl and a KUBECONFIG, we could do this: + # kubectl annotate pod $HOSTNAME 'karpenter.sh/do-not-evict="true"' --overwrite + # kubectl annotate pod $HOSTNAME 'karpenter.sh/do-not-disrupt="true"' --overwrite + + # This is the same thing, the hard way + + # Metadata about the pod + NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace) + POD_NAME=$(hostname) + + # Kubernetes API URL + API_URL="https://kubernetes.default.svc" + + # Read the service account token + TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) + + # Content type + CONTENT_TYPE="application/merge-patch+json" + + PATCH_JSON=$(cat <
"enabled": false,
"hostname_template": null,
"queue_limit": 1000
}
[| no | | [name](#input\_name) | ID element. Usually the component or solution name, e.g. 'app' or 'jenkins'.
"default"
]
map(object({| n/a | yes | +| [node\_pools](#input\_node\_pools) | Configuration for node pools. See code for details. |
# The name of the Karpenter provisioner. The map key is used if this is not set.
name = optional(string)
# Whether to place EC2 instances launched by Karpenter into VPC private subnets. Set it to `false` to use public subnets.
private_subnets_enabled = bool
# The Disruption spec controls how Karpenter scales down the node group.
# See the example (sadly not the specific `spec.disruption` documentation) at https://karpenter.sh/docs/concepts/nodepools/ for details
disruption = optional(object({
# Describes which types of Nodes Karpenter should consider for consolidation.
# If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or
# replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost.
# If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods.
consolidation_policy = optional(string, "WhenUnderutilized")
# The amount of time Karpenter should wait after discovering a consolidation decision (`go` duration string, s|m|h).
# This value can currently (v0.36.0) only be set when the consolidationPolicy is 'WhenEmpty'.
# You can choose to disable consolidation entirely by setting the string value 'Never' here.
# Earlier versions of Karpenter called this field `ttl_seconds_after_empty`.
consolidate_after = optional(string)
# The amount of time a Node can live on the cluster before being removed (`go` duration string, s|m|h).
# You can choose to disable expiration entirely by setting the string value 'Never' here.
# This module sets a default of 336 hours (14 days), while the Karpenter default is 720 hours (30 days).
# Note that Karpenter calls this field "expiresAfter", and earlier versions called it `ttl_seconds_until_expired`,
# but we call it "max_instance_lifetime" to match the corresponding field in EC2 Auto Scaling Groups.
max_instance_lifetime = optional(string, "336h")
# Budgets control the maximum number of NodeClaims owned by this NodePool that can be terminating at once.
# See https://karpenter.sh/docs/concepts/disruption/#disruption-budgets for details.
# A percentage is the percentage of the total number of active, ready nodes not being deleted, rounded up.
# If there are multiple active budgets, Karpenter uses the most restrictive value.
# If left undefined, this will default to one budget with a value of nodes: 10%.
# Note that budgets do not prevent or limit involuntary terminations.
# Example:
# On Weekdays during business hours, don't do any deprovisioning.
# budgets = {
# schedule = "0 9 * * mon-fri"
# duration = 8h
# nodes = "0"
# }
budgets = optional(list(object({
# The schedule specifies when a budget begins being active, using extended cronjob syntax.
# See https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#schedule-syntax for syntax details.
# Timezones are not supported. This field is required if Duration is set.
schedule = optional(string)
# Duration determines how long a Budget is active after each Scheduled start.
# If omitted, the budget is always active. This is required if Schedule is set.
# Must be a whole number of minutes and hours, as cron does not work in seconds,
# but since Go's `duration.String()` always adds a "0s" at the end, that is allowed.
duration = optional(string)
# The percentage or number of nodes that Karpenter can scale down during the budget.
nodes = string
})), [])
}), {})
# Karpenter provisioner total CPU limit for all pods running on the EC2 instances launched by Karpenter
total_cpu_limit = string
# Karpenter provisioner total memory limit for all pods running on the EC2 instances launched by Karpenter
total_memory_limit = string
# Set a weight for this node pool.
# See https://karpenter.sh/docs/concepts/scheduling/#weighted-nodepools
weight = optional(number, 50)
labels = optional(map(string))
annotations = optional(map(string))
# Karpenter provisioner taints configuration. See https://aws.github.io/aws-eks-best-practices/karpenter/#create-provisioners-that-are-mutually-exclusive for more details
taints = optional(list(object({
key = string
effect = string
value = string
})))
startup_taints = optional(list(object({
key = string
effect = string
value = string
})))
# Karpenter node metadata options. See https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions for more details
metadata_options = optional(object({
httpEndpoint = optional(string, "enabled")
httpProtocolIPv6 = optional(string, "disabled")
httpPutResponseHopLimit = optional(number, 2)
# httpTokens can be either "required" or "optional"
httpTokens = optional(string, "required")
}), {})
# The AMI used by Karpenter provisioner when provisioning nodes. Based on the value set for amiFamily, Karpenter will automatically query for the appropriate EKS optimized AMI via AWS Systems Manager (SSM)
ami_family = string
# Karpenter nodes block device mappings. Controls the Elastic Block Storage volumes that Karpenter attaches to provisioned nodes.
# Karpenter uses default block device mappings for the AMI Family specified.
# For example, the Bottlerocket AMI Family defaults with two block device mappings,
# and normally you only want to scale `/dev/xvdb` where Containers and their storage are stored.
# Most other AMIs only have one device mapping at `/dev/xvda`.
# See https://karpenter.sh/docs/concepts/nodeclasses/#specblockdevicemappings for more details
block_device_mappings = list(object({
deviceName = string
ebs = optional(object({
volumeSize = string
volumeType = string
deleteOnTermination = optional(bool, true)
encrypted = optional(bool, true)
iops = optional(number)
kmsKeyID = optional(string, "alias/aws/ebs")
snapshotID = optional(string)
throughput = optional(number)
}))
}))
# Set acceptable (In) and unacceptable (Out) Kubernetes and Karpenter values for node provisioning based on Well-Known Labels and cloud-specific settings. These can include instance types, zones, computer architecture, and capacity type (such as AWS spot or on-demand). See https://karpenter.sh/v0.18.0/provisioner/#specrequirements for more details
requirements = list(object({
key = string
operator = string
# Operators like "Exists" and "DoesNotExist" do not require a value
values = optional(list(string))
}))
}))
map(object({| n/a | yes | | [regex\_replace\_chars](#input\_regex\_replace\_chars) | Terraform regular expression (regex) string.
# The name of the Karpenter provisioner. The map key is used if this is not set.
name = optional(string)
# Whether to place EC2 instances launched by Karpenter into VPC private subnets. Set it to `false` to use public subnets.
private_subnets_enabled = bool
# The Disruption spec controls how Karpenter scales down the node group.
# See the example (sadly not the specific `spec.disruption` documentation) at https://karpenter.sh/docs/concepts/nodepools/ for details
disruption = optional(object({
# Describes which types of Nodes Karpenter should consider for consolidation.
# If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or
# replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost.
# If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods.
consolidation_policy = optional(string, "WhenUnderutilized")
# The amount of time Karpenter should wait after discovering a consolidation decision (`go` duration string, smh).
# This value can currently (v0.36.0) only be set when the consolidationPolicy is 'WhenEmpty'.
# You can choose to disable consolidation entirely by setting the string value 'Never' here.
# Earlier versions of Karpenter called this field `ttl_seconds_after_empty`.
consolidate_after = optional(string)
# The amount of time a Node can live on the cluster before being removed (`go` duration string, smh).
# You can choose to disable expiration entirely by setting the string value 'Never' here.
# This module sets a default of 336 hours (14 days), while the Karpenter default is 720 hours (30 days).
# Note that Karpenter calls this field "expiresAfter", and earlier versions called it `ttl_seconds_until_expired`,
# but we call it "max_instance_lifetime" to match the corresponding field in EC2 Auto Scaling Groups.
max_instance_lifetime = optional(string, "336h")
    # Budgets control the maximum number of NodeClaims owned by this NodePool that can be terminating at once.
# See https://karpenter.sh/docs/concepts/disruption/#disruption-budgets for details.
# A percentage is the percentage of the total number of active, ready nodes not being deleted, rounded up.
# If there are multiple active budgets, Karpenter uses the most restrictive value.
# If left undefined, this will default to one budget with a value of nodes: 10%.
# Note that budgets do not prevent or limit involuntary terminations.
# Example:
# On Weekdays during business hours, don't do any deprovisioning.
    #  budgets = [{
    #    schedule = "0 9 * * mon-fri"
    #    duration = "8h"
    #    nodes    = "0"
    #  }]
budgets = optional(list(object({
# The schedule specifies when a budget begins being active, using extended cronjob syntax.
# See https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#schedule-syntax for syntax details.
# Timezones are not supported. This field is required if Duration is set.
schedule = optional(string)
# Duration determines how long a Budget is active after each Scheduled start.
# If omitted, the budget is always active. This is required if Schedule is set.
# Must be a whole number of minutes and hours, as cron does not work in seconds,
# but since Go's `duration.String()` always adds a "0s" at the end, that is allowed.
duration = optional(string)
# The percentage or number of nodes that Karpenter can scale down during the budget.
nodes = string
})), [])
}), {})
# Karpenter provisioner total CPU limit for all pods running on the EC2 instances launched by Karpenter
total_cpu_limit = string
# Karpenter provisioner total memory limit for all pods running on the EC2 instances launched by Karpenter
total_memory_limit = string
# Set a weight for this node pool.
# See https://karpenter.sh/docs/concepts/scheduling/#weighted-nodepools
weight = optional(number, 50)
labels = optional(map(string))
annotations = optional(map(string))
# Karpenter provisioner taints configuration. See https://aws.github.io/aws-eks-best-practices/karpenter/#create-provisioners-that-are-mutually-exclusive for more details
taints = optional(list(object({
key = string
effect = string
value = string
})))
startup_taints = optional(list(object({
key = string
effect = string
value = string
})))
# Karpenter node metadata options. See https://karpenter.sh/docs/concepts/nodeclasses/#specmetadataoptions for more details
metadata_options = optional(object({
httpEndpoint = optional(string, "enabled")
httpProtocolIPv6 = optional(string, "disabled")
httpPutResponseHopLimit = optional(number, 2)
# httpTokens can be either "required" or "optional"
httpTokens = optional(string, "required")
}), {})
# The AMI used by Karpenter provisioner when provisioning nodes. Based on the value set for amiFamily, Karpenter will automatically query for the appropriate EKS optimized AMI via AWS Systems Manager (SSM)
ami_family = string
# Karpenter nodes block device mappings. Controls the Elastic Block Storage volumes that Karpenter attaches to provisioned nodes.
# Karpenter uses default block device mappings for the AMI Family specified.
# For example, the Bottlerocket AMI Family defaults with two block device mappings,
    # and normally you only want to scale `/dev/xvdb` where Containers and their storage are stored.
# Most other AMIs only have one device mapping at `/dev/xvda`.
# See https://karpenter.sh/docs/concepts/nodeclasses/#specblockdevicemappings for more details
block_device_mappings = list(object({
deviceName = string
ebs = optional(object({
volumeSize = string
volumeType = string
deleteOnTermination = optional(bool, true)
encrypted = optional(bool, true)
iops = optional(number)
kmsKeyID = optional(string, "alias/aws/ebs")
snapshotID = optional(string)
throughput = optional(number)
}))
}))
# Set acceptable (In) and unacceptable (Out) Kubernetes and Karpenter values for node provisioning based on Well-Known Labels and cloud-specific settings. These can include instance types, zones, computer architecture, and capacity type (such as AWS spot or on-demand). See https://karpenter.sh/v0.18.0/provisioner/#specrequirements for more details
requirements = list(object({
key = string
operator = string
# Operators like "Exists" and "DoesNotExist" do not require a value
values = optional(list(string))
}))
}))
{| no | | [create\_namespace](#input\_create\_namespace) | Create the namespace if it does not yet exist. Defaults to `true`. | `bool` | `true` | no | | [delimiter](#input\_delimiter) | Delimiter to be used between ID elements.
"additional_tag_map": {},
"attributes": [],
"delimiter": null,
"descriptor_formats": {},
"enabled": true,
"environment": null,
"id_length_limit": null,
"label_key_case": null,
"label_order": [],
"label_value_case": null,
"labels_as_tags": [
"unset"
],
"name": null,
"namespace": null,
"regex_replace_chars": null,
"stage": null,
"tags": {},
"tenant": null
}
{| no | | [custom\_origins](#input\_custom\_origins) | A list of additional custom website [origins](https://www.terraform.io/docs/providers/aws/r/cloudfront_distribution.html#origin-arguments) for this distribution. |
"additional_tag_map": {},
"attributes": [],
"delimiter": null,
"descriptor_formats": {},
"enabled": true,
"environment": null,
"id_length_limit": null,
"label_key_case": null,
"label_order": [],
"label_value_case": null,
"labels_as_tags": [
"unset"
],
"name": null,
"namespace": null,
"regex_replace_chars": null,
"stage": null,
"tags": {},
"tenant": null
}
list(object({| `[]` | no | | [delimiter](#input\_delimiter) | Delimiter to be used between ID elements.
domain_name = string
origin_id = string
origin_path = string
custom_headers = list(object({
name = string
value = string
}))
custom_origin_config = object({
http_port = number
https_port = number
origin_protocol_policy = string
origin_ssl_protocols = list(string)
origin_keepalive_timeout = number
origin_read_timeout = number
})
}))