diff --git a/.ci/build-yarn.sh b/.ci/build-yarn.sh
new file mode 100755
index 0000000000..1283d9ea8b
--- /dev/null
+++ b/.ci/build-yarn.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Build all the lambdas; output goes to the default location (inside each lambda module).
+
+lambdaSrcDirs=("modules/runner-binaries-syncer/lambdas/runner-binaries-syncer" "modules/runners/lambdas/runners" "modules/webhook/lambdas/webhook")
+repoRoot=$(dirname "$(dirname "$(realpath "${BASH_SOURCE[0]}")")")
+
+for lambdaDir in "${lambdaSrcDirs[@]}"; do
+  cd "$repoRoot/${lambdaDir}"
+  yarn && yarn run dist
+done
diff --git a/README.md b/README.md
index b38102056b..86354d19cd 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,16 @@
This [Terraform](https://www.terraform.io/) module creates the infrastructure needed to host [GitHub Actions](https://github.com/features/actions) self-hosted, auto-scaling runners on [AWS spot instances](https://aws.amazon.com/ec2/spot/). It provides the required logic to handle the life cycle for scaling up and down using a set of AWS Lambda functions. Runners are scaled down to zero to avoid costs when no workflows are active.

+> NEW: Ephemeral runners available as a beta feature.
+
+> NEW: Windows runners are available.
+
+> NEW: Examples for custom AMI are available.
+
- [Motivation](#motivation)
- [Overview](#overview)
-  - [ARM64 support via Graviton/Graviton2 instance-types](#arm64-support-via-gravitongraviton2-instance-types)
+  - [Major configuration options](#major-configuration-options)
+    - [ARM64 support via Graviton/Graviton2 instance-types](#arm64-support-via-gravitongraviton2-instance-types)
- [Usages](#usages)
  - [Setup GitHub App (part 1)](#setup-github-app-part-1)
  - [Setup terraform module](#setup-terraform-module)
@@ -16,6 +23,7 @@
  - [Install app](#install-app)
  - [Encryption](#encryption)
  - [Idle runners](#idle-runners)
+  - [Ephemeral runners](#ephemeral-runners)
  - [Prebuilt Images](#prebuilt-images)
  - [Examples](#examples)
- [Sub modules](#sub-modules)
@@ -48,7 +56,6 @@ For receiving the `check_run` or `workflow_job` event by the webhook (lambda) a
- `check_run`: create a webhook on enterprise, org, repo or app level. When using the app option, the app needs to be installed on the repos that are using the self-hosted runners.
- a webhook needs to be created. The webhook can be defined on enterprise, org, repo, or app level.
- In AWS an [API gateway](https://docs.aws.amazon.com/apigateway/index.html) endpoint is created that is able to receive the GitHub webhook events via HTTP POST. The gateway triggers the webhook lambda, which verifies the signature of the event. This check guarantees the event is sent by the GitHub App. The lambda only handles `workflow_job` or `check_run` events with status `queued` and matching the runner labels (only for `workflow_job`). The accepted events are posted on an SQS queue. Messages on this queue will be delayed for a configurable number of seconds (default 30 seconds) to give the available runners time to pick up this build. The "scale up runner" lambda listens to the SQS queue and picks up events. The lambda runs various checks to decide whether a new EC2 spot instance needs to be created. For example, the instance is not created if the build is already started by an existing runner, or the maximum number of runners is reached.

@@ -71,7 +78,18 @@ Permission are managed on several places. Below the most important ones.
Besides these permissions, the lambdas also need permissions for CloudWatch (for logging and scheduling), SSM, and S3. For more details about the required permissions, see the [documentation](./modules/setup-iam-permissions/README.md) of the IAM module, which uses permission boundaries.

-### ARM64 support via Graviton/Graviton2 instance-types
+### Major configuration options
+
+To support a number of use cases, the module has quite a lot of configuration options. We try to choose reasonable defaults. The examples also show how to configure the runners for the main use cases. A minimal configuration sketch follows this overview.
+
+- Org vs repo level. You can configure the module to connect the runners in GitHub on an org level and share the runners across your org, or register the runners on repo level. The module will register the runner to the repo. This can be multiple repos, but runners are not shared between repos.
+- Check run vs workflow job event. You can configure the webhook in GitHub to send check run or workflow job events to the webhook. Workflow job events were introduced by GitHub in September 2021 and are designed to support scalable runners. We advise using the workflow job event when possible; you can set `disable_check_wokflow_job_labels = true` to disable the label check.
+- Linux vs Windows. You can configure the OS types `linux` and `win`. Linux is used by default.
+- Re-use vs ephemeral. By default runners are re-used until detected idle; once idle they will be removed from the pool. To improve security we are introducing ephemeral runners, which are only used for one job. Ephemeral runners only work in combination with the workflow job event. We also suggest using a prebuilt AMI to improve the start time of jobs.
+- GitHub cloud vs GitHub Enterprise Server (GHES). The runners support GitHub cloud as well as GitHub Enterprise Server. For GHES we rely on our community for testing and support, as we have no way to test GHES ourselves.
+
+#### ARM64 support via Graviton/Graviton2 instance-types

When using the default example or top-level module, specifying an `instance_type` that matches a Graviton/Graviton 2 (ARM64) architecture (e.g. a1 or any 6th-gen `g` or `gd` type), the sub-modules will be automatically configured to provision with ARM64 AMIs and leverage GitHub's ARM64 action runner. See below for more details.

@@ -268,6 +286,17 @@ idle_config = [{
_**Note**_: When using Windows runners it's recommended to keep a few runners warmed up due to the minutes-long cold start time.

+### Ephemeral runners
+
+Currently a beta feature! You can configure runners to be ephemeral; such runners are used for only one job. The feature should be used in conjunction with listening for the workflow job event. Please consider the following:
+
+- The scale-down lambda is still active and should only remove orphan instances, but there is no strict check in place. Ensure you configure `minimum_running_time_in_minutes` to a value high enough for your runner to boot and connect, so it is not terminated before executing a job.
+- The messages sent from the webhook lambda to the scale-up lambda are by default delayed by SQS, to give available runners the chance to start the job before the decision is made to scale more runners. For ephemeral runners there is no need to wait; set `delay_webhook_event` to `0`.
+- To ensure runners are created in the same order GitHub sends the events, we use a FIFO queue by default; this is mainly relevant for repo-level runners. For ephemeral runners you can set `fifo_build_queue` to `false`.
+- Errors related to scaling are retried via SQS. You can configure `job_queue_retention_in_seconds` and `redrive_build_queue` to tune the behavior. We have no mechanism to avoid events never being processed, which means that potentially no runner is created and the job in GitHub can time out after 6 hours.
+
+The example for [ephemeral runners](./examples/ephemeral) is based on the [default example](./examples/default). Have a look at the diff to see the major configuration differences.
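For illustration only (this is not the shipped example), the sketch below combines the options discussed above for org-level ephemeral runners. The module `source`, VPC, subnet, and GitHub App values are placeholders; the remaining arguments are inputs documented in this README.

```hcl
module "runners" {
  # Placeholder source: point this at the module root or the registry release you use.
  source = "../../"

  aws_region  = "eu-west-1"
  vpc_id      = "vpc-0example"      # placeholder
  subnet_ids  = ["subnet-0example"] # placeholder
  environment = "ephemeral-example"

  github_app = {
    key_base64     = "base64-encoded-app-private-key" # placeholder
    id             = "012345"                         # placeholder
    webhook_secret = "your-webhook-secret"            # placeholder
  }

  # Ephemeral runners are used for exactly one job; combine with the workflow_job event.
  enable_ephemeral_runners = true

  # No need to delay events for ephemeral runners.
  delay_webhook_event = 0

  # Strict event ordering is not required for ephemeral runners; the default (false) is fine.
  fifo_build_queue = false

  # Give the instance time to boot and register before the scale-down lambda may remove it.
  minimum_running_time_in_minutes = 10

  enable_organization_runners = true
  runner_extra_labels         = "ephemeral,example"
  runners_maximum_count       = 5
}
```

See the [ephemeral example](./examples/ephemeral) for a complete configuration, including the lambda artifacts.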
### Prebuilt Images

This module also allows you to run agents from a prebuilt AMI to gain faster startup times. You can find more information in [the image README.md](/images/README.md)

@@ -295,10 +324,11 @@ For time zones please check [TZ database name column](https://en.wikipedia.org/w
Examples are located in the [examples](./examples) directory. The following examples are provided:

- _[Default](examples/default/README.md)_: The default example of the module
-- _[Permissions boundary](examples/permissions-boundary/README.md)_: Example usages of permissions boundaries.
- _[Ubuntu](examples/ubuntu/README.md)_: Example usage of creating a runner using Ubuntu AMIs.
-- _[Prebuilt Images](examples/prebuilt/README.md)_: Example usages of deploying runners with a custom prebuilt image.
- _[Windows](examples/windows/README.md)_: Example usage of creating a runner using Windows as the OS.
+- _[Ephemeral](examples/ephemeral/README.md)_: Example usages of ephemeral runners based on the default example.
+- _[Prebuilt Images](examples/prebuilt/README.md)_: Example usages of deploying runners with a custom prebuilt image.
+- _[Permissions boundary](examples/permissions-boundary/README.md)_: Example usages of permissions boundaries.

## Sub modules

@@ -367,6 +397,7 @@ In case the setup does not work as intended follow the trace of events:
|------|------|
| [aws_resourcegroups_group.resourcegroups_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/resourcegroups_group) | resource |
| [aws_sqs_queue.queued_builds](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource |
+| [aws_sqs_queue.queued_builds_dlq](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource |
| [random_string.random](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/string) | resource |

## Inputs

@@ -382,10 +413,12 @@ In case the setup does not work as intended follow the trace of events:
| [delay\_webhook\_event](#input\_delay\_webhook\_event) | The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event. | `number` | `30` | no |
| [disable\_check\_wokflow\_job\_labels](#input\_disable\_check\_wokflow\_job\_labels) | Disable the check of workflow labels for received workflow job events. | `bool` | `false` | no |
| [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`. | `bool` | `true` | no |
+| [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners; runners will only be used once. | `bool` | `false` | no |
| [enable\_organization\_runners](#input\_enable\_organization\_runners) | Register runners to organization, instead of repo level | `bool` | `false` | no |
| [enable\_ssm\_on\_runners](#input\_enable\_ssm\_on\_runners) | Enable to allow access to the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances. | `bool` | `false` | no |
| [enabled\_userdata](#input\_enabled\_userdata) | Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI | `bool` | `true` | no |
| [environment](#input\_environment) | A name that identifies the environment, used as prefix and for tagging. | `string` | n/a | yes |
+| [fifo\_build\_queue](#input\_fifo\_build\_queue) | Enable a FIFO queue to retain the order of events received by the webhook. We suggest setting this to `true` for repo-level runners. | `bool` | `false` | no |
| [ghes\_ssl\_verify](#input\_ghes\_ssl\_verify) | GitHub Enterprise SSL verification. Set to 'false' when custom certificate (chains) is used for GitHub Enterprise Server (insecure). | `bool` | `true` | no |
| [ghes\_url](#input\_ghes\_url) | GitHub Enterprise Server URL. Example: https://github.internal.co - DO NOT SET IF USING PUBLIC GITHUB | `string` | `null` | no |
| [github\_app](#input\_github\_app) | GitHub app parameters, see your github app. Ensure the key is the base64-encoded `.pem` file (the output of `base64 app.private-key.pem`, not the content of `private-key.pem`). |
object({| n/a | yes |
@@ -405,6 +438,7 @@ In case the setup does not work as intended follow the trace of events:
| [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no |
| [market\_options](#input\_market\_options) | Market options for the action runner instances. Setting the value to `null` lets the scaler create on-demand instances instead of spot instances. | `string` | `"spot"` | no |
| [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before being terminated if not busy. | `number` | `null` | no |
+| [redrive\_build\_queue](#input\_redrive\_build\_queue) | Set options to attach an (optional) dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options: 1. Disable by setting `enabled` to `false`. 2. Enable by setting `enabled` to `true` and `maxReceiveCount` to the maximum number of retries. |
key_base64 = string
id = string
webhook_secret = string
})
object({|
enabled = bool
maxReceiveCount = number
})
{| no | | [repository\_white\_list](#input\_repository\_white\_list) | List of repositories allowed to use the github app | `list(string)` | `[]` | no | | [role\_path](#input\_role\_path) | The path that will be added to role path for created roles, if not set the environment name will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created roles. | `string` | `null` | no | @@ -428,7 +462,7 @@ In case the setup does not work as intended follow the trace of events: | [runners\_lambda\_zip](#input\_runners\_lambda\_zip) | File location of the lambda zip file for scaling runners. | `string` | `null` | no | | [runners\_maximum\_count](#input\_runners\_maximum\_count) | The maximum number of runners that will be created. | `number` | `3` | no | | [runners\_scale\_down\_lambda\_timeout](#input\_runners\_scale\_down\_lambda\_timeout) | Time out for the scale down lambda in seconds. | `number` | `60` | no | -| [runners\_scale\_up\_lambda\_timeout](#input\_runners\_scale\_up\_lambda\_timeout) | Time out for the scale up lambda in seconds. | `number` | `180` | no | +| [runners\_scale\_up\_lambda\_timeout](#input\_runners\_scale\_up\_lambda\_timeout) | Time out for the scale up lambda in seconds. | `number` | `30` | no | | [scale\_down\_schedule\_expression](#input\_scale\_down\_schedule\_expression) | Scheduler expression to check every x for scale down. | `string` | `"cron(*/5 * * * ? *)"` | no | | [scale\_up\_reserved\_concurrent\_executions](#input\_scale\_up\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | @@ -450,6 +484,7 @@ In case the setup does not work as intended follow the trace of events: | Name | Description | |------|-------------| | [binaries\_syncer](#output\_binaries\_syncer) | n/a | +| [queues](#output\_queues) | SQS queues. | | [runners](#output\_runners) | n/a | | [ssm\_parameters](#output\_ssm\_parameters) | n/a | | [webhook](#output\_webhook) | n/a | diff --git a/examples/default/main.tf b/examples/default/main.tf index 2a5ec1edee..46e57fc183 100644 --- a/examples/default/main.tf +++ b/examples/default/main.tf @@ -30,11 +30,13 @@ module "runners" { webhook_secret = random_id.random.hex } + # Grab zip files via lambda_download webhook_lambda_zip = "lambdas-download/webhook.zip" runner_binaries_syncer_lambda_zip = "lambdas-download/runner-binaries-syncer.zip" runners_lambda_zip = "lambdas-download/runners.zip" - enable_organization_runners = false - runner_extra_labels = "default,example" + + enable_organization_runners = false + runner_extra_labels = "default,example" # enable access to the runners via SSM enable_ssm_on_runners = true @@ -61,7 +63,11 @@ module "runners" { instance_types = ["m5.large", "c5.large"] # override delay of events in seconds - delay_webhook_event = 5 + delay_webhook_event = 5 + runners_maximum_count = 1 + + # set up a fifo queue to remain order + fifo_build_queue = true # override scaling down scale_down_schedule_expression = "cron(* * * * ? 
*)" diff --git a/examples/ephemeral/.terraform.lock.hcl b/examples/ephemeral/.terraform.lock.hcl new file mode 100644 index 0000000000..d940521fcb --- /dev/null +++ b/examples/ephemeral/.terraform.lock.hcl @@ -0,0 +1,57 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "3.61.0" + constraints = ">= 3.27.0" + hashes = [ + "h1:fpZ14qQnn+uEOO2ZOlBFHgty48Ol8IOwd+ewxZ4z3zc=", + "zh:0483ca802ddb0ae4f73144b4357ba72242c6e2641aeb460b1aa9a6f6965464b0", + "zh:274712214ebeb0c1269cbc468e5705bb5741dc45b05c05e9793ca97f22a1baa1", + "zh:3c6bd97a2ca809469ae38f6893348386c476cb3065b120b785353c1507401adf", + "zh:53dd41a9aed9860adbbeeb71a23e4f8195c656fd15a02c90fa2d302a5f577d8c", + "zh:65c639c547b97bc880fd83e65511c0f4bbfc91b63cada3b8c0d5776444221700", + "zh:a2769e19137ff480c1dd3e4f248e832df90fb6930a22c66264d9793895161714", + "zh:a5897a99332cc0071e46a71359b86a8e53ab09c1453e94cd7cf45a0b577ff590", + "zh:bdc2353642d16d8e2437a9015cd4216a1772be9736645cc17d1a197480e2b5b7", + "zh:cbeace1deae938f6c0aca3734e6088f3633ca09611aff701c15cb6d42f2b918a", + "zh:d33ca19012aabd98cc03fdeccd0bd5ce56e28f61a1dfbb2eea88e89487de7fb3", + "zh:d548b29a864b0687e85e8a993f208e25e3ecc40fcc5b671e1985754b32fdd658", + ] +} + +provider "registry.terraform.io/hashicorp/local" { + version = "2.1.0" + hashes = [ + "h1:KfieWtVyGWwplSoLIB5usKAUnrIkDQBkWaR5TI+4WYg=", + "zh:0f1ec65101fa35050978d483d6e8916664b7556800348456ff3d09454ac1eae2", + "zh:36e42ac19f5d68467aacf07e6adcf83c7486f2e5b5f4339e9671f68525fc87ab", + "zh:6db9db2a1819e77b1642ec3b5e95042b202aee8151a0256d289f2e141bf3ceb3", + "zh:719dfd97bb9ddce99f7d741260b8ece2682b363735c764cac83303f02386075a", + "zh:7598bb86e0378fd97eaa04638c1a4c75f960f62f69d3662e6d80ffa5a89847fe", + "zh:ad0a188b52517fec9eca393f1e2c9daea362b33ae2eb38a857b6b09949a727c1", + "zh:c46846c8df66a13fee6eff7dc5d528a7f868ae0dcf92d79deaac73cc297ed20c", + "zh:dc1a20a2eec12095d04bf6da5321f535351a594a636912361db20eb2a707ccc4", + "zh:e57ab4771a9d999401f6badd8b018558357d3cbdf3d33cc0c4f83e818ca8e94b", + "zh:ebdcde208072b4b0f8d305ebf2bfdc62c926e0717599dcf8ec2fd8c5845031c3", + "zh:ef34c52b68933bedd0868a13ccfd59ff1c820f299760b3c02e008dc95e2ece91", + ] +} + +provider "registry.terraform.io/hashicorp/random" { + version = "3.1.0" + hashes = [ + "h1:rKYu5ZUbXwrLG1w81k7H3nce/Ys6yAxXhWcbtk36HjY=", + "zh:2bbb3339f0643b5daa07480ef4397bd23a79963cc364cdfbb4e86354cb7725bc", + "zh:3cd456047805bf639fbf2c761b1848880ea703a054f76db51852008b11008626", + "zh:4f251b0eda5bb5e3dc26ea4400dba200018213654b69b4a5f96abee815b4f5ff", + "zh:7011332745ea061e517fe1319bd6c75054a314155cb2c1199a5b01fe1889a7e2", + "zh:738ed82858317ccc246691c8b85995bc125ac3b4143043219bd0437adc56c992", + "zh:7dbe52fac7bb21227acd7529b487511c91f4107db9cc4414f50d04ffc3cab427", + "zh:a3a9251fb15f93e4cfc1789800fc2d7414bbc18944ad4c5c98f466e6477c42bc", + "zh:a543ec1a3a8c20635cf374110bd2f87c07374cf2c50617eee2c669b3ceeeaa9f", + "zh:d9ab41d556a48bd7059f0810cf020500635bfc696c9fc3adab5ea8915c1d886b", + "zh:d9e13427a7d011dbd654e591b0337e6074eef8c3b9bb11b2e39eaaf257044fd7", + "zh:f7605bd1437752114baf601bdf6931debe6dc6bfe3006eb7e9bb9080931dca8a", + ] +} diff --git a/examples/ephemeral/README.md b/examples/ephemeral/README.md new file mode 100644 index 0000000000..0eec98561d --- /dev/null +++ b/examples/ephemeral/README.md @@ -0,0 +1,30 @@ +# Action runners deployment ephemeral example + +This example is based on the default setup, but shows how runners can be used with the ephemeral flag 
enabled. Once enabled, ephemeral runners will be used for one job only; each job requires a fresh instance. This feature should be used in combination with the `workflow_job` event. See GitHub webhook endpoint configuration (link needed here). It is also suggested to use a prebuilt AMI to minimize runner launch times.
+## Usages
+
+Steps for the full setup, such as creating a GitHub app, can be found in the root module's [README](../../README.md). First download the Lambda releases from GitHub. Alternatively, you can build the lambdas locally with Node or Docker; there is a simple build script in `
"enabled": false,
"maxReceiveCount": null
}
{| no | +| [ami\_filter](#input\_ami\_filter) | Map of lists used to create the AMI filter for the action runner AMI. | `map(list(string))` | `null` | no | | [ami\_owners](#input\_ami\_owners) | The list of owners used to select the AMI of action runner instances. | `list(string)` |
"name": [
"amzn2-ami-hvm-2.*-x86_64-ebs"
]
}
[| no | | [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes | | [block\_device\_mappings](#input\_block\_device\_mappings) | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops` | `map(string)` | `{}` | no | @@ -118,8 +119,10 @@ No modules. | [create\_service\_linked\_role\_spot](#input\_create\_service\_linked\_role\_spot) | (optional) create the service linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no | | [egress\_rules](#input\_egress\_rules) | List of egress rules for the GitHub runner instances. |
"amazon"
]
list(object({|
cidr_blocks = list(string)
ipv6_cidr_blocks = list(string)
prefix_list_ids = list(string)
from_port = number
protocol = string
security_groups = list(string)
self = bool
to_port = number
description = string
}))
[| no | | [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`. | `bool` | `true` | no | +| [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners, runners will only be used once. | `bool` | `false` | no | | [enable\_organization\_runners](#input\_enable\_organization\_runners) | n/a | `bool` | n/a | yes | | [enable\_ssm\_on\_runners](#input\_enable\_ssm\_on\_runners) | Enable to allow access to the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances. | `bool` | n/a | yes | +| [enabled\_userdata](#input\_enabled\_userdata) | Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI | `bool` | `true` | no | | [environment](#input\_environment) | A name that identifies the environment, used as prefix and for tagging. | `string` | n/a | yes | | [ghes\_ssl\_verify](#input\_ghes\_ssl\_verify) | GitHub Enterprise SSL verification. Set to 'false' when custom certificate (chains) is used for GitHub Enterprise Server (insecure). | `bool` | `true` | no | | [ghes\_url](#input\_ghes\_url) | GitHub Enterprise Server URL. DO NOT SET IF USING PUBLIC GITHUB | `string` | `null` | no | @@ -127,7 +130,7 @@ No modules. | [idle\_config](#input\_idle\_config) | List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle. |
{
"cidr_blocks": [
"0.0.0.0/0"
],
"description": null,
"from_port": 0,
"ipv6_cidr_blocks": [
"::/0"
],
"prefix_list_ids": null,
"protocol": "-1",
"security_groups": null,
"self": null,
"to_port": 0
}
]
list(object({| `[]` | no | | [instance\_profile\_path](#input\_instance\_profile\_path) | The path that will be added to the instance\_profile, if not set the environment name will be used. | `string` | `null` | no | | [instance\_type](#input\_instance\_type) | [DEPRECATED] See instance\_types. | `string` | `"m5.large"` | no | -| [instance\_types](#input\_instance\_types) | List of instance types for the action runner. | `list(string)` | `null` | no | +| [instance\_types](#input\_instance\_types) | List of instance types for the action runner. Defaults are based on runner\_os (amzn2 for linux and Windows Server Core for win). | `list(string)` | `null` | no | | [key\_name](#input\_key\_name) | Key pair name | `string` | `null` | no | | [kms\_key\_arn](#input\_kms\_key\_arn) | Optional CMK Key ARN to be used for Parameter Store. | `string` | `null` | no | | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `any` | `null` | no | @@ -141,7 +144,7 @@ No modules. | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | [market\_options](#input\_market\_options) | Market options for the action runner instances. | `string` | `"spot"` | no | | [metadata\_options](#input\_metadata\_options) | Metadata options for the ec2 runner instances. | `map(any)` |
cron = string
timeZone = string
idleCount = number
}))
{| no | -| [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated if non busy. | `number` | `5` | no | +| [minimum\_running\_time\_in\_minutes](#input\_minimum\_running\_time\_in\_minutes) | The time an ec2 action runner should be running at minimum before terminated if non busy. If not set the default is calculated based on the OS. | `number` | `null` | no | | [overrides](#input\_overrides) | This map provides the possibility to override some defaults. The following attributes are supported: `name_sg` overrides the `Name` tag for all security groups created by this module. `name_runner_agent_instance` overrides the `Name` tag for the ec2 instance defined in the auto launch configuration. `name_docker_machine_runners` overrides the `Name` tag spot instances created by the runner agent. | `map(string)` |
"http_endpoint": "enabled",
"http_put_response_hop_limit": 1,
"http_tokens": "optional"
}
{| no | | [role\_path](#input\_role\_path) | The path that will be added to the role; if not set, the environment name will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. | `string` | `null` | no | @@ -153,7 +156,8 @@ No modules. | [runner\_extra\_labels](#input\_runner\_extra\_labels) | Extra labels for the runners (GitHub). Separate each label by a comma | `string` | `""` | no | | [runner\_group\_name](#input\_runner\_group\_name) | Name of the runner group. | `string` | `"Default"` | no | | [runner\_iam\_role\_managed\_policy\_arns](#input\_runner\_iam\_role\_managed\_policy\_arns) | Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role | `list(string)` | `[]` | no | -| [runner\_log\_files](#input\_runner\_log\_files) | (optional) List of logfiles to send to CloudWatch, will only be used if `enable_cloudwatch_agent` is set to true. Object description: `log_group_name`: Name of the log group, `prefix_log_group`: If true, the log group name will be prefixed with `/github-self-hosted-runners/
"name_runner": "",
"name_sg": ""
}
list(object({|
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
}))
[| no | +| [runner\_log\_files](#input\_runner\_log\_files) | (optional) List of logfiles to send to CloudWatch, will only be used if `enable_cloudwatch_agent` is set to true. Object description: `log_group_name`: Name of the log group, `prefix_log_group`: If true, the log group name will be prefixed with `/github-self-hosted-runners/
{
"file_path": "/var/log/messages",
"log_group_name": "messages",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/var/log/user-data.log",
"log_group_name": "user_data",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/var/log/runner-startup.log",
"log_group_name": "runner-startup",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/home/ec2-user/actions-runner/_diag/Runner_**.log",
"log_group_name": "runner",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
}
]
list(object({| `null` | no | +| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux,win). | `string` | `"linux"` | no | | [runners\_lambda\_s3\_key](#input\_runners\_lambda\_s3\_key) | S3 key for runners lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | | [runners\_lambda\_s3\_object\_version](#input\_runners\_lambda\_s3\_object\_version) | S3 object version for runners lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | | [runners\_maximum\_count](#input\_runners\_maximum\_count) | The maximum number of runners that will be created. | `number` | `3` | no | @@ -164,7 +168,6 @@ No modules. | [sqs\_build\_queue](#input\_sqs\_build\_queue) | SQS queue to consume accepted build events. |
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
}))
object({| n/a | yes | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | -| [enabled\_userdata](#input\_enabled_userdata) | Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI | `bool` | `true` | no | | [userdata\_post\_install](#input\_userdata\_post\_install) | User-data script snippet to insert after GitHub action runner install | `string` | `""` | no | | [userdata\_pre\_install](#input\_userdata\_pre\_install) | User-data script snippet to insert before GitHub action runner install | `string` | `""` | no | | [userdata\_template](#input\_userdata\_template) | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | diff --git a/modules/runners/lambdas/runners/jest.config.js b/modules/runners/lambdas/runners/jest.config.js index 79ed0ba8aa..8c7a9f17c5 100644 --- a/modules/runners/lambdas/runners/jest.config.js +++ b/modules/runners/lambdas/runners/jest.config.js @@ -2,7 +2,7 @@ module.exports = { preset: 'ts-jest', testEnvironment: 'node', collectCoverage: true, - collectCoverageFrom: ['src/**/*.{ts,js,jsx}'], + collectCoverageFrom: ['src/**/*.{ts,js,jsx}','!src/**/*local*.ts'], coverageThreshold: { global: { branches: 80, diff --git a/modules/runners/lambdas/runners/src/lambda.test.ts b/modules/runners/lambdas/runners/src/lambda.test.ts new file mode 100644 index 0000000000..43f5ffdff3 --- /dev/null +++ b/modules/runners/lambdas/runners/src/lambda.test.ts @@ -0,0 +1,136 @@ +import { fail } from 'assert'; +import { Context, SQSEvent, SQSRecord } from 'aws-lambda'; +import { mocked } from 'ts-jest/utils'; +import { scaleUpHandler } from './lambda'; +import { ActionRequestMessage, scaleUp } from './scale-runners/scale-up'; +import ScaleError from './scale-runners/ScaleError'; +import { logger } from './scale-runners/logger'; +import { scaleDown } from './scale-runners/scale-down'; + +const body: ActionRequestMessage = { + eventType: 'workflow_job', + id: 1, + installationId: 1, + repositoryName: 'name', + repositoryOwner: 'owner', +}; + +const sqsRecord: SQSRecord = { + attributes: { + ApproximateFirstReceiveTimestamp: '', + ApproximateReceiveCount: '', + SenderId: '', + SentTimestamp: '', + }, + awsRegion: '', + body: JSON.stringify(body), + eventSource: 'aws:SQS', + eventSourceARN: '', + md5OfBody: '', + messageAttributes: {}, + messageId: '', + receiptHandle: '', +}; + +const sqsEvent: SQSEvent = { + Records: [sqsRecord], +}; + +const context: Context = { + awsRequestId: '1', + callbackWaitsForEmptyEventLoop: false, + functionName: '', + functionVersion: '', + getRemainingTimeInMillis: () => 0, + invokedFunctionArn: '', + logGroupName: '', + logStreamName: '', + memoryLimitInMB: '', + done: () => { + return; + }, + fail: () => { + return; + }, + succeed: () => { + return; + }, +}; + +jest.mock('./scale-runners/scale-up'); +jest.mock('./scale-runners/scale-down'); +jest.mock('./scale-runners/logger'); + +describe('Test scale up lambda wrapper.', () => { + it('Do not handle multiple record sets.', async () => { + await 
testInvalidRecords([sqsRecord, sqsRecord]); + }); + + it('Do not handle empty record sets.', async () => { + await testInvalidRecords([]); + }); + + it('Scale without error should resolve.', async () => { + const mock = mocked(scaleUp); + mock.mockImplementation(() => { + return new Promise((resolve, reject) => { + resolve(); + }); + }); + await expect(scaleUpHandler(sqsEvent, context)).resolves; + }); + + it('Non scale should resolve.', async () => { + const error = new Error('some error'); + const mock = mocked(scaleUp); + mock.mockRejectedValue(error); + await expect(scaleUpHandler(sqsEvent, context)).resolves; + }); + + it('Scale should be rejected', async () => { + const error = new ScaleError('some scale error'); + const mock = mocked(scaleUp); + + mock.mockRejectedValue(error); + await expect(scaleUpHandler(sqsEvent, context)).rejects.toThrow(error); + }); +}); + +async function testInvalidRecords(sqsRecords: SQSRecord[]) { + const mock = mocked(scaleUp); + const logWarnSpy = jest.spyOn(logger, 'warn'); + mock.mockImplementation(() => { + return new Promise((resolve) => { + resolve(); + }); + }); + const sqsEventMultipleRecords: SQSEvent = { + Records: sqsRecords, + }; + + await expect(scaleUpHandler(sqsEventMultipleRecords, context)).resolves; + + expect(logWarnSpy).toHaveBeenCalledWith( + 'Event ignored, only one record at the time can be handled, ensure the lambda batch size is set to 1.', + undefined, + ); +} + +describe('Test scale down lambda wrapper.', () => { + it('Scaling down no error.', async () => { + const mock = mocked(scaleDown); + mock.mockImplementation(() => { + return new Promise((resolve) => { + resolve(); + }); + }); + await expect(scaleDown()).resolves; + }); + + it('Scaling down with error.', async () => { + const error = new Error('some error'); + const mock = mocked(scaleDown); + mock.mockRejectedValue(error); + await expect(scaleDown()).resolves; + }); +}); diff --git a/modules/runners/lambdas/runners/src/lambda.ts b/modules/runners/lambdas/runners/src/lambda.ts index a784c0d059..20e1c40135 100644 --- a/modules/runners/lambdas/runners/src/lambda.ts +++ b/modules/runners/lambdas/runners/src/lambda.ts @@ -1,31 +1,38 @@ import { scaleUp } from './scale-runners/scale-up'; import { scaleDown } from './scale-runners/scale-down'; import { SQSEvent, ScheduledEvent, Context, Callback } from 'aws-lambda'; -import { logger } from './scale-runners/logger'; +import { LogFields, logger } from './scale-runners/logger'; +import ScaleError from './scale-runners/ScaleError'; import 'source-map-support/register'; -export async function scaleUpHandler(event: SQSEvent, context: Context, callback: Callback): Promise
arn = string
})
object({| n/a | yes | -| tags | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | -| webhook\_lambda\_s3\_key | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | -| webhook\_lambda\_s3\_object\_version | S3 object version for webhook lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | +| [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes | +| [disable\_check\_wokflow\_job\_labels](#input\_disable\_check\_wokflow\_job\_labels) | Disable the the check of workflow labels. | `bool` | `false` | no | +| [environment](#input\_environment) | A name that identifies the environment, used as prefix and for tagging. | `string` | n/a | yes | +| [github\_app\_webhook\_secret\_arn](#input\_github\_app\_webhook\_secret\_arn) | n/a | `string` | n/a | yes | +| [kms\_key\_arn](#input\_kms\_key\_arn) | Optional CMK Key ARN to be used for Parameter Store. | `string` | `null` | no | +| [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `any` | `null` | no | +| [lambda\_timeout](#input\_lambda\_timeout) | Time out of the lambda in seconds. | `number` | `10` | no | +| [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | +| [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | +| [log\_type](#input\_log\_type) | Logging format for lambda logging. Valid values are 'json', 'pretty', 'hidden'. | `string` | `"pretty"` | no | +| [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `7` | no | +| [repository\_white\_list](#input\_repository\_white\_list) | List of repositories allowed to use the github app | `list(string)` | `[]` | no | +| [role\_path](#input\_role\_path) | The path that will be added to the role; if not set, the environment name will be used. | `string` | `null` | no | +| [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. | `string` | `null` | no | +| [runner\_extra\_labels](#input\_runner\_extra\_labels) | Extra labels for the runners (GitHub). Separate each label by a comma | `string` | `""` | no | +| [sqs\_build\_queue](#input\_sqs\_build\_queue) | SQS queue to publish accepted build events. |
id = string
arn = string
})
object({| n/a | yes | +| [sqs\_build\_queue\_fifo](#input\_sqs\_build\_queue\_fifo) | Enable a FIFO queue to remain the order of events received by the webhook. Suggest to set to true for repo level runners. | `bool` | `false` | no | +| [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [webhook\_lambda\_s3\_key](#input\_webhook\_lambda\_s3\_key) | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | +| [webhook\_lambda\_s3\_object\_version](#input\_webhook\_lambda\_s3\_object\_version) | S3 object version for webhook lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | ## Outputs | Name | Description | |------|-------------| -| endpoint\_relative\_path | n/a | -| gateway | n/a | -| lambda | n/a | -| role | n/a | +| [endpoint\_relative\_path](#output\_endpoint\_relative\_path) | n/a | +| [gateway](#output\_gateway) | n/a | +| [lambda](#output\_lambda) | n/a | +| [role](#output\_role) | n/a | ## Philips Forest diff --git a/modules/webhook/lambdas/webhook/jest.config.js b/modules/webhook/lambdas/webhook/jest.config.js index 4a5b465ecb..02a6524ce9 100644 --- a/modules/webhook/lambdas/webhook/jest.config.js +++ b/modules/webhook/lambdas/webhook/jest.config.js @@ -1,4 +1,14 @@ module.exports = { preset: 'ts-jest', testEnvironment: 'node', + collectCoverage: true, + collectCoverageFrom: ['src/**/*.{ts,js,jsx}', '!src/**/*local*.ts'], + coverageThreshold: { + global: { + branches: 85, + functions: 85, + lines: 85, + statements: 85 + } + } }; diff --git a/modules/webhook/lambdas/webhook/src/sqs/index.test.ts b/modules/webhook/lambdas/webhook/src/sqs/index.test.ts new file mode 100644 index 0000000000..de8570157f --- /dev/null +++ b/modules/webhook/lambdas/webhook/src/sqs/index.test.ts @@ -0,0 +1,65 @@ +import { SQS } from 'aws-sdk'; +import { sendActionRequest, ActionRequestMessage } from '.'; + +const mockSQS = { + sendMessage: jest.fn(() => { + { + return { promise: jest.fn() }; + } + }), +}; +jest.mock('aws-sdk', () => ({ + SQS: jest.fn().mockImplementation(() => mockSQS), +})); + +describe('Test sending message to SQS.', () => { + const message: ActionRequestMessage = { + eventType: 'type', + id: 0, + installationId: 0, + repositoryName: 'test', + repositoryOwner: 'owner', + }; + const sqsMessage: SQS.Types.SendMessageRequest = { + QueueUrl: 'https://sqs.eu-west-1.amazonaws.com/123456789/queued-builds', + MessageBody: JSON.stringify(message), + }; + + it('no fifo queue, based on defaults', async () => { + // Arrange + process.env.SQS_URL_WEBHOOK = sqsMessage.QueueUrl; + + // Act + const result = await sendActionRequest(message); + + // Assert + expect(mockSQS.sendMessage).toBeCalledWith(sqsMessage); + expect(result).resolves; + }); + + it('no fifo queue', async () => { + // Arrange + process.env.SQS_URL_WEBHOOK = sqsMessage.QueueUrl; + process.env.SQS_IS_FIFO = 'false'; + + // Act + const result = await sendActionRequest(message); + + // Assert + expect(mockSQS.sendMessage).toBeCalledWith(sqsMessage); + expect(result).resolves; + }); + + it('use a fifo queue', async () => { + // Arrange + process.env.SQS_URL_WEBHOOK = sqsMessage.QueueUrl; + process.env.SQS_IS_FIFO = 'true'; + + // Act + const result = await sendActionRequest(message); + + // Assert + expect(mockSQS.sendMessage).toBeCalledWith({ ...sqsMessage, MessageGroupId: String(message.id) }); + expect(result).resolves; + }); 
+}); diff --git a/modules/webhook/lambdas/webhook/src/sqs/index.ts b/modules/webhook/lambdas/webhook/src/sqs/index.ts index 1a6e75e808..63a2a240f2 100644 --- a/modules/webhook/lambdas/webhook/src/sqs/index.ts +++ b/modules/webhook/lambdas/webhook/src/sqs/index.ts @@ -1,10 +1,5 @@ -import AWS, { SQS } from 'aws-sdk'; - -AWS.config.update({ - region: process.env.AWS_REGION, -}); - -const sqs = new SQS(); +import { SQS } from 'aws-sdk'; +import { LogFields, logger as logger } from '../webhook/logger'; export interface ActionRequestMessage { id: number; @@ -15,11 +10,20 @@ export interface ActionRequestMessage { } export const sendActionRequest = async (message: ActionRequestMessage): Promise
id = string
arn = string
})