From 1911bd22007f29102753731c949f7d4d78726fca Mon Sep 17 00:00:00 2001 From: Peyton Walters Date: Wed, 17 Feb 2021 09:31:53 -0500 Subject: [PATCH 1/7] Propagate deploy failures (#54) * Propogate deploy failures * Fix formatting Co-authored-by: Armaan Tobaccowalla --- cdk/kraken/CHANGELOG.md | 4 +++ cdk/kraken/package.json | 2 +- cdk/kraken/src/deploy.ts | 24 +++++++++++----- .../test/__snapshots__/custom.test.ts.snap | 18 +++++++++--- .../labs-application.test.ts.snap | 28 ++++++++++++++++--- cdk/kraken/version.json | 2 +- 6 files changed, 61 insertions(+), 17 deletions(-) diff --git a/cdk/kraken/CHANGELOG.md b/cdk/kraken/CHANGELOG.md index 6b4a7e13..25dabaaa 100644 --- a/cdk/kraken/CHANGELOG.md +++ b/cdk/kraken/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.4.10 (2021-02-13) + +* Modify deploy script to fully fail if a single command fails + ## 0.4.9 (2021-02-11) * Create an auto-approve stack for dependabot PRs diff --git a/cdk/kraken/package.json b/cdk/kraken/package.json index 93b28737..11564970 100644 --- a/cdk/kraken/package.json +++ b/cdk/kraken/package.json @@ -64,7 +64,7 @@ ], "main": "lib/index.js", "license": "MIT", - "version": "0.4.9", + "version": "0.4.10", "jest": { "clearMocks": true, "collectCoverage": true, diff --git a/cdk/kraken/src/deploy.ts b/cdk/kraken/src/deploy.ts index f4e92474..a4486442 100644 --- a/cdk/kraken/src/deploy.ts +++ b/cdk/kraken/src/deploy.ts @@ -52,10 +52,13 @@ export class DeployJob extends CheckoutJob { if: `github.ref == 'refs/heads/${fullConfig.defaultBranch}'`, steps: [{ name: 'Deploy', - run: dedent`curl -s -X GET -H "Content-Type: application/json" -H "Authorization: Bearer $DO_AUTH_TOKEN" "https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig" > /kubeconfig.conf - + run: dedent`# default options to make sure failures stop script execution + set -euo pipefail + + curl -s -X GET -H "Content-Type: application/json" -H "Authorization: Bearer $DO_AUTH_TOKEN" 
"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig" > /kubeconfig.conf + export KUBECONFIG=/kubeconfig.conf - + # get repo name (by removing owner/organization) RELEASE_NAME=\${REPOSITORY#*/} @@ -65,14 +68,21 @@ export class DeployJob extends CheckoutJob { echo "Could not find deploy tag" exit 1 fi - + helm repo add pennlabs https://helm.pennlabs.org/ - for i in {1..10}; do - helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version "\${DEPLOY_TAG}" $RELEASE_NAME pennlabs/icarus && exit 0 + # This is bash soup, but it'll do. + # 1. Attempt to install with helm + # 2. If this succeeds, exit with a success status code + # 3. If it fails, mark the command as succeeded so that '-e' doesn't kick us out + # 4. Wait 10s and try again + helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version "\${DEPLOY_TAG}" $RELEASE_NAME pennlabs/icarus && exit 0 || true sleep 10s echo "Retrying deploy for $i times" - done`, + done + + # If we get here, all helm installs failed so our command should fail + exit 1`, env: { IMAGE_TAG: fullConfig.deployTag, DO_AUTH_TOKEN: '${{ secrets.DO_AUTH_TOKEN }}', diff --git a/cdk/kraken/test/__snapshots__/custom.test.ts.snap b/cdk/kraken/test/__snapshots__/custom.test.ts.snap index 21d145b2..06bcad1b 100644 --- a/cdk/kraken/test/__snapshots__/custom.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/custom.test.ts.snap @@ -27,7 +27,10 @@ Object { "REPOSITORY": "\${{ github.repository }}", }, "name": "Deploy", - "run": "curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf + "run": "# default options to make sure failures stop script execution +set -euo pipefail + +curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" 
\\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf export KUBECONFIG=/kubeconfig.conf @@ -42,12 +45,19 @@ if [ \\"$DEPLOY_TAG\\" = \\"null\\" ]; then fi helm repo add pennlabs https://helm.pennlabs.org/ - for i in {1..10}; do - helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version \\"\${DEPLOY_TAG}\\" $RELEASE_NAME pennlabs/icarus && exit 0 + # This is bash soup, but it'll do. + # 1. Attempt to install with helm + # 2. If this succeeds, exit with a success status code + # 3. If it fails, mark the command as succeeded so that '-e' doesn't kick us out + # 4. Wait 10s and try again + helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version \\"\${DEPLOY_TAG}\\" $RELEASE_NAME pennlabs/icarus && exit 0 || true sleep 10s echo \\"Retrying deploy for $i times\\" -done", +done + +# If we get here, all helm installs failed so our command should fail +exit 1", }, ], }, diff --git a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap index 98156fed..e4da72e1 100644 --- a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap @@ -141,6 +141,9 @@ jobs: - uses: actions/checkout@v2 - name: Deploy run: |- + # default options to make sure failures stop script execution + set -euo pipefail + curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf export KUBECONFIG=/kubeconfig.conf @@ -156,12 +159,19 @@ jobs: fi helm repo add pennlabs https://helm.pennlabs.org/ - for i in {1..10}; do - helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version \\"\${DEPLOY_TAG}\\" $RELEASE_NAME pennlabs/icarus && exit 0 + # This is bash soup, but it'll do. + # 1. 
Attempt to install with helm + # 2. If this succeeds, exit with a success status code + # 3. If it fails, mark the command as succeeded so that '-e' doesn't kick us out + # 4. Wait 10s and try again + helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version \\"\${DEPLOY_TAG}\\" $RELEASE_NAME pennlabs/icarus && exit 0 || true sleep 10s echo \\"Retrying deploy for $i times\\" done + + # If we get here, all helm installs failed so our command should fail + exit 1 env: IMAGE_TAG: \${{ github.sha }} DO_AUTH_TOKEN: \${{ secrets.DO_AUTH_TOKEN }} @@ -386,6 +396,9 @@ jobs: - uses: actions/checkout@v2 - name: Deploy run: |- + # default options to make sure failures stop script execution + set -euo pipefail + curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf export KUBECONFIG=/kubeconfig.conf @@ -401,12 +414,19 @@ jobs: fi helm repo add pennlabs https://helm.pennlabs.org/ - for i in {1..10}; do - helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version \\"\${DEPLOY_TAG}\\" $RELEASE_NAME pennlabs/icarus && exit 0 + # This is bash soup, but it'll do. + # 1. Attempt to install with helm + # 2. If this succeeds, exit with a success status code + # 3. If it fails, mark the command as succeeded so that '-e' doesn't kick us out + # 4. 
Wait 10s and try again + helm upgrade --install --atomic --set=image_tag=$IMAGE_TAG -f k8s/values.yaml --version \\"\${DEPLOY_TAG}\\" $RELEASE_NAME pennlabs/icarus && exit 0 || true sleep 10s echo \\"Retrying deploy for $i times\\" done + + # If we get here, all helm installs failed so our command should fail + exit 1 env: IMAGE_TAG: \${{ github.sha }} DO_AUTH_TOKEN: \${{ secrets.DO_AUTH_TOKEN }} diff --git a/cdk/kraken/version.json b/cdk/kraken/version.json index b565e303..da31324f 100644 --- a/cdk/kraken/version.json +++ b/cdk/kraken/version.json @@ -1,3 +1,3 @@ { - "version": "0.4.9" + "version": "0.4.10" } From 92a64f2593112936d4d772cbed1e340fbc048851 Mon Sep 17 00:00:00 2001 From: Armaan Tobaccowalla Date: Wed, 17 Feb 2021 12:25:35 -0500 Subject: [PATCH 2/7] Release kraken 0.4.11 --- cdk/kraken/CHANGELOG.md | 4 ++++ cdk/kraken/package.json | 2 +- cdk/kraken/src/deploy.ts | 5 +---- cdk/kraken/test/__snapshots__/custom.test.ts.snap | 5 +---- cdk/kraken/test/__snapshots__/labs-application.test.ts.snap | 6 ------ cdk/kraken/version.json | 2 +- 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/cdk/kraken/CHANGELOG.md b/cdk/kraken/CHANGELOG.md index 25dabaaa..2b35343f 100644 --- a/cdk/kraken/CHANGELOG.md +++ b/cdk/kraken/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.4.11 (2021-02-17) + +* Hotfix deploy job + ## 0.4.10 (2021-02-13) * Modify deploy script to fully fail if a single command fails diff --git a/cdk/kraken/package.json b/cdk/kraken/package.json index 11564970..2c2a1e73 100644 --- a/cdk/kraken/package.json +++ b/cdk/kraken/package.json @@ -64,7 +64,7 @@ ], "main": "lib/index.js", "license": "MIT", - "version": "0.4.10", + "version": "0.4.11", "jest": { "clearMocks": true, "collectCoverage": true, diff --git a/cdk/kraken/src/deploy.ts b/cdk/kraken/src/deploy.ts index a4486442..b3cc9e12 100644 --- a/cdk/kraken/src/deploy.ts +++ b/cdk/kraken/src/deploy.ts @@ -52,10 +52,7 @@ export class DeployJob extends CheckoutJob { if: `github.ref == 
'refs/heads/${fullConfig.defaultBranch}'`, steps: [{ name: 'Deploy', - run: dedent`# default options to make sure failures stop script execution - set -euo pipefail - - curl -s -X GET -H "Content-Type: application/json" -H "Authorization: Bearer $DO_AUTH_TOKEN" "https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig" > /kubeconfig.conf + run: dedent`curl -s -X GET -H "Content-Type: application/json" -H "Authorization: Bearer $DO_AUTH_TOKEN" "https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig" > /kubeconfig.conf export KUBECONFIG=/kubeconfig.conf diff --git a/cdk/kraken/test/__snapshots__/custom.test.ts.snap b/cdk/kraken/test/__snapshots__/custom.test.ts.snap index 06bcad1b..4eb2e84b 100644 --- a/cdk/kraken/test/__snapshots__/custom.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/custom.test.ts.snap @@ -27,10 +27,7 @@ Object { "REPOSITORY": "\${{ github.repository }}", }, "name": "Deploy", - "run": "# default options to make sure failures stop script execution -set -euo pipefail - -curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf + "run": "curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf export KUBECONFIG=/kubeconfig.conf diff --git a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap index e4da72e1..85b37671 100644 --- a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap @@ -141,9 +141,6 @@ jobs: - uses: actions/checkout@v2 - name: Deploy run: |- - # default options to make sure failures stop script execution - set -euo pipefail - curl -s -X GET -H 
\\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf export KUBECONFIG=/kubeconfig.conf @@ -396,9 +393,6 @@ jobs: - uses: actions/checkout@v2 - name: Deploy run: |- - # default options to make sure failures stop script execution - set -euo pipefail - curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf export KUBECONFIG=/kubeconfig.conf diff --git a/cdk/kraken/version.json b/cdk/kraken/version.json index da31324f..35313e03 100644 --- a/cdk/kraken/version.json +++ b/cdk/kraken/version.json @@ -1,3 +1,3 @@ { - "version": "0.4.10" + "version": "0.4.11" } From d810043cc8b145c89727e16a7fc3905facc54aae Mon Sep 17 00:00:00 2001 From: Armaan Tobaccowalla Date: Wed, 17 Feb 2021 21:17:52 -0500 Subject: [PATCH 3/7] Release kraken 0.4.12 --- cdk/kraken/CHANGELOG.md | 4 ++++ cdk/kraken/package.json | 2 +- cdk/kraken/src/postintegrationimagepublishjob.ts | 4 ++-- .../test/__snapshots__/integration-tests.test.ts.snap | 4 ++-- cdk/kraken/test/__snapshots__/labs-application.test.ts.snap | 6 +++--- cdk/kraken/version.json | 2 +- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cdk/kraken/CHANGELOG.md b/cdk/kraken/CHANGELOG.md index 2b35343f..bfe7d9d2 100644 --- a/cdk/kraken/CHANGELOG.md +++ b/cdk/kraken/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.4.12 (2021-02-17) + +* Hotfix deploy job part 2 + ## 0.4.11 (2021-02-17) * Hotfix deploy job diff --git a/cdk/kraken/package.json b/cdk/kraken/package.json index 2c2a1e73..2ece0088 100644 --- a/cdk/kraken/package.json +++ b/cdk/kraken/package.json @@ -64,7 +64,7 @@ ], "main": "lib/index.js", "license": "MIT", - "version": "0.4.11", + "version": "0.4.12", "jest": { "clearMocks": true, "collectCoverage": true, diff --git 
a/cdk/kraken/src/postintegrationimagepublishjob.ts b/cdk/kraken/src/postintegrationimagepublishjob.ts index 5153ddf0..0efe9b52 100644 --- a/cdk/kraken/src/postintegrationimagepublishjob.ts +++ b/cdk/kraken/src/postintegrationimagepublishjob.ts @@ -81,8 +81,8 @@ export class PostIntegrationPublishJob extends CheckoutJob { }, }, { - name: 'Load docker images', - run: fullConfig.dockerImages.map(image => `docker push ${image}`).join('\n'), + name: 'Push docker images', + run: fullConfig.dockerImages.map(image => `docker push -a ${image}`).join('\n'), }, ], ...overrides, diff --git a/cdk/kraken/test/__snapshots__/integration-tests.test.ts.snap b/cdk/kraken/test/__snapshots__/integration-tests.test.ts.snap index ac05482f..aa3c2179 100644 --- a/cdk/kraken/test/__snapshots__/integration-tests.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/integration-tests.test.ts.snap @@ -94,8 +94,8 @@ done", }, }, Object { - "name": "Load docker images", - "run": "docker push image", + "name": "Push docker images", + "run": "docker push -a image", }, ], }, diff --git a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap index 85b37671..aebd598d 100644 --- a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap @@ -379,10 +379,10 @@ jobs: with: username: \${{ secrets.DOCKER_USERNAME }} password: \${{ secrets.DOCKER_PASSWORD }} - - name: Load docker images + - name: Push docker images run: |- - docker push pennlabs/example-backend - docker push pennlabs/example-frontend + docker push -a pennlabs/example-backend + docker push -a pennlabs/example-frontend needs: integration-tests deploy: runs-on: ubuntu-latest diff --git a/cdk/kraken/version.json b/cdk/kraken/version.json index 35313e03..d3585dd4 100644 --- a/cdk/kraken/version.json +++ b/cdk/kraken/version.json @@ -1,3 +1,3 @@ { - "version": "0.4.11" + "version": "0.4.12" } From 
e22acf90adf5cca91d531fb59d9c2d8726ed7abd Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 22 Feb 2021 19:02:18 -0500 Subject: [PATCH 4/7] Update dependency typescript to v4 (#53) Co-authored-by: Renovate Bot --- cdk/kittyhawk/package.json | 2 +- cdk/kittyhawk/yarn.lock | 7 ++++++- cdk/kraken/package.json | 2 +- cdk/kraken/yarn.lock | 8 ++++---- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cdk/kittyhawk/package.json b/cdk/kittyhawk/package.json index 9b43b034..c7bd03c5 100644 --- a/cdk/kittyhawk/package.json +++ b/cdk/kittyhawk/package.json @@ -48,7 +48,7 @@ "standard-version": "^9.0.0", "ts-jest": "^26.4.4", "typedoc": "^0.19.2", - "typescript": "^3.9.5" + "typescript": "^4.1.5" }, "dependencies": { "cdk8s": "^1.0.0-beta.5", diff --git a/cdk/kittyhawk/yarn.lock b/cdk/kittyhawk/yarn.lock index feda1dac..4e05fc62 100644 --- a/cdk/kittyhawk/yarn.lock +++ b/cdk/kittyhawk/yarn.lock @@ -6095,7 +6095,12 @@ typedoc@^0.19.2: shelljs "^0.8.4" typedoc-default-themes "^0.11.4" -typescript@^3.9.5, typescript@~3.9.7: +typescript@^4.1.5: + version "4.1.5" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.1.5.tgz#123a3b214aaff3be32926f0d8f1f6e704eb89a72" + integrity sha512-6OSu9PTIzmn9TCDiovULTnET6BgXtDYL4Gg4szY+cGsc3JP1dQL8qvE8kShTRx1NIw4Q9IBHlwODjkjWEtMUyA== + +typescript@~3.9.7: version "3.9.7" resolved "https://registry.yarnpkg.com/typescript/-/typescript-3.9.7.tgz#98d600a5ebdc38f40cb277522f12dc800e9e25fa" integrity sha512-BLbiRkiBzAwsjut4x/dsibSTB6yWpwT5qWmC2OfuCg3GgVQCSgMs4vEctYPhsaGtd0AeuuHMkjZ2h2WG8MSzRw== diff --git a/cdk/kraken/package.json b/cdk/kraken/package.json index 2ece0088..4f5da025 100644 --- a/cdk/kraken/package.json +++ b/cdk/kraken/package.json @@ -46,7 +46,7 @@ "standard-version": "^9.0.0", "ts-jest": "^26.4.4", "typedoc": "^0.19.2", - "typescript": "^3.9.5" + "typescript": "^4.1.5" }, "peerDependencies": {}, "dependencies": { diff --git a/cdk/kraken/yarn.lock 
b/cdk/kraken/yarn.lock index d321bcb8..3054a28a 100644 --- a/cdk/kraken/yarn.lock +++ b/cdk/kraken/yarn.lock @@ -5553,10 +5553,10 @@ typedoc@^0.19.2: shelljs "^0.8.4" typedoc-default-themes "^0.11.4" -typescript@^3.9.5: - version "3.9.7" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-3.9.7.tgz#98d600a5ebdc38f40cb277522f12dc800e9e25fa" - integrity sha512-BLbiRkiBzAwsjut4x/dsibSTB6yWpwT5qWmC2OfuCg3GgVQCSgMs4vEctYPhsaGtd0AeuuHMkjZ2h2WG8MSzRw== +typescript@^4.1.5: + version "4.1.5" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.1.5.tgz#123a3b214aaff3be32926f0d8f1f6e704eb89a72" + integrity sha512-6OSu9PTIzmn9TCDiovULTnET6BgXtDYL4Gg4szY+cGsc3JP1dQL8qvE8kShTRx1NIw4Q9IBHlwODjkjWEtMUyA== uglify-js@^3.1.4: version "3.12.3" From 8c5cea895860a117d904ce5daee8ed41df23d856 Mon Sep 17 00:00:00 2001 From: Armaan Tobaccowalla Date: Thu, 25 Feb 2021 11:52:02 -0500 Subject: [PATCH 5/7] Kraken AWS deploys (#57) --- cdk/kraken/CHANGELOG.md | 4 ++++ cdk/kraken/package.json | 4 ++-- cdk/kraken/src/deploy.ts | 9 +++++---- .../test/__snapshots__/custom.test.ts.snap | 9 +++++---- .../labs-application.test.ts.snap | 18 ++++++++++-------- cdk/kraken/version.json | 2 +- cdk/kraken/yarn.lock | 6 +++--- 7 files changed, 30 insertions(+), 22 deletions(-) diff --git a/cdk/kraken/CHANGELOG.md b/cdk/kraken/CHANGELOG.md index bfe7d9d2..c75d6762 100644 --- a/cdk/kraken/CHANGELOG.md +++ b/cdk/kraken/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.5.0 (2021-02-25) + +* Modify deploy job to deploy to AWS EKS + ## 0.4.12 (2021-02-17) * Hotfix deploy job part 2 diff --git a/cdk/kraken/package.json b/cdk/kraken/package.json index 4f5da025..749a04b2 100644 --- a/cdk/kraken/package.json +++ b/cdk/kraken/package.json @@ -64,7 +64,7 @@ ], "main": "lib/index.js", "license": "MIT", - "version": "0.4.12", + "version": "0.5.0", "jest": { "clearMocks": true, "collectCoverage": true, @@ -97,4 +97,4 @@ }, "types": "lib/index.d.ts", "//": "~~ Generated by projen. 
To modify, edit .projenrc.js and run \"npx projen\"." -} \ No newline at end of file +} diff --git a/cdk/kraken/src/deploy.ts b/cdk/kraken/src/deploy.ts index b3cc9e12..cb36c93d 100644 --- a/cdk/kraken/src/deploy.ts +++ b/cdk/kraken/src/deploy.ts @@ -38,7 +38,7 @@ export class DeployJob extends CheckoutJob { // Build config const fullConfig: Required = { deployTag: '${{ github.sha }}', - image: 'pennlabs/helm-tools:c964e53d3e3e88d36677e84f5437da40a289c7a4', + image: 'pennlabs/helm-tools:39b60af248944898fcbc58d1fe5b0f1995420aef', defaultBranch: 'master', ...config, }; @@ -52,9 +52,7 @@ export class DeployJob extends CheckoutJob { if: `github.ref == 'refs/heads/${fullConfig.defaultBranch}'`, steps: [{ name: 'Deploy', - run: dedent`curl -s -X GET -H "Content-Type: application/json" -H "Authorization: Bearer $DO_AUTH_TOKEN" "https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig" > /kubeconfig.conf - - export KUBECONFIG=/kubeconfig.conf + run: dedent`aws eks --region us-east-1 update-kubeconfig --name production --role-arn arn:aws:iam::\${AWS_ACCOUNT_ID}:role/kubectl # get repo name (by removing owner/organization) RELEASE_NAME=\${REPOSITORY#*/} @@ -82,6 +80,9 @@ export class DeployJob extends CheckoutJob { exit 1`, env: { IMAGE_TAG: fullConfig.deployTag, + AWS_ACCOUNT_ID: '${{ secrets.AWS_ACCOUNT_ID }}', + AWS_ACCESS_KEY_ID: '${{ secrets.GH_AWS_ACCESS_KEY_ID }}', + AWS_SECRET_ACCESS_KEY: '${{ secrets.GH_AWS_SECRET_ACCESS_KEY }}', DO_AUTH_TOKEN: '${{ secrets.DO_AUTH_TOKEN }}', K8S_CLUSTER_ID: '${{ secrets.K8S_CLUSTER_ID }}', REPOSITORY: '${{ github.repository }}', diff --git a/cdk/kraken/test/__snapshots__/custom.test.ts.snap b/cdk/kraken/test/__snapshots__/custom.test.ts.snap index 4eb2e84b..cbc4d8ac 100644 --- a/cdk/kraken/test/__snapshots__/custom.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/custom.test.ts.snap @@ -5,7 +5,7 @@ Object { "jobs": Object { "deploy": Object { "container": Object { - "image": 
"pennlabs/helm-tools:c964e53d3e3e88d36677e84f5437da40a289c7a4", + "image": "pennlabs/helm-tools:39b60af248944898fcbc58d1fe5b0f1995420aef", }, "if": "github.ref == 'refs/heads/master'", "needs": Array [ @@ -21,15 +21,16 @@ Object { }, Object { "env": Object { + "AWS_ACCESS_KEY_ID": "\${{ secrets.GH_AWS_ACCESS_KEY_ID }}", + "AWS_ACCOUNT_ID": "\${{ secrets.AWS_ACCOUNT_ID }}", + "AWS_SECRET_ACCESS_KEY": "\${{ secrets.GH_AWS_SECRET_ACCESS_KEY }}", "DO_AUTH_TOKEN": "\${{ secrets.DO_AUTH_TOKEN }}", "IMAGE_TAG": "\${{ github.sha }}", "K8S_CLUSTER_ID": "\${{ secrets.K8S_CLUSTER_ID }}", "REPOSITORY": "\${{ github.repository }}", }, "name": "Deploy", - "run": "curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf - -export KUBECONFIG=/kubeconfig.conf + "run": "aws eks --region us-east-1 update-kubeconfig --name production --role-arn arn:aws:iam::\${AWS_ACCOUNT_ID}:role/kubectl # get repo name (by removing owner/organization) RELEASE_NAME=\${REPOSITORY#*/} diff --git a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap index aebd598d..abc184d9 100644 --- a/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap +++ b/cdk/kraken/test/__snapshots__/labs-application.test.ts.snap @@ -135,15 +135,13 @@ jobs: deploy: runs-on: ubuntu-latest container: - image: pennlabs/helm-tools:c964e53d3e3e88d36677e84f5437da40a289c7a4 + image: pennlabs/helm-tools:39b60af248944898fcbc58d1fe5b0f1995420aef if: github.ref == 'refs/heads/master' steps: - uses: actions/checkout@v2 - name: Deploy run: |- - curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf - - export KUBECONFIG=/kubeconfig.conf + aws eks --region us-east-1 
update-kubeconfig --name production --role-arn arn:aws:iam::\${AWS_ACCOUNT_ID}:role/kubectl # get repo name (by removing owner/organization) RELEASE_NAME=\${REPOSITORY#*/} @@ -171,6 +169,9 @@ jobs: exit 1 env: IMAGE_TAG: \${{ github.sha }} + AWS_ACCOUNT_ID: \${{ secrets.AWS_ACCOUNT_ID }} + AWS_ACCESS_KEY_ID: \${{ secrets.GH_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: \${{ secrets.GH_AWS_SECRET_ACCESS_KEY }} DO_AUTH_TOKEN: \${{ secrets.DO_AUTH_TOKEN }} K8S_CLUSTER_ID: \${{ secrets.K8S_CLUSTER_ID }} REPOSITORY: \${{ github.repository }} @@ -387,15 +388,13 @@ jobs: deploy: runs-on: ubuntu-latest container: - image: pennlabs/helm-tools:c964e53d3e3e88d36677e84f5437da40a289c7a4 + image: pennlabs/helm-tools:39b60af248944898fcbc58d1fe5b0f1995420aef if: github.ref == 'refs/heads/master' steps: - uses: actions/checkout@v2 - name: Deploy run: |- - curl -s -X GET -H \\"Content-Type: application/json\\" -H \\"Authorization: Bearer $DO_AUTH_TOKEN\\" \\"https://api.digitalocean.com/v2/kubernetes/clusters/\${K8S_CLUSTER_ID}/kubeconfig\\" > /kubeconfig.conf - - export KUBECONFIG=/kubeconfig.conf + aws eks --region us-east-1 update-kubeconfig --name production --role-arn arn:aws:iam::\${AWS_ACCOUNT_ID}:role/kubectl # get repo name (by removing owner/organization) RELEASE_NAME=\${REPOSITORY#*/} @@ -423,6 +422,9 @@ jobs: exit 1 env: IMAGE_TAG: \${{ github.sha }} + AWS_ACCOUNT_ID: \${{ secrets.AWS_ACCOUNT_ID }} + AWS_ACCESS_KEY_ID: \${{ secrets.GH_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: \${{ secrets.GH_AWS_SECRET_ACCESS_KEY }} DO_AUTH_TOKEN: \${{ secrets.DO_AUTH_TOKEN }} K8S_CLUSTER_ID: \${{ secrets.K8S_CLUSTER_ID }} REPOSITORY: \${{ github.repository }} diff --git a/cdk/kraken/version.json b/cdk/kraken/version.json index d3585dd4..156772da 100644 --- a/cdk/kraken/version.json +++ b/cdk/kraken/version.json @@ -1,3 +1,3 @@ { - "version": "0.4.12" + "version": "0.5.0" } diff --git a/cdk/kraken/yarn.lock b/cdk/kraken/yarn.lock index 3054a28a..f9c0aa19 100644 --- 
a/cdk/kraken/yarn.lock +++ b/cdk/kraken/yarn.lock @@ -5554,9 +5554,9 @@ typedoc@^0.19.2: typedoc-default-themes "^0.11.4" typescript@^4.1.5: - version "4.1.5" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.1.5.tgz#123a3b214aaff3be32926f0d8f1f6e704eb89a72" - integrity sha512-6OSu9PTIzmn9TCDiovULTnET6BgXtDYL4Gg4szY+cGsc3JP1dQL8qvE8kShTRx1NIw4Q9IBHlwODjkjWEtMUyA== + version "4.2.2" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.2.2.tgz#1450f020618f872db0ea17317d16d8da8ddb8c4c" + integrity sha512-tbb+NVrLfnsJy3M59lsDgrzWIflR4d4TIUjz+heUnHZwdF7YsrMTKoRERiIvI2lvBG95dfpLxB21WZhys1bgaQ== uglify-js@^3.1.4: version "3.12.3" From d4fc465de179471b4fab3f219c106b691a37d88b Mon Sep 17 00:00:00 2001 From: Armaan Tobaccowalla Date: Fri, 12 Mar 2021 20:14:46 -0500 Subject: [PATCH 6/7] AWS Migration (#35) * Init * Platform users can assume kubectl * Fixes * Vault configuration * Flush to vault * Use maps * Move file * Make ami local * reformat * format * Limit vault IAM to getting roles * Fix grafana vault path * Bump DB size * Add additional IAM roles * Configure vault iam backend * Initial DB backup * Secret sync * Get base_cluster config working * Remove unused variables * Add additional apps to cluster * Add comment * Initial team sync * Remove atlantis * Remove note * Remove terraform approle * Add note * Allow gh action user eks access through kubectl role * Move policy * Allow gh actions user to describe cluster * Small refactor * Finish team-sync * db backup * Rename vars * Rename * Hardcode vault * Documentation++ * Delete old config * Lint * Add tf module source * Link traefik replicas to cluster size * Fix grant * Configure domains * Bump rds version * Scale * Update comment * Disable AppRole authentication in vault * Fix local-exec * Remove SSH access to vault * Fix local-exec for vault pt 2 * Fixes * Update vault README * Remove TODOs --- atlantis.yaml | 19 -- terraform/Bootstrapping.md | 65 +++++ terraform/README.md | 169 
+++++------ terraform/backend.tf | 4 + terraform/base/README.md | 52 ---- terraform/base/atlantis.tf | 10 - terraform/base/bitwarden.tf | 10 - terraform/base/db-backup.tf | 10 - terraform/base/helm-production/atlantis.yaml | 61 ---- .../base/helm-production/cert-manager.yaml | 1 - terraform/base/helm-production/db-backup.yaml | 17 -- terraform/base/helm-production/fluentd.yaml | 83 ------ .../base/helm-production/prometheus.yaml | 25 -- terraform/base/helm-production/traefik.yaml | 37 --- .../helm-production/vault-secret-sync.yaml | 16 -- terraform/base/helm-sandbox/cert-manager.yaml | 1 - terraform/base/helm-sandbox/fluentd.yaml | 83 ------ terraform/base/helm-sandbox/prometheus.yaml | 25 -- terraform/base/helm-sandbox/traefik.yaml | 37 --- .../base/helm-sandbox/vault-secret-sync.yaml | 10 - terraform/base/main.tf | 70 ----- terraform/base/provider.tf | 143 --------- terraform/base/secret-sync.tf | 37 --- terraform/base/staging.tf | 5 - terraform/base/vault-flush.tf | 16 -- terraform/chronos/README.md | 51 ---- terraform/chronos/backends.tf | 17 -- terraform/chronos/grafana.tf | 11 - terraform/chronos/helm/cert-manager.yaml | 1 - terraform/chronos/helm/fluentd.yaml | 83 ------ terraform/chronos/helm/vault-secret-sync.yaml | 7 - terraform/chronos/helm/vault.yaml | 56 ---- terraform/chronos/main.tf | 31 -- terraform/chronos/outputs.tf | 14 - terraform/chronos/provider.tf | 62 ---- terraform/chronos/team-sync.tf | 10 - .../chronos/utils/temp-vault-ingress.yaml | 14 - terraform/chronos/vault.tf | 91 ------ terraform/db-backup.tf | 16 ++ terraform/eks.tf | 97 +++++++ terraform/files/kubeconfig | 27 ++ terraform/files/vault_user_data.sh | 21 ++ terraform/gh-actions.tf | 23 ++ .../helm/aws-node-termination-handler.yaml | 13 + .../helm-production => helm}/bitwarden.yaml | 0 terraform/helm/db-backup.yaml | 13 + terraform/{chronos => }/helm/grafana.yaml | 20 +- terraform/{chronos => }/helm/prometheus.yaml | 0 terraform/{chronos => }/helm/team-sync.yaml | 8 +- 
terraform/{chronos => }/helm/traefik.yaml | 2 +- terraform/helm/vault-secret-sync.yaml | 6 + terraform/iam.tf | 15 + terraform/main.tf | 51 ++++ terraform/modules/README.md | 3 +- terraform/modules/base_cluster/README.md | 29 +- .../modules/base_cluster/cert-manager.tf | 17 +- terraform/modules/base_cluster/main.tf | 16 +- terraform/modules/base_cluster/monitoring.tf | 14 +- terraform/modules/base_cluster/outputs.tf | 14 - terraform/modules/base_cluster/traefik.tf | 2 +- terraform/modules/base_cluster/variables.tf | 35 --- .../modules/base_cluster/vault-secret-sync.tf | 2 +- terraform/modules/domain/README.md | 19 ++ terraform/modules/domain/main.tf | 61 ++++ terraform/modules/domain/variables.tf | 14 + terraform/modules/iam/README.md | 16 ++ terraform/modules/iam/main.tf | 25 ++ terraform/modules/iam/outputs.tf | 7 + terraform/modules/iam/variables.tf | 20 ++ terraform/modules/postgres_cluster/README.md | 25 -- terraform/modules/postgres_cluster/main.tf | 50 ---- terraform/modules/postgres_cluster/outputs.tf | 37 --- .../modules/postgres_cluster/variables.tf | 27 -- terraform/{ => modules}/vault/README.md | 42 +-- terraform/{ => modules}/vault/cert-manager.tf | 8 +- terraform/modules/vault/grafana.tf | 16 ++ terraform/{ => modules}/vault/main.tf | 6 +- terraform/modules/vault/outputs.tf | 3 + .../{ => modules}/vault/policies/admin.hcl | 0 .../vault/policies/secret-sync.hcl | 0 .../vault/policies/team-sync.hcl | 0 terraform/modules/vault/secret-sync.tf | 12 + terraform/modules/vault/team-sync.tf | 21 ++ terraform/{ => modules}/vault/variables.tf | 8 +- terraform/modules/vault_flush/README.md | 2 +- terraform/platform.tf | 30 ++ terraform/production-cluster.tf | 53 ++++ terraform/provider.tf | 78 +++++ terraform/rds.tf | 82 ++++++ terraform/route53.tf | 11 + terraform/variables.tf | 24 ++ terraform/vault.tf | 271 ++++++++++++++++++ terraform/vault/monitoring.tf | 52 ---- terraform/vault/outputs.tf | 9 - terraform/vault/provider.tf | 18 -- 
terraform/vault/secret-sync.tf | 27 -- terraform/vault/team-sync.tf | 28 -- terraform/vault/terraform-user.tf | 9 - terraform/vpc.tf | 31 ++ 99 files changed, 1262 insertions(+), 1778 deletions(-) delete mode 100644 atlantis.yaml create mode 100644 terraform/Bootstrapping.md create mode 100644 terraform/backend.tf delete mode 100644 terraform/base/README.md delete mode 100644 terraform/base/atlantis.tf delete mode 100644 terraform/base/bitwarden.tf delete mode 100644 terraform/base/db-backup.tf delete mode 100644 terraform/base/helm-production/atlantis.yaml delete mode 100644 terraform/base/helm-production/cert-manager.yaml delete mode 100644 terraform/base/helm-production/db-backup.yaml delete mode 100644 terraform/base/helm-production/fluentd.yaml delete mode 100644 terraform/base/helm-production/prometheus.yaml delete mode 100644 terraform/base/helm-production/traefik.yaml delete mode 100644 terraform/base/helm-production/vault-secret-sync.yaml delete mode 100644 terraform/base/helm-sandbox/cert-manager.yaml delete mode 100644 terraform/base/helm-sandbox/fluentd.yaml delete mode 100644 terraform/base/helm-sandbox/prometheus.yaml delete mode 100644 terraform/base/helm-sandbox/traefik.yaml delete mode 100644 terraform/base/helm-sandbox/vault-secret-sync.yaml delete mode 100644 terraform/base/main.tf delete mode 100644 terraform/base/provider.tf delete mode 100644 terraform/base/secret-sync.tf delete mode 100644 terraform/base/staging.tf delete mode 100644 terraform/base/vault-flush.tf delete mode 100644 terraform/chronos/README.md delete mode 100644 terraform/chronos/backends.tf delete mode 100644 terraform/chronos/grafana.tf delete mode 100644 terraform/chronos/helm/cert-manager.yaml delete mode 100644 terraform/chronos/helm/fluentd.yaml delete mode 100644 terraform/chronos/helm/vault-secret-sync.yaml delete mode 100644 terraform/chronos/helm/vault.yaml delete mode 100644 terraform/chronos/main.tf delete mode 100644 terraform/chronos/outputs.tf delete mode 100644 
terraform/chronos/provider.tf delete mode 100644 terraform/chronos/team-sync.tf delete mode 100644 terraform/chronos/utils/temp-vault-ingress.yaml delete mode 100644 terraform/chronos/vault.tf create mode 100644 terraform/db-backup.tf create mode 100644 terraform/eks.tf create mode 100644 terraform/files/kubeconfig create mode 100644 terraform/files/vault_user_data.sh create mode 100644 terraform/gh-actions.tf create mode 100644 terraform/helm/aws-node-termination-handler.yaml rename terraform/{base/helm-production => helm}/bitwarden.yaml (100%) create mode 100644 terraform/helm/db-backup.yaml rename terraform/{chronos => }/helm/grafana.yaml (75%) rename terraform/{chronos => }/helm/prometheus.yaml (100%) rename terraform/{chronos => }/helm/team-sync.yaml (52%) rename terraform/{chronos => }/helm/traefik.yaml (96%) create mode 100644 terraform/helm/vault-secret-sync.yaml create mode 100644 terraform/iam.tf create mode 100644 terraform/main.tf delete mode 100644 terraform/modules/base_cluster/outputs.tf create mode 100644 terraform/modules/domain/README.md create mode 100644 terraform/modules/domain/main.tf create mode 100644 terraform/modules/domain/variables.tf create mode 100644 terraform/modules/iam/README.md create mode 100644 terraform/modules/iam/main.tf create mode 100644 terraform/modules/iam/outputs.tf create mode 100644 terraform/modules/iam/variables.tf delete mode 100644 terraform/modules/postgres_cluster/README.md delete mode 100644 terraform/modules/postgres_cluster/main.tf delete mode 100644 terraform/modules/postgres_cluster/outputs.tf delete mode 100644 terraform/modules/postgres_cluster/variables.tf rename terraform/{ => modules}/vault/README.md (61%) rename terraform/{ => modules}/vault/cert-manager.tf (56%) create mode 100644 terraform/modules/vault/grafana.tf rename terraform/{ => modules}/vault/main.tf (79%) create mode 100644 terraform/modules/vault/outputs.tf rename terraform/{ => modules}/vault/policies/admin.hcl (100%) rename terraform/{ 
=> modules}/vault/policies/secret-sync.hcl (100%) rename terraform/{ => modules}/vault/policies/team-sync.hcl (100%) create mode 100644 terraform/modules/vault/secret-sync.tf create mode 100644 terraform/modules/vault/team-sync.tf rename terraform/{ => modules}/vault/variables.tf (82%) create mode 100644 terraform/platform.tf create mode 100644 terraform/production-cluster.tf create mode 100644 terraform/provider.tf create mode 100644 terraform/rds.tf create mode 100644 terraform/route53.tf create mode 100644 terraform/variables.tf create mode 100644 terraform/vault.tf delete mode 100644 terraform/vault/monitoring.tf delete mode 100644 terraform/vault/outputs.tf delete mode 100644 terraform/vault/provider.tf delete mode 100644 terraform/vault/secret-sync.tf delete mode 100644 terraform/vault/team-sync.tf delete mode 100644 terraform/vault/terraform-user.tf create mode 100644 terraform/vpc.tf diff --git a/atlantis.yaml b/atlantis.yaml deleted file mode 100644 index 309a1cc9..00000000 --- a/atlantis.yaml +++ /dev/null @@ -1,19 +0,0 @@ -version: 3 -projects: - - name: chronos - dir: terraform/chronos - autoplan: - when_modified: ["*.tf", "helm/*.yaml", "../modules/**/*.tf"] - - name: base - dir: terraform/base - autoplan: - when_modified: - - "*.tf" - - helm-sandbox/*.yaml" - - helm-production/*.yaml" - - "../modules/**/*.tf" - workflow: base - - name: vault - dir: terraform/vault - autoplan: - when_modified: ["*.tf", "policies/*.hcl"] diff --git a/terraform/Bootstrapping.md b/terraform/Bootstrapping.md new file mode 100644 index 00000000..1d433de4 --- /dev/null +++ b/terraform/Bootstrapping.md @@ -0,0 +1,65 @@ +# Bootstrapping + +If you want to start from scratch (or somehow AWS loses an entire datacenter in NY). Follow these steps. **Make sure to have psql installed on whatever machine you're running these commands on.** + +The very first thing you need to do is create credentials for terraform to use for AWS. 
+ +Create a `terraform` [AWS IAM user](https://console.aws.amazon.com/iam/home#/users) and attach the `AdministratorAccess` policy to it. + +Now export the following environment variables: +| Name | Description | +| --------------------- | -------------------------------- | +| AWS_ACCESS_KEY_ID | The AWS Access Key for terraform | +| AWS_SECRET_ACCESS_KEY | The AWS Secret Key for terraform | + +Also export all the environment variables as specified under [Inputs](./README.md#inputs). + +Now on to creating the actual infrastructure. First, we need to create an S3 bucket remote backend so that terraform can store its state in the cloud (and not your laptop). To create that bucket, first comment out the `terraform` block in `provider.tf` as well as everything specified in `vault.tf` then run: + +``` bash +terraform init +terraform apply --target module.tfstate_backend +``` + +You will probably need to manually create the verification DNS record for vault and rerun `terraform apply`. + +Now export the following environment variable (keeping the previously exported variables): +| Name | Description | +| ----------- | --------------------------------------- | +| VAULT_TOKEN | The root vault token you just generated | + +Uncomment the `terraform` block from `provider.tf` and run: + +``` bash +terraform init +terraform apply +``` + +If you run into any issues, a second `terraform apply` usually solves them. + +Create the following DNS records where `xyz.us-east-1.elb.amazonaws.com` is the Elastic Loadbalancer DNS name of vault (which can be found in the AWS management console). + +| Type | Name | Destination | +| ----- | ----------------------------- | ------------------------------- | +| CNAME | vault.pennlabs.org | xyz.us-east-1.elb.amazonaws.com | +| CNAME | \_acme-challenge.pennlabs.org | \_acme-challenge.upenn.club | + +Next, visit [vault](https://vault.pennlabs.org) and follow the prompts to initialize vault. Save the root token and recovery key in a safe location. 
+ +Then, uncomment everything in `vault.tf` and run: + +``` bash +terraform init +terraform apply +``` + +Finally, create the following DNS records where `y.y.y.y` is the IP address of traefik in the production cluster (Traefik's IP can be found in the AWS management console). + +| Type | Name | Destination | +| ----- | --------------------------------------- | --------------------------- | +| A | pennlabs.org | y.y.y.y | +| CNAME | *.pennlabs.org | pennlabs.org | +| A | \ | y.y.y.y | +| CNAME | \_acme-challenge.\ | \_acme-challenge.upenn.club | + +If all goes well, you should have a fully functional Kubernetes cluster with everything you need configured. diff --git a/terraform/README.md b/terraform/README.md index d6c518c4..048c1186 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -2,137 +2,100 @@ We use [Terraform](https://www.terraform.io/docs/index.html) to manage our infrastructure in a declarative manner. -Our terraform configuration consists of four parts: +## Inputs -1. Chronos - A long-lived K8s cluster that runs atlantis, grafana, and vault -2. Vault - Configuration for Vault -3. Base Configuration for our Sandbox and Production clusters - 1. Sandbox Cluster - A sandbox K8s cluster for us to test infrastructure related changes - 2. Production cluster - Our production cluster that contains our products and additional applications -4. Modules - these are different terraform modules we use to replicate configuration across our different clusters. - 1. Base Cluster - a barebones K8s cluster with additional software installed - 2. Postgres Cluster - a module to create a postgres cluster as well as users/databases with correct default permissions - 3. 
Vault Flush - a module to flush updated secrets to vault +| | Description | +| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CF_API_KEY | The [Global API Key](https://cert-manager.io/docs/configuration/acme/dns01/cloudflare/#api-keys) of the Penn Labs Cloudflare account | +| GH_PERSONAL_TOKEN | A [GitHub Personal Access token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) of the Penn Labs Admin account | +| GF_GH_CLIENT_ID | The Client ID to the Grafana Penn Labs OAuth2 application on Github | +| GF_SLACK_URL | Slack notification URL used for Grafana notifications | +| GF_GH_CLIENT_SECRET | The Client Secret to the Grafana Penn Labs OAuth2 application on Github | -Each directory contains a README with additional information about that directory. +## backend.tf -## Bootstrapping - -If you want to start from scratch (or somehow DigitalOcean loses an entire datacenter in NY. Follow these steps. **Make sure to have psql installed on whatever machine you're running these commands on.** - -The very first thing you need to do is create credentials for terraform to use for AWS and DigitalOcean. - -Create a `terraform` [AWS IAM user](https://console.aws.amazon.com/iam/home#/users) and attach the `AdministratorAccess` policy to it. Also create a [DigitalOcean Personal Access Token](https://cloud.digitalocean.com/account/api/tokens) for terraform. +Contains configuration to create a terraform S3 backend. `provider.tf` is configured to use the remote S3 backend. 
-Now export the following environment variables: -| Name | Description | -|---------------------------|-----------------------------------| -| DIGITALOCEAN_ACCESS_TOKEN | The DO Access Token for terraform | -| AWS_ACCESS_KEY_ID | The AWS Access Key for terraform | -| AWS_SECRET_ACCESS_KEY | The AWS Secret Key for terraform | +## db-backup.tf -Now on to creating the actual infrastructure. First, `chronos` needs to create its own S3 bucket remote backend so that it can store its state in the cloud (and not your laptop). To create that bucket, first comment out the `terraform` block in `chronos/provider.tf` then run: +Grants the `db-backup` IAM role access to the `sql.pennlabs.org` S3 bucket. -```bash -cd chronos -terraform init -terraform apply --target module.chronos_tfstate_backend -``` +## eks.tf -uncomment the `terraform` block from `chronos/provider.tf` and run: +Creates an EKS cluster using the [EKS Module](https://registry.terraform.io/modules/terraform-aws-modules/eks/aws/latest). -```bash -terraform init -terraform apply -``` +Additionally installs [`aws-node-termination-handler`](https://github.com/aws/aws-node-termination-handler) and creates a `kubectl` IAM role that has admin access to the EKS cluster. -If you run into any issues, a second `terraform apply` usually solves them. +Finally, a populated kubeconfig is pushed to vault for platform members to use. However, that kubeconfig requires the user to have authorization to assume the `kubectl` role. -At this point the Chronos cluster will be deployed but vault won't yet be configured correctly. There will likely be a lot of errors in K8s due to missing secrets. Ignore those for now. +## gh-actions.tf -Create the following DNS records where `x.x.x.x` is the IP address of traefik (Traefik's IP can be found in the DigitalOcean Loadbalancer page) +Creates an IAM user for GitHub Actions that can assume the `kubectl` IAM role as well as describe the EKS cluster (so that it can generate its own kubeconfig). 
-| Type | Name | Destination | -|-------|-------------------------------|-----------------------------| -| A | upenn.club | x.x.x.x | -| CNAME | *.upenn.club | upenn.club | -| CNAME | vault.pennlabs.org | upenn.club | -| CNAME | grafana.pennlabs.org | upenn.club | -| CNAME | \_acme-challenge.pennlabs.org | \_acme-challenge.upenn.club | +## iam.tf -Now you need to unseal vault. Download a kubeconfig for the Chronos cluster from DigitalOcean and run the following commands: +Uses our [IAM Module](./modules/iam) to create IAM roles for all of our products that can be assumed by the correct Service Account in the `default` namespace. -```bash -$ kubectl apply -f utils/temp-vault-ingress.yaml -$ kubectl exec -it vault-0 -- /bin/sh -vault-0 $ vault operator init -recovery-shares=1 -recovery-threshold=1 -``` +Additionally creates an IAM role for secret sync that can be assumed from the `default`, `staging`, and `cert-manager` namespaces. -Save the resulting root token and key in a safe location. 
+## main.tf -Now export the following environment variables +Defines a few locals that are used in various places: -| Name | Description | -|----------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| VAULT_TOKEN | The root vault token you just generated | -| TF_VAR_CF_API_KEY | The [Global API Key](https://cert-manager.io/docs/configuration/acme/dns01/cloudflare/#api-keys) of the Penn Labs Cloudflare account | -| TF_VAR_GH_PERSONAL_TOKEN | A [GitHub Personal Access token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) of the Penn Labs Admin account | -| TF_VAR_GF_GH_CLIENT_ID | The Client ID to the Grafana Penn Labs OAuth2 application on Github | -| TF_VAR_GF_GH_CLIENT_SECRET | The Client Secret to the Grafana Penn Labs OAuth2 application on Github | -| TF_VAR_GF_SLACK_URL | Slack notification URL used for Grafana notifications | -| TF_VAR_ELASTIC_PASSWORD | The password to the managed elasticsearch instance | -| TF_VAR_ELASTIC_HOST | The host to the managed elasticsearch instance (format should be ) | -| VAULT_ADDR | Set this to | +* `database_users` - a set of all databases to be created. +* `products` - a set of all Penn Labs products in the cluster. +* `iam_service_accounts` - a set of all IAM roles that should be created with the ability to assume those roles from a Service Account. +* `platform_members` - a set of platform members to grant kubectl access to. +* `k8s_cluster_name` - the name of the EKS cluster. +* `k8s_cluster_size` - the size of our cluster. +* `domains` - a set of all our product domains. +* `traefik_lb_name` - Name of the load balancer created by traefik. +* `vault_ami` - The AMI of the official vault AMI. 
-Run the following commands: +## platform.tf -```bash -cd ../vault -terraform init -terraform apply -``` +Creates an IAM user for certain platform members that allows them to assume the `kubectl` role (and therefore gain kubectl access to the cluster). -Now create a secret in vault at the path `secrets/production/default/atlantis` with the all the previous environment variables set (AWS, DigitalOcean, Vault, and TF_VAR_*) as well as the following values: -| Name | Description | -|----------------------------|---------------------------------------------------------| -| ATLANTIS_GH_APP_ID | The ID of the Penn Labs Atlantis GitHub App | -| ATLANTIS_GH_APP_KEY | The private key of the Penn Labs Atlantis GitHub App | -| ATLANTIS_GH_WEBHOOK_SECRET | The webhook secret of the Penn Labs Atlantis GitHub App | -| VAULT_ADDR | | +Additionally pushes credentials for those users into vault (where only platform members can read them). -Also make sure that the atlantis bot has push access to the master branch ([more information here](https://www.runatlantis.io/docs/apply-requirements.html#github)). +## production-cluster.tf -The last step is to deploy the sandbox and production clusters. First, log into vault and create empty secrets for all of our products (named `locals.database_users` in `base/main.tf`) with the path `secrets/production/default/`. Then run the following commands +Uses our [Base Cluster Module](./modules/base_cluster) to configure our K8s cluster. -```bash -cd ../base -terraform init -terraform apply -``` +Additionally installs `team-sync`, `grafana`, `bitwarden`, and `db-backup`. -Create the following DNS records where `y.y.y.y` is the IP address of traefik in the production cluster (Traefik's IP can be found in the DigitalOcean Loadbalancer page) and `z.z.z.z` is the IP address of traefik in the sandbox cluster. 
+## provider.tf -| Type | Name | Destination | -|-------|-----------------------------------------|-----------------------------| -| A | pennlabs.org | y.y.y.y | -| CNAME | *.pennlabs.org | pennlabs.org | -| A | prometheus-sandbox.pennlabs.org | z.z.z.z | -| CNAME | helm.pennlabs.org | pennlabs.github.io | -| A | \ | y.y.y.y | -| CNAME | \_acme-challenge.\ | \_acme-challenge.upenn.club | +Configures terraform to use the remote S3 backend as well as the following providers: -Now you can run the following command to delete the temporary vault ingress. +* AWS +* DigitalOcean +* Helm & Kubernetes (pointing to the EKS cluster) +* Postgres (pointing to the production database) +* Random +* Vault +* Time -```bash -kubectl delete -f ../chronos/utils/temp-vault-ingress.yaml -``` +## rds.tf -If all goes well, you should now have 3 working clusters completely managed through terraform. +Creates an RDS postgres cluster with a random master password. -The final configuration for the new clusters consists of editing the `k8s-deploy` context in CircleCI and replacing `K8S_CLUSTER_ID` with the cluster ID of your new production cluster which can be found in DigitalOcean. +Additionally creates databases, database roles, and manages database grants for each product. -## Things we would like to improve +## route53.tf -* We currently use bcrypt to generate the secret traefik uses to provide authentication for our prometheus ingresses. Unfortunately the output of the bcrypt function changes each time it's called, which causes `terraform apply` to change the secret data each time the command is run. -* Grafana metadata is stored in a pvc, so it cannot be transferred cluster-to-cluster. We would love to move to postgres, but the helm chart doesn't support it. -* In `base` we need to manually create the secret-sync authentication secrets in Kubernetes in each ns of each cluster. There's doesn't seem to be an immediate cleaner way of doing this, but it feels like there should be. 
+Uses our [Domain Module](./modules/domain) to create Route53 hosted zones for all of our products that contain the minimal DNS entries we need configured. + +## vault.tf + +Creates a TLS certificate (provisioned by AWS) and a KMS key to use for vault. + +Creates an IAM role for vault that allows it to read any IAM role (used for aws auth method) as well as interact with the KMS key generated. + +Creates an EC2 instance running the official vault AMI (with a custom config file), as well as a loadbalancer that terminates TLS and points back to the EC2 instance. + +Finally, configures vault using our [Vault Module](./modules/vault) and populates a few additional secrets. + +## vpc.tf + +Creates a VPC to put all of our cloud resources in using the [VPC Module](https://registry.terraform.io/modules/terraform-aws-modules/vpc/aws/latest). diff --git a/terraform/backend.tf b/terraform/backend.tf new file mode 100644 index 00000000..a403d72e --- /dev/null +++ b/terraform/backend.tf @@ -0,0 +1,4 @@ +module "tfstate_backend" { + source = "git::https://github.com/cloudposse/terraform-aws-tfstate-backend.git?ref=tags/0.28.0" + name = "pennlabs-terraform" +} diff --git a/terraform/base/README.md b/terraform/base/README.md deleted file mode 100644 index a7cce774..00000000 --- a/terraform/base/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# Base - -This directory mainly contains configuration for our production and sandbox Kubernetes clusters as well as configuration for our production postgres database. It also contains a small amount of configuration for our chronos cluster. - -## main.tf - -Sets up the base Sandbox and Production clusters using our [Base Cluster Terraform Module](../modules/base_cluster). We provide custom values to traefik, vault-secret-sync, prometheus, and fluentd from files in the `helm-sandbox` and `helm-production` directories. - -It also creates a production postgres database using our [Postgres Cluster Terraform Module](../modules/postgres_cluster). 
Additionally we create users for bitwarden, and all of our products within the database. - -## provider.tf - -Configures terraform to use the `base` remote S3 backend as well as the following providers - -* AWS -* DigitalOcean -* Helm & Kubernetes (pointing to the `production` cluster) -* Helm & Kubernetes with the `sandbox` alias (pointing to the `sandbox` cluster) -* Helm & Kubernetes with the `chronos` alias pointing to the `chronos` cluster) -* Postgres (pointing to the production database) -* Random -* Vault - -Also configures [terraform remote state](https://www.terraform.io/docs/providers/terraform/d/remote_state.html) for `chronos` and `vault` to gain access to credentials generated from those terraform projects. - -## atlantis.tf - -Installs [atlantis](https://www.runatlantis.io/) through [this helm chart](https://github.com/helm/charts/tree/master/stable/atlantis) on to our production cluster. - -## bitwarden.tf - -Installs [bitwarden_rs](https://github.com/dani-garcia/bitwarden_rs) through [Icarus](https://github.com/pennlabs/icarus) on to our production cluster. - -## db-backup.tf - -Installs [pg-s3-backup](../../docker/pg-s3-backup) through [Icarus](https://github.com/pennlabs/icarus) on to our production cluster as a cronjob to backup our databases. - -## secret-sync.tf - -Saves the secret-sync AppRole credentials in Kubernetes secrets in: - -* `cert-manager`, `default`, and `monitoring` namespaces in `chronos` -* `cert-manager`, `default`, and `monitoring` namespaces in `sandbox` -* `cert-manager`, `default`, `staging`, and `monitoring` namespaces in `production` - -## staging.tf - -Creates a staging namespace in our production cluster. - -## vault-flush.tf - -Saves the database credentials for all of our products in vault. 
diff --git a/terraform/base/atlantis.tf b/terraform/base/atlantis.tf deleted file mode 100644 index b7d80f47..00000000 --- a/terraform/base/atlantis.tf +++ /dev/null @@ -1,10 +0,0 @@ -resource "helm_release" "atlantis" { - name = "atlantis" - repository = "https://kubernetes-charts.storage.googleapis.com" - chart = "atlantis" - version = "3.12.0" - - values = [ - "${file("helm-production/atlantis.yaml")}" - ] -} diff --git a/terraform/base/bitwarden.tf b/terraform/base/bitwarden.tf deleted file mode 100644 index d4093542..00000000 --- a/terraform/base/bitwarden.tf +++ /dev/null @@ -1,10 +0,0 @@ -resource "helm_release" "bitwarden" { - name = "bitwarden" - repository = "https://helm.pennlabs.org" - chart = "icarus" - version = "0.1.20" - - values = [ - "${file("helm-production/bitwarden.yaml")}" - ] -} diff --git a/terraform/base/db-backup.tf b/terraform/base/db-backup.tf deleted file mode 100644 index 36466b6a..00000000 --- a/terraform/base/db-backup.tf +++ /dev/null @@ -1,10 +0,0 @@ -resource "helm_release" "db-backup" { - name = "db-backup" - repository = "https://helm.pennlabs.org" - chart = "icarus" - version = "0.1.20" - - values = [ - "${file("helm-production/db-backup.yaml")}" - ] -} diff --git a/terraform/base/helm-production/atlantis.yaml b/terraform/base/helm-production/atlantis.yaml deleted file mode 100644 index 15f504be..00000000 --- a/terraform/base/helm-production/atlantis.yaml +++ /dev/null @@ -1,61 +0,0 @@ -orgWhitelist: github.com/pennlabs/infrastructure - -# Atlantis requires the GitHub app private key to be saved in a file -# This workaround allows us to load it from an environment variable from vault -# Additional flags are also specified here until the helm chart includes them natively -command: - [ - "/bin/sh", - "-c", - 'echo "$ATLANTIS_GH_APP_KEY" > /home/atlantis/gh_app_key.pem && atlantis server --silence-vcs-status-no-plans --hide-prev-plan-comments --write-git-creds --gh-app-key-file=/home/atlantis/gh_app_key.pem', - ] - -image: - 
repository: runatlantis/atlantis - tag: v0.14.0 - pullPolicy: IfNotPresent - -## Use Server Side Repo Config, -## ref: https://www.runatlantis.io/docs/server-side-repo-config.html -repoConfig: | - --- - repos: - - id: github.com/pennlabs/infrastructure - apply_requirements: [approved, mergeable] - workflow: default - allowed_overrides: [workflow] - allow_custom_workflows: false - workflows: - default: - plan: - steps: [init, plan] - apply: - steps: [apply] - base: - plan: - steps: - - run: cd ../chronos && /atlantis-data/bin/terraform${ATLANTIS_TERRAFORM_VERSION} init -input=false -no-color -upgrade - - run: cd ../chronos && /atlantis-data/bin/terraform${ATLANTIS_TERRAFORM_VERSION} refresh -no-color - - run: echo "[SPLIT] BASE CHANGES UNDER HERE" - - init - - plan - -defaultTFVersion: 0.12.29 - -ingress: - enabled: true - path: / - host: atlantis.pennlabs.org - tls: - - secretName: pennlabs-org-tls - hosts: - - atlantis.pennlabs.org - -## test container details -test: - enabled: true - image: lachlanevenson/k8s-kubectl - imageTag: v1.4.8-bash - -loadEnvFromSecrets: - - atlantis diff --git a/terraform/base/helm-production/cert-manager.yaml b/terraform/base/helm-production/cert-manager.yaml deleted file mode 100644 index 1b4551cc..00000000 --- a/terraform/base/helm-production/cert-manager.yaml +++ /dev/null @@ -1 +0,0 @@ -installCRDs: true diff --git a/terraform/base/helm-production/db-backup.yaml b/terraform/base/helm-production/db-backup.yaml deleted file mode 100644 index f8936279..00000000 --- a/terraform/base/helm-production/db-backup.yaml +++ /dev/null @@ -1,17 +0,0 @@ -cronjobs: - - name: backup-infrastructure - schedule: "21 2 * * *" - secret: backup-infrastructure - image: pennlabs/pg-s3-backup - tag: 1200cfb82d9d83522a219f2631663dc633bb29ca - extraEnv: - - name: AWS_DEFAULT_REGION - value: "us-east-1" - - name: backup-production - schedule: "21 2 * * *" - secret: backup-production - image: pennlabs/pg-s3-backup - tag: 
1200cfb82d9d83522a219f2631663dc633bb29ca - extraEnv: - - name: AWS_DEFAULT_REGION - value: "us-east-1" diff --git a/terraform/base/helm-production/fluentd.yaml b/terraform/base/helm-production/fluentd.yaml deleted file mode 100644 index 8bbc2a64..00000000 --- a/terraform/base/helm-production/fluentd.yaml +++ /dev/null @@ -1,83 +0,0 @@ -image: - repository: quay.io/fluentd_elasticsearch/fluentd - tag: v3.0.1 - -resources: - limits: - cpu: 100m - memory: 500Mi - requests: - cpu: 100m - memory: 200Mi - -elasticsearch: - auth: - enabled: false - setOutputHostEnvVar: false - scheme: "https" - logLevel: "info" - typeName: "fluentd" - -configMaps: - useDefaults: - systemInputConf: false - forwardInputConf: false - monitoringConf: false - outputConf: false - -secret: - - name: OUTPUT_USER - secret_name: fluentd - secret_key: ELASTIC_USER - - name: OUTPUT_PASSWORD - secret_name: fluentd - secret_key: ELASTIC_PASSWORD - - name: OUTPUT_HOSTS - secret_name: fluentd - secret_key: ELASTIC_HOSTS - -# Nearly identical to the default output.conf, but we had to manually put -# in the user and password fields since the chart doesn't include the fields -# if elasticsearch.auth is disabled (which we have to do in order to hand in -# the env variables through k8s secrets). 
-extraConfigMaps: - output.conf: | - - @type kubernetes_metadata - - - - @type null - - - - @id elasticsearch - @type "#{ENV['OUTPUT_TYPE']}" - @log_level "#{ENV['OUTPUT_LOG_LEVEL']}" - include_tag_key "#{ENV['OUTPUT_INCLUDE_TAG_KEY']}" - hosts "#{ENV['OUTPUT_HOSTS']}" - path "#{ENV['OUTPUT_PATH']}" - scheme "#{ENV['OUTPUT_SCHEME']}" - ssl_verify "#{ENV['OUTPUT_SSL_VERIFY']}" - ssl_version "#{ENV['OUTPUT_SSL_VERSION']}" - user "#{ENV['OUTPUT_USER']}" - password "#{ENV['OUTPUT_PASSWORD']}" - logstash_format "#{ENV['LOGSTASH_FORMAT']}" - logstash_prefix "#{ENV['LOGSTASH_PREFIX']}" - reconnect_on_error "#{ENV['OUTPUT_RECONNECT_ON_ERROR']}" - reload_on_failure "#{ENV['OUTPUT_RELOAD_ON_FAILURE']}" - reload_connections "#{ENV['OUTPUT_RELOAD_CONNECTIONS']}" - - @type "#{ENV['OUTPUT_BUFFER_TYPE']}" - path "#{ENV['OUTPUT_BUFFER_PATH']}" - flush_mode "#{ENV['OUTPUT_BUFFER_FLUSH_MODE']}" - retry_type "#{ENV['OUTPUT_BUFFER_RETRY_TYPE']}" - flush_thread_count "#{ENV['OUTPUT_BUFFER_FLUSH_THREAD_TYPE']}" - flush_interval "#{ENV['OUTPUT_BUFFER_FLUSH_INTERVAL']}" - retry_forever "#{ENV['OUTPUT_BUFFER_RETRY_FOREVER']}" - retry_max_interval "#{ENV['OUTPUT_BUFFER_RETRY_MAX_INTERVAL']}" - chunk_limit_size "#{ENV['OUTPUT_BUFFER_CHUNK_LIMIT']}" - queue_limit_length "#{ENV['OUTPUT_BUFFER_QUEUE_LIMIT']}" - overflow_action "#{ENV['OUTPUT_BUFFER_OVERFLOW_ACTION']}" - - diff --git a/terraform/base/helm-production/prometheus.yaml b/terraform/base/helm-production/prometheus.yaml deleted file mode 100644 index 06b5b122..00000000 --- a/terraform/base/helm-production/prometheus.yaml +++ /dev/null @@ -1,25 +0,0 @@ -alertmanager: - # we're using grafana alerts for the time being - enabled: false - -server: - image: - repository: prom/prometheus - tag: v2.13.1 - pullPolicy: IfNotPresent - persistentVolume: - enabled: true - accessModes: - - ReadWriteOnce - size: 8Gi - ingress: - enabled: true - annotations: - traefik.ingress.kubernetes.io/auth-type: "basic" - 
traefik.ingress.kubernetes.io/auth-secret: "prometheus-basic-auth" - hosts: - - prometheus-production.pennlabs.org - tls: - - secretName: pennlabs-org-tls - hosts: - - prometheus-production.pennlabs.org diff --git a/terraform/base/helm-production/traefik.yaml b/terraform/base/helm-production/traefik.yaml deleted file mode 100644 index 85e50bcc..00000000 --- a/terraform/base/helm-production/traefik.yaml +++ /dev/null @@ -1,37 +0,0 @@ -replicas: 5 - -deploymentStrategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - type: RollingUpdate - -ssl: - enabled: true - enforced: true - permanentRedirect: true - upstream: false - insecureSkipVerify: false - generateTLS: false - - persistence: - enabled: false - -dashboard: - enabled: false - -accessLogs: - enabled: true - format: common - -rbac: - enabled: true - -metrics: - prometheus: - enabled: true - buckets: [0.1, 0.3, 1.2, 5] - service: - annotations: - prometheus.io/scrape: "true" - prometheus.io/port: "9100" diff --git a/terraform/base/helm-production/vault-secret-sync.yaml b/terraform/base/helm-production/vault-secret-sync.yaml deleted file mode 100644 index 0278b359..00000000 --- a/terraform/base/helm-production/vault-secret-sync.yaml +++ /dev/null @@ -1,16 +0,0 @@ -authenticator: - image: pennlabs/vault-approle-authenticator - tag: "85da74af10627d38367c47c7802d360b13669584" - -synchronizer: - image: postfinance/vault-kubernetes-synchronizer - tag: "0.1.3" - -namespaces: - - default - - cert-manager - - staging - - monitoring - -vault_address: https://vault.pennlabs.org:443 -cluster: production diff --git a/terraform/base/helm-sandbox/cert-manager.yaml b/terraform/base/helm-sandbox/cert-manager.yaml deleted file mode 100644 index 1b4551cc..00000000 --- a/terraform/base/helm-sandbox/cert-manager.yaml +++ /dev/null @@ -1 +0,0 @@ -installCRDs: true diff --git a/terraform/base/helm-sandbox/fluentd.yaml b/terraform/base/helm-sandbox/fluentd.yaml deleted file mode 100644 index 8bbc2a64..00000000 --- 
a/terraform/base/helm-sandbox/fluentd.yaml +++ /dev/null @@ -1,83 +0,0 @@ -image: - repository: quay.io/fluentd_elasticsearch/fluentd - tag: v3.0.1 - -resources: - limits: - cpu: 100m - memory: 500Mi - requests: - cpu: 100m - memory: 200Mi - -elasticsearch: - auth: - enabled: false - setOutputHostEnvVar: false - scheme: "https" - logLevel: "info" - typeName: "fluentd" - -configMaps: - useDefaults: - systemInputConf: false - forwardInputConf: false - monitoringConf: false - outputConf: false - -secret: - - name: OUTPUT_USER - secret_name: fluentd - secret_key: ELASTIC_USER - - name: OUTPUT_PASSWORD - secret_name: fluentd - secret_key: ELASTIC_PASSWORD - - name: OUTPUT_HOSTS - secret_name: fluentd - secret_key: ELASTIC_HOSTS - -# Nearly identical to the default output.conf, but we had to manually put -# in the user and password fields since the chart doesn't include the fields -# if elasticsearch.auth is disabled (which we have to do in order to hand in -# the env variables through k8s secrets). 
-extraConfigMaps: - output.conf: | - - @type kubernetes_metadata - - - - @type null - - - - @id elasticsearch - @type "#{ENV['OUTPUT_TYPE']}" - @log_level "#{ENV['OUTPUT_LOG_LEVEL']}" - include_tag_key "#{ENV['OUTPUT_INCLUDE_TAG_KEY']}" - hosts "#{ENV['OUTPUT_HOSTS']}" - path "#{ENV['OUTPUT_PATH']}" - scheme "#{ENV['OUTPUT_SCHEME']}" - ssl_verify "#{ENV['OUTPUT_SSL_VERIFY']}" - ssl_version "#{ENV['OUTPUT_SSL_VERSION']}" - user "#{ENV['OUTPUT_USER']}" - password "#{ENV['OUTPUT_PASSWORD']}" - logstash_format "#{ENV['LOGSTASH_FORMAT']}" - logstash_prefix "#{ENV['LOGSTASH_PREFIX']}" - reconnect_on_error "#{ENV['OUTPUT_RECONNECT_ON_ERROR']}" - reload_on_failure "#{ENV['OUTPUT_RELOAD_ON_FAILURE']}" - reload_connections "#{ENV['OUTPUT_RELOAD_CONNECTIONS']}" - - @type "#{ENV['OUTPUT_BUFFER_TYPE']}" - path "#{ENV['OUTPUT_BUFFER_PATH']}" - flush_mode "#{ENV['OUTPUT_BUFFER_FLUSH_MODE']}" - retry_type "#{ENV['OUTPUT_BUFFER_RETRY_TYPE']}" - flush_thread_count "#{ENV['OUTPUT_BUFFER_FLUSH_THREAD_TYPE']}" - flush_interval "#{ENV['OUTPUT_BUFFER_FLUSH_INTERVAL']}" - retry_forever "#{ENV['OUTPUT_BUFFER_RETRY_FOREVER']}" - retry_max_interval "#{ENV['OUTPUT_BUFFER_RETRY_MAX_INTERVAL']}" - chunk_limit_size "#{ENV['OUTPUT_BUFFER_CHUNK_LIMIT']}" - queue_limit_length "#{ENV['OUTPUT_BUFFER_QUEUE_LIMIT']}" - overflow_action "#{ENV['OUTPUT_BUFFER_OVERFLOW_ACTION']}" - - diff --git a/terraform/base/helm-sandbox/prometheus.yaml b/terraform/base/helm-sandbox/prometheus.yaml deleted file mode 100644 index 6b2cc4d1..00000000 --- a/terraform/base/helm-sandbox/prometheus.yaml +++ /dev/null @@ -1,25 +0,0 @@ -alertmanager: - # we're using grafana alerts for the time being - enabled: false - -server: - image: - repository: prom/prometheus - tag: v2.13.1 - pullPolicy: IfNotPresent - persistentVolume: - enabled: true - accessModes: - - ReadWriteOnce - size: 8Gi - ingress: - enabled: true - annotations: - traefik.ingress.kubernetes.io/auth-type: "basic" - traefik.ingress.kubernetes.io/auth-secret: 
"prometheus-basic-auth" - hosts: - - prometheus-sandbox.pennlabs.org - tls: - - secretName: pennlabs-org-tls - hosts: - - prometheus-sandbox.pennlabs.org diff --git a/terraform/base/helm-sandbox/traefik.yaml b/terraform/base/helm-sandbox/traefik.yaml deleted file mode 100644 index 4b2dbe2a..00000000 --- a/terraform/base/helm-sandbox/traefik.yaml +++ /dev/null @@ -1,37 +0,0 @@ -replicas: 1 - -deploymentStrategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - type: RollingUpdate - -ssl: - enabled: true - enforced: true - permanentRedirect: true - upstream: false - insecureSkipVerify: false - generateTLS: false - - persistence: - enabled: false - -dashboard: - enabled: false - -accessLogs: - enabled: true - format: common - -rbac: - enabled: true - -metrics: - prometheus: - enabled: true - buckets: [0.1, 0.3, 1.2, 5] - service: - annotations: - prometheus.io/scrape: "true" - prometheus.io/port: "9100" diff --git a/terraform/base/helm-sandbox/vault-secret-sync.yaml b/terraform/base/helm-sandbox/vault-secret-sync.yaml deleted file mode 100644 index 3507c73d..00000000 --- a/terraform/base/helm-sandbox/vault-secret-sync.yaml +++ /dev/null @@ -1,10 +0,0 @@ -authenticator: - image: pennlabs/vault-approle-authenticator - tag: "85da74af10627d38367c47c7802d360b13669584" - -synchronizer: - image: postfinance/vault-kubernetes-synchronizer - tag: "0.1.3" - -vault_address: https://vault.pennlabs.org:443 -cluster: sandbox diff --git a/terraform/base/main.tf b/terraform/base/main.tf deleted file mode 100644 index 17dd73e1..00000000 --- a/terraform/base/main.tf +++ /dev/null @@ -1,70 +0,0 @@ -module "production-cluster" { - source = "../modules/base_cluster" - name = "production" - node_count = 10 - node_size = "s-2vcpu-4gb" - traefik_values = [ - "${file("helm-production/traefik.yaml")}" - ] - cert_manager_values = [ - "${file("helm-production/cert-manager.yaml")}" - ] - vault_secret_sync_values = [ - "${file("helm-production/vault-secret-sync.yaml")}" - ] - prometheus_values 
= [ - "${file("helm-production/prometheus.yaml")}" - ] - fluentd_values = [ - "${file("helm-production/fluentd.yaml")}" - ] -} - -module "sandbox-cluster" { - source = "../modules/base_cluster" - name = "sandbox" - node_count = 1 - node_size = "s-1vcpu-2gb" - traefik_values = [ - "${file("helm-sandbox/traefik.yaml")}" - ] - cert_manager_values = [ - "${file("helm-sandbox/cert-manager.yaml")}" - ] - vault_secret_sync_values = [ - "${file("helm-sandbox/vault-secret-sync.yaml")}" - ] - prometheus_values = [ - "${file("helm-sandbox/prometheus.yaml")}" - ] - fluentd_values = [ - "${file("helm-sandbox/fluentd.yaml")}" - ] - providers = { - helm = helm.sandbox - kubernetes = kubernetes.sandbox - } -} - -// Production DB -module "postgres-cluster" { - source = "../modules/postgres_cluster" - users = local.database_users - name = "production" - node_count = 2 - node_size = "db-s-6vcpu-16gb" -} - -locals { - database_users = toset([ - "bitwarden", - "common-funding-application", - "first-year-hub", - "office-hours-queue", - "penn-clubs", - "penn-courses", - "platform", - "platform-dev", - "student-life", - ]) -} diff --git a/terraform/base/provider.tf b/terraform/base/provider.tf deleted file mode 100644 index dbadacd8..00000000 --- a/terraform/base/provider.tf +++ /dev/null @@ -1,143 +0,0 @@ -provider "digitalocean" { - version = "~> 1.22.0" -} - -provider "aws" { - version = "~> 2.44.0" - region = "us-east-1" -} - -// Production K8s cluster -provider "helm" { - version = "~> 1.3" - kubernetes { - load_config_file = false - host = module.production-cluster.endpoint - token = module.production-cluster.token - cluster_ca_certificate = base64decode( - module.production-cluster.cluster_ca_certificate - ) - } -} - -provider "kubernetes" { - version = "~> 1.11" - load_config_file = false - host = module.production-cluster.endpoint - token = module.production-cluster.token - cluster_ca_certificate = base64decode( - module.production-cluster.cluster_ca_certificate - ) -} - -// 
Sandbox K8s cluster -provider "helm" { - alias = "sandbox" - version = "~> 1.3" - kubernetes { - load_config_file = false - host = module.sandbox-cluster.endpoint - token = module.sandbox-cluster.token - cluster_ca_certificate = base64decode( - module.sandbox-cluster.cluster_ca_certificate - ) - } -} - -provider "kubernetes" { - alias = "sandbox" - version = "~> 1.11" - load_config_file = false - host = module.sandbox-cluster.endpoint - token = module.sandbox-cluster.token - cluster_ca_certificate = base64decode( - module.sandbox-cluster.cluster_ca_certificate - ) -} - -// Chronos K8s cluster -provider "helm" { - alias = "chronos" - version = "~> 1.3" - kubernetes { - load_config_file = false - host = data.terraform_remote_state.chronos.outputs.endpoint - token = data.terraform_remote_state.chronos.outputs.token - cluster_ca_certificate = base64decode( - data.terraform_remote_state.chronos.outputs.cluster_ca_certificate - ) - } -} - -provider "kubernetes" { - alias = "chronos" - version = "~> 1.11" - load_config_file = false - host = data.terraform_remote_state.chronos.outputs.endpoint - token = data.terraform_remote_state.chronos.outputs.token - cluster_ca_certificate = base64decode( - data.terraform_remote_state.chronos.outputs.cluster_ca_certificate - ) -} - -// Production DB -provider "postgresql" { - version = "~> 1.6" - host = module.postgres-cluster.host - port = module.postgres-cluster.port - database = "defaultdb" - expected_version = module.postgres-cluster.version - username = module.postgres-cluster.admin-user - password = module.postgres-cluster.admin-password - superuser = false - sslmode = "require" -} - -provider "random" { - version = "~> 2.2" -} - -provider "vault" { - version = "~> 2.10" - skip_tls_verify = true -} - -provider "time" { - version = "~> 0.5" -} - -// Vault remote state -data "terraform_remote_state" "vault" { - backend = "s3" - - config = { - region = "us-east-1" - bucket = "vault-tfstate-state" - key = "terraform.tfstate" - 
dynamodb_table = "vault-tfstate-state-lock" - encrypt = true - } -} - -// Chronos remote state -data "terraform_remote_state" "chronos" { - backend = "s3" - - config = { - region = "us-east-1" - bucket = "chronos-tfstate-state" - key = "terraform.tfstate" - dynamodb_table = "chronos-tfstate-state-lock" - encrypt = true - } -} - -terraform { - backend "s3" { - region = "us-east-1" - bucket = "base-tfstate-state" - key = "terraform.tfstate" - dynamodb_table = "base-tfstate-state-lock" - encrypt = true - } -} diff --git a/terraform/base/secret-sync.tf b/terraform/base/secret-sync.tf deleted file mode 100644 index 49cda1c3..00000000 --- a/terraform/base/secret-sync.tf +++ /dev/null @@ -1,37 +0,0 @@ -data "vault_generic_secret" "secret-sync" { - path = "secrets/secret-sync" -} - -resource "kubernetes_secret" "secret-sync-chronos" { - for_each = toset(["cert-manager", "default", "monitoring"]) - metadata { - name = "secret-sync" - namespace = each.key - } - - data = data.vault_generic_secret.secret-sync.data - - provider = kubernetes.chronos -} - -resource "kubernetes_secret" "secret-sync-sandbox" { - for_each = toset(["cert-manager", "default", "monitoring"]) - metadata { - name = "secret-sync" - namespace = each.key - } - - data = data.vault_generic_secret.secret-sync.data - - provider = kubernetes.sandbox -} - -resource "kubernetes_secret" "secret-sync-production" { - for_each = toset(["cert-manager", "default", "staging", "monitoring"]) - metadata { - name = "secret-sync" - namespace = each.key - } - - data = data.vault_generic_secret.secret-sync.data -} diff --git a/terraform/base/staging.tf b/terraform/base/staging.tf deleted file mode 100644 index a399205d..00000000 --- a/terraform/base/staging.tf +++ /dev/null @@ -1,5 +0,0 @@ -resource "kubernetes_namespace" "staging" { - metadata { - name = "staging" - } -} diff --git a/terraform/base/vault-flush.tf b/terraform/base/vault-flush.tf deleted file mode 100644 index c08cf13b..00000000 --- 
a/terraform/base/vault-flush.tf +++ /dev/null @@ -1,16 +0,0 @@ -// Note: we're not using the vault-flush module because currently -// in tf version 12 you can't use the for_each keyword within a module - -data "vault_generic_secret" "vault-flush" { - for_each = local.database_users - path = "secrets/production/default/${each.key}" -} - -resource "vault_generic_secret" "vault-flush" { - for_each = local.database_users - path = "secrets/production/default/${each.key}" - data_json = jsonencode(merge(data.vault_generic_secret.vault-flush[each.key].data, { - DATABASE_URL : "postgres://${each.key}:${module.postgres-cluster.passwords[each.key]}@${module.postgres-cluster.private-host}:${module.postgres-cluster.port}/${each.key}" - } - )) -} diff --git a/terraform/chronos/README.md b/terraform/chronos/README.md deleted file mode 100644 index ea289087..00000000 --- a/terraform/chronos/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Chronos - -Chronos is a kubernetes cluster that lives outside the lifecycle for our products. It's supposed to be configured once, then largely forgotten about (apart from occasional updates). - -## backends.tf - -Contains the terraform backends for each of the terraform projects (`chronos`, `vault`, and `base`). The `provider.tf` in each terraform project is configured to use the correct remote S3 backend. - -## main.tf - -Sets up the base Chronos cluster using our [Base Cluster Terraform Module](../modules/base_cluster). We provide custom values to traefik, vault-secret-sync, prometheus, and fluentd from files in the `helm` directory - -It also creates an infrastructure postgres database using our [Postgres Cluster Terraform Module](../modules/postgres_cluster). Additionally we create grafana and vault users within the database. 
- -## provider.tf - -Configures terraform to use the `chronos` remote S3 backend as well as the following providers - -* AWS -* DigitalOcean -* Helm & Kubernetes (pointing to the `chronos` cluster) -* Postgres (pointing to the infrastructure database) -* Random - -## grafana.tf - -Installs the [grafana helm chart](https://github.com/helm/charts/tree/master/stable/grafana) (on [grafana.pennlabs.org](https://grafana.pennlabs.org)) with - -* GitHub OAuth2 authentication within the `pennlabs` organization -* Prometheus data sources for `chronos`, `sandbox`, and `production` (with basic auth for the latter two). -* The Kubernetes node exporter full dashboard -* Our custom pod dashboard -* Our custom pod alerting dashboard - -## team-sync.tf - -Installs [team sync](github.com/pennlabs/docker-team-sync/) through helm to create vault policies for team leads based on their GitHub team within the `pennlabs` organization. - -## vault.tf - -Configures vault (on [vault.pennlabs.org](https://vault.pennlabs.org)) by - -* Creating an AWS KMS key to [auto unseal vault](https://www.vaultproject.io/docs/configuration/seal/awskms.html) -* Creating an IAM user and policy for vault to access said KMS key -* Creating a Kubernetes secret with the IAM credentials, KMS key ID, and a connection url for the vault user in the infrastructure postgres database -* Installing the [vault helm chart](https://github.com/hashicorp/vault-helm) -* [Creating the table vault needs](https://www.vaultproject.io/docs/configuration/storage/postgresql) in the postgres database using a local-exec - -## outputs.tf - -Outputs Kubernetes credentials for the `chronos` cluster so that [base](../base) can use those credentials to make additional changes to the cluster. 
diff --git a/terraform/chronos/backends.tf b/terraform/chronos/backends.tf deleted file mode 100644 index 4d429d9f..00000000 --- a/terraform/chronos/backends.tf +++ /dev/null @@ -1,17 +0,0 @@ -module "chronos_tfstate_backend" { - source = "git::https://github.com/cloudposse/terraform-aws-tfstate-backend.git?ref=tags/0.17.0" - name = "chronos-tfstate" - region = "us-east-1" -} - -module "vault_tfstate_backend" { - source = "git::https://github.com/cloudposse/terraform-aws-tfstate-backend.git?ref=tags/0.17.0" - name = "vault-tfstate" - region = "us-east-1" -} - -module "base_tfstate_backend" { - source = "git::https://github.com/cloudposse/terraform-aws-tfstate-backend.git?ref=tags/0.17.0" - name = "base-tfstate" - region = "us-east-1" -} diff --git a/terraform/chronos/grafana.tf b/terraform/chronos/grafana.tf deleted file mode 100644 index 33221f78..00000000 --- a/terraform/chronos/grafana.tf +++ /dev/null @@ -1,11 +0,0 @@ -resource "helm_release" "grafana" { - name = "grafana" - repository = "https://kubernetes-charts.storage.googleapis.com" - chart = "grafana" - version = "5.1.4" - // This needs to be set because secrets that grafana expects aren't set yet - wait = false - values = [ - file("helm/grafana.yaml") - ] -} diff --git a/terraform/chronos/helm/cert-manager.yaml b/terraform/chronos/helm/cert-manager.yaml deleted file mode 100644 index 1b4551cc..00000000 --- a/terraform/chronos/helm/cert-manager.yaml +++ /dev/null @@ -1 +0,0 @@ -installCRDs: true diff --git a/terraform/chronos/helm/fluentd.yaml b/terraform/chronos/helm/fluentd.yaml deleted file mode 100644 index 8bbc2a64..00000000 --- a/terraform/chronos/helm/fluentd.yaml +++ /dev/null @@ -1,83 +0,0 @@ -image: - repository: quay.io/fluentd_elasticsearch/fluentd - tag: v3.0.1 - -resources: - limits: - cpu: 100m - memory: 500Mi - requests: - cpu: 100m - memory: 200Mi - -elasticsearch: - auth: - enabled: false - setOutputHostEnvVar: false - scheme: "https" - logLevel: "info" - typeName: "fluentd" - 
-configMaps: - useDefaults: - systemInputConf: false - forwardInputConf: false - monitoringConf: false - outputConf: false - -secret: - - name: OUTPUT_USER - secret_name: fluentd - secret_key: ELASTIC_USER - - name: OUTPUT_PASSWORD - secret_name: fluentd - secret_key: ELASTIC_PASSWORD - - name: OUTPUT_HOSTS - secret_name: fluentd - secret_key: ELASTIC_HOSTS - -# Nearly identical to the default output.conf, but we had to manually put -# in the user and password fields since the chart doesn't include the fields -# if elasticsearch.auth is disabled (which we have to do in order to hand in -# the env variables through k8s secrets). -extraConfigMaps: - output.conf: | - - @type kubernetes_metadata - - - - @type null - - - - @id elasticsearch - @type "#{ENV['OUTPUT_TYPE']}" - @log_level "#{ENV['OUTPUT_LOG_LEVEL']}" - include_tag_key "#{ENV['OUTPUT_INCLUDE_TAG_KEY']}" - hosts "#{ENV['OUTPUT_HOSTS']}" - path "#{ENV['OUTPUT_PATH']}" - scheme "#{ENV['OUTPUT_SCHEME']}" - ssl_verify "#{ENV['OUTPUT_SSL_VERIFY']}" - ssl_version "#{ENV['OUTPUT_SSL_VERSION']}" - user "#{ENV['OUTPUT_USER']}" - password "#{ENV['OUTPUT_PASSWORD']}" - logstash_format "#{ENV['LOGSTASH_FORMAT']}" - logstash_prefix "#{ENV['LOGSTASH_PREFIX']}" - reconnect_on_error "#{ENV['OUTPUT_RECONNECT_ON_ERROR']}" - reload_on_failure "#{ENV['OUTPUT_RELOAD_ON_FAILURE']}" - reload_connections "#{ENV['OUTPUT_RELOAD_CONNECTIONS']}" - - @type "#{ENV['OUTPUT_BUFFER_TYPE']}" - path "#{ENV['OUTPUT_BUFFER_PATH']}" - flush_mode "#{ENV['OUTPUT_BUFFER_FLUSH_MODE']}" - retry_type "#{ENV['OUTPUT_BUFFER_RETRY_TYPE']}" - flush_thread_count "#{ENV['OUTPUT_BUFFER_FLUSH_THREAD_TYPE']}" - flush_interval "#{ENV['OUTPUT_BUFFER_FLUSH_INTERVAL']}" - retry_forever "#{ENV['OUTPUT_BUFFER_RETRY_FOREVER']}" - retry_max_interval "#{ENV['OUTPUT_BUFFER_RETRY_MAX_INTERVAL']}" - chunk_limit_size "#{ENV['OUTPUT_BUFFER_CHUNK_LIMIT']}" - queue_limit_length "#{ENV['OUTPUT_BUFFER_QUEUE_LIMIT']}" - overflow_action "#{ENV['OUTPUT_BUFFER_OVERFLOW_ACTION']}" - 
- diff --git a/terraform/chronos/helm/vault-secret-sync.yaml b/terraform/chronos/helm/vault-secret-sync.yaml deleted file mode 100644 index 0fec44b1..00000000 --- a/terraform/chronos/helm/vault-secret-sync.yaml +++ /dev/null @@ -1,7 +0,0 @@ -authenticator: - image: pennlabs/vault-approle-authenticator - tag: "85da74af10627d38367c47c7802d360b13669584" - -synchronizer: - image: postfinance/vault-kubernetes-synchronizer - tag: "0.1.3" diff --git a/terraform/chronos/helm/vault.yaml b/terraform/chronos/helm/vault.yaml deleted file mode 100644 index b49ef22a..00000000 --- a/terraform/chronos/helm/vault.yaml +++ /dev/null @@ -1,56 +0,0 @@ -global: - image: "vault:1.2.2" - -server: - standalone: - enabled: true - config: | - ui = true - - listener "tcp" { - tls_disable = 1 - address = "[::]:8200" - cluster_address = "[::]:8201" - } - - storage "postgresql" { - max_parallel = "4" - } - - seal "awskms" { - region = "us-east-1" - } - - dataStorage: - enabled: false - - extraSecretEnvironmentVars: - - envName: AWS_ACCESS_KEY_ID - secretName: vault - secretKey: AWS_ACCESS_KEY_ID - - envName: AWS_SECRET_ACCESS_KEY - secretName: vault - secretKey: AWS_SECRET_ACCESS_KEY - - envName: VAULT_AWSKMS_SEAL_KEY_ID - secretName: vault - secretKey: VAULT_AWSKMS_SEAL_KEY_ID - - envName: VAULT_PG_CONNECTION_URL - secretName: vault - secretKey: VAULT_PG_CONNECTION_URL - - service: - enabled: true - - ingress: - enabled: true - hosts: - - host: vault.pennlabs.org - paths: ["/"] - tls: - - hosts: - - vault.pennlabs.org - secretName: pennlabs-org-tls - -ui: - enabled: true - serviceType: ClusterIP diff --git a/terraform/chronos/main.tf b/terraform/chronos/main.tf deleted file mode 100644 index 4ea348a0..00000000 --- a/terraform/chronos/main.tf +++ /dev/null @@ -1,31 +0,0 @@ -// Chronos K8s cluster -module "chronos-cluster" { - source = "../modules/base_cluster" - name = "chronos" - node_count = 2 - node_size = "s-1vcpu-2gb" - traefik_values = [ - "${file("helm/traefik.yaml")}" - ] - 
cert_manager_values = [ - "${file("helm/cert-manager.yaml")}" - ] - vault_secret_sync_values = [ - "${file("helm/vault-secret-sync.yaml")}" - ] - prometheus_values = [ - "${file("helm/prometheus.yaml")}" - ] - fluentd_values = [ - "${file("helm/fluentd.yaml")}" - ] -} - -// Infrastructure DB -module "postgres-cluster" { - source = "../modules/postgres_cluster" - users = ["vault", "grafana"] - name = "infrastructure" - // TODO: (before release) make this more and add a replica - node_count = 1 -} diff --git a/terraform/chronos/outputs.tf b/terraform/chronos/outputs.tf deleted file mode 100644 index ecd50aa2..00000000 --- a/terraform/chronos/outputs.tf +++ /dev/null @@ -1,14 +0,0 @@ -output "endpoint" { - value = module.chronos-cluster.endpoint - sensitive = true -} - -output "token" { - value = module.chronos-cluster.token - sensitive = true -} - -output "cluster_ca_certificate" { - value = module.chronos-cluster.cluster_ca_certificate - sensitive = true -} diff --git a/terraform/chronos/provider.tf b/terraform/chronos/provider.tf deleted file mode 100644 index e8237f80..00000000 --- a/terraform/chronos/provider.tf +++ /dev/null @@ -1,62 +0,0 @@ -provider "digitalocean" { - version = "~> 1.22.0" -} - -provider "aws" { - version = "~> 2.44.0" - region = "us-east-1" -} - -// Chronos K8s cluster -provider "helm" { - version = "~> 1.3" - kubernetes { - load_config_file = false - host = module.chronos-cluster.endpoint - token = module.chronos-cluster.token - cluster_ca_certificate = base64decode( - module.chronos-cluster.cluster_ca_certificate - ) - } -} - -provider "kubernetes" { - version = "~> 1.11" - load_config_file = false - host = module.chronos-cluster.endpoint - token = module.chronos-cluster.token - cluster_ca_certificate = base64decode( - module.chronos-cluster.cluster_ca_certificate - ) -} - -// Infrastructure DB -provider "postgresql" { - version = "~> 1.6" - host = module.postgres-cluster.host - port = module.postgres-cluster.port - database = "defaultdb" - 
expected_version = module.postgres-cluster.version - username = module.postgres-cluster.admin-user - password = module.postgres-cluster.admin-password - superuser = false - sslmode = "require" -} - -provider "random" { - version = "~> 2.2" -} - -provider "time" { - version = "~> 0.5" -} - -terraform { - backend "s3" { - region = "us-east-1" - bucket = "chronos-tfstate-state" - key = "terraform.tfstate" - dynamodb_table = "chronos-tfstate-state-lock" - encrypt = true - } -} diff --git a/terraform/chronos/team-sync.tf b/terraform/chronos/team-sync.tf deleted file mode 100644 index e92e3e39..00000000 --- a/terraform/chronos/team-sync.tf +++ /dev/null @@ -1,10 +0,0 @@ -resource "helm_release" "team-sync" { - name = "team-sync" - repository = "https://helm.pennlabs.org" - chart = "icarus" - version = "0.1.20" - - values = [ - file("helm/team-sync.yaml") - ] -} diff --git a/terraform/chronos/utils/temp-vault-ingress.yaml b/terraform/chronos/utils/temp-vault-ingress.yaml deleted file mode 100644 index e1a3bac3..00000000 --- a/terraform/chronos/utils/temp-vault-ingress.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: temp-vault-ingress - namespace: default -spec: - rules: - - host: vault.upenn.club - http: - paths: - - backend: - serviceName: vault - servicePort: 8200 - path: / diff --git a/terraform/chronos/vault.tf b/terraform/chronos/vault.tf deleted file mode 100644 index b4247a01..00000000 --- a/terraform/chronos/vault.tf +++ /dev/null @@ -1,91 +0,0 @@ -resource "aws_kms_key" "vault" { - description = "Key to unseal vault" - deletion_window_in_days = 10 - - tags = { - created-by = "terraform" - } -} - -resource "aws_iam_user" "vault" { - name = "vault" - - tags = { - created-by = "terraform" - } -} - -resource "aws_iam_access_key" "vault" { - user = aws_iam_user.vault.name -} - -resource "aws_iam_user_policy" "vault" { - name = "vault" - user = aws_iam_user.vault.name - - policy = < /etc/vault.d/vault.hcl +ui = true 
+ +listener "tcp" { + address = "0.0.0.0:8200" + tls_cert_file = "/opt/vault/tls/tls.crt" + tls_key_file = "/opt/vault/tls/tls.key" +} + +storage "postgresql" { + connection_url = "${connection_url}" + max_parallel = "4" +} + +seal "awskms" { + region = "us-east-1" + kms_key_id = "${kms_key_id}" +} +EOF diff --git a/terraform/gh-actions.tf b/terraform/gh-actions.tf new file mode 100644 index 00000000..ab1b961e --- /dev/null +++ b/terraform/gh-actions.tf @@ -0,0 +1,23 @@ +resource "aws_iam_user" "gh-actions" { + name = "gh-actions" + + tags = { + created-by = "terraform" + } +} + +resource "aws_iam_access_key" "gh-actions" { + user = aws_iam_user.gh-actions.name +} + +resource "aws_iam_user_policy" "gh-actions-assume-kubectl" { + name = "kubectl" + user = aws_iam_user.gh-actions.name + policy = data.aws_iam_policy_document.assume-kubectl.json +} + +resource "aws_iam_user_policy" "gh-actions-view-k8s" { + name = "view-eks" + user = aws_iam_user.gh-actions.name + policy = data.aws_iam_policy_document.view-k8s.json +} diff --git a/terraform/helm/aws-node-termination-handler.yaml b/terraform/helm/aws-node-termination-handler.yaml new file mode 100644 index 00000000..7a4e3adc --- /dev/null +++ b/terraform/helm/aws-node-termination-handler.yaml @@ -0,0 +1,13 @@ +## enableSpotInterruptionDraining If true, drain nodes when the spot interruption termination notice is received +enableSpotInterruptionDraining: "true" + +# nodeSelector tells both linux and windows daemonsets where to place the node-termination-handler +# pods. By default, this value is empty and every node will receive a pod. 
+nodeSelector: + node.kubernetes.io/lifecycle: spot + +enablePrometheusServer: true + +podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9092" diff --git a/terraform/base/helm-production/bitwarden.yaml b/terraform/helm/bitwarden.yaml similarity index 100% rename from terraform/base/helm-production/bitwarden.yaml rename to terraform/helm/bitwarden.yaml diff --git a/terraform/helm/db-backup.yaml b/terraform/helm/db-backup.yaml new file mode 100644 index 00000000..18ad4ba7 --- /dev/null +++ b/terraform/helm/db-backup.yaml @@ -0,0 +1,13 @@ +cronjobs: + - name: db-backup + schedule: "21 2 * * *" + secret: db-backup + image: pennlabs/pg-s3-backup + tag: dca1814fb78f71d7646a957c1e71a4595b8175d8 + extraEnv: + - name: AWS_DEFAULT_REGION + value: "us-east-1" + +rbac: + createSA: true + roleARN: ${roleARN} diff --git a/terraform/chronos/helm/grafana.yaml b/terraform/helm/grafana.yaml similarity index 75% rename from terraform/chronos/helm/grafana.yaml rename to terraform/helm/grafana.yaml index 3de44186..038ff05d 100644 --- a/terraform/chronos/helm/grafana.yaml +++ b/terraform/helm/grafana.yaml @@ -63,29 +63,11 @@ datasources: datasources.yaml: apiVersion: 1 datasources: - - name: Prometheus-Chronos + - name: Prometheus type: prometheus url: http://prometheus-server.monitoring access: proxy orgId: 1 - - name: Prometheus-Sandbox - type: prometheus - url: https://prometheus-sandbox.pennlabs.org - access: proxy - orgId: 1 - basicAuth: true - basicAuthUser: ${SANDBOX_PROMETHEUS_USER} - secureJsonData: - basicAuthPassword: ${SANDBOX_PROMETHEUS_PASSWORD} - - name: Prometheus-Production - type: prometheus - url: https://prometheus-production.pennlabs.org - access: proxy - orgId: 1 - basicAuth: true - basicAuthUser: ${PRODUCTION_PROMETHEUS_USER} - secureJsonData: - basicAuthPassword: ${PRODUCTION_PROMETHEUS_PASSWORD} grafana.ini: server: diff --git a/terraform/chronos/helm/prometheus.yaml b/terraform/helm/prometheus.yaml similarity index 100% rename from 
terraform/chronos/helm/prometheus.yaml rename to terraform/helm/prometheus.yaml diff --git a/terraform/chronos/helm/team-sync.yaml b/terraform/helm/team-sync.yaml similarity index 52% rename from terraform/chronos/helm/team-sync.yaml rename to terraform/helm/team-sync.yaml index 2fb8e7fd..9a8419e4 100644 --- a/terraform/chronos/helm/team-sync.yaml +++ b/terraform/helm/team-sync.yaml @@ -3,7 +3,11 @@ cronjobs: schedule: "*/10 * * * *" secret: team-sync image: pennlabs/team-sync - tag: fa5c54f2edb5311f1e6dc543a938b5254c7b0aca + tag: 98bd93b34e2269d3d9d534f05fb8d170077eeb32 extraEnv: - name: VAULT_ADDR - value: http://vault.default:8200 + value: https://vault.pennlabs.org + +rbac: + createSA: true + roleARN: ${roleARN} diff --git a/terraform/chronos/helm/traefik.yaml b/terraform/helm/traefik.yaml similarity index 96% rename from terraform/chronos/helm/traefik.yaml rename to terraform/helm/traefik.yaml index 116ce05e..36a9b03e 100644 --- a/terraform/chronos/helm/traefik.yaml +++ b/terraform/helm/traefik.yaml @@ -1,4 +1,4 @@ -replicas: 2 +replicas: ${count} deploymentStrategy: rollingUpdate: diff --git a/terraform/helm/vault-secret-sync.yaml b/terraform/helm/vault-secret-sync.yaml new file mode 100644 index 00000000..e54b46c7 --- /dev/null +++ b/terraform/helm/vault-secret-sync.yaml @@ -0,0 +1,6 @@ +namespaces: + - default + - cert-manager + # - staging + +role_arn: ${role_arn} diff --git a/terraform/iam.tf b/terraform/iam.tf new file mode 100644 index 00000000..462f556f --- /dev/null +++ b/terraform/iam.tf @@ -0,0 +1,15 @@ +module "iam-products" { + for_each = local.iam_service_accounts + source = "./modules/iam" + role = each.key + oidc_issuer_url = module.eks-production.cluster_oidc_issuer_url + oidc_provider_arn = module.eks-production.oidc_provider_arn +} + +module "iam-secret-sync" { + source = "./modules/iam" + role = "secret-sync" + namespaces = ["default", "cert-manager", "staging"] + oidc_issuer_url = module.eks-production.cluster_oidc_issuer_url + 
oidc_provider_arn = module.eks-production.oidc_provider_arn +} diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 00000000..27a835a6 --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,51 @@ +locals { + database_users = setunion(local.products, + toset([ + "bitwarden", + "vault", + ]) + ) + products = toset([ + "common-funding-application", + "first-year-hub", + "office-hours-queue", + "penn-clubs", + "penn-courses", + "platform", + "platform-dev", + "student-life" + ]) + iam_service_accounts = setunion(local.products, + toset([ + "team-sync", + "db-backup", + ]) + ) + platform_members = toset([ + "armaan", + "peyton" + ]) + k8s_cluster_name = "production" + k8s_cluster_size = 10 + vault_ami = "ami-0eec2c28d4dd94628" + domains = toset([ + "ohq.io", + "pennbasics.com", + "penncfa.com", + "pennclubs.com", + "penncoursealert.com", + "penncourseplan.com", + "penncoursereview.com", // Currently still in Google Domains + "penncourses.org", + "pennlabs.org", + "pennmobile.org", + ]) + traefik_lb_name = "a3b77cc4561e649d4bcc2a89e1b63d7d" +} + +data "aws_iam_policy_document" "assume-kubectl" { + statement { + actions = ["sts:AssumeRole"] + resources = [aws_iam_role.kubectl.arn] + } +} diff --git a/terraform/modules/README.md b/terraform/modules/README.md index 3d1f91fe..f841ff3a 100644 --- a/terraform/modules/README.md +++ b/terraform/modules/README.md @@ -5,5 +5,6 @@ Reusable terraform modules to keep our infrastructure DRY. 
We've created the following modules: * [Base Cluster](base_cluster) - a barebones K8s cluster with additional software installed -* [Postgres Cluster](postgres_cluster) - a module to create a postgres cluster as well as users/databases with correct default permissions +* [IAM](iam) - a module to create an IAM role that can be assumed from Kubernetes +* [Vault](vault) - a module to configure vault with all the secrets we need * [Vault Flush](vault_flush) - a module to flush updated secrets to vault diff --git a/terraform/modules/base_cluster/README.md b/terraform/modules/base_cluster/README.md index d2587e69..f65a5fbe 100644 --- a/terraform/modules/base_cluster/README.md +++ b/terraform/modules/base_cluster/README.md @@ -1,32 +1,18 @@ # Base Cluster -A terraform module to create a Kubernetes cluster on DigitalOcean with some additional software installed. +A terraform module to populate a Kubernetes cluster with some useful additional software. ## Inputs -| Name | Description | -|--------------------------|---------------------------------------------------------------| -| name | Name of the Kuberenetes cluster | -| cluster_version | Kubernetes version of the cluster (Default: 1.17.5-do.0) | -| node_count | Number of nodes in cluster | -| node_size | DigitalOcean size for Kubernetes nodes (Default: s-2vcpu-4gb) | -| traefik_values | Values to provide to the traefik helm chart | -| cert_manager_values | Values to provide to the Cert Manager helm chart | -| vault_secret_sync_values | Values to provide to the Vault Secret Sync helm chart | -| prometheus_values | Values to provide to the Prometheus helm chart | -| fluentd_values | Values to provide to the Fluentd helm chart | - -## Outputs - -| Name | Description | -|------------------------|---------------------------------------| -| endpoint | Endpoint of the created cluster | -| token | Token to access the created cluster | -| cluster_ca_certificate | CA Certificate of the created cluster | +| Name | Description | +| 
------------------------ | ----------------------------------------------------- | +| traefik_values | Values to provide to the traefik helm chart | +| vault_secret_sync_values | Values to provide to the Vault Secret Sync helm chart | +| prometheus_values | Values to provide to the Prometheus helm chart | ## main.tf -Creates the actual Kubernetes cluster in DigitalOcean +Doesn't do anything ## cert-manager.tf @@ -43,7 +29,6 @@ Configure our monitoring stack by * Creating the `monitoring` namespace * Installing the [prometheus helm chart](https://github.com/helm/charts/tree/master/stable/prometheus) with the inputted values -* Installing [fluentd-elasticsearch helm chart](https://github.com/kiwigrid/helm-charts/tree/master/charts/fluentd-elasticsearch) with the inputted values ## traefik.tf diff --git a/terraform/modules/base_cluster/cert-manager.tf b/terraform/modules/base_cluster/cert-manager.tf index 172b4a44..ca671e5b 100644 --- a/terraform/modules/base_cluster/cert-manager.tf +++ b/terraform/modules/base_cluster/cert-manager.tf @@ -8,11 +8,14 @@ resource "helm_release" "cert-manager" { name = "cert-manager" repository = "https://charts.jetstack.io" chart = "cert-manager" - version = "0.15.0" + version = "1.1.0" namespace = kubernetes_namespace.cert-manager.metadata[0].name // This is set to ensure that cert-manager is working before the CRs are applied atomic = true - values = var.cert_manager_values + set { + name = "installCRDs" + value = true + } } resource "time_sleep" "cert-manager-cr" { @@ -26,9 +29,7 @@ resource "helm_release" "labs-clusterissuer" { repository = "https://helm.pennlabs.org" chart = "helm-wrapper" version = "0.1.0" - values = [ - "${file("${path.module}/clusterissuer.yaml")}" - ] + values = [file("${path.module}/clusterissuer.yaml")] depends_on = [ time_sleep.cert-manager-cr @@ -36,15 +37,11 @@ resource "helm_release" "labs-clusterissuer" { } resource "helm_release" "pennlabs-wildcard-cert" { - for_each = toset(["default", "monitoring"]) 
name = "pennlabs-wildcard-cert" repository = "https://helm.pennlabs.org" chart = "helm-wrapper" version = "0.1.0" - namespace = each.key - values = [ - "${file("${path.module}/wildcard-cert.yaml")}" - ] + values = [file("${path.module}/wildcard-cert.yaml")] depends_on = [ time_sleep.cert-manager-cr diff --git a/terraform/modules/base_cluster/main.tf b/terraform/modules/base_cluster/main.tf index b31030f7..bc010a3d 100644 --- a/terraform/modules/base_cluster/main.tf +++ b/terraform/modules/base_cluster/main.tf @@ -1,15 +1 @@ -resource "digitalocean_kubernetes_cluster" "cluster" { - name = var.name - region = "nyc1" - version = var.cluster_version - - node_pool { - name = var.name - size = var.node_size - node_count = var.node_count - } - - lifecycle { - prevent_destroy = true - } -} +// Nothing here diff --git a/terraform/modules/base_cluster/monitoring.tf b/terraform/modules/base_cluster/monitoring.tf index bf6cb59d..af69c2c3 100644 --- a/terraform/modules/base_cluster/monitoring.tf +++ b/terraform/modules/base_cluster/monitoring.tf @@ -6,22 +6,10 @@ resource "kubernetes_namespace" "monitoring" { resource "helm_release" "prometheus" { name = "prometheus" - repository = "https://kubernetes-charts.storage.googleapis.com" + repository = "https://charts.helm.sh/stable" chart = "prometheus" version = "11.2.3" namespace = kubernetes_namespace.monitoring.metadata[0].name values = var.prometheus_values } - -resource "helm_release" "fluentd" { - name = "fluentd" - repository = "https://kiwigrid.github.io" - chart = "fluentd-elasticsearch" - version = "9.2.0" - namespace = kubernetes_namespace.monitoring.metadata[0].name - // This needs to be set because secrets that fluentd expects aren't set yet - wait = false - - values = var.fluentd_values -} diff --git a/terraform/modules/base_cluster/outputs.tf b/terraform/modules/base_cluster/outputs.tf deleted file mode 100644 index fcdc5c86..00000000 --- a/terraform/modules/base_cluster/outputs.tf +++ /dev/null @@ -1,14 +0,0 @@ 
-output "endpoint" { - value = digitalocean_kubernetes_cluster.cluster.endpoint - sensitive = true -} - -output "token" { - value = digitalocean_kubernetes_cluster.cluster.kube_config[0].token - sensitive = true -} - -output "cluster_ca_certificate" { - value = digitalocean_kubernetes_cluster.cluster.kube_config[0].cluster_ca_certificate - sensitive = true -} diff --git a/terraform/modules/base_cluster/traefik.tf b/terraform/modules/base_cluster/traefik.tf index 70958919..167ec2a0 100644 --- a/terraform/modules/base_cluster/traefik.tf +++ b/terraform/modules/base_cluster/traefik.tf @@ -1,6 +1,6 @@ resource "helm_release" "traefik" { name = "traefik" - repository = "https://kubernetes-charts.storage.googleapis.com" + repository = "https://charts.helm.sh/stable" chart = "traefik" version = "1.87.2" namespace = "kube-system" diff --git a/terraform/modules/base_cluster/variables.tf b/terraform/modules/base_cluster/variables.tf index b5ef1ef8..7aa869e5 100644 --- a/terraform/modules/base_cluster/variables.tf +++ b/terraform/modules/base_cluster/variables.tf @@ -1,38 +1,9 @@ -variable "name" { - description = "Name for the cluster" - type = string -} - -// Kubernetes cluster inputs -variable "cluster_version" { - description = "Kubernetes version of the cluster" - type = string - default = "1.17.9-do.0" -} - -variable "node_count" { - description = "Number of nodes in cluster" - type = number -} - -variable "node_size" { - description = "DigitalOcean size for Kubernetes nodes" - type = string - default = "s-2vcpu-4gb" -} - // Traefik values variable "traefik_values" { description = "Values to provide to the Traefik helm chart" type = list(string) } -// Cert Manager values -variable "cert_manager_values" { - description = "Values to provide to the Cert Manager helm chart" - type = list(string) -} - // Vault Secret Sync values variable "vault_secret_sync_values" { description = "Values to provide to the Vault Secret Sync helm chart" @@ -44,9 +15,3 @@ variable 
"prometheus_values" { description = "Values to provide to the Prometheus helm chart" type = list(string) } - -// Fluentd values -variable "fluentd_values" { - description = "Values to provide to the Fluentd helm chart" - type = list(string) -} diff --git a/terraform/modules/base_cluster/vault-secret-sync.tf b/terraform/modules/base_cluster/vault-secret-sync.tf index c433d6bb..20fc77ad 100644 --- a/terraform/modules/base_cluster/vault-secret-sync.tf +++ b/terraform/modules/base_cluster/vault-secret-sync.tf @@ -2,7 +2,7 @@ resource "helm_release" "vault-secret-sync" { name = "vault-secret-sync" repository = "https://helm.pennlabs.org" chart = "vault-secret-sync" - version = "0.1.3" + version = "0.1.4" values = var.vault_secret_sync_values depends_on = [ kubernetes_namespace.monitoring, diff --git a/terraform/modules/domain/README.md b/terraform/modules/domain/README.md new file mode 100644 index 00000000..0983bb2b --- /dev/null +++ b/terraform/modules/domain/README.md @@ -0,0 +1,19 @@ +# Domain + +A terraform module to configure basic DNS records for a Penn Labs domain that: + +* Allow for LE through our cloudflare proxy domain +* Point the apex domain to traefik +* CNAME all subdomains to the apex domain +* Create SPF, DKIM, and CNAME records to send mail through mailgun +* Configure MX records to receive mail through Google. 
+ +| Name | Description | +| --------------- | ------------------------------------- | +| domain | Domain name to configure | +| traefik_lb_name | DNS name of the traefik load balancer | +| traefik_zone_id | Zone ID of the traefik load balancer | + +## Outputs + +None diff --git a/terraform/modules/domain/main.tf b/terraform/modules/domain/main.tf new file mode 100644 index 00000000..420159ea --- /dev/null +++ b/terraform/modules/domain/main.tf @@ -0,0 +1,61 @@ +resource "aws_route53_zone" "domain" { + name = var.domain +} + +resource "aws_route53_record" "acme-challenge" { + zone_id = aws_route53_zone.domain.zone_id + name = "_acme-challenge" + type = "CNAME" + ttl = 3600 + records = ["_acme-challenge.upenn.club."] +} + +resource "aws_route53_record" "apex-domain" { + zone_id = aws_route53_zone.domain.zone_id + name = "" + type = "A" + + alias { + name = var.traefik_lb_name + zone_id = var.traefik_zone_id + evaluate_target_health = false + } +} + +resource "aws_route53_record" "wildcard" { + zone_id = aws_route53_zone.domain.zone_id + name = "*" + type = "CNAME" + ttl = 3600 + records = [aws_route53_zone.domain.name] +} + +resource "aws_route53_record" "spf" { + zone_id = aws_route53_zone.domain.zone_id + name = "" + type = "TXT" + ttl = 3600 + records = ["v=spf1 include:mailgun.org ~all"] +} + +resource "aws_route53_record" "mailgun" { + zone_id = aws_route53_zone.domain.zone_id + name = "email" + type = "CNAME" + ttl = 3600 + records = ["mailgun.org."] +} + +resource "aws_route53_record" "gmail" { + zone_id = aws_route53_zone.domain.zone_id + name = "" + type = "MX" + ttl = 3600 + records = [ + "5 gmr-smtp-in.l.google.com.", + "10 alt1.gmr-smtp-in.l.google.com.", + "20 alt2.gmr-smtp-in.l.google.com.", + "30 alt3.gmr-smtp-in.l.google.com.", + "40 alt4.gmr-smtp-in.l.google.com.", + ] +} diff --git a/terraform/modules/domain/variables.tf b/terraform/modules/domain/variables.tf new file mode 100644 index 00000000..946edec8 --- /dev/null +++ 
b/terraform/modules/domain/variables.tf @@ -0,0 +1,14 @@ +variable "domain" { + description = "Domain name" + type = string +} + +variable "traefik_lb_name" { + description = "DNS name of the traefik load balancer" + type = string +} + +variable "traefik_zone_id" { + description = "Zone ID of the traefik load balancer" + type = string +} diff --git a/terraform/modules/iam/README.md b/terraform/modules/iam/README.md new file mode 100644 index 00000000..318f657e --- /dev/null +++ b/terraform/modules/iam/README.md @@ -0,0 +1,16 @@ +# IAM + +A terraform module to create an IAM role that can be assumed by a Kubernetes Service Account with the same name. + +## Inputs + +| Name | Description | +| ----------------- | --------------------------------------- | +| role | Name of K8s SA (and generated IAM role) | +| namespaces | Namespaces of the K8s SA | +| oidc_issuer_url | URL of the K8s oidc issuer | +| oidc_provider_arn | ARN of the K8s oidc issuer | + +## Outputs + +`role-id` and `role-arn`: the ID and ARN of the created IAM role diff --git a/terraform/modules/iam/main.tf b/terraform/modules/iam/main.tf new file mode 100644 index 00000000..4472478e --- /dev/null +++ b/terraform/modules/iam/main.tf @@ -0,0 +1,25 @@ +resource "aws_iam_role" "role" { + assume_role_policy = data.aws_iam_policy_document.k8s.json + name = var.role + tags = { + created-by = "terraform" + } +} + +data "aws_iam_policy_document" "k8s" { + statement { + actions = ["sts:AssumeRoleWithWebIdentity"] + effect = "Allow" + + condition { + test = "StringEquals" + variable = "${replace(var.oidc_issuer_url, "https://", "")}:sub" + values = [for namespace in var.namespaces : "system:serviceaccount:${namespace}:${var.role}"] + } + + principals { + identifiers = [var.oidc_provider_arn] + type = "Federated" + } + } +} diff --git a/terraform/modules/iam/outputs.tf b/terraform/modules/iam/outputs.tf new file mode 100644 index 00000000..895c6e2b --- /dev/null +++ b/terraform/modules/iam/outputs.tf @@ -0,0 +1,7 @@ +output "role-id" { + value = aws_iam_role.role.id +} + 
+output "role-arn" { + value = aws_iam_role.role.arn +} diff --git a/terraform/modules/iam/variables.tf b/terraform/modules/iam/variables.tf new file mode 100644 index 00000000..bf58e537 --- /dev/null +++ b/terraform/modules/iam/variables.tf @@ -0,0 +1,20 @@ +variable "role" { + description = "Name of K8s SA (and generated IAM role)" + type = string +} + +variable "namespaces" { + description = "Namespace(s) of the k8s SA" + type = set(string) + default = ["default"] +} + +variable "oidc_issuer_url" { + description = "URL of the K8s oidc issuer" + type = string +} + +variable "oidc_provider_arn" { + description = "ARN of the K8s oidc issuer" + type = string +} diff --git a/terraform/modules/postgres_cluster/README.md b/terraform/modules/postgres_cluster/README.md deleted file mode 100644 index e48064d1..00000000 --- a/terraform/modules/postgres_cluster/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Postgres Cluster - -A terraform module to create a postgres cluster on DigitalOcean and create users and databases such that a user can only access the database with the same name as it. 
- -## Inputs - -| Name | Description | -|-----------------|-----------------------------------------------------------------| -| name | Name for database cluster | -| node_count | Number of nodes in cluster | -| node_size | DigitalOcean size for database nodes (Default: db-s-1vcpu-1gb) | -| cluster_version | Postgres version of the cluster (Default: 11) | -| users | List of names to generate DBs and users from | - -## Outputs - -| Name | Description | -|----------------|---------------------------------------------------------------| -| host | Host of the created cluster | -| private-host | Private Host of the created cluster | -| port | Port of the created cluster | -| admin-user | Admin username of the created cluster | -| admin-password | Admin password of the created cluster | -| version | Version of the created cluster | -| passwords | A map from usernames to passwords of all the `users` provided | diff --git a/terraform/modules/postgres_cluster/main.tf b/terraform/modules/postgres_cluster/main.tf deleted file mode 100644 index 72fcbf37..00000000 --- a/terraform/modules/postgres_cluster/main.tf +++ /dev/null @@ -1,50 +0,0 @@ -resource "digitalocean_database_cluster" "postgres" { - name = var.name - engine = "pg" - size = var.node_size - region = "nyc1" - node_count = var.node_count - version = var.cluster_version - - lifecycle { - prevent_destroy = true - } -} - -resource "random_password" "password" { - for_each = var.users - length = 64 - special = false -} - -resource "postgresql_database" "db" { - for_each = var.users - name = each.key - owner = postgresql_role.role[each.key].name -} - -resource "postgresql_role" "role" { - for_each = var.users - name = each.key - login = true - password = random_password.password[each.key].result -} - -resource "postgresql_grant" "grant" { - for_each = var.users - database = postgresql_database.db[each.key].name - role = postgresql_role.role[each.key].name - schema = "public" - object_type = "table" - privileges = 
["SELECT", "INSERT", "UPDATE", "DELETE", "TRUNCATE", "REFERENCES", "TRIGGER"] -} - -resource "postgresql_default_privileges" "privileges" { - for_each = var.users - database = postgresql_database.db[each.key].name - role = postgresql_role.role[each.key].name - owner = postgresql_role.role[each.key].name - schema = "public" - object_type = "table" - privileges = ["SELECT", "INSERT", "UPDATE", "DELETE", "TRUNCATE", "REFERENCES", "TRIGGER"] -} diff --git a/terraform/modules/postgres_cluster/outputs.tf b/terraform/modules/postgres_cluster/outputs.tf deleted file mode 100644 index 835355cf..00000000 --- a/terraform/modules/postgres_cluster/outputs.tf +++ /dev/null @@ -1,37 +0,0 @@ -output "host" { - value = digitalocean_database_cluster.postgres.host - sensitive = true -} - -output "private-host" { - value = digitalocean_database_cluster.postgres.private_host - sensitive = true -} - -output "port" { - value = digitalocean_database_cluster.postgres.port - sensitive = true -} - -output "admin-user" { - value = digitalocean_database_cluster.postgres.user - sensitive = true -} - -output "admin-password" { - value = digitalocean_database_cluster.postgres.password - sensitive = true -} - -output "version" { - value = digitalocean_database_cluster.postgres.version - sensitive = false -} - -output "passwords" { - value = { - for user in postgresql_role.role : - user.name => user.password - } - sensitive = true -} diff --git a/terraform/modules/postgres_cluster/variables.tf b/terraform/modules/postgres_cluster/variables.tf deleted file mode 100644 index 0862b08f..00000000 --- a/terraform/modules/postgres_cluster/variables.tf +++ /dev/null @@ -1,27 +0,0 @@ -variable "name" { - description = "Name for the postgres database cluster" - type = string -} - -variable "node_count" { - description = "Number of nodes in cluster" - type = number -} - -variable "node_size" { - description = "DigitalOcean size for database nodes" - type = string - default = "db-s-1vcpu-1gb" -} - -variable 
"cluster_version" { - description = "Postgres version of the cluster" - type = number - default = 11 -} - -variable "users" { - description = "List of names to generate DBs and users from" - type = set(string) -} - diff --git a/terraform/vault/README.md b/terraform/modules/vault/README.md similarity index 61% rename from terraform/vault/README.md rename to terraform/modules/vault/README.md index 7913b888..ec71ac55 100644 --- a/terraform/vault/README.md +++ b/terraform/modules/vault/README.md @@ -7,66 +7,50 @@ We use vault to store any secrets that our products (or infrastructure) need to Defines the following inputs to store within vault. These variables can be provided in a [few different ways](https://www.terraform.io/docs/configuration/variables.html#assigning-values-to-root-module-variables) but environment variables appear to be the easiest | | Description | -|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| VAULT_TOKEN | The root vault token you just generated | +| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | CF_API_KEY | The [Global API Key](https://cert-manager.io/docs/configuration/acme/dns01/cloudflare/#api-keys) of the Penn Labs Cloudflare account | | GH_PERSONAL_TOKEN | A [GitHub Personal Access token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) of the Penn Labs Admin account | | GF_GH_CLIENT_ID | The Client ID to the Grafana Penn Labs OAuth2 application on Github | | GF_SLACK_URL | Slack notification URL used for Grafana notifications | -| ELASTIC_PASSWORD | The password to the managed elasticsearch instance | -| ELASTIC_HOST | The host to the managed elasticsearch instance 
(format should be https://host:port) | | GF_GH_CLIENT_SECRET | The Client Secret to the Grafana Penn Labs OAuth2 application on Github | +| SECRET_SYNC_ARN | ARN of the secret-sync role | +| TEAM_SYNC_ARN | ARN of the team-sync role | ## main.tf - Sets up the base vault configuration we need. It involves * Enabling the [key-value secrets engine](https://www.vaultproject.io/docs/secrets/kv/kv-v2) on `secrets/` -* Enabling the [approle auth backend](https://www.vaultproject.io/docs/auth/approle) on `/approle` +* Enabling the [aws auth backend](https://www.vaultproject.io/docs/auth/aws) on `/aws` * Enabling [github auth backend](https://www.vaultproject.io/docs/auth/github) on `/github` * Creating an admin vault policy * Mapping the SRE team on GitHub to the admin policy -## provider.tf - -Configures terraform to use the `vault` remote S3 backend as well as the following providers - -* Vault -* Random - ## cert-manager -Saves the Cloudflare API Key in vault so that it will be synced to the `cert-manager` namespace in `chronos`, `sandbox`, and `production`. +Saves the Cloudflare API Key in vault so that it will be synced to the `cert-manager` namespace in `sandbox` and `production`. 
-## monitoring.tf +## grafana.tf -Configures grafana and prometheus by: +Configures grafana by: -* Creating passwords to protect the prometheus ingresses in `sandbox` and `production -* Generating a correctly formatted secret within vault using bcrypt that traefik uses to enforce HTTP basic auth -* Generating a secure password for the admin grafana user in `chronos` -* Saving the above-mentioned secrets, as well as the Grafana Penn Labs OAuth2 credentials in a secret that will be synced to the `default` namespace on `chronos` +* Generating a secure password for the admin grafana user in `production` +* Saving the above-mentioned password, as well as the Grafana Penn Labs OAuth2 credentials in a secret that will be synced to the `default` namespace on `production` ## secret-sync.tf Configures secret sync by: * Creating a vault policy for secret-sync (stored in `policies/secret-sync.hcl`) -* Creating an AppRole secret-sync user -* Saving the role id and secret id in a secret in vault that will later be synced into our Kubernetes clusters +* Mapping the secret-sync IAM role to that policy ## team-sync.tf Configures team sync by: * Creating a vault policy for team-sync (stored in `policies/team-sync.hcl`) -* Creating an AppRole team-sync user -* Saving the role id and secret id in a secret that will be synced to the `default` namespace in `chronos` - -## terraform-user.tf - -Creates a terraform AppRole user with admin privileges +* Mapping the team-sync IAM role to that policy +* Saving a GitHub token in a secret that will be synced to the `default` namespace in `production` ## outputs.tf -Exports the role id and secret id of the terraform AppRole vault user so that [base](../base) can use those credentials to make additional changes to vault. +Exports the secrets engine path so that vault resources outside this module can depend on this module being configured. 
diff --git a/terraform/vault/cert-manager.tf b/terraform/modules/vault/cert-manager.tf similarity index 56% rename from terraform/vault/cert-manager.tf rename to terraform/modules/vault/cert-manager.tf index 54227f0b..162f5162 100644 --- a/terraform/vault/cert-manager.tf +++ b/terraform/modules/vault/cert-manager.tf @@ -1,10 +1,6 @@ resource "vault_generic_secret" "cloudflare-api-key" { - for_each = toset(["chronos", "sandbox", "production"]) + for_each = toset(["sandbox", "production"]) path = "${vault_mount.secrets.path}/${each.key}/cert-manager/cloudflare-api-key-secret" - data_json = < Date: Fri, 12 Mar 2021 20:33:30 -0500 Subject: [PATCH 7/7] Lint Terraform (#58) * Lint * unlint * Fix * Fix again * Lint * Lint again --- .github/cdk/main.ts | 27 ++++++++++++--------- .github/workflows/cdkactions_approve.yaml | 12 --------- .github/workflows/cdkactions_terraform.yaml | 14 +++++++++++ terraform/modules/vault_flush/variables.tf | 2 +- 4 files changed, 30 insertions(+), 25 deletions(-) delete mode 100644 .github/workflows/cdkactions_approve.yaml create mode 100644 .github/workflows/cdkactions_terraform.yaml diff --git a/.github/cdk/main.ts b/.github/cdk/main.ts index 2da179c6..ad2a9c8a 100644 --- a/.github/cdk/main.ts +++ b/.github/cdk/main.ts @@ -1,25 +1,28 @@ -import { App, Job, Stack, Workflow } from "cdkactions"; +import { App, CheckoutJob, Stack, Workflow } from "cdkactions"; import { CDKPublishStack } from "@pennlabs/kraken" import { Construct } from "constructs"; -export class AutoApproveStack extends Stack { +class TerraformLintStack extends Stack { constructor(scope: Construct, name: string) { super(scope, name); - const workflow = new Workflow(this, 'approve', { - name: 'Auto Approve dependabot PRs', - on: 'pullRequest', + const workflow = new Workflow(this, 'terraform', { + name: 'Lint terraform files', + on: { + push: { + paths: ['terraform/**.tf'] + } + }, }); - new Job(workflow, 'approve', { + new CheckoutJob(workflow, 'lint', { runsOn: 
'ubuntu-latest', steps: [ { - uses: 'hmarr/auto-approve-action@v2.0.0', - if: "github.actor == 'dependabot[bot]'", - with: { - "github-token": "${{ secrets.BOT_GITHUB_PAT }}" - } + uses: 'hashicorp/setup-terraform@v1' + }, + { + run: 'terraform fmt -check -recursive terraform' } ], }); @@ -29,5 +32,5 @@ export class AutoApproveStack extends Stack { const app = new App(); new CDKPublishStack(app, 'kraken'); new CDKPublishStack(app, 'kittyhawk'); -new AutoApproveStack(app, 'approve'); +new TerraformLintStack(app, 'terraform'); app.synth(); diff --git a/.github/workflows/cdkactions_approve.yaml b/.github/workflows/cdkactions_approve.yaml deleted file mode 100644 index 38006c07..00000000 --- a/.github/workflows/cdkactions_approve.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Generated by cdkactions. Do not modify -# Generated as part of the 'approve' stack. -name: Auto Approve dependabot PRs -on: pull_request -jobs: - approve: - runs-on: ubuntu-latest - steps: - - uses: hmarr/auto-approve-action@v2.0.0 - if: github.actor == 'dependabot[bot]' - with: - github-token: ${{ secrets.BOT_GITHUB_PAT }} diff --git a/.github/workflows/cdkactions_terraform.yaml b/.github/workflows/cdkactions_terraform.yaml new file mode 100644 index 00000000..393b55e4 --- /dev/null +++ b/.github/workflows/cdkactions_terraform.yaml @@ -0,0 +1,14 @@ +# Generated by cdkactions. Do not modify +# Generated as part of the 'terraform' stack. 
+name: Lint terraform files +on: + push: + paths: + - terraform/**.tf +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: hashicorp/setup-terraform@v1 + - run: terraform fmt -check -recursive terraform diff --git a/terraform/modules/vault_flush/variables.tf b/terraform/modules/vault_flush/variables.tf index a42a4ecf..d96f6956 100644 --- a/terraform/modules/vault_flush/variables.tf +++ b/terraform/modules/vault_flush/variables.tf @@ -5,5 +5,5 @@ variable "path" { variable "entry" { description = "Entries to replace within the secret" - type = map + type = map(any) }