From 38a0503292e765186f757eca935d8e55b0979133 Mon Sep 17 00:00:00 2001 From: Mads Hartmann Date: Wed, 27 Apr 2022 11:58:08 +0000 Subject: [PATCH] Simplify DB check and improve werft logging --- ...atform-delete-preview-environments-cron.ts | 196 +++++++++++------- 1 file changed, 122 insertions(+), 74 deletions(-) diff --git a/.werft/platform-delete-preview-environments-cron.ts b/.werft/platform-delete-preview-environments-cron.ts index 1561cbb1695c60..1748c27201de06 100644 --- a/.werft/platform-delete-preview-environments-cron.ts +++ b/.werft/platform-delete-preview-environments-cron.ts @@ -11,6 +11,16 @@ import {deleteDNSRecord} from "./util/gcloud"; // if set to 'true' it shows only previews that would be deleted const DRY_RUN = false +const SLICES = { + CONFIGURE_ACCESS: "Configuring access to relevant resources", + FETCHING_PREVIEW_ENVIRONMENTS: "Fetching preview environments", + FETCHING_BRANCHES: "Fetching branches", + DETERMINING_STALE_PREVIEW_ENVIRONMENTS: "Determining stale preview environments", + CHECKING_STALE_BRANCH: (branch: string) => `Checking for commit activity on ${branch}`, + CHECKING_DB_ACTIVITY: (preview: string) => `Checking for DB activity in ${preview}`, + DELETING_PREVIEW_ENVIRONMNETS: "Deleting preview environments" +} + // Will be set once tracing has been initialized let werft: Werft @@ -25,6 +35,9 @@ Tracing.initialize() code: SpanStatusCode.ERROR, message: err }) + console.error("Werft job failed with an error", err) + // Explicitly not using process.exit as we need to flush tracing, see tracing.js + process.exitCode = 1 }) .finally(() => { werft.phase("Flushing telemetry", "Flushing telemetry before stopping job") @@ -33,123 +46,158 @@ Tracing.initialize() async function deletePreviewEnvironments() { - werft.phase("prep"); + werft.phase("Configure access"); try { const GCLOUD_SERVICE_ACCOUNT_PATH = "/mnt/secrets/gcp-sa/service-account.json"; - exec(`gcloud auth activate-service-account --key-file "${GCLOUD_SERVICE_ACCOUNT_PATH}"`); - exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} gcloud container clusters get-credentials core-dev --zone europe-west1-b --project gitpod-core-dev`); + exec(`gcloud auth activate-service-account --key-file "${GCLOUD_SERVICE_ACCOUNT_PATH}"`, {slice: SLICES.CONFIGURE_ACCESS}); + exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} gcloud container clusters get-credentials core-dev --zone europe-west1-b --project gitpod-core-dev`, {slice: SLICES.CONFIGURE_ACCESS}); + werft.done(SLICES.CONFIGURE_ACCESS) } catch (err) { - werft.fail("prep", err) + werft.fail(SLICES.CONFIGURE_ACCESS, err) } - werft.done("prep") - werft.phase("Fetching previews"); + werft.phase("Fetching preview environments"); let previews: string[] try { previews = listAllPreviewNamespaces(CORE_DEV_KUBECONFIG_PATH, {}); - previews.forEach(previewNs => werft.log("Fetching preview", previewNs)); - werft.done("Fetching preview"); + previews.forEach(previewNs => werft.log(SLICES.FETCHING_PREVIEW_ENVIRONMENTS, previewNs)); + werft.log(SLICES.FETCHING_PREVIEW_ENVIRONMENTS, `Found ${previews.length} preview environments`) + werft.done(SLICES.FETCHING_PREVIEW_ENVIRONMENTS); } catch (err) { - werft.fail("Fetching preview", err) + werft.fail(SLICES.FETCHING_PREVIEW_ENVIRONMENTS, err) } - werft.phase("Fetching outdated branches"); + werft.phase("Fetching branches"); const branches = getAllBranches(); - const outdatedPreviews = new Set(branches + werft.log(SLICES.FETCHING_BRANCHES, `Found ${branches.length} branches`) + + werft.phase("Determining which preview environments are stale"); + + const previewEnvironmentsBasedOnBranches = new Set(branches.map(branch => expectedNamespaceFromBranch(branch))); + + const previewEnvironmentsBasedOnStaleBranches = new Set(branches .filter(branch => { - const lastCommit = exec(`git log origin/${branch} --since=$(date +%Y-%m-%d -d "5 days ago")`, { silent: true }) - return lastCommit.length < 1 + const sliceID = SLICES.CHECKING_STALE_BRANCH(branch) + const lastCommit = exec(`git log origin/${branch} --since=$(date +%Y-%m-%d -d "5 days ago")`, { slice: sliceID }) + const hasRecentCommits = lastCommit.length > 1 + werft.log(sliceID, `Has recent commits: ${hasRecentCommits}`) + werft.done(sliceID) + return !hasRecentCommits }) .map(branch => expectedNamespaceFromBranch(branch))) - const expectedPreviewEnvironmentNamespaces = new Set(branches.map(branch => expectedNamespaceFromBranch(branch))); + const deleteDueToMissingBranch = previews.filter(ns => !previewEnvironmentsBasedOnBranches.has(ns)) + const deleteDueToNoCommitActivity = previews.filter(ns => previewEnvironmentsBasedOnStaleBranches.has(ns)) + const deleteDueToNoDBActivity = previews.filter(ns => isInactive(ns)) + const previewsToDelete = new Set([...deleteDueToMissingBranch, ...deleteDueToNoCommitActivity, ...deleteDueToNoDBActivity]) - werft.phase("deleting previews") - try { - const deleteDueToMissingBranch = previews.filter(ns => !expectedPreviewEnvironmentNamespaces.has(ns)) - const deleteDueToNoCommitActivity = previews.filter(ns => outdatedPreviews.has(ns)) - const deleteDueToNoDBActivity = previews.filter(ns => isInactive(ns)) - const previewsToDelete = new Set([...deleteDueToMissingBranch, ...deleteDueToNoCommitActivity, ...deleteDueToNoDBActivity]) + if (previewsToDelete.has("staging-main")) { + previewsToDelete.delete("staging-main") + } - if (previewsToDelete.has("staging-main")) { - previewsToDelete.delete("staging-main") - } + if (previewsToDelete.size == 0) { + werft.log(SLICES.DETERMINING_STALE_PREVIEW_ENVIRONMENTS, "No stale preview environments.") + werft.done(SLICES.DETERMINING_STALE_PREVIEW_ENVIRONMENTS) + return + } else { + werft.log(SLICES.DETERMINING_STALE_PREVIEW_ENVIRONMENTS, `Found ${previewsToDelete.size} stale preview environments`) + } - if (DRY_RUN) { - previewsToDelete.forEach(preview => werft.log("deleting preview", `would have deleted preview environment ${preview}`)) - } - else { - const promises: Promise[] = []; - previewsToDelete.forEach(preview => { - werft.log("deleting preview", preview) - promises.push( - removeCertificate(preview, CORE_DEV_KUBECONFIG_PATH), - removeStagingDNSRecord(preview), - removePreviewDNSRecord(preview), - wipePreviewEnvironmentAndNamespace(helmInstallName, preview, CORE_DEV_KUBECONFIG_PATH, { slice: `Deleting preview ${preview}` }) - ) - }) - await Promise.all(promises) - } - werft.done("deleting preview") + werft.phase("Deleting stale preview environments") + if (DRY_RUN) { + previewsToDelete.forEach(preview => { + werft.log(SLICES.DELETING_PREVIEW_ENVIRONMNETS, `Would have deleted preview environment ${preview}`) + }) + werft.done(SLICES.DELETING_PREVIEW_ENVIRONMNETS) + return + } + + try { + const promises: Promise[] = []; + previewsToDelete.forEach(preview => promises.push(removePreviewEnvironment(preview))) + await Promise.all(promises) + werft.done(SLICES.DELETING_PREVIEW_ENVIRONMNETS) } catch (err) { - werft.fail("deleting preview", err) + werft.fail(SLICES.DELETING_PREVIEW_ENVIRONMNETS, err) } } +async function removePreviewEnvironment(previewNamespace: string) { + const sliceID = `Deleting preview ${previewNamespace}` + werft.log(sliceID, `Starting deletion of all resources related to ${previewNamespace}`) + try { + await Promise.all([ + removeCertificate(previewNamespace, CORE_DEV_KUBECONFIG_PATH), + removeStagingDNSRecord(previewNamespace), + removePreviewDNSRecord(previewNamespace), + wipePreviewEnvironmentAndNamespace(helmInstallName, previewNamespace, CORE_DEV_KUBECONFIG_PATH, { slice: sliceID }) + ]) + werft.done(sliceID) + } catch (e) { + werft.fail(sliceID, e) + } +} +/** + * Checks whether or not a preview environment is considered inactive. + * + * It errors on the side of caution, so in case of connection issues etc. it will consider the + * preview environment active. + */ function isInactive(previewNS: string): boolean { + const sliceID = SLICES.CHECKING_DB_ACTIVITY(previewNS) + try { + werft.log(sliceID, "Checking namespace status") + const statusNS = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get ns ${previewNS} -o jsonpath='{.status.phase}'`, { slice: sliceID }) - const statusNS = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get ns ${previewNS} -o jsonpath='{.status.phase}'`, { silent: true}) - - if ( statusNS == "Active") { - - const emptyNS = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods -n ${previewNS} -o jsonpath='{.items.*}'`, { silent: true}) - - if ( emptyNS.length < 1 ) { - return false; + if (statusNS != "Active") { + werft.log(sliceID, `Is inactive: false - The namespace is ${statusNS}`) + werft.done(sliceID) + return false } - const statusDB = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.phase}'`, { silent: true}) - const statusDbContainer = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.containerStatuses.*.ready}'`, { silent: true}) + werft.log(sliceID, "Checking status of the MySQL pod") + const statusDB = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.phase}'`, { slice: sliceID}) + const statusDbContainer = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.containerStatuses.*.ready}'`, { slice: sliceID}) - if (statusDB.code == 0 && statusDB == "Running" && statusDbContainer != "false") { + if (statusDB.code != 0 || statusDB != "Running" || statusDbContainer == "false") { + werft.log(sliceID, "Is inactive: false - The database is not reachable") + werft.done(sliceID) + return false + } - const connectionToDb = `KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get secret db-password -n ${previewNS} -o jsonpath='{.data.mysql-root-password}' | base64 -d | mysql --host=db.${previewNS}.svc.cluster.local --port=3306 --user=root --database=gitpod -s -N -p` + const dbPassword = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get secret db-password -n ${previewNS} -o jsonpath='{.data.mysql-root-password}' | base64 -d`, {silent: true}).stdout.trim() + const connectionToDb = `mysql --host=db.${previewNS}.svc.cluster.local --port=3306 --user=root --database=gitpod -s -N --password=${dbPassword}` - const latestInstanceTimeout = 24 - const latestInstance = exec(`${connectionToDb} --execute="SELECT creationTime FROM d_b_workspace_instance WHERE creationTime > DATE_SUB(NOW(), INTERVAL '${latestInstanceTimeout}' HOUR) LIMIT 1"`, { silent: true }) + const latestInstanceTimeout = 48 + const latestInstance = exec(`${connectionToDb} --execute="SELECT creationTime FROM d_b_workspace_instance WHERE creationTime > DATE_SUB(NOW(), INTERVAL '${latestInstanceTimeout}' HOUR) LIMIT 1"`, { slice: sliceID}) - const latestUserTimeout = 24 - const latestUser= exec(`${connectionToDb} --execute="SELECT creationDate FROM d_b_user WHERE creationDate > DATE_SUB(NOW(), INTERVAL '${latestUserTimeout}' HOUR) LIMIT 1"`, { silent: true }) + const latestUserTimeout = 48 + const latestUser= exec(`${connectionToDb} --execute="SELECT creationDate FROM d_b_user WHERE creationDate > DATE_SUB(NOW(), INTERVAL '${latestUserTimeout}' HOUR) LIMIT 1"`, { slice: sliceID}) - const lastModifiedTimeout = 24 - const lastModified= exec(`${connectionToDb} --execute="SELECT _lastModified FROM d_b_user WHERE _lastModified > DATE_SUB(NOW(), INTERVAL '${lastModifiedTimeout}' HOUR) LIMIT 1"`, { silent: true }) + const lastModifiedTimeout = 48 + const lastModified= exec(`${connectionToDb} --execute="SELECT _lastModified FROM d_b_user WHERE _lastModified > DATE_SUB(NOW(), INTERVAL '${lastModifiedTimeout}' HOUR) LIMIT 1"`, { slice: sliceID}) - const heartbeatTimeout = 24 - const heartbeat= exec(`${connectionToDb} --execute="SELECT lastSeen FROM d_b_workspace_instance_user WHERE lastSeen > DATE_SUB(NOW(), INTERVAL '${heartbeatTimeout}' HOUR) LIMIT 1"`, { silent: true }) + const heartbeatTimeout = 48 + const heartbeat= exec(`${connectionToDb} --execute="SELECT lastSeen FROM d_b_workspace_instance_user WHERE lastSeen > DATE_SUB(NOW(), INTERVAL '${heartbeatTimeout}' HOUR) LIMIT 1"`, { slice: sliceID}) - if ( (heartbeat.length < 1) && - (latestInstance.length < 1) && - (latestUser.length < 1) && - (lastModified.length < 1) ) { - return true; - } else { - return false; - } - } + const isInactive = (heartbeat.length < 1) && (latestInstance.length < 1) && (latestUser.length < 1) && (lastModified.length < 1) + werft.log(sliceID, `Is inactive: ${isInactive}`) + werft.done(sliceID) + return isInactive + } catch (err) { + werft.log(sliceID, "Is inactive: false - Unable to check DB activity") + return false } - } async function removeCertificate(preview: string, kubectlConfig: string) { exec(`kubectl --kubeconfig ${kubectlConfig} -n certs delete cert ${preview}`) - return } // remove DNS records for core-dev-based preview environments async function removeStagingDNSRecord(preview: string) { - return Promise.all([ + await Promise.all([ deleteDNSRecord('A', `*.ws-dev.${preview}.staging.gitpod-dev.com`, 'gitpod-dev', 'gitpod-dev-com'), deleteDNSRecord('A', `*.${preview}.staging.gitpod-dev.com`, 'gitpod-dev', 'gitpod-dev-com'), deleteDNSRecord('A', `${preview}.staging.gitpod-dev.com`, 'gitpod-dev', 'gitpod-dev-com'), @@ -164,7 +212,7 @@ async function removeStagingDNSRecord(preview: string) { // remove DNS records for harvester-based preview environments async function removePreviewDNSRecord(preview: string) { - return Promise.all([ + await Promise.all([ deleteDNSRecord('A', `*.ws-dev.${preview}.preview.gitpod-dev.com`, 'gitpod-core-dev', 'preview-gitpod-dev-com'), deleteDNSRecord('A', `*.${preview}.preview.gitpod-dev.com`, 'gitpod-core-dev', 'preview-gitpod-dev-com'), deleteDNSRecord('A', `${preview}.preview.gitpod-dev.com`, 'gitpod-core-dev', 'preview-gitpod-dev-com'),