Skip to content

Commit

Permalink
Simplify DB check and improve werft logging
Browse files Browse the repository at this point in the history
  • Loading branch information
mads-hartmann committed Apr 27, 2022
1 parent 40239be commit 38a0503
Showing 1 changed file with 122 additions and 74 deletions.
196 changes: 122 additions & 74 deletions .werft/platform-delete-preview-environments-cron.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ import {deleteDNSRecord} from "./util/gcloud";
// if set to 'true' it shows only previews that would be deleted
const DRY_RUN = false

const SLICES = {
CONFIGURE_ACCESS: "Configuring access to relevant resources",
FETCHING_PREVIEW_ENVIRONMENTS: "Fetching preview environments",
FETCHING_BRANCHES: "Fetching branches",
DETERMINING_STALE_PREVIEW_ENVIRONMENTS: "Determining stale preview environments",
CHECKING_STALE_BRANCH: (branch: string) => `Checking for commit activity on ${branch}`,
CHECKING_DB_ACTIVITY: (preview: string) => `Checking for DB activity in ${preview}`,
DELETING_PREVIEW_ENVIRONMNETS: "Deleting preview environments"
}

// Will be set once tracing has been initialized
let werft: Werft

Expand All @@ -25,6 +35,9 @@ Tracing.initialize()
code: SpanStatusCode.ERROR,
message: err
})
console.error("Werft job failed with an error", err)
// Explicitly not using process.exit as we need to flush tracing, see tracing.js
process.exitCode = 1
})
.finally(() => {
werft.phase("Flushing telemetry", "Flushing telemetry before stopping job")
Expand All @@ -33,123 +46,158 @@ Tracing.initialize()

async function deletePreviewEnvironments() {

werft.phase("prep");
werft.phase("Configure access");
try {
const GCLOUD_SERVICE_ACCOUNT_PATH = "/mnt/secrets/gcp-sa/service-account.json";
exec(`gcloud auth activate-service-account --key-file "${GCLOUD_SERVICE_ACCOUNT_PATH}"`);
exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} gcloud container clusters get-credentials core-dev --zone europe-west1-b --project gitpod-core-dev`);
exec(`gcloud auth activate-service-account --key-file "${GCLOUD_SERVICE_ACCOUNT_PATH}"`, {slice: SLICES.CONFIGURE_ACCESS});
exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} gcloud container clusters get-credentials core-dev --zone europe-west1-b --project gitpod-core-dev`, {slice: SLICES.CONFIGURE_ACCESS});
werft.done(SLICES.CONFIGURE_ACCESS)
} catch (err) {
werft.fail("prep", err)
werft.fail(SLICES.CONFIGURE_ACCESS, err)
}
werft.done("prep")

werft.phase("Fetching previews");
werft.phase("Fetching preview environments");
let previews: string[]
try {
previews = listAllPreviewNamespaces(CORE_DEV_KUBECONFIG_PATH, {});
previews.forEach(previewNs => werft.log("Fetching preview", previewNs));
werft.done("Fetching preview");
previews.forEach(previewNs => werft.log(SLICES.FETCHING_PREVIEW_ENVIRONMENTS, previewNs));
werft.log(SLICES.FETCHING_PREVIEW_ENVIRONMENTS, `Found ${previews.length} preview environments`)
werft.done(SLICES.FETCHING_PREVIEW_ENVIRONMENTS);
} catch (err) {
werft.fail("Fetching preview", err)
werft.fail(SLICES.FETCHING_PREVIEW_ENVIRONMENTS, err)
}

werft.phase("Fetching outdated branches");
werft.phase("Fetching branches");
const branches = getAllBranches();
const outdatedPreviews = new Set(branches
werft.log(SLICES.FETCHING_BRANCHES, `Found ${branches.length} branches`)

werft.phase("Determining which preview environments are stale");

const previewEnvironmentsBasedOnBranches = new Set(branches.map(branch => expectedNamespaceFromBranch(branch)));

const previewEnvironmentsBasedOnStaleBranches = new Set(branches
.filter(branch => {
const lastCommit = exec(`git log origin/${branch} --since=$(date +%Y-%m-%d -d "5 days ago")`, { silent: true })
return lastCommit.length < 1
const sliceID = SLICES.CHECKING_STALE_BRANCH(branch)
const lastCommit = exec(`git log origin/${branch} --since=$(date +%Y-%m-%d -d "5 days ago")`, { slice: sliceID })
const hasRecentCommits = lastCommit.length > 1
werft.log(sliceID, `Has recent commits: ${hasRecentCommits}`)
werft.done(sliceID)
return !hasRecentCommits
})
.map(branch => expectedNamespaceFromBranch(branch)))

const expectedPreviewEnvironmentNamespaces = new Set(branches.map(branch => expectedNamespaceFromBranch(branch)));
const deleteDueToMissingBranch = previews.filter(ns => !previewEnvironmentsBasedOnBranches.has(ns))
const deleteDueToNoCommitActivity = previews.filter(ns => previewEnvironmentsBasedOnStaleBranches.has(ns))
const deleteDueToNoDBActivity = previews.filter(ns => isInactive(ns))
const previewsToDelete = new Set([...deleteDueToMissingBranch, ...deleteDueToNoCommitActivity, ...deleteDueToNoDBActivity])

werft.phase("deleting previews")
try {
const deleteDueToMissingBranch = previews.filter(ns => !expectedPreviewEnvironmentNamespaces.has(ns))
const deleteDueToNoCommitActivity = previews.filter(ns => outdatedPreviews.has(ns))
const deleteDueToNoDBActivity = previews.filter(ns => isInactive(ns))
const previewsToDelete = new Set([...deleteDueToMissingBranch, ...deleteDueToNoCommitActivity, ...deleteDueToNoDBActivity])
if (previewsToDelete.has("staging-main")) {
previewsToDelete.delete("staging-main")
}

if (previewsToDelete.has("staging-main")) {
previewsToDelete.delete("staging-main")
}
if (previewsToDelete.size == 0) {
werft.log(SLICES.DETERMINING_STALE_PREVIEW_ENVIRONMENTS, "No stale preview environments.")
werft.done(SLICES.DETERMINING_STALE_PREVIEW_ENVIRONMENTS)
return
} else {
werft.log(SLICES.DETERMINING_STALE_PREVIEW_ENVIRONMENTS, `Found ${previewsToDelete.size} stale preview environments`)
}

if (DRY_RUN) {
previewsToDelete.forEach(preview => werft.log("deleting preview", `would have deleted preview environment ${preview}`))
}
else {
const promises: Promise<any>[] = [];
previewsToDelete.forEach(preview => {
werft.log("deleting preview", preview)
promises.push(
removeCertificate(preview, CORE_DEV_KUBECONFIG_PATH),
removeStagingDNSRecord(preview),
removePreviewDNSRecord(preview),
wipePreviewEnvironmentAndNamespace(helmInstallName, preview, CORE_DEV_KUBECONFIG_PATH, { slice: `Deleting preview ${preview}` })
)
})
await Promise.all(promises)
}
werft.done("deleting preview")
werft.phase("Deleting stale preview environments")
if (DRY_RUN) {
previewsToDelete.forEach(preview => {
werft.log(SLICES.DELETING_PREVIEW_ENVIRONMNETS, `Would have deleted preview environment ${preview}`)
})
werft.done(SLICES.DELETING_PREVIEW_ENVIRONMNETS)
return
}

try {
const promises: Promise<any>[] = [];
previewsToDelete.forEach(preview => promises.push(removePreviewEnvironment(preview)))
await Promise.all(promises)
werft.done(SLICES.DELETING_PREVIEW_ENVIRONMNETS)
} catch (err) {
werft.fail("deleting preview", err)
werft.fail(SLICES.DELETING_PREVIEW_ENVIRONMNETS, err)
}
}

async function removePreviewEnvironment(previewNamespace: string) {
const sliceID = `Deleting preview ${previewNamespace}`
werft.log(sliceID, `Starting deletion of all resources related to ${previewNamespace}`)
try {
await Promise.all([
removeCertificate(previewNamespace, CORE_DEV_KUBECONFIG_PATH),
removeStagingDNSRecord(previewNamespace),
removePreviewDNSRecord(previewNamespace),
wipePreviewEnvironmentAndNamespace(helmInstallName, previewNamespace, CORE_DEV_KUBECONFIG_PATH, { slice: sliceID })
])
werft.done(sliceID)
} catch (e) {
werft.fail(sliceID, e)
}
}

/**
* Checks whether or not a preview environment is considered inactive.
*
* It errors on the side of caution, so in case of connection issues etc. it will consider the
* preview environment active.
*/
function isInactive(previewNS: string): boolean {
const sliceID = SLICES.CHECKING_DB_ACTIVITY(previewNS)
try {
werft.log(sliceID, "Checking namespace status")
const statusNS = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get ns ${previewNS} -o jsonpath='{.status.phase}'`, { slice: sliceID })

const statusNS = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get ns ${previewNS} -o jsonpath='{.status.phase}'`, { silent: true})

if ( statusNS == "Active") {

const emptyNS = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods -n ${previewNS} -o jsonpath='{.items.*}'`, { silent: true})

if ( emptyNS.length < 1 ) {
return false;
if (statusNS != "Active") {
werft.log(sliceID, `Is inactive: false - The namespace is ${statusNS}`)
werft.done(sliceID)
return false
}

const statusDB = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.phase}'`, { silent: true})
const statusDbContainer = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.containerStatuses.*.ready}'`, { silent: true})
werft.log(sliceID, "Checking status of the MySQL pod")
const statusDB = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.phase}'`, { slice: sliceID})
const statusDbContainer = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get pods mysql-0 -n ${previewNS} -o jsonpath='{.status.containerStatuses.*.ready}'`, { slice: sliceID})

if (statusDB.code == 0 && statusDB == "Running" && statusDbContainer != "false") {
if (statusDB.code != 0 || statusDB != "Running" || statusDbContainer == "false") {
werft.log(sliceID, "Is inactive: false - The database is not reachable")
werft.done(sliceID)
return false
}

const connectionToDb = `KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get secret db-password -n ${previewNS} -o jsonpath='{.data.mysql-root-password}' | base64 -d | mysql --host=db.${previewNS}.svc.cluster.local --port=3306 --user=root --database=gitpod -s -N -p`
const dbPassword = exec(`KUBECONFIG=${CORE_DEV_KUBECONFIG_PATH} kubectl get secret db-password -n ${previewNS} -o jsonpath='{.data.mysql-root-password}' | base64 -d`, {silent: true}).stdout.trim()
const connectionToDb = `mysql --host=db.${previewNS}.svc.cluster.local --port=3306 --user=root --database=gitpod -s -N --password=${dbPassword}`

const latestInstanceTimeout = 24
const latestInstance = exec(`${connectionToDb} --execute="SELECT creationTime FROM d_b_workspace_instance WHERE creationTime > DATE_SUB(NOW(), INTERVAL '${latestInstanceTimeout}' HOUR) LIMIT 1"`, { silent: true })
const latestInstanceTimeout = 48
const latestInstance = exec(`${connectionToDb} --execute="SELECT creationTime FROM d_b_workspace_instance WHERE creationTime > DATE_SUB(NOW(), INTERVAL '${latestInstanceTimeout}' HOUR) LIMIT 1"`, { slice: sliceID})

const latestUserTimeout = 24
const latestUser= exec(`${connectionToDb} --execute="SELECT creationDate FROM d_b_user WHERE creationDate > DATE_SUB(NOW(), INTERVAL '${latestUserTimeout}' HOUR) LIMIT 1"`, { silent: true })
const latestUserTimeout = 48
const latestUser= exec(`${connectionToDb} --execute="SELECT creationDate FROM d_b_user WHERE creationDate > DATE_SUB(NOW(), INTERVAL '${latestUserTimeout}' HOUR) LIMIT 1"`, { slice: sliceID})

const lastModifiedTimeout = 24
const lastModified= exec(`${connectionToDb} --execute="SELECT _lastModified FROM d_b_user WHERE _lastModified > DATE_SUB(NOW(), INTERVAL '${lastModifiedTimeout}' HOUR) LIMIT 1"`, { silent: true })
const lastModifiedTimeout = 48
const lastModified= exec(`${connectionToDb} --execute="SELECT _lastModified FROM d_b_user WHERE _lastModified > DATE_SUB(NOW(), INTERVAL '${lastModifiedTimeout}' HOUR) LIMIT 1"`, { slice: sliceID})

const heartbeatTimeout = 24
const heartbeat= exec(`${connectionToDb} --execute="SELECT lastSeen FROM d_b_workspace_instance_user WHERE lastSeen > DATE_SUB(NOW(), INTERVAL '${heartbeatTimeout}' HOUR) LIMIT 1"`, { silent: true })
const heartbeatTimeout = 48
const heartbeat= exec(`${connectionToDb} --execute="SELECT lastSeen FROM d_b_workspace_instance_user WHERE lastSeen > DATE_SUB(NOW(), INTERVAL '${heartbeatTimeout}' HOUR) LIMIT 1"`, { slice: sliceID})

if ( (heartbeat.length < 1) &&
(latestInstance.length < 1) &&
(latestUser.length < 1) &&
(lastModified.length < 1) ) {
return true;
} else {
return false;
}
}
const isInactive = (heartbeat.length < 1) && (latestInstance.length < 1) && (latestUser.length < 1) && (lastModified.length < 1)
werft.log(sliceID, `Is inactive: ${isInactive}`)
werft.done(sliceID)
return isInactive
} catch (err) {
werft.log(sliceID, "Is inactive: false - Unable to check DB activity")
return false
}

}

async function removeCertificate(preview: string, kubectlConfig: string) {
exec(`kubectl --kubeconfig ${kubectlConfig} -n certs delete cert ${preview}`)
return
}

// remove DNS records for core-dev-based preview environments
async function removeStagingDNSRecord(preview: string) {
return Promise.all([
await Promise.all([
deleteDNSRecord('A', `*.ws-dev.${preview}.staging.gitpod-dev.com`, 'gitpod-dev', 'gitpod-dev-com'),
deleteDNSRecord('A', `*.${preview}.staging.gitpod-dev.com`, 'gitpod-dev', 'gitpod-dev-com'),
deleteDNSRecord('A', `${preview}.staging.gitpod-dev.com`, 'gitpod-dev', 'gitpod-dev-com'),
Expand All @@ -164,7 +212,7 @@ async function removeStagingDNSRecord(preview: string) {

// remove DNS records for harvester-based preview environments
async function removePreviewDNSRecord(preview: string) {
return Promise.all([
await Promise.all([
deleteDNSRecord('A', `*.ws-dev.${preview}.preview.gitpod-dev.com`, 'gitpod-core-dev', 'preview-gitpod-dev-com'),
deleteDNSRecord('A', `*.${preview}.preview.gitpod-dev.com`, 'gitpod-core-dev', 'preview-gitpod-dev-com'),
deleteDNSRecord('A', `${preview}.preview.gitpod-dev.com`, 'gitpod-core-dev', 'preview-gitpod-dev-com'),
Expand Down

0 comments on commit 38a0503

Please sign in to comment.