diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index 45f8afd8b9..8ba803561f 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -18,6 +18,8 @@ const mockOctokit = { listSelfHostedRunnersForOrg: jest.fn(), deleteSelfHostedRunnerFromOrg: jest.fn(), deleteSelfHostedRunnerFromRepo: jest.fn(), + getSelfHostedRunnerForOrg: jest.fn(), + getSelfHostedRunnerForRepo: jest.fn(), }, paginate: jest.fn(), }; @@ -144,10 +146,14 @@ const DEFAULT_RUNNERS_ORIGINAL = [ repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, }, { - instanceId: 'i-busy-112', - launchTime: moment(new Date()) - .subtract(minimumRunningTimeInMinutes + 27, 'minutes') - .toDate(), + instanceId: 'i-running-112', + launchTime: moment(new Date()).subtract(25, 'minutes').toDate(), + type: 'Repo', + owner: `doe/another-repo`, + }, + { + instanceId: 'i-running-113', + launchTime: moment(new Date()).subtract(25, 'minutes').toDate(), type: 'Org', owner: TEST_DATA.repositoryOwner, }, @@ -157,37 +163,42 @@ const DEFAULT_REGISTERED_RUNNERS = [ { id: 101, name: 'i-idle-101', - busy: false, }, { id: 102, name: 'i-idle-102', - busy: false, }, { id: 103, name: 'i-oldest-idle-103', - busy: false, }, { id: 104, name: 'i-oldest-idle-104', - busy: false, }, { id: 105, name: 'i-running-105', - busy: false, }, { id: 106, name: 'i-running-106', - busy: false, }, { - id: 112, - name: 'i-busy-112', - busy: true, + id: 1121, + name: 'i-running-112-1', + }, + { + id: 1122, + name: 'i-running-112-2', + }, + { + id: 1131, + name: 'i-running-113-1', + }, + { + id: 1132, + name: 'i-running-113-2', }, ]; @@ -235,6 +246,29 @@ describe('scaleDown', () => { } }); + mockOctokit.actions.getSelfHostedRunnerForRepo.mockImplementation((repo) => { + if (repo.runner_id === 1121) { + return { + data: { busy: true }, + }; + } else { + return { + data: { busy: false }, + }; + } + }); + mockOctokit.actions.getSelfHostedRunnerForOrg.mockImplementation((repo) => { + if (repo.runner_id === 1131) { + return { + data: { busy: true }, + }; + } else { + return { + data: { busy: false }, + }; + } + }); + const mockTerminateRunners = mocked(terminateRunner); mockTerminateRunners.mockImplementation(async () => { return; @@ -279,8 +313,7 @@ describe('scaleDown', () => { ); RUNNERS_ALL_REMOVED = DEFAULT_RUNNERS_ORG.filter( - (r) => - !r.instanceId.includes('running') && !r.instanceId.includes('registered') && !r.instanceId.includes('busy'), + (r) => !r.instanceId.includes('running') && !r.instanceId.includes('registered'), ); DEFAULT_RUNNERS_ORPHANED = DEFAULT_RUNNERS_ORIGINAL.filter( (r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), @@ -349,7 +382,7 @@ describe('scaleDown', () => { beforeEach(() => { process.env.SCALE_DOWN_CONFIG = JSON.stringify([ { - idleCount: 2, + idleCount: 3, cron: '* * * * * *', timeZone: 'Europe/Amsterdam', }, @@ -479,7 +512,7 @@ describe('scaleDown', () => { beforeEach(() => { process.env.SCALE_DOWN_CONFIG = JSON.stringify([ { - idleCount: 2, + idleCount: 3, cron: '* * * * * *', timeZone: 'Europe/Amsterdam', }, diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index df4ee1fe97..605221c58f 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -47,6 +47,27 @@ async function getOrCreateOctokit(runner: RunnerInfo): Promise { return octokit; } +async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise { + const state = + ec2runner.type === 'Org' + ? await client.actions.getSelfHostedRunnerForOrg({ + runner_id: runnerId, + org: ec2runner.owner, + }) + : await client.actions.getSelfHostedRunnerForRepo({ + runner_id: runnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + }); + + logger.info( + `Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${state.data.busy}`, + LogFields.print(), + ); + + return state.data.busy; +} + async function listGitHubRunners(runner: RunnerInfo): Promise { const key = runner.owner as string; const cachedRunners = githubCache.runners.get(key); @@ -86,29 +107,48 @@ function bootTimeExceeded(ec2Runner: RunnerInfo): boolean { return launchTimePlusBootTime < moment(new Date()).utc(); } -async function removeRunner(ec2runner: RunnerInfo, ghRunnerId: number): Promise { +async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promise { const githubAppClient = await getOrCreateOctokit(ec2runner); try { - const result = - ec2runner.type === 'Org' - ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ - runner_id: ghRunnerId, - org: ec2runner.owner, - }) - : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ - runner_id: ghRunnerId, - owner: ec2runner.owner.split('/')[0], - repo: ec2runner.owner.split('/')[1], - }); - - if (result.status == 204) { - await terminateRunner(ec2runner.instanceId); + const states = await Promise.all( + ghRunnerIds.map(async (ghRunnerId) => { + // Get busy state instead of using the output of listGitHubRunners(...) to minimize to race condition. + return await getGitHubRunnerBusyState(githubAppClient, ec2runner, ghRunnerId); + }), + ); + + if (states.every((busy) => busy === false)) { + const statuses = await Promise.all( + ghRunnerIds.map(async (ghRunnerId) => { + return ( + ec2runner.type === 'Org' + ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ + runner_id: ghRunnerId, + org: ec2runner.owner, + }) + : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ + runner_id: ghRunnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + }) + ).status; + }), + ); + + if (statuses.every((status) => status == 204)) { + await terminateRunner(ec2runner.instanceId); + logger.info( + `AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`, + LogFields.print(), + ); + } else { + logger.error(`Failed to de-register GitHub runner: ${statuses}`, LogFields.print()); + } + } else { logger.info( - `AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`, + `Runner '${ec2runner.instanceId}' cannot be de-registered, because it is still busy.`, LogFields.print(), ); - } else { - logger.error(`Failed to de-register GitHub runner: ${result.status}`, LogFields.print()); } } catch (e) { logger.error(`Runner '${ec2runner.instanceId}' cannot be de-registered. Error: ${e}`, LogFields.print()); @@ -130,15 +170,20 @@ async function evaluateAndRemoveRunners( ); for (const ec2Runner of ec2RunnersFiltered) { const ghRunners = await listGitHubRunners(ec2Runner); - const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId); - if (ghRunner) { - if (!ghRunner.busy && runnerMinimumTimeExceeded(ec2Runner)) { + const ghRunnersFiltered = ghRunners.filter((runner: { name: string }) => + runner.name.startsWith(ec2Runner.instanceId), + ); + if (ghRunnersFiltered.length) { + if (runnerMinimumTimeExceeded(ec2Runner)) { if (idleCounter > 0) { idleCounter--; logger.info(`Runner '${ec2Runner.instanceId}' will be kept idle.`, LogFields.print()); } else { logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`, LogFields.print()); - await removeRunner(ec2Runner, ghRunner.id); + await removeRunner( + ec2Runner, + ghRunnersFiltered.map((runner: { id: number }) => runner.id), + ); } } } else {