Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scale Logging Context #1399

Merged
merged 4 commits into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions modules/runners/lambdas/runners/src/scale-runners/gh-auth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import {
} from '@octokit/auth-app/dist-types/types';
import { OctokitOptions } from '@octokit/core/dist-types/types';
import { getParameterValue } from './ssm';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'gh-auth' });

export async function createOctoClient(token: string, ghesApiUrl = ''): Promise<Octokit> {
const ocktokitOptions: OctokitOptions = {
Expand Down Expand Up @@ -55,7 +55,7 @@ async function createAuth(installationId: number | undefined, ghesApiUrl: string
};
if (installationId) authOptions = { ...authOptions, installationId };

logger.debug(`GHES API URL: ${ghesApiUrl}`);
logger.debug(`GHES API URL: ${ghesApiUrl}`, LogFields.print());
if (ghesApiUrl) {
authOptions.request = request.defaults({
baseUrl: ghesApiUrl,
Expand Down
9 changes: 8 additions & 1 deletion modules/runners/lambdas/runners/src/scale-runners/logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ export const logger = new Logger({
displayInstanceName: false,
maskAnyRegEx: ['--token [A-Z0-9]*'],
minLevel: process.env.LOG_LEVEL || 'info',
name: 'scale-up',
name: 'scale-runners',
overwriteConsole: true,
type: process.env.LOG_TYPE || 'pretty',
});
export class LogFields {
static fields: { [key: string]: string } = {};

public static print(): string {
return JSON.stringify(LogFields.fields);
}
}
14 changes: 9 additions & 5 deletions modules/runners/lambdas/runners/src/scale-runners/runners.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { EC2, SSM } from 'aws-sdk';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'runners' });

export interface RunnerList {
instanceId: string;
Expand Down Expand Up @@ -75,16 +75,20 @@ export async function terminateRunner(instanceId: string): Promise<void> {
InstanceIds: [instanceId],
})
.promise();
logger.info(`Runner ${instanceId} has been terminated.`);
logger.info(`Runner ${instanceId} has been terminated.`, LogFields.print());
}

export async function createRunner(runnerParameters: RunnerInputParameters, launchTemplateName: string): Promise<void> {
logger.debug('Runner configuration: ' + JSON.stringify(runnerParameters));
logger.debug('Runner configuration: ' + JSON.stringify(runnerParameters), LogFields.print());
const ec2 = new EC2();
const runInstancesResponse = await ec2
.runInstances(getInstanceParams(launchTemplateName, runnerParameters))
.promise();
logger.info('Created instance(s): ', runInstancesResponse.Instances?.map((i) => i.InstanceId).join(','));
logger.info(
'Created instance(s): ',
runInstancesResponse.Instances?.map((i) => i.InstanceId).join(','),
LogFields.print(),
);
const ssm = new SSM();
runInstancesResponse.Instances?.forEach(async (i: EC2.Instance) => {
await ssm
Expand Down
53 changes: 34 additions & 19 deletions modules/runners/lambdas/runners/src/scale-runners/scale-down.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@ import { listEC2Runners, RunnerInfo, RunnerList, terminateRunner } from './runne
import { getIdleRunnerCount, ScalingDownConfig } from './scale-down-config';
import { createOctoClient, createGithubAppAuth, createGithubInstallationAuth } from './gh-auth';
import { githubCache, GhRunners } from './cache';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'scale-down' });

async function getOrCreateOctokit(runner: RunnerInfo): Promise<Octokit> {
const key = runner.owner;
const cachedOctokit = githubCache.clients.get(key);

if (cachedOctokit) {
logger.debug(`[createGitHubClientForRunner] Cache hit for ${key}`);
logger.debug(`[createGitHubClientForRunner] Cache hit for ${key}`, LogFields.print());
return cachedOctokit;
}

logger.debug(`[createGitHubClientForRunner] Cache miss for ${key}`);
logger.debug(`[createGitHubClientForRunner] Cache miss for ${key}`, LogFields.print());
const ghesBaseUrl = process.env.GHES_URL;
let ghesApiUrl = '';
if (ghesBaseUrl) {
Expand Down Expand Up @@ -50,11 +50,11 @@ async function listGitHubRunners(runner: RunnerInfo): Promise<GhRunners> {
const key = runner.owner as string;
const cachedRunners = githubCache.runners.get(key);
if (cachedRunners) {
logger.debug(`[listGithubRunners] Cache hit for ${key}`);
logger.debug(`[listGithubRunners] Cache hit for ${key}`, LogFields.print());
return cachedRunners;
}

logger.debug(`[listGithubRunners] Cache miss for ${key}`);
logger.debug(`[listGithubRunners] Cache miss for ${key}`, LogFields.print());
const client = await getOrCreateOctokit(runner);
const runners =
runner.type === 'Org'
Expand Down Expand Up @@ -102,12 +102,18 @@ async function removeRunner(ec2runner: RunnerInfo, ghRunnerId: number): Promise<

if (result.status == 204) {
await terminateRunner(ec2runner.instanceId);
logger.info(`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`);
logger.info(
`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`,
LogFields.print(),
);
} else {
logger.error(`Failed to de-register GitHub runner: ${result.status}`);
logger.error(`Failed to de-register GitHub runner: ${result.status}`, LogFields.print());
}
} catch (e) {
logger.info(`Runner '${ec2runner.instanceId}' cannot be de-registered, most likely the runner is active.`);
logger.info(
`Runner '${ec2runner.instanceId}' cannot be de-registered, most likely the runner is active.`,
LogFields.print(),
);
}
}

Expand All @@ -120,26 +126,29 @@ async function evaluateAndRemoveRunners(

for (const ownerTag of ownerTags) {
const ec2RunnersFiltered = ec2Runners.filter((runner) => runner.owner === ownerTag);
logger.debug(`Found: '${ec2RunnersFiltered.length}' active GitHub runners with owner tag: '${ownerTag}'`);
logger.debug(
`Found: '${ec2RunnersFiltered.length}' active GitHub runners with owner tag: '${ownerTag}'`,
LogFields.print(),
);
for (const ec2Runner of ec2RunnersFiltered) {
const ghRunners = await listGitHubRunners(ec2Runner);
const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId);
if (ghRunner) {
if (runnerMinimumTimeExceeded(ec2Runner)) {
if (idleCounter > 0) {
idleCounter--;
logger.info(`Runner '${ec2Runner.instanceId}' will kept idle.`);
logger.info(`Runner '${ec2Runner.instanceId}' will kept idle.`, LogFields.print());
} else {
logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`);
logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`, LogFields.print());
await removeRunner(ec2Runner, ghRunner.id);
}
}
} else {
if (bootTimeExceeded(ec2Runner)) {
logger.info(`Runner '${ec2Runner.instanceId}' is orphaned and will be removed.`);
logger.info(`Runner '${ec2Runner.instanceId}' is orphaned and will be removed.`, LogFields.print());
terminateOrphan(ec2Runner.instanceId);
} else {
logger.debug(`Runner ${ec2Runner.instanceId} has not yet booted.`);
logger.debug(`Runner ${ec2Runner.instanceId} has not yet booted.`, LogFields.print());
}
}
}
Expand All @@ -150,7 +159,7 @@ async function terminateOrphan(instanceId: string): Promise<void> {
try {
await terminateRunner(instanceId);
} catch (e) {
logger.debug(`Orphan runner '${instanceId}' cannot be removed.`);
logger.debug(`Orphan runner '${instanceId}' cannot be removed.`, LogFields.print());
}
}

Expand Down Expand Up @@ -195,19 +204,25 @@ export async function scaleDown(): Promise<void> {
// list and sort runners, newest first. This ensure we keep the newest runners longer.
const ec2Runners = await listAndSortRunners(environment);
const activeEc2RunnersCount = ec2Runners.length;
logger.info(`Found: '${activeEc2RunnersCount}' active GitHub EC2 runner instances before clean-up.`);
logger.info(
`Found: '${activeEc2RunnersCount}' active GitHub EC2 runner instances before clean-up.`,
LogFields.print(),
);

if (activeEc2RunnersCount === 0) {
logger.debug(`No active runners found for environment: '${environment}'`);
logger.debug(`No active runners found for environment: '${environment}'`, LogFields.print());
return;
}
const legacyRunners = filterLegacyRunners(ec2Runners);
logger.debug(JSON.stringify(legacyRunners));
logger.debug(JSON.stringify(legacyRunners), LogFields.print());
const runners = filterRunners(ec2Runners);

await evaluateAndRemoveRunners(runners, scaleDownConfigs);
await evaluateAndRemoveRunners(legacyRunners, scaleDownConfigs);

const activeEc2RunnersCountAfter = (await listAndSortRunners(environment)).length;
logger.info(`Found: '${activeEc2RunnersCountAfter}' active GitHub EC2 runners instances after clean-up.`);
logger.info(
`Found: '${activeEc2RunnersCountAfter}' active GitHub EC2 runners instances after clean-up.`,
LogFields.print(),
);
}
31 changes: 19 additions & 12 deletions modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import { listEC2Runners, createRunner, RunnerInputParameters } from './runners';
import { createOctoClient, createGithubAppAuth, createGithubInstallationAuth } from './gh-auth';
import yn from 'yn';
import { Octokit } from '@octokit/rest';
import { logger as rootLogger } from './logger';
import { logger as rootLogger, LogFields } from './logger';

const logger = rootLogger.getChildLogger();
const logger = rootLogger.getChildLogger({ name: 'scale-up' });

export interface ActionRequestMessage {
id: number;
Expand All @@ -23,7 +23,16 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
const environment = process.env.ENVIRONMENT;
const ghesBaseUrl = process.env.GHES_URL;

console.info(`Received ${payload.eventType} from ${payload.repositoryOwner}/${payload.repositoryName}`);
const runnerType = enableOrgLevel ? 'Org' : 'Repo';
const runnerOwner = enableOrgLevel ? payload.repositoryOwner : `${payload.repositoryOwner}/${payload.repositoryName}`;

LogFields.fields = {};
LogFields.fields.runnerType = runnerType;
LogFields.fields.runnerOwner = runnerOwner;
LogFields.fields.event = payload.eventType;
LogFields.fields.id = payload.id.toString();

logger.info(`Received event`, LogFields.print());

let ghesApiUrl = '';
if (ghesBaseUrl) {
Expand All @@ -50,8 +59,6 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage

const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl);
const githubInstallationClient = await createOctoClient(ghAuth.token, ghesApiUrl);
const runnerType = enableOrgLevel ? 'Org' : 'Repo';
const runnerOwner = enableOrgLevel ? payload.repositoryOwner : `${payload.repositoryOwner}/${payload.repositoryName}`;

const isQueued = await getJobStatus(githubInstallationClient, payload);
if (isQueued) {
Expand All @@ -60,10 +67,10 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
runnerType,
runnerOwner,
});
logger.info(`${runnerType} ${runnerOwner} has ${currentRunners.length}/${maximumRunners} runners`);
logger.info(`Current runners: ${currentRunners.length} of ${maximumRunners}`, LogFields.print());

if (currentRunners.length < maximumRunners) {
console.info(`Attempting to launch a new runner`);
logger.info(`Attempting to launch a new runner`, LogFields.print());
// create token
const registrationToken = enableOrgLevel
? await githubInstallationClient.actions.createRegistrationTokenForOrg({ org: payload.repositoryOwner })
Expand All @@ -87,7 +94,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
runnerType,
});
} else {
logger.info('No runner will be created, maximum number of runners reached.');
logger.info('No runner will be created, maximum number of runners reached.', LogFields.print());
}
}
}
Expand All @@ -112,7 +119,7 @@ async function getJobStatus(githubInstallationClient: Octokit, payload: ActionRe
throw Error(`Event ${payload.eventType} is not supported`);
}
if (!isQueued) {
logger.info(`Job ${payload.id} is not queued`);
logger.info(`Job not queued`, LogFields.print());
}
return isQueued;
}
Expand All @@ -121,14 +128,14 @@ export async function createRunnerLoop(runnerParameters: RunnerInputParameters):
const launchTemplateNames = process.env.LAUNCH_TEMPLATE_NAME?.split(',') as string[];
let launched = false;
for (let i = 0; i < launchTemplateNames.length; i++) {
logger.info(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]}.`);
logger.info(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]}.`, LogFields.print());
try {
await createRunner(runnerParameters, launchTemplateNames[i]);
launched = true;
break;
} catch (error) {
logger.debug(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]} FAILED.`);
logger.error(error);
logger.debug(`Attempt '${i}' to launch instance using ${launchTemplateNames[i]} FAILED.`, LogFields.print());
logger.error(error, LogFields.print());
}
}
if (launched == false) {
Expand Down