Skip to content

Commit

Permalink
feat(core): Introduce worker metrics (#10850)
Browse files Browse the repository at this point in the history
  • Loading branch information
ivov authored Sep 18, 2024
1 parent ab83c4b commit 08ebe1e
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 55 deletions.
14 changes: 9 additions & 5 deletions packages/cli/src/commands/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { LogStreamingEventRelay } from '@/events/log-streaming-event-relay';
import { JobProcessor } from '@/scaling/job-processor';
import { Publisher } from '@/scaling/pubsub/publisher.service';
import type { ScalingService } from '@/scaling/scaling.service';
import type { WorkerServerEndpointsConfig } from '@/scaling/worker-server';
import { OrchestrationHandlerWorkerService } from '@/services/orchestration/worker/orchestration.handler.worker.service';
import { OrchestrationWorkerService } from '@/services/orchestration/worker/orchestration.worker.service';

Expand Down Expand Up @@ -160,12 +161,15 @@ export class Worker extends BaseCommand {
this.logger.info(` * Concurrency: ${this.concurrency}`);
this.logger.info('');

if (
this.globalConfig.queue.health.active ||
this.globalConfig.credentials.overwrite.endpoint !== ''
) {
const endpointsConfig: WorkerServerEndpointsConfig = {
health: this.globalConfig.queue.health.active,
overwrites: this.globalConfig.credentials.overwrite.endpoint !== '',
metrics: this.globalConfig.endpoints.metrics.enable,
};

if (Object.values(endpointsConfig).some((e) => e)) {
const { WorkerServer } = await import('@/scaling/worker-server');
await Container.get(WorkerServer).init();
await Container.get(WorkerServer).init(endpointsConfig);
}

if (!inTest && process.stdout.isTTY) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { GlobalConfig } from '@n8n/config';
import type express from 'express';
import promBundle from 'express-prom-bundle';
import { mock } from 'jest-mock-extended';
import type { InstanceSettings } from 'n8n-core';
import promClient from 'prom-client';

import config from '@/config';
Expand Down Expand Up @@ -43,11 +44,13 @@ describe('PrometheusMetricsService', () => {
const app = mock<express.Application>();
const eventBus = mock<MessageEventBus>();
const eventService = mock<EventService>();
const instanceSettings = mock<InstanceSettings>({ instanceType: 'main' });
const prometheusMetricsService = new PrometheusMetricsService(
mock(),
eventBus,
globalConfig,
eventService,
instanceSettings,
);

afterEach(() => {
Expand All @@ -64,6 +67,7 @@ describe('PrometheusMetricsService', () => {
mock(),
customGlobalConfig,
mock(),
instanceSettings,
);

await customPrometheusMetricsService.init(app);
Expand Down Expand Up @@ -204,5 +208,18 @@ describe('PrometheusMetricsService', () => {
expect(promClient.Counter).toHaveBeenCalledTimes(0); // cache metrics
expect(eventService.on).not.toHaveBeenCalled();
});

it('should not set up queue metrics if enabled and on scaling mode but instance is not main', async () => {
config.set('executions.mode', 'queue');
prometheusMetricsService.enableMetric('queue');
// @ts-expect-error private field
instanceSettings.instanceType = 'worker';

await prometheusMetricsService.init(app);

expect(promClient.Gauge).toHaveBeenCalledTimes(1); // version metric
expect(promClient.Counter).toHaveBeenCalledTimes(0); // cache metrics
expect(eventService.on).not.toHaveBeenCalled();
});
});
});
10 changes: 9 additions & 1 deletion packages/cli/src/metrics/prometheus-metrics.service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { GlobalConfig } from '@n8n/config';
import type express from 'express';
import promBundle from 'express-prom-bundle';
import { InstanceSettings } from 'n8n-core';
import { EventMessageTypeNames } from 'n8n-workflow';
import promClient, { type Counter, type Gauge } from 'prom-client';
import semverParse from 'semver/functions/parse';
Expand All @@ -22,6 +23,7 @@ export class PrometheusMetricsService {
private readonly eventBus: MessageEventBus,
private readonly globalConfig: GlobalConfig,
private readonly eventService: EventService,
private readonly instanceSettings: InstanceSettings,
) {}

private readonly counters: { [key: string]: Counter<string> | null } = {};
Expand Down Expand Up @@ -227,7 +229,13 @@ export class PrometheusMetricsService {
}

private initQueueMetrics() {
if (!this.includes.metrics.queue || config.getEnv('executions.mode') !== 'queue') return;
if (
!this.includes.metrics.queue ||
config.getEnv('executions.mode') !== 'queue' ||
this.instanceSettings.instanceType !== 'main'
) {
return;
}

this.gauges.waiting = new promClient.Gauge({
name: this.prefix + 'scaling_mode_queue_jobs_waiting',
Expand Down
111 changes: 80 additions & 31 deletions packages/cli/src/scaling/__tests__/worker-server.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import * as http from 'node:http';

import { PortTakenError } from '@/errors/port-taken.error';
import type { ExternalHooks } from '@/external-hooks';
import type { PrometheusMetricsService } from '@/metrics/prometheus-metrics.service';
import { bodyParser, rawBodyReader } from '@/middlewares';

import { WorkerServer } from '../worker-server';
Expand All @@ -28,6 +29,7 @@ describe('WorkerServer', () => {

const externalHooks = mock<ExternalHooks>();
const instanceSettings = mock<InstanceSettings>({ instanceType: 'worker' });
const prometheusMetricsService = mock<PrometheusMetricsService>();

beforeEach(() => {
globalConfig = mock<GlobalConfig>({
Expand All @@ -52,6 +54,7 @@ describe('WorkerServer', () => {
mock(),
externalHooks,
mock<InstanceSettings>({ instanceType: 'webhook' }),
prometheusMetricsService,
),
).toThrowError(AssertionError);
});
Expand All @@ -68,62 +71,101 @@ describe('WorkerServer', () => {

expect(
() =>
new WorkerServer(globalConfig, mock(), mock(), mock(), externalHooks, instanceSettings),
new WorkerServer(
globalConfig,
mock(),
mock(),
mock(),
externalHooks,
instanceSettings,
prometheusMetricsService,
),
).toThrowError(PortTakenError);
});
});

it('should set up `/healthz` if health check is enabled', async () => {
jest.spyOn(http, 'createServer').mockReturnValue(mock<http.Server>());

new WorkerServer(globalConfig, mock(), mock(), mock(), externalHooks, instanceSettings);

expect(app.get).toHaveBeenCalledWith('/healthz', expect.any(Function));
});

it('should not set up `/healthz` if health check is disabled', async () => {
globalConfig.queue.health.active = false;

jest.spyOn(http, 'createServer').mockReturnValue(mock<http.Server>());

new WorkerServer(globalConfig, mock(), mock(), mock(), externalHooks, instanceSettings);
describe('init', () => {
it('should mount all endpoints when all are enabled', async () => {
const server = mock<http.Server>();
jest.spyOn(http, 'createServer').mockReturnValue(server);

expect(app.get).not.toHaveBeenCalled();
});
server.listen.mockImplementation((_port, callback: () => void) => {
callback();
return server;
});

it('should set up `/:endpoint` if overwrites endpoint is set', async () => {
jest.spyOn(http, 'createServer').mockReturnValue(mock<http.Server>());
const workerServer = new WorkerServer(
globalConfig,
mock(),
mock(),
mock(),
externalHooks,
instanceSettings,
prometheusMetricsService,
);

const CREDENTIALS_OVERWRITE_ENDPOINT = 'credentials/overwrites';
globalConfig.credentials.overwrite.endpoint = CREDENTIALS_OVERWRITE_ENDPOINT;

new WorkerServer(globalConfig, mock(), mock(), mock(), externalHooks, instanceSettings);
await workerServer.init({ health: true, overwrites: true, metrics: true });

expect(app.get).toHaveBeenCalledWith('/healthz', expect.any(Function));
expect(app.post).toHaveBeenCalledWith(
`/${CREDENTIALS_OVERWRITE_ENDPOINT}`,
rawBodyReader,
bodyParser,
expect.any(Function),
);
expect(prometheusMetricsService.init).toHaveBeenCalledWith(app);
});

it('should not set up `/:endpoint` if overwrites endpoint is not set', async () => {
jest.spyOn(http, 'createServer').mockReturnValue(mock<http.Server>());
it('should mount only health and overwrites endpoints if only those are enabled', async () => {
const server = mock<http.Server>();
jest.spyOn(http, 'createServer').mockReturnValue(server);

new WorkerServer(globalConfig, mock(), mock(), mock(), externalHooks, instanceSettings);
server.listen.mockImplementation((_port, callback: () => void) => {
callback();
return server;
});

const workerServer = new WorkerServer(
globalConfig,
mock(),
mock(),
mock(),
externalHooks,
instanceSettings,
prometheusMetricsService,
);

await workerServer.init({ health: true, overwrites: false, metrics: true });

expect(app.get).toHaveBeenCalledWith('/healthz', expect.any(Function));
expect(app.post).not.toHaveBeenCalled();
expect(prometheusMetricsService.init).toHaveBeenCalledWith(app);
});
});

describe('init', () => {
it('should call `worker.ready` external hook', async () => {
it('should throw if no endpoints are enabled', async () => {
const server = mock<http.Server>();
jest.spyOn(http, 'createServer').mockReturnValue(server);

server.listen.mockImplementation((_port, callback: () => void) => {
callback();
return server;
});
const workerServer = new WorkerServer(
globalConfig,
mock(),
mock(),
mock(),
externalHooks,
instanceSettings,
prometheusMetricsService,
);
await expect(
workerServer.init({ health: false, overwrites: false, metrics: false }),
).rejects.toThrowError(AssertionError);
});

it('should call `worker.ready` external hook', async () => {
const server = mock<http.Server>();
jest.spyOn(http, 'createServer').mockReturnValue(server);

const workerServer = new WorkerServer(
globalConfig,
Expand All @@ -132,8 +174,15 @@ describe('WorkerServer', () => {
mock(),
externalHooks,
instanceSettings,
prometheusMetricsService,
);
await workerServer.init();

server.listen.mockImplementation((_port, callback: () => void) => {
callback();
return server;
});

await workerServer.init({ health: true, overwrites: true, metrics: true });

expect(externalHooks.run).toHaveBeenCalledWith('worker.ready');
});
Expand Down
63 changes: 45 additions & 18 deletions packages/cli/src/scaling/worker-server.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { GlobalConfig } from '@n8n/config';
import type { Application } from 'express';
import express from 'express';
import { InstanceSettings } from 'n8n-core';
import { ensureError } from 'n8n-workflow';
Expand All @@ -16,10 +17,22 @@ import { ServiceUnavailableError } from '@/errors/response-errors/service-unavai
import { ExternalHooks } from '@/external-hooks';
import type { ICredentialsOverwrite } from '@/interfaces';
import { Logger } from '@/logger';
import { PrometheusMetricsService } from '@/metrics/prometheus-metrics.service';
import { rawBodyReader, bodyParser } from '@/middlewares';
import * as ResponseHelper from '@/response-helper';
import { ScalingService } from '@/scaling/scaling.service';

export type WorkerServerEndpointsConfig = {
/** Whether the `/healthz` endpoint is enabled. */
health: boolean;

/** Whether the [credentials overwrites endpoint](https://docs.n8n.io/embed/configuration/#credential-overwrites) is enabled. */
overwrites: boolean;

/** Whether the `/metrics` endpoint is enabled. */
metrics: boolean;
};

/**
* Responsible for handling HTTP requests sent to a worker.
*/
Expand All @@ -29,9 +42,10 @@ export class WorkerServer {

private readonly server: Server;

/**
* @doc https://docs.n8n.io/embed/configuration/#credential-overwrites
*/
private readonly app: Application;

private endpointsConfig: WorkerServerEndpointsConfig;

private overwritesLoaded = false;

constructor(
Expand All @@ -41,42 +55,55 @@ export class WorkerServer {
private readonly credentialsOverwrites: CredentialsOverwrites,
private readonly externalHooks: ExternalHooks,
private readonly instanceSettings: InstanceSettings,
private readonly prometheusMetricsService: PrometheusMetricsService,
) {
assert(this.instanceSettings.instanceType === 'worker');

const app = express();
this.app = express();

app.disable('x-powered-by');
this.app.disable('x-powered-by');

this.server = http.createServer(app);
this.server = http.createServer(this.app);

this.port = this.globalConfig.queue.health.port;

const overwritesEndpoint = this.globalConfig.credentials.overwrite.endpoint;

this.server.on('error', (error: NodeJS.ErrnoException) => {
if (error.code === 'EADDRINUSE') throw new PortTakenError(this.port);
});
}

if (this.globalConfig.queue.health.active) {
app.get('/healthz', async (req, res) => await this.healthcheck(req, res));
}
async init(endpointsConfig: WorkerServerEndpointsConfig) {
assert(Object.values(endpointsConfig).some((e) => e));

if (overwritesEndpoint !== '') {
app.post(`/${overwritesEndpoint}`, rawBodyReader, bodyParser, (req, res) =>
this.handleOverwrites(req, res),
);
}
}
this.endpointsConfig = endpointsConfig;

await this.mountEndpoints();

async init() {
await new Promise<void>((resolve) => this.server.listen(this.port, resolve));

await this.externalHooks.run('worker.ready');

this.logger.info(`\nn8n worker server listening on port ${this.port}`);
}

private async mountEndpoints() {
if (this.endpointsConfig.health) {
this.app.get('/healthz', async (req, res) => await this.healthcheck(req, res));
}

if (this.endpointsConfig.overwrites) {
const { endpoint } = this.globalConfig.credentials.overwrite;

this.app.post(`/${endpoint}`, rawBodyReader, bodyParser, (req, res) =>
this.handleOverwrites(req, res),
);
}

if (this.endpointsConfig.metrics) {
await this.prometheusMetricsService.init(this.app);
}
}

private async healthcheck(_req: express.Request, res: express.Response) {
this.logger.debug('[WorkerServer] Health check started');

Expand Down

0 comments on commit 08ebe1e

Please sign in to comment.