From baf06e273ebd1a71e0ec9e92238ceb22a351b415 Mon Sep 17 00:00:00 2001
From: Iain Sproat <68657+iainsproat@users.noreply.github.com>
Date: Tue, 17 Dec 2024 22:02:45 +0000
Subject: [PATCH] feat(database monitor): query configured postgres values

- provides a threshold for use in monitoring & alerting
---
Notes (text between the three-dash marker and the first diff is not part of
the commit message):
  * Each new metrics/*.ts file registers one prom-client Gauge with a 'region'
    label and sets it from a Postgres `SHOW <setting>;` query, once per entry
    in dbClients.
  * The db_wal_level_is_logical gauge is set to 1 when wal_level is 'logical'
    and to 0 for any other value.
  * When a query returns no rows, the metric is left unset for that region and
    an error is logged instead.

 .../metrics/dbMaxLogicalReplicationWorkers.ts | 33 +++++++++++++++++++
 .../metrics/dbMaxReplicationSlots.ts          | 33 +++++++++++++++++++
 .../dbMaxSyncWorkersPerSubscription.ts        | 33 +++++++++++++++++++
 .../observability/metrics/dbMaxWalSenders.ts  | 33 +++++++++++++++++++
 .../metrics/dbMaxWorkerProcesses.ts           | 33 +++++++++++++++++++
 .../src/observability/metrics/dbWalLevel.ts   | 33 +++++++++++++++++++
 .../src/observability/prometheusMetrics.ts    | 14 +++++++-
 7 files changed, 211 insertions(+), 1 deletion(-)
 create mode 100644 packages/monitor-deployment/src/observability/metrics/dbMaxLogicalReplicationWorkers.ts
 create mode 100644 packages/monitor-deployment/src/observability/metrics/dbMaxReplicationSlots.ts
 create mode 100644 packages/monitor-deployment/src/observability/metrics/dbMaxSyncWorkersPerSubscription.ts
 create mode 100644 packages/monitor-deployment/src/observability/metrics/dbMaxWalSenders.ts
 create mode 100644 packages/monitor-deployment/src/observability/metrics/dbMaxWorkerProcesses.ts
 create mode 100644 packages/monitor-deployment/src/observability/metrics/dbWalLevel.ts

diff --git a/packages/monitor-deployment/src/observability/metrics/dbMaxLogicalReplicationWorkers.ts b/packages/monitor-deployment/src/observability/metrics/dbMaxLogicalReplicationWorkers.ts
new file mode 100644
index 0000000000..8a993b97a1
--- /dev/null
+++ b/packages/monitor-deployment/src/observability/metrics/dbMaxLogicalReplicationWorkers.ts
@@ -0,0 +1,33 @@
+import prometheusClient from 'prom-client'
+import { join } from 'lodash-es'
+import type { MetricInitializer } from '@/observability/types.js'
+
+export const init: MetricInitializer = (config) => {
+  const { labelNames, namePrefix, logger } = config
+  const metric = new prometheusClient.Gauge({
+    name: join([namePrefix, 'db', 'max_logical_replication_workers'], '_'),
+    help: 'Configured value of max_logical_replication_workers for the Postgres database',
+    labelNames: ['region', ...labelNames]
+  })
+  return async (params) => {
+    const { dbClients, labels } = params
+    await Promise.all(
+      dbClients.map(async ({ client, regionKey }) => {
+        const queryResults = await client.raw<{
+          rows: [{ max_logical_replication_workers: string }]
+        }>(`SHOW max_logical_replication_workers;`)
+        if (!queryResults.rows.length) {
+          logger.error(
+            { region: regionKey },
+            "No max_logical_replication_workers found for region '{region}'. This is odd."
+          )
+          return
+        }
+        metric.set(
+          { ...labels, region: regionKey },
+          parseInt(queryResults.rows[0].max_logical_replication_workers)
+        )
+      })
+    )
+  }
+}
diff --git a/packages/monitor-deployment/src/observability/metrics/dbMaxReplicationSlots.ts b/packages/monitor-deployment/src/observability/metrics/dbMaxReplicationSlots.ts
new file mode 100644
index 0000000000..7daa523fc7
--- /dev/null
+++ b/packages/monitor-deployment/src/observability/metrics/dbMaxReplicationSlots.ts
@@ -0,0 +1,33 @@
+import prometheusClient from 'prom-client'
+import { join } from 'lodash-es'
+import type { MetricInitializer } from '@/observability/types.js'
+
+export const init: MetricInitializer = (config) => {
+  const { labelNames, namePrefix, logger } = config
+  const metric = new prometheusClient.Gauge({
+    name: join([namePrefix, 'db_max_replication_slots'], '_'),
+    help: 'Configured value of max_replication_slots for the Postgres database',
+    labelNames: ['region', ...labelNames]
+  })
+  return async (params) => {
+    const { dbClients, labels } = params
+    await Promise.all(
+      dbClients.map(async ({ client, regionKey }) => {
+        const queryResults = await client.raw<{
+          rows: [{ max_replication_slots: string }]
+        }>(`SHOW max_replication_slots;`)
+        if (!queryResults.rows.length) {
+          logger.error(
+            { region: regionKey },
+            "No max_replication_slots found for region '{region}'. This is odd."
+          )
+          return
+        }
+        metric.set(
+          { ...labels, region: regionKey },
+          parseInt(queryResults.rows[0].max_replication_slots)
+        )
+      })
+    )
+  }
+}
diff --git a/packages/monitor-deployment/src/observability/metrics/dbMaxSyncWorkersPerSubscription.ts b/packages/monitor-deployment/src/observability/metrics/dbMaxSyncWorkersPerSubscription.ts
new file mode 100644
index 0000000000..55c0d24fc4
--- /dev/null
+++ b/packages/monitor-deployment/src/observability/metrics/dbMaxSyncWorkersPerSubscription.ts
@@ -0,0 +1,33 @@
+import prometheusClient from 'prom-client'
+import { join } from 'lodash-es'
+import type { MetricInitializer } from '@/observability/types.js'
+
+export const init: MetricInitializer = (config) => {
+  const { labelNames, namePrefix, logger } = config
+  const metric = new prometheusClient.Gauge({
+    name: join([namePrefix, 'db', 'max_sync_workers_per_subscription'], '_'),
+    help: 'Configured value of max_sync_workers_per_subscription for the Postgres database',
+    labelNames: ['region', ...labelNames]
+  })
+  return async (params) => {
+    const { dbClients, labels } = params
+    await Promise.all(
+      dbClients.map(async ({ client, regionKey }) => {
+        const queryResults = await client.raw<{
+          rows: [{ max_sync_workers_per_subscription: string }]
+        }>(`SHOW max_sync_workers_per_subscription;`)
+        if (!queryResults.rows.length) {
+          logger.error(
+            { region: regionKey },
+            "No max_sync_workers_per_subscription found for region '{region}'. This is odd."
+          )
+          return
+        }
+        metric.set(
+          { ...labels, region: regionKey },
+          parseInt(queryResults.rows[0].max_sync_workers_per_subscription)
+        )
+      })
+    )
+  }
+}
diff --git a/packages/monitor-deployment/src/observability/metrics/dbMaxWalSenders.ts b/packages/monitor-deployment/src/observability/metrics/dbMaxWalSenders.ts
new file mode 100644
index 0000000000..c7e930bb35
--- /dev/null
+++ b/packages/monitor-deployment/src/observability/metrics/dbMaxWalSenders.ts
@@ -0,0 +1,33 @@
+import prometheusClient from 'prom-client'
+import { join } from 'lodash-es'
+import type { MetricInitializer } from '@/observability/types.js'
+
+export const init: MetricInitializer = (config) => {
+  const { labelNames, namePrefix, logger } = config
+  const metric = new prometheusClient.Gauge({
+    name: join([namePrefix, 'db_max_wal_senders'], '_'),
+    help: 'Configured value of max_wal_senders for the Postgres database',
+    labelNames: ['region', ...labelNames]
+  })
+  return async (params) => {
+    const { dbClients, labels } = params
+    await Promise.all(
+      dbClients.map(async ({ client, regionKey }) => {
+        const queryResults = await client.raw<{
+          rows: [{ max_wal_senders: string }]
+        }>(`SHOW max_wal_senders;`)
+        if (!queryResults.rows.length) {
+          logger.error(
+            { region: regionKey },
+            "No max_wal_senders found for region '{region}'. This is odd."
+          )
+          return
+        }
+        metric.set(
+          { ...labels, region: regionKey },
+          parseInt(queryResults.rows[0].max_wal_senders)
+        )
+      })
+    )
+  }
+}
diff --git a/packages/monitor-deployment/src/observability/metrics/dbMaxWorkerProcesses.ts b/packages/monitor-deployment/src/observability/metrics/dbMaxWorkerProcesses.ts
new file mode 100644
index 0000000000..c13fbe8126
--- /dev/null
+++ b/packages/monitor-deployment/src/observability/metrics/dbMaxWorkerProcesses.ts
@@ -0,0 +1,33 @@
+import prometheusClient from 'prom-client'
+import { join } from 'lodash-es'
+import type { MetricInitializer } from '@/observability/types.js'
+
+export const init: MetricInitializer = (config) => {
+  const { labelNames, namePrefix, logger } = config
+  const metric = new prometheusClient.Gauge({
+    name: join([namePrefix, 'db', 'max_worker_processes'], '_'),
+    help: 'Configured value of max_worker_processes for the Postgres database',
+    labelNames: ['region', ...labelNames]
+  })
+  return async (params) => {
+    const { dbClients, labels } = params
+    await Promise.all(
+      dbClients.map(async ({ client, regionKey }) => {
+        const queryResults = await client.raw<{
+          rows: [{ max_worker_processes: string }]
+        }>(`SHOW max_worker_processes;`)
+        if (!queryResults.rows.length) {
+          logger.error(
+            { region: regionKey },
+            "No max_worker_processes found for region '{region}'. This is odd."
+          )
+          return
+        }
+        metric.set(
+          { ...labels, region: regionKey },
+          parseInt(queryResults.rows[0].max_worker_processes)
+        )
+      })
+    )
+  }
+}
diff --git a/packages/monitor-deployment/src/observability/metrics/dbWalLevel.ts b/packages/monitor-deployment/src/observability/metrics/dbWalLevel.ts
new file mode 100644
index 0000000000..cc5c043353
--- /dev/null
+++ b/packages/monitor-deployment/src/observability/metrics/dbWalLevel.ts
@@ -0,0 +1,33 @@
+import prometheusClient from 'prom-client'
+import { join } from 'lodash-es'
+import type { MetricInitializer } from '@/observability/types.js'
+
+export const init: MetricInitializer = (config) => {
+  const { labelNames, namePrefix, logger } = config
+  const metric = new prometheusClient.Gauge({
+    name: join([namePrefix, 'db_wal_level_is_logical'], '_'),
+    help: "Indicates whether the value of wal_level for the Postgres database is 'logical'",
+    labelNames: ['region', ...labelNames]
+  })
+  return async (params) => {
+    const { dbClients, labels } = params
+    await Promise.all(
+      dbClients.map(async ({ client, regionKey }) => {
+        const queryResults = await client.raw<{
+          rows: [{ wal_level: string }]
+        }>(`SHOW wal_level;`)
+        if (!queryResults.rows.length) {
+          logger.error(
+            { region: regionKey },
+            "No wal_level found for region '{region}'. This is odd."
+          )
+          return
+        }
+        metric.set(
+          { ...labels, region: regionKey },
+          queryResults.rows[0].wal_level === 'logical' ? 1 : 0
+        )
+      })
+    )
+  }
+}
diff --git a/packages/monitor-deployment/src/observability/prometheusMetrics.ts b/packages/monitor-deployment/src/observability/prometheusMetrics.ts
index 4023e6d5ed..3ea9e20e93 100644
--- a/packages/monitor-deployment/src/observability/prometheusMetrics.ts
+++ b/packages/monitor-deployment/src/observability/prometheusMetrics.ts
@@ -5,7 +5,13 @@ import { join } from 'lodash-es'
 import { Counter, Histogram, Registry } from 'prom-client'
 import prometheusClient from 'prom-client'
 import { init as commits } from '@/observability/metrics/commits.js'
+import { init as dbMaxLogicalReplicationWorkers } from '@/observability/metrics/dbMaxLogicalReplicationWorkers.js'
+import { init as dbMaxReplicationSlots } from '@/observability/metrics/dbMaxReplicationSlots.js'
+import { init as dbMaxSyncWorkersPerSubscription } from '@/observability/metrics/dbMaxSyncWorkersPerSubscription.js'
+import { init as dbMaxWalSenders } from '@/observability/metrics/dbMaxWalSenders.js'
+import { init as dbMaxWorkerProcesses } from '@/observability/metrics/dbMaxWorkerProcesses.js'
 import { init as dbSize } from '@/observability/metrics/dbSize.js'
+import { init as dbWalLevel } from '@/observability/metrics/dbWalLevel.js'
 import { init as dbWorkers } from '@/observability/metrics/dbWorkers.js'
 import { init as dbWorkersAwaitingLocks } from '@/observability/metrics/dbWorkersAwaitingLocks.js'
 import { init as fileImports } from '@/observability/metrics/fileImports.js'
@@ -54,9 +60,15 @@ function initMonitoringMetrics(params: {
 
   const metricsToInitialize = [
     commits,
+    dbMaxLogicalReplicationWorkers,
+    dbMaxReplicationSlots,
+    dbMaxSyncWorkersPerSubscription,
+    dbMaxWalSenders,
+    dbMaxWorkerProcesses,
+    dbWalLevel,
+    dbSize,
     dbWorkers,
     dbWorkersAwaitingLocks,
-    dbSize,
     fileImports,
     fileSize,
     inactiveReplicationSlots,