Skip to content

Commit

Permalink
feat(metrics): more metrics related to replications
Browse files Browse the repository at this point in the history
  • Loading branch information
iainsproat committed Dec 16, 2024
1 parent efe2416 commit 4261e26
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 12 deletions.
33 changes: 33 additions & 0 deletions packages/monitor-deployment/src/observability/metrics/dbWorkers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import prometheusClient from 'prom-client'
import { join } from 'lodash-es'
import type { MetricInitializer } from '@/observability/types.js'

export const init: MetricInitializer = (config) => {
const { labelNames, namePrefix, logger } = config
const dbWorkers = new prometheusClient.Gauge({
name: join([namePrefix, 'db_workers'], '_'),
help: 'Number of database workers',
labelNames: ['region', ...labelNames]
})
return async (params) => {
const { dbClients, labels } = params
await Promise.all(
dbClients.map(async ({ client, regionKey }) => {
const connectionResults = await client.raw<{
rows: [{ worker_count: string }]
}>(`SELECT COUNT(*) AS worker_count FROM pg_stat_activity;`)
if (!connectionResults.rows.length) {
logger.error(
{ region: regionKey },
"No database workers found for region '{region}'. This is odd."
)
return
}
dbWorkers.set(
{ ...labels, region: regionKey },
parseInt(connectionResults.rows[0].worker_count)
)
})
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import prometheusClient from 'prom-client'
import { join } from 'lodash-es'
import type { MetricInitializer } from '@/observability/types.js'

export const init: MetricInitializer = (config) => {
const { labelNames, namePrefix, logger } = config
const promMetric = new prometheusClient.Gauge({
name: join([namePrefix, 'db_workers_awaiting_locks'], '_'),
help: 'Number of database workers awaiting locks',
labelNames: ['region', ...labelNames]
})
return async (params) => {
const { dbClients, labels } = params
await Promise.all(
dbClients.map(async ({ client, regionKey }) => {
const queryResults = await client.raw<{
rows: [{ count: string }]
}>(`SELECT COUNT(*) FROM pg_stat_activity WHERE wait_event = 'Lock';`)
if (!queryResults.rows.length) {
logger.error(
{ region: regionKey },
"No database workers found for region '{region}'. This is odd."
)
return
}
promMetric.set(
{ ...labels, region: regionKey },
parseInt(queryResults.rows[0].count)
)
})
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,29 @@ import type { MetricInitializer } from '@/observability/types.js'
export const init: MetricInitializer = (config) => {
const { labelNames, namePrefix, logger } = config
const connections = new prometheusClient.Gauge({
name: join([namePrefix, 'db_used_connections'], '_'),
help: 'Number of active (used) database connections',
name: join([namePrefix, 'db_inactive_replication_slots'], '_'),
help: 'Number of inactive database replication slots',
labelNames: ['region', ...labelNames]
})
return async (params) => {
const { dbClients, labels } = params
await Promise.all(
dbClients.map(async ({ client, regionKey }) => {
const connectionResults = await client.raw<{
rows: [{ used_connections: string }]
}>(`SELECT COUNT(*) AS used_connections FROM pg_stat_activity;`)
rows: [{ inactive_replication_slots: string }]
}>(
`SELECT count(*) AS inactive_replication_slots FROM pg_replication_slots WHERE NOT active;`
)
if (!connectionResults.rows.length) {
logger.error(
{ region: regionKey },
"No active connections found for region '{region}'. This is odd."
"No data related to replication slots found for region '{region}'. This is odd."
)
return
}
connections.set(
{ ...labels, region: regionKey },
parseInt(connectionResults.rows[0].used_connections)
parseInt(connectionResults.rows[0].inactive_replication_slots)
)
})
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import type { MetricInitializer } from '@/observability/types.js'
export const init: MetricInitializer = (config) => {
const { labelNames, namePrefix, logger } = config
const totalConnections = new prometheusClient.Gauge({
name: join([namePrefix, 'db_total_connections'], '_'),
help: 'Total number of database connections',
name: join([namePrefix, 'db_max_connections'], '_'),
help: 'Maximum number of database connections allowed by the server',
labelNames: ['region', ...labelNames]
})
return async (params) => {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import prometheusClient from 'prom-client'
import { join } from 'lodash-es'
import type { MetricInitializer } from '@/observability/types.js'

export const init: MetricInitializer = (config) => {
const { labelNames, namePrefix, logger } = config
const promMetric = new prometheusClient.Gauge({
name: join([namePrefix, 'db_replication_slot_lag'], '_'),
help: 'Lag of replication slots in bytes',
labelNames: ['region', ...labelNames]
})
return async (params) => {
const { dbClients, labels } = params
await Promise.all(
dbClients.map(async ({ client, regionKey }) => {
const queryResults = await client.raw<{
rows: [{ slot_lag_bytes: number }]
}>(`
SELECT pg_current_wal_lsn() - confirmed_flush_lsn AS slot_lag_bytes
FROM pg_replication_slots;
`)
if (!queryResults.rows.length) {
logger.error(
{ region: regionKey },
"No database replication slots found for region '{region}'. This is odd."
)
return
}
promMetric.set(
{ ...labels, region: regionKey },
queryResults.rows[0].slot_lag_bytes
)
})
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import prometheusClient from 'prom-client'
import { join } from 'lodash-es'
import type { MetricInitializer } from '@/observability/types.js'

export const init: MetricInitializer = (config) => {
const { labelNames, namePrefix, logger } = config
const promMetric = new prometheusClient.Gauge({
name: join([namePrefix, 'db_replication_worker_lag'], '_'),
help: 'Lag of replication workers, by type of lag',
labelNames: ['region', 'lagtype', ...labelNames]
})
return async (params) => {
const { dbClients, labels } = params
await Promise.all(
dbClients.map(async ({ client, regionKey }) => {
const queryResults = await client.raw<{
rows: [{ write_lag: number; flush_lag: number; replay_lag: number }]
}>(`
SELECT write_lsn - sent_lsn AS write_lag,
flush_lsn - write_lsn AS flush_lag,
replay_lsn - flush_lsn AS replay_lag
FROM pg_stat_replication;
`)
if (!queryResults.rows.length) {
logger.error(
{ region: regionKey },
"No database workers found for region '{region}'. This is odd."
)
return
}
promMetric.set(
{ ...labels, region: regionKey, lagtype: 'write' },
queryResults.rows[0].write_lag
)
promMetric.set(
{ ...labels, region: regionKey, lagtype: 'flush' },
queryResults.rows[0].flush_lag
)
promMetric.set(
{ ...labels, region: regionKey, lagtype: 'replay' },
queryResults.rows[0].replay_lag
)
})
)
}
}
16 changes: 12 additions & 4 deletions packages/monitor-deployment/src/observability/prometheusMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@ import { join } from 'lodash-es'
import { Histogram, Registry } from 'prom-client'
import prometheusClient from 'prom-client'
import { init as commits } from '@/observability/metrics/commits.js'
import { init as connections } from '@/observability/metrics/connections.js'
import { init as connectionsTotal } from '@/observability/metrics/connectionsTotal.js'
import { init as dbSize } from '@/observability/metrics/dbSize.js'
import { init as dbWorkers } from '@/observability/metrics/dbWorkers.js'
import { init as dbWorkersAwaitingLocks } from '@/observability/metrics/dbWorkersAwaitingLocks.js'
import { init as fileImports } from '@/observability/metrics/fileImports.js'
import { init as fileSize } from '@/observability/metrics/fileSize.js'
import { init as inactiveReplicationSlots } from '@/observability/metrics/inactiveReplicationSlots.js'
import { init as maxConnections } from '@/observability/metrics/maxConnections.js'
import { init as objects } from '@/observability/metrics/objects.js'
import { init as previews } from '@/observability/metrics/previews.js'
import { init as replicationSlotLag } from '@/observability/metrics/replicationSlotLag.js'
import { init as replicationWorkerLag } from '@/observability/metrics/replicationWorkerLag.js'
import { init as streams } from '@/observability/metrics/streams.js'
import { init as tablesize } from '@/observability/metrics/tableSize.js'
import { init as users } from '@/observability/metrics/users.js'
Expand Down Expand Up @@ -49,13 +53,17 @@ function initMonitoringMetrics(params: {

const metricsToInitialize = [
commits,
connections,
connectionsTotal,
dbWorkers,
dbWorkersAwaitingLocks,
dbSize,
fileImports,
fileSize,
inactiveReplicationSlots,
maxConnections,
objects,
previews,
replicationSlotLag,
replicationWorkerLag,
streams,
tablesize,
users,
Expand Down

0 comments on commit 4261e26

Please sign in to comment.