Skip to content

Commit 9b9c1a7

Browse files
committed
refactor pg_stat_bgwriter metrics into standalone collector
This moves the metrics that are queried from pg_stat_bgwriter into a dedicated collector instead of dynamically generating queries and metrics from a map. It adopts a new, consistent namespace `postgres` for its metrics and includes the `_total` suffix on all of the counters to match Prometheus standards. This implementation uses QueryRowContext to allow for later addition of context for cancellation. The Postgres documentation states that there is one line per WAL sender process, but it is unclear how to differentiate between them in any meaningful way. When querying the table, there is no column to identify the row, only metrics about bgwriter. Signed-off-by: Joe Adams <github@joeadams.io>
1 parent 017e060 commit 9b9c1a7

File tree

5 files changed

+231
-22
lines changed

5 files changed

+231
-22
lines changed

cmd/postgres_exporter/postgres_exporter.go

-17
Original file line numberDiff line numberDiff line change
@@ -163,23 +163,6 @@ func dumpMaps() {
163163
}
164164

165165
var builtinMetricMaps = map[string]intermediateMetricMap{
166-
"pg_stat_bgwriter": {
167-
map[string]ColumnMapping{
168-
"checkpoints_timed": {COUNTER, "Number of scheduled checkpoints that have been performed", nil, nil},
169-
"checkpoints_req": {COUNTER, "Number of requested checkpoints that have been performed", nil, nil},
170-
"checkpoint_write_time": {COUNTER, "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds", nil, nil},
171-
"checkpoint_sync_time": {COUNTER, "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds", nil, nil},
172-
"buffers_checkpoint": {COUNTER, "Number of buffers written during checkpoints", nil, nil},
173-
"buffers_clean": {COUNTER, "Number of buffers written by the background writer", nil, nil},
174-
"maxwritten_clean": {COUNTER, "Number of times the background writer stopped a cleaning scan because it had written too many buffers", nil, nil},
175-
"buffers_backend": {COUNTER, "Number of buffers written directly by a backend", nil, nil},
176-
"buffers_backend_fsync": {COUNTER, "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)", nil, nil},
177-
"buffers_alloc": {COUNTER, "Number of buffers allocated", nil, nil},
178-
"stats_reset": {COUNTER, "Time at which these statistics were last reset", nil, nil},
179-
},
180-
true,
181-
0,
182-
},
183166
"pg_stat_database": {
184167
map[string]ColumnMapping{
185168
"datid": {LABEL, "OID of a database", nil, nil},

cmd/postgres_exporter/server.go

+15
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@
1414
package main
1515

1616
import (
17+
"context"
1718
"database/sql"
1819
"fmt"
20+
"log"
1921
"sync"
2022
"time"
2123

2224
"github.com/blang/semver"
2325
"github.com/go-kit/log/level"
26+
"github.com/prometheus-community/postgres_exporter/collector"
2427
"github.com/prometheus/client_golang/prometheus"
2528
)
2629

@@ -128,6 +131,18 @@ func (s *Server) Scrape(ch chan<- prometheus.Metric, disableSettingsMetrics bool
128131
err = fmt.Errorf("queryNamespaceMappings returned %d errors", len(errMap))
129132
}
130133

134+
// Only query pg_stat_* on master servers
135+
if s.master {
136+
pgsc := collector.NewPGStatBGWriterCollector()
137+
metrics, err := pgsc.Update(context.Background(), s.db, s.String())
138+
if err != nil {
139+
log.Printf("error in pgsc: %v", err)
140+
}
141+
for _, m := range metrics {
142+
ch <- m
143+
}
144+
}
145+
131146
return err
132147
}
133148

collector/collector.go

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
// namespace is the leading component passed to prometheus.BuildFQName for
// the metrics defined in this package.
const namespace = "postgres"

collector/stat_bgwriter.go

+195
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"context"
18+
"database/sql"
19+
"time"
20+
21+
"github.com/prometheus/client_golang/prometheus"
22+
)
23+
24+
// PGStatBGWriterCollector gathers metrics from the pg_stat_bgwriter view.
// It has no fields, so every value of the type is interchangeable.
type PGStatBGWriterCollector struct{}

// NewPGStatBGWriterCollector returns a ready-to-use collector.
func NewPGStatBGWriterCollector() *PGStatBGWriterCollector {
	return new(PGStatBGWriterCollector)
}
30+
31+
// bgWriterSubsystem is the subsystem component passed to
// prometheus.BuildFQName for every pg_stat_bgwriter metric.
const bgWriterSubsystem = "stat_bgwriter"
32+
33+
var statBGWriter = map[string]*prometheus.Desc{
34+
"checkpoints_timed": prometheus.NewDesc(
35+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_timed_total"),
36+
"Number of scheduled checkpoints that have been performed",
37+
[]string{"server"},
38+
prometheus.Labels{},
39+
),
40+
"checkpoints_req": prometheus.NewDesc(
41+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_req_total"),
42+
"Number of requested checkpoints that have been performed",
43+
[]string{"server"},
44+
prometheus.Labels{},
45+
),
46+
"checkpoint_write_time": prometheus.NewDesc(
47+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoint_write_time_total"),
48+
"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds",
49+
[]string{"server"},
50+
prometheus.Labels{},
51+
),
52+
"checkpoint_sync_time": prometheus.NewDesc(
53+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoint_sync_time_total"),
54+
"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds",
55+
[]string{"server"},
56+
prometheus.Labels{},
57+
),
58+
"buffers_checkpoint": prometheus.NewDesc(
59+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_checkpoint_total"),
60+
"Number of buffers written during checkpoints",
61+
[]string{"server"},
62+
prometheus.Labels{},
63+
),
64+
"buffers_clean": prometheus.NewDesc(
65+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_clean_total"),
66+
"Number of buffers written by the background writer",
67+
[]string{"server"},
68+
prometheus.Labels{},
69+
),
70+
"maxwritten_clean": prometheus.NewDesc(
71+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "maxwritten_clean_total"),
72+
"Number of times the background writer stopped a cleaning scan because it had written too many buffers",
73+
[]string{"server"},
74+
prometheus.Labels{},
75+
),
76+
"buffers_backend": prometheus.NewDesc(
77+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_backend_total"),
78+
"Number of buffers written directly by a backend",
79+
[]string{"server"},
80+
prometheus.Labels{},
81+
),
82+
"buffers_backend_fsync": prometheus.NewDesc(
83+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_backend_fsync_total"),
84+
"Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)",
85+
[]string{"server"},
86+
prometheus.Labels{},
87+
),
88+
"buffers_alloc": prometheus.NewDesc(
89+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_alloc_total"),
90+
"Number of buffers allocated",
91+
[]string{"server"},
92+
prometheus.Labels{},
93+
),
94+
"stats_reset": prometheus.NewDesc(
95+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "stats_reset_total"),
96+
"Time at which these statistics were last reset",
97+
[]string{"server"},
98+
prometheus.Labels{},
99+
),
100+
}
101+
102+
func (PGStatBGWriterCollector) Update(ctx context.Context, db *sql.DB, server string) ([]prometheus.Metric, error) {
103+
metrics := []prometheus.Metric{}
104+
row := db.QueryRowContext(ctx,
105+
`SELECT
106+
checkpoints_timed
107+
,checkpoints_req
108+
,checkpoint_write_time
109+
,checkpoint_sync_time
110+
,buffers_checkpoint
111+
,buffers_clean
112+
,maxwritten_clean
113+
,buffers_backend
114+
,buffers_backend_fsync
115+
,buffers_alloc
116+
,stats_reset
117+
FROM pg_stat_bgwriter;`)
118+
119+
var cpt int
120+
var cpr int
121+
var cpwt int
122+
var cpst int
123+
var bcp int
124+
var bc int
125+
var mwc int
126+
var bb int
127+
var bbf int
128+
var ba int
129+
var sr time.Time
130+
131+
err := row.Scan(&cpt, &cpr, &cpwt, &cpst, &bcp, &bc, &mwc, &bb, &bbf, &ba, &sr)
132+
if err != nil {
133+
return metrics, err
134+
}
135+
metrics = append(metrics,
136+
prometheus.MustNewConstMetric(
137+
statBGWriter["checkpoints_timed"],
138+
prometheus.CounterValue,
139+
float64(cpt),
140+
server,
141+
), prometheus.MustNewConstMetric(
142+
statBGWriter["checkpoints_req"],
143+
prometheus.CounterValue,
144+
float64(cpr),
145+
server,
146+
), prometheus.MustNewConstMetric(
147+
statBGWriter["checkpoint_write_time"],
148+
prometheus.CounterValue,
149+
float64(cpwt),
150+
server,
151+
), prometheus.MustNewConstMetric(
152+
statBGWriter["checkpoint_sync_time"],
153+
prometheus.CounterValue,
154+
float64(cpst),
155+
server,
156+
), prometheus.MustNewConstMetric(
157+
statBGWriter["buffers_checkpoint"],
158+
prometheus.CounterValue,
159+
float64(bcp),
160+
server,
161+
), prometheus.MustNewConstMetric(
162+
statBGWriter["buffers_clean"],
163+
prometheus.CounterValue,
164+
float64(bc),
165+
server,
166+
), prometheus.MustNewConstMetric(
167+
statBGWriter["maxwritten_clean"],
168+
prometheus.CounterValue,
169+
float64(mwc),
170+
server,
171+
), prometheus.MustNewConstMetric(
172+
statBGWriter["buffers_backend"],
173+
prometheus.CounterValue,
174+
float64(bb),
175+
server,
176+
), prometheus.MustNewConstMetric(
177+
statBGWriter["buffers_backend_fsync"],
178+
prometheus.CounterValue,
179+
float64(bbf),
180+
server,
181+
), prometheus.MustNewConstMetric(
182+
statBGWriter["buffers_alloc"],
183+
prometheus.CounterValue,
184+
float64(ba),
185+
server,
186+
), prometheus.MustNewConstMetric(
187+
statBGWriter["stats_reset"],
188+
prometheus.CounterValue,
189+
float64(sr.Unix()),
190+
server,
191+
),
192+
)
193+
194+
return metrics, nil
195+
}

postgres_mixin/dashboards/postgres-overview.json

+5-5
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@
584584
{
585585
"alias": "Buffers Allocated",
586586
"dsType": "prometheus",
587-
"expr": "irate(pg_stat_bgwriter_buffers_alloc{instance='$instance'}[5m])",
587+
"expr": "irate(postgres_stat_bgwriter_buffers_alloc_total{instance='$instance'}[5m])",
588588
"format": "time_series",
589589
"groupBy": [
590590
{
@@ -636,7 +636,7 @@
636636
{
637637
"alias": "Buffers Allocated",
638638
"dsType": "prometheus",
639-
"expr": "irate(pg_stat_bgwriter_buffers_backend_fsync{instance='$instance'}[5m])",
639+
"expr": "irate(postgres_stat_bgwriter_buffers_backend_fsync_total{instance='$instance'}[5m])",
640640
"format": "time_series",
641641
"groupBy": [
642642
{
@@ -688,7 +688,7 @@
688688
{
689689
"alias": "Buffers Allocated",
690690
"dsType": "prometheus",
691-
"expr": "irate(pg_stat_bgwriter_buffers_backend{instance='$instance'}[5m])",
691+
"expr": "irate(postgres_stat_bgwriter_buffers_backend_total{instance='$instance'}[5m])",
692692
"format": "time_series",
693693
"groupBy": [
694694
{
@@ -740,7 +740,7 @@
740740
{
741741
"alias": "Buffers Allocated",
742742
"dsType": "prometheus",
743-
"expr": "irate(pg_stat_bgwriter_buffers_clean{instance='$instance'}[5m])",
743+
"expr": "irate(postgres_stat_bgwriter_buffers_clean_total{instance='$instance'}[5m])",
744744
"format": "time_series",
745745
"groupBy": [
746746
{
@@ -792,7 +792,7 @@
792792
{
793793
"alias": "Buffers Allocated",
794794
"dsType": "prometheus",
795-
"expr": "irate(pg_stat_bgwriter_buffers_checkpoint{instance='$instance'}[5m])",
795+
"expr": "irate(postgres_stat_bgwriter_buffers_checkpoint_total{instance='$instance'}[5m])",
796796
"format": "time_series",
797797
"groupBy": [
798798
{

0 commit comments

Comments
 (0)