Skip to content

Commit 27d23d0

Browse files
committed
refactor pg_stat_bgwriter metrics into standalone collector
This moves the metrics that are queried from pg_stat_bgwriter into a dedicated collector instead of dynamically generating queries and metrics from a map. It renames some metrics, including adding the `_total` suffix on all of the counters, to match Prometheus standards. This implementation uses QueryRowContext to allow for the later addition of context-based cancellation. Per the Postgres documentation, pg_stat_bgwriter contains a single row of cluster-wide statistics (the "one line per WAL sender process" wording applies to pg_stat_replication, not this view); when querying the view there is no column identifying the row, only metrics about the bgwriter. Signed-off-by: Joe Adams <github@joeadams.io>
1 parent 35b53f7 commit 27d23d0

File tree

8 files changed

+280
-41
lines changed

8 files changed

+280
-41
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
## master / unreleased
22

3+
* [CHANGE] pg_stat_bgwriter counter metrics had the `_total` suffix added #556
34
* [ENHANCEMENT] Add pg_database_size_bytes metric #613
45

56
## 0.10.1 / 2022-01-14

README.md

+6
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ This will build the docker image as `prometheuscommunity/postgres_exporter:${bra
4141
* `help`
4242
Show context-sensitive help (also try --help-long and --help-man).
4343

44+
* `collector.database`
45+
Enable the pg_database collector. Default is `enabled`
46+
47+
* `collector.bgwriter`
48+
Enable the pg_stat_bgwriter collector. Default is `enabled`
49+
4450
* `web.listen-address`
4551
Address to listen on for web interface and telemetry. Default is `:9187`.
4652

cmd/postgres_exporter/main.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,12 @@ func main() {
115115

116116
prometheus.MustRegister(exporter)
117117

118-
pe, err := collector.NewPostgresCollector(logger, dsn)
118+
pe, err := collector.NewPostgresCollector(
119+
logger,
120+
dsn,
121+
[]string{},
122+
collector.WithAutoDiscoverDatabases(*autoDiscoverDatabases),
123+
)
119124
if err != nil {
120125
level.Error(logger).Log("msg", "Failed to create PostgresCollector", "err", err.Error())
121126
os.Exit(1)

cmd/postgres_exporter/postgres_exporter.go

-17
Original file line numberDiff line numberDiff line change
@@ -163,23 +163,6 @@ func dumpMaps() {
163163
}
164164

165165
var builtinMetricMaps = map[string]intermediateMetricMap{
166-
"pg_stat_bgwriter": {
167-
map[string]ColumnMapping{
168-
"checkpoints_timed": {COUNTER, "Number of scheduled checkpoints that have been performed", nil, nil},
169-
"checkpoints_req": {COUNTER, "Number of requested checkpoints that have been performed", nil, nil},
170-
"checkpoint_write_time": {COUNTER, "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds", nil, nil},
171-
"checkpoint_sync_time": {COUNTER, "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds", nil, nil},
172-
"buffers_checkpoint": {COUNTER, "Number of buffers written during checkpoints", nil, nil},
173-
"buffers_clean": {COUNTER, "Number of buffers written by the background writer", nil, nil},
174-
"maxwritten_clean": {COUNTER, "Number of times the background writer stopped a cleaning scan because it had written too many buffers", nil, nil},
175-
"buffers_backend": {COUNTER, "Number of buffers written directly by a backend", nil, nil},
176-
"buffers_backend_fsync": {COUNTER, "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)", nil, nil},
177-
"buffers_alloc": {COUNTER, "Number of buffers allocated", nil, nil},
178-
"stats_reset": {COUNTER, "Time at which these statistics were last reset", nil, nil},
179-
},
180-
true,
181-
0,
182-
},
183166
"pg_stat_database": {
184167
map[string]ColumnMapping{
185168
"datid": {LABEL, "OID of a database", nil, nil},

collector/collector.go

+46-15
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,27 @@ type PostgresCollector struct {
8787
logger log.Logger
8888

8989
servers map[string]*server
90+
91+
// autoDiscoverDatabases will cause the collector to query the database
92+
// to find other servers and also scrape them.
93+
autoDiscoverDatabases bool
9094
}
9195

96+
type Option func(*PostgresCollector) error
97+
9298
// NewPostgresCollector creates a new PostgresCollector.
93-
func NewPostgresCollector(logger log.Logger, dsns []string, filters ...string) (*PostgresCollector, error) {
99+
func NewPostgresCollector(logger log.Logger, dsns []string, filters []string, options ...Option) (*PostgresCollector, error) {
100+
p := &PostgresCollector{
101+
logger: logger,
102+
}
103+
// Apply options to customize the collector
104+
for _, o := range options {
105+
err := o(p)
106+
if err != nil {
107+
return nil, err
108+
}
109+
}
110+
94111
f := make(map[string]bool)
95112
for _, filter := range filters {
96113
enabled, exist := collectorState[filter]
@@ -121,48 +138,62 @@ func NewPostgresCollector(logger log.Logger, dsns []string, filters ...string) (
121138
}
122139
}
123140

141+
p.Collectors = collectors
142+
124143
servers := make(map[string]*server)
125144
for _, dsn := range dsns {
126145
s, err := makeServer(dsn)
127146
if err != nil {
128147
return nil, err
129148
}
149+
// Manually provided servers are always classified as "primary"
150+
s.isPrimary = true
151+
152+
// TODO(@sysadmind): We need to discover the downstream servers and add them here.
153+
// if p.autoDiscoverDatabases {
154+
// }
155+
130156
servers[dsn] = s
131157
}
132158

133-
return &PostgresCollector{
134-
Collectors: collectors,
135-
logger: logger,
136-
servers: servers,
137-
}, nil
159+
p.servers = servers
160+
161+
return p, nil
162+
}
163+
164+
func WithAutoDiscoverDatabases(discover bool) Option {
165+
return func(p *PostgresCollector) error {
166+
p.autoDiscoverDatabases = discover
167+
return nil
168+
}
138169
}
139170

140171
// Describe implements the prometheus.Collector interface.
141-
func (n PostgresCollector) Describe(ch chan<- *prometheus.Desc) {
172+
func (p PostgresCollector) Describe(ch chan<- *prometheus.Desc) {
142173
ch <- scrapeDurationDesc
143174
ch <- scrapeSuccessDesc
144175
}
145176

146177
// Collect implements the prometheus.Collector interface.
147-
func (n PostgresCollector) Collect(ch chan<- prometheus.Metric) {
178+
func (p PostgresCollector) Collect(ch chan<- prometheus.Metric) {
148179
ctx := context.TODO()
149180
wg := sync.WaitGroup{}
150-
wg.Add(len(n.servers))
151-
for _, s := range n.servers {
181+
wg.Add(len(p.servers))
182+
for _, s := range p.servers {
152183
go func(s *server) {
153-
n.subCollect(ctx, s, ch)
184+
p.subCollect(ctx, s, ch)
154185
wg.Done()
155186
}(s)
156187
}
157188
wg.Wait()
158189
}
159190

160-
func (n PostgresCollector) subCollect(ctx context.Context, server *server, ch chan<- prometheus.Metric) {
191+
func (p PostgresCollector) subCollect(ctx context.Context, server *server, ch chan<- prometheus.Metric) {
161192
wg := sync.WaitGroup{}
162-
wg.Add(len(n.Collectors))
163-
for name, c := range n.Collectors {
193+
wg.Add(len(p.Collectors))
194+
for name, c := range p.Collectors {
164195
go func(name string, c Collector) {
165-
execute(ctx, name, c, server, ch, n.logger)
196+
execute(ctx, name, c, server, ch, p.logger)
166197
wg.Done()
167198
}(name, c)
168199
}

collector/pg_stat_bgwriter.go

+212
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// Copyright 2021 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"context"
18+
"time"
19+
20+
"github.com/go-kit/log"
21+
"github.com/prometheus/client_golang/prometheus"
22+
)
23+
24+
func init() {
25+
registerCollector("bgwriter", defaultEnabled, NewPGStatBGWriterCollector)
26+
}
27+
28+
// PGStatBGWriterCollector exposes metrics read from the pg_stat_bgwriter
// view. It holds no state; everything is fetched per scrape in Update.
type PGStatBGWriterCollector struct {
}
30+
31+
func NewPGStatBGWriterCollector(logger log.Logger) (Collector, error) {
32+
return &PGStatBGWriterCollector{}, nil
33+
}
34+
35+
const bgWriterSubsystem = "stat_bgwriter"
36+
37+
var statBGWriter = map[string]*prometheus.Desc{
38+
"checkpoints_timed": prometheus.NewDesc(
39+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_timed_total"),
40+
"Number of scheduled checkpoints that have been performed",
41+
[]string{"server"},
42+
prometheus.Labels{},
43+
),
44+
"checkpoints_req": prometheus.NewDesc(
45+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoints_req_total"),
46+
"Number of requested checkpoints that have been performed",
47+
[]string{"server"},
48+
prometheus.Labels{},
49+
),
50+
"checkpoint_write_time": prometheus.NewDesc(
51+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoint_write_time_total"),
52+
"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds",
53+
[]string{"server"},
54+
prometheus.Labels{},
55+
),
56+
"checkpoint_sync_time": prometheus.NewDesc(
57+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "checkpoint_sync_time_total"),
58+
"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds",
59+
[]string{"server"},
60+
prometheus.Labels{},
61+
),
62+
"buffers_checkpoint": prometheus.NewDesc(
63+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_checkpoint_total"),
64+
"Number of buffers written during checkpoints",
65+
[]string{"server"},
66+
prometheus.Labels{},
67+
),
68+
"buffers_clean": prometheus.NewDesc(
69+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_clean_total"),
70+
"Number of buffers written by the background writer",
71+
[]string{"server"},
72+
prometheus.Labels{},
73+
),
74+
"maxwritten_clean": prometheus.NewDesc(
75+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "maxwritten_clean_total"),
76+
"Number of times the background writer stopped a cleaning scan because it had written too many buffers",
77+
[]string{"server"},
78+
prometheus.Labels{},
79+
),
80+
"buffers_backend": prometheus.NewDesc(
81+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_backend_total"),
82+
"Number of buffers written directly by a backend",
83+
[]string{"server"},
84+
prometheus.Labels{},
85+
),
86+
"buffers_backend_fsync": prometheus.NewDesc(
87+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_backend_fsync_total"),
88+
"Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)",
89+
[]string{"server"},
90+
prometheus.Labels{},
91+
),
92+
"buffers_alloc": prometheus.NewDesc(
93+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "buffers_alloc_total"),
94+
"Number of buffers allocated",
95+
[]string{"server"},
96+
prometheus.Labels{},
97+
),
98+
"stats_reset": prometheus.NewDesc(
99+
prometheus.BuildFQName(namespace, bgWriterSubsystem, "stats_reset_total"),
100+
"Time at which these statistics were last reset",
101+
[]string{"server"},
102+
prometheus.Labels{},
103+
),
104+
}
105+
106+
func (PGStatBGWriterCollector) Update(ctx context.Context, server *server, ch chan<- prometheus.Metric) error {
107+
db, err := server.GetDB()
108+
if err != nil {
109+
return err
110+
}
111+
112+
row := db.QueryRowContext(ctx,
113+
`SELECT
114+
checkpoints_timed
115+
,checkpoints_req
116+
,checkpoint_write_time
117+
,checkpoint_sync_time
118+
,buffers_checkpoint
119+
,buffers_clean
120+
,maxwritten_clean
121+
,buffers_backend
122+
,buffers_backend_fsync
123+
,buffers_alloc
124+
,stats_reset
125+
FROM pg_stat_bgwriter;`)
126+
127+
var cpt int
128+
var cpr int
129+
var cpwt int
130+
var cpst int
131+
var bcp int
132+
var bc int
133+
var mwc int
134+
var bb int
135+
var bbf int
136+
var ba int
137+
var sr time.Time
138+
139+
err = row.Scan(&cpt, &cpr, &cpwt, &cpst, &bcp, &bc, &mwc, &bb, &bbf, &ba, &sr)
140+
if err != nil {
141+
return err
142+
}
143+
144+
ch <- prometheus.MustNewConstMetric(
145+
statBGWriter["checkpoints_timed"],
146+
prometheus.CounterValue,
147+
float64(cpt),
148+
server.GetName(),
149+
)
150+
ch <- prometheus.MustNewConstMetric(
151+
statBGWriter["checkpoints_req"],
152+
prometheus.CounterValue,
153+
float64(cpr),
154+
server.GetName(),
155+
)
156+
ch <- prometheus.MustNewConstMetric(
157+
statBGWriter["checkpoint_write_time"],
158+
prometheus.CounterValue,
159+
float64(cpwt),
160+
server.GetName(),
161+
)
162+
ch <- prometheus.MustNewConstMetric(
163+
statBGWriter["checkpoint_sync_time"],
164+
prometheus.CounterValue,
165+
float64(cpst),
166+
server.GetName(),
167+
)
168+
ch <- prometheus.MustNewConstMetric(
169+
statBGWriter["buffers_checkpoint"],
170+
prometheus.CounterValue,
171+
float64(bcp),
172+
server.GetName(),
173+
)
174+
ch <- prometheus.MustNewConstMetric(
175+
statBGWriter["buffers_clean"],
176+
prometheus.CounterValue,
177+
float64(bc),
178+
server.GetName(),
179+
)
180+
ch <- prometheus.MustNewConstMetric(
181+
statBGWriter["maxwritten_clean"],
182+
prometheus.CounterValue,
183+
float64(mwc),
184+
server.GetName(),
185+
)
186+
ch <- prometheus.MustNewConstMetric(
187+
statBGWriter["buffers_backend"],
188+
prometheus.CounterValue,
189+
float64(bb),
190+
server.GetName(),
191+
)
192+
ch <- prometheus.MustNewConstMetric(
193+
statBGWriter["buffers_backend_fsync"],
194+
prometheus.CounterValue,
195+
float64(bbf),
196+
server.GetName(),
197+
)
198+
ch <- prometheus.MustNewConstMetric(
199+
statBGWriter["buffers_alloc"],
200+
prometheus.CounterValue,
201+
float64(ba),
202+
server.GetName(),
203+
)
204+
ch <- prometheus.MustNewConstMetric(
205+
statBGWriter["stats_reset"],
206+
prometheus.CounterValue,
207+
float64(sr.Unix()),
208+
server.GetName(),
209+
)
210+
211+
return nil
212+
}

collector/server.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ import (
2222
)
2323

2424
type server struct {
25-
dsn string
26-
name string
27-
db *sql.DB
25+
dsn string
26+
name string
27+
db *sql.DB
28+
isPrimary bool // Certain queries are only run on the primary server
2829
}
2930

3031
func makeServer(dsn string) (*server, error) {

0 commit comments

Comments
 (0)