Skip to content

Commit 86a7b41

Browse files
Add a collector for stat_wal.
Docs: https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-PG-STAT-WAL-VIEW
We use this collector in production and have done so for some time.
1 parent 94e8399 commit 86a7b41

File tree

2 files changed

+384
-0
lines changed

2 files changed

+384
-0
lines changed

collector/pg_stat_wal.go

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
// Copyright 2024 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
package collector
14+
15+
import (
16+
"context"
17+
"database/sql"
18+
"fmt"
19+
"log/slog"
20+
"strings"
21+
22+
"github.com/prometheus/client_golang/prometheus"
23+
)
24+
25+
// statWALSubsystem is the subsystem name of this collector. It is used both
// when registering the collector and when building the metric names.
const statWALSubsystem = "stat_wal"
26+
27+
func init() {
28+
registerCollector(statWALSubsystem, defaultDisabled, NewPGStatWALCollector)
29+
}
30+
31+
// PGStatWALCollector collects metrics from the pg_stat_wal view.
type PGStatWALCollector struct {
	// log is the logger taken from the collector configuration.
	log *slog.Logger
}
34+
35+
func NewPGStatWALCollector(config collectorConfig) (Collector, error) {
36+
return &PGStatWALCollector{log: config.logger}, nil
37+
}
38+
39+
var statsWALRecordsDesc = prometheus.NewDesc(
40+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_records_total"),
41+
"Total number of WAL records generated",
42+
[]string{},
43+
prometheus.Labels{},
44+
)
45+
46+
var statsWALFPIDesc = prometheus.NewDesc(
47+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_fpi"),
48+
"Total number of WAL full page images generated",
49+
[]string{},
50+
prometheus.Labels{},
51+
)
52+
53+
var statsWALBytesDesc = prometheus.NewDesc(
54+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_bytes"),
55+
"Total amount of WAL generated in bytes",
56+
[]string{},
57+
prometheus.Labels{},
58+
)
59+
60+
var statsWALBuffersFullDesc = prometheus.NewDesc(
61+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_buffers_full"),
62+
"Number of times WAL data was written to disk because WAL buffers became full",
63+
[]string{},
64+
prometheus.Labels{},
65+
)
66+
67+
var statsWALWriteDesc = prometheus.NewDesc(
68+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_write"),
69+
"Number of times WAL buffers were written out to disk via XLogWrite request. See Section 30.5 for more information about the internal WAL function XLogWrite.",
70+
[]string{},
71+
prometheus.Labels{},
72+
)
73+
74+
var statsWALSyncDesc = prometheus.NewDesc(
75+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_sync"),
76+
"Number of times WAL files were synced to disk via issue_xlog_fsync request (if fsync is on and wal_sync_method is either fdatasync, fsync or fsync_writethrough, otherwise zero). See Section 30.5 for more information about the internal WAL function issue_xlog_fsync.",
77+
[]string{},
78+
prometheus.Labels{},
79+
)
80+
81+
var statsWALWriteTimeDesc = prometheus.NewDesc(
82+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_write_time"),
83+
"Total amount of time spent writing WAL buffers to disk via XLogWrite request, in milliseconds (if track_wal_io_timing is enabled, otherwise zero). This includes the sync time when wal_sync_method is either open_datasync or open_sync.",
84+
[]string{},
85+
prometheus.Labels{},
86+
)
87+
88+
var statsWALSyncTimeDesc = prometheus.NewDesc(
89+
prometheus.BuildFQName(namespace, statWALSubsystem, "wal_sync_time"),
90+
"Total amount of time spent syncing WAL files to disk via issue_xlog_fsync request, in milliseconds (if track_wal_io_timing is enabled, fsync is on, and wal_sync_method is either fdatasync, fsync or fsync_writethrough, otherwise zero).",
91+
[]string{},
92+
prometheus.Labels{},
93+
)
94+
95+
var statsWALStatsResetDesc = prometheus.NewDesc(
96+
prometheus.BuildFQName(namespace, statWALSubsystem, "stats_reset"),
97+
"Time at which these statistics were last reset",
98+
[]string{},
99+
prometheus.Labels{},
100+
)
101+
102+
// statWALQuery returns the SELECT statement that reads the given columns from
// pg_stat_wal. The columns are joined with commas and inserted verbatim.
func statWALQuery(columns []string) string {
	selectList := strings.Join(columns, ",")
	return fmt.Sprintf("SELECT %s FROM pg_stat_wal;", selectList)
}
105+
106+
func (c *PGStatWALCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error {
107+
db := instance.getDB()
108+
109+
columns := []string{
110+
"wal_records", // bigint
111+
"wal_fpi", // bigint
112+
"wal_bytes", // numeric
113+
"wal_buffers_full", // bigint
114+
"wal_write", // bigint
115+
"wal_sync", // bigint
116+
"wal_write_time", // double precision
117+
"wal_sync_time", // double precision
118+
"stats_reset", // timestamp with time zone
119+
}
120+
121+
rows, err := db.QueryContext(ctx,
122+
statWALQuery(columns),
123+
)
124+
if err != nil {
125+
return err
126+
}
127+
defer rows.Close()
128+
129+
for rows.Next() {
130+
var walRecords, walFPI, walBytes, walBuffersFull, walWrite, walSync sql.NullInt64
131+
var walWriteTime, walSyncTime sql.NullFloat64
132+
var statsReset sql.NullTime
133+
134+
err := rows.Scan(
135+
&walRecords,
136+
&walFPI,
137+
&walBytes,
138+
&walBuffersFull,
139+
&walWrite,
140+
&walSync,
141+
&walWriteTime,
142+
&walSyncTime,
143+
&statsReset,
144+
)
145+
if err != nil {
146+
return err
147+
}
148+
149+
walRecordsMetric := 0.0
150+
if walRecords.Valid {
151+
walRecordsMetric = float64(walRecords.Int64)
152+
}
153+
ch <- prometheus.MustNewConstMetric(
154+
statsWALRecordsDesc,
155+
prometheus.CounterValue,
156+
walRecordsMetric,
157+
)
158+
159+
walFPIMetric := 0.0
160+
if walFPI.Valid {
161+
walFPIMetric = float64(walFPI.Int64)
162+
}
163+
ch <- prometheus.MustNewConstMetric(
164+
statsWALFPIDesc,
165+
prometheus.CounterValue,
166+
walFPIMetric,
167+
)
168+
169+
walBytesMetric := 0.0
170+
if walBytes.Valid {
171+
walBytesMetric = float64(walBytes.Int64)
172+
}
173+
ch <- prometheus.MustNewConstMetric(
174+
statsWALBytesDesc,
175+
prometheus.CounterValue,
176+
walBytesMetric,
177+
)
178+
179+
walBuffersFullMetric := 0.0
180+
if walBuffersFull.Valid {
181+
walBuffersFullMetric = float64(walBuffersFull.Int64)
182+
}
183+
ch <- prometheus.MustNewConstMetric(
184+
statsWALBuffersFullDesc,
185+
prometheus.CounterValue,
186+
walBuffersFullMetric,
187+
)
188+
189+
walWriteMetric := 0.0
190+
if walWrite.Valid {
191+
walWriteMetric = float64(walWrite.Int64)
192+
}
193+
ch <- prometheus.MustNewConstMetric(
194+
statsWALWriteDesc,
195+
prometheus.CounterValue,
196+
walWriteMetric,
197+
)
198+
199+
walSyncMetric := 0.0
200+
if walSync.Valid {
201+
walSyncMetric = float64(walSync.Int64)
202+
}
203+
ch <- prometheus.MustNewConstMetric(
204+
statsWALSyncDesc,
205+
prometheus.CounterValue,
206+
walSyncMetric,
207+
)
208+
209+
walWriteTimeMetric := 0.0
210+
if walWriteTime.Valid {
211+
walWriteTimeMetric = float64(walWriteTime.Float64)
212+
}
213+
ch <- prometheus.MustNewConstMetric(
214+
statsWALWriteTimeDesc,
215+
prometheus.CounterValue,
216+
walWriteTimeMetric,
217+
)
218+
219+
walSyncTimeMetric := 0.0
220+
if walSyncTime.Valid {
221+
walSyncTimeMetric = float64(walSyncTime.Float64)
222+
}
223+
ch <- prometheus.MustNewConstMetric(
224+
statsWALSyncTimeDesc,
225+
prometheus.CounterValue,
226+
walSyncTimeMetric,
227+
)
228+
229+
resetMetric := 0.0
230+
if statsReset.Valid {
231+
resetMetric = float64(statsReset.Time.Unix())
232+
}
233+
ch <- prometheus.MustNewConstMetric(
234+
statsWALStatsResetDesc,
235+
prometheus.CounterValue,
236+
resetMetric,
237+
)
238+
}
239+
return nil
240+
}

collector/pg_stat_wal_test.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// Copyright 2023 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
package collector
14+
15+
import (
16+
"context"
17+
"testing"
18+
"time"
19+
20+
"github.com/DATA-DOG/go-sqlmock"
21+
"github.com/prometheus/client_golang/prometheus"
22+
dto "github.com/prometheus/client_model/go"
23+
"github.com/smartystreets/goconvey/convey"
24+
)
25+
26+
func TestPGStatWALCollector(t *testing.T) {
27+
db, mock, err := sqlmock.New()
28+
if err != nil {
29+
t.Fatalf("Error opening a stub db connection: %s", err)
30+
}
31+
defer db.Close()
32+
33+
inst := &instance{db: db}
34+
35+
columns := []string{
36+
"wal_records", // bigint
37+
"wal_fpi", // bigint
38+
"wal_bytes", // numeric
39+
"wal_buffers_full", // bigint
40+
"wal_write", // bigint
41+
"wal_sync", // bigint
42+
"wal_write_time", // double precision
43+
"wal_sync_time", // double precision
44+
"stats_reset", // timestamp with time zone
45+
}
46+
47+
srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07")
48+
if err != nil {
49+
t.Fatalf("Error parsing time: %s", err)
50+
}
51+
52+
rows := sqlmock.NewRows(columns).
53+
AddRow(354, 4945, 289097744, 1242257, int64(3275602074), 89320867, 450.123439, 1234.5678, srT)
54+
mock.ExpectQuery(sanitizeQuery(statWALQuery(columns))).WillReturnRows(rows)
55+
56+
ch := make(chan prometheus.Metric)
57+
go func() {
58+
defer close(ch)
59+
c := PGStatWALCollector{}
60+
61+
if err := c.Update(context.Background(), inst, ch); err != nil {
62+
t.Errorf("Error calling PGStatWALCollector.Update: %s", err)
63+
}
64+
}()
65+
66+
expected := []MetricResult{
67+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 354},
68+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 4945},
69+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 289097744},
70+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 1242257},
71+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 3275602074},
72+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 89320867},
73+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 450.123439},
74+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 1234.5678},
75+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 1685059842},
76+
}
77+
78+
convey.Convey("Metrics comparison", t, func() {
79+
for _, expect := range expected {
80+
m := readMetric(<-ch)
81+
convey.So(expect, convey.ShouldResemble, m)
82+
}
83+
})
84+
if err := mock.ExpectationsWereMet(); err != nil {
85+
t.Errorf("there were unfulfilled exceptions: %s", err)
86+
}
87+
}
88+
89+
func TestPGStatWALCollectorNullValues(t *testing.T) {
90+
db, mock, err := sqlmock.New()
91+
if err != nil {
92+
t.Fatalf("Error opening a stub db connection: %s", err)
93+
}
94+
defer db.Close()
95+
96+
inst := &instance{db: db}
97+
columns := []string{
98+
"wal_records", // bigint
99+
"wal_fpi", // bigint
100+
"wal_bytes", // numeric
101+
"wal_buffers_full", // bigint
102+
"wal_write", // bigint
103+
"wal_sync", // bigint
104+
"wal_write_time", // double precision
105+
"wal_sync_time", // double precision
106+
"stats_reset", // timestamp with time zone
107+
}
108+
109+
rows := sqlmock.NewRows(columns).
110+
AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil)
111+
mock.ExpectQuery(sanitizeQuery(statWALQuery(columns))).WillReturnRows(rows)
112+
113+
ch := make(chan prometheus.Metric)
114+
go func() {
115+
defer close(ch)
116+
c := PGStatWALCollector{}
117+
118+
if err := c.Update(context.Background(), inst, ch); err != nil {
119+
t.Errorf("Error calling PGStatWALCollector.Update: %s", err)
120+
}
121+
}()
122+
123+
expected := []MetricResult{
124+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
125+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
126+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
127+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
128+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
129+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
130+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
131+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
132+
{labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0},
133+
}
134+
135+
convey.Convey("Metrics comparison", t, func() {
136+
for _, expect := range expected {
137+
m := readMetric(<-ch)
138+
convey.So(expect, convey.ShouldResemble, m)
139+
}
140+
})
141+
if err := mock.ExpectationsWereMet(); err != nil {
142+
t.Errorf("there were unfulfilled exceptions: %s", err)
143+
}
144+
}

0 commit comments

Comments
 (0)