Skip to content

Commit 024c1fd

Browse files
committed
Fix swapped flushedLsn and receiveStartTli for wal_receiver collector
In `pgStatWalReceiverQueryTemplate`, the order of the columns (when `hasFlushedLSN == true`) is:
- ...
- `receive_start_lsn`
- `flushed_lsn`
- `receive_start_tli`
- ...

However, columns were scanned in this order:
- ...
- `receive_start_lsn` -> `receiveStartLsn`
- `receive_start_tli` -> `flushedLsn` (!)
- `flushed_lsn` -> `receiveStartTli` (!)
- ...

This incorrect hydration of variables also manifests as swapped values for the `pg_stat_wal_receiver_flushed_lsn` and `pg_stat_wal_receiver_receive_start_tli` metrics.

This seems to be a bug that has existed since the initial implementation:
- 2d7e152
- prometheus-community#844

In this patch, I'm:
- fixing the `.Scan()`, so that it hydrates variables in the correct order
- adjusting the order in which metrics are pushed out to the channel, to follow the order we consume them in (.., `receive_start_lsn`, `flushed_lsn`, `receive_start_tli`, ..)
- adjusting the walreceiver tests, to follow the new order (which matches `.Scan()`)
- fixing a small indentation issue in `pgStatWalReceiverQueryTemplate`

Signed-off-by: Slavi Pantaleev <slavi@devture.com>
1 parent e62fe08 commit 024c1fd

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

collector/pg_stat_walreceiver.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ var (
108108
status,
109109
(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
110110
%s
111-
receive_start_tli,
111+
receive_start_tli,
112112
received_tli,
113113
extract(epoch from last_msg_send_time) as last_msg_send_time,
114114
extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
@@ -147,7 +147,7 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
147147
var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
148148

149149
if hasFlushedLSN {
150-
if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
150+
if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &flushedLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
151151
return err
152152
}
153153
} else {
@@ -209,12 +209,6 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
209209
float64(receiveStartLsn.Int64),
210210
labels...)
211211

212-
ch <- prometheus.MustNewConstMetric(
213-
statWalReceiverReceiveStartTli,
214-
prometheus.GaugeValue,
215-
float64(receiveStartTli.Int64),
216-
labels...)
217-
218212
if hasFlushedLSN {
219213
ch <- prometheus.MustNewConstMetric(
220214
statWalReceiverFlushedLSN,
@@ -223,6 +217,12 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
223217
labels...)
224218
}
225219

220+
ch <- prometheus.MustNewConstMetric(
221+
statWalReceiverReceiveStartTli,
222+
prometheus.GaugeValue,
223+
float64(receiveStartTli.Int64),
224+
labels...)
225+
226226
ch <- prometheus.MustNewConstMetric(
227227
statWalReceiverReceivedTli,
228228
prometheus.GaugeValue,

collector/pg_stat_walreceiver_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
5050
"slot_name",
5151
"status",
5252
"receive_start_lsn",
53-
"receive_start_tli",
5453
"flushed_lsn",
54+
"receive_start_tli",
5555
"received_tli",
5656
"last_msg_send_time",
5757
"last_msg_receipt_time",
@@ -65,8 +65,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
6565
"bar",
6666
"stopping",
6767
int64(1200668684563608),
68-
1687321285,
6968
int64(1200668684563609),
69+
1687321285,
7070
1687321280,
7171
1687321275,
7272
1687321276,
@@ -88,8 +88,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
8888
}()
8989
expected := []MetricResult{
9090
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER},
91-
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
9291
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER},
92+
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
9393
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE},
9494
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER},
9595
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER},

0 commit comments

Comments
 (0)