Skip to content

Commit

Permalink
Add metrics for gcplog scrape. (#4235)
Browse files Browse the repository at this point in the history
* Add metrics for gcplog scrape.

Also fix the Ready() method of target

* Fix typo with help message
  • Loading branch information
kavirajk authored Aug 30, 2021
1 parent b0646e7 commit b36bc5a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
11 changes: 9 additions & 2 deletions clients/pkg/promtail/targets/gcplog/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ type Metrics struct {
// reg is the Registerer used to create this set of metrics.
reg prometheus.Registerer

gcplogEntries *prometheus.CounterVec
gcplogErrors *prometheus.CounterVec
gcplogEntries *prometheus.CounterVec
gcplogErrors *prometheus.CounterVec
gcplogTargetLastSuccessScrape *prometheus.GaugeVec
}

// NewMetrics creates a new set of metrics. Metrics will be registered to reg.
Expand All @@ -28,6 +29,12 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
Help: "Total number of parsing errors while receiving gcplog messages",
}, []string{"project"})

m.gcplogTargetLastSuccessScrape = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "promtail",
Name: "gcplog_target_last_success_scrape",
Help: "Timestamp of the specific target's last successful poll",
}, []string{"project", "target"})

reg.MustRegister(m.gcplogEntries, m.gcplogErrors)
return &m
}
10 changes: 7 additions & 3 deletions clients/pkg/promtail/targets/gcplog/target.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ func (t *GcplogTarget) run() error {
t.msgs <- m
})
if err != nil {
// TODO(kavi): Add proper error propagation maybe?
level.Error(t.logger).Log("error", err)
level.Error(t.logger).Log("msg", "failed to receive pubsub messages", "error", err)
t.metrics.gcplogErrors.WithLabelValues(t.config.ProjectID).Inc()
t.metrics.gcplogTargetLastSuccessScrape.WithLabelValues(t.config.ProjectID, t.config.Subscription).SetToCurrentTime()
}
}()

Expand Down Expand Up @@ -138,7 +138,11 @@ func (t *GcplogTarget) Type() target.TargetType {
}

func (t *GcplogTarget) Ready() bool {
return t.ctx.Err() == nil
// Return true just like all other targets.
// Rationale: gcplog scraping shouldn't stop because of transient timeout errors.
// Such a transient failure can cause the promtail readiness probe to fail, which may prevent the pod from starting.
// We have metrics now to track if scraping failed (`gcplog_target_last_success_scrape`).
return true
}

func (t *GcplogTarget) DiscoveredLabels() model.LabelSet {
Expand Down

0 comments on commit b36bc5a

Please sign in to comment.