Skip to content

Commit

Permalink
metrics: collect the metrics of nydusd events
Browse files Browse the repository at this point in the history
Collect the metrics of nydus daemon events, including INIT, RUNNING and DIED.

Signed-off-by: Bin Tang <tangbin.bin@bytedance.com>
  • Loading branch information
sctb512 committed Nov 25, 2022
1 parent 84d9ba3 commit b2fc494
Show file tree
Hide file tree
Showing 13 changed files with 137 additions and 51 deletions.
1 change: 1 addition & 0 deletions pkg/daemon/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const (
DaemonStateInit DaemonState = "INIT"
DaemonStateReady DaemonState = "READY"
DaemonStateRunning DaemonState = "RUNNING"
DaemonStateDied DaemonState = "DIED"
)

func (info *DaemonInfo) DaemonState() DaemonState {
Expand Down
4 changes: 4 additions & 0 deletions pkg/filesystem/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/containerd/nydus-snapshotter/pkg/filesystem/meta"
"github.com/containerd/nydus-snapshotter/pkg/label"
"github.com/containerd/nydus-snapshotter/pkg/manager"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter"
"github.com/containerd/nydus-snapshotter/pkg/signature"
"github.com/containerd/nydus-snapshotter/pkg/stargz"
)
Expand Down Expand Up @@ -109,6 +110,9 @@ func NewFileSystem(ctx context.Context, opt ...NewFSOpt) (*Filesystem, error) {
if err := d.WaitUntilState(types.DaemonStateRunning); err != nil {
return nil, errors.Wrapf(err, "recover daemon %s", d.ID())
}
if err := exporter.ExportNydusdEventMetric(d.States.ID, string(types.DaemonStateRunning)); err != nil {
log.L.Errorf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", d.States.ID, string(types.DaemonStateRunning), err)
}
if err := d.RecoveredMountInstances(); err != nil {
return nil, errors.Wrapf(err, "recover daemons")
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/manager/daemon_adaptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/containerd/nydus-snapshotter/pkg/daemon/command"
"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
"github.com/containerd/nydus-snapshotter/pkg/errdefs"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter"
"github.com/pkg/errors"
)

Expand Down Expand Up @@ -69,6 +70,9 @@ func (m *Manager) StartDaemon(d *daemon.Daemon) error {
log.L.Errorf("daemon %s is not managed to reach RUNNING state", d.ID())
return
}
if err := exporter.ExportNydusdEventMetric(d.States.ID, string(types.DaemonStateRunning)); err != nil {
log.L.Errorf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", d.States.ID, string(types.DaemonStateRunning), err)
}

su := d.Supervisor
err = su.FetchDaemonStates(func() error {
Expand Down
4 changes: 4 additions & 0 deletions pkg/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/containerd/nydus-snapshotter/pkg/daemon"
"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
"github.com/containerd/nydus-snapshotter/pkg/errdefs"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter"
"github.com/containerd/nydus-snapshotter/pkg/store"
"github.com/containerd/nydus-snapshotter/pkg/supervisor"
)
Expand Down Expand Up @@ -262,6 +263,9 @@ func (m *Manager) doDaemonFailover(d *daemon.Daemon) {
return
}

if err := exporter.ExportNydusdEventMetric(d.States.ID, string(types.DaemonStateInit)); err != nil {
log.L.Errorf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", d.States.ID, string(types.DaemonStateInit), err)
}
if err := d.TakeOver(); err != nil {
log.L.Errorf("fail to takeover, %s", err)
return
Expand Down
5 changes: 5 additions & 0 deletions pkg/manager/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import (
"golang.org/x/sys/unix"

"github.com/containerd/containerd/log"
"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
"github.com/containerd/nydus-snapshotter/pkg/errdefs"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter"
"github.com/containerd/nydus-snapshotter/pkg/utils/retry"
)

Expand Down Expand Up @@ -217,6 +219,9 @@ func (m *livenessMonitor) Run() {

if ev.Events&(unix.EPOLLHUP|unix.EPOLLERR) != 0 {
log.L.Warnf("Daemon %s died", target.id)
if err := exporter.ExportNydusdEventMetric(target.id, string(types.DaemonStateDied)); err != nil {
log.L.Errorf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", target.id, string(types.DaemonStateDied), err)
}
// Notify subscribers that death event happens
target.notifier <- deathEvent{daemonID: target.id, path: target.path}
}
Expand Down
60 changes: 51 additions & 9 deletions pkg/metrics/exporter/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,16 @@ import (
"encoding/json"
"fmt"
"os"
"sync"
"time"

"github.com/pkg/errors"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"

"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/fs"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/snapshot"
)

type Opt func(*Exporter) error
Expand All @@ -26,6 +29,27 @@ type Exporter struct {
outputFile string
}

// MutexExporter pairs an *Exporter with the mutex that guards access to it,
// so the exporter can be installed and fetched from multiple goroutines.
type MutexExporter struct {
	// mu serializes every read and write of exp.
	mu sync.Mutex
	// exp is the currently installed exporter; it is nil until Set is called.
	exp *Exporter
}

// Get returns the exporter currently stored in the holder. The result is nil
// when no exporter has been stored yet. The read happens under the mutex.
func (m *MutexExporter) Get() *Exporter {
	m.mu.Lock()
	current := m.exp
	m.mu.Unlock()

	return current
}

// Set replaces the stored exporter with exp. The write happens under the
// mutex, so it is safe to call concurrently with Get.
func (m *MutexExporter) Set(exp *Exporter) {
	m.mu.Lock()
	m.exp = exp
	m.mu.Unlock()
}

// GlobalExp is the package-level exporter holder. NewExporter stores the
// exporter it builds here, and the package-level Export* functions fetch it
// from here; until then Get returns nil and those functions report an error.
var GlobalExp MutexExporter

func WithOutputFile(metricsFile string) Opt {
return func(e *Exporter) error {
if metricsFile == "" {
Expand All @@ -40,25 +64,32 @@ func WithOutputFile(metricsFile string) Opt {
}
}

func NewExporter(opts ...Opt) (*Exporter, error) {
func NewExporter(opts ...Opt) error {
var exp Exporter

for _, o := range opts {
if err := o(&exp); err != nil {
return nil, err
return err
}
}

return &exp, nil
GlobalExp.Set(&exp)

return nil
}

func (e *Exporter) ExportFsMetrics(m *types.FsMetrics, imageRef string) error {
ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead))
OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens))
OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens))
LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp))
func ExportFsMetrics(m *types.FsMetrics, imageRef string) error {
e := GlobalExp.Get()
if e == nil {
return errors.New("failed to get exporter")
}

fs.ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead))
fs.OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens))
fs.OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens))
fs.LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp))

for _, h := range FsMetricHists {
for _, h := range fs.MetricHists {
o, err := h.ToConstHistogram(m, imageRef)
if err != nil {
return errors.Wrapf(err, "failed to new const histogram for %s", h.Desc.String())
Expand All @@ -69,6 +100,17 @@ func (e *Exporter) ExportFsMetrics(m *types.FsMetrics, imageRef string) error {
return e.output()
}

// ExportNydusdEventMetric records one nydusd lifecycle event for the daemon
// identified by daemonID and then writes the gathered metrics out through the
// global exporter.
//
// event is the state name to record (callers pass values such as "INIT",
// "RUNNING" or "DIED"). An error is returned when no global exporter has been
// installed, or when writing the metrics output fails.
func ExportNydusdEventMetric(daemonID string, event string) error {
	exp := GlobalExp.Get()
	if exp == nil {
		return errors.New("failed to get metric exporter")
	}

	// The local wall-clock time, with millisecond precision, is used as a
	// label value.
	// NOTE(review): every call at a distinct timestamp therefore creates a
	// new series in the counter vector; confirm that this cardinality is
	// intended.
	occurredAt := time.Now().Format("2006-01-02 15:04:05.000")
	counter := snapshot.NydusdEvent.WithLabelValues(daemonID, occurredAt, event)
	counter.Inc()

	return exp.output()
}

func (e *Exporter) output() error {
ms, err := Registry.Gather()
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
* SPDX-License-Identifier: Apache-2.0
*/

package exporter
package fs

import (
"time"

"github.com/prometheus/client_golang/prometheus"

"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
Expand All @@ -17,7 +15,6 @@ import (

var (
imageRefLabel = "image_ref"
defaultTTL = 3 * time.Minute
)

var (
Expand All @@ -28,7 +25,7 @@ var (
Help: "Total number read of a nydus fs, in Byte.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)

OpenFdCount = ttl.NewGaugeVecWithTTL(
Expand All @@ -37,7 +34,7 @@ var (
Help: "Number of current open files.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)

OpenFdMaxCount = ttl.NewGaugeVecWithTTL(
Expand All @@ -46,7 +43,7 @@ var (
Help: "Number of max open files.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)

LastFopTimestamp = ttl.NewGaugeVecWithTTL(
Expand All @@ -55,12 +52,12 @@ var (
Help: "Timestamp of last file operation.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)
)

// Fs metric histograms
var FsMetricHists = []*FsMetricHistogram{
var MetricHists = []*MetricHistogram{
{
Desc: prometheus.NewDesc(
"nydusd_block_count_read_hist",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

package exporter
package fs

import (
"fmt"
Expand Down Expand Up @@ -50,7 +50,7 @@ func MakeFopBuckets() []uint64 {

type GetCountersFn func(*types.FsMetrics) []uint64

type FsMetricHistogram struct {
type MetricHistogram struct {
Desc *prometheus.Desc
Buckets []uint64
GetCounters GetCountersFn
Expand All @@ -59,7 +59,7 @@ type FsMetricHistogram struct {
constHist prometheus.Metric
}

func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) {
func (h *MetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) {
var count, sum uint64
counters := h.GetCounters(m)
hmap := make(map[float64]uint64)
Expand All @@ -82,18 +82,18 @@ func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string
), nil
}

func (h *FsMetricHistogram) Save(m prometheus.Metric) {
func (h *MetricHistogram) Save(m prometheus.Metric) {
h.constHist = m
}

// Implement prometheus.Collector interface
func (h *FsMetricHistogram) Describe(ch chan<- *prometheus.Desc) {
func (h *MetricHistogram) Describe(ch chan<- *prometheus.Desc) {
if h.Desc != nil {
ch <- h.Desc
}
}

func (h *FsMetricHistogram) Collect(ch chan<- prometheus.Metric) {
func (h *MetricHistogram) Collect(ch chan<- prometheus.Metric) {
if h.constHist != nil {
ch <- h.constHist
}
Expand Down
13 changes: 8 additions & 5 deletions pkg/metrics/exporter/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
package exporter

import (
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/fs"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/snapshot"
"github.com/prometheus/client_golang/prometheus"
)

Expand All @@ -16,13 +18,14 @@ var (

func init() {
Registry.MustRegister(
ReadCount,
OpenFdCount,
OpenFdMaxCount,
LastFopTimestamp,
fs.ReadCount,
fs.OpenFdCount,
fs.OpenFdMaxCount,
fs.LastFopTimestamp,
snapshot.NydusdEvent,
)

for _, m := range FsMetricHists {
for _, m := range fs.MetricHists {
Registry.MustRegister(m)
}
}
27 changes: 27 additions & 0 deletions pkg/metrics/exporter/snapshot/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (c) 2021. Alibaba Cloud. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*/

package snapshot

import (
"github.com/prometheus/client_golang/prometheus"
)

// Label names attached to the nydusd event metric. They are fixed strings
// that are never reassigned, so they are declared as constants rather than
// mutable package-level variables.
const (
	// daemonIDLabel names the label carrying the nydusd daemon ID.
	daemonIDLabel = "daemon_id"
	// timeLabel names the label carrying the time the event was recorded.
	timeLabel = "time"
	// eventLabel names the label carrying the event (daemon state) name.
	eventLabel = "event"
)

// NydusdEvent is a counter vector for nydusd lifetime events. Each series is
// keyed by the daemon ID, the time label and the event label, in that order,
// so callers of WithLabelValues must supply the values in the same order.
var NydusdEvent = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "nydusd_lifetime_event_times",
		Help: "nydusd lifetime event times.",
	},
	[]string{daemonIDLabel, timeLabel, eventLabel},
)
6 changes: 2 additions & 4 deletions pkg/metrics/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ type Server struct {
rootDir string
metricsFile string
pm *manager.Manager
exp *exporter.Exporter
}

func WithRootDir(rootDir string) ServerOpt {
Expand Down Expand Up @@ -70,13 +69,12 @@ func NewServer(ctx context.Context, opts ...ServerOpt) (*Server, error) {
}
}

exp, err := exporter.NewExporter(
err := exporter.NewExporter(
exporter.WithOutputFile(s.metricsFile),
)
if err != nil {
return nil, errors.Wrap(err, "failed to new metric exporter")
}
s.exp = exp

sockPath := filepath.Join(s.rootDir, sockFileName)

Expand Down Expand Up @@ -123,7 +121,7 @@ outer:
continue
}

if err := s.exp.ExportFsMetrics(fsMetrics, i.ImageID); err != nil {
if err := exporter.ExportFsMetrics(fsMetrics, i.ImageID); err != nil {
log.G(ctx).Errorf("failed to export fs metrics for %s: %v", i.ImageID, err)
continue
}
Expand Down
1 change: 1 addition & 0 deletions pkg/metrics/ttl/gauge.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

// defaultCleanUpPeriod is the default clean-up interval of ten minutes.
// NOTE(review): its consumer is not visible in this hunk; presumably it is
// how often expired label sets are purged. Confirm against the rest of the
// file.
var defaultCleanUpPeriod = 10 * time.Minute

// DefaultTTL is the default time-to-live of three minutes. It is exported so
// that other packages can pass it when constructing TTL-based gauge vectors.
var DefaultTTL = 3 * time.Minute

type LabelWithValue struct {
Expand Down
Loading

0 comments on commit b2fc494

Please sign in to comment.