From 19f0a663ea65c5fa498def3701a4ba01f57b0cae Mon Sep 17 00:00:00 2001 From: Bin Tang Date: Thu, 24 Nov 2022 14:04:08 +0800 Subject: [PATCH] metrics: collect the metrics of nydusd events Collect the metrics of nydus daemon events, including INIT, RUNNING and DIED. Signed-off-by: Bin Tang --- pkg/daemon/daemon.go | 5 +++ pkg/daemon/types/types.go | 1 + pkg/manager/monitor.go | 5 +++ pkg/metrics/exporter/export.go | 48 +++++++++++++++++++----- pkg/metrics/exporter/{ => fs}/metrics.go | 15 +++----- pkg/metrics/exporter/{ => fs}/types.go | 12 +++--- pkg/metrics/exporter/registry.go | 13 ++++--- pkg/metrics/serve.go | 6 +-- pkg/metrics/ttl/gauge.go | 1 + snapshot/snapshot.go | 36 +++++++++--------- 10 files changed, 91 insertions(+), 51 deletions(-) rename pkg/metrics/exporter/{ => fs}/metrics.go (93%) rename pkg/metrics/exporter/{ => fs}/types.go (80%) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 0345607051..aba8ee64b5 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -20,6 +20,7 @@ import ( "github.com/containerd/nydus-snapshotter/config/daemonconfig" "github.com/containerd/nydus-snapshotter/pkg/daemon/types" "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter" "github.com/containerd/nydus-snapshotter/pkg/supervisor" "github.com/containerd/nydus-snapshotter/pkg/utils/erofs" "github.com/containerd/nydus-snapshotter/pkg/utils/mount" @@ -201,6 +202,10 @@ func (d *Daemon) WaitUntilState(expected types.DaemonState) error { _, err, shared := d.stateGetterGroup.Do(d.ID(), stateGetter) log.L.Debugf("Get daemon %s with shared result: %v ", d.ID(), shared) + if exportErr := exporter.ExportNydusdEventMetric(d.States.ID, string(expected)); exportErr != nil { + log.L.Warnf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", d.States.ID, string(expected), exportErr) + } + return err } diff --git a/pkg/daemon/types/types.go b/pkg/daemon/types/types.go index 42ab591d4e..5e4d6be3cf 100644 --- a/pkg/daemon/types/types.go +++ b/pkg/daemon/types/types.go @@ -28,6 +28,7 @@ const ( DaemonStateInit DaemonState = "INIT" DaemonStateReady DaemonState = "READY" DaemonStateRunning DaemonState = "RUNNING" + DaemonStateDied DaemonState = "DIED" ) func (info *DaemonInfo) DaemonState() DaemonState { diff --git a/pkg/manager/monitor.go b/pkg/manager/monitor.go index 39f9805a29..f98135b003 100644 --- a/pkg/manager/monitor.go +++ b/pkg/manager/monitor.go @@ -16,7 +16,9 @@ import ( "golang.org/x/sys/unix" "github.com/containerd/containerd/log" + "github.com/containerd/nydus-snapshotter/pkg/daemon/types" "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter" "github.com/containerd/nydus-snapshotter/pkg/utils/retry" ) @@ -217,6 +219,9 @@ func (m *livenessMonitor) Run() { if ev.Events&(unix.EPOLLHUP|unix.EPOLLERR) != 0 { log.L.Warnf("Daemon %s died", target.id) + if err := exporter.ExportNydusdEventMetric(target.id, string(types.DaemonStateDied)); err != nil { + log.L.Warnf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", target.id, string(types.DaemonStateDied), err) + } // Notify subscribers that death event happens target.notifier <- deathEvent{daemonID: target.id, path: target.path} } diff --git a/pkg/metrics/exporter/export.go b/pkg/metrics/exporter/export.go index 873103bc1d..385803fe51 100644 --- a/pkg/metrics/exporter/export.go +++ b/pkg/metrics/exporter/export.go @@ -18,6 +18,8 @@ import ( "github.com/prometheus/common/expfmt" "github.com/containerd/nydus-snapshotter/pkg/daemon/types" + "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/daemon" + "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/fs" ) type Opt func(*Exporter) error @@ -26,6 +28,8 @@ type Exporter struct { outputFile string } +var globalExporter *Exporter + func WithOutputFile(metricsFile string) Opt { return func(e *Exporter) error { if metricsFile == "" { @@ -40,25 +44,40 @@ func WithOutputFile(metricsFile string) Opt { } } -func NewExporter(opts ...Opt) (*Exporter, error) { +func NewExporter(opts ...Opt) error { var exp Exporter for _, o := range opts { if err := o(&exp); err != nil { - return nil, err + return err } } - return &exp, nil + globalExporter = &exp + + return nil +} + +func getExporter() (*Exporter, error) { + if globalExporter == nil { + return nil, errors.New("failed to get global metric exporter") + } + + return globalExporter, nil } -func (e *Exporter) ExportFsMetrics(m *types.FsMetrics, imageRef string) error { - ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead)) - OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens)) - OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens)) - LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp)) +func ExportFsMetrics(m *types.FsMetrics, imageRef string) error { + e, err := getExporter() + if err != nil { + return err + } - for _, h := range FsMetricHists { + fs.ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead)) + fs.OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens)) + fs.OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens)) + fs.LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp)) + + for _, h := range fs.MetricHists { o, err := h.ToConstHistogram(m, imageRef) if err != nil { return errors.Wrapf(err, "failed to new const histogram for %s", h.Desc.String()) @@ -69,6 +88,17 @@ func (e *Exporter) ExportFsMetrics(m *types.FsMetrics, imageRef string) error { return e.output() } +func ExportNydusdEventMetric(daemonID string, event string) error { + e, err := getExporter() + if err != nil { + return err + } + + daemon.NydusdEvent.WithLabelValues(daemonID, time.Now().Format("2006-01-02 15:04:05.000"), event).Inc() + + return e.output() +} + func (e *Exporter) output() error { ms, err := Registry.Gather() if err != nil { diff --git a/pkg/metrics/exporter/metrics.go b/pkg/metrics/exporter/fs/metrics.go similarity index 93% rename from pkg/metrics/exporter/metrics.go rename to pkg/metrics/exporter/fs/metrics.go index d0eb954a6e..47a30cd6bb 100644 --- a/pkg/metrics/exporter/metrics.go +++ b/pkg/metrics/exporter/fs/metrics.go @@ -4,11 +4,9 @@ * SPDX-License-Identifier: Apache-2.0 */ -package exporter +package fs import ( - "time" - "github.com/prometheus/client_golang/prometheus" "github.com/containerd/nydus-snapshotter/pkg/daemon/types" @@ -17,7 +15,6 @@ import ( var ( imageRefLabel = "image_ref" - defaultTTL = 3 * time.Minute ) var ( @@ -28,7 +25,7 @@ var ( Help: "Total number read of a nydus fs, in Byte.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) OpenFdCount = ttl.NewGaugeVecWithTTL( @@ -37,7 +34,7 @@ var ( Help: "Number of current open files.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) OpenFdMaxCount = ttl.NewGaugeVecWithTTL( @@ -46,7 +43,7 @@ var ( Help: "Number of max open files.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) LastFopTimestamp = ttl.NewGaugeVecWithTTL( @@ -55,12 +52,12 @@ var ( Help: "Timestamp of last file operation.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) ) // Fs metric histograms -var FsMetricHists = []*FsMetricHistogram{ +var MetricHists = []*MetricHistogram{ { Desc: prometheus.NewDesc( "nydusd_block_count_read_hist", diff --git a/pkg/metrics/exporter/types.go b/pkg/metrics/exporter/fs/types.go similarity index 80% rename from pkg/metrics/exporter/types.go rename to pkg/metrics/exporter/fs/types.go index 28509402ac..ec5e8e931c 100644 --- a/pkg/metrics/exporter/types.go +++ b/pkg/metrics/exporter/fs/types.go @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package exporter +package fs import ( "fmt" @@ -50,7 +50,7 @@ func MakeFopBuckets() []uint64 { type GetCountersFn func(*types.FsMetrics) []uint64 -type FsMetricHistogram struct { +type MetricHistogram struct { Desc *prometheus.Desc Buckets []uint64 GetCounters GetCountersFn @@ -59,7 +59,7 @@ type FsMetricHistogram struct { constHist prometheus.Metric } -func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) { +func (h *MetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) { var count, sum uint64 counters := h.GetCounters(m) hmap := make(map[float64]uint64) @@ -82,18 +82,18 @@ func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string ), nil } -func (h *FsMetricHistogram) Save(m prometheus.Metric) { +func (h *MetricHistogram) Save(m prometheus.Metric) { h.constHist = m } // Implement prometheus.Collector interface -func (h *FsMetricHistogram) Describe(ch chan<- *prometheus.Desc) { +func (h *MetricHistogram) Describe(ch chan<- *prometheus.Desc) { if h.Desc != nil { ch <- h.Desc } } -func (h *FsMetricHistogram) Collect(ch chan<- prometheus.Metric) { +func (h *MetricHistogram) Collect(ch chan<- prometheus.Metric) { if h.constHist != nil { ch <- h.constHist } diff --git a/pkg/metrics/exporter/registry.go b/pkg/metrics/exporter/registry.go index af68e120da..31a14d7906 100644 --- a/pkg/metrics/exporter/registry.go +++ b/pkg/metrics/exporter/registry.go @@ -7,6 +7,8 @@ package exporter import ( + "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/daemon" + "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/fs" "github.com/prometheus/client_golang/prometheus" ) @@ -16,13 +18,14 @@ var ( func init() { Registry.MustRegister( - ReadCount, - OpenFdCount, - OpenFdMaxCount, - LastFopTimestamp, + fs.ReadCount, + fs.OpenFdCount, + fs.OpenFdMaxCount, + fs.LastFopTimestamp, + daemon.NydusdEvent, ) - for _, m := range FsMetricHists { + for _, m := range fs.MetricHists { Registry.MustRegister(m) } } diff --git a/pkg/metrics/serve.go b/pkg/metrics/serve.go index 64115c73e5..7de4156779 100644 --- a/pkg/metrics/serve.go +++ b/pkg/metrics/serve.go @@ -30,7 +30,6 @@ type Server struct { rootDir string metricsFile string pm *manager.Manager - exp *exporter.Exporter } func WithRootDir(rootDir string) ServerOpt { @@ -70,13 +69,12 @@ func NewServer(ctx context.Context, opts ...ServerOpt) (*Server, error) { } } - exp, err := exporter.NewExporter( + err := exporter.NewExporter( exporter.WithOutputFile(s.metricsFile), ) if err != nil { return nil, errors.Wrap(err, "failed to new metric exporter") } - s.exp = exp sockPath := filepath.Join(s.rootDir, sockFileName) @@ -123,7 +121,7 @@ outer: continue } - if err := s.exp.ExportFsMetrics(fsMetrics, i.ImageID); err != nil { + if err := exporter.ExportFsMetrics(fsMetrics, i.ImageID); err != nil { log.G(ctx).Errorf("failed to export fs metrics for %s: %v", i.ImageID, err) continue } diff --git a/pkg/metrics/ttl/gauge.go b/pkg/metrics/ttl/gauge.go index 74c1b1c785..fdb4ff80c1 100644 --- a/pkg/metrics/ttl/gauge.go +++ b/pkg/metrics/ttl/gauge.go @@ -16,6 +16,7 @@ import ( var ( defaultCleanUpPeriod = 10 * time.Minute + DefaultTTL = 3 * time.Minute ) type LabelWithValue struct { diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index bcf5fed4e2..d437bf908f 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -98,6 +98,24 @@ func NewSnapshotter(ctx context.Context, cfg *config.Config) (snapshots.Snapshot return nil, errors.Wrap(err, "create daemons manager") } + if cfg.EnableMetrics { + metricServer, err := metrics.NewServer( + ctx, + metrics.WithRootDir(cfg.RootDir), + metrics.WithMetricsFile(cfg.MetricsFile), + metrics.WithProcessManager(manager), + ) + if err != nil { + return nil, errors.Wrap(err, "create metrics server") + } + // Start metrics http server. + go func() { + if err := metricServer.Serve(ctx); err != nil { + log.L.Errorf("Failed to start metrics server, %s", err) + } + }() + } + if cfg.APISocket != "" { systemController, err := system.NewSystemController(manager, cfg.APISocket) if err != nil { @@ -163,24 +181,6 @@ func NewSnapshotter(ctx context.Context, cfg *config.Config) (snapshots.Snapshot } } - if cfg.EnableMetrics { - metricServer, err := metrics.NewServer( - ctx, - metrics.WithRootDir(cfg.RootDir), - metrics.WithMetricsFile(cfg.MetricsFile), - metrics.WithProcessManager(manager), - ) - if err != nil { - return nil, errors.Wrap(err, "create metrics server") - } - // Start metrics http server. - go func() { - if err := metricServer.Serve(ctx); err != nil { - log.L.Errorf("Failed to start metrics server, %s", err) - } - }() - } - if err := os.MkdirAll(cfg.RootDir, 0700); err != nil { return nil, err }