diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 0345607051..e2b953f7ce 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -20,6 +20,7 @@ import ( "github.com/containerd/nydus-snapshotter/config/daemonconfig" "github.com/containerd/nydus-snapshotter/pkg/daemon/types" "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/metrics/collector" "github.com/containerd/nydus-snapshotter/pkg/supervisor" "github.com/containerd/nydus-snapshotter/pkg/utils/erofs" "github.com/containerd/nydus-snapshotter/pkg/utils/mount" @@ -187,6 +188,7 @@ func (d *Daemon) WaitUntilState(expected types.DaemonState) error { return errors.Errorf("daemon %s is not %s yet, current state %s", d.ID(), expected, state) } + collector.CollectDaemonEvent(d.ID(), string(expected)) return nil }, diff --git a/pkg/daemon/types/types.go b/pkg/daemon/types/types.go index 42ab591d4e..9df996c4e0 100644 --- a/pkg/daemon/types/types.go +++ b/pkg/daemon/types/types.go @@ -24,10 +24,12 @@ type DaemonInfo struct { } const ( - DaemonStateUnknown DaemonState = "UNKNOWN" - DaemonStateInit DaemonState = "INIT" - DaemonStateReady DaemonState = "READY" - DaemonStateRunning DaemonState = "RUNNING" + DaemonStateUnknown DaemonState = "UNKNOWN" + DaemonStateInit DaemonState = "INIT" + DaemonStateReady DaemonState = "READY" + DaemonStateRunning DaemonState = "RUNNING" + DaemonStateDied DaemonState = "DIED" + DaemonStateDestroyed DaemonState = "DESTROYED" ) func (info *DaemonInfo) DaemonState() DaemonState { diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 9c26d601c1..e192357e4a 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -23,6 +23,7 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/daemon" "github.com/containerd/nydus-snapshotter/pkg/daemon/types" "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/metrics/collector" 
"github.com/containerd/nydus-snapshotter/pkg/store" "github.com/containerd/nydus-snapshotter/pkg/supervisor" ) @@ -506,6 +507,8 @@ func (m *Manager) DestroyDaemon(d *daemon.Daemon) error { log.L.Warnf("Failed to wait for daemon, %v", err) } + collector.CollectDaemonEvent(d.ID(), string(types.DaemonStateDestroyed)) + return nil } diff --git a/pkg/manager/monitor.go b/pkg/manager/monitor.go index 39f9805a29..3fd068ba4b 100644 --- a/pkg/manager/monitor.go +++ b/pkg/manager/monitor.go @@ -16,7 +16,9 @@ import ( "golang.org/x/sys/unix" "github.com/containerd/containerd/log" + "github.com/containerd/nydus-snapshotter/pkg/daemon/types" "github.com/containerd/nydus-snapshotter/pkg/errdefs" + "github.com/containerd/nydus-snapshotter/pkg/metrics/collector" "github.com/containerd/nydus-snapshotter/pkg/utils/retry" ) @@ -217,6 +219,7 @@ func (m *livenessMonitor) Run() { if ev.Events&(unix.EPOLLHUP|unix.EPOLLERR) != 0 { log.L.Warnf("Daemon %s died", target.id) + collector.CollectDaemonEvent(target.id, string(types.DaemonStateDied)) // Notify subscribers that death event happens target.notifier <- deathEvent{daemonID: target.id, path: target.path} } diff --git a/pkg/metrics/collector/collector.go b/pkg/metrics/collector/collector.go new file mode 100644 index 0000000000..7e7055cd14 --- /dev/null +++ b/pkg/metrics/collector/collector.go @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022. Nydus Developers. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package collector + +import "github.com/containerd/nydus-snapshotter/pkg/daemon/types" + +type Collector interface { + // Collect metrics to data. 
+	Collect()
+}
+
+func CollectDaemonEvent(daemonID string, event string) error {
+	GlobalDaemonEventCollector.Collect(daemonID, event)
+	return nil
+}
+
+func CollectFsMetrics(m *types.FsMetrics, imageRef string) error {
+	// Propagate the collector's error instead of discarding it.
+	return GlobalFsMetricsCollector.Collect(m, imageRef)
+}
diff --git a/pkg/metrics/collector/daemon.go b/pkg/metrics/collector/daemon.go
new file mode 100644
index 0000000000..7aa682a0e2
--- /dev/null
+++ b/pkg/metrics/collector/daemon.go
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2022. Nydus Developers. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package collector
+
+import (
+	"time"
+
+	"github.com/containerd/nydus-snapshotter/pkg/metrics/data"
+)
+
+type DaemonEventCollector struct{}
+
+var GlobalDaemonEventCollector *DaemonEventCollector
+
+func init() {
+	GlobalDaemonEventCollector = &DaemonEventCollector{}
+}
+
+func (d *DaemonEventCollector) Collect(daemonID string, event string) {
+	data.NydusdEvent.WithLabelValues(daemonID, time.Now().Format("2006-01-02 15:04:05.000"), event).Inc()
+}
diff --git a/pkg/metrics/collector/fs.go b/pkg/metrics/collector/fs.go
new file mode 100644
index 0000000000..71a95faa7b
--- /dev/null
+++ b/pkg/metrics/collector/fs.go
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022. Nydus Developers. All rights reserved.
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package collector + +import ( + "github.com/containerd/nydus-snapshotter/pkg/daemon/types" + "github.com/containerd/nydus-snapshotter/pkg/metrics/data" + "github.com/pkg/errors" +) + +type FsMetricsCollector struct{} + +var GlobalFsMetricsCollector *FsMetricsCollector + +func init() { + GlobalFsMetricsCollector = &FsMetricsCollector{} +} + +func (f *FsMetricsCollector) Collect(m *types.FsMetrics, imageRef string) error { + data.ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead)) + data.OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens)) + data.OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens)) + data.LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp)) + + for _, h := range data.MetricHists { + o, err := h.ToConstHistogram(m, imageRef) + if err != nil { + return errors.Wrapf(err, "failed to new const histogram for %s", h.Desc.String()) + } + h.Save(o) + } + + return nil +} diff --git a/pkg/metrics/data/daemon.go b/pkg/metrics/data/daemon.go new file mode 100644 index 0000000000..6ad9d3050e --- /dev/null +++ b/pkg/metrics/data/daemon.go @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2022. Nydus Developers. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package data + +import "github.com/prometheus/client_golang/prometheus" + +var ( + daemonIDLabel = "daemon_id" + timeLabel = "time" + eventLabel = "event" +) + +var ( + NydusdEvent = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "nydusd_lifetime_events", + Help: "The lifetime events of nydus daemon.", + }, + []string{daemonIDLabel, timeLabel, eventLabel}, + ) +) diff --git a/pkg/metrics/exporter/metrics.go b/pkg/metrics/data/fs.go similarity index 84% rename from pkg/metrics/exporter/metrics.go rename to pkg/metrics/data/fs.go index d0eb954a6e..f3f270150c 100644 --- a/pkg/metrics/exporter/metrics.go +++ b/pkg/metrics/data/fs.go @@ -1,23 +1,21 @@ /* * Copyright (c) 2021. Alibaba Cloud. 
All rights reserved. + * Copyright (c) 2022. Nydus Developers. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 */ -package exporter +package data import ( - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/containerd/nydus-snapshotter/pkg/daemon/types" - "github.com/containerd/nydus-snapshotter/pkg/metrics/ttl" + mtypes "github.com/containerd/nydus-snapshotter/pkg/metrics/types" + "github.com/containerd/nydus-snapshotter/pkg/metrics/types/ttl" + "github.com/prometheus/client_golang/prometheus" ) var ( imageRefLabel = "image_ref" - defaultTTL = 3 * time.Minute ) var ( @@ -28,7 +26,7 @@ var ( Help: "Total number read of a nydus fs, in Byte.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) OpenFdCount = ttl.NewGaugeVecWithTTL( @@ -37,7 +35,7 @@ var ( Help: "Number of current open files.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) OpenFdMaxCount = ttl.NewGaugeVecWithTTL( @@ -46,7 +44,7 @@ var ( Help: "Number of max open files.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) LastFopTimestamp = ttl.NewGaugeVecWithTTL( @@ -55,12 +53,12 @@ var ( Help: "Timestamp of last file operation.", }, []string{imageRefLabel}, - defaultTTL, + ttl.DefaultTTL, ) ) // Fs metric histograms -var FsMetricHists = []*FsMetricHistogram{ +var MetricHists = []*mtypes.MetricHistogram{ { Desc: prometheus.NewDesc( "nydusd_block_count_read_hist", @@ -81,7 +79,7 @@ var FsMetricHists = []*FsMetricHistogram{ []string{imageRefLabel}, prometheus.Labels{}, ), - Buckets: MakeFopBuckets(), + Buckets: mtypes.MakeFopBuckets(), GetCounters: func(m *types.FsMetrics) []uint64 { return m.FopHits }, @@ -94,7 +92,7 @@ var FsMetricHists = []*FsMetricHistogram{ []string{imageRefLabel}, prometheus.Labels{}, ), - Buckets: MakeFopBuckets(), + Buckets: mtypes.MakeFopBuckets(), GetCounters: func(m *types.FsMetrics) []uint64 { return m.FopErrors }, diff --git a/pkg/metrics/exporter/exporter.go b/pkg/metrics/exporter/exporter.go new 
file mode 100644 index 0000000000..d2553f7f39 --- /dev/null +++ b/pkg/metrics/exporter/exporter.go @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2022. Nydus Developers. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package exporter + +type Exporter interface { + // Export all metrics data. + Export() +} + +func FileExport() error { + return GlobalFileExporter.Export() +} diff --git a/pkg/metrics/exporter/export.go b/pkg/metrics/exporter/file.go similarity index 58% rename from pkg/metrics/exporter/export.go rename to pkg/metrics/exporter/file.go index 873103bc1d..79f968e13d 100644 --- a/pkg/metrics/exporter/export.go +++ b/pkg/metrics/exporter/file.go @@ -1,5 +1,6 @@ /* * Copyright (c) 2021. Alibaba Cloud. All rights reserved. + * Copyright (c) 2022. Nydus Developers. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 */ @@ -13,21 +14,27 @@ import ( "os" "time" + "github.com/containerd/nydus-snapshotter/pkg/metrics/registry" "github.com/pkg/errors" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" - - "github.com/containerd/nydus-snapshotter/pkg/daemon/types" ) -type Opt func(*Exporter) error +type Opt func(*FileExporter) error -type Exporter struct { +type FileExporter struct { outputFile string } +var GlobalFileExporter *FileExporter + +func init() { + var exp FileExporter + GlobalFileExporter = &exp +} + func WithOutputFile(metricsFile string) Opt { - return func(e *Exporter) error { + return func(e *FileExporter) error { if metricsFile == "" { return errors.New("metrics file path is empty") } @@ -40,39 +47,20 @@ func WithOutputFile(metricsFile string) Opt { } } -func NewExporter(opts ...Opt) (*Exporter, error) { - var exp Exporter - +func NewFileExporter(opts ...Opt) error { for _, o := range opts { - if err := o(&exp); err != nil { - return nil, err - } - } - - return &exp, nil -} - -func (e *Exporter) ExportFsMetrics(m *types.FsMetrics, imageRef string) error { - 
ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead)) - OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens)) - OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens)) - LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp)) - - for _, h := range FsMetricHists { - o, err := h.ToConstHistogram(m, imageRef) - if err != nil { - return errors.Wrapf(err, "failed to new const histogram for %s", h.Desc.String()) + if err := o(GlobalFileExporter); err != nil { + return err } - h.Save(o) } - return e.output() + return nil } -func (e *Exporter) output() error { - ms, err := Registry.Gather() +func (e *FileExporter) Export() error { + ms, err := registry.Registry.Gather() if err != nil { - return errors.Wrap(err, "failed to gather all prometheus collectors") + return errors.Wrap(err, "failed to gather all prometheus exporters") } for _, m := range ms { if err := e.exportText(m); err != nil { @@ -83,7 +71,7 @@ func (e *Exporter) output() error { return nil } -func (e *Exporter) exportText(m *dto.MetricFamily) error { +func (e *FileExporter) exportText(m *dto.MetricFamily) error { var b bytes.Buffer enc := expfmt.NewEncoder(&b, expfmt.FmtText) @@ -102,7 +90,7 @@ func (e *Exporter) exportText(m *dto.MetricFamily) error { return e.writeToFile(string(json)) } -func (e *Exporter) writeToFile(data string) error { +func (e *FileExporter) writeToFile(data string) error { f, err := os.OpenFile(e.outputFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return errors.Wrapf(err, "failed to open metrics file on %s", e.outputFile) diff --git a/pkg/metrics/exporter/registry.go b/pkg/metrics/registry/registry.go similarity index 56% rename from pkg/metrics/exporter/registry.go rename to pkg/metrics/registry/registry.go index af68e120da..2527cff761 100644 --- a/pkg/metrics/exporter/registry.go +++ b/pkg/metrics/registry/registry.go @@ -4,9 +4,10 @@ * SPDX-License-Identifier: Apache-2.0 */ -package exporter +package registry import ( + 
"github.com/containerd/nydus-snapshotter/pkg/metrics/data" "github.com/prometheus/client_golang/prometheus" ) @@ -16,13 +17,14 @@ var ( func init() { Registry.MustRegister( - ReadCount, - OpenFdCount, - OpenFdMaxCount, - LastFopTimestamp, + data.ReadCount, + data.OpenFdCount, + data.OpenFdMaxCount, + data.LastFopTimestamp, + data.NydusdEvent, ) - for _, m := range FsMetricHists { + for _, m := range data.MetricHists { Registry.MustRegister(m) } } diff --git a/pkg/metrics/serve.go b/pkg/metrics/serve.go index 64115c73e5..d4c6cbbdb0 100644 --- a/pkg/metrics/serve.go +++ b/pkg/metrics/serve.go @@ -9,8 +9,6 @@ package metrics import ( "context" - "net" - "os" "path/filepath" "time" @@ -18,19 +16,16 @@ import ( "github.com/containerd/containerd/log" "github.com/containerd/nydus-snapshotter/pkg/manager" + "github.com/containerd/nydus-snapshotter/pkg/metrics/collector" "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter" ) type ServerOpt func(*Server) error -const sockFileName = "metrics.sock" - type Server struct { - listener net.Listener rootDir string metricsFile string pm *manager.Manager - exp *exporter.Exporter } func WithRootDir(rootDir string) ServerOpt { @@ -70,34 +65,17 @@ func NewServer(ctx context.Context, opts ...ServerOpt) (*Server, error) { } } - exp, err := exporter.NewExporter( + err := exporter.NewFileExporter( exporter.WithOutputFile(s.metricsFile), ) if err != nil { return nil, errors.Wrap(err, "failed to new metric exporter") } - s.exp = exp - - sockPath := filepath.Join(s.rootDir, sockFileName) - - if _, err := os.Stat(sockPath); err == nil { - err = os.Remove(sockPath) - if err != nil { - return nil, err - } - } - ln, err := NewListener(sockPath) - if err != nil { - return nil, err - } - s.listener = ln - - log.G(ctx).Infof("Starting metrics server on %s", sockPath) return &s, nil } -func (s *Server) collectDaemonMetric(ctx context.Context) { +func (s *Server) CollectDaemonMetrics(ctx context.Context) error { // TODO(renzhen): make collect 
interval time configurable timer := time.NewTicker(time.Duration(1) * time.Minute) @@ -105,9 +83,9 @@ outer: for { select { case <-timer.C: + // Collect metrics from daemons. daemons := s.pm.ListDaemons() for _, d := range daemons { - for _, i := range d.Instances.List() { var sid string @@ -123,7 +101,7 @@ outer: continue } - if err := s.exp.ExportFsMetrics(fsMetrics, i.ImageID); err != nil { + if err := collector.CollectFsMetrics(fsMetrics, i.ImageID); err != nil { log.G(ctx).Errorf("failed to export fs metrics for %s: %v", i.ImageID, err) continue } @@ -135,13 +113,6 @@ outer: break outer } } -} - -func (s *Server) Serve(ctx context.Context) error { - // Start to collect metrics from daemons periodically. - go func() { - s.collectDaemonMetric(ctx) - }() return nil } diff --git a/pkg/metrics/ttl/gauge.go b/pkg/metrics/types/ttl/gauge.go similarity index 97% rename from pkg/metrics/ttl/gauge.go rename to pkg/metrics/types/ttl/gauge.go index 74c1b1c785..fdb4ff80c1 100644 --- a/pkg/metrics/ttl/gauge.go +++ b/pkg/metrics/types/ttl/gauge.go @@ -16,6 +16,7 @@ import ( var ( defaultCleanUpPeriod = 10 * time.Minute + DefaultTTL = 3 * time.Minute ) type LabelWithValue struct { diff --git a/pkg/metrics/ttl/gauge_test.go b/pkg/metrics/types/ttl/gauge_test.go similarity index 100% rename from pkg/metrics/ttl/gauge_test.go rename to pkg/metrics/types/ttl/gauge_test.go diff --git a/pkg/metrics/exporter/types.go b/pkg/metrics/types/types.go similarity index 80% rename from pkg/metrics/exporter/types.go rename to pkg/metrics/types/types.go index 28509402ac..ec5e8e931c 100644 --- a/pkg/metrics/exporter/types.go +++ b/pkg/metrics/types/types.go @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package exporter +package fs import ( "fmt" @@ -50,7 +50,7 @@ func MakeFopBuckets() []uint64 { type GetCountersFn func(*types.FsMetrics) []uint64 -type FsMetricHistogram struct { +type MetricHistogram struct { Desc *prometheus.Desc Buckets []uint64 GetCounters GetCountersFn @@ -59,7 
+59,7 @@ type FsMetricHistogram struct { constHist prometheus.Metric } -func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) { +func (h *MetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) { var count, sum uint64 counters := h.GetCounters(m) hmap := make(map[float64]uint64) @@ -82,18 +82,18 @@ func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string ), nil } -func (h *FsMetricHistogram) Save(m prometheus.Metric) { +func (h *MetricHistogram) Save(m prometheus.Metric) { h.constHist = m } // Implement prometheus.Collector interface -func (h *FsMetricHistogram) Describe(ch chan<- *prometheus.Desc) { +func (h *MetricHistogram) Describe(ch chan<- *prometheus.Desc) { if h.Desc != nil { ch <- h.Desc } } -func (h *FsMetricHistogram) Collect(ch chan<- prometheus.Metric) { +func (h *MetricHistogram) Collect(ch chan<- prometheus.Metric) { if h.constHist != nil { ch <- h.constHist } diff --git a/pkg/system/system.go b/pkg/system/system.go index c49241fc6d..67a0623063 100644 --- a/pkg/system/system.go +++ b/pkg/system/system.go @@ -26,6 +26,7 @@ import ( "github.com/containerd/nydus-snapshotter/pkg/errdefs" "github.com/containerd/nydus-snapshotter/pkg/manager" "github.com/containerd/nydus-snapshotter/pkg/metrics/exporter" + "github.com/containerd/nydus-snapshotter/pkg/metrics/registry" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -145,10 +146,14 @@ func (sc *Controller) registerRouter() { sc.router.HandleFunc(endpointDaemonRecords, sc.getDaemonRecords()).Methods(http.MethodGet) // Special registration for Prometheus metrics export - handler := promhttp.HandlerFor(exporter.Registry, promhttp.HandlerOpts{ + handler := promhttp.HandlerFor(registry.Registry, promhttp.HandlerOpts{ ErrorHandling: promhttp.HTTPErrorOnError, }) - sc.router.Handle(endpointPromMetrics, handler) + + sc.router.Handle(endpointPromMetrics, http.HandlerFunc(func(rsp 
http.ResponseWriter, req *http.Request) { + handler.ServeHTTP(rsp, req) + exporter.FileExport() + })) } func (sc *Controller) describeDaemons() func(w http.ResponseWriter, r *http.Request) { diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index bcf5fed4e2..7e820492cb 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -98,6 +98,25 @@ func NewSnapshotter(ctx context.Context, cfg *config.Config) (snapshots.Snapshot return nil, errors.Wrap(err, "create daemons manager") } + metricServer, err := metrics.NewServer( + ctx, + metrics.WithRootDir(cfg.RootDir), + metrics.WithMetricsFile(cfg.MetricsFile), + metrics.WithProcessManager(manager), + ) + if err != nil { + return nil, errors.Wrap(err, "create metrics server") + } + + if cfg.EnableMetrics { + // Start to collect daemon metrics. + go func() { + if err := metricServer.CollectDaemonMetrics(ctx); err != nil { + log.L.Errorf("Failed to start export metrics, %s", err) + } + }() + } + if cfg.APISocket != "" { systemController, err := system.NewSystemController(manager, cfg.APISocket) if err != nil { @@ -163,24 +182,6 @@ func NewSnapshotter(ctx context.Context, cfg *config.Config) (snapshots.Snapshot } } - if cfg.EnableMetrics { - metricServer, err := metrics.NewServer( - ctx, - metrics.WithRootDir(cfg.RootDir), - metrics.WithMetricsFile(cfg.MetricsFile), - metrics.WithProcessManager(manager), - ) - if err != nil { - return nil, errors.Wrap(err, "create metrics server") - } - // Start metrics http server. - go func() { - if err := metricServer.Serve(ctx); err != nil { - log.L.Errorf("Failed to start metrics server, %s", err) - } - }() - } - if err := os.MkdirAll(cfg.RootDir, 0700); err != nil { return nil, err }