Skip to content

Commit

Permalink
metrics: collect the metrics of nydusd events
Browse files Browse the repository at this point in the history
Collect the metrics of nydus daemon events, including INIT, RUNNING and DIED.

Signed-off-by: Bin Tang <tangbin.bin@bytedance.com>
  • Loading branch information
sctb512 committed Nov 28, 2022
1 parent a6fc3af commit 19f0a66
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 51 deletions.
5 changes: 5 additions & 0 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/containerd/nydus-snapshotter/config/daemonconfig"
"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
"github.com/containerd/nydus-snapshotter/pkg/errdefs"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter"
"github.com/containerd/nydus-snapshotter/pkg/supervisor"
"github.com/containerd/nydus-snapshotter/pkg/utils/erofs"
"github.com/containerd/nydus-snapshotter/pkg/utils/mount"
Expand Down Expand Up @@ -201,6 +202,10 @@ func (d *Daemon) WaitUntilState(expected types.DaemonState) error {
_, err, shared := d.stateGetterGroup.Do(d.ID(), stateGetter)
log.L.Debugf("Get daemon %s with shared result: %v ", d.ID(), shared)

if exportErr := exporter.ExportNydusdEventMetric(d.States.ID, string(expected)); exportErr != nil {
log.L.Warnf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", d.States.ID, string(expected), exportErr)
}

return err
}

Expand Down
1 change: 1 addition & 0 deletions pkg/daemon/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const (
DaemonStateInit DaemonState = "INIT"
DaemonStateReady DaemonState = "READY"
DaemonStateRunning DaemonState = "RUNNING"
DaemonStateDied DaemonState = "DIED"
)

func (info *DaemonInfo) DaemonState() DaemonState {
Expand Down
5 changes: 5 additions & 0 deletions pkg/manager/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import (
"golang.org/x/sys/unix"

"github.com/containerd/containerd/log"
"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
"github.com/containerd/nydus-snapshotter/pkg/errdefs"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter"
"github.com/containerd/nydus-snapshotter/pkg/utils/retry"
)

Expand Down Expand Up @@ -217,6 +219,9 @@ func (m *livenessMonitor) Run() {

if ev.Events&(unix.EPOLLHUP|unix.EPOLLERR) != 0 {
log.L.Warnf("Daemon %s died", target.id)
if err := exporter.ExportNydusdEventMetric(target.id, string(types.DaemonStateDied)); err != nil {
log.L.Warnf("export nydusd event metric failed, daemon ID: %s, event: %s, error: %v", target.id, string(types.DaemonStateDied), err)
}
// Notify subscribers that death event happens
target.notifier <- deathEvent{daemonID: target.id, path: target.path}
}
Expand Down
48 changes: 39 additions & 9 deletions pkg/metrics/exporter/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"github.com/prometheus/common/expfmt"

"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/daemon"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/fs"
)

type Opt func(*Exporter) error
Expand All @@ -26,6 +28,8 @@ type Exporter struct {
outputFile string
}

var globalExporter *Exporter

func WithOutputFile(metricsFile string) Opt {
return func(e *Exporter) error {
if metricsFile == "" {
Expand All @@ -40,25 +44,40 @@ func WithOutputFile(metricsFile string) Opt {
}
}

func NewExporter(opts ...Opt) (*Exporter, error) {
func NewExporter(opts ...Opt) error {
var exp Exporter

for _, o := range opts {
if err := o(&exp); err != nil {
return nil, err
return err
}
}

return &exp, nil
globalExporter = &exp

return nil
}

// getExporter returns the package-level exporter stored in globalExporter,
// which NewExporter assigns on success. It returns an error while that
// variable is still nil.
func getExporter() (*Exporter, error) {
	if exp := globalExporter; exp != nil {
		return exp, nil
	}

	return nil, errors.New("failed to get global metric exporter")
}

func (e *Exporter) ExportFsMetrics(m *types.FsMetrics, imageRef string) error {
ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead))
OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens))
OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens))
LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp))
func ExportFsMetrics(m *types.FsMetrics, imageRef string) error {
e, err := getExporter()
if err != nil {
return err
}

for _, h := range FsMetricHists {
fs.ReadCount.WithLabelValues(imageRef).Set(float64(m.DataRead))
fs.OpenFdCount.WithLabelValues(imageRef).Set(float64(m.NrOpens))
fs.OpenFdMaxCount.WithLabelValues(imageRef).Set(float64(m.NrMaxOpens))
fs.LastFopTimestamp.WithLabelValues(imageRef).Set(float64(m.LastFopTp))

for _, h := range fs.MetricHists {
o, err := h.ToConstHistogram(m, imageRef)
if err != nil {
return errors.Wrapf(err, "failed to new const histogram for %s", h.Desc.String())
Expand All @@ -69,6 +88,17 @@ func (e *Exporter) ExportFsMetrics(m *types.FsMetrics, imageRef string) error {
return e.output()
}

// ExportNydusdEventMetric increments the daemon.NydusdEvent metric for the
// given daemon ID and event name, then writes the metrics out through the
// global exporter's output method.
//
// It returns the error from getExporter when no global exporter is
// available, otherwise whatever output returns.
func ExportNydusdEventMetric(daemonID string, event string) error {
	exp, err := getExporter()
	if err != nil {
		return err
	}

	// The current wall-clock time, formatted with millisecond precision, is
	// passed as the second label value.
	// NOTE(review): a timestamp label makes nearly every call produce a new
	// label combination — confirm the resulting series growth is acceptable.
	occurredAt := time.Now().Format("2006-01-02 15:04:05.000")
	metric := daemon.NydusdEvent.WithLabelValues(daemonID, occurredAt, event)
	metric.Inc()

	return exp.output()
}

func (e *Exporter) output() error {
ms, err := Registry.Gather()
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
* SPDX-License-Identifier: Apache-2.0
*/

package exporter
package fs

import (
"time"

"github.com/prometheus/client_golang/prometheus"

"github.com/containerd/nydus-snapshotter/pkg/daemon/types"
Expand All @@ -17,7 +15,6 @@ import (

var (
imageRefLabel = "image_ref"
defaultTTL = 3 * time.Minute
)

var (
Expand All @@ -28,7 +25,7 @@ var (
Help: "Total number read of a nydus fs, in Byte.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)

OpenFdCount = ttl.NewGaugeVecWithTTL(
Expand All @@ -37,7 +34,7 @@ var (
Help: "Number of current open files.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)

OpenFdMaxCount = ttl.NewGaugeVecWithTTL(
Expand All @@ -46,7 +43,7 @@ var (
Help: "Number of max open files.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)

LastFopTimestamp = ttl.NewGaugeVecWithTTL(
Expand All @@ -55,12 +52,12 @@ var (
Help: "Timestamp of last file operation.",
},
[]string{imageRefLabel},
defaultTTL,
ttl.DefaultTTL,
)
)

// Fs metric histograms
var FsMetricHists = []*FsMetricHistogram{
var MetricHists = []*MetricHistogram{
{
Desc: prometheus.NewDesc(
"nydusd_block_count_read_hist",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

package exporter
package fs

import (
"fmt"
Expand Down Expand Up @@ -50,7 +50,7 @@ func MakeFopBuckets() []uint64 {

type GetCountersFn func(*types.FsMetrics) []uint64

type FsMetricHistogram struct {
type MetricHistogram struct {
Desc *prometheus.Desc
Buckets []uint64
GetCounters GetCountersFn
Expand All @@ -59,7 +59,7 @@ type FsMetricHistogram struct {
constHist prometheus.Metric
}

func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) {
func (h *MetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string) (prometheus.Metric, error) {
var count, sum uint64
counters := h.GetCounters(m)
hmap := make(map[float64]uint64)
Expand All @@ -82,18 +82,18 @@ func (h *FsMetricHistogram) ToConstHistogram(m *types.FsMetrics, imageRef string
), nil
}

func (h *FsMetricHistogram) Save(m prometheus.Metric) {
func (h *MetricHistogram) Save(m prometheus.Metric) {
h.constHist = m
}

// Implement prometheus.Collector interface
func (h *FsMetricHistogram) Describe(ch chan<- *prometheus.Desc) {
func (h *MetricHistogram) Describe(ch chan<- *prometheus.Desc) {
if h.Desc != nil {
ch <- h.Desc
}
}

func (h *FsMetricHistogram) Collect(ch chan<- prometheus.Metric) {
func (h *MetricHistogram) Collect(ch chan<- prometheus.Metric) {
if h.constHist != nil {
ch <- h.constHist
}
Expand Down
13 changes: 8 additions & 5 deletions pkg/metrics/exporter/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
package exporter

import (
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/daemon"
"github.com/containerd/nydus-snapshotter/pkg/metrics/exporter/fs"
"github.com/prometheus/client_golang/prometheus"
)

Expand All @@ -16,13 +18,14 @@ var (

func init() {
Registry.MustRegister(
ReadCount,
OpenFdCount,
OpenFdMaxCount,
LastFopTimestamp,
fs.ReadCount,
fs.OpenFdCount,
fs.OpenFdMaxCount,
fs.LastFopTimestamp,
daemon.NydusdEvent,
)

for _, m := range FsMetricHists {
for _, m := range fs.MetricHists {
Registry.MustRegister(m)
}
}
6 changes: 2 additions & 4 deletions pkg/metrics/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ type Server struct {
rootDir string
metricsFile string
pm *manager.Manager
exp *exporter.Exporter
}

func WithRootDir(rootDir string) ServerOpt {
Expand Down Expand Up @@ -70,13 +69,12 @@ func NewServer(ctx context.Context, opts ...ServerOpt) (*Server, error) {
}
}

exp, err := exporter.NewExporter(
err := exporter.NewExporter(
exporter.WithOutputFile(s.metricsFile),
)
if err != nil {
return nil, errors.Wrap(err, "failed to new metric exporter")
}
s.exp = exp

sockPath := filepath.Join(s.rootDir, sockFileName)

Expand Down Expand Up @@ -123,7 +121,7 @@ outer:
continue
}

if err := s.exp.ExportFsMetrics(fsMetrics, i.ImageID); err != nil {
if err := exporter.ExportFsMetrics(fsMetrics, i.ImageID); err != nil {
log.G(ctx).Errorf("failed to export fs metrics for %s: %v", i.ImageID, err)
continue
}
Expand Down
1 change: 1 addition & 0 deletions pkg/metrics/ttl/gauge.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

var (
defaultCleanUpPeriod = 10 * time.Minute
DefaultTTL = 3 * time.Minute
)

type LabelWithValue struct {
Expand Down
36 changes: 18 additions & 18 deletions snapshot/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ func NewSnapshotter(ctx context.Context, cfg *config.Config) (snapshots.Snapshot
return nil, errors.Wrap(err, "create daemons manager")
}

if cfg.EnableMetrics {
metricServer, err := metrics.NewServer(
ctx,
metrics.WithRootDir(cfg.RootDir),
metrics.WithMetricsFile(cfg.MetricsFile),
metrics.WithProcessManager(manager),
)
if err != nil {
return nil, errors.Wrap(err, "create metrics server")
}
// Start metrics http server.
go func() {
if err := metricServer.Serve(ctx); err != nil {
log.L.Errorf("Failed to start metrics server, %s", err)
}
}()
}

if cfg.APISocket != "" {
systemController, err := system.NewSystemController(manager, cfg.APISocket)
if err != nil {
Expand Down Expand Up @@ -163,24 +181,6 @@ func NewSnapshotter(ctx context.Context, cfg *config.Config) (snapshots.Snapshot
}
}

if cfg.EnableMetrics {
metricServer, err := metrics.NewServer(
ctx,
metrics.WithRootDir(cfg.RootDir),
metrics.WithMetricsFile(cfg.MetricsFile),
metrics.WithProcessManager(manager),
)
if err != nil {
return nil, errors.Wrap(err, "create metrics server")
}
// Start metrics http server.
go func() {
if err := metricServer.Serve(ctx); err != nil {
log.L.Errorf("Failed to start metrics server, %s", err)
}
}()
}

if err := os.MkdirAll(cfg.RootDir, 0700); err != nil {
return nil, err
}
Expand Down

0 comments on commit 19f0a66

Please sign in to comment.