resource: export per-process CPU and disk I/O totals as Prometheus gauges

resource.NewManager now takes a prometheus.Registerer and returns
(Manager, error). It registers five gauge vectors in the "system_resources"
namespace, each labelled by "processID"; manager.getActiveUsage sets them
from each proc's most recently recorded totals. proc additionally stores
io.ReadCount and io.WriteCount. Node.initResourceManager and the VM getter
test are updated for the new signature.

NOTE(review): tabs, gofmt column alignment and blank context lines were
reconstructed from the hunk line counts; if a hunk is rejected, retry with
`git apply --ignore-whitespace`.
NOTE(review): numDiskWritesBytes is spelled unlike its sibling
numDiskReadBytes and its metric name "num_disk_write_bytes".
NOTE(review): the test now passes " " as the disk path where the replaced
call passed ""; confirm this is intended.
NOTE(review): nothing in this patch deletes a per-processID series; confirm
they are removed once a process is no longer tracked.

diff --git a/node/node.go b/node/node.go
index 948821ce4278..b2b3febff8ef 100644
--- a/node/node.go
+++ b/node/node.go
@@ -1196,15 +1196,19 @@ func (n *Node) initVdrs() validators.Set {
 
 // Initialize [n.resourceManager].
 func (n *Node) initResourceManager(reg prometheus.Registerer) error {
-	n.resourceManager = resource.NewManager(
+	resourceManager, err := resource.NewManager(
 		n.Config.DatabaseConfig.Path,
 		n.Config.SystemTrackerFrequency,
 		n.Config.SystemTrackerCPUHalflife,
 		n.Config.SystemTrackerDiskHalflife,
+		reg,
 	)
+	if err != nil {
+		return err
+	}
+	n.resourceManager = resourceManager
 	n.resourceManager.TrackProcess(os.Getpid())
 
-	var err error
 	n.resourceTracker, err = tracker.NewResourceTracker(reg, n.resourceManager, &meter.ContinuousFactory{}, n.Config.SystemTrackerProcessingHalflife)
 	return err
 }
diff --git a/utils/resource/metrics.go b/utils/resource/metrics.go
new file mode 100644
index 000000000000..96c3c21ad204
--- /dev/null
+++ b/utils/resource/metrics.go
@@ -0,0 +1,72 @@
+// Copyright (C) 2019-2023, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package resource
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/ava-labs/avalanchego/utils/wrappers"
+)
+
+type metrics struct {
+	numCPUCycles       *prometheus.GaugeVec
+	numDiskReads       *prometheus.GaugeVec
+	numDiskReadBytes   *prometheus.GaugeVec
+	numDiskWrites      *prometheus.GaugeVec
+	numDiskWritesBytes *prometheus.GaugeVec
+}
+
+func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, error) {
+	m := &metrics{
+		numCPUCycles: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: namespace,
+				Name:      "num_cpu_cycles",
+				Help:      "Total number of CPU cycles",
+			},
+			[]string{"processID"},
+		),
+		numDiskReads: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: namespace,
+				Name:      "num_disk_reads",
+				Help:      "Total number of disk reads",
+			},
+			[]string{"processID"},
+		),
+		numDiskReadBytes: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: namespace,
+				Name:      "num_disk_read_bytes",
+				Help:      "Total number of disk read bytes",
+			},
+			[]string{"processID"},
+		),
+		numDiskWrites: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: namespace,
+				Name:      "num_disk_writes",
+				Help:      "Total number of disk writes",
+			},
+			[]string{"processID"},
+		),
+		numDiskWritesBytes: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: namespace,
+				Name:      "num_disk_write_bytes",
+				Help:      "Total number of disk write bytes",
+			},
+			[]string{"processID"},
+		),
+	}
+	errs := wrappers.Errs{}
+	errs.Add(
+		registerer.Register(m.numCPUCycles),
+		registerer.Register(m.numDiskReads),
+		registerer.Register(m.numDiskReadBytes),
+		registerer.Register(m.numDiskWrites),
+		registerer.Register(m.numDiskWritesBytes),
+	)
+	return m, errs.Err
+}
diff --git a/utils/resource/usage.go b/utils/resource/usage.go
index 2c83aa0db034..351df4022838 100644
--- a/utils/resource/usage.go
+++ b/utils/resource/usage.go
@@ -5,9 +5,12 @@ package resource
 
 import (
 	"math"
+	"strconv"
 	"sync"
 	"time"
 
+	"github.com/prometheus/client_golang/prometheus"
+
 	"github.com/shirou/gopsutil/process"
 
 	"github.com/ava-labs/avalanchego/utils/storage"
@@ -62,6 +65,8 @@ type Manager interface {
 }
 
 type manager struct {
+	processMetrics *metrics
+
 	processesLock sync.Mutex
 	processes     map[int]*proc
 
@@ -78,14 +83,27 @@ type manager struct {
 	onClose   chan struct{}
 }
 
-func NewManager(diskPath string, frequency, cpuHalflife, diskHalflife time.Duration) Manager {
+func NewManager(
+	diskPath string,
+	frequency,
+	cpuHalflife,
+	diskHalflife time.Duration,
+	metricsRegisterer prometheus.Registerer,
+) (Manager, error) {
+	processMetrics, err := newMetrics("system_resources", metricsRegisterer)
+	if err != nil {
+		return nil, err
+	}
+
 	m := &manager{
+		processMetrics:     processMetrics,
 		processes:          make(map[int]*proc),
 		onClose:            make(chan struct{}),
 		availableDiskBytes: math.MaxUint64,
 	}
+
 	go m.update(diskPath, frequency, cpuHalflife, diskHalflife)
-	return m
+	return m, nil
 }
 
 func (m *manager) CPUUsage() float64 {
@@ -187,6 +205,13 @@ func (m *manager) getActiveUsage(secondsSinceLastUpdate float64) (float64, float
 		totalCPU += cpu
 		totalRead += read
 		totalWrite += write
+
+		processIDStr := strconv.Itoa(int(p.p.Pid))
+		m.processMetrics.numCPUCycles.WithLabelValues(processIDStr).Set(p.lastTotalCPU)
+		m.processMetrics.numDiskReads.WithLabelValues(processIDStr).Set(float64(p.numReads))
+		m.processMetrics.numDiskReadBytes.WithLabelValues(processIDStr).Set(float64(p.lastReadBytes))
+		m.processMetrics.numDiskWrites.WithLabelValues(processIDStr).Set(float64(p.numWrites))
+		m.processMetrics.numDiskWritesBytes.WithLabelValues(processIDStr).Set(float64(p.lastWriteBytes))
 	}
 
 	return totalCPU, totalRead, totalWrite
@@ -200,8 +225,13 @@ type proc struct {
 	// [lastTotalCPU] is the most recent measurement of total CPU usage.
 	lastTotalCPU float64
 
+	// [numReads] is the total number of disk reads performed.
+	numReads uint64
 	// [lastReadBytes] is the most recent measurement of total disk bytes read.
 	lastReadBytes uint64
+
+	// [numWrites] is the total number of disk writes performed.
+	numWrites uint64
 	// [lastWriteBytes] is the most recent measurement of total disk bytes
 	// written.
 	lastWriteBytes uint64
@@ -243,7 +273,9 @@ func (p *proc) getActiveUsage(secondsSinceLastUpdate float64) (float64, float64,
 
 	p.initialized = true
 	p.lastTotalCPU = totalCPU
+	p.numReads = io.ReadCount
 	p.lastReadBytes = io.ReadBytes
+	p.numWrites = io.WriteCount
 	p.lastWriteBytes = io.WriteBytes
 
 	return cpu, read, write
diff --git a/vms/registry/vm_getter_test.go b/vms/registry/vm_getter_test.go
index ce659cf4716b..0d0efd6fcfd5 100644
--- a/vms/registry/vm_getter_test.go
+++ b/vms/registry/vm_getter_test.go
@@ -11,6 +11,8 @@ import (
 
 	"github.com/golang/mock/gomock"
 
+	"github.com/prometheus/client_golang/prometheus"
+
 	"github.com/stretchr/testify/require"
 
 	"github.com/ava-labs/avalanchego/ids"
@@ -143,13 +145,16 @@ func initVMGetterTest(t *testing.T) *vmGetterTestResources {
 
 	mockReader := filesystem.NewMockReader(ctrl)
 	mockManager := vms.NewMockManager(ctrl)
+	mockRegistry := prometheus.NewRegistry()
+	mockCPUTracker, err := resource.NewManager(" ", time.Hour, time.Hour, time.Hour, mockRegistry)
+	require.NoError(t, err)
 
 	getter := NewVMGetter(
 		VMGetterConfig{
 			FileReader:      mockReader,
 			Manager:         mockManager,
 			PluginDirectory: pluginDir,
-			CPUTracker:      resource.NewManager("", time.Hour, time.Hour, time.Hour),
+			CPUTracker:      mockCPUTracker,
 		},
 	)
 