From aaae0b7fe7bf76cc1da3a3d5304482124d18f989 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Fri, 13 Apr 2018 17:00:17 +0200 Subject: [PATCH] Metricbeat: Improvements in docker diskio metricset (#6701) This PR continues with #6608 in the aim to provide docker metrics more consistent with `docker stats`. Currently we report for disk I/O the number of operations per second. This PR adds accumulated I/O stats per number of operations and per data volume since the container started, as `docker stats` does. I think this information is more valuable, as it permits to have further aggregations when visualizing it. It also addresses some other issues in the metricset. Fields in events have been reorganized and old ones have been marked as deprecated. We could also add some extra functionality, as obtaining metrics also on Windows hosts (as is this module now it would only work with docker daemons running on Linux and not sure about Mac). In Linux we could also collect metrics per block device. - [x] Add accumulated I/O stats to diskio metricset - [x] Fix diskio metricset when docker reads from multiple devices - [x] Some naming and documentation fixes - [x] Fix memory leak in blkio service (last seen stats were being stored, but never removed) - [x] More tests To follow-up in other issues: - ~~Group by device in Linux?~~ To be done if requested in the future - Support for Windows storage stats? Follow-up issue #6815 --- CHANGELOG.asciidoc | 1 + metricbeat/docs/fields.asciidoc | 105 +++++++++++++++++- .../module/docker/diskio/_meta/data.json | 30 ++++- .../module/docker/diskio/_meta/fields.yml | 63 ++++++++++- metricbeat/module/docker/diskio/data.go | 15 +++ metricbeat/module/docker/diskio/diskio.go | 8 +- .../module/docker/diskio/diskio_test.go | 53 ++++++++- metricbeat/module/docker/diskio/helper.go | 63 ++++++----- 8 files changed, 295 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 538e3e359d5..93619a8b860 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -187,6 +187,7 @@ https://github.com/elastic/beats/compare/v6.0.0-beta2...master[Check the HEAD di - Set `status` as default metricset for mysql module. {pull} 6769[6769] - Set `stubstatus` as default metricset for nginx module. {pull}6770[6770] - Added support for haproxy 1.7 and 1.8. {pull}6793[6793] +- Add accumulated I/O stats to diskio in the line of `docker stats`. {pull}6701[6701] *Packetbeat* diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 086b7abaf01..1d18d2a35b8 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -2214,12 +2214,78 @@ Disk I/O metrics. +[float] +== read fields + +Accumulated reads during the life of the container + + + +[float] +=== `docker.diskio.read.ops` + +type: long + +Number of reads during the life of the container + + +[float] +=== `docker.diskio.read.bytes` + +type: long + +format: bytes + +Bytes read during the life of the container + + +[float] +=== `docker.diskio.read.rate` + +type: long + +Number of current reads per second + + [float] === `docker.diskio.reads` type: scaled_float -Number of reads. +Number of current reads per second + + +[float] +== write fields + +Accumulated writes during the life of the container + + + +[float] +=== `docker.diskio.write.ops` + +type: long + +Number of writes during the life of the container + + +[float] +=== `docker.diskio.write.bytes` + +type: long + +format: bytes + +Bytes written during the life of the container + + +[float] +=== `docker.diskio.write.rate` + +type: long + +Number of current writes per second [float] @@ -2227,7 +2293,40 @@ Number of reads. type: scaled_float -Number of writes. +Number of current writes per second + + +[float] +== summary fields + +Accumulated reads and writes during the life of the container + + + +[float] +=== `docker.diskio.summary.ops` + +type: long + +Number of I/O operations during the life of the container + + +[float] +=== `docker.diskio.summary.bytes` + +type: long + +format: bytes + +Bytes read and written during the life of the container + + +[float] +=== `docker.diskio.summary.rate` + +type: long + +Number of current operations per second [float] @@ -2235,7 +2334,7 @@ Number of writes. type: scaled_float -Number of reads and writes combined. +Number of reads and writes per second [float] diff --git a/metricbeat/module/docker/diskio/_meta/data.json b/metricbeat/module/docker/diskio/_meta/data.json index a5201a46738..3adb0cc1675 100644 --- a/metricbeat/module/docker/diskio/_meta/data.json +++ b/metricbeat/module/docker/diskio/_meta/data.json @@ -6,20 +6,40 @@ }, "docker": { "container": { - "id": "af5b4d9b5a792bf883e3f0cb55413aec8148d75a2bbd5723680f7ad8dc5545f6", + "id": "59c5d4838454f38c7d67fdacec7a32ca4476a062ef00edf69ba6be9117cf2e7b", "labels": { - "com_docker_compose_config-hash": "b41e43e99efa9215f20761ad78899d65e4119b55", + "build-date": "20170911", + "com_docker_compose_config-hash": "a2bcfc1f8c99a4be6920deda8da8d4d06fe0d10d51623b8e1dbcc8228e96926c", "com_docker_compose_container-number": "1", "com_docker_compose_oneoff": "False", "com_docker_compose_project": "metricbeat", - "com_docker_compose_service": "nginx", - "com_docker_compose_version": "1.5.0" + "com_docker_compose_service": "elasticsearch", + "com_docker_compose_version": "1.20.1", + "license": "GPLv2", + "maintainer": "Elastic Docker Team \u003cdocker@elastic.co\u003e", + "name": "CentOS Base Image", + "vendor": "CentOS" }, - "name": "metricbeat_nginx_1" + "name": "metricbeat_elasticsearch_1" }, "diskio": { + "read": { + "bytes": 61964288, + "ops": 3284, + "rate": 0 + }, "reads": 0, + "summary": { + "bytes": 63479808, + "ops": 3500, + "rate": 0 + }, "total": 0, + "write": { + "bytes": 1515520, + "ops": 216, + "rate": 0 + }, "writes": 0 } }, diff --git a/metricbeat/module/docker/diskio/_meta/fields.yml b/metricbeat/module/docker/diskio/_meta/fields.yml index 5e68b6168da..c4f5d990202 100644 --- a/metricbeat/module/docker/diskio/_meta/fields.yml +++ b/metricbeat/module/docker/diskio/_meta/fields.yml @@ -4,15 +4,72 @@ Disk I/O metrics. release: ga fields: + - name: read + type: group + description: > + Accumulated reads during the life of the container + fields: + - name: ops + type: long + description: > + Number of reads during the life of the container + - name: bytes + type: long + format: bytes + description: > + Bytes read during the life of the container + - name: rate + type: long + description: > + Number of current reads per second - name: reads type: scaled_float + deprecated: true + description: > + Number of current reads per second + - name: write + type: group description: > - Number of reads. + Accumulated writes during the life of the container + fields: + - name: ops + type: long + description: > + Number of writes during the life of the container + - name: bytes + type: long + format: bytes + description: > + Bytes written during the life of the container + - name: rate + type: long + description: > + Number of current writes per second - name: writes type: scaled_float + deprecated: true + description: > + Number of current writes per second + - name: summary + type: group description: > - Number of writes. + Accumulated reads and writes during the life of the container + fields: + - name: ops + type: long + description: > + Number of I/O operations during the life of the container + - name: bytes + type: long + format: bytes + description: > + Bytes read and written during the life of the container + - name: rate + type: long + description: > + Number of current operations per second - name: total type: scaled_float + deprecated: true description: > - Number of reads and writes combined. + Number of reads and writes per second diff --git a/metricbeat/module/docker/diskio/data.go b/metricbeat/module/docker/diskio/data.go index e73691b247d..b87313efafd 100644 --- a/metricbeat/module/docker/diskio/data.go +++ b/metricbeat/module/docker/diskio/data.go @@ -21,6 +21,21 @@ func eventMapping(stats *BlkioStats) common.MapStr { "reads": stats.reads, "writes": stats.writes, "total": stats.totals, + "read": common.MapStr{ + "ops": stats.serviced.reads, + "bytes": stats.servicedBytes.reads, + "rate": stats.reads, + }, + "write": common.MapStr{ + "ops": stats.serviced.writes, + "bytes": stats.servicedBytes.writes, + "rate": stats.writes, + }, + "summary": common.MapStr{ + "ops": stats.serviced.totals, + "bytes": stats.servicedBytes.totals, + "rate": stats.totals, + }, } return event diff --git a/metricbeat/module/docker/diskio/diskio.go b/metricbeat/module/docker/diskio/diskio.go index b41e54a4299..61f148af979 100644 --- a/metricbeat/module/docker/diskio/diskio.go +++ b/metricbeat/module/docker/diskio/diskio.go @@ -17,7 +17,7 @@ func init() { type MetricSet struct { mb.BaseMetricSet - blkioService *BLkioService + blkioService *BlkioService dockerClient *client.Client dedot bool } @@ -37,10 +37,8 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return &MetricSet{ BaseMetricSet: base, dockerClient: client, - blkioService: &BLkioService{ - BlkioSTatsPerContainer: make(map[string]BlkioRaw), - }, - dedot: config.DeDot, + blkioService: NewBlkioService(), + dedot: config.DeDot, }, nil } diff --git a/metricbeat/module/docker/diskio/diskio_test.go b/metricbeat/module/docker/diskio/diskio_test.go index 0a85c738c01..9cd4108d02e 100644 --- a/metricbeat/module/docker/diskio/diskio_test.go +++ b/metricbeat/module/docker/diskio/diskio_test.go @@ -6,11 +6,12 @@ import ( "time" "github.com/docker/docker/api/types" + "github.com/stretchr/testify/assert" "github.com/elastic/beats/metricbeat/module/docker" ) -var blkioService BLkioService +var blkioService BlkioService var oldBlkioRaw = make([]BlkioRaw, 3) var newBlkioRaw = make([]BlkioRaw, 3) @@ -203,3 +204,53 @@ func setTime(index int) { oldBlkioRaw[index].Time = time.Now() newBlkioRaw[index].Time = oldBlkioRaw[index].Time.Add(time.Duration(2000000000)) } + +func TestGetBlkioStats(t *testing.T) { + start := time.Now() + later := start.Add(10 * time.Second) + + blkioService := BlkioService{ + map[string]BlkioRaw{ + "cebada": {Time: start, reads: 100, writes: 200, totals: 300}, + }, + } + + dockerStats := &docker.Stat{ + Container: &types.Container{ + ID: "cebada", + Names: []string{"test"}, + }, + Stats: types.StatsJSON{Stats: types.Stats{ + Read: later, + BlkioStats: types.BlkioStats{ + IoServicedRecursive: []types.BlkioStatEntry{ + {Major: 1, Minor: 1, Op: "Read", Value: 100}, + {Major: 1, Minor: 1, Op: "Write", Value: 200}, + {Major: 1, Minor: 1, Op: "Total", Value: 300}, + {Major: 1, Minor: 2, Op: "Read", Value: 50}, + {Major: 1, Minor: 2, Op: "Write", Value: 100}, + {Major: 1, Minor: 2, Op: "Total", Value: 150}, + }, + IoServiceBytesRecursive: []types.BlkioStatEntry{ + {Major: 1, Minor: 1, Op: "Read", Value: 1000}, + {Major: 1, Minor: 1, Op: "Write", Value: 2000}, + {Major: 1, Minor: 1, Op: "Total", Value: 3000}, + {Major: 1, Minor: 2, Op: "Read", Value: 500}, + {Major: 1, Minor: 2, Op: "Write", Value: 1000}, + {Major: 1, Minor: 2, Op: "Total", Value: 1500}, + }, + }, + }}, + } + + stats := blkioService.getBlkioStats(dockerStats, true) + assert.Equal(t, float64(5), stats.reads) + assert.Equal(t, float64(10), stats.writes) + assert.Equal(t, float64(15), stats.totals) + assert.Equal(t, + BlkioRaw{Time: later, reads: 150, writes: 300, totals: 450}, + stats.serviced) + assert.Equal(t, + BlkioRaw{Time: later, reads: 1500, writes: 3000, totals: 4500}, + stats.servicedBytes) +} diff --git a/metricbeat/module/docker/diskio/helper.go b/metricbeat/module/docker/diskio/helper.go index c44e6429890..4aa1ab3759d 100644 --- a/metricbeat/module/docker/diskio/helper.go +++ b/metricbeat/module/docker/diskio/helper.go @@ -14,12 +14,9 @@ type BlkioStats struct { reads float64 writes float64 totals float64 -} -type BlkioCalculator interface { - getReadPs(old *BlkioRaw, new *BlkioRaw) float64 - getWritePs(old *BlkioRaw, new *BlkioRaw) float64 - getTotalPs(old *BlkioRaw, new *BlkioRaw) float64 + serviced BlkioRaw + servicedBytes BlkioRaw } type BlkioRaw struct { @@ -29,72 +26,86 @@ type BlkioRaw struct { totals uint64 } -type BLkioService struct { - BlkioSTatsPerContainer map[string]BlkioRaw +// BlkioService is a helper to collect and calculate disk I/O metrics +type BlkioService struct { + lastStatsPerContainer map[string]BlkioRaw } -func (io *BLkioService) getBlkioStatsList(rawStats []docker.Stat, dedot bool) []BlkioStats { - formattedStats := []BlkioStats{} - if io.BlkioSTatsPerContainer == nil { - io.BlkioSTatsPerContainer = make(map[string]BlkioRaw) +// NewBlkioService builds a new initialized BlkioService +func NewBlkioService() *BlkioService { + return &BlkioService{ + lastStatsPerContainer: make(map[string]BlkioRaw), } +} + +func (io *BlkioService) getBlkioStatsList(rawStats []docker.Stat, dedot bool) []BlkioStats { + formattedStats := []BlkioStats{} + + statsPerContainer := make(map[string]BlkioRaw) for _, myRawStats := range rawStats { - formattedStats = append(formattedStats, io.getBlkioStats(&myRawStats, dedot)) + stats := io.getBlkioStats(&myRawStats, dedot) + statsPerContainer[myRawStats.Container.ID] = stats.serviced + formattedStats = append(formattedStats, stats) } + io.lastStatsPerContainer = statsPerContainer return formattedStats } -func (io *BLkioService) getBlkioStats(myRawStat *docker.Stat, dedot bool) BlkioStats { +func (io *BlkioService) getBlkioStats(myRawStat *docker.Stat, dedot bool) BlkioStats { newBlkioStats := io.getNewStats(myRawStat.Stats.Read, myRawStat.Stats.BlkioStats.IoServicedRecursive) - oldBlkioStats, exist := io.BlkioSTatsPerContainer[myRawStat.Container.ID] + bytesBlkioStats := io.getNewStats(myRawStat.Stats.Read, myRawStat.Stats.BlkioStats.IoServiceBytesRecursive) myBlkioStats := BlkioStats{ Time: myRawStat.Stats.Read, Container: docker.NewContainer(myRawStat.Container, dedot), + + serviced: newBlkioStats, + servicedBytes: bytesBlkioStats, } + oldBlkioStats, exist := io.lastStatsPerContainer[myRawStat.Container.ID] if exist { myBlkioStats.reads = io.getReadPs(&oldBlkioStats, &newBlkioStats) myBlkioStats.writes = io.getWritePs(&oldBlkioStats, &newBlkioStats) myBlkioStats.totals = io.getTotalPs(&oldBlkioStats, &newBlkioStats) } - io.BlkioSTatsPerContainer[myRawStat.Container.ID] = newBlkioStats - return myBlkioStats } -func (io *BLkioService) getNewStats(time time.Time, blkioEntry []types.BlkioStatEntry) BlkioRaw { +func (io *BlkioService) getNewStats(time time.Time, blkioEntry []types.BlkioStatEntry) BlkioRaw { stats := BlkioRaw{ Time: time, reads: 0, writes: 0, totals: 0, } + for _, myEntry := range blkioEntry { - if myEntry.Op == "Write" { - stats.writes = myEntry.Value - } else if myEntry.Op == "Read" { - stats.reads = myEntry.Value - } else if myEntry.Op == "Total" { - stats.totals = myEntry.Value + switch myEntry.Op { + case "Write": + stats.writes += myEntry.Value + case "Read": + stats.reads += myEntry.Value + case "Total": + stats.totals += myEntry.Value } } return stats } -func (io *BLkioService) getReadPs(old *BlkioRaw, new *BlkioRaw) float64 { +func (io *BlkioService) getReadPs(old *BlkioRaw, new *BlkioRaw) float64 { duration := new.Time.Sub(old.Time) return calculatePerSecond(duration, old.reads, new.reads) } -func (io *BLkioService) getWritePs(old *BlkioRaw, new *BlkioRaw) float64 { +func (io *BlkioService) getWritePs(old *BlkioRaw, new *BlkioRaw) float64 { duration := new.Time.Sub(old.Time) return calculatePerSecond(duration, old.writes, new.writes) } -func (io *BLkioService) getTotalPs(old *BlkioRaw, new *BlkioRaw) float64 { +func (io *BlkioService) getTotalPs(old *BlkioRaw, new *BlkioRaw) float64 { duration := new.Time.Sub(old.Time) return calculatePerSecond(duration, old.totals, new.totals) }