From 7281bd8c81aa7fa6a22875c474529b0450f0eb16 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 8 Aug 2022 01:42:19 +0200 Subject: [PATCH] feat: configurable disk alert threshold (#1318) * feat: configurable disk alert threshold * fix flag name * improve alerting heuristic and rename config option --- pkg/cli/flags_test.go | 1 + pkg/cli/server.go | 3 +- pkg/config/config.go | 4 +- pkg/health/disk_pressure.go | 45 +++++++++++--- pkg/health/disk_pressure_test.go | 101 +++++++++++++++++++++++++++++++ pkg/util/disk/disk.go | 10 +++ pkg/util/disk/usage_test.go | 22 +++++-- pkg/util/disk/usage_unix.go | 16 ++--- pkg/util/disk/usage_windows.go | 12 ++-- 9 files changed, 188 insertions(+), 26 deletions(-) create mode 100644 pkg/health/disk_pressure_test.go create mode 100644 pkg/util/disk/disk.go diff --git a/pkg/cli/flags_test.go b/pkg/cli/flags_test.go index 0698180f4f..40b556cb46 100644 --- a/pkg/cli/flags_test.go +++ b/pkg/cli/flags_test.go @@ -214,6 +214,7 @@ var _ = Describe("flags", func() { BaseURL: "", CacheEvictThreshold: 0.25, CacheEvictVolume: 0.33, + MinFreeSpacePercentage: 5, BadgerNoTruncate: false, DisablePprofEndpoint: false, EnableExperimentalAdmin: true, diff --git a/pkg/cli/server.go b/pkg/cli/server.go index b9208fa9da..6faa7c7f7e 100644 --- a/pkg/cli/server.go +++ b/pkg/cli/server.go @@ -37,7 +37,6 @@ import ( "github.com/pyroscope-io/pyroscope/pkg/service" "github.com/pyroscope-io/pyroscope/pkg/sqlstore" "github.com/pyroscope-io/pyroscope/pkg/storage" - "github.com/pyroscope-io/pyroscope/pkg/util/bytesize" "github.com/pyroscope-io/pyroscope/pkg/util/debug" ) @@ -93,7 +92,7 @@ func newServerService(c *config.Server) (*serverService, error) { } diskPressure := health.DiskPressure{ - Threshold: 512 * bytesize.MB, + Threshold: c.MinFreeSpacePercentage, Path: c.StoragePath, } diff --git a/pkg/config/config.go b/pkg/config/config.go index 5505e1474e..670e64f48f 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -104,7 +104,9 @@ type Server struct { LogLevel string `def:"info" desc:"log level: debug|info|warn|error" mapstructure:"log-level"` BadgerLogLevel string `def:"error" desc:"log level: debug|info|warn|error" mapstructure:"badger-log-level"` - StoragePath string `def:"/var/lib/pyroscope" desc:"directory where pyroscope stores profiling data" mapstructure:"storage-path"` + StoragePath string `def:"/var/lib/pyroscope" desc:"directory where pyroscope stores profiling data" mapstructure:"storage-path"` + MinFreeSpacePercentage float64 `def:"5" desc:"percentage of available disk space at which ingestion requests are discarded. Defaults to 5% but not less than 1GB. Set 0 to disable" mapstructure:"min-free-space-percentage"` + APIBindAddr string `def:":4040" desc:"port for the HTTP(S) server used for data ingestion and web UI" mapstructure:"api-bind-addr"` BaseURL string `def:"" desc:"base URL for when the server is behind a reverse proxy with a different path" mapstructure:"base-url"` BaseURLBindAddr string `def:"" deprecated:"true" desc:"server for debugging base url" mapstructure:"base-url-bind-addr"` diff --git a/pkg/health/disk_pressure.go b/pkg/health/disk_pressure.go index 199659e9af..357828a1b1 100644 --- a/pkg/health/disk_pressure.go +++ b/pkg/health/disk_pressure.go @@ -1,28 +1,57 @@ package health import ( + "errors" "fmt" "github.com/pyroscope-io/pyroscope/pkg/util/bytesize" "github.com/pyroscope-io/pyroscope/pkg/util/disk" ) +var ( + errZeroTotalSize = errors.New("total disk size is zero") + errTotalLessThanAvailable = errors.New("total disk size is less than available space") +) + +const minAvailSpace = bytesize.GB + type DiskPressure struct { - Threshold bytesize.ByteSize + Threshold float64 Path string } func (d DiskPressure) Probe() (StatusMessage, error) { - var m StatusMessage - available, err := disk.FreeSpace(d.Path) + if d.Threshold == 0 { + return StatusMessage{Status: Healthy}, nil + } + u, err := disk.Usage(d.Path) if err != nil { - return m, err + return StatusMessage{}, err } - if available < d.Threshold { + return d.makeProbe(u) +} + +func (d DiskPressure) makeProbe(u disk.UsageStats) (StatusMessage, error) { + var m StatusMessage + if u.Total == 0 { + return m, errZeroTotalSize + } + if u.Available > u.Total { + return m, errTotalLessThanAvailable + } + m.Status = Healthy + if u.Available < d.minRequired(u) { + availPercent := 100 * float64(u.Available) / float64(u.Total) + m.Message = fmt.Sprintf("Disk space is running low: %v available (%.1f%%)", u.Available, availPercent) m.Status = Critical - } else { - m.Status = Healthy } - m.Message = fmt.Sprintf("Disk space is running low: %v available", available) return m, nil } + +func (d DiskPressure) minRequired(u disk.UsageStats) bytesize.ByteSize { + t := bytesize.ByteSize(float64(u.Total) / 100 * d.Threshold) + if t > minAvailSpace { + return t + } + return minAvailSpace +} diff --git a/pkg/health/disk_pressure_test.go b/pkg/health/disk_pressure_test.go new file mode 100644 index 0000000000..ce2be61c78 --- /dev/null +++ b/pkg/health/disk_pressure_test.go @@ -0,0 +1,101 @@ +package health + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/pyroscope-io/pyroscope/pkg/util/bytesize" + "github.com/pyroscope-io/pyroscope/pkg/util/disk" +) + +var _ = Describe("DiskPressure", func() { + It("does not fire if threshold is zero", func() { + var d DiskPressure + m, err := d.Probe() + Expect(err).ToNot(HaveOccurred()) + Expect(m.Status).To(Equal(Healthy)) + Expect(m.Message).To(BeEmpty()) + }) + + It("does not fire if available space is greater than the configured threshold", func() { + d := DiskPressure{ + Threshold: 5, + } + m, err := d.makeProbe(disk.UsageStats{ + Total: 10 * bytesize.GB, + Available: 1 * bytesize.GB, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m.Status).To(Equal(Healthy)) + Expect(m.Message).To(BeEmpty()) + }) + + It("fires if less than the configured threshold is available", func() { + d := DiskPressure{ + Threshold: 5, + } + m, err := d.makeProbe(disk.UsageStats{ + Total: 100 * bytesize.GB, + Available: 4 * bytesize.GB, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m.Status).To(Equal(Critical)) + Expect(m.Message).To(Equal("Disk space is running low: 4.00 GB available (4.0%)")) + }) + + It("fires if less than 1GB is available", func() { + d := DiskPressure{ + Threshold: 5, + } + m, err := d.makeProbe(disk.UsageStats{ + Total: 5 * bytesize.GB, + Available: bytesize.GB - 1, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m.Status).To(Equal(Critical)) + Expect(m.Message).To(Equal("Disk space is running low: 1024.00 MB available (20.0%)")) + }) + + It("fires if available is less than the configured threshold", func() { + d := DiskPressure{ + Threshold: 5, + } + m, err := d.makeProbe(disk.UsageStats{ + Total: 1 * bytesize.GB, + Available: 1 * bytesize.MB, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m.Status).To(Equal(Critical)) + Expect(m.Message).To(Equal("Disk space is running low: 1.00 MB available (0.1%)")) + }) + + It("fires if no space available", func() { + d := DiskPressure{ + Threshold: 5, + } + m, err := d.makeProbe(disk.UsageStats{ + Total: 100 * bytesize.MB, + Available: 0, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m.Status).To(Equal(Critical)) + Expect(m.Message).To(Equal("Disk space is running low: 0 bytes available (0.0%)")) + }) + + It("fails if Available > Total", func() { + var d DiskPressure + _, err := d.makeProbe(disk.UsageStats{ + Total: 1 * bytesize.GB, + Available: 2 * bytesize.GB, + }) + Expect(err).To(MatchError(errTotalLessThanAvailable)) + }) + + It("fails if Total is zero", func() { + var d DiskPressure + _, err := d.makeProbe(disk.UsageStats{ + Total: 0, + Available: 2 * bytesize.GB, + }) + Expect(err).To(MatchError(errZeroTotalSize)) + }) +}) diff --git a/pkg/util/disk/disk.go b/pkg/util/disk/disk.go new file mode 100644 index 0000000000..8c8b4ae5ee --- /dev/null +++ b/pkg/util/disk/disk.go @@ -0,0 +1,10 @@ +package disk + +import ( + "github.com/pyroscope-io/pyroscope/pkg/util/bytesize" +) + +type UsageStats struct { + Total bytesize.ByteSize + Available bytesize.ByteSize +} diff --git a/pkg/util/disk/usage_test.go b/pkg/util/disk/usage_test.go index ed21918d56..734512dc53 100644 --- a/pkg/util/disk/usage_test.go +++ b/pkg/util/disk/usage_test.go @@ -9,15 +9,29 @@ import ( ) var _ = Describe("disk package", func() { + var ( + u UsageStats + err error + ) testing.WithConfig(func(cfg **config.Config) { - Describe("FreeSpace", func() { + BeforeEach(func() { + u, err = Usage((*cfg).Server.StoragePath) + }) + Describe("Usage", func() { It("doesn't return an error", func() { - _, err := FreeSpace((*cfg).Server.StoragePath) Expect(err).To(Not(HaveOccurred())) }) - It("returns non-zero value for storage space", func() { - Expect(FreeSpace((*cfg).Server.StoragePath)).To(BeNumerically(">", 0)) + It("returns non-zero Total", func() { + Expect(u.Total).To(BeNumerically(">", 0)) + }) + + It("returns non-zero Available", func() { + Expect(u.Available).To(BeNumerically(">", 0)) + }) + + It("returns Available < Total", func() { + Expect(u.Available).To(BeNumerically("<", u.Total)) }) }) }) diff --git a/pkg/util/disk/usage_unix.go b/pkg/util/disk/usage_unix.go index 709fdec9fc..12eaaef087 100644 --- a/pkg/util/disk/usage_unix.go +++ b/pkg/util/disk/usage_unix.go @@ -9,12 +9,14 @@ import ( "github.com/pyroscope-io/pyroscope/pkg/util/bytesize" ) -func FreeSpace(storagePath string) (bytesize.ByteSize, error) { - fs := syscall.Statfs_t{} - err := syscall.Statfs(storagePath, &fs) - if err != nil { - return 0, err +func Usage(path string) (UsageStats, error) { + var fs syscall.Statfs_t + if err := syscall.Statfs(path, &fs); err != nil { + return UsageStats{}, err } - - return bytesize.ByteSize(fs.Bavail) * bytesize.ByteSize(fs.Bsize), nil + u := UsageStats{ + Total: bytesize.ByteSize(fs.Blocks) * bytesize.ByteSize(fs.Bsize), + Available: bytesize.ByteSize(fs.Bavail) * bytesize.ByteSize(fs.Bsize), + } + return u, nil } diff --git a/pkg/util/disk/usage_windows.go b/pkg/util/disk/usage_windows.go index 64859fed98..66cea09a09 100644 --- a/pkg/util/disk/usage_windows.go +++ b/pkg/util/disk/usage_windows.go @@ -16,10 +16,10 @@ var ( getDiskFreeSpaceEx = kernel32.NewProc("GetDiskFreeSpaceExW") ) -func FreeSpace(path string) (bytesize.ByteSize, error) { +func Usage(path string) (UsageStats, error) { dirPath, err := syscall.UTF16PtrFromString(path) if err != nil { - return 0, err + return UsageStats{}, err } var ( @@ -34,8 +34,12 @@ func FreeSpace(path string) (bytesize.ByteSize, error) { uintptr(unsafe.Pointer(&totalNumberOfBytes)), uintptr(unsafe.Pointer(&totalNumberOfFreeBytes))) if ret == 0 { - return 0, os.NewSyscallError("GetDiskFreeSpaceEx", err) + return UsageStats{}, os.NewSyscallError("GetDiskFreeSpaceEx", err) } - return bytesize.ByteSize(freeBytesAvailableToCaller), nil + u := UsageStats{ + Total: bytesize.ByteSize(totalNumberOfBytes), + Available: bytesize.ByteSize(freeBytesAvailableToCaller), + } + return u, nil }