rate limit the amount of time pruning can lock the index for.

woodsaj · woodsaj · commit d215b83f2f16 · 2018-09-25T16:41:16.000+08:00
adds a new config setting "max-prune-lock-time" that is the maximum amount of
time per second that the prune job can hold the index lock.  The default
is 100ms, meaning that pruning can lock the index for at most 10% of the time.
diff --git a/docker/docker-chaos/metrictank.ini b/docker/docker-chaos/metrictank.ini
@@ -376,3 +376,5 @@ tag-support = false
 tag-query-workers = 50
 # size of regular expression cache in tag query evaluation
 match-cache-size = 1000
+# maximum duration each second a prune job can lock the index.
+max-prune-lock-time = 100ms
diff --git a/docker/docker-cluster/metrictank.ini b/docker/docker-cluster/metrictank.ini
@@ -376,3 +376,5 @@ tag-support = false
 tag-query-workers = 50
 # size of regular expression cache in tag query evaluation
 match-cache-size = 1000
+# maximum duration each second a prune job can lock the index.
+max-prune-lock-time = 100ms
diff --git a/docker/docker-dev-custom-cfg-kafka/metrictank.ini b/docker/docker-dev-custom-cfg-kafka/metrictank.ini
@@ -376,3 +376,5 @@ tag-support = false
 tag-query-workers = 50
 # size of regular expression cache in tag query evaluation
 match-cache-size = 1000
+# maximum duration each second a prune job can lock the index.
+max-prune-lock-time = 100ms
diff --git a/docs/config.md b/docs/config.md
@@ -441,6 +441,8 @@ tag-support = false
 tag-query-workers = 50
 # size of regular expression cache in tag query evaluation
 match-cache-size = 1000
+# maximum duration each second a prune job can lock the index.
+max-prune-lock-time = 100ms
 ```
 
 # storage-schemas.conf
diff --git a/idx/memory/memory.go b/idx/memory/memory.go
@@ -1,6 +1,7 @@
 package memory
 
 import (
+	"context"
 	"flag"
 	"fmt"
 	"regexp"
@@ -47,10 +48,11 @@ var (
 	// metric idx.metrics_active is the number of currently known metrics in the index
 	statMetricsActive = stats.NewGauge32("idx.metrics_active")
 
-	Enabled         bool
-	matchCacheSize  int
-	TagSupport      bool
-	TagQueryWorkers int // number of workers to spin up when evaluation tag expressions
+	Enabled          bool
+	matchCacheSize   int
+	maxPruneLockTime = time.Millisecond * 100
+	TagSupport       bool
+	TagQueryWorkers  int // number of workers to spin up when evaluation tag expressions
 )
 
 func ConfigSetup() {
@@ -59,6 +61,7 @@ func ConfigSetup() {
 	memoryIdx.BoolVar(&TagSupport, "tag-support", false, "enables/disables querying based on tags")
 	memoryIdx.IntVar(&TagQueryWorkers, "tag-query-workers", 50, "number of workers to spin up to evaluate tag queries")
 	memoryIdx.IntVar(&matchCacheSize, "match-cache-size", 1000, "size of regular expression cache in tag query evaluation")
+	memoryIdx.DurationVar(&maxPruneLockTime, "max-prune-lock-time", time.Millisecond*100, "Maximum duration each second a prune job can lock the index.")
 	globalconf.Register("memory-idx", memoryIdx)
 }
 
@@ -1308,13 +1311,24 @@ DEFS:
 	}
 	m.RUnlock()
 
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// create a new timeLimiter that allows us to limit the amount of time we spend
+	// holding a lock to maxPruneLockTime (default 100ms) every second.
+	tl := NewTimeLimiter(ctx, time.Second, maxPruneLockTime)
+
 	for org, ids := range toPruneTagged {
 		if len(ids) == 0 {
 			continue
 		}
+		// make sure we are not locking for too long.
+		tl.Wait()
+		lockStart := time.Now()
 		m.Lock()
 		defs := m.deleteTaggedByIdSet(org, ids)
 		m.Unlock()
+		tl.Add(time.Since(lockStart))
 		pruned = append(pruned, defs...)
 	}
 
@@ -1325,28 +1339,33 @@ ORGS:
 		}
 
 		for path := range paths {
+			tl.Wait()
+			lockStart := time.Now()
 			m.Lock()
 			tree, ok := m.tree[org]
 
 			if !ok {
 				m.Unlock()
+				tl.Add(time.Since(lockStart))
 				continue ORGS
 			}
 
 			n, ok := tree.Items[path]
 
 			if !ok {
 				m.Unlock()
+				tl.Add(time.Since(lockStart))
 				log.Debug("memory-idx: series %s for orgId:%d was identified for pruning but cannot be found.", path, org)
 				continue
 			}
 
 			log.Debug("memory-idx: series %s for orgId:%d is stale. pruning it.", n.Path, org)
 			defs := m.delete(org, n, true, false)
 			m.Unlock()
+			tl.Add(time.Since(lockStart))
 			pruned = append(pruned, defs...)
-		}
 
+		}
 	}
 
 	statMetricsActive.Add(-1 * len(pruned))
diff --git a/idx/memory/time_limit.go b/idx/memory/time_limit.go
@@ -0,0 +1,86 @@
+package memory
+
+import (
+	"context"
+	"sync"
+	"time"
+)
+
+// TimeLimiter provides a means to limit the amount of time spent working.
+type TimeLimiter struct {
+	sync.Mutex                      // embedded; not used by the methods in this file
+	ctx          context.Context    // when done, run() releases blocked waiters and exits
+	lockDuration time.Duration      // time reported via Add() during the current window
+	window       time.Duration      // accounting period; lockDuration is reset every window
+	limit        time.Duration      // Wait() blocks once lockDuration reaches this value
+	accountCh    chan time.Duration // carries durations from Add() to run()
+	queryCh      chan chan struct{} // carries Wait()'s response channel to run()
+}
+
+// NewTimeLimiter creates a new TimeLimiter.  A background goroutine will run until the
+// provided context is done.  When the amount of time spent on task (the time is determined
+// by calls to "Add()") within a "window" duration reaches or exceeds "limit", then calls to
+// Wait() will block until the start of the next window period.
+func NewTimeLimiter(ctx context.Context, window, limit time.Duration) *TimeLimiter {
+	l := &TimeLimiter{
+		ctx:       ctx,
+		window:    window,
+		limit:     limit,
+		accountCh: make(chan time.Duration),
+		queryCh:   make(chan chan struct{}),
+	}
+	go l.run()
+	return l
+}
+
+// run is the accounting loop started by NewTimeLimiter; it returns once the context is done.
+func (l *TimeLimiter) run() {
+	ticker := time.NewTicker(l.window)
+	defer ticker.Stop() // release the ticker's resources when the loop exits
+	done := l.ctx.Done()
+	var blockedQueries []chan struct{}
+	for {
+		select {
+		case <-done:
+			// context done. shutting down: release every caller still blocked in Wait()
+			for _, ch := range blockedQueries {
+				close(ch)
+			}
+			return
+		case <-ticker.C:
+			// a new window starts: reset lockDuration to 0 and release blocked callers
+			l.lockDuration = time.Duration(0)
+			for _, ch := range blockedQueries {
+				close(ch)
+			}
+			blockedQueries = nil
+		case d := <-l.accountCh:
+			l.lockDuration += d
+		case respCh := <-l.queryCh:
+			if l.lockDuration < l.limit {
+				close(respCh)
+			} else {
+				// rate limit exceeded.  respCh is closed on the next tick, letting the caller continue.
+				blockedQueries = append(blockedQueries, respCh)
+			}
+		}
+	}
+}
+
+// Add increments the counter of time spent doing something by "d". It blocks until run() receives the value.
+func (l *TimeLimiter) Add(d time.Duration) {
+	l.accountCh <- d
+}
+
+// Wait will return immediately if we are not rate limited, otherwise it will
+// block until we are no longer limited.  The longest we will block for is
+// the size of the defined time window.
+func (l *TimeLimiter) Wait() {
+	respCh := make(chan struct{})
+	l.queryCh <- respCh
+
+	// if we have not exceeded our locking quota then respCh will be
+	// immediately closed. Otherwise it won't be closed until the next tick (duration of "l.window")
+	// and we will block until then.
+	<-respCh
+}
diff --git a/metrictank-sample.ini b/metrictank-sample.ini
@@ -379,3 +379,5 @@ tag-support = false
 tag-query-workers = 50
 # size of regular expression cache in tag query evaluation
 match-cache-size = 1000
+# maximum duration each second a prune job can lock the index.
+max-prune-lock-time = 100ms
diff --git a/scripts/config/metrictank-docker.ini b/scripts/config/metrictank-docker.ini
@@ -376,3 +376,5 @@ tag-support = false
 tag-query-workers = 50
 # size of regular expression cache in tag query evaluation
 match-cache-size = 1000
+# maximum duration each second a prune job can lock the index.
+max-prune-lock-time = 100ms
diff --git a/scripts/config/metrictank-package.ini b/scripts/config/metrictank-package.ini
@@ -376,3 +376,5 @@ tag-support = false
 tag-query-workers = 50
 # size of regular expression cache in tag query evaluation
 match-cache-size = 1000
+# maximum duration each second a prune job can lock the index.
+max-prune-lock-time = 100ms