Merge pull request #658 from grafana/resync-upstream
Sync upstream prometheus
pracucci authored Jul 4, 2024
2 parents 5131622 + d577de3 commit fb0cb30
Showing 27 changed files with 1,801 additions and 1,185 deletions.
10 changes: 10 additions & 0 deletions docs/querying/examples.md
@@ -95,3 +95,13 @@ Assuming this metric contains one time series per running instance, you could
count the number of running instances per application like this:

count by (app) (instance_cpu_time_ns)

If we are exploring some metrics for their labels, e.g. to be able to aggregate
over some of them, we could use the following:

limitk(10, app_foo_metric_bar)

Alternatively, if we wanted the returned timeseries to be more evenly sampled,
we could use the following to get approximately 10% of them:

limit_ratio(0.1, app_foo_metric_bar)
38 changes: 36 additions & 2 deletions docs/querying/operators.md
@@ -230,6 +230,8 @@ vector of fewer elements with aggregated values:
* `bottomk` (smallest k elements by sample value)
* `topk` (largest k elements by sample value)
* `quantile` (calculate φ-quantile (0 ≤ φ ≤ 1) over dimensions)
* `limitk` (sample n elements)
* `limit_ratio` (sample elements with approximately 𝑟 ratio if `𝑟 > 0`, and the complement of such samples if `𝑟 = -(1.0 - 𝑟)`)

These operators can either be used to aggregate over **all** label dimensions
or preserve distinct dimensions by including a `without` or `by` clause. These
@@ -249,8 +251,8 @@ all other labels are preserved in the output. `by` does the opposite and drops
labels that are not listed in the `by` clause, even if their label values are
identical between all elements of the vector.

`parameter` is only required for `count_values`, `quantile`, `topk` and
`bottomk`.
`parameter` is only required for `count_values`, `quantile`, `topk`,
`bottomk`, `limitk` and `limit_ratio`.

`count_values` outputs one time series per unique sample value. Each series has
an additional label. The name of that label is given by the aggregation
@@ -261,11 +263,16 @@ time series is the number of times that sample value was present.
the input samples, including the original labels, are returned in the result
vector. `by` and `without` are only used to bucket the input vector.

`limitk` and `limit_ratio` also return a subset of the input samples,
including the original labels in the result vector. These are experimental
operators that must be enabled with `--enable-feature=promql-experimental-functions`.

`quantile` calculates the φ-quantile, the value that ranks at number φ*N among
the N metric values of the dimensions aggregated over. φ is provided as the
aggregation parameter. For example, `quantile(0.5, ...)` calculates the median,
`quantile(0.95, ...)` the 95th percentile. For φ = `NaN`, `NaN` is returned. For φ < 0, `-Inf` is returned. For φ > 1, `+Inf` is returned.


Example:

If the metric `http_requests_total` had time series that fan out by
@@ -291,6 +298,33 @@ To get the 5 largest HTTP requests counts across all instances we could write:

topk(5, http_requests_total)

To sample 10 timeseries, for example to inspect labels and their values, we
could write:

limitk(10, http_requests_total)

To deterministically sample approximately 10% of timeseries we could write:

limit_ratio(0.1, http_requests_total)

Given that `limit_ratio()` implements a deterministic sampling algorithm (based
on labels' hash), you can get the _complement_ of the above samples, i.e.
approximately 90%, but precisely those not returned by `limit_ratio(0.1, ...)`
with:

limit_ratio(-0.9, http_requests_total)

You can also use this feature to verify, for example, that `avg()` is a
representative aggregation for your samples' values, by checking that the
difference between the averages of two sample subsets is "small" when compared
to the standard deviation.

abs(
avg(limit_ratio(0.5, http_requests_total))
-
avg(limit_ratio(-0.5, http_requests_total))
) <= bool stddev(http_requests_total)
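
As an aside, to make the "deterministic sampling algorithm (based on labels' hash)"
and its complement behaviour concrete: one way such a sampler can work is to hash each
series' label set to a fixed offset in [0, 1), keep offsets below the ratio when it is
positive, and keep exactly the remaining offsets when it is negative. The minimal Go
sketch below is illustrative only (hypothetical helper and hash choice, not
Prometheus's actual implementation):

    package main

    import (
        "fmt"
        "hash/fnv"
        "math"
    )

    // keepSeries reports whether a series with the given label string is kept
    // for a given ratio. Positive ratios keep offsets below the ratio; negative
    // ratios keep exactly the complement, which is why limit_ratio(0.1, ...)
    // and limit_ratio(-0.9, ...) partition the input with no overlap.
    func keepSeries(labels string, ratio float64) bool {
        h := fnv.New64a()
        h.Write([]byte(labels)) // deterministic: the same labels always hash the same
        offset := float64(h.Sum64()) / float64(math.MaxUint64) // in [0, 1]
        if ratio >= 0 {
            return offset < ratio
        }
        return offset >= 1.0+ratio
    }

    func main() {
        series := []string{`{app="foo",instance="a"}`, `{app="foo",instance="b"}`, `{app="bar",instance="c"}`}
        for _, s := range series {
            // Each series lands in exactly one of the two subsets.
            fmt.Printf("%s kept by 0.1: %v, kept by -0.9: %v\n", s, keepSeries(s, 0.1), keepSeries(s, -0.9))
        }
    }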

## Binary operator precedence

The following list shows the precedence of binary operators in Prometheus, from
129 changes: 102 additions & 27 deletions model/labels/regexp.go
@@ -31,7 +31,7 @@ const (
maxSetMatches = 256

// The minimum number of alternate values a regex should have to trigger
// the optimization done by optimizeEqualStringMatchers() and so use a map
// the optimization done by optimizeEqualOrPrefixStringMatchers() and so use a map
// to match values instead of iterating over a list. This value has
// been computed running BenchmarkOptimizeEqualStringMatchers.
minEqualMultiStringMatcherMapThreshold = 16
@@ -382,7 +382,7 @@ func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
return nil, nil
}

multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates)
multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates, 0, 0)

for end := strings.IndexByte(s, '|'); end > -1; end = strings.IndexByte(s, '|') {
// Split the string into the next literal and the remainder
@@ -457,7 +457,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
clearBeginEndText(re)

m := stringMatcherFromRegexpInternal(re)
m = optimizeEqualStringMatchers(m, minEqualMultiStringMatcherMapThreshold)
m = optimizeEqualOrPrefixStringMatchers(m, minEqualMultiStringMatcherMapThreshold)

return m
}
@@ -778,17 +778,20 @@ func (m *equalStringMatcher) Matches(s string) bool {
type multiStringMatcherBuilder interface {
StringMatcher
add(s string)
addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher)
setMatches() []string
}

func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize int) multiStringMatcherBuilder {
func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize, estimatedPrefixes, minPrefixLength int) multiStringMatcherBuilder {
// If the estimated size is low enough, it's faster to use a slice instead of a map.
if estimatedSize < minEqualMultiStringMatcherMapThreshold {
if estimatedSize < minEqualMultiStringMatcherMapThreshold && estimatedPrefixes == 0 {
return &equalMultiStringSliceMatcher{caseSensitive: caseSensitive, values: make([]string, 0, estimatedSize)}
}

return &equalMultiStringMapMatcher{
values: make(map[string]struct{}, estimatedSize),
prefixes: make(map[string][]StringMatcher, estimatedPrefixes),
minPrefixLen: minPrefixLength,
caseSensitive: caseSensitive,
}
}
Expand All @@ -804,6 +807,10 @@ func (m *equalMultiStringSliceMatcher) add(s string) {
m.values = append(m.values, s)
}

func (m *equalMultiStringSliceMatcher) addPrefix(_ string, _ bool, _ StringMatcher) {
panic("not implemented")
}

func (m *equalMultiStringSliceMatcher) setMatches() []string {
return m.values
}
@@ -825,12 +832,17 @@ func (m *equalMultiStringSliceMatcher) Matches(s string) bool {
return false
}

// equalMultiStringMapMatcher matches a string exactly against a map of valid values.
// equalMultiStringMapMatcher matches a string exactly against a map of valid values
// or against a set of prefix matchers.
type equalMultiStringMapMatcher struct {
// values contains values to match a string against. If the matching is case insensitive,
// the values here must be lowercase.
values map[string]struct{}

// prefixes maps strings, all of length minPrefixLen, to sets of matchers to check the rest of the string.
// If the matching is case insensitive, prefixes are all lowercase.
prefixes map[string][]StringMatcher
// minPrefixLen can be zero, meaning there are no prefix matchers.
minPrefixLen int
caseSensitive bool
}

@@ -842,8 +854,27 @@ func (m *equalMultiStringMapMatcher) add(s string) {
m.values[s] = struct{}{}
}

func (m *equalMultiStringMapMatcher) addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher) {
if m.minPrefixLen == 0 {
panic("addPrefix called when no prefix length defined")
}
if len(prefix) < m.minPrefixLen {
panic("addPrefix called with a too short prefix")
}
if m.caseSensitive != prefixCaseSensitive {
panic("addPrefix called with a prefix whose case sensitivity is different than the expected one")
}

s := prefix[:m.minPrefixLen]
if !m.caseSensitive {
s = strings.ToLower(s)
}

m.prefixes[s] = append(m.prefixes[s], matcher)
}

func (m *equalMultiStringMapMatcher) setMatches() []string {
if len(m.values) >= maxSetMatches {
if len(m.values) >= maxSetMatches || len(m.prefixes) > 0 {
return nil
}

@@ -859,8 +890,17 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool {
s = toNormalisedLower(s)
}

_, ok := m.values[s]
return ok
if _, ok := m.values[s]; ok {
return true
}
if m.minPrefixLen > 0 && len(s) >= m.minPrefixLen {
for _, matcher := range m.prefixes[s[:m.minPrefixLen]] {
if matcher.Matches(s) {
return true
}
}
}
return false
}

// toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert
@@ -943,20 +983,24 @@ func (m trueMatcher) Matches(_ string) bool {
return true
}

// optimizeEqualStringMatchers optimize a specific case where all matchers are made by an
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher). In
// this specific case, when we have many strings to match against we can use a map instead
// optimizeEqualOrPrefixStringMatchers optimize a specific case where all matchers are made by an
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher) or
// with a literal prefix (literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher).
//
// In this specific case, when we have many strings to match against we can use a map instead
// of iterating over the list of strings.
func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatcher {
func optimizeEqualOrPrefixStringMatchers(input StringMatcher, threshold int) StringMatcher {
var (
caseSensitive bool
caseSensitiveSet bool
numValues int
numPrefixes int
minPrefixLength int
)

// Analyse the input StringMatcher to count the number of occurrences
// and ensure all of them have the same case sensitivity.
analyseCallback := func(matcher *equalStringMatcher) bool {
analyseEqualMatcherCallback := func(matcher *equalStringMatcher) bool {
// Ensure we don't have mixed case sensitivity.
if caseSensitiveSet && caseSensitive != matcher.caseSensitive {
return false
@@ -969,34 +1013,55 @@ func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatch
return true
}

if !findEqualStringMatchers(input, analyseCallback) {
analysePrefixMatcherCallback := func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool {
// Ensure we don't have mixed case sensitivity.
if caseSensitiveSet && caseSensitive != prefixCaseSensitive {
return false
} else if !caseSensitiveSet {
caseSensitive = prefixCaseSensitive
caseSensitiveSet = true
}
if numPrefixes == 0 || len(prefix) < minPrefixLength {
minPrefixLength = len(prefix)
}

numPrefixes++
return true
}

if !findEqualOrPrefixStringMatchers(input, analyseEqualMatcherCallback, analysePrefixMatcherCallback) {
return input
}

// If the number of values found is less than the threshold, then we should skip the optimization.
if numValues < threshold {
// If the number of values and prefixes found is less than the threshold, then we should skip the optimization.
if (numValues + numPrefixes) < threshold {
return input
}

// Parse again the input StringMatcher to extract all values and storing them.
// We can skip the case sensitivity check because we've already checked it and
// if the code reach this point then it means all matchers have the same case sensitivity.
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues)
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues, numPrefixes, minPrefixLength)

// Ignore the return value because we already iterated over the input StringMatcher
// and it was all good.
findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
findEqualOrPrefixStringMatchers(input, func(matcher *equalStringMatcher) bool {
multiMatcher.add(matcher.s)
return true
}, func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool {
multiMatcher.addPrefix(prefix, caseSensitive, matcher)
return true
})

return multiMatcher
}

// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each
// equalStringMatcher found. Returns true if and only if the input StringMatcher is *only*
// composed by an alternation of equalStringMatcher.
func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalStringMatcher) bool) bool {
// findEqualOrPrefixStringMatchers analyze the input StringMatcher and calls the equalMatcherCallback for each
// equalStringMatcher found, and prefixMatcherCallback for each literalPrefixSensitiveStringMatcher and literalPrefixInsensitiveStringMatcher found.
//
// Returns true if and only if the input StringMatcher is *only* composed by an alternation of equalStringMatcher and/or
// literal prefix matcher. Returns false if prefixMatcherCallback is nil and a literal prefix matcher is encountered.
func findEqualOrPrefixStringMatchers(input StringMatcher, equalMatcherCallback func(matcher *equalStringMatcher) bool, prefixMatcherCallback func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool) bool {
orInput, ok := input.(orStringMatcher)
if !ok {
return false
@@ -1005,17 +1070,27 @@ func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalSt
for _, m := range orInput {
switch casted := m.(type) {
case orStringMatcher:
if !findEqualStringMatchers(m, callback) {
if !findEqualOrPrefixStringMatchers(m, equalMatcherCallback, prefixMatcherCallback) {
return false
}

case *equalStringMatcher:
if !callback(casted) {
if !equalMatcherCallback(casted) {
return false
}

case *literalPrefixSensitiveStringMatcher:
if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, true, casted) {
return false
}

case *literalPrefixInsensitiveStringMatcher:
if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, false, casted) {
return false
}

default:
// It's not an equal string matcher, so we have to stop searching
// It's not an equal or prefix string matcher, so we have to stop searching
// cause this optimization can't be applied.
return false
}
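
To make the intent of the new prefix buckets concrete, here is a small, self-contained Go
sketch of the same idea (hypothetical names, not the library's internal types): exact
alternation values go into a set answered by a single map lookup, while prefix-anchored
matchers are grouped under their first minPrefixLen characters so that only the matchers
sharing an input's prefix are tried.

    package main

    import (
        "fmt"
        "strings"
    )

    // prefixBucketMatcher is a simplified stand-in for equalMultiStringMapMatcher:
    // exact values are checked with one map lookup, and prefix-anchored matchers
    // are only consulted for inputs sharing their first minPrefixLen bytes.
    type prefixBucketMatcher struct {
        values       map[string]struct{}
        prefixes     map[string][]func(string) bool
        minPrefixLen int
    }

    func (m *prefixBucketMatcher) matches(s string) bool {
        if _, ok := m.values[s]; ok {
            return true
        }
        if m.minPrefixLen > 0 && len(s) >= m.minPrefixLen {
            for _, check := range m.prefixes[s[:m.minPrefixLen]] {
                if check(s) {
                    return true
                }
            }
        }
        return false
    }

    func main() {
        // Roughly what an alternation like foo|bar|baz.*qux could be turned into:
        // two exact values plus one matcher bucketed under the literal prefix "baz".
        m := &prefixBucketMatcher{
            values: map[string]struct{}{"foo": {}, "bar": {}},
            prefixes: map[string][]func(string) bool{
                "baz": {func(s string) bool {
                    return strings.HasPrefix(s, "baz") && strings.HasSuffix(s, "qux")
                }},
            },
            minPrefixLen: 3,
        }
        fmt.Println(m.matches("foo"))      // true: exact-value hit
        fmt.Println(m.matches("bazXXqux")) // true: "baz" bucket, residual check passes
        fmt.Println(m.matches("bazonly"))  // false: "baz" bucket, residual check fails
    }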