From 03f1d4dea66eded534e045bda0824266bf501ff3 Mon Sep 17 00:00:00 2001 From: Kerry Gallagher <471693+Kerry350@users.noreply.github.com> Date: Thu, 24 Jun 2021 20:12:52 +0100 Subject: [PATCH] [Logs UI] Log threshold rule performance improvements (#102650) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add optimisations for executor / chart previews Co-authored-by: Felix Stürmer --- .../alerting/logs/log_threshold/types.ts | 108 +++-- .../http_api/log_alerts/chart_preview_data.ts | 9 + .../criterion_preview_chart.tsx | 13 +- .../components/expression_editor/editor.tsx | 25 + .../log_threshold_chart_preview.ts | 54 ++- .../log_threshold_executor.test.ts | 455 ++++++++---------- .../log_threshold/log_threshold_executor.ts | 154 ++++-- 7 files changed, 483 insertions(+), 335 deletions(-) diff --git a/x-pack/plugins/infra/common/alerting/logs/log_threshold/types.ts b/x-pack/plugins/infra/common/alerting/logs/log_threshold/types.ts index f1e983fc34df8..6da0bb58e4e85 100644 --- a/x-pack/plugins/infra/common/alerting/logs/log_threshold/types.ts +++ b/x-pack/plugins/infra/common/alerting/logs/log_threshold/types.ts @@ -100,7 +100,7 @@ export enum AlertStates { ERROR, } -const ThresholdRT = rt.type({ +export const ThresholdRT = rt.type({ comparator: ComparatorRT, value: rt.number, }); @@ -240,31 +240,43 @@ const chartPreviewHistogramBucket = rt.type({ doc_count: rt.number, }); +const ChartPreviewBucketsRT = rt.partial({ + histogramBuckets: rt.type({ + buckets: rt.array(chartPreviewHistogramBucket), + }), +}); + // ES query responses // +const hitsRT = rt.type({ + total: rt.type({ + value: rt.number, + }), +}); + +const bucketFieldsRT = rt.type({ + key: rt.record(rt.string, rt.string), + doc_count: rt.number, +}); + +const afterKeyRT = rt.partial({ + after_key: rt.record(rt.string, rt.string), +}); + export const UngroupedSearchQueryResponseRT = rt.intersection([ commonSearchSuccessResponseFieldsRT, rt.intersection([ rt.type({ - hits: rt.type({ - total: rt.type({ - value: rt.number, - }), - }), + hits: hitsRT, }), - // Chart preview buckets rt.partial({ - aggregations: rt.type({ - histogramBuckets: rt.type({ - buckets: rt.array(chartPreviewHistogramBucket), - }), - }), + aggregations: ChartPreviewBucketsRT, }), ]), ]); export type UngroupedSearchQueryResponse = rt.TypeOf; -export const GroupedSearchQueryResponseRT = rt.intersection([ +export const UnoptimizedGroupedSearchQueryResponseRT = rt.intersection([ commonSearchSuccessResponseFieldsRT, rt.type({ aggregations: rt.type({ @@ -272,33 +284,73 @@ export const GroupedSearchQueryResponseRT = rt.intersection([ rt.type({ buckets: rt.array( rt.type({ - key: rt.record(rt.string, rt.string), - doc_count: rt.number, + ...bucketFieldsRT.props, filtered_results: rt.intersection([ rt.type({ doc_count: rt.number, }), - // Chart preview buckets - rt.partial({ - histogramBuckets: rt.type({ - buckets: rt.array(chartPreviewHistogramBucket), - }), - }), + ChartPreviewBucketsRT, ]), }) ), }), - rt.partial({ - after_key: rt.record(rt.string, rt.string), - }), + afterKeyRT, ]), }), - hits: rt.type({ - total: rt.type({ - value: rt.number, - }), + hits: hitsRT, + }), +]); + +export type UnoptimizedGroupedSearchQueryResponse = rt.TypeOf< + typeof UnoptimizedGroupedSearchQueryResponseRT +>; + +export const OptimizedGroupedSearchQueryResponseRT = rt.intersection([ + commonSearchSuccessResponseFieldsRT, + rt.type({ + aggregations: rt.type({ + groups: rt.intersection([ + rt.type({ + buckets: rt.array(rt.intersection([bucketFieldsRT, ChartPreviewBucketsRT])), + }), + afterKeyRT, + ]), }), + hits: hitsRT, }), ]); +export type OptimizedGroupedSearchQueryResponse = rt.TypeOf< + typeof OptimizedGroupedSearchQueryResponseRT +>; + +export const GroupedSearchQueryResponseRT = rt.union([ + UnoptimizedGroupedSearchQueryResponseRT, + OptimizedGroupedSearchQueryResponseRT, +]); + export type GroupedSearchQueryResponse = rt.TypeOf; + +export const isOptimizedGroupedSearchQueryResponse = ( + response: GroupedSearchQueryResponse['aggregations']['groups']['buckets'] +): response is OptimizedGroupedSearchQueryResponse['aggregations']['groups']['buckets'] => { + const result = response[0]; + return result && !result.hasOwnProperty('filtered_results'); +}; + +export const isOptimizableGroupedThreshold = ( + selectedComparator: AlertParams['count']['comparator'], + selectedValue?: AlertParams['count']['value'] +) => { + if (selectedComparator === Comparator.GT) { + return true; + } else if ( + typeof selectedValue === 'number' && + selectedComparator === Comparator.GT_OR_EQ && + selectedValue > 0 + ) { + return true; + } else { + return false; + } +}; diff --git a/x-pack/plugins/infra/common/http_api/log_alerts/chart_preview_data.ts b/x-pack/plugins/infra/common/http_api/log_alerts/chart_preview_data.ts index e6baca305508e..5f488dd532285 100644 --- a/x-pack/plugins/infra/common/http_api/log_alerts/chart_preview_data.ts +++ b/x-pack/plugins/infra/common/http_api/log_alerts/chart_preview_data.ts @@ -7,6 +7,7 @@ import * as rt from 'io-ts'; import { + ThresholdRT, countCriteriaRT, timeUnitRT, timeSizeRT, @@ -58,6 +59,14 @@ export type GetLogAlertsChartPreviewDataSuccessResponsePayload = rt.TypeOf< export const getLogAlertsChartPreviewDataAlertParamsSubsetRT: any = rt.intersection([ rt.type({ criteria: countCriteriaRT, + count: rt.intersection([ + rt.type({ + comparator: ThresholdRT.props.comparator, + }), + rt.partial({ + value: ThresholdRT.props.value, + }), + ]), timeUnit: timeUnitRT, timeSize: timeSizeRT, }), diff --git a/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/criterion_preview_chart.tsx b/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/criterion_preview_chart.tsx index 4e84cf0f9127c..4fa96ea6828d4 100644 --- a/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/criterion_preview_chart.tsx +++ b/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/criterion_preview_chart.tsx @@ -68,6 +68,10 @@ export const CriterionPreview: React.FC = ({ const criteria = field && comparator && value ? [{ field, comparator, value }] : []; const params = { criteria, + count: { + comparator: alertParams.count.comparator, + value: alertParams.count.value, + }, timeSize: alertParams.timeSize, timeUnit: alertParams.timeUnit, groupBy: alertParams.groupBy, @@ -78,7 +82,14 @@ export const CriterionPreview: React.FC = ({ } catch (error) { return null; } - }, [alertParams.timeSize, alertParams.timeUnit, alertParams.groupBy, chartCriterion]); + }, [ + alertParams.timeSize, + alertParams.timeUnit, + alertParams.groupBy, + alertParams.count.comparator, + alertParams.count.value, + chartCriterion, + ]); // Check for the existence of properties that are necessary for a meaningful chart. if (chartAlertParams === null || chartAlertParams.criteria.length === 0) return null; diff --git a/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/editor.tsx b/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/editor.tsx index ef533f63dc175..4eb0f3e8645cf 100644 --- a/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/editor.tsx +++ b/x-pack/plugins/infra/public/alerting/log_threshold/components/expression_editor/editor.tsx @@ -23,6 +23,7 @@ import { PartialRatioAlertParams, ThresholdType, timeUnitRT, + isOptimizableGroupedThreshold, } from '../../../../../common/alerting/logs/log_threshold/types'; import { decodeOrThrow } from '../../../../../common/runtime_types'; import { ObjectEntries } from '../../../../../common/utility_types'; @@ -255,6 +256,15 @@ export const Editor: React.FC< setHasSetDefaults(true); }); + const shouldShowGroupByOptimizationWarning = useMemo(() => { + const hasSetGroupBy = alertParams.groupBy && alertParams.groupBy.length > 0; + return ( + hasSetGroupBy && + alertParams.count && + !isOptimizableGroupedThreshold(alertParams.count.comparator, alertParams.count.value) + ); + }, [alertParams]); + // Wait until the alert param defaults have been set if (!hasSetDefaults) return null; @@ -299,6 +309,21 @@ export const Editor: React.FC< {alertParams.criteria && isRatioAlert(alertParams.criteria) && criteriaComponent} + {shouldShowGroupByOptimizationWarning && ( + <> + + + {i18n.translate('xpack.infra.logs.alertFlyout.groupByOptimizationWarning', { + defaultMessage: + 'When setting a "group by" we highly recommend using the "{comparator}" comparator for your threshold. This can lead to significant performance improvements.', + values: { + comparator: Comparator.GT, + }, + })} + + + )} + ); diff --git a/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_chart_preview.ts b/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_chart_preview.ts index 321273c656216..7bf2cb5ea3394 100644 --- a/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_chart_preview.ts +++ b/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_chart_preview.ts @@ -23,6 +23,7 @@ import { UngroupedSearchQueryResponse, GroupedSearchQueryResponse, GroupedSearchQueryResponseRT, + isOptimizedGroupedSearchQueryResponse, } from '../../../../common/alerting/logs/log_threshold/types'; import { decodeOrThrow } from '../../../../common/runtime_types'; import { ResolvedLogSourceConfiguration } from '../../../../common/log_sources'; @@ -97,10 +98,19 @@ const addHistogramAggregationToQuery = ( }; if (isGrouped) { - query.body.aggregations.groups.aggregations.filtered_results = { - ...query.body.aggregations.groups.aggregations.filtered_results, - aggregations: histogramAggregation, - }; + const isOptimizedQuery = !query.body.aggregations.groups.aggregations?.filtered_results; + + if (isOptimizedQuery) { + query.body.aggregations.groups.aggregations = { + ...query.body.aggregations.groups.aggregations, + ...histogramAggregation, + }; + } else { + query.body.aggregations.groups.aggregations.filtered_results = { + ...query.body.aggregations.groups.aggregations.filtered_results, + aggregations: histogramAggregation, + }; + } } else { query.body = { ...query.body, @@ -151,18 +161,34 @@ const getGroupedResults = async ( const processGroupedResults = ( results: GroupedSearchQueryResponse['aggregations']['groups']['buckets'] ): Series => { - return results.reduce((series, group) => { - if (!group.filtered_results.histogramBuckets) return series; - const groupName = Object.values(group.key).join(', '); - const points = group.filtered_results.histogramBuckets.buckets.reduce( - (pointsAcc, bucket) => { + const getGroupName = ( + key: GroupedSearchQueryResponse['aggregations']['groups']['buckets'][0]['key'] + ) => Object.values(key).join(', '); + + if (isOptimizedGroupedSearchQueryResponse(results)) { + return results.reduce((series, group) => { + if (!group.histogramBuckets) return series; + const groupName = getGroupName(group.key); + const points = group.histogramBuckets.buckets.reduce((pointsAcc, bucket) => { const { key, doc_count: count } = bucket; return [...pointsAcc, { timestamp: key, value: count }]; - }, - [] - ); - return [...series, { id: groupName, points }]; - }, []); + }, []); + return [...series, { id: groupName, points }]; + }, []); + } else { + return results.reduce((series, group) => { + if (!group.filtered_results.histogramBuckets) return series; + const groupName = getGroupName(group.key); + const points = group.filtered_results.histogramBuckets.buckets.reduce( + (pointsAcc, bucket) => { + const { key, doc_count: count } = bucket; + return [...pointsAcc, { timestamp: key, value: count }]; + }, + [] + ); + return [...series, { id: groupName, points }]; + }, []); + } }; const processUngroupedResults = (results: UngroupedSearchQueryResponse): Series => { diff --git a/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.test.ts b/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.test.ts index ffabd7ba65f03..55c66f0aabbfb 100644 --- a/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.test.ts +++ b/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.test.ts @@ -58,6 +58,74 @@ const negativeCriteria: Criterion[] = [ { ...textField, comparator: Comparator.NOT_MATCH_PHRASE }, ]; +const expectedPositiveFilterClauses = [ + { + range: { + numericField: { + gt: 10, + }, + }, + }, + { + range: { + numericField: { + gte: 10, + }, + }, + }, + { + range: { + numericField: { + lt: 10, + }, + }, + }, + { + range: { + numericField: { + lte: 10, + }, + }, + }, + { + term: { + keywordField: { + value: 'error', + }, + }, + }, + { + match: { + textField: 'Something went wrong', + }, + }, + { + match_phrase: { + textField: 'Something went wrong', + }, + }, +]; + +const expectedNegativeFilterClauses = [ + { + term: { + keywordField: { + value: 'error', + }, + }, + }, + { + match: { + textField: 'Something went wrong', + }, + }, + { + match_phrase: { + textField: 'Something went wrong', + }, + }, +]; + const baseAlertParams: Pick = { count: { comparator: Comparator.GT, @@ -102,53 +170,7 @@ describe('Log threshold executor', () => { criteria: positiveCriteria, }; const filters = buildFiltersFromCriteria(alertParams, TIMESTAMP_FIELD); - expect(filters.mustFilters).toEqual([ - { - range: { - numericField: { - gt: 10, - }, - }, - }, - { - range: { - numericField: { - gte: 10, - }, - }, - }, - { - range: { - numericField: { - lt: 10, - }, - }, - }, - { - range: { - numericField: { - lte: 10, - }, - }, - }, - { - term: { - keywordField: { - value: 'error', - }, - }, - }, - { - match: { - textField: 'Something went wrong', - }, - }, - { - match_phrase: { - textField: 'Something went wrong', - }, - }, - ]); + expect(filters.mustFilters).toEqual(expectedPositiveFilterClauses); }); test('Handles negative criteria', () => { @@ -158,25 +180,7 @@ describe('Log threshold executor', () => { }; const filters = buildFiltersFromCriteria(alertParams, TIMESTAMP_FIELD); - expect(filters.mustNotFilters).toEqual([ - { - term: { - keywordField: { - value: 'error', - }, - }, - }, - { - match: { - textField: 'Something went wrong', - }, - }, - { - match_phrase: { - textField: 'Something went wrong', - }, - }, - ]); + expect(filters.mustNotFilters).toEqual(expectedNegativeFilterClauses); }); test('Handles time range', () => { @@ -194,7 +198,7 @@ describe('Log threshold executor', () => { describe('ES queries', () => { describe('Query generation', () => { - test('Correctly generates ungrouped queries', () => { + it('Correctly generates ungrouped queries', () => { const alertParams: AlertParams = { ...baseAlertParams, criteria: [...positiveCriteria, ...negativeCriteria], @@ -223,71 +227,9 @@ describe('Log threshold executor', () => { }, }, }, - { - range: { - numericField: { - gt: 10, - }, - }, - }, - { - range: { - numericField: { - gte: 10, - }, - }, - }, - { - range: { - numericField: { - lt: 10, - }, - }, - }, - { - range: { - numericField: { - lte: 10, - }, - }, - }, - { - term: { - keywordField: { - value: 'error', - }, - }, - }, - { - match: { - textField: 'Something went wrong', - }, - }, - { - match_phrase: { - textField: 'Something went wrong', - }, - }, - ], - must_not: [ - { - term: { - keywordField: { - value: 'error', - }, - }, - }, - { - match: { - textField: 'Something went wrong', - }, - }, - { - match_phrase: { - textField: 'Something went wrong', - }, - }, + ...expectedPositiveFilterClauses, ], + must_not: [...expectedNegativeFilterClauses], }, }, runtime_mappings: { @@ -304,148 +246,159 @@ describe('Log threshold executor', () => { }); }); - test('Correctly generates grouped queries', () => { - const alertParams: AlertParams = { - ...baseAlertParams, - groupBy: ['host.name'], - criteria: [...positiveCriteria, ...negativeCriteria], - }; - const query = getGroupedESQuery( - alertParams, - TIMESTAMP_FIELD, - FILEBEAT_INDEX, - runtimeMappings - ); - expect(query).toEqual({ - index: 'filebeat-*', - allow_no_indices: true, - ignore_unavailable: true, - body: { - query: { - bool: { - filter: [ - { - range: { - '@timestamp': { - gte: expect.any(Number), - lte: expect.any(Number), - format: 'epoch_millis', + describe('Correctly generates grouped queries', () => { + it('When using an optimizable threshold comparator', () => { + const alertParams: AlertParams = { + ...baseAlertParams, + groupBy: ['host.name'], + criteria: [...positiveCriteria, ...negativeCriteria], + }; + const query = getGroupedESQuery( + alertParams, + TIMESTAMP_FIELD, + FILEBEAT_INDEX, + runtimeMappings + ); + + expect(query).toEqual({ + index: 'filebeat-*', + allow_no_indices: true, + ignore_unavailable: true, + body: { + query: { + bool: { + filter: [ + { + range: { + '@timestamp': { + gte: expect.any(Number), + lte: expect.any(Number), + format: 'epoch_millis', + }, }, }, + ...expectedPositiveFilterClauses, + ], + must_not: [...expectedNegativeFilterClauses], + }, + }, + aggregations: { + groups: { + composite: { + size: 2000, + sources: [ + { + 'group-0-host.name': { + terms: { + field: 'host.name', + }, + }, + }, + ], }, - ], + }, + }, + runtime_mappings: { + runtime_field: { + type: 'keyword', + script: { + lang: 'painless', + source: 'emit("a runtime value")', + }, + }, }, + size: 0, }, - aggregations: { - groups: { - composite: { - size: 40, - sources: [ + }); + }); + + it('When not using an optimizable threshold comparator', () => { + const alertParams: AlertParams = { + ...baseAlertParams, + count: { + ...baseAlertParams.count, + comparator: Comparator.LT, + }, + groupBy: ['host.name'], + criteria: [...positiveCriteria, ...negativeCriteria], + }; + + const query = getGroupedESQuery( + alertParams, + TIMESTAMP_FIELD, + FILEBEAT_INDEX, + runtimeMappings + ); + + expect(query).toEqual({ + index: 'filebeat-*', + allow_no_indices: true, + ignore_unavailable: true, + body: { + query: { + bool: { + filter: [ { - 'group-0-host.name': { - terms: { - field: 'host.name', + range: { + '@timestamp': { + gte: expect.any(Number), + lte: expect.any(Number), + format: 'epoch_millis', }, }, }, ], }, - aggregations: { - filtered_results: { - filter: { - bool: { - filter: [ - { - range: { - '@timestamp': { - gte: expect.any(Number), - lte: expect.any(Number), - format: 'epoch_millis', - }, - }, - }, - { - range: { - numericField: { - gt: 10, - }, - }, - }, - { - range: { - numericField: { - gte: 10, - }, - }, - }, - { - range: { - numericField: { - lt: 10, - }, - }, - }, - { - range: { - numericField: { - lte: 10, - }, - }, - }, - { - term: { - keywordField: { - value: 'error', - }, - }, - }, - { - match: { - textField: 'Something went wrong', - }, - }, - { - match_phrase: { - textField: 'Something went wrong', - }, + }, + aggregations: { + groups: { + composite: { + size: 2000, + sources: [ + { + 'group-0-host.name': { + terms: { + field: 'host.name', }, - ], - must_not: [ - { - term: { - keywordField: { - value: 'error', + }, + }, + ], + }, + aggregations: { + filtered_results: { + filter: { + bool: { + filter: [ + { + range: { + '@timestamp': { + gte: expect.any(Number), + lte: expect.any(Number), + format: 'epoch_millis', + }, }, }, - }, - { - match: { - textField: 'Something went wrong', - }, - }, - { - match_phrase: { - textField: 'Something went wrong', - }, - }, - ], + ...expectedPositiveFilterClauses, + ], + must_not: [...expectedNegativeFilterClauses], + }, }, }, }, }, }, - }, - runtime_mappings: { - runtime_field: { - type: 'keyword', - script: { - lang: 'painless', - source: 'emit("a runtime value")', + runtime_mappings: { + runtime_field: { + type: 'keyword', + script: { + lang: 'painless', + source: 'emit("a runtime value")', + }, }, }, + size: 0, }, - size: 0, - }, + }); }); }); }); diff --git a/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.ts b/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.ts index a537801202217..f9d0b5575abfc 100644 --- a/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.ts +++ b/x-pack/plugins/infra/server/lib/alerting/log_threshold/log_threshold_executor.ts @@ -36,6 +36,8 @@ import { CountCriteria, CountAlertParams, RatioAlertParams, + isOptimizedGroupedSearchQueryResponse, + isOptimizableGroupedThreshold, } from '../../../../common/alerting/logs/log_threshold/types'; import { InfraBackendLibs } from '../../infra_types'; import { getIntervalInSeconds } from '../../../utils/get_interval_in_seconds'; @@ -57,7 +59,7 @@ type LogThresholdAlertExecutorOptions = AlertExecutorOptions< LogThresholdActionGroups >; -const COMPOSITE_GROUP_SIZE = 40; +const COMPOSITE_GROUP_SIZE = 2000; const checkValueAgainstComparatorMap: { [key: string]: (a: number, b: number) => boolean; @@ -68,6 +70,10 @@ const checkValueAgainstComparatorMap: { [Comparator.LT_OR_EQ]: (a: number, b: number) => a <= b, }; +// The executor execution roughly follows a pattern of: +// ES Query generation -> fetching of results -> processing of results. +// With forks for group_by vs ungrouped, and ratio vs non-ratio. + export const createLogThresholdExecutor = (libs: InfraBackendLibs) => async function ({ services, params }: LogThresholdAlertExecutorOptions) { const { alertInstanceFactory, savedObjectsClient, scopedClusterClient } = services; @@ -277,11 +283,26 @@ type ReducedGroupByResults = ReducedGroupByResult[]; const getReducedGroupByResults = ( results: GroupedSearchQueryResponse['aggregations']['groups']['buckets'] ): ReducedGroupByResults => { - return results.reduce((acc, groupBucket) => { - const groupName = Object.values(groupBucket.key).join(', '); - const groupResult = { name: groupName, documentCount: groupBucket.filtered_results.doc_count }; - return [...acc, groupResult]; - }, []); + const getGroupName = ( + key: GroupedSearchQueryResponse['aggregations']['groups']['buckets'][0]['key'] + ) => Object.values(key).join(', '); + + if (isOptimizedGroupedSearchQueryResponse(results)) { + return results.reduce((acc, groupBucket) => { + const groupName = getGroupName(groupBucket.key); + const groupResult = { name: groupName, documentCount: groupBucket.doc_count }; + return [...acc, groupResult]; + }, []); + } else { + return results.reduce((acc, groupBucket) => { + const groupName = getGroupName(groupBucket.key); + const groupResult = { + name: groupName, + documentCount: groupBucket.filtered_results.doc_count, + }; + return [...acc, groupResult]; + }, []); + } }; export const processGroupByResults = ( @@ -430,12 +451,29 @@ export const buildFiltersFromCriteria = ( }; export const getGroupedESQuery = ( - params: Pick & { criteria: CountCriteria }, + params: Pick & { + criteria: CountCriteria; + count: { + comparator: AlertParams['count']['comparator']; + value?: AlertParams['count']['value']; + }; + }, timestampField: string, index: string, runtimeMappings: estypes.MappingRuntimeFields ): estypes.SearchRequest | undefined => { - const { groupBy } = params; + // IMPORTANT: + // For the group by scenario we need to account for users utilizing "less than" configurations + // to attempt to match on "0", e.g. something has stopped reporting. We need to cast a wider net for these + // configurations to try and capture more documents, so that the filtering doesn't make the group "disappear". + // Due to this there are two forks in the group by code, one where we can optimize the filtering early, and one where + // it is an inner aggregation. "Less than" configurations with high cardinality group by fields can cause severe performance + // problems. + + const { + groupBy, + count: { comparator, value }, + } = params; if (!groupBy || !groupBy.length) { return; @@ -446,47 +484,81 @@ export const getGroupedESQuery = ( timestampField ); - const aggregations = { - groups: { - composite: { - size: COMPOSITE_GROUP_SIZE, - sources: groupBy.map((field, groupIndex) => ({ - [`group-${groupIndex}-${field}`]: { - terms: { field }, - }, - })), + if (isOptimizableGroupedThreshold(comparator, value)) { + const aggregations = { + groups: { + composite: { + size: COMPOSITE_GROUP_SIZE, + sources: groupBy.map((field, groupIndex) => ({ + [`group-${groupIndex}-${field}`]: { + terms: { field }, + }, + })), + }, }, - aggregations: { - filtered_results: { - filter: { - bool: { - // Scope the inner filtering back to the unpadded range - filter: [rangeFilter, ...mustFilters], - ...(mustNotFilters.length > 0 && { must_not: mustNotFilters }), + }; + + const body: estypes.SearchRequest['body'] = { + query: { + bool: { + filter: [rangeFilter, ...mustFilters], + ...(mustNotFilters.length > 0 && { must_not: mustNotFilters }), + }, + }, + aggregations, + runtime_mappings: runtimeMappings, + size: 0, + }; + + return { + index, + allow_no_indices: true, + ignore_unavailable: true, + body, + }; + } else { + const aggregations = { + groups: { + composite: { + size: COMPOSITE_GROUP_SIZE, + sources: groupBy.map((field, groupIndex) => ({ + [`group-${groupIndex}-${field}`]: { + terms: { field }, + }, + })), + }, + aggregations: { + filtered_results: { + filter: { + bool: { + // Scope the inner filtering back to the unpadded range + filter: [rangeFilter, ...mustFilters], + ...(mustNotFilters.length > 0 && { must_not: mustNotFilters }), + }, }, }, }, }, - }, - }; + }; - const body: estypes.SearchRequest['body'] = { - query: { - bool: { - filter: [groupedRangeFilter], + const body: estypes.SearchRequest['body'] = { + query: { + bool: { + filter: [groupedRangeFilter], + }, }, - }, - aggregations, - runtime_mappings: runtimeMappings, - size: 0, - }; + aggregations, + runtime_mappings: runtimeMappings, + size: 0, + }; - return { - index, - allow_no_indices: true, - ignore_unavailable: true, - body, - }; + return { + index, + allow_no_indices: true, + ignore_unavailable: true, + body, + }; + } }; export const getUngroupedESQuery = (