diff --git a/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.test.ts b/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.test.ts index ec77562c1b07e..8867f7cd2db4c 100644 --- a/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.test.ts +++ b/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.test.ts @@ -39,7 +39,7 @@ describe('Transaction duration anomaly alert', () => { services.scopedClusterClient.asCurrentUser.search ).not.toHaveBeenCalled(); - expect(services.alertFactory.create).not.toHaveBeenCalled(); + expect(services.alertsClient.report).not.toHaveBeenCalled(); }); it('ml jobs are not available', async () => { @@ -69,7 +69,7 @@ describe('Transaction duration anomaly alert', () => { services.scopedClusterClient.asCurrentUser.search ).not.toHaveBeenCalled(); - expect(services.alertFactory.create).not.toHaveBeenCalled(); + expect(services.alertsClient.report).not.toHaveBeenCalled(); }); it('anomaly is less than threshold', async () => { @@ -135,7 +135,7 @@ describe('Transaction duration anomaly alert', () => { expect( services.scopedClusterClient.asCurrentUser.search ).not.toHaveBeenCalled(); - expect(services.alertFactory.create).not.toHaveBeenCalled(); + expect(services.alertsClient.report).not.toHaveBeenCalled(); }); }); @@ -154,8 +154,9 @@ describe('Transaction duration anomaly alert', () => { ] as unknown as ApmMlJob[]) ); - const { services, dependencies, executor, scheduleActions } = - createRuleTypeMocks(); + const { services, dependencies, executor } = createRuleTypeMocks(); + + services.alertsClient.report.mockReturnValue({ uuid: 'test-uuid' }); const ml = { mlSystemProvider: () => ({ @@ -221,23 +222,38 @@ describe('Transaction duration anomaly alert', () => { await executor({ params }); - expect(services.alertFactory.create).toHaveBeenCalledTimes(1); + expect(services.alertsClient.report).toHaveBeenCalledTimes(1); - expect(services.alertFactory.create).toHaveBeenCalledWith( - 'apm.anomaly_foo_development_type-foo' - ); + expect(services.alertsClient.report).toHaveBeenCalledWith({ + actionGroup: 'threshold_met', + id: 'apm.anomaly_foo_development_type-foo', + }); - expect(scheduleActions).toHaveBeenCalledWith('threshold_met', { - serviceName: 'foo', - transactionType: 'type-foo', - environment: 'development', - threshold: 'minor', - triggerValue: 'critical', - reason: - 'critical latency anomaly with a score of 80, was detected in the last 5 mins for foo.', - viewInAppUrl: - 'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=development', - alertDetailsUrl: 'mockedAlertsLocator > getLocation', + expect(services.alertsClient.setAlertData).toHaveBeenCalledWith({ + context: { + alertDetailsUrl: 'mockedAlertsLocator > getLocation', + environment: 'development', + reason: + 'critical latency anomaly with a score of 80, was detected in the last 5 mins for foo.', + serviceName: 'foo', + threshold: 'minor', + transactionType: 'type-foo', + triggerValue: 'critical', + viewInAppUrl: + 'http://localhost:5601/eyr/app/apm/services/foo?transactionType=type-foo&environment=development', + }, + id: 'apm.anomaly_foo_development_type-foo', + payload: { + 'kibana.alert.evaluation.threshold': 25, + 'kibana.alert.evaluation.value': 80, + 'kibana.alert.reason': + 'critical latency anomaly with a score of 80, was detected in the last 5 mins for foo.', + 'kibana.alert.severity': 'critical', + 'processor.event': 'transaction', + 'service.environment': 'development', + 'service.name': 'foo', + 'transaction.type': 'type-foo', + }, }); }); }); diff --git a/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.ts b/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.ts index 3165a72fbe134..e8dd969cdb30b 100644 --- a/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.ts +++ b/x-pack/plugins/observability_solution/apm/server/routes/alerts/rule_types/anomaly/register_anomaly_rule_type.ts @@ -6,7 +6,16 @@ */ import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; -import { GetViewInAppRelativeUrlFnOpts } from '@kbn/alerting-plugin/server'; +import { + GetViewInAppRelativeUrlFnOpts, + ActionGroupIdsOf, + AlertInstanceContext as AlertContext, + AlertInstanceState as AlertState, + RuleTypeState, + RuleExecutorOptions, + AlertsClientError, + IRuleTypeAlerts, +} from '@kbn/alerting-plugin/server'; import { KibanaRequest, DEFAULT_APP_CATEGORIES } from '@kbn/core/server'; import datemath from '@kbn/datemath'; import type { ESSearchResponse } from '@kbn/es-types'; @@ -23,7 +32,7 @@ import { ALERT_SEVERITY, ApmRuleType, } from '@kbn/rule-data-utils'; -import { createLifecycleRuleTypeFactory } from '@kbn/rule-registry-plugin/server'; +import { ObservabilityApmAlert } from '@kbn/alerts-as-data-utils'; import { addSpaceIdToPath } from '@kbn/spaces-plugin/common'; import { asyncForEach } from '@kbn/std'; import { compact } from 'lodash'; @@ -43,6 +52,7 @@ import { APM_SERVER_FEATURE_ID, formatAnomalyReason, RULE_TYPES_CONFIG, + THRESHOLD_MET_GROUP, } from '../../../../../common/rules/apm_rule_types'; import { asMutableArray } from '../../../../../common/utils/as_mutable_array'; import { getAlertUrlTransaction } from '../../../../../common/utils/formatters'; @@ -53,7 +63,10 @@ import { RegisterRuleDependencies, } from '../../register_apm_rule_types'; import { getServiceGroupFieldsForAnomaly } from './get_service_group_fields_for_anomaly'; -import { anomalyParamsSchema } from '../../../../../common/rules/schema'; +import { + anomalyParamsSchema, + ApmRuleParamsType, +} from '../../../../../common/rules/schema'; import { getAnomalyDetectorIndex, getAnomalyDetectorType, @@ -61,6 +74,13 @@ import { const ruleTypeConfig = RULE_TYPES_CONFIG[ApmRuleType.Anomaly]; +type AnomalyRuleTypeParams = ApmRuleParamsType[ApmRuleType.Anomaly]; +type AnomalyActionGroups = ActionGroupIdsOf; +type AnomalyRuleTypeState = RuleTypeState; +type AnomalyAlertState = AlertState; +type AnomalyAlertContext = AlertContext; +type AnomalyAlert = ObservabilityApmAlert; + export function registerAnomalyRuleType({ alerting, alertsLocator, @@ -70,296 +90,302 @@ export function registerAnomalyRuleType({ ml, ruleDataClient, }: RegisterRuleDependencies) { - const createLifecycleRuleType = createLifecycleRuleTypeFactory({ - logger, - ruleDataClient, - }); + if (!alerting) { + throw new Error( + 'Cannot register anomaly rule type. The alerting plugin needs to be enabled.' + ); + } - alerting.registerType( - createLifecycleRuleType({ - id: ApmRuleType.Anomaly, - name: ruleTypeConfig.name, - actionGroups: ruleTypeConfig.actionGroups, - defaultActionGroupId: ruleTypeConfig.defaultActionGroupId, - validate: { params: anomalyParamsSchema }, - schemas: { - params: { - type: 'config-schema', - schema: anomalyParamsSchema, - }, - }, - actionVariables: { - context: [ - apmActionVariables.alertDetailsUrl, - apmActionVariables.environment, - apmActionVariables.reason, - apmActionVariables.serviceName, - apmActionVariables.threshold, - apmActionVariables.transactionType, - apmActionVariables.triggerValue, - apmActionVariables.viewInAppUrl, - ], + alerting.registerType({ + id: ApmRuleType.Anomaly, + name: ruleTypeConfig.name, + actionGroups: ruleTypeConfig.actionGroups, + defaultActionGroupId: ruleTypeConfig.defaultActionGroupId, + validate: { params: anomalyParamsSchema }, + schemas: { + params: { + type: 'config-schema', + schema: anomalyParamsSchema, }, - category: DEFAULT_APP_CATEGORIES.observability.id, - producer: APM_SERVER_FEATURE_ID, - minimumLicenseRequired: 'basic', - isExportable: true, - executor: async ({ - params, - services, - spaceId, - startedAt, - getTimeRange, - }) => { - if (!ml) { - return { state: {} }; - } + }, + actionVariables: { + context: [ + apmActionVariables.alertDetailsUrl, + apmActionVariables.environment, + apmActionVariables.reason, + apmActionVariables.serviceName, + apmActionVariables.threshold, + apmActionVariables.transactionType, + apmActionVariables.triggerValue, + apmActionVariables.viewInAppUrl, + ], + }, + category: DEFAULT_APP_CATEGORIES.observability.id, + producer: APM_SERVER_FEATURE_ID, + minimumLicenseRequired: 'basic', + isExportable: true, + executor: async ( + options: RuleExecutorOptions< + AnomalyRuleTypeParams, + AnomalyRuleTypeState, + AnomalyAlertState, + AnomalyAlertContext, + AnomalyActionGroups, + AnomalyAlert + > + ) => { + if (!ml) { + return { state: {} }; + } - const { - getAlertUuid, - getAlertStartedDate, - savedObjectsClient, - scopedClusterClient, - } = services; + const { params, services, spaceId, startedAt, getTimeRange } = options; + const { alertsClient, savedObjectsClient, scopedClusterClient } = + services; + if (!alertsClient) { + throw new AlertsClientError(); + } - const apmIndices = await getApmIndices(savedObjectsClient); + const apmIndices = await getApmIndices(savedObjectsClient); - const ruleParams = params; - const request = {} as KibanaRequest; - const { mlAnomalySearch } = ml.mlSystemProvider( - request, - savedObjectsClient - ); - const anomalyDetectors = ml.anomalyDetectorsProvider( - request, - savedObjectsClient - ); + const ruleParams = params; + const request = {} as KibanaRequest; + const { mlAnomalySearch } = ml.mlSystemProvider( + request, + savedObjectsClient + ); + const anomalyDetectors = ml.anomalyDetectorsProvider( + request, + savedObjectsClient + ); - const mlJobs = await getMLJobs( - anomalyDetectors, - ruleParams.environment - ); + const mlJobs = await getMLJobs(anomalyDetectors, ruleParams.environment); + + const selectedOption = ANOMALY_ALERT_SEVERITY_TYPES.find( + (option) => option.type === ruleParams.anomalySeverityType + ); - const selectedOption = ANOMALY_ALERT_SEVERITY_TYPES.find( - (option) => option.type === ruleParams.anomalySeverityType + if (!selectedOption) { + throw new Error( + `Anomaly alert severity type ${ruleParams.anomalySeverityType} is not supported.` ); + } - if (!selectedOption) { - throw new Error( - `Anomaly alert severity type ${ruleParams.anomalySeverityType} is not supported.` - ); - } - - const threshold = selectedOption.threshold; - - if (mlJobs.length === 0) { - return { state: {} }; - } - - // Lookback window must be at least 30m to support rules created before this change where default was 15m - const minimumWindow = '30m'; - const requestedWindow = `${ruleParams.windowSize}${ruleParams.windowUnit}`; - - const window = - datemath.parse(`now-${minimumWindow}`)!.valueOf() < - datemath.parse(`now-${requestedWindow}`)!.valueOf() - ? minimumWindow - : requestedWindow; - - const { dateStart } = getTimeRange(window); - - const jobIds = mlJobs.map((job) => job.jobId); - const anomalySearchParams = { - body: { - track_total_hits: false, - size: 0, - query: { - bool: { - filter: [ - { term: { result_type: 'record' } }, - { terms: { job_id: jobIds } }, - { term: { is_interim: false } }, - { - range: { - timestamp: { - gte: dateStart, - }, + const threshold = selectedOption.threshold; + + if (mlJobs.length === 0) { + return { state: {} }; + } + + // Lookback window must be at least 30m to support rules created before this change where default was 15m + const minimumWindow = '30m'; + const requestedWindow = `${ruleParams.windowSize}${ruleParams.windowUnit}`; + + const window = + datemath.parse(`now-${minimumWindow}`)!.valueOf() < + datemath.parse(`now-${requestedWindow}`)!.valueOf() + ? minimumWindow + : requestedWindow; + + const { dateStart } = getTimeRange(window); + + const jobIds = mlJobs.map((job) => job.jobId); + const anomalySearchParams = { + body: { + track_total_hits: false, + size: 0, + query: { + bool: { + filter: [ + { term: { result_type: 'record' } }, + { terms: { job_id: jobIds } }, + { term: { is_interim: false } }, + { + range: { + timestamp: { + gte: dateStart, }, }, - ...termQuery( - 'partition_field_value', - ruleParams.serviceName, - { queryEmptyString: false } - ), - ...termQuery('by_field_value', ruleParams.transactionType, { - queryEmptyString: false, - }), - ...termsQuery( - 'detector_index', - ...(ruleParams.anomalyDetectorTypes?.map((type) => - getAnomalyDetectorIndex(type) - ) ?? []) - ), - ] as QueryDslQueryContainer[], - }, - }, - aggs: { - anomaly_groups: { - multi_terms: { - terms: [ - { field: 'partition_field_value' }, - { field: 'by_field_value' }, - { field: 'job_id' }, - { field: 'detector_index' }, - ], - size: 1000, - order: { 'latest_score.record_score': 'desc' as const }, }, - aggs: { - latest_score: { - top_metrics: { - metrics: asMutableArray([ - { field: 'record_score' }, - { field: 'partition_field_value' }, - { field: 'by_field_value' }, - { field: 'job_id' }, - { field: 'timestamp' }, - { field: 'bucket_span' }, - { field: 'detector_index' }, - ] as const), - sort: { - timestamp: 'desc' as const, - }, + ...termQuery('partition_field_value', ruleParams.serviceName, { + queryEmptyString: false, + }), + ...termQuery('by_field_value', ruleParams.transactionType, { + queryEmptyString: false, + }), + ...termsQuery( + 'detector_index', + ...(ruleParams.anomalyDetectorTypes?.map((type) => + getAnomalyDetectorIndex(type) + ) ?? []) + ), + ] as QueryDslQueryContainer[], + }, + }, + aggs: { + anomaly_groups: { + multi_terms: { + terms: [ + { field: 'partition_field_value' }, + { field: 'by_field_value' }, + { field: 'job_id' }, + { field: 'detector_index' }, + ], + size: 1000, + order: { 'latest_score.record_score': 'desc' as const }, + }, + aggs: { + latest_score: { + top_metrics: { + metrics: asMutableArray([ + { field: 'record_score' }, + { field: 'partition_field_value' }, + { field: 'by_field_value' }, + { field: 'job_id' }, + { field: 'timestamp' }, + { field: 'bucket_span' }, + { field: 'detector_index' }, + ] as const), + sort: { + timestamp: 'desc' as const, }, }, }, }, }, }, + }, + }; + + const response: ESSearchResponse = + (await mlAnomalySearch(anomalySearchParams, [])) as any; + + const anomalies = + response.aggregations?.anomaly_groups.buckets + .map((bucket) => { + const latest = bucket.latest_score.top[0].metrics; + + const job = mlJobs.find((j) => j.jobId === latest.job_id); + + if (!job) { + logger.warn( + `Could not find matching job for job id ${latest.job_id}` + ); + return undefined; + } + + return { + serviceName: latest.partition_field_value as string, + transactionType: latest.by_field_value as string, + environment: job.environment, + score: latest.record_score as number, + detectorType: getAnomalyDetectorType( + latest.detector_index as number + ), + timestamp: Date.parse(latest.timestamp as string), + bucketSpan: latest.bucket_span as number, + bucketKey: bucket.key, + }; + }) + .filter((anomaly) => + anomaly ? anomaly.score >= threshold : false + ) ?? []; + + await asyncForEach(compact(anomalies), async (anomaly) => { + const { + serviceName, + environment, + transactionType, + score, + detectorType, + timestamp, + bucketSpan, + bucketKey, + } = anomaly; + + const eventSourceFields = await getServiceGroupFieldsForAnomaly({ + apmIndices, + scopedClusterClient, + savedObjectsClient, + serviceName, + environment, + transactionType, + timestamp, + bucketSpan, + }); + + const severityLevel = getSeverity(score); + const reasonMessage = formatAnomalyReason({ + anomalyScore: score, + serviceName, + severityLevel, + windowSize: params.windowSize, + windowUnit: params.windowUnit, + detectorType, + }); + + const alertId = bucketKey.join('_'); + + const { uuid, start } = alertsClient.report({ + id: alertId, + actionGroup: ruleTypeConfig.defaultActionGroupId, + }); + const indexedStartedAt = start ?? startedAt.toISOString(); + + const relativeViewInAppUrl = getAlertUrlTransaction( + serviceName, + getEnvironmentEsField(environment)?.[SERVICE_ENVIRONMENT], + transactionType + ); + const viewInAppUrl = addSpaceIdToPath( + basePath.publicBaseUrl, + spaceId, + relativeViewInAppUrl + ); + const alertDetailsUrl = await getAlertUrl( + uuid, + spaceId, + indexedStartedAt, + alertsLocator, + basePath.publicBaseUrl + ); + + const payload = { + [SERVICE_NAME]: serviceName, + ...getEnvironmentEsField(environment), + [TRANSACTION_TYPE]: transactionType, + [PROCESSOR_EVENT]: ProcessorEvent.transaction, + [ALERT_SEVERITY]: severityLevel, + [ALERT_EVALUATION_VALUE]: score, + [ALERT_EVALUATION_THRESHOLD]: threshold, + [ALERT_REASON]: reasonMessage, + ...eventSourceFields, }; - const response: ESSearchResponse = - (await mlAnomalySearch(anomalySearchParams, [])) as any; - - const anomalies = - response.aggregations?.anomaly_groups.buckets - .map((bucket) => { - const latest = bucket.latest_score.top[0].metrics; - - const job = mlJobs.find((j) => j.jobId === latest.job_id); - - if (!job) { - logger.warn( - `Could not find matching job for job id ${latest.job_id}` - ); - return undefined; - } - - return { - serviceName: latest.partition_field_value as string, - transactionType: latest.by_field_value as string, - environment: job.environment, - score: latest.record_score as number, - detectorType: getAnomalyDetectorType( - latest.detector_index as number - ), - timestamp: Date.parse(latest.timestamp as string), - bucketSpan: latest.bucket_span as number, - bucketKey: bucket.key, - }; - }) - .filter((anomaly) => - anomaly ? anomaly.score >= threshold : false - ) ?? []; - - await asyncForEach(compact(anomalies), async (anomaly) => { - const { - serviceName, - environment, - transactionType, - score, - detectorType, - timestamp, - bucketSpan, - bucketKey, - } = anomaly; - - const eventSourceFields = await getServiceGroupFieldsForAnomaly({ - apmIndices, - scopedClusterClient, - savedObjectsClient, - serviceName, - environment, - transactionType, - timestamp, - bucketSpan, - }); - - const severityLevel = getSeverity(score); - const reasonMessage = formatAnomalyReason({ - anomalyScore: score, - serviceName, - severityLevel, - windowSize: params.windowSize, - windowUnit: params.windowUnit, - detectorType, - }); - - const alertId = bucketKey.join('_'); - - const alert = services.alertWithLifecycle({ - id: alertId, - fields: { - [SERVICE_NAME]: serviceName, - ...getEnvironmentEsField(environment), - [TRANSACTION_TYPE]: transactionType, - [PROCESSOR_EVENT]: ProcessorEvent.transaction, - [ALERT_SEVERITY]: severityLevel, - [ALERT_EVALUATION_VALUE]: score, - [ALERT_EVALUATION_THRESHOLD]: threshold, - [ALERT_REASON]: reasonMessage, - ...eventSourceFields, - }, - }); - - const relativeViewInAppUrl = getAlertUrlTransaction( - serviceName, - getEnvironmentEsField(environment)?.[SERVICE_ENVIRONMENT], - transactionType - ); - const viewInAppUrl = addSpaceIdToPath( - basePath.publicBaseUrl, - spaceId, - relativeViewInAppUrl - ); - const indexedStartedAt = - getAlertStartedDate(alertId) ?? startedAt.toISOString(); - const alertUuid = getAlertUuid(alertId); - const alertDetailsUrl = await getAlertUrl( - alertUuid, - spaceId, - indexedStartedAt, - alertsLocator, - basePath.publicBaseUrl - ); - - alert.scheduleActions(ruleTypeConfig.defaultActionGroupId, { - alertDetailsUrl, - environment: getEnvironmentLabel(environment), - reason: reasonMessage, - serviceName, - threshold: selectedOption?.label, - transactionType, - triggerValue: severityLevel, - viewInAppUrl, - }); + const context = { + alertDetailsUrl, + environment: getEnvironmentLabel(environment), + reason: reasonMessage, + serviceName, + threshold: selectedOption?.label, + transactionType, + triggerValue: severityLevel, + viewInAppUrl, + }; + + alertsClient.setAlertData({ + id: alertId, + payload, + context, }); + }); - return { state: {} }; - }, - alerts: ApmRuleTypeAlertDefinition, - getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) => - observabilityPaths.ruleDetails(rule.id), - }) - ); + return { state: {} }; + }, + alerts: { + ...ApmRuleTypeAlertDefinition, + shouldWrite: true, + } as IRuleTypeAlerts, + getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) => + observabilityPaths.ruleDetails(rule.id), + }); }