Skip to content

Commit

Permalink
[ResponseOps][alerting] add rule info to logging in alertsClient (ela…
Browse files Browse the repository at this point in the history
…stic#190857)

## Summary

While investigating some issues with the alertsClient, I realized that
we weren't writing out any rule information for the logged messages.
This made debugging quite difficult, as I wanted to see the rule, so had
to search through the alerts indices for the specified alert to get it's
rule id, rule type, etc.

As an example, see elastic#190376

This PR adds that kind of rule info to the logged messages in
alertsClient, as well as the typical sort of tags we write out (rule id,
rule type, module).
  • Loading branch information
pmuellr authored Aug 22, 2024
1 parent e782811 commit 07717a4
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 42 deletions.
24 changes: 17 additions & 7 deletions x-pack/plugins/alerting/server/alerts_client/alerts_client.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ const defaultExecutionOpts = {
startedAt: null,
};

const ruleInfo = `for test.rule-type:1 'rule-name'`;
const logTags = { tags: ['test.rule-type', '1', 'alerts-client'] };

describe('Alerts Client', () => {
let alertsClientParams: AlertsClientParams;
let processAndLogAlertsOpts: ProcessAndLogAlertsOpts;
Expand Down Expand Up @@ -484,7 +487,8 @@ describe('Alerts Client', () => {
});

expect(logger.error).toHaveBeenCalledWith(
`Error searching for tracked alerts by UUID - search failed!`
`Error searching for tracked alerts by UUID ${ruleInfo} - search failed!`,
logTags
);

spy.mockRestore();
Expand Down Expand Up @@ -778,7 +782,8 @@ describe('Alerts Client', () => {
expect(spy).toHaveBeenNthCalledWith(2, 'recoveredCurrent');

expect(logger.error).toHaveBeenCalledWith(
"Error writing alert(2) to .alerts-test.alerts-default - alert(2) doesn't exist in active alerts"
`Error writing alert(2) to .alerts-test.alerts-default - alert(2) doesn't exist in active alerts ${ruleInfo}.`,
logTags
);
spy.mockRestore();

Expand Down Expand Up @@ -1346,7 +1351,8 @@ describe('Alerts Client', () => {

expect(clusterClient.bulk).toHaveBeenCalled();
expect(logger.error).toHaveBeenCalledWith(
`Error writing alerts: 1 successful, 0 conflicts, 2 errors: Validation Failed: 1: index is missing;2: type is missing;; failed to parse field [process.command_line] of type [wildcard] in document with id 'f0c9805be95fedbc3c99c663f7f02cc15826c122'.`
`Error writing alerts ${ruleInfo}: 1 successful, 0 conflicts, 2 errors: Validation Failed: 1: index is missing;2: type is missing;; failed to parse field [process.command_line] of type [wildcard] in document with id 'f0c9805be95fedbc3c99c663f7f02cc15826c122'.`,
{ tags: ['test.rule-type', '1', 'resolve-alert-conflicts'] }
);
});

Expand Down Expand Up @@ -1423,7 +1429,8 @@ describe('Alerts Client', () => {
});

expect(logger.warn).toHaveBeenCalledWith(
`Could not update alert abc in partial-.internal.alerts-test.alerts-default-000001. Partial and restored alert indices are not supported.`
`Could not update alert abc in partial-.internal.alerts-test.alerts-default-000001. Partial and restored alert indices are not supported ${ruleInfo}.`,
logTags
);
});

Expand All @@ -1448,7 +1455,8 @@ describe('Alerts Client', () => {

expect(clusterClient.bulk).toHaveBeenCalled();
expect(logger.error).toHaveBeenCalledWith(
`Error writing 2 alerts to .alerts-test.alerts-default - fail`
`Error writing 2 alerts to .alerts-test.alerts-default ${ruleInfo} - fail`,
logTags
);
});

Expand Down Expand Up @@ -1478,7 +1486,8 @@ describe('Alerts Client', () => {
});

expect(logger.debug).toHaveBeenCalledWith(
`Resources registered and installed for test context but "shouldWrite" is set to false.`
`Resources registered and installed for test context but "shouldWrite" is set to false ${ruleInfo}.`,
logTags
);
expect(clusterClient.bulk).not.toHaveBeenCalled();
});
Expand Down Expand Up @@ -2026,7 +2035,8 @@ describe('Alerts Client', () => {
).rejects.toBe('something went wrong!');

expect(logger.warn).toHaveBeenCalledWith(
'Error updating alert maintenance window IDs: something went wrong!'
`Error updating alert maintenance window IDs for test.rule-type:1 'rule-name': something went wrong!`,
logTags
);
});
});
Expand Down
35 changes: 27 additions & 8 deletions x-pack/plugins/alerting/server/alerts_client/alerts_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ export class AlertsClient<

private reportedAlerts: Record<string, DeepPartial<AlertData>> = {};
private _isUsingDataStreams: boolean;
private ruleInfoMessage: string;
private logTags: { tags: string[] };

constructor(private readonly options: AlertsClientParams) {
this.legacyAlertsClient = new LegacyAlertsClient<
Expand All @@ -130,6 +132,8 @@ export class AlertsClient<
this.rule = formatRule({ rule: this.options.rule, ruleType: this.options.ruleType });
this.ruleType = options.ruleType;
this._isUsingDataStreams = this.options.dataStreamAdapter.isUsingDataStreams();
this.ruleInfoMessage = `for ${this.ruleType.id}:${this.options.rule.id} '${this.options.rule.name}'`;
this.logTags = { tags: [this.ruleType.id, this.options.rule.id, 'alerts-client'] };
}

public async initializeExecution(opts: InitializeExecutionOpts) {
Expand Down Expand Up @@ -202,7 +206,10 @@ export class AlertsClient<
this.fetchedAlerts.primaryTerm[alertUuid] = hit._primary_term;
}
} catch (err) {
this.options.logger.error(`Error searching for tracked alerts by UUID - ${err.message}`);
this.options.logger.error(
`Error searching for tracked alerts by UUID ${this.ruleInfoMessage} - ${err.message}`,
this.logTags
);
}
}

Expand Down Expand Up @@ -327,7 +334,8 @@ export class AlertsClient<
);
} catch (e) {
this.options.logger.debug(
`Failed to update alert matched by maintenance window scoped query for rule ${this.ruleType.id}:${this.options.rule.id}: '${this.options.rule.name}'.`
`Failed to update alert matched by maintenance window scoped query ${this.ruleInfoMessage}`,
this.logTags
);
}

Expand Down Expand Up @@ -407,7 +415,8 @@ export class AlertsClient<
private async persistAlertsHelper() {
if (!this.ruleType.alerts?.shouldWrite) {
this.options.logger.debug(
`Resources registered and installed for ${this.ruleType.alerts?.context} context but "shouldWrite" is set to false.`
`Resources registered and installed for ${this.ruleType.alerts?.context} context but "shouldWrite" is set to false ${this.ruleInfoMessage}.`,
this.logTags
);
return;
}
Expand Down Expand Up @@ -482,7 +491,8 @@ export class AlertsClient<
}
} else {
this.options.logger.error(
`Error writing alert(${id}) to ${this.indexTemplateAndPattern.alias} - alert(${id}) doesn't exist in active alerts`
`Error writing alert(${id}) to ${this.indexTemplateAndPattern.alias} - alert(${id}) doesn't exist in active alerts ${this.ruleInfoMessage}.`,
this.logTags
);
}
}
Expand Down Expand Up @@ -529,7 +539,8 @@ export class AlertsClient<
return true;
} else if (!isValidAlertIndexName(alertIndex)) {
this.options.logger.warn(
`Could not update alert ${alertUuid} in ${alertIndex}. Partial and restored alert indices are not supported.`
`Could not update alert ${alertUuid} in ${alertIndex}. Partial and restored alert indices are not supported ${this.ruleInfoMessage}.`,
this.logTags
);
return false;
}
Expand Down Expand Up @@ -573,11 +584,15 @@ export class AlertsClient<
operations: bulkBody,
},
bulkResponse: response,
ruleId: this.options.rule.id,
ruleName: this.options.rule.name,
ruleType: this.ruleType.id,
});
}
} catch (err) {
this.options.logger.error(
`Error writing ${alertsToIndex.length} alerts to ${this.indexTemplateAndPattern.alias} - ${err.message}`
`Error writing ${alertsToIndex.length} alerts to ${this.indexTemplateAndPattern.alias} ${this.ruleInfoMessage} - ${err.message}`,
this.logTags
);
}
}
Expand Down Expand Up @@ -669,7 +684,10 @@ export class AlertsClient<
});
return response;
} catch (err) {
this.options.logger.warn(`Error updating alert maintenance window IDs: ${err}`);
this.options.logger.warn(
`Error updating alert maintenance window IDs ${this.ruleInfoMessage}: ${err}`,
this.logTags
);
throw err;
}
}
Expand Down Expand Up @@ -739,7 +757,8 @@ export class AlertsClient<
// Update alerts with new maintenance window IDs, await not needed
this.updateAlertMaintenanceWindowIds(uniqueAlertsId).catch(() => {
this.options.logger.debug(
'Failed to update new alerts with scoped query maintenance window Ids by updateByQuery.'
`Failed to update new alerts with scoped query maintenance window Ids by updateByQuery ${this.ruleInfoMessage}.`,
this.logTags
);
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ import { resolveAlertConflicts } from './alert_conflict_resolver';

const logger = loggingSystemMock.create().get();
const esClient = elasticsearchServiceMock.createElasticsearchClient();
const ruleId = 'rule-id';
const ruleName = 'name of rule';
const ruleType = 'rule-type';

const ruleInfo = `for ${ruleType}:${ruleId} '${ruleName}'`;
const logTags = { tags: [ruleType, ruleId, 'resolve-alert-conflicts'] };

const alertDoc = {
[EVENT_ACTION]: 'active',
Expand All @@ -45,11 +51,20 @@ describe('alert_conflict_resolver', () => {

esClient.mget.mockRejectedValueOnce(new Error('mget failed'));

await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});

expect(logger.error).toHaveBeenNthCalledWith(
2,
'Error resolving alert conflicts: mget failed'
`Error resolving alert conflicts ${ruleInfo}: mget failed`,
logTags
);
});

Expand All @@ -61,25 +76,50 @@ describe('alert_conflict_resolver', () => {
});
esClient.bulk.mockRejectedValueOnce(new Error('bulk failed'));

await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});

expect(logger.error).toHaveBeenNthCalledWith(
2,
'Error resolving alert conflicts: bulk failed'
`Error resolving alert conflicts ${ruleInfo}: bulk failed`,
logTags
);
});
});

describe('is successful with', () => {
test('no bulk results', async () => {
const { bulkRequest, bulkResponse } = getReqRes('');
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).not.toHaveBeenCalled();
});

test('no errors in bulk results', async () => {
const { bulkRequest, bulkResponse } = getReqRes('c is is c is');
await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});
expect(logger.error).not.toHaveBeenCalled();
});

Expand All @@ -96,16 +136,30 @@ describe('alert_conflict_resolver', () => {
items: [getBulkResItem(0)],
});

await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});

expect(logger.error).toHaveBeenNthCalledWith(
1,
`Error writing alerts: 0 successful, 1 conflicts, 0 errors: `
`Error writing alerts ${ruleInfo}: 0 successful, 1 conflicts, 0 errors: `,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(
1,
`Retrying bulk update of 1 conflicted alerts ${ruleInfo}`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 1 conflicted alerts`);
expect(logger.info).toHaveBeenNthCalledWith(
2,
`Retried bulk update of 1 conflicted alerts succeeded`
`Retried bulk update of 1 conflicted alerts succeeded ${ruleInfo}`,
logTags
);
});

Expand All @@ -122,16 +176,30 @@ describe('alert_conflict_resolver', () => {
items: [getBulkResItem(2)],
});

await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});

expect(logger.error).toHaveBeenNthCalledWith(
1,
`Error writing alerts: 2 successful, 1 conflicts, 1 errors: hallo`
`Error writing alerts ${ruleInfo}: 2 successful, 1 conflicts, 1 errors: hallo`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(
1,
`Retrying bulk update of 1 conflicted alerts ${ruleInfo}`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 1 conflicted alerts`);
expect(logger.info).toHaveBeenNthCalledWith(
2,
`Retried bulk update of 1 conflicted alerts succeeded`
`Retried bulk update of 1 conflicted alerts succeeded ${ruleInfo}`,
logTags
);
});

Expand All @@ -148,16 +216,30 @@ describe('alert_conflict_resolver', () => {
items: [getBulkResItem(2), getBulkResItem(3), getBulkResItem(5)],
});

await resolveAlertConflicts({ logger, esClient, bulkRequest, bulkResponse });
await resolveAlertConflicts({
logger,
esClient,
bulkRequest,
bulkResponse,
ruleId,
ruleName,
ruleType,
});

expect(logger.error).toHaveBeenNthCalledWith(
1,
`Error writing alerts: 2 successful, 3 conflicts, 1 errors: hallo`
`Error writing alerts ${ruleInfo}: 2 successful, 3 conflicts, 1 errors: hallo`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(
1,
`Retrying bulk update of 3 conflicted alerts ${ruleInfo}`,
logTags
);
expect(logger.info).toHaveBeenNthCalledWith(1, `Retrying bulk update of 3 conflicted alerts`);
expect(logger.info).toHaveBeenNthCalledWith(
2,
`Retried bulk update of 3 conflicted alerts succeeded`
`Retried bulk update of 3 conflicted alerts succeeded ${ruleInfo}`,
logTags
);
});
});
Expand Down
Loading

0 comments on commit 07717a4

Please sign in to comment.