From c9ddb78d3740c7ea714d9dd1049c5e5d5a690b60 Mon Sep 17 00:00:00 2001 From: chrisronline Date: Tue, 24 Aug 2021 16:26:13 -0400 Subject: [PATCH 01/18] Support retry with email as an example --- x-pack/plugins/actions/server/builtin_action_types/email.ts | 5 +++++ x-pack/plugins/actions/server/lib/action_executor.ts | 2 ++ x-pack/plugins/actions/server/lib/task_runner_factory.ts | 1 + x-pack/plugins/actions/server/types.ts | 2 ++ 4 files changed, 10 insertions(+) diff --git a/x-pack/plugins/actions/server/builtin_action_types/email.ts b/x-pack/plugins/actions/server/builtin_action_types/email.ts index 47748f0f13722..4c509b9aad7cf 100644 --- a/x-pack/plugins/actions/server/builtin_action_types/email.ts +++ b/x-pack/plugins/actions/server/builtin_action_types/email.ts @@ -33,6 +33,7 @@ export type EmailActionTypeExecutorOptions = ActionTypeExecutorOptions< export type ActionTypeConfigType = TypeOf; const EMAIL_FOOTER_DIVIDER = '\n\n--\n\n'; +const defaultBackoffPerFailure = 10000; const ConfigSchemaProps = { service: schema.nullable(schema.string()), @@ -149,6 +150,7 @@ export function getActionType(params: GetActionTypeParams): EmailActionType { name: i18n.translate('xpack.actions.builtin.emailTitle', { defaultMessage: 'Email', }), + maxAttempts: 10, validate: { config: schema.object(ConfigSchemaProps, { validate: curry(validateConfig)(configurationUtilities), @@ -244,6 +246,9 @@ async function executor( actionId, message, serviceMessage: err.message, + retry: execOptions.taskInfo + ? new Date(Date.now() + execOptions.taskInfo?.attempts * defaultBackoffPerFailure) + : false, }; } diff --git a/x-pack/plugins/actions/server/lib/action_executor.ts b/x-pack/plugins/actions/server/lib/action_executor.ts index 5dfe56cff5016..d265bca237c3b 100644 --- a/x-pack/plugins/actions/server/lib/action_executor.ts +++ b/x-pack/plugins/actions/server/lib/action_executor.ts @@ -44,6 +44,7 @@ export interface ActionExecutorContext { export interface TaskInfo { scheduled: Date; + attempts: number; } export interface ExecuteOptions { @@ -210,6 +211,7 @@ export class ActionExecutor { config: validatedConfig, secrets: validatedSecrets, isEphemeral, + taskInfo, }); } catch (err) { rawResult = { diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 2354ea55eded6..58f23229c92e8 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -77,6 +77,7 @@ export class TaskRunnerFactory { const taskInfo = { scheduled: taskInstance.runAt, + attempts: taskInstance.attempts, }; return { diff --git a/x-pack/plugins/actions/server/types.ts b/x-pack/plugins/actions/server/types.ts index 14e9e120a853a..64250ca77fba4 100644 --- a/x-pack/plugins/actions/server/types.ts +++ b/x-pack/plugins/actions/server/types.ts @@ -19,6 +19,7 @@ import { SavedObjectReference, } from '../../../../src/core/server'; import { ActionTypeExecutorResult } from '../common'; +import { TaskInfo } from './lib/action_executor'; export { ActionTypeExecutorResult } from '../common'; export { GetFieldsByIssueTypeResponse as JiraGetFieldsResponse } from './builtin_action_types/jira/types'; export { GetCommonFieldsResponse as ServiceNowGetFieldsResponse } from './builtin_action_types/servicenow/types'; @@ -59,6 +60,7 @@ export interface ActionTypeExecutorOptions { secrets: Secrets; params: Params; isEphemeral?: boolean; + taskInfo?: TaskInfo; } export interface ActionResult { From 366af39d5f4642e79fbdf3d7d0f641523a22d07e Mon Sep 17 00:00:00 2001 From: chrisronline Date: Wed, 25 Aug 2021 09:31:46 -0400 Subject: [PATCH 02/18] Fix tests --- .../actions/server/builtin_action_types/email.test.ts | 1 + x-pack/plugins/actions/server/lib/action_executor.test.ts | 2 ++ .../plugins/actions/server/lib/task_runner_factory.test.ts | 5 +++++ 3 files changed, 8 insertions(+) diff --git a/x-pack/plugins/actions/server/builtin_action_types/email.test.ts b/x-pack/plugins/actions/server/builtin_action_types/email.test.ts index 8e9ea1c5e4aa9..66de84c2457bb 100644 --- a/x-pack/plugins/actions/server/builtin_action_types/email.test.ts +++ b/x-pack/plugins/actions/server/builtin_action_types/email.test.ts @@ -403,6 +403,7 @@ describe('execute()', () => { Object { "actionId": "some-id", "message": "error sending email", + "retry": false, "serviceMessage": "wops", "status": "error", } diff --git a/x-pack/plugins/actions/server/lib/action_executor.test.ts b/x-pack/plugins/actions/server/lib/action_executor.test.ts index 440de161490aa..ba7f750859d40 100644 --- a/x-pack/plugins/actions/server/lib/action_executor.test.ts +++ b/x-pack/plugins/actions/server/lib/action_executor.test.ts @@ -187,10 +187,12 @@ test('successfully executes as a task', async () => { const scheduleDelay = 10000; // milliseconds const scheduled = new Date(Date.now() - scheduleDelay); + const attempts = 1; await actionExecutor.execute({ ...executeParams, taskInfo: { scheduled, + attempts, }, }); diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts index 722ba08a26258..b5958bc502fdd 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts @@ -136,6 +136,7 @@ test('executes the task by calling the executor with proper parameters', async ( }), taskInfo: { scheduled: new Date(), + attempts: 0, }, }); @@ -262,6 +263,7 @@ test('uses API key when provided', async () => { }), taskInfo: { scheduled: new Date(), + attempts: 0, }, }); @@ -311,6 +313,7 @@ test('uses relatedSavedObjects when provided', async () => { }), taskInfo: { scheduled: new Date(), + attempts: 0, }, }); }); @@ -348,6 +351,7 @@ test('sanitizes invalid relatedSavedObjects when provided', async () => { relatedSavedObjects: [], taskInfo: { scheduled: new Date(), + attempts: 0, }, }); }); @@ -381,6 +385,7 @@ test(`doesn't use API key when not provided`, async () => { }), taskInfo: { scheduled: new Date(), + attempts: 0, }, }); From 195944b2e58ad2c789f127709d997141b21861b6 Mon Sep 17 00:00:00 2001 From: chrisronline Date: Wed, 25 Aug 2021 13:38:32 -0400 Subject: [PATCH 03/18] Add logic to treat as failure if there is a retry --- x-pack/plugins/actions/server/lib/task_runner_factory.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 58f23229c92e8..68ee3daa5fc14 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -138,13 +138,13 @@ export class TaskRunnerFactory { throw e; } - if (executorResult.status === 'error') { + if (executorResult.status === 'error' && executorResult.retry !== undefined) { // Task manager error handler only kicks in when an error thrown (at this time) // So what we have to do is throw when the return status is `error`. throw new ExecutorError( executorResult.message, executorResult.data, - executorResult.retry == null ? false : executorResult.retry + executorResult.retry as boolean | Date ); } From ac9e9c66efe913804b95cc43207656f65f5143b7 Mon Sep 17 00:00:00 2001 From: chrisronline Date: Thu, 26 Aug 2021 11:44:06 -0400 Subject: [PATCH 04/18] Handle retry better --- .../actions/server/action_type_registry.ts | 3 ++- .../server/builtin_action_types/email.ts | 13 ++++++---- .../actions/server/lib/task_runner_factory.ts | 25 ++++++++++++++++--- x-pack/plugins/actions/server/types.ts | 1 + 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/x-pack/plugins/actions/server/action_type_registry.ts b/x-pack/plugins/actions/server/action_type_registry.ts index e5846560a6c98..f56c5f97516c1 100644 --- a/x-pack/plugins/actions/server/action_type_registry.ts +++ b/x-pack/plugins/actions/server/action_type_registry.ts @@ -134,7 +134,8 @@ export class ActionTypeRegistry { // Don't retry other kinds of errors return false; }, - createTaskRunner: (context: RunContext) => this.taskRunnerFactory.create(context), + createTaskRunner: (context: RunContext) => + this.taskRunnerFactory.create(context, actionType.maxAttempts ?? 1, actionType.getRetry), }, }); // No need to notify usage on basic action types diff --git a/x-pack/plugins/actions/server/builtin_action_types/email.ts b/x-pack/plugins/actions/server/builtin_action_types/email.ts index 4c509b9aad7cf..e1c4a95c79eeb 100644 --- a/x-pack/plugins/actions/server/builtin_action_types/email.ts +++ b/x-pack/plugins/actions/server/builtin_action_types/email.ts @@ -33,7 +33,6 @@ export type EmailActionTypeExecutorOptions = ActionTypeExecutorOptions< export type ActionTypeConfigType = TypeOf; const EMAIL_FOOTER_DIVIDER = '\n\n--\n\n'; -const defaultBackoffPerFailure = 10000; const ConfigSchemaProps = { service: schema.nullable(schema.string()), @@ -134,6 +133,11 @@ function validateParams(paramsObject: unknown): string | void { } } +const defaultBackoffPerFailure = 10000; +function getRetry(attempts: number) { + return new Date(Date.now() + attempts * defaultBackoffPerFailure); +} + interface GetActionTypeParams { logger: Logger; publicBaseUrl?: string; @@ -150,7 +154,8 @@ export function getActionType(params: GetActionTypeParams): EmailActionType { name: i18n.translate('xpack.actions.builtin.emailTitle', { defaultMessage: 'Email', }), - maxAttempts: 10, + maxAttempts: 3, + getRetry, validate: { config: schema.object(ConfigSchemaProps, { validate: curry(validateConfig)(configurationUtilities), @@ -246,9 +251,7 @@ async function executor( actionId, message, serviceMessage: err.message, - retry: execOptions.taskInfo - ? new Date(Date.now() + execOptions.taskInfo?.attempts * defaultBackoffPerFailure) - : false, + retry: execOptions.taskInfo ? getRetry(execOptions.taskInfo?.attempts) : false, }; } diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 68ee3daa5fc14..53aaca166d7ef 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -61,7 +61,11 @@ export class TaskRunnerFactory { this.taskRunnerContext = taskRunnerContext; } - public create({ taskInstance }: RunContext) { + public create( + { taskInstance }: RunContext, + maxAttempts: number, + getRetry?: (attempts: number) => boolean | Date + ) { if (!this.isInitialized) { throw new Error('TaskRunnerFactory not initialized'); } @@ -119,7 +123,12 @@ export class TaskRunnerFactory { basePathService.set(fakeRequest, path); - let executorResult: ActionTypeExecutorResult; + // Throwing an executor error means we will attempt to retry the task + // TM will treat a task as a failure if `attempts >= maxAttempts` + // so we need to handle that here to avoid TM persisting the failed task + const isRetryableBasedOnAttempts = taskInfo.attempts < maxAttempts; + + let executorResult: ActionTypeExecutorResult | undefined; try { executorResult = await actionExecutor.execute({ params, @@ -135,10 +144,18 @@ export class TaskRunnerFactory { // We'll stop re-trying due to action being forbidden throw new ExecutorError(e.message, {}, false); } - throw e; + + if (isRetryableBasedOnAttempts) { + throw new ExecutorError(e.message, {}, getRetry ? getRetry(taskInfo.attempts) : true); + } } - if (executorResult.status === 'error' && executorResult.retry !== undefined) { + if ( + executorResult && + executorResult?.status === 'error' && + executorResult?.retry !== undefined && + isRetryableBasedOnAttempts + ) { // Task manager error handler only kicks in when an error thrown (at this time) // So what we have to do is throw when the return status is `error`. throw new ExecutorError( diff --git a/x-pack/plugins/actions/server/types.ts b/x-pack/plugins/actions/server/types.ts index 64250ca77fba4..756af4caecc19 100644 --- a/x-pack/plugins/actions/server/types.ts +++ b/x-pack/plugins/actions/server/types.ts @@ -106,6 +106,7 @@ export interface ActionType< id: string; name: string; maxAttempts?: number; + getRetry: (attempts: number) => boolean | Date; minimumLicenseRequired: LicenseType; validate?: { params?: ValidatorType; From 705fd7a01d448ec6112079d70e44a06a3e60046e Mon Sep 17 00:00:00 2001 From: chrisronline Date: Thu, 26 Aug 2021 11:58:05 -0400 Subject: [PATCH 05/18] Make this optional --- x-pack/plugins/actions/server/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/actions/server/types.ts b/x-pack/plugins/actions/server/types.ts index 756af4caecc19..cf799b93fd9c8 100644 --- a/x-pack/plugins/actions/server/types.ts +++ b/x-pack/plugins/actions/server/types.ts @@ -106,7 +106,7 @@ export interface ActionType< id: string; name: string; maxAttempts?: number; - getRetry: (attempts: number) => boolean | Date; + getRetry?: (attempts: number) => boolean | Date; minimumLicenseRequired: LicenseType; validate?: { params?: ValidatorType; From e9c036d6fa56e791825f48cd28b98e6a481412da Mon Sep 17 00:00:00 2001 From: chrisronline Date: Thu, 26 Aug 2021 13:11:07 -0400 Subject: [PATCH 06/18] Tweaks --- .../actions/server/lib/task_runner_factory.ts | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 53aaca166d7ef..3a58071648f46 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -22,7 +22,6 @@ import { ActionExecutorContract } from './action_executor'; import { ExecutorError } from './executor_error'; import { RunContext } from '../../../task_manager/server'; import { EncryptedSavedObjectsClient } from '../../../encrypted_saved_objects/server'; -import { ActionTypeDisabledError } from './errors'; import { ActionTaskParams, ActionTypeRegistryContract, @@ -140,14 +139,8 @@ export class TaskRunnerFactory { relatedSavedObjects: validatedRelatedSavedObjects(logger, relatedSavedObjects), }); } catch (e) { - if (e instanceof ActionTypeDisabledError) { - // We'll stop re-trying due to action being forbidden - throw new ExecutorError(e.message, {}, false); - } - - if (isRetryableBasedOnAttempts) { - throw new ExecutorError(e.message, {}, getRetry ? getRetry(taskInfo.attempts) : true); - } + // Intentionally swallow the alert to avoid persisting failed tasks + logger.error(`Action '${actionId}' failed: ${e.message}`); } if ( @@ -163,6 +156,8 @@ export class TaskRunnerFactory { executorResult.data, executorResult.retry as boolean | Date ); + } else if (executorResult && executorResult?.status === 'error') { + logger.error(`Action '${actionId}' failed: ${executorResult.message}`); } // Cleanup action_task_params object now that we're done with it From 70df414766808a0b26c5ac315f2ec804eebf0296 Mon Sep 17 00:00:00 2001 From: chrisronline Date: Thu, 26 Aug 2021 14:47:52 -0400 Subject: [PATCH 07/18] Remove unnecessary code --- x-pack/plugins/actions/server/action_type_registry.ts | 2 +- .../plugins/actions/server/builtin_action_types/email.ts | 8 -------- x-pack/plugins/actions/server/lib/task_runner_factory.ts | 8 ++------ x-pack/plugins/actions/server/types.ts | 1 - 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/x-pack/plugins/actions/server/action_type_registry.ts b/x-pack/plugins/actions/server/action_type_registry.ts index f56c5f97516c1..76b360ce8b17f 100644 --- a/x-pack/plugins/actions/server/action_type_registry.ts +++ b/x-pack/plugins/actions/server/action_type_registry.ts @@ -135,7 +135,7 @@ export class ActionTypeRegistry { return false; }, createTaskRunner: (context: RunContext) => - this.taskRunnerFactory.create(context, actionType.maxAttempts ?? 1, actionType.getRetry), + this.taskRunnerFactory.create(context, actionType.maxAttempts), }, }); // No need to notify usage on basic action types diff --git a/x-pack/plugins/actions/server/builtin_action_types/email.ts b/x-pack/plugins/actions/server/builtin_action_types/email.ts index e1c4a95c79eeb..47748f0f13722 100644 --- a/x-pack/plugins/actions/server/builtin_action_types/email.ts +++ b/x-pack/plugins/actions/server/builtin_action_types/email.ts @@ -133,11 +133,6 @@ function validateParams(paramsObject: unknown): string | void { } } -const defaultBackoffPerFailure = 10000; -function getRetry(attempts: number) { - return new Date(Date.now() + attempts * defaultBackoffPerFailure); -} - interface GetActionTypeParams { logger: Logger; publicBaseUrl?: string; @@ -154,8 +149,6 @@ export function getActionType(params: GetActionTypeParams): EmailActionType { name: i18n.translate('xpack.actions.builtin.emailTitle', { defaultMessage: 'Email', }), - maxAttempts: 3, - getRetry, validate: { config: schema.object(ConfigSchemaProps, { validate: curry(validateConfig)(configurationUtilities), @@ -251,7 +244,6 @@ async function executor( actionId, message, serviceMessage: err.message, - retry: execOptions.taskInfo ? getRetry(execOptions.taskInfo?.attempts) : false, }; } diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 002ffcaeea3be..156e60e24a478 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -61,11 +61,7 @@ export class TaskRunnerFactory { this.taskRunnerContext = taskRunnerContext; } - public create( - { taskInstance }: RunContext, - maxAttempts: number, - getRetry?: (attempts: number) => boolean | Date - ) { + public create({ taskInstance }: RunContext, maxAttempts?: number) { if (!this.isInitialized) { throw new Error('TaskRunnerFactory not initialized'); } @@ -126,7 +122,7 @@ export class TaskRunnerFactory { // Throwing an executor error means we will attempt to retry the task // TM will treat a task as a failure if `attempts >= maxAttempts` // so we need to handle that here to avoid TM persisting the failed task - const isRetryableBasedOnAttempts = taskInfo.attempts < maxAttempts; + const isRetryableBasedOnAttempts = taskInfo.attempts < (maxAttempts ?? 1); let executorResult: ActionTypeExecutorResult | undefined; try { diff --git a/x-pack/plugins/actions/server/types.ts b/x-pack/plugins/actions/server/types.ts index cf799b93fd9c8..64250ca77fba4 100644 --- a/x-pack/plugins/actions/server/types.ts +++ b/x-pack/plugins/actions/server/types.ts @@ -106,7 +106,6 @@ export interface ActionType< id: string; name: string; maxAttempts?: number; - getRetry?: (attempts: number) => boolean | Date; minimumLicenseRequired: LicenseType; validate?: { params?: ValidatorType; From 58d894668586ce9abbdbc45aac111cef2946083c Mon Sep 17 00:00:00 2001 From: chrisronline Date: Tue, 31 Aug 2021 14:49:36 -0400 Subject: [PATCH 08/18] Fix existing tests --- .../server/builtin_action_types/email.test.ts | 1 - .../server/lib/task_runner_factory.test.ts | 16 ++++++++++++---- .../actions/server/lib/task_runner_factory.ts | 6 +++++- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/x-pack/plugins/actions/server/builtin_action_types/email.test.ts b/x-pack/plugins/actions/server/builtin_action_types/email.test.ts index 66de84c2457bb..8e9ea1c5e4aa9 100644 --- a/x-pack/plugins/actions/server/builtin_action_types/email.test.ts +++ b/x-pack/plugins/actions/server/builtin_action_types/email.test.ts @@ -403,7 +403,6 @@ describe('execute()', () => { Object { "actionId": "some-id", "message": "error sending email", - "retry": false, "serviceMessage": "wops", "status": "error", } diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts index 19d00d4aa1cc6..a3d786f4d5cdd 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts @@ -192,6 +192,7 @@ test('executes the task by calling the executor with proper parameters, using st }), taskInfo: { scheduled: new Date(), + attempts: 0, }, }); @@ -454,6 +455,7 @@ test('uses relatedSavedObjects as is when references are empty', async () => { }), taskInfo: { scheduled: new Date(), + attempts: 0, }, }); }); @@ -554,9 +556,15 @@ test(`doesn't use API key when not provided`, async () => { }); test(`throws an error when license doesn't support the action type`, async () => { - const taskRunner = taskRunnerFactory.create({ - taskInstance: mockedTaskInstance, - }); + const taskRunner = taskRunnerFactory.create( + { + taskInstance: { + ...mockedTaskInstance, + attempts: 1, + }, + }, + 2 + ); mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({ id: '3', @@ -584,6 +592,6 @@ test(`throws an error when license doesn't support the action type`, async () => } catch (e) { expect(e instanceof ExecutorError).toEqual(true); expect(e.data).toEqual({}); - expect(e.retry).toEqual(false); + expect(e.retry).toEqual(true); } }); diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 156e60e24a478..7fddee528f3bd 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -136,8 +136,12 @@ export class TaskRunnerFactory { relatedSavedObjects: validatedRelatedSavedObjects(logger, relatedSavedObjects), }); } catch (e) { - // Intentionally swallow the alert to avoid persisting failed tasks logger.error(`Action '${actionId}' failed: ${e.message}`); + if (isRetryableBasedOnAttempts) { + // In order for retry to work, we need to indicate to task manager this task + // failed + throw new ExecutorError(e.message, {}, true); + } } if ( From df80d91577edc6926b2558929f834bcbcd1aa986 Mon Sep 17 00:00:00 2001 From: chrisronline Date: Tue, 31 Aug 2021 16:26:22 -0400 Subject: [PATCH 09/18] Add some unit tests --- .../server/lib/task_runner_factory.test.ts | 132 ++++++++++++++++++ .../actions/server/lib/task_runner_factory.ts | 3 +- 2 files changed, 134 insertions(+), 1 deletion(-) diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts index a3d786f4d5cdd..a5640a99337d3 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts @@ -595,3 +595,135 @@ test(`throws an error when license doesn't support the action type`, async () => expect(e.retry).toEqual(true); } }); + +test(`treats errors as errors if the task is retryable`, async () => { + const taskRunner = taskRunnerFactory.create({ + taskInstance: { + ...mockedTaskInstance, + attempts: 0, + }, + }); + + mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({ + id: '3', + type: 'action_task_params', + attributes: { + actionId: '2', + params: { baz: true }, + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [ + { + id: '2', + name: 'actionRef', + type: 'action', + }, + ], + }); + mockedActionExecutor.execute.mockResolvedValueOnce({ + status: 'error', + actionId: '2', + message: 'Error message', + data: { foo: true }, + retry: false, + }); + + let err; + try { + await taskRunner.run(); + } catch (e) { + err = e; + } + expect(err).toBeDefined(); + expect(err instanceof ExecutorError).toEqual(true); + expect(err.data).toEqual({ foo: true }); + expect(err.retry).toEqual(false); + expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith( + `Action '2' failed: Error message` + ); +}); + +test(`treats errors as successes if the task is not retryable`, async () => { + const taskRunner = taskRunnerFactory.create({ + taskInstance: { + ...mockedTaskInstance, + attempts: 1, + }, + }); + + mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({ + id: '3', + type: 'action_task_params', + attributes: { + actionId: '2', + params: { baz: true }, + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [ + { + id: '2', + name: 'actionRef', + type: 'action', + }, + ], + }); + mockedActionExecutor.execute.mockResolvedValueOnce({ + status: 'error', + actionId: '2', + message: 'Error message', + data: { foo: true }, + retry: false, + }); + + let err; + try { + await taskRunner.run(); + } catch (e) { + err = e; + } + expect(err).toBeUndefined(); + expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith( + `Action '2' failed: Error message` + ); +}); + +test('treats errors as errors if the error is thrown instead of returned', async () => { + const taskRunner = taskRunnerFactory.create({ + taskInstance: { + ...mockedTaskInstance, + attempts: 0, + }, + }); + + mockedEncryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({ + id: '3', + type: 'action_task_params', + attributes: { + actionId: '2', + params: { baz: true }, + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [ + { + id: '2', + name: 'actionRef', + type: 'action', + }, + ], + }); + mockedActionExecutor.execute.mockRejectedValueOnce({}); + + let err; + try { + await taskRunner.run(); + } catch (e) { + err = e; + } + expect(err).toBeDefined(); + expect(err instanceof ExecutorError).toEqual(true); + expect(err.data).toEqual({}); + expect(err.retry).toEqual(true); + expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith( + `Action '2' failed: undefined` + ); +}); diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 7fddee528f3bd..a2d1ba91181fc 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -61,7 +61,7 @@ export class TaskRunnerFactory { this.taskRunnerContext = taskRunnerContext; } - public create({ taskInstance }: RunContext, maxAttempts?: number) { + public create({ taskInstance }: RunContext, maxAttempts: number = 1) { if (!this.isInitialized) { throw new Error('TaskRunnerFactory not initialized'); } @@ -150,6 +150,7 @@ export class TaskRunnerFactory { executorResult?.retry !== undefined && isRetryableBasedOnAttempts ) { + logger.error(`Action '${actionId}' failed: ${executorResult.message}`); // Task manager error handler only kicks in when an error thrown (at this time) // So what we have to do is throw when the return status is `error`. throw new ExecutorError( From b3b316ae9d668a959a790447da9025b1c0b36397 Mon Sep 17 00:00:00 2001 From: chrisronline Date: Wed, 1 Sep 2021 12:45:37 -0400 Subject: [PATCH 10/18] Add test --- .../alerting_api_integration/common/config.ts | 1 + .../plugins/alerts/server/action_types.ts | 37 ++++++++ .../spaces_only/tests/actions/enqueue.ts | 89 ++++++++++++++++--- 3 files changed, 114 insertions(+), 13 deletions(-) diff --git a/x-pack/test/alerting_api_integration/common/config.ts b/x-pack/test/alerting_api_integration/common/config.ts index 5a9d2a20fee16..65fc73550c9bd 100644 --- a/x-pack/test/alerting_api_integration/common/config.ts +++ b/x-pack/test/alerting_api_integration/common/config.ts @@ -44,6 +44,7 @@ const enabledActionTypes = [ 'test.noop', 'test.delayed', 'test.rate-limit', + 'test.no-attempts-rate-limit', 'test.throw', ]; diff --git a/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts b/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts index a848207bf1b70..e7e48a0938084 100644 --- a/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts +++ b/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts @@ -37,6 +37,7 @@ export function defineActionTypes( actions.registerType(getDelayedActionType()); actions.registerType(getFailingActionType()); actions.registerType(getRateLimitedActionType()); + actions.registerType(getNoAttemptsRateLimitedActionType()); actions.registerType(getAuthorizationActionType(core)); } @@ -183,6 +184,42 @@ function getRateLimitedActionType() { return result; } +function getNoAttemptsRateLimitedActionType() { + const paramsSchema = schema.object({ + index: schema.string(), + reference: schema.string(), + retryAt: schema.number(), + }); + type ParamsType = TypeOf; + const result: ActionType<{}, {}, ParamsType> = { + id: 'test.no-attempts-rate-limit', + name: 'Test: Rate Limit', + minimumLicenseRequired: 'gold', + maxAttempts: 0, + validate: { + params: paramsSchema, + }, + async executor({ config, params, services }) { + await services.scopedClusterClient.index({ + index: params.index, + refresh: 'wait_for', + body: { + params, + config, + reference: params.reference, + source: 'action:test.rate-limit', + }, + }); + return { + status: 'error', + retry: new Date(params.retryAt), + actionId: '', + }; + }, + }; + return result; +} + function getAuthorizationActionType(core: CoreSetup) { const paramsSchema = schema.object({ callClusterAuthorizationIndex: schema.string(), diff --git a/x-pack/test/alerting_api_integration/spaces_only/tests/actions/enqueue.ts b/x-pack/test/alerting_api_integration/spaces_only/tests/actions/enqueue.ts index f937e63840937..533570ae4c16d 100644 --- a/x-pack/test/alerting_api_integration/spaces_only/tests/actions/enqueue.ts +++ b/x-pack/test/alerting_api_integration/spaces_only/tests/actions/enqueue.ts @@ -97,21 +97,8 @@ export default function ({ getService }: FtrProviderContext) { }, }) .expect(204); - await esTestIndexTool.waitForDocs('action:test.failing', reference, 1); - await supertest - .put( - `${getUrlPrefix( - Spaces.space1.id - )}/api/alerts_fixture/Actions-cleanup_failed_action_executions/reschedule_task` - ) - .set('kbn-xsrf', 'foo') - .send({ - runAt: new Date().toISOString(), - }) - .expect(200); - await retry.try(async () => { const searchResult = await es.search({ index: '.kibana_task_manager', @@ -139,5 +126,81 @@ export default function ({ getService }: FtrProviderContext) { expect((searchResult.body.hits.total as estypes.SearchTotalHits).value).to.eql(0); }); }); + + it('should never leaved a failed task, even if max attempts is reached', async () => { + // We have to provide the test.rate-limit the next runAt, for testing purposes + const retryDate = new Date(Date.now() + 1); + const { body: createdAction } = await supertest + .post(`${getUrlPrefix(Spaces.space1.id)}/api/actions/connector`) + .set('kbn-xsrf', 'foo') + .send({ + name: 'My action', + connector_type_id: 'test.no-attempts-rate-limit', + config: {}, + secrets: {}, + }) + .expect(200); + objectRemover.add(Spaces.space1.id, createdAction.id, 'action', 'actions'); + + const reference = `actions-enqueue-2:${Spaces.space1.id}:${createdAction.id}`; + await supertest + .post( + `${getUrlPrefix(Spaces.space1.id)}/api/alerts_fixture/${createdAction.id}/enqueue_action` + ) + .set('kbn-xsrf', 'foo') + .send({ + params: { + reference, + index: ES_TEST_INDEX_NAME, + retryAt: retryDate.getTime(), + }, + }) + .expect(204); + + await retry.try(async () => { + const runningSearchResult = await es.search({ + index: '.kibana_task_manager', + body: { + query: { + bool: { + must: [ + { + term: { + 'task.taskType': 'actions:test.no-attempts-rate-limit', + }, + }, + { + term: { + 'task.status': 'running', + }, + }, + ], + }, + }, + }, + }); + expect((runningSearchResult.body.hits.total as estypes.SearchTotalHits).value).to.eql(1); + }); + + await retry.try(async () => { + const searchResult = await es.search({ + index: '.kibana_task_manager', + body: { + query: { + bool: { + must: [ + { + term: { + 'task.taskType': 'actions:test.no-attempts-rate-limit', + }, + }, + ], + }, + }, + }, + }); + expect((searchResult.body.hits.total as estypes.SearchTotalHits).value).to.eql(0); + }); + }); }); } From a89fd86080f3b99aaca4ded8df5634069800d84d Mon Sep 17 00:00:00 2001 From: chrisronline Date: Wed, 1 Sep 2021 14:55:14 -0400 Subject: [PATCH 11/18] Add doc note --- .../task-manager-health-monitoring.asciidoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/user/production-considerations/task-manager-health-monitoring.asciidoc b/docs/user/production-considerations/task-manager-health-monitoring.asciidoc index 3321a9d0c02a1..37837d2b58bee 100644 --- a/docs/user/production-considerations/task-manager-health-monitoring.asciidoc +++ b/docs/user/production-considerations/task-manager-health-monitoring.asciidoc @@ -111,6 +111,7 @@ a| Runtime | This section tracks excution performance of Task Manager, tracking task _drift_, worker _load_, and execution stats broken down by type, including duration and execution results. + a| Capacity Estimation | This section provides a rough estimate about the sufficiency of its capacity. As the name suggests, these are estimates based on historical data and should not be used as predictions. Use these estimations when following the Task Manager <>. @@ -123,6 +124,12 @@ The root `status` indicates the `status` of the system overall. The Runtime `status` indicates whether task executions have exceeded any of the <>. An `OK` status means none of the threshold have been exceeded. A `Warning` status means that at least one warning threshold has been exceeded. An `Error` status means that at least one error threshold has been exceeded. +[IMPORTANT] +============================================== +Some tasks (such as <>) will incorrectly report their status as successful even if the task failed. +The runtime and workload block will return data about success and failures and will not take this into consideration. +============================================== + The Capacity Estimation `status` indicates the sufficiency of the observed capacity. An `OK` status means capacity is sufficient. A `Warning` status means that capacity is sufficient for the scheduled recurring tasks, but non-recurring tasks often cause the cluster to exceed capacity. An `Error` status means that there is insufficient capacity across all types of tasks. By monitoring the `status` of the system overall, and the `status` of specific task types of interest, you can evaluate the health of the {kib} Task Management system. From a2b75066d0e5550b2dfe281395c3df46020f96ab Mon Sep 17 00:00:00 2001 From: chrisronline Date: Thu, 2 Sep 2021 09:01:51 -0400 Subject: [PATCH 12/18] More docs --- .../task-manager-health-monitoring.asciidoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/user/production-considerations/task-manager-health-monitoring.asciidoc b/docs/user/production-considerations/task-manager-health-monitoring.asciidoc index 37837d2b58bee..b07a01906b895 100644 --- a/docs/user/production-considerations/task-manager-health-monitoring.asciidoc +++ b/docs/user/production-considerations/task-manager-health-monitoring.asciidoc @@ -128,6 +128,8 @@ The Runtime `status` indicates whether task executions have exceeded any of the ============================================== Some tasks (such as <>) will incorrectly report their status as successful even if the task failed. The runtime and workload block will return data about success and failures and will not take this into consideration. + +To get a better sense of action failures, please refer to the <> for more accurate context into failures and successes. ============================================== The Capacity Estimation `status` indicates the sufficiency of the observed capacity. An `OK` status means capacity is sufficient. A `Warning` status means that capacity is sufficient for the scheduled recurring tasks, but non-recurring tasks often cause the cluster to exceed capacity. An `Error` status means that there is insufficient capacity across all types of tasks. From 975ad8501f1f542d655554b2a653d2cf15215d07 Mon Sep 17 00:00:00 2001 From: chrisronline Date: Fri, 3 Sep 2021 11:11:49 -0400 Subject: [PATCH 13/18] PR feedback --- docs/management/action-types.asciidoc | 10 ++++++++++ .../server/lib/task_runner_factory.test.ts | 6 +++--- .../actions/server/lib/task_runner_factory.ts | 18 +++++++++++++++--- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/docs/management/action-types.asciidoc b/docs/management/action-types.asciidoc index 3d3d7aeb2d777..cb3b1df257c0d 100644 --- a/docs/management/action-types.asciidoc +++ b/docs/management/action-types.asciidoc @@ -135,5 +135,15 @@ image::images/connectors-with-missing-secrets.png[Connectors with missing secret For out-of-the-box and standardized connectors, you can <> before {kib} starts. +[float] +[[montoring-connectors]] +=== Monitoring + +The <> is useful to understand the performance of all tasks in your environment. +However, if connectors fail to execute for any reason, they will report as successful to Task Manager. This means the failure stats will not +accurately depict the performance of connectors. + +To get a better sense of connector failures, please refer to the <> +for more accurate context into failures and successes. include::connectors/index.asciidoc[] diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts index a5640a99337d3..85d819ba09b8a 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts @@ -639,7 +639,7 @@ test(`treats errors as errors if the task is retryable`, async () => { expect(err.data).toEqual({ foo: true }); expect(err.retry).toEqual(false); expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith( - `Action '2' failed: Error message` + `Action '2' failed and will not retry: Error message` ); }); @@ -683,7 +683,7 @@ test(`treats errors as successes if the task is not retryable`, async () => { } expect(err).toBeUndefined(); expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith( - `Action '2' failed: Error message` + `Action '2' failed and will not retry: Error message` ); }); @@ -724,6 +724,6 @@ test('treats errors as errors if the error is thrown instead of returned', async expect(err.data).toEqual({}); expect(err.retry).toEqual(true); expect(taskRunnerFactoryInitializerParams.logger.error as jest.Mock).toHaveBeenCalledWith( - `Action '2' failed: undefined` + `Action '2' failed and will retry: undefined` ); }); diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index a2d1ba91181fc..9a3856bbf7cee 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -123,6 +123,8 @@ export class TaskRunnerFactory { // TM will treat a task as a failure if `attempts >= maxAttempts` // so we need to handle that here to avoid TM persisting the failed task const isRetryableBasedOnAttempts = taskInfo.attempts < (maxAttempts ?? 1); + const willRetryMessage = `and will retry`; + const willNotRetryMessage = `and will not retry`; let executorResult: ActionTypeExecutorResult | undefined; try { @@ -136,7 +138,11 @@ export class TaskRunnerFactory { relatedSavedObjects: validatedRelatedSavedObjects(logger, relatedSavedObjects), }); } catch (e) { - logger.error(`Action '${actionId}' failed: ${e.message}`); + logger.error( + `Action '${actionId}' failed ${ + isRetryableBasedOnAttempts ? willRetryMessage : willNotRetryMessage + }: ${e.message}` + ); if (isRetryableBasedOnAttempts) { // In order for retry to work, we need to indicate to task manager this task // failed @@ -150,7 +156,11 @@ export class TaskRunnerFactory { executorResult?.retry !== undefined && isRetryableBasedOnAttempts ) { - logger.error(`Action '${actionId}' failed: ${executorResult.message}`); + logger.error( + `Action '${actionId}' failed ${ + !!executorResult.retry ? willRetryMessage : willNotRetryMessage + }: ${executorResult.message}` + ); // Task manager error handler only kicks in when an error thrown (at this time) // So what we have to do is throw when the return status is `error`. throw new ExecutorError( @@ -159,7 +169,9 @@ export class TaskRunnerFactory { executorResult.retry as boolean | Date ); } else if (executorResult && executorResult?.status === 'error') { - logger.error(`Action '${actionId}' failed: ${executorResult.message}`); + logger.error( + `Action '${actionId}' failed ${willNotRetryMessage}: ${executorResult.message}` + ); } // Cleanup action_task_params object now that we're done with it From ac96c2649313e160b4b5e89c8adf0b9022d32893 Mon Sep 17 00:00:00 2001 From: Chris Roberson Date: Tue, 7 Sep 2021 13:50:27 -0400 Subject: [PATCH 14/18] Update docs/management/action-types.asciidoc Co-authored-by: gchaps <33642766+gchaps@users.noreply.github.com> --- docs/management/action-types.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/management/action-types.asciidoc b/docs/management/action-types.asciidoc index cb3b1df257c0d..a3e0e1683e5c6 100644 --- a/docs/management/action-types.asciidoc +++ b/docs/management/action-types.asciidoc @@ -137,7 +137,7 @@ before {kib} starts. [float] [[montoring-connectors]] -=== Monitoring +=== Monitoring connectors The <> is useful to understand the performance of all tasks in your environment. However, if connectors fail to execute for any reason, they will report as successful to Task Manager. This means the failure stats will not From 07ce53c9a0ce80be7c80c639399a17b3e2cd0182 Mon Sep 17 00:00:00 2001 From: Chris Roberson Date: Tue, 7 Sep 2021 13:50:35 -0400 Subject: [PATCH 15/18] Update docs/management/action-types.asciidoc Co-authored-by: gchaps <33642766+gchaps@users.noreply.github.com> --- docs/management/action-types.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/management/action-types.asciidoc b/docs/management/action-types.asciidoc index a3e0e1683e5c6..4e650a0a2970b 100644 --- a/docs/management/action-types.asciidoc +++ b/docs/management/action-types.asciidoc @@ -140,7 +140,7 @@ before {kib} starts. === Monitoring connectors The <> is useful to understand the performance of all tasks in your environment. -However, if connectors fail to execute for any reason, they will report as successful to Task Manager. This means the failure stats will not +However, if connectors fail to execute, they will report as successful to Task Manager. The failure stats will not accurately depict the performance of connectors. To get a better sense of connector failures, please refer to the <> From 5ddf97ccb0a53f399b8b35c140191a491e65eea5 Mon Sep 17 00:00:00 2001 From: Chris Roberson Date: Tue, 7 Sep 2021 13:50:40 -0400 Subject: [PATCH 16/18] Update docs/management/action-types.asciidoc Co-authored-by: gchaps <33642766+gchaps@users.noreply.github.com> --- docs/management/action-types.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/management/action-types.asciidoc b/docs/management/action-types.asciidoc index 4e650a0a2970b..1aba5544e811c 100644 --- a/docs/management/action-types.asciidoc +++ b/docs/management/action-types.asciidoc @@ -143,7 +143,7 @@ The <> is useful to unde However, if connectors fail to execute, they will report as successful to Task Manager. The failure stats will not accurately depict the performance of connectors. -To get a better sense of connector failures, please refer to the <> +For more information on connector successes and failures, refer to the <>. for more accurate context into failures and successes. include::connectors/index.asciidoc[] From 1b9eb7fe275fe130a2254ace00fc4a1422f43919 Mon Sep 17 00:00:00 2001 From: Chris Roberson Date: Tue, 7 Sep 2021 13:50:45 -0400 Subject: [PATCH 17/18] Update docs/management/action-types.asciidoc Co-authored-by: gchaps <33642766+gchaps@users.noreply.github.com> --- docs/management/action-types.asciidoc | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/management/action-types.asciidoc b/docs/management/action-types.asciidoc index 1aba5544e811c..a0e27f0f5c5d7 100644 --- a/docs/management/action-types.asciidoc +++ b/docs/management/action-types.asciidoc @@ -144,6 +144,5 @@ However, if connectors fail to execute, they will report as successful to Task M accurately depict the performance of connectors. For more information on connector successes and failures, refer to the <>. -for more accurate context into failures and successes. include::connectors/index.asciidoc[] From 1ce82513ab6f4044d20102109609cd34638b3343 Mon Sep 17 00:00:00 2001 From: Chris Roberson Date: Tue, 7 Sep 2021 13:50:51 -0400 Subject: [PATCH 18/18] Update docs/management/action-types.asciidoc Co-authored-by: gchaps <33642766+gchaps@users.noreply.github.com> --- docs/management/action-types.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/management/action-types.asciidoc b/docs/management/action-types.asciidoc index a0e27f0f5c5d7..92adbaf97d8c5 100644 --- a/docs/management/action-types.asciidoc +++ b/docs/management/action-types.asciidoc @@ -139,7 +139,7 @@ before {kib} starts. [[montoring-connectors]] === Monitoring connectors -The <> is useful to understand the performance of all tasks in your environment. +The <> helps you understand the performance of all tasks in your environment. However, if connectors fail to execute, they will report as successful to Task Manager. The failure stats will not accurately depict the performance of connectors.