Skip to content

Commit

Permalink
[Security Solution] Set socket timeout for potentially long running R…
Browse files Browse the repository at this point in the history
…ule Management API endpoints (elastic#177329)

**Fixes:** elastic#177277

## Summary

This PR sets a reasonably high (1 hour) socket timeout for potentially long-running Rule Management API endpoints.

It's important to note that this fix only mitigates the risk of the TCP connection being closed. Proxies have their own TCP connection timeouts, although these are higher than the Node.js default of 2 minutes.

## Details

When performing operations on a large number of rules, and/or in an environment that is resource-limited or suffering from performance degradation, endpoints may take longer than the default Node.js socket timeout of 2 minutes. According to the [HTTP spec](https://www.w3.org/Protocols/rfc2616/rfc2616-sec8.html#sec8.2.4), the browser should retry if the connection was closed by the server. Because an API endpoint's handler isn't terminated when its TCP connection is closed, a retry attempt will spawn a new round of request processing in parallel. Under some circumstances this can lead to creating multiple rules with the same `rule_id` and, for example, end up creating more rules than expected, as described in elastic#176207.

(cherry picked from commit 05d3dfa)
  • Loading branch information
maximpn committed Mar 6, 2024
1 parent 1dc941b commit 5de5947
Show file tree
Hide file tree
Showing 14 changed files with 67 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import type { RulesClient } from '@kbn/alerting-plugin/server';
import type { ExceptionListClient } from '@kbn/lists-plugin/server';
import { transformError } from '@kbn/securitysolution-es-utils';
import moment from 'moment';
import {
InstallPrebuiltRulesAndTimelinesResponse,
PREBUILT_RULES_URL,
Expand All @@ -19,6 +18,7 @@ import type {
} from '../../../../../types';
import { buildSiemResponse } from '../../../routes/utils';
import { getExistingPrepackagedRules } from '../../../rule_management/logic/search/get_existing_prepackaged_rules';
import { PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS } from '../../constants';
import { ensureLatestRulesPackageInstalled } from '../../logic/ensure_latest_rules_package_installed';
import { getRulesToInstall } from '../../logic/get_rules_to_install';
import { getRulesToUpdate } from '../../logic/get_rules_to_update';
Expand All @@ -36,11 +36,7 @@ export const installPrebuiltRulesAndTimelinesRoute = (router: SecuritySolutionPl
options: {
tags: ['access:securitySolution'],
timeout: {
// FUNFACT: If we do not add a very long timeout what will happen
// is that Chrome which receive a 408 error and then do a retry.
// This retry can cause lots of connections to happen. Using a very
// long timeout will ensure that Chrome does not do retries and saturate the connections.
idleSocket: moment.duration('1', 'hour').asMilliseconds(),
idleSocket: PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS,
},
},
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import { createPrebuiltRuleObjectsClient } from '../../logic/rule_objects/prebui
import { fetchRuleVersionsTriad } from '../../logic/rule_versions/fetch_rule_versions_triad';
import { getVersionBuckets } from '../../model/rule_versions/get_version_buckets';
import { performTimelinesInstallation } from '../../logic/perform_timelines_installation';
import { PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS } from '../../constants';

export const performRuleInstallationRoute = (router: SecuritySolutionPluginRouter) => {
router.versioned
Expand All @@ -36,6 +37,9 @@ export const performRuleInstallationRoute = (router: SecuritySolutionPluginRoute
path: PERFORM_RULE_INSTALLATION_URL,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import { upgradePrebuiltRules } from '../../logic/rule_objects/upgrade_prebuilt_
import { fetchRuleVersionsTriad } from '../../logic/rule_versions/fetch_rule_versions_triad';
import type { PrebuiltRuleAsset } from '../../model/rule_assets/prebuilt_rule_asset';
import { getVersionBuckets } from '../../model/rule_versions/get_version_buckets';
import { PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS } from '../../constants';

export const performRuleUpgradeRoute = (router: SecuritySolutionPluginRouter) => {
router.versioned
Expand All @@ -38,6 +39,9 @@ export const performRuleUpgradeRoute = (router: SecuritySolutionPluginRouter) =>
path: PERFORM_RULE_UPGRADE_URL,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { fetchRuleVersionsTriad } from '../../logic/rule_versions/fetch_rule_ver
import type { PrebuiltRuleAsset } from '../../model/rule_assets/prebuilt_rule_asset';
import { getVersionBuckets } from '../../model/rule_versions/get_version_buckets';
import { convertPrebuiltRuleAssetToRuleResponse } from '../../../rule_management/normalization/rule_converters';
import { PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS } from '../../constants';

export const reviewRuleInstallationRoute = (router: SecuritySolutionPluginRouter) => {
router.versioned
Expand All @@ -27,6 +28,9 @@ export const reviewRuleInstallationRoute = (router: SecuritySolutionPluginRouter
path: REVIEW_RULE_INSTALLATION_URL,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { createPrebuiltRuleObjectsClient } from '../../logic/rule_objects/prebui
import { fetchRuleVersionsTriad } from '../../logic/rule_versions/fetch_rule_versions_triad';
import { getVersionBuckets } from '../../model/rule_versions/get_version_buckets';
import { convertPrebuiltRuleAssetToRuleResponse } from '../../../rule_management/normalization/rule_converters';
import { PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS } from '../../constants';

export const reviewRuleUpgradeRoute = (router: SecuritySolutionPluginRouter) => {
router.versioned
Expand All @@ -33,6 +34,9 @@ export const reviewRuleUpgradeRoute = (router: SecuritySolutionPluginRouter) =>
path: REVIEW_RULE_UPGRADE_URL,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Idle socket timeout, in milliseconds, for prebuilt rules operations.
 *
 * 30 minutes = 1800000 ms = 30 minutes * 60 seconds * 1000 ms
 */
export const PREBUILT_RULES_OPERATION_SOCKET_TIMEOUT_MS = 1_800_000 as const;
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
*/

import { truncate } from 'lodash';
import moment from 'moment';
import { BadRequestError, transformError } from '@kbn/securitysolution-es-utils';
import type { IKibanaResponse, KibanaResponseFactory, Logger } from '@kbn/core/server';

Expand Down Expand Up @@ -58,6 +57,7 @@ import {
validateBulkDuplicateRule,
dryRunValidateBulkEditRule,
} from '../../../logic/bulk_actions/validations';
import { RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS } from '../../timeouts';

const MAX_RULES_TO_PROCESS_TOTAL = 10000;
const MAX_ERROR_MESSAGE_LENGTH = 1000;
Expand Down Expand Up @@ -242,7 +242,7 @@ export const performBulkActionRoute = (
options: {
tags: ['access:securitySolution', routeLimitedConcurrencyTag(MAX_ROUTE_CONCURRENCY)],
timeout: {
idleSocket: moment.duration(15, 'minutes').asMilliseconds(),
idleSocket: RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS,
},
},
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import { transformValidateBulkError } from '../../../utils/validate';
import { buildRouteValidationWithZod } from '../../../../../../utils/build_validation/route_validation';
import { validateRuleDefaultExceptionList } from '../../../logic/exceptions/validate_rule_default_exception_list';
import { validateRulesWithDuplicatedDefaultExceptionsList } from '../../../logic/exceptions/validate_rules_with_duplicated_default_exceptions_list';

import { RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS } from '../../timeouts';
import {
transformBulkError,
createBulkErrorObject,
Expand All @@ -48,6 +48,9 @@ export const bulkCreateRulesRoute = (
path: DETECTION_ENGINE_RULES_BULK_CREATE,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import { readRules } from '../../../logic/crud/read_rules';
import { getIdBulkError } from '../../../utils/utils';
import { transformValidateBulkError } from '../../../utils/validate';
import { getDeprecatedBulkEndpointHeader, logDeprecatedBulkEndpoint } from '../../deprecation';
import { RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS } from '../../timeouts';

type Handler = RequestHandler<
unknown,
Expand Down Expand Up @@ -106,6 +107,9 @@ export const bulkDeleteRulesRoute = (router: SecuritySolutionPluginRouter, logge
path: DETECTION_ENGINE_RULES_BULK_DELETE,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS,
},
},
};
const versionConfig = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import { readRules } from '../../../logic/crud/read_rules';
import { getDeprecatedBulkEndpointHeader, logDeprecatedBulkEndpoint } from '../../deprecation';
import { validateRuleDefaultExceptionList } from '../../../logic/exceptions/validate_rule_default_exception_list';
import { validateRulesWithDuplicatedDefaultExceptionsList } from '../../../logic/exceptions/validate_rules_with_duplicated_default_exceptions_list';
import { RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS } from '../../timeouts';

/**
* @deprecated since version 8.2.0. Use the detection_engine/rules/_bulk_action API instead
Expand All @@ -42,6 +43,9 @@ export const bulkPatchRulesRoute = (
path: DETECTION_ENGINE_RULES_BULK_UPDATE,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { readRules } from '../../../logic/crud/read_rules';
import { getDeprecatedBulkEndpointHeader, logDeprecatedBulkEndpoint } from '../../deprecation';
import { validateRuleDefaultExceptionList } from '../../../logic/exceptions/validate_rule_default_exception_list';
import { validateRulesWithDuplicatedDefaultExceptionsList } from '../../../logic/exceptions/validate_rules_with_duplicated_default_exceptions_list';
import { RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS } from '../../timeouts';

/**
* @deprecated since version 8.2.0. Use the detection_engine/rules/_bulk_action API instead
Expand All @@ -47,6 +48,9 @@ export const bulkUpdateRulesRoute = (
path: DETECTION_ENGINE_RULES_BULK_UPDATE,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { getNonPackagedRulesCount } from '../../../logic/search/get_existing_pre
import { getExportByObjectIds } from '../../../logic/export/get_export_by_object_ids';
import { getExportAll } from '../../../logic/export/get_export_all';
import { buildSiemResponse } from '../../../../routes/utils';
import { RULE_MANAGEMENT_IMPORT_EXPORT_SOCKET_TIMEOUT_MS } from '../../timeouts';

export const exportRulesRoute = (
router: SecuritySolutionPluginRouter,
Expand All @@ -33,6 +34,9 @@ export const exportRulesRoute = (
path: `${DETECTION_ENGINE_RULES_URL}/_export`,
options: {
tags: ['access:securitySolution'],
timeout: {
idleSocket: RULE_MANAGEMENT_IMPORT_EXPORT_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import {
getTupleDuplicateErrorsAndUniqueRules,
migrateLegacyActionsIds,
} from '../../../utils/utils';
import { RULE_MANAGEMENT_IMPORT_EXPORT_SOCKET_TIMEOUT_MS } from '../../timeouts';

const CHUNK_PARSED_OBJECT_SIZE = 50;

Expand All @@ -51,6 +52,9 @@ export const importRulesRoute = (
maxBytes: config.maxRuleImportPayloadBytes,
output: 'stream',
},
timeout: {
idleSocket: RULE_MANAGEMENT_IMPORT_EXPORT_SOCKET_TIMEOUT_MS,
},
},
})
.addVersion(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Idle socket timeout, in milliseconds, for Rule Management bulk action
 * endpoints.
 *
 * Value: 1 hour (60 minutes * 60 seconds * 1000 ms = 3600000 ms).
 */
export const RULE_MANAGEMENT_BULK_ACTION_SOCKET_TIMEOUT_MS = 3_600_000 as const;
/**
 * Idle socket timeout, in milliseconds, for Rule Management import and
 * export endpoints.
 *
 * Value: 1 hour (60 minutes * 60 seconds * 1000 ms = 3600000 ms).
 */
export const RULE_MANAGEMENT_IMPORT_EXPORT_SOCKET_TIMEOUT_MS = 3_600_000 as const;

0 comments on commit 5de5947

Please sign in to comment.