From 80273e2bc134b7c8440a84c461716a1d9648eb27 Mon Sep 17 00:00:00 2001 From: Chuan-kai Lin Date: Fri, 19 Sep 2025 09:40:09 -0700 Subject: [PATCH] Overlay: use restoreCache() timeout This commit changes overlay-base database download to pass the segmentTimeoutInMs option to restoreCache(), so that restoreCache() itself can properly abort slow downloads. The waitForResultWithTimeLimit() wrapper around restoreCache() remains as a second line of defense, but with a higher 10-minute time limit, to guard against cache restore hangs outside segment downloads. --- lib/analyze-action.js | 2 +- lib/init-action.js | 34 ++++++++++++++++++++++++++++-- src/overlay-database-utils.ts | 39 +++++++++++++++++++++++++++++++++-- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/lib/analyze-action.js b/lib/analyze-action.js index b15581ae2e..59c7f450d7 100644 --- a/lib/analyze-action.js +++ b/lib/analyze-action.js @@ -90872,7 +90872,7 @@ function computeChangedFiles(baseFileOids, overlayFileOids) { } var CACHE_VERSION = 1; var CACHE_PREFIX = "codeql-overlay-base-database"; -var MAX_CACHE_OPERATION_MS = 12e4; +var MAX_CACHE_OPERATION_MS = 6e5; function checkOverlayBaseDatabase(config, logger, warningPrefix) { const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config); if (!fs6.existsSync(baseDatabaseOidsFilePath)) { diff --git a/lib/init-action.js b/lib/init-action.js index fe8a41dca6..cc482e07ad 100644 --- a/lib/init-action.js +++ b/lib/init-action.js @@ -86478,7 +86478,7 @@ function computeChangedFiles(baseFileOids, overlayFileOids) { } var CACHE_VERSION = 1; var CACHE_PREFIX = "codeql-overlay-base-database"; -var MAX_CACHE_OPERATION_MS = 12e4; +var MAX_CACHE_OPERATION_MS = 6e5; function checkOverlayBaseDatabase(config, logger, warningPrefix) { const baseDatabaseOidsFilePath = getBaseDatabaseOidsFilePath(config); if (!fs6.existsSync(baseDatabaseOidsFilePath)) { @@ -86522,8 +86522,38 @@ async function downloadOverlayBaseDatabaseFromCache(codeql, config, logger) { try { const databaseDownloadStart = performance.now(); const foundKey = await waitForResultWithTimeLimit( + // This ten-minute limit for the cache restore operation is mainly to + // guard against the possibility that the cache service is unresponsive + // and hangs outside the data download. + // + // Data download (which is normally the most time-consuming part of the + // restore operation) should not run long enough to hit this limit. Even + // for an extremely large 10GB database, at a download speed of 40MB/s + // (see below), the download should complete within five minutes. If we + // do hit this limit, there are likely more serious problems other than + // mere slow download speed. + // + // This is important because we don't want any ongoing file operations + // on the database directory when we do hit this limit. Hitting this + // time limit takes us to a fallback path where we re-initialize the + // database from scratch at dbLocation, and having the cache restore + // operation continue to write into dbLocation in the background would + // really mess things up. We want to hit this limit only in the case + // of a hung cache service, not just slow download speed. MAX_CACHE_OPERATION_MS, - actionsCache.restoreCache([dbLocation], cacheRestoreKeyPrefix), + actionsCache.restoreCache( + [dbLocation], + cacheRestoreKeyPrefix, + void 0, + { + // Azure SDK download (which is the default) uses 128MB segments; see + // https://github.com/actions/toolkit/blob/main/packages/cache/README.md. + // Setting segmentTimeoutInMs to 3000 translates to segment download + // speed of about 40 MB/s, which should be achievable unless the + // download is unreliable (in which case we do want to abort). + segmentTimeoutInMs: 3e3 + } + ), () => { logger.info("Timed out downloading overlay-base database from cache"); } diff --git a/src/overlay-database-utils.ts b/src/overlay-database-utils.ts index de40ac660e..f8cd553c7a 100644 --- a/src/overlay-database-utils.ts +++ b/src/overlay-database-utils.ts @@ -158,7 +158,12 @@ function computeChangedFiles( // Constants for database caching const CACHE_VERSION = 1; const CACHE_PREFIX = "codeql-overlay-base-database"; -const MAX_CACHE_OPERATION_MS = 120_000; // Two minutes + +// The purpose of this ten-minute limit is to guard against the possibility +// that the cache service is unresponsive, which would otherwise cause the +// entire action to hang. Normally we expect cache operations to complete +// within two minutes. +const MAX_CACHE_OPERATION_MS = 600_000; /** * Checks that the overlay-base database is valid by checking for the @@ -351,8 +356,38 @@ export async function downloadOverlayBaseDatabaseFromCache( try { const databaseDownloadStart = performance.now(); const foundKey = await waitForResultWithTimeLimit( + // This ten-minute limit for the cache restore operation is mainly to + // guard against the possibility that the cache service is unresponsive + // and hangs outside the data download. + // + // Data download (which is normally the most time-consuming part of the + // restore operation) should not run long enough to hit this limit. Even + // for an extremely large 10GB database, at a download speed of 40MB/s + // (see below), the download should complete within five minutes. If we + // do hit this limit, there are likely more serious problems other than + // mere slow download speed. + // + // This is important because we don't want any ongoing file operations + // on the database directory when we do hit this limit. Hitting this + // time limit takes us to a fallback path where we re-initialize the + // database from scratch at dbLocation, and having the cache restore + // operation continue to write into dbLocation in the background would + // really mess things up. We want to hit this limit only in the case + // of a hung cache service, not just slow download speed. MAX_CACHE_OPERATION_MS, - actionsCache.restoreCache([dbLocation], cacheRestoreKeyPrefix), + actionsCache.restoreCache( + [dbLocation], + cacheRestoreKeyPrefix, + undefined, + { + // Azure SDK download (which is the default) uses 128MB segments; see + // https://github.com/actions/toolkit/blob/main/packages/cache/README.md. + // Setting segmentTimeoutInMs to 3000 translates to segment download + // speed of about 40 MB/s, which should be achievable unless the + // download is unreliable (in which case we do want to abort). + segmentTimeoutInMs: 3000, + }, + ), () => { logger.info("Timed out downloading overlay-base database from cache"); },