From cd909f03b1d71da93041a0b5c184243aa6506dea Mon Sep 17 00:00:00 2001 From: Kyle Pollich Date: Fri, 10 Nov 2023 11:08:09 -0500 Subject: [PATCH] [Fleet] Fix inability to upgrade agents from 8.10.4 -> 8.11 (#170974) ## Summary Closes https://github.com/elastic/kibana/issues/169825 This PR adds logic to Fleet's `/api/agents/available_versions` endpoint that will ensure we periodically try to fetch from the live product versions API at https://www.elastic.co/api/product_versions to make sure we have eventual consistency in the list of available agent versions. Currently, Kibana relies entirely on a static file generated at build time from the above API. If the API isn't up-to-date with the latest agent version (e.g. Kibana completed its build before agent), then that build of Kibana will never "see" the corresponding build of agent. The result of this API endpoint is cached in memory for one hour to prevent overfetching from this external API, and from constantly going out to disk to read from the agent versions file. ## To do - [x] Update unit tests - [x] Consider airgapped environments ## On airgapped environments In airgapped environments, we're going to try and fetch from the `product_versions` API and that request is going to fail. What we've seen happen in some environments is that these requests do not "fail fast" and instead wait until a network timeout is reached. I'd love to avoid that timeout case and somehow detect airgapped environments and avoid calling this API at all. However, we don't have a great deterministic way to know if someone is in an airgapped environment. The best guess I think we can make is by checking whether `xpack.fleet.registryUrl` is set to something other than `https://epr.elastic.co`. Curious if anyone has thoughts on this. 
## Screenshots ![image](https://github.com/elastic/kibana/assets/6766512/0906817c-0098-4b67-8791-d06730f450f6) ![image](https://github.com/elastic/kibana/assets/6766512/59e7c132-f568-470f-b48d-53761ddc2fde) ![image](https://github.com/elastic/kibana/assets/6766512/986372df-a90f-48c3-ae24-c3012e8f7730) ## To test 1. Set up Fleet Server + ES + Kibana 2. Spin up a Fleet Server running Agent v8.11.0 3. Enroll an agent running v8.10.4 (I used multipass) 4. Verify the agent can be upgraded from the UI --------- Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> --- .../fleet/server/routes/agent/handlers.ts | 3 +- .../fleet/server/services/agents/crud.test.ts | 6 + .../server/services/agents/versions.test.ts | 110 ++++++++++++++- .../fleet/server/services/agents/versions.ts | 125 +++++++++++++----- 4 files changed, 203 insertions(+), 41 deletions(-) diff --git a/x-pack/plugins/fleet/server/routes/agent/handlers.ts b/x-pack/plugins/fleet/server/routes/agent/handlers.ts index 2f344b0f3fb15..81a01de6b5fde 100644 --- a/x-pack/plugins/fleet/server/routes/agent/handlers.ts +++ b/x-pack/plugins/fleet/server/routes/agent/handlers.ts @@ -354,8 +354,9 @@ function isStringArray(arr: unknown | string[]): arr is string[] { export const getAvailableVersionsHandler: RequestHandler = async (context, request, response) => { try { - const availableVersions = await AgentService.getAvailableVersions({}); + const availableVersions = await AgentService.getAvailableVersions(); const body: GetAvailableVersionsResponse = { items: availableVersions }; + return response.ok({ body }); } catch (error) { return defaultFleetErrorHandler({ error, response }); diff --git a/x-pack/plugins/fleet/server/services/agents/crud.test.ts b/x-pack/plugins/fleet/server/services/agents/crud.test.ts index 47bb8028fffcd..e542881fe13f3 100644 --- a/x-pack/plugins/fleet/server/services/agents/crud.test.ts +++ b/x-pack/plugins/fleet/server/services/agents/crud.test.ts @@ -9,7 +9,9 @@ import 
type { ElasticsearchClient } from '@kbn/core/server'; import { elasticsearchServiceMock, savedObjectsClientMock } from '@kbn/core/server/mocks'; import { AGENTS_INDEX } from '../../constants'; +import { createAppContextStartContractMock } from '../../mocks'; import type { Agent } from '../../types'; +import { appContextService } from '../app_context'; import { auditLoggingService } from '../audit_logging'; @@ -30,6 +32,7 @@ const mockedAuditLoggingService = auditLoggingService as jest.Mocked { const soClientMock = savedObjectsClientMock.create(); + let mockContract: ReturnType; let esClientMock: ElasticsearchClient; let searchMock: jest.Mock; @@ -41,6 +44,9 @@ describe('Agents CRUD test', () => { openPointInTime: jest.fn().mockResolvedValue({ id: '1' }), closePointInTime: jest.fn(), } as unknown as ElasticsearchClient; + + mockContract = createAppContextStartContractMock(); + appContextService.start(mockContract); }); function getEsResponse(ids: string[], total: number) { diff --git a/x-pack/plugins/fleet/server/services/agents/versions.test.ts b/x-pack/plugins/fleet/server/services/agents/versions.test.ts index 92e30141c006a..513fba910705d 100644 --- a/x-pack/plugins/fleet/server/services/agents/versions.test.ts +++ b/x-pack/plugins/fleet/server/services/agents/versions.test.ts @@ -7,6 +7,8 @@ import { readFile } from 'fs/promises'; +import fetch from 'node-fetch'; + let mockKibanaVersion = '300.0.0'; let mockConfig = {}; jest.mock('../app_context', () => { @@ -21,16 +23,30 @@ jest.mock('../app_context', () => { }); jest.mock('fs/promises'); +jest.mock('node-fetch'); const mockedReadFile = readFile as jest.MockedFunction; +const mockedFetch = fetch as jest.MockedFunction; + +const emptyResponse = { + status: 200, + text: jest.fn().mockResolvedValue(JSON.stringify({})), +} as any; + import { getAvailableVersions } from './versions'; describe('getAvailableVersions', () => { + beforeEach(() => { + mockedReadFile.mockReset(); + mockedFetch.mockReset(); + }); + 
it('should return available version and filter version < 7.17', async () => { mockKibanaVersion = '300.0.0'; mockedReadFile.mockResolvedValue(`["8.1.0", "8.0.0", "7.17.0", "7.16.0"]`); + mockedFetch.mockResolvedValueOnce(emptyResponse); - const res = await getAvailableVersions({ cached: false, includeCurrentVersion: true }); + const res = await getAvailableVersions({ includeCurrentVersion: true, ignoreCache: true }); expect(res).toEqual(['300.0.0', '8.1.0', '8.0.0', '7.17.0']); }); @@ -38,8 +54,9 @@ describe('getAvailableVersions', () => { it('should not strip -SNAPSHOT from kibana version', async () => { mockKibanaVersion = '300.0.0-SNAPSHOT'; mockedReadFile.mockResolvedValue(`["8.1.0", "8.0.0", "7.17.0", "7.16.0"]`); + mockedFetch.mockResolvedValueOnce(emptyResponse); - const res = await getAvailableVersions({ cached: false, includeCurrentVersion: true }); + const res = await getAvailableVersions({ includeCurrentVersion: true, ignoreCache: true }); expect(res).toEqual(['300.0.0-SNAPSHOT', '8.1.0', '8.0.0', '7.17.0']); }); @@ -51,8 +68,9 @@ describe('getAvailableVersions', () => { }, }; mockedReadFile.mockResolvedValue(`["8.1.0", "8.0.0", "7.17.0", "7.16.0"]`); + mockedFetch.mockResolvedValueOnce(emptyResponse); - const res = await getAvailableVersions({ cached: false }); + const res = await getAvailableVersions({ ignoreCache: true }); expect(res).toEqual(['8.1.0', '8.0.0', '7.17.0']); }); @@ -60,8 +78,9 @@ describe('getAvailableVersions', () => { it('should not include the current version if includeCurrentVersion = false', async () => { mockKibanaVersion = '300.0.0-SNAPSHOT'; mockedReadFile.mockResolvedValue(`["8.1.0", "8.0.0", "7.17.0", "7.16.0"]`); + mockedFetch.mockResolvedValueOnce(emptyResponse); - const res = await getAvailableVersions({ cached: false, includeCurrentVersion: false }); + const res = await getAvailableVersions({ includeCurrentVersion: false, ignoreCache: true }); expect(res).toEqual(['8.1.0', '8.0.0', '7.17.0']); }); @@ -74,9 +93,90 @@ 
describe('getAvailableVersions', () => { }, }; mockedReadFile.mockRejectedValue({ code: 'ENOENT' }); + mockedFetch.mockResolvedValueOnce(emptyResponse); - const res = await getAvailableVersions({ cached: false }); + const res = await getAvailableVersions({ ignoreCache: true }); expect(res).toEqual(['300.0.0']); }); + + it('should include versions returned from product_versions API', async () => { + mockKibanaVersion = '300.0.0'; + mockedReadFile.mockResolvedValue(`["8.1.0", "8.0.0", "7.17.0", "7.16.0"]`); + mockedFetch.mockResolvedValueOnce({ + status: 200, + text: jest.fn().mockResolvedValue( + JSON.stringify([ + [ + { + title: 'Elastic Agent 8.1.0', + version_number: '8.1.0', + }, + { + title: 'Elastic Agent 8.10.0', + version_number: '8.10.0', + }, + { + title: 'Elastic Agent 8.9.2', + version_number: '8.9.2', + }, + , + ], + ]) + ), + } as any); + + const res = await getAvailableVersions({ ignoreCache: true }); + + // Should sort, uniquify and filter out versions < 7.17 + expect(res).toEqual(['8.10.0', '8.9.2', '8.1.0', '8.0.0', '7.17.0']); + }); + + it('should cache results', async () => { + mockKibanaVersion = '300.0.0'; + mockedReadFile.mockResolvedValue(`["8.1.0", "8.0.0", "7.17.0", "7.16.0"]`); + mockedFetch.mockResolvedValueOnce({ + status: 200, + text: jest.fn().mockResolvedValue( + JSON.stringify([ + [ + { + title: 'Elastic Agent 8.1.0', + version_number: '8.1.0', + }, + { + title: 'Elastic Agent 8.10.0', + version_number: '8.10.0', + }, + { + title: 'Elastic Agent 8.9.2', + version_number: '8.9.2', + }, + , + ], + ]) + ), + } as any); + + await getAvailableVersions(); + + mockedFetch.mockResolvedValueOnce({ + status: 200, + text: jest.fn().mockResolvedValue( + JSON.stringify([ + [ + { + title: 'Elastic Agent 300.0.0', + version_number: '300.0.0', + }, + ], + ]) + ), + } as any); + + const res2 = await getAvailableVersions(); + + expect(mockedFetch).toBeCalledTimes(1); + expect(res2).not.toContain('300.0.0'); + }); }); diff --git 
a/x-pack/plugins/fleet/server/services/agents/versions.ts b/x-pack/plugins/fleet/server/services/agents/versions.ts index 7a1b82bb72359..8f31d3f12b344 100644 --- a/x-pack/plugins/fleet/server/services/agents/versions.ts +++ b/x-pack/plugins/fleet/server/services/agents/versions.ts @@ -8,18 +8,27 @@ import { readFile } from 'fs/promises'; import Path from 'path'; -import { REPO_ROOT } from '@kbn/repo-info'; +import fetch from 'node-fetch'; +import pRetry from 'p-retry'; import { uniq } from 'lodash'; import semverGte from 'semver/functions/gte'; import semverGt from 'semver/functions/gt'; import semverCoerce from 'semver/functions/coerce'; +import { REPO_ROOT } from '@kbn/repo-info'; + import { appContextService } from '..'; const MINIMUM_SUPPORTED_VERSION = '7.17.0'; const AGENT_VERSION_BUILD_FILE = 'x-pack/plugins/fleet/target/agent_versions_list.json'; -let availableVersions: string[] | undefined; +// Endpoint maintained by the web-team and hosted on the elastic website +const PRODUCT_VERSIONS_URL = 'https://www.elastic.co/api/product_versions'; + +// Cache available versions in memory for 1 hour +const CACHE_DURATION = 1000 * 60 * 60; +let CACHED_AVAILABLE_VERSIONS: string[] | undefined; +let LAST_FETCHED: number | undefined; export const getLatestAvailableVersion = async ( includeCurrentVersion?: boolean @@ -30,54 +39,100 @@ export const getLatestAvailableVersion = async ( }; export const getAvailableVersions = async ({ - cached = true, includeCurrentVersion, + ignoreCache = false, // This is only here to allow us to ignore the cache in tests }: { - cached?: boolean; includeCurrentVersion?: boolean; -}): Promise => { - // Use cached value to avoid reading from disk each time - if (cached && availableVersions) { - return availableVersions; + ignoreCache?: boolean; +} = {}): Promise => { + const logger = appContextService.getLogger(); + + if (LAST_FETCHED && !ignoreCache) { + const msSinceLastFetched = Date.now() - (LAST_FETCHED || 0); + + if (msSinceLastFetched 
< CACHE_DURATION && CACHED_AVAILABLE_VERSIONS !== undefined) { + logger.debug(`Cache is valid, returning cached available versions`); + + return CACHED_AVAILABLE_VERSIONS; + } + + logger.debug('Cache has expired, fetching available versions from disk + API'); } - // Read a static file generated at build time const config = appContextService.getConfig(); - let versionsToDisplay: string[] = []; - const kibanaVersion = appContextService.getKibanaVersion(); + let availableVersions: string[] = []; + + // First, grab available versions from the static file that's placed on disk at build time try { const file = await readFile(Path.join(REPO_ROOT, AGENT_VERSION_BUILD_FILE), 'utf-8'); - - // Exclude versions older than MINIMUM_SUPPORTED_VERSION and pre-release versions (SNAPSHOT, rc..) - // De-dup and sort in descending order const data: string[] = JSON.parse(file); - const versions = data - .map((item: any) => semverCoerce(item)?.version || '') - .filter((v: any) => semverGte(v, MINIMUM_SUPPORTED_VERSION)) - .sort((a: any, b: any) => (semverGt(a, b) ? -1 : 1)); - versionsToDisplay = uniq(versions) as string[]; + availableVersions = [...availableVersions, ...data]; + } catch (error) { + // If we can't read from the file, the error is non-blocking. We'll try to source data from the + // product versions API later. + logger.debug(`Error reading file ${AGENT_VERSION_BUILD_FILE}: ${error.message}`); + } + + // Next, fetch from the product versions API. This API call is aggressively cached, so we won't + // fetch from the live API more than once every `CACHE_DURATION` milliseconds. + const apiVersions = await fetchAgentVersionsFromApi(); + + // Coerce each version to a semver object and compare to our `MINIMUM_SUPPORTED_VERSION` - we + // only want supported versions in the final result. We'll also sort by newest version first. 
+ availableVersions = uniq([...availableVersions, ...apiVersions]) + .map((item: any) => semverCoerce(item)?.version || '') + .filter((v: any) => semverGte(v, MINIMUM_SUPPORTED_VERSION)) + .sort((a: any, b: any) => (semverGt(a, b) ? -1 : 1)); + + // If the current stack version isn't included in the list of available versions, add it + // at the front of the array + const hasCurrentVersion = availableVersions.some((v) => v === kibanaVersion); + if (includeCurrentVersion && !hasCurrentVersion) { + availableVersions = [kibanaVersion, ...availableVersions]; + } - const appendCurrentVersion = includeCurrentVersion; + // Allow upgrading to the current stack version if this override flag is provided via `kibana.yml`. + // This is useful for development purposes. + if (availableVersions.length === 0 && !config?.internal?.onlyAllowAgentUpgradeToKnownVersions) { + availableVersions = [kibanaVersion]; + } - if (appendCurrentVersion) { - // Add current version if not already present - const hasCurrentVersion = versionsToDisplay.some((v) => v === kibanaVersion); + // Don't prime the cache in tests + if (!ignoreCache) { + CACHED_AVAILABLE_VERSIONS = availableVersions; + LAST_FETCHED = Date.now(); + } - versionsToDisplay = !hasCurrentVersion - ? [kibanaVersion].concat(versionsToDisplay) - : versionsToDisplay; - } + return availableVersions; +}; - availableVersions = versionsToDisplay; +async function fetchAgentVersionsFromApi() { + const logger = appContextService.getLogger(); - return availableVersions; - } catch (e) { - if (e.code === 'ENOENT') { - return config?.internal?.onlyAllowAgentUpgradeToKnownVersions ? 
[] : [kibanaVersion]; - } - throw e; + const options = { + headers: { + 'Content-Type': 'application/json', + }, + }; + + const response = await pRetry(() => fetch(PRODUCT_VERSIONS_URL, options), { retries: 1 }); + const rawBody = await response.text(); + + // We need to handle non-200 responses gracefully here to support airgapped environments where + // Kibana doesn't have internet access to query this API + if (response.status >= 400) { + logger.debug(`Status code ${response.status} received from versions API: ${rawBody}`); + return []; } -}; + + const jsonBody = JSON.parse(rawBody); + + const versions: string[] = (jsonBody.length ? jsonBody[0] : []) + .filter((item: any) => item?.title?.includes('Elastic Agent')) + .map((item: any) => item?.version_number); + + return versions; +}