From 9d90a2dcd9a9e9ed6b0cd21d66479866629fd72a Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 21 Jun 2023 13:40:54 -0700 Subject: [PATCH] Skip downgrade if requested version below daemon version (#2850) * skip downgrade for agent update * add test * report it in status * address comments * revert change * improved error msg * address comment --- azurelinuxagent/common/exception.py | 9 ++++ azurelinuxagent/ga/agent_update_handler.py | 48 +++++++++++++++------- tests/data/wire/ga_manifest.xml | 3 ++ tests/ga/test_agent_update_handler.py | 26 +++++++++++- tests/ga/test_update.py | 2 +- tests_e2e/tests/agent_update/rsm_update.py | 19 +++++++++ 6 files changed, 90 insertions(+), 17 deletions(-) diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index 048466232..603ed1aa2 100644 --- a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -75,6 +75,15 @@ def __init__(self, msg=None, inner=None): super(AgentNetworkError, self).__init__(msg, inner) +class AgentUpdateError(AgentError): + """ + When agent failed to update. + """ + + def __init__(self, msg=None, inner=None): + super(AgentUpdateError, self).__init__(msg, inner) + + class CGroupsException(AgentError): """ Exception to classify any cgroups related issue. 
diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 3acb5b14c..920007505 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -5,14 +5,14 @@ from azurelinuxagent.common import conf, logger from azurelinuxagent.common.event import add_event, WALAEventOperation -from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.logger import LogLevel from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.protocol.restapi import VERSION_0, VMAgentUpdateStatuses, VMAgentUpdateStatus from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN +from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState @@ -37,8 +37,6 @@ def __init__(self): self.last_attempted_requested_version_update_time = datetime.datetime.min self.last_attempted_hotfix_update_time = datetime.datetime.min self.last_attempted_normal_update_time = datetime.datetime.min - self.last_warning = "" - self.last_warning_time = datetime.datetime.min class AgentUpdateHandler(object): @@ -130,10 +128,10 @@ def __get_agent_family_manifests(self, goal_state): agent_family_manifests.append(m) if not family_found: - raise Exception(u"Agent family: {0} not found in the goal state, skipping agent update".format(family)) + raise AgentUpdateError(u"Agent family: {0} not found in the goal state, skipping agent 
update".format(family)) if len(agent_family_manifests) == 0: - raise Exception( + raise AgentUpdateError( u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format( self._ga_family, self._gs_id)) return agent_family_manifests[0] @@ -179,7 +177,7 @@ def __get_agent_package_to_download(self, agent_manifest, version): # Found a matching package, only download that one return pkg - raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " + raise AgentUpdateError("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " "skipping agent update".format(str(version), self._gs_id)) @staticmethod @@ -245,6 +243,15 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] + @staticmethod + def __get_daemon_version_for_update(): + daemon_version = get_daemon_version() + if daemon_version != FlexibleVersion(VERSION_0): + return daemon_version + # We return 0.0.0.0 if daemon version is not specified. In that case, + # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. 
+ return FlexibleVersion("2.2.53") + @staticmethod def __log_event(level, msg, success=True): if level == LogLevel.INFO: @@ -291,11 +298,20 @@ def run(self, goal_state): if warn_msg != "": self.__log_event(LogLevel.WARNING, warn_msg) - msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( - self._gs_id, str(requested_version)) - self.__log_event(LogLevel.INFO, msg) - try: + daemon_version = self.__get_daemon_version_for_update() + if requested_version < daemon_version: + # Don't process the update if the requested version is less than the daemon version, + # as historically we don't support downgrades below the daemon version. The daemon will not pick up the requested version; instead it starts + # the latest installed version again. When that happens the agent goes into a loop of downloading the requested version, exiting, and starting again with the same version. + # + raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than " + "the Agent installed on the image ({1}) is not supported. 
Skipping downgrade.".format(requested_version, daemon_version)) + + msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( + self._gs_id, str(requested_version)) + self.__log_event(LogLevel.INFO, msg) + agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) if agent.is_blacklisted or not agent.is_downloaded: @@ -314,9 +330,13 @@ def run(self, goal_state): except Exception as err: if isinstance(err, AgentUpgradeExitException): raise err + elif isinstance(err, AgentUpdateError): + error_msg = ustr(err) + else: + error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) + self.__log_event(LogLevel.WARNING, error_msg, success=False) if "Missing requested version" not in GAUpdateReportState.report_error_msg: - GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) - self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success=False) + GAUpdateReportState.report_error_msg = error_msg def get_vmagent_update_status(self): """ diff --git a/tests/data/wire/ga_manifest.xml b/tests/data/wire/ga_manifest.xml index e12f05491..799e1f111 100644 --- a/tests/data/wire/ga_manifest.xml +++ b/tests/data/wire/ga_manifest.xml @@ -25,6 +25,9 @@ 2.1.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0 + + 2.5.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.5.0 + 9.9.9.10 diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 5b7800b8c..9e01d0b6c 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -80,7 +80,7 @@ def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1, def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in 
mock_telemetry.call_args_list if - 'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ + 'No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent package not found. Got: {0}".format( @@ -217,7 +217,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - downgraded_version = "1.2.0" + downgraded_version = "2.5.0" with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) @@ -230,6 +230,28 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c versions=[downgraded_version, str(CURRENT_VERSION)]) self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + def test_it_should_not_downgrade_below_daemon_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + downgraded_version = "1.2.0" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + 
self.assertFalse(os.path.exists(self.agent_dir(downgraded_version)), + "New agent directory should not be found") + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "The Agent received a request to downgrade to version" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "We should allow downgrade above daemon version") + def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): data_file = DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index b73ad3db8..5b4babfd1 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1818,7 +1818,7 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION))) self.assertFalse(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted, "The current agent should not be blacklisted") - downgraded_version = "1.2.0" + downgraded_version = "2.5.0" data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 05112fc19..0493efcad 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -27,6 +27,7 @@ from typing import List, Dict, Any import requests +from assertpy import assert_that from azure.identity import DefaultAzureCredential from azure.mgmt.compute.models import VirtualMachine from msrestazure.azure_cloud import Cloud @@ -96,8 +97,19 @@ def run(self) -> None: version: str = "1.3.1.0" log.info("Attempting update version same as current version %s", upgrade_version) self._request_rsm_update(version) + self._check_rsm_gs(version) self._verify_guest_agent_update(version) + # verify requested version below daemon version + 
log.info("*******Verifying requested version below daemon version scenario*******") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm before update \n%s", stdout) + version: str = "0.5.0" + log.info("Attempting requested version %s", version) + self._request_rsm_update(version) + self._check_rsm_gs(version) + self._verify_no_guest_agent_update(stdout) + def _check_rsm_gs(self, requested_version: str) -> None: # This checks if RSM GS available to the agent after we mock the rsm update request output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True) @@ -184,6 +196,13 @@ def _check_agent_version(requested_version: str) -> bool: stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}") + def _verify_no_guest_agent_update(self, previous_agent: str) -> None: + """ + Verify the agent version is unchanged after the update attempt. The description is set before the assertion so it appears in the failure message. + """ + current_agent: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + assert_that(current_agent).described_as(f"Agent version changed.\n Previous Agent {previous_agent} \n Current agent {current_agent}").is_equal_to(previous_agent) + def _verify_agent_reported_supported_feature_flag(self): """ RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log