From 9d90a2dcd9a9e9ed6b0cd21d66479866629fd72a Mon Sep 17 00:00:00 2001
From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com>
Date: Wed, 21 Jun 2023 13:40:54 -0700
Subject: [PATCH] Skip downgrade if requested version below daemon version
(#2850)
* skip downgrade for agent update
* add test
* report it in status
* address comments
* revert change
* improved error msg
* address comment
---
azurelinuxagent/common/exception.py | 9 ++++
azurelinuxagent/ga/agent_update_handler.py | 48 +++++++++++++++-------
tests/data/wire/ga_manifest.xml | 3 ++
tests/ga/test_agent_update_handler.py | 26 +++++++++++-
tests/ga/test_update.py | 2 +-
tests_e2e/tests/agent_update/rsm_update.py | 19 +++++++++
6 files changed, 90 insertions(+), 17 deletions(-)
diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py
index 048466232..603ed1aa2 100644
--- a/azurelinuxagent/common/exception.py
+++ b/azurelinuxagent/common/exception.py
@@ -75,6 +75,15 @@ def __init__(self, msg=None, inner=None):
super(AgentNetworkError, self).__init__(msg, inner)
+class AgentUpdateError(AgentError):
+ """
+ Raised when the agent fails to update or an agent update has to be skipped.
+ """
+
+ def __init__(self, msg=None, inner=None):
+ super(AgentUpdateError, self).__init__(msg, inner)
+
+
class CGroupsException(AgentError):
"""
Exception to classify any cgroups related issue.
diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py
index 3acb5b14c..920007505 100644
--- a/azurelinuxagent/ga/agent_update_handler.py
+++ b/azurelinuxagent/ga/agent_update_handler.py
@@ -5,14 +5,14 @@
from azurelinuxagent.common import conf, logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
-from azurelinuxagent.common.exception import AgentUpgradeExitException
+from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.logger import LogLevel
from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource
-from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus
+from azurelinuxagent.common.protocol.restapi import VERSION_0, VMAgentUpdateStatuses, VMAgentUpdateStatus
from azurelinuxagent.common.utils import fileutil, textutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
-from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN
+from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN
from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState
@@ -37,8 +37,6 @@ def __init__(self):
self.last_attempted_requested_version_update_time = datetime.datetime.min
self.last_attempted_hotfix_update_time = datetime.datetime.min
self.last_attempted_normal_update_time = datetime.datetime.min
- self.last_warning = ""
- self.last_warning_time = datetime.datetime.min
class AgentUpdateHandler(object):
@@ -130,10 +128,10 @@ def __get_agent_family_manifests(self, goal_state):
agent_family_manifests.append(m)
if not family_found:
- raise Exception(u"Agent family: {0} not found in the goal state, skipping agent update".format(family))
+ raise AgentUpdateError(u"Agent family: {0} not found in the goal state, skipping agent update".format(family))
if len(agent_family_manifests) == 0:
- raise Exception(
+ raise AgentUpdateError(
u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format(
self._ga_family, self._gs_id))
return agent_family_manifests[0]
@@ -179,7 +177,7 @@ def __get_agent_package_to_download(self, agent_manifest, version):
# Found a matching package, only download that one
return pkg
- raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, "
+ raise AgentUpdateError("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, "
"skipping agent update".format(str(version), self._gs_id))
@staticmethod
@@ -245,6 +243,15 @@ def __get_all_agents_on_disk():
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)]
+ @staticmethod
+ def __get_daemon_version_for_update():
+ """Return the daemon version that requested versions are compared against, or 2.2.53 when it is not specified."""
+ daemon_version = get_daemon_version()
+ if daemon_version != FlexibleVersion(VERSION_0):
+ return daemon_version
+ # VERSION_0 (0.0.0.0) means the daemon version is not specified; fall back to 2.2.53, the first version that set it.
+ return FlexibleVersion("2.2.53")
+
@staticmethod
def __log_event(level, msg, success=True):
if level == LogLevel.INFO:
@@ -291,11 +298,20 @@ def run(self, goal_state):
if warn_msg != "":
self.__log_event(LogLevel.WARNING, warn_msg)
- msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format(
- self._gs_id, str(requested_version))
- self.__log_event(LogLevel.INFO, msg)
-
try:
+ daemon_version = self.__get_daemon_version_for_update()
+ if requested_version < daemon_version:
+ # Don't process the update if the requested version is lower than the daemon version,
+ # as historically we don't support downgrades below the daemon version. The daemon would not pick up the requested version and would instead start
+ # the latest installed version again. The agent would then go into a loop of downloading the requested version, exiting, and starting again with the same version.
+ #
+ raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than "
+ "the Agent installed on the image ({1}) is not supported. Skipping downgrade.".format(requested_version, daemon_version))
+
+ msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format(
+ self._gs_id, str(requested_version))
+ self.__log_event(LogLevel.INFO, msg)
+
agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version)
if agent.is_blacklisted or not agent.is_downloaded:
@@ -314,9 +330,13 @@ def run(self, goal_state):
except Exception as err:
if isinstance(err, AgentUpgradeExitException):
raise err
+ elif isinstance(err, AgentUpdateError):
+ error_msg = ustr(err)
+ else:
+ error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err))
+ self.__log_event(LogLevel.WARNING, error_msg, success=False)
if "Missing requested version" not in GAUpdateReportState.report_error_msg:
- GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err))
- self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success=False)
+ GAUpdateReportState.report_error_msg = error_msg
def get_vmagent_update_status(self):
"""
diff --git a/tests/data/wire/ga_manifest.xml b/tests/data/wire/ga_manifest.xml
index e12f05491..799e1f111 100644
--- a/tests/data/wire/ga_manifest.xml
+++ b/tests/data/wire/ga_manifest.xml
@@ -25,6 +25,9 @@
2.1.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0
+
+ 2.5.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.5.0
+
9.9.9.10
diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py
index 5b7800b8c..9e01d0b6c 100644
--- a/tests/ga/test_agent_update_handler.py
+++ b/tests/ga/test_agent_update_handler.py
@@ -80,7 +80,7 @@ def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1,
def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"):
upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
- 'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[
+ 'No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]
self.assertEqual(1, len(upgrade_event_msgs),
"Did not find the event indicating that the agent package not found. Got: {0}".format(
@@ -217,7 +217,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c
self.prepare_agents()
self.assertEqual(20, self.agent_count(), "Agent directories not set properly")
- downgraded_version = "1.2.0"
+ downgraded_version = "2.5.0"
with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version)
@@ -230,6 +230,28 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c
versions=[downgraded_version, str(CURRENT_VERSION)])
self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason))
+ def test_it_should_not_downgrade_below_daemon_version(self):
+ """Request version 1.2.0 and check that no agent directory is created for it and one skipped-downgrade event is emitted."""
+ data_file = DATA_FILE.copy()
+ data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
+ # Set the test environment by adding 20 random agents to the agent directory
+ self.prepare_agents()
+ self.assertEqual(20, self.agent_count(), "Agent directories not set properly")
+
+ downgraded_version = "1.2.0"
+
+ with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
+ agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version)
+ agent_update_handler._protocol.mock_wire_data.set_incarnation(2)
+ agent_update_handler._protocol.client.update_goal_state()
+ agent_update_handler.run(agent_update_handler._protocol.get_goal_state())
+ self.assertFalse(os.path.exists(self.agent_dir(downgraded_version)),
+ "New agent directory should not be found")
+ self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
+ "The Agent received a request to downgrade to version" in kwarg[
+ 'message'] and kwarg[
+ 'op'] == WALAEventOperation.AgentUpgrade]), "Did not find the event indicating that the downgrade below the daemon version was skipped")
+
def test_handles_if_requested_version_not_found_in_pkgs_to_download(self):
data_file = DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py
index b73ad3db8..5b4babfd1 100644
--- a/tests/ga/test_update.py
+++ b/tests/ga/test_update.py
@@ -1818,7 +1818,7 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self):
self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION)))
self.assertFalse(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted,
"The current agent should not be blacklisted")
- downgraded_version = "1.2.0"
+ downgraded_version = "2.5.0"
data_file = mockwiredata.DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py
index 05112fc19..0493efcad 100644
--- a/tests_e2e/tests/agent_update/rsm_update.py
+++ b/tests_e2e/tests/agent_update/rsm_update.py
@@ -27,6 +27,7 @@
from typing import List, Dict, Any
import requests
+from assertpy import assert_that
from azure.identity import DefaultAzureCredential
from azure.mgmt.compute.models import VirtualMachine
from msrestazure.azure_cloud import Cloud
@@ -96,8 +97,19 @@ def run(self) -> None:
version: str = "1.3.1.0"
log.info("Attempting update version same as current version %s", upgrade_version)
self._request_rsm_update(version)
+ self._check_rsm_gs(version)
self._verify_guest_agent_update(version)
+ # verify requested version below daemon version
+ log.info("*******Verifying requested version below daemon version scenario*******")
+ stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
+ log.info("Current agent version running on the vm before update \n%s", stdout)
+ version: str = "0.5.0"
+ log.info("Attempting requested version %s", version)
+ self._request_rsm_update(version)
+ self._check_rsm_gs(version)
+ self._verify_no_guest_agent_update(stdout)
+
def _check_rsm_gs(self, requested_version: str) -> None:
# This checks if RSM GS available to the agent after we mock the rsm update request
output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True)
@@ -184,6 +196,13 @@ def _check_agent_version(requested_version: str) -> bool:
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}")
+ def _verify_no_guest_agent_update(self, previous_agent: str) -> None:
+ """Verify that `waagent-version` still reports previous_agent, its output captured before the update attempt."""
+ current_agent: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
+ # described_as() only affects assertions chained after it; placed after is_equal_to() the failure
+ # is raised first and the description never reaches the error message.
+ assert_that(current_agent).described_as(f"Agent version changed.\n Previous Agent {previous_agent} \n Current agent {current_agent}").is_equal_to(previous_agent)
+
def _verify_agent_reported_supported_feature_flag(self):
"""
RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log