Skip to content

Commit

Permalink
Skip downgrade if requested version below daemon version (#2850)
Browse files Browse the repository at this point in the history
* skip downgrade for agent update

* add test

* report it in status

* address comments

* revert change

* improved error msg

* address comment
  • Loading branch information
nagworld9 authored Jun 21, 2023
1 parent 33493d0 commit 9d90a2d
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 17 deletions.
9 changes: 9 additions & 0 deletions azurelinuxagent/common/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ def __init__(self, msg=None, inner=None):
super(AgentNetworkError, self).__init__(msg, inner)


class AgentUpdateError(AgentError):
    """
    Raised when the agent fails to update itself.
    """

    def __init__(self, msg=None, inner=None):
        # Explicit two-argument super() keeps this compatible with Python 2 style classes used in this module.
        super(AgentUpdateError, self).__init__(msg, inner)


class CGroupsException(AgentError):
"""
Exception to classify any cgroups related issue.
Expand Down
48 changes: 34 additions & 14 deletions azurelinuxagent/ga/agent_update_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

from azurelinuxagent.common import conf, logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import AgentUpgradeExitException
from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.logger import LogLevel
from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource
from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus
from azurelinuxagent.common.protocol.restapi import VERSION_0, VMAgentUpdateStatuses, VMAgentUpdateStatus
from azurelinuxagent.common.utils import fileutil, textutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN
from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN
from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState


Expand All @@ -37,8 +37,6 @@ def __init__(self):
self.last_attempted_requested_version_update_time = datetime.datetime.min
self.last_attempted_hotfix_update_time = datetime.datetime.min
self.last_attempted_normal_update_time = datetime.datetime.min
self.last_warning = ""
self.last_warning_time = datetime.datetime.min


class AgentUpdateHandler(object):
Expand Down Expand Up @@ -130,10 +128,10 @@ def __get_agent_family_manifests(self, goal_state):
agent_family_manifests.append(m)

if not family_found:
raise Exception(u"Agent family: {0} not found in the goal state, skipping agent update".format(family))
raise AgentUpdateError(u"Agent family: {0} not found in the goal state, skipping agent update".format(family))

if len(agent_family_manifests) == 0:
raise Exception(
raise AgentUpdateError(
u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format(
self._ga_family, self._gs_id))
return agent_family_manifests[0]
Expand Down Expand Up @@ -179,7 +177,7 @@ def __get_agent_package_to_download(self, agent_manifest, version):
# Found a matching package, only download that one
return pkg

raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, "
raise AgentUpdateError("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, "
"skipping agent update".format(str(version), self._gs_id))

@staticmethod
Expand Down Expand Up @@ -245,6 +243,15 @@ def __get_all_agents_on_disk():
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)]

@staticmethod
def __get_daemon_version_for_update():
    """
    Return the daemon version that a requested agent version is compared against.

    get_daemon_version() reports 0.0.0.0 (VERSION_0) when the daemon version is not
    specified. In that case fall back to 2.2.53 as the minimum, since the daemon
    version started being set in 2.2.53.
    """
    reported_version = get_daemon_version()
    unspecified_version = FlexibleVersion(VERSION_0)
    return reported_version if reported_version != unspecified_version else FlexibleVersion("2.2.53")

@staticmethod
def __log_event(level, msg, success=True):
if level == LogLevel.INFO:
Expand Down Expand Up @@ -291,11 +298,20 @@ def run(self, goal_state):
if warn_msg != "":
self.__log_event(LogLevel.WARNING, warn_msg)

msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format(
self._gs_id, str(requested_version))
self.__log_event(LogLevel.INFO, msg)

try:
daemon_version = self.__get_daemon_version_for_update()
if requested_version < daemon_version:
# Don't process the update if the requested version is lower than the daemon version: historically, downgrades
# below the daemon version are not supported. The daemon would not pick up the requested version and would instead
# start the latest installed version again. The agent would then loop forever: download the requested version,
# exit, and start again with the same version.
raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than "
"the Agent installed on the image ({1}) is not supported. Skipping downgrade.".format(requested_version, daemon_version))

msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format(
self._gs_id, str(requested_version))
self.__log_event(LogLevel.INFO, msg)

agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version)

if agent.is_blacklisted or not agent.is_downloaded:
Expand All @@ -314,9 +330,13 @@ def run(self, goal_state):
except Exception as err:
if isinstance(err, AgentUpgradeExitException):
raise err
elif isinstance(err, AgentUpdateError):
error_msg = ustr(err)
else:
error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err))
self.__log_event(LogLevel.WARNING, error_msg, success=False)
if "Missing requested version" not in GAUpdateReportState.report_error_msg:
GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err))
self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success=False)
GAUpdateReportState.report_error_msg = error_msg

def get_vmagent_update_status(self):
"""
Expand Down
3 changes: 3 additions & 0 deletions tests/data/wire/ga_manifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
<Plugin>
<Version>2.1.0</Version><Uris><Uri>http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0</Uri></Uris>
</Plugin>
<Plugin>
<Version>2.5.0</Version><Uris><Uri>http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.5.0</Uri></Uris>
</Plugin>
<Plugin>
<Version>9.9.9.10</Version>
<Uris>
Expand Down
26 changes: 24 additions & 2 deletions tests/ga/test_agent_update_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1,

def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"):
upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[
'No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]
self.assertEqual(1, len(upgrade_event_msgs),
"Did not find the event indicating that the agent package not found. Got: {0}".format(
Expand Down Expand Up @@ -217,7 +217,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c
self.prepare_agents()
self.assertEqual(20, self.agent_count(), "Agent directories not set properly")

downgraded_version = "1.2.0"
downgraded_version = "2.5.0"

with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version)
Expand All @@ -230,6 +230,28 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c
versions=[downgraded_version, str(CURRENT_VERSION)])
self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason))

def test_it_should_not_downgrade_below_daemon_version(self):
    """
    A goal state requesting a version below the daemon version must not downgrade the agent.

    The handler is expected to leave the requested agent un-downloaded and to emit exactly one
    AgentUpgrade telemetry event explaining that the downgrade request was received and skipped.
    """
    data_file = DATA_FILE.copy()
    data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"

    # Set the test environment by adding 20 random agents to the agent directory
    self.prepare_agents()
    self.assertEqual(20, self.agent_count(), "Agent directories not set properly")

    # Requested version for the "below daemon version" scenario this test exercises
    downgraded_version = "1.2.0"

    with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
        agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version)
        agent_update_handler._protocol.mock_wire_data.set_incarnation(2)
        agent_update_handler._protocol.client.update_goal_state()
        agent_update_handler.run(agent_update_handler._protocol.get_goal_state())
        self.assertFalse(os.path.exists(self.agent_dir(downgraded_version)),
                         "New agent directory should not be found")

        # Telemetry events reporting that the downgrade request was rejected
        skipped_downgrade_msgs = [
            kwarg['message'] for _, kwarg in mock_telemetry.call_args_list
            if "The Agent received a request to downgrade to version" in kwarg['message']
            and kwarg['op'] == WALAEventOperation.AgentUpgrade
        ]
        self.assertEqual(1, len(skipped_downgrade_msgs),
                         "Did not find the event indicating that the downgrade below the daemon version "
                         "was skipped. Got: {0}".format(mock_telemetry.call_args_list))

def test_handles_if_requested_version_not_found_in_pkgs_to_download(self):
data_file = DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
Expand Down
2 changes: 1 addition & 1 deletion tests/ga/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -1818,7 +1818,7 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self):
self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION)))
self.assertFalse(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted,
"The current agent should not be blacklisted")
downgraded_version = "1.2.0"
downgraded_version = "2.5.0"

data_file = mockwiredata.DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
Expand Down
19 changes: 19 additions & 0 deletions tests_e2e/tests/agent_update/rsm_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from typing import List, Dict, Any

import requests
from assertpy import assert_that
from azure.identity import DefaultAzureCredential
from azure.mgmt.compute.models import VirtualMachine
from msrestazure.azure_cloud import Cloud
Expand Down Expand Up @@ -96,8 +97,19 @@ def run(self) -> None:
version: str = "1.3.1.0"
log.info("Attempting update version same as current version %s", upgrade_version)
self._request_rsm_update(version)
self._check_rsm_gs(version)
self._verify_guest_agent_update(version)

# verify requested version below daemon version
log.info("*******Verifying requested version below daemon version scenario*******")
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info("Current agent version running on the vm before update \n%s", stdout)
version: str = "0.5.0"
log.info("Attempting requested version %s", version)
self._request_rsm_update(version)
self._check_rsm_gs(version)
self._verify_no_guest_agent_update(stdout)

def _check_rsm_gs(self, requested_version: str) -> None:
# This checks if RSM GS available to the agent after we mock the rsm update request
output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True)
Expand Down Expand Up @@ -184,6 +196,13 @@ def _check_agent_version(requested_version: str) -> bool:
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}")

def _verify_no_guest_agent_update(self, previous_agent: str) -> None:
    """
    Verify that the current agent version is the same as before the update attempt.

    :param previous_agent: `waagent-version` output captured before the update was requested;
        it is compared for exact equality with the output of the same command run now.
    :raises AssertionError: if the two outputs differ.
    """
    current_agent: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
    # described_as() must come before the assertion: the assertion raises on failure, so a
    # description chained after it would never be attached to the error message.
    assert_that(current_agent).described_as(
        f"Agent version changed.\n Previous Agent {previous_agent} \n Current agent {current_agent}"
    ).is_equal_to(previous_agent)

def _verify_agent_reported_supported_feature_flag(self):
"""
RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log
Expand Down

0 comments on commit 9d90a2d

Please sign in to comment.