From 053e9ae20356ff7b256b1eeb82f967c126c02d89 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 14 Dec 2022 12:42:35 -0800 Subject: [PATCH 01/14] agent update refactor (#2706) * agent update refactor * address PR comments * updated available agents * fix pylint warn * updated test case warning * added kill switch flag * fix pylint warning * move last update attempt variables --- azurelinuxagent/common/conf.py | 3 +- azurelinuxagent/ga/agent_update.py | 260 +++++ azurelinuxagent/ga/guestagent.py | 316 +++++++ azurelinuxagent/ga/update.py | 663 +------------ makepkg.py | 5 +- tests/data/wire/ext_conf_missing_family.xml | 21 - ...requested_version_missing_in_manifest.xml} | 0 tests/ga/mocks.py | 73 +- tests/ga/test_agent_update.py | 311 ++++++ tests/ga/test_guestagent.py | 309 ++++++ tests/ga/test_report_status.py | 13 +- tests/ga/test_update.py | 889 ++++-------------- tests/protocol/mockwiredata.py | 3 + 13 files changed, 1466 insertions(+), 1400 deletions(-) create mode 100644 azurelinuxagent/ga/agent_update.py create mode 100644 azurelinuxagent/ga/guestagent.py rename tests/data/wire/{ext_conf_missing_requested_version.xml => ext_conf_requested_version_missing_in_manifest.xml} (100%) create mode 100644 tests/ga/test_agent_update.py create mode 100644 tests/ga/test_guestagent.py diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 46765ea989..6554ab3081 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -629,10 +629,9 @@ def get_normal_upgrade_frequency(conf=__conf__): def get_enable_ga_versioning(conf=__conf__): """ If True, the agent uses GA Versioning for auto-updating the agent vs automatically auto-updating to the highest version. - NOTE: This option is experimental and may be removed in later versions of the Agent. 
""" - return conf.get_switch("Debug.EnableGAVersioning", False) + return conf.get_switch("Debug.EnableGAVersioning", True) def get_firewall_rules_log_period(conf=__conf__): diff --git a/azurelinuxagent/ga/agent_update.py b/azurelinuxagent/ga/agent_update.py new file mode 100644 index 0000000000..ba98613247 --- /dev/null +++ b/azurelinuxagent/ga/agent_update.py @@ -0,0 +1,260 @@ +import datetime +import glob +import os +import shutil + +from azurelinuxagent.common import conf, logger +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.logger import LogLevel +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.utils import fileutil, textutil +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN +from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState + + +def get_agent_update_handler(protocol): + return AgentUpdateHandler(protocol) + + +class AgentUpdateHandler(object): + + def __init__(self, protocol): + self._protocol = protocol + self._ga_family = conf.get_autoupdate_gafamily() + self._autoupdate_enabled = conf.get_autoupdate_enabled() + self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id + self._last_attempted_update_time = datetime.datetime.min + self._last_attempted_update_version = FlexibleVersion("0.0.0.0") + + def __should_update_agent(self, requested_version): + """ + check to see if update is allowed once per (as specified in the conf.get_autoupdate_frequency()) + return false when we don't allow updates. 
+ """ + now = datetime.datetime.now() + + if self._last_attempted_update_time != datetime.datetime.min and self._last_attempted_update_version == requested_version: + next_attempt_time = self._last_attempted_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + else: + next_attempt_time = now + + if next_attempt_time > now: + return False + # The time limit elapsed for us to allow updates. + return True + + def __get_agent_family_from_last_gs(self, goal_state): + """ + Get the agent_family from last GS for the given family + Returns: first entry of Manifest + Exception if no manifests found in the last GS + """ + family = self._ga_family + agent_families = goal_state.extensions_goal_state.agent_families + agent_family_manifests = [m for m in agent_families if m.name == family and len(m.uris) > 0] + if len(agent_family_manifests) == 0: + raise Exception( + u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format( + self._ga_family, self._gs_id)) + return agent_family_manifests[0] + + @staticmethod + def __get_requested_version(agent_family): + """ + Get the requested version from agent family + Returns: Requested version if supported and available + None if requested version missing or GA versioning not enabled + """ + if conf.get_enable_ga_versioning() and agent_family.is_requested_version_specified: + if agent_family.requested_version is not None: + return FlexibleVersion(agent_family.requested_version) + return None + + @staticmethod + def __get_largest_version(agent_manifest): + largest_version = FlexibleVersion("0.0.0.0") + for pkg in agent_manifest.pkg_list.versions: + pkg_version = FlexibleVersion(pkg.version) + if pkg_version > largest_version: + largest_version = pkg_version + return largest_version + + def __download_and_get_agent(self, goal_state, agent_family, agent_manifest, requested_version): + """ + This function downloads the new agent(requested version) and returns the downloaded 
version. + """ + if agent_manifest is None: # Fetch agent manifest if it's not already done + agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + package_to_download = self.__get_agent_package_to_download(agent_manifest, requested_version) + is_fast_track_goal_state = goal_state.extensions_goal_state.source == GoalStateSource.FastTrack + agent = GuestAgent.from_agent_package(package_to_download, self._protocol, is_fast_track_goal_state) + return agent + + def __get_agent_package_to_download(self, agent_manifest, version): + """ + Returns the package of the given Version found in the manifest. If not found, returns exception + """ + for pkg in agent_manifest.pkg_list.versions: + if FlexibleVersion(pkg.version) == version: + # Found a matching package, only download that one + return pkg + + raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " + "skipping agent update".format(str(version), self._gs_id)) + + @staticmethod + def __purge_extra_agents_from_disk(known_agents): + """ + Remove from disk all directories and .zip files of unknown agents + (without removing the current, running agent). 
+ """ + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + + known_versions = [agent.version for agent in known_agents] + known_versions.append(CURRENT_VERSION) + + for agent_path in glob.iglob(path): + try: + name = fileutil.trim_ext(agent_path, "zip") + m = AGENT_DIR_PATTERN.match(name) + if m is not None and FlexibleVersion(m.group(1)) not in known_versions: + if os.path.isfile(agent_path): + logger.info(u"Purging outdated Agent file {0}", agent_path) + os.remove(agent_path) + else: + logger.info(u"Purging outdated Agent directory {0}", agent_path) + shutil.rmtree(agent_path) + except Exception as e: + logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) + + @staticmethod + def __proceed_with_update(requested_version): + """ + If requested version is specified, upgrade/downgrade to the specified version. + Raises: AgentUpgradeExitException + """ + if requested_version < CURRENT_VERSION: + # In case of a downgrade, we mark the current agent as bad version to avoid starting it back up ever again + # (the expectation here being that if we get request to a downgrade, + # there's a good reason for not wanting the current version). 
+ prefix = "downgrade" + try: + # We should always have an agent directory for the CURRENT_VERSION + agents_on_disk = AgentUpdateHandler.__get_available_agents_on_disk() + current_agent = next(agent for agent in agents_on_disk if agent.version == CURRENT_VERSION) + msg = "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " \ + "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION) + logger.info(msg) + current_agent.mark_failure(is_fatal=True, reason=msg) + except StopIteration: + logger.warn( + "Could not find a matching agent with current version {0} to blacklist, skipping it".format( + CURRENT_VERSION)) + else: + # In case of an upgrade, we don't need to exclude anything as the daemon will automatically + # start the next available highest version which would be the target version + prefix = "upgrade" + raise AgentUpgradeExitException("Agent update found, Exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) + + @staticmethod + def __get_available_agents_on_disk(): + available_agents = [agent for agent in AgentUpdateHandler.__get_all_agents_on_disk() if agent.is_available] + return sorted(available_agents, key=lambda agent: agent.version, reverse=True) + + @staticmethod + def __get_all_agents_on_disk(): + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] + + @staticmethod + def __log_event(level, msg_, success_=True): + if level == LogLevel.WARNING: + logger.warn(msg_) + elif level == LogLevel.ERROR: + logger.error(msg_) + elif level == LogLevel.INFO: + logger.info(msg_) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + + def run(self, goal_state): + try: + # Ignore new agents if update is disabled + if not self._autoupdate_enabled: + return + + 
self._gs_id = goal_state.extensions_goal_state.id + agent_family = self.__get_agent_family_from_last_gs(goal_state) + requested_version = self.__get_requested_version(agent_family) + agent_manifest = None # This is to make sure fetch agent manifest once per update + + if requested_version is None: + if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled + warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id) + self.__log_event(LogLevel.WARNING, warn_msg) + GAUpdateReportState.report_error_msg = warn_msg + agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + requested_version = self.__get_largest_version(agent_manifest) + else: + # Save the requested version to report back + GAUpdateReportState.report_expected_version = requested_version + # Remove the missing requested version warning once requested version becomes available + if "Missing requested version" in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = "" + + if requested_version == CURRENT_VERSION: + return + + # Check if an update is allowed + if not self.__should_update_agent(requested_version): + return + + msg_ = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( + self._gs_id, str(requested_version)) + self.__log_event(LogLevel.INFO, msg_) + + try: + agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) + + if not agent.is_available: + msg = "Downloaded agent version is in bad state : {0} , skipping agent update".format( + str(agent.version)) + self.__log_event(LogLevel.WARNING, msg) + return + + # We delete the directory and the zip package from the filesystem except current version and target version + self.__purge_extra_agents_from_disk(known_agents=[agent]) + 
self.__proceed_with_update(requested_version) + + finally: + self._last_attempted_update_time = datetime.datetime.now() + self._last_attempted_update_version = requested_version + + except Exception as err: + if isinstance(err, AgentUpgradeExitException): + raise err + if "Missing requested version" not in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) + self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success_=False) + + def get_vmagent_update_status(self): + """ + This function gets the VMAgent update status as per the last attempted update. + Returns: None if fail to report or update never attempted with requested version + """ + try: + if conf.get_enable_ga_versioning(): + if not GAUpdateReportState.report_error_msg: + status = VMAgentUpdateStatuses.Success + code = 0 + else: + status = VMAgentUpdateStatuses.Error + code = 1 + return VMAgentUpdateStatus(expected_version=str(GAUpdateReportState.report_expected_version), status=status, code=code, message=GAUpdateReportState.report_error_msg) + except Exception as err: + self.__log_event(LogLevel.WARNING, "Unable to report agent update status: {0}".format( + textutil.format_exception(err)), success_=False) + return None diff --git a/azurelinuxagent/ga/guestagent.py b/azurelinuxagent/ga/guestagent.py new file mode 100644 index 0000000000..56f3142447 --- /dev/null +++ b/azurelinuxagent/ga/guestagent.py @@ -0,0 +1,316 @@ +import json +import os +import shutil +import time + +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.utils import textutil + +from azurelinuxagent.common import logger, conf +from azurelinuxagent.common.exception import UpdateError +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import AGENT_DIR_PATTERN, 
AGENT_NAME, CURRENT_VERSION +from azurelinuxagent.ga.exthandlers import HandlerManifest + +AGENT_ERROR_FILE = "error.json" # File name for agent error record +AGENT_MANIFEST_FILE = "HandlerManifest.json" +MAX_FAILURE = 3 # Max failure allowed for agent before declare bad agent + + +class GAUpdateReportState(object): + """ + This class is primarily used to maintain the in-memory persistent state for the agent updates. + This state will be persisted throughout the current service run and might be modified by external classes. + """ + report_error_msg = "" + report_expected_version = FlexibleVersion("0.0.0.0") + + +class GuestAgent(object): + def __init__(self, path, pkg, protocol, is_fast_track_goal_state): + """ + If 'path' is given, the object is initialized to the version installed under that path. + + If 'pkg' is given, the version specified in the package information is downloaded and the object is + initialized to that version. + + 'is_fast_track_goal_state' and 'protocol' are used only when a package is downloaded. 
+ + NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly + """ + self._is_fast_track_goal_state = is_fast_track_goal_state + self.pkg = pkg + self._protocol = protocol + version = None + if path is not None: + m = AGENT_DIR_PATTERN.match(path) + if m is None: + raise UpdateError(u"Illegal agent directory: {0}".format(path)) + version = m.group(1) + elif self.pkg is not None: + version = pkg.version + + if version is None: + raise UpdateError(u"Illegal agent version: {0}".format(version)) + self.version = FlexibleVersion(version) + + location = u"disk" if path is not None else u"package" + logger.verbose(u"Loading Agent {0} from {1}", self.name, location) + + self.error = GuestAgentError(self.get_agent_error_file()) + self.error.load() + + try: + self._ensure_downloaded() + self._ensure_loaded() + except Exception as e: + # If we're unable to download/unpack the agent, delete the Agent directory + try: + if os.path.isdir(self.get_agent_dir()): + shutil.rmtree(self.get_agent_dir(), ignore_errors=True) + except Exception as err: + logger.warn("Unable to delete Agent files: {0}".format(err)) + msg = u"Agent {0} install failed with exception:".format( + self.name) + detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e)) + if "Missing requested version" not in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = detailed_msg # capture the download errors to report back + add_event( + AGENT_NAME, + version=self.version, + op=WALAEventOperation.Install, + is_success=False, + message=detailed_msg) + + @staticmethod + def from_installed_agent(path): + """ + Creates an instance of GuestAgent using the agent installed in the given 'path'. 
+ """ + return GuestAgent(path, None, None, False) + + @staticmethod + def from_agent_package(package, protocol, is_fast_track_goal_state): + """ + Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed it, it installs it. + """ + return GuestAgent(None, package, protocol, is_fast_track_goal_state) + + @property + def name(self): + return "{0}-{1}".format(AGENT_NAME, self.version) + + def get_agent_cmd(self): + return self.manifest.get_enable_command() + + def get_agent_dir(self): + return os.path.join(conf.get_lib_dir(), self.name) + + def get_agent_error_file(self): + return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE) + + def get_agent_manifest_path(self): + return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE) + + def get_agent_pkg_path(self): + return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip")) + + def clear_error(self): + self.error.clear() + self.error.save() + + @property + def is_available(self): + return self.is_downloaded and not self.is_blacklisted + + @property + def is_blacklisted(self): + return self.error is not None and self.error.is_blacklisted + + @property + def is_downloaded(self): + return self.is_blacklisted or \ + os.path.isfile(self.get_agent_manifest_path()) + + def mark_failure(self, is_fatal=False, reason=''): + try: + if not os.path.isdir(self.get_agent_dir()): + os.makedirs(self.get_agent_dir()) + self.error.mark_failure(is_fatal=is_fatal, reason=reason) + self.error.save() + if self.error.is_blacklisted: + msg = u"Agent {0} is permanently blacklisted".format(self.name) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False, + version=self.version) + except Exception as e: + logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) + + def _ensure_downloaded(self): + logger.verbose(u"Ensuring Agent {0} is downloaded", self.name) + + if 
self.is_downloaded: + logger.verbose(u"Agent {0} was previously downloaded - skipping download", self.name) + return + + if self.pkg is None: + raise UpdateError(u"Agent {0} is missing package and download URIs".format( + self.name)) + + self._download() + + msg = u"Agent {0} downloaded successfully".format(self.name) + logger.verbose(msg) + add_event( + AGENT_NAME, + version=self.version, + op=WALAEventOperation.Install, + is_success=True, + message=msg) + + def _ensure_loaded(self): + self._load_manifest() + self._load_error() + + def _download(self): + try: + self._protocol.client.download_zip_package("agent package", self.pkg.uris, self.get_agent_pkg_path(), self.get_agent_dir(), use_verify_header=self._is_fast_track_goal_state) + except Exception as exception: + msg = "Unable to download Agent {0}: {1}".format(self.name, ustr(exception)) + add_event( + AGENT_NAME, + op=WALAEventOperation.Download, + version=CURRENT_VERSION, + is_success=False, + message=msg) + raise UpdateError(msg) + + def _load_error(self): + try: + self.error = GuestAgentError(self.get_agent_error_file()) + self.error.load() + logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error)) + except Exception as e: + logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e)) + + def _load_manifest(self): + path = self.get_agent_manifest_path() + if not os.path.isfile(path): + msg = u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE) + raise UpdateError(msg) + + with open(path, "r") as manifest_file: + try: + manifests = json.load(manifest_file) + except Exception as e: + msg = u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e)) + raise UpdateError(msg) + if type(manifests) is list: + if len(manifests) <= 0: + msg = u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE) + raise UpdateError(msg) + manifest = manifests[0] + else: + manifest = manifests + + try: + self.manifest = 
HandlerManifest(manifest) # pylint: disable=W0201 + if len(self.manifest.get_enable_command()) <= 0: + raise Exception(u"Manifest is missing the enable command") + except Exception as e: + msg = u"Agent {0} has an illegal {1}: {2}".format( + self.name, + AGENT_MANIFEST_FILE, + ustr(e)) + raise UpdateError(msg) + + logger.verbose( + u"Agent {0} loaded manifest from {1}", + self.name, + self.get_agent_manifest_path()) + logger.verbose(u"Successfully loaded Agent {0} {1}: {2}", + self.name, + AGENT_MANIFEST_FILE, + ustr(self.manifest.data)) + return + + +class GuestAgentError(object): + def __init__(self, path): + self.last_failure = 0.0 + self.was_fatal = False + if path is None: + raise UpdateError(u"GuestAgentError requires a path") + self.path = path + self.failure_count = 0 + self.reason = '' + + self.clear() + return + + def mark_failure(self, is_fatal=False, reason=''): + self.last_failure = time.time() + self.failure_count += 1 + self.was_fatal = is_fatal + self.reason = reason + return + + def clear(self): + self.last_failure = 0.0 + self.failure_count = 0 + self.was_fatal = False + self.reason = '' + return + + @property + def is_blacklisted(self): + return self.was_fatal or self.failure_count >= MAX_FAILURE + + def load(self): + if self.path is not None and os.path.isfile(self.path): + try: + with open(self.path, 'r') as f: + self.from_json(json.load(f)) + except Exception as error: + # The error.json file is only supposed to be written only by the agent. + # If for whatever reason the file is malformed, just delete it to reset state of the errors. + logger.warn( + "Ran into error when trying to load error file {0}, deleting it to clean state. 
Error: {1}".format( + self.path, textutil.format_exception(error))) + try: + os.remove(self.path) + except Exception: + # We try best case efforts to delete the file, ignore error if we're unable to do so + pass + return + + def save(self): + if os.path.isdir(os.path.dirname(self.path)): + with open(self.path, 'w') as f: + json.dump(self.to_json(), f) + return + + def from_json(self, data): + self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0)) + self.failure_count = max(self.failure_count, data.get(u"failure_count", 0)) + self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) + reason = data.get(u"reason", '') + self.reason = reason if reason != '' else self.reason + return + + def to_json(self): + data = { + u"last_failure": self.last_failure, + u"failure_count": self.failure_count, + u"was_fatal": self.was_fatal, + u"reason": ustr(self.reason) + } + return data + + def __str__(self): + return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + self.last_failure, + self.failure_count, + self.was_fatal, + self.reason) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 2b0975b05b..4af6d069f2 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -17,7 +17,6 @@ # Requires Python 2.6+ and Openssl 1.0+ # import glob -import json import os import platform import re @@ -38,14 +37,12 @@ from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.event import add_event, initialize_event_logger_vminfo_common_parameters, \ WALAEventOperation, EVENTS_DIRECTORY -from azurelinuxagent.common.exception import UpdateError, ExitException, AgentUpgradeExitException, AgentMemoryExceededException +from azurelinuxagent.common.exception import ExitException, AgentUpgradeExitException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd from 
azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler -from azurelinuxagent.common.protocol.goal_state import GoalStateSource from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol, VmSettingsNotSupported -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList, \ - VERSION_0 +from azurelinuxagent.common.protocol.restapi import VERSION_0 from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils.archive import StateArchiver, AGENT_STATUS_FILE @@ -55,16 +52,16 @@ from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, AGENT_VERSION, \ CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, get_lis_version, \ has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, get_daemon_version +from azurelinuxagent.ga.agent_update import get_agent_update_handler from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed from azurelinuxagent.ga.collect_telemetry_events import get_collect_telemetry_events_handler from azurelinuxagent.ga.env import get_env_handler -from azurelinuxagent.ga.exthandlers import HandlerManifest, ExtHandlersHandler, list_agent_lib_directory, \ +from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, list_agent_lib_directory, \ ExtensionStatusValue, ExtHandlerStatusValue +from azurelinuxagent.ga.guestagent import GuestAgent from azurelinuxagent.ga.monitor import get_monitor_handler from azurelinuxagent.ga.send_telemetry_events import get_send_telemetry_events_handler -AGENT_ERROR_FILE = "error.json" # File name for agent error record -AGENT_MANIFEST_FILE = "HandlerManifest.json" AGENT_PARTITION_FILE = "partition" CHILD_HEALTH_INTERVAL = 15 * 60 @@ -72,8 +69,6 @@ CHILD_LAUNCH_RESTART_MAX = 3 CHILD_POLL_INTERVAL = 60 -MAX_FAILURE = 3 # Max failure allowed for 
agent before blacklisted - GOAL_STATE_PERIOD_EXTENSIONS_DISABLED = 5 * 60 ORPHAN_POLL_INTERVAL = 3 @@ -122,14 +117,6 @@ def __str__(self): return ustr(self.summary) -class AgentUpgradeType(object): - """ - Enum for different modes of Agent Upgrade - """ - Hotfix = "Hotfix" - Normal = "Normal" - - def get_update_handler(): return UpdateHandler() @@ -144,11 +131,6 @@ def __init__(self): self._is_running = True - # Member variables to keep track of the Agent AutoUpgrade - self.last_attempt_time = None - self._last_hotfix_upgrade_time = None - self._last_normal_upgrade_time = None - self.agents = [] self.child_agent = None @@ -369,6 +351,7 @@ def run(self, debug=False): from azurelinuxagent.ga.remoteaccess import get_remote_access_handler remote_access_handler = get_remote_access_handler(protocol) + agent_update_handler = get_agent_update_handler(protocol) self._ensure_no_orphans() self._emit_restart_event() @@ -402,7 +385,7 @@ def run(self, debug=False): while self.is_running: self._check_daemon_running(debug) self._check_threads_running(all_thread_handlers) - self._process_goal_state(exthandlers_handler, remote_access_handler) + self._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self._send_heartbeat_telemetry(protocol) self._check_agent_memory_usage() time.sleep(self._goal_state_period) @@ -523,80 +506,6 @@ def _try_update_goal_state(self, protocol): return True - def __update_guest_agent(self, protocol): - """ - This function checks for new Agent updates and raises AgentUpgradeExitException if available. - There are 2 different ways the agent checks for an update - - 1) Requested Version is specified in the Goal State. - - In this case, the Agent will download the requested version and upgrade/downgrade instantly. - 2) No requested version. - - In this case, the agent will periodically check (1 hr) for new agent versions in GA Manifest. - - If available, it will download all versions > CURRENT_VERSION. 
- - Depending on the highest version > CURRENT_VERSION, - the agent will update within 4 hrs (for a Hotfix update) or 24 hrs (for a Normal update) - """ - - def log_next_update_time(): - next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() - upgrade_type = self.__get_agent_upgrade_type(available_agent) - next_time = next_hotfix_time if upgrade_type == AgentUpgradeType.Hotfix else next_normal_time - message_ = "Discovered new {0} upgrade {1}; Will upgrade on or after {2}".format( - upgrade_type, available_agent.name, - datetime.utcfromtimestamp(next_time).strftime(logger.Logger.LogTimeFormatInUTC)) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, version=CURRENT_VERSION, is_success=True, - message=message_, log_event=False) - logger.info(message_) - - def handle_updates_for_requested_version(): - if requested_version < CURRENT_VERSION: - prefix = "downgrade" - # In case of a downgrade, we blacklist the current agent to avoid starting it back up ever again - # (the expectation here being that if RSM is asking us to a downgrade, - # there's a good reason for not wanting the current version). 
- try: - # We should always have an agent directory for the CURRENT_VERSION - # (unless the CURRENT_VERSION == daemon version, but since we don't support downgrading - # below daemon version, we will never reach this code path if that's the scenario) - current_agent = next(agent for agent in self.agents if agent.version == CURRENT_VERSION) - msg = "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " \ - "suggesting that we really don't want to execute any extensions using this version".format( - CURRENT_VERSION) - logger.info(msg) - current_agent.mark_failure(is_fatal=True, reason=msg) - except StopIteration: - logger.warn( - "Could not find a matching agent with current version {0} to blacklist, skipping it".format( - CURRENT_VERSION)) - else: - # In case of an upgrade, we don't need to blacklist anything as the daemon will automatically - # start the next available highest version which would be the requested version - prefix = "upgrade" - raise AgentUpgradeExitException( - "Exiting current process to {0} to the request Agent version {1}".format(prefix, requested_version)) - - # Skip the update if there is no goal state yet or auto-update is disabled - if self._goal_state is None or not conf.get_autoupdate_enabled(): - return False - - if self._download_agent_if_upgrade_available(protocol): - # The call to get_latest_agent_greater_than_daemon() also finds all agents in directory and sets the self.agents property. - # This state is used to find the GuestAgent object with the current version later if requested version is available in last GS. 
- available_agent = self.get_latest_agent_greater_than_daemon() - requested_version, _ = self.__get_requested_version_and_agent_family_from_last_gs() - if requested_version is not None: - # If requested version specified, upgrade/downgrade to the specified version instantly as this is - # driven by the goal state (as compared to the agent periodically checking for new upgrades every hour) - handle_updates_for_requested_version() - elif available_agent is None: - # Legacy behavior: The current agent can become unavailable and needs to be reverted. - # In that case, self._upgrade_available() returns True and available_agent would be None. Handling it here. - raise AgentUpgradeExitException( - "Agent {0} is reverting to the installed agent -- exiting".format(CURRENT_AGENT)) - else: - log_next_update_time() - - self.__upgrade_agent_if_permitted() - def _processing_new_incarnation(self): """ True if we are currently processing a new incarnation (i.e. WireServer goal state) @@ -610,18 +519,18 @@ def _processing_new_extensions_goal_state(self): egs = self._goal_state.extensions_goal_state return self._goal_state is not None and egs.id != self._last_extensions_gs_id and not egs.is_outdated - def _process_goal_state(self, exthandlers_handler, remote_access_handler): + def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_update_handler): protocol = exthandlers_handler.protocol # update self._goal_state if not self._try_update_goal_state(protocol): - # agent updates and status reporting should be done even when the goal state is not updated - self.__update_guest_agent(protocol) - self._report_status(exthandlers_handler) + # status reporting should be done even when the goal state is not updated + agent_update_status = agent_update_handler.get_vmagent_update_status() + self._report_status(exthandlers_handler, agent_update_status) return # check for agent updates - self.__update_guest_agent(protocol) + agent_update_handler.run(self._goal_state) try: 
if self._processing_new_extensions_goal_state(): @@ -639,7 +548,8 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): CGroupConfigurator.get_instance().check_cgroups(cgroup_metrics=[]) # report status before processing the remote access, since that operation can take a long time - self._report_status(exthandlers_handler) + agent_update_status = agent_update_handler.get_vmagent_update_status() + self._report_status(exthandlers_handler, agent_update_status) if self._processing_new_incarnation(): remote_access_handler.run() @@ -668,44 +578,7 @@ def _cleanup_legacy_goal_state_history(): except Exception as exception: logger.warn("Error removing legacy history files: {0}", ustr(exception)) - def __get_vmagent_update_status(self, goal_state_changed): - """ - This function gets the VMAgent update status as per the last GoalState. - Returns: None if the last GS does not ask for requested version else VMAgentUpdateStatus - """ - if not conf.get_enable_ga_versioning(): - return None - - update_status = None - - try: - requested_version, manifest = self.__get_requested_version_and_agent_family_from_last_gs() - if manifest is None and goal_state_changed: - logger.info("Unable to report update status as no matching manifest found for family: {0}".format( - conf.get_autoupdate_gafamily())) - return None - - if requested_version is not None: - if CURRENT_VERSION == requested_version: - status = VMAgentUpdateStatuses.Success - code = 0 - else: - status = VMAgentUpdateStatuses.Error - code = 1 - update_status = VMAgentUpdateStatus(expected_version=manifest.requested_version_string, status=status, - code=code) - except Exception as error: - if goal_state_changed: - err_msg = "[This error will only be logged once per goal state] " \ - "Ran into error when trying to fetch updateStatus for the agent, skipping reporting update satus. 
Error: {0}".format( - textutil.format_exception(error)) - logger.warn(err_msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=err_msg, log_event=False) - - return update_status - - def _report_status(self, exthandlers_handler): - vm_agent_update_status = self.__get_vmagent_update_status(self._processing_new_extensions_goal_state()) + def _report_status(self, exthandlers_handler, vm_agent_update_status): # report_ext_handlers_status does its own error handling and returns None if an error occurred vm_status = exthandlers_handler.report_ext_handlers_status( goal_state_changed=self._processing_new_extensions_goal_state(), @@ -1071,173 +944,6 @@ def _shutdown(self): str(e)) return - def __get_requested_version_and_agent_family_from_last_gs(self): - """ - Get the requested version and corresponding manifests from last GS if supported - Returns: (Requested Version, Manifest) if supported and available - (None, None) if no manifests found in the last GS - (None, manifest) if not supported or not specified in GS - """ - family_name = conf.get_autoupdate_gafamily() - agent_families = self._goal_state.extensions_goal_state.agent_families - agent_families = [m for m in agent_families if m.name == family_name and len(m.uris) > 0] - if len(agent_families) == 0: - return None, None - if conf.get_enable_ga_versioning() and agent_families[0].is_requested_version_specified: - return agent_families[0].requested_version, agent_families[0] - return None, agent_families[0] - - def _download_agent_if_upgrade_available(self, protocol, base_version=CURRENT_VERSION): - """ - This function downloads the new agent if an update is available. 
- If a requested version is available in goal state, then only that version is downloaded (new-update model) - Else, we periodically (1hr by default) checks if new Agent upgrade is available and download it on filesystem if available (old-update model) - rtype: Boolean - return: True if current agent is no longer available or an agent with a higher version number is available - else False - """ - - def report_error(msg_, version_=CURRENT_VERSION, op=WALAEventOperation.Download): - logger.warn(msg_) - add_event(AGENT_NAME, op=op, version=version_, is_success=False, message=msg_, log_event=False) - - def can_proceed_with_requested_version(): - if not gs_updated: - # If the goal state didn't change, don't process anything. - return False - - # With the new model, we will get a new GS when CRP wants us to auto-update using required version. - # If there's no new goal state, don't proceed with anything - msg_ = "Found requested version in manifest: {0} for goal state {1}".format( - requested_version, goal_state_id) - logger.info(msg_) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg_, log_event=False) - - if requested_version < daemon_version: - # Don't process the update if the requested version is lesser than daemon version, - # as we don't support downgrades below daemon versions. 
- report_error( - "Can't process the upgrade as the requested version: {0} is < current daemon version: {1}".format( - requested_version, daemon_version), op=WALAEventOperation.AgentUpgrade) - return False - - return True - - def agent_upgrade_time_elapsed(now_): - if self.last_attempt_time is not None: - next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() - else: - next_attempt_time = now_ - if next_attempt_time > now_: - return False - return True - - agent_family_name = conf.get_autoupdate_gafamily() - gs_updated = False - daemon_version = self.__get_daemon_version_for_update() - try: - # Fetch the agent manifests from the latest Goal State - goal_state_id = self._goal_state.extensions_goal_state.id - gs_updated = self._processing_new_extensions_goal_state() - requested_version, agent_family = self.__get_requested_version_and_agent_family_from_last_gs() - if agent_family is None: - logger.verbose( - u"No manifest links found for agent family: {0} for goal state {1}, skipping update check".format( - agent_family_name, goal_state_id)) - return False - except Exception as err: - # If there's some issues in fetching the agent manifests, report it only on goal state change - msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(err)) - if gs_updated: - report_error(msg) - else: - logger.verbose(msg) - return False - - if requested_version is not None: - # If GA versioning is enabled and requested version present in GS, and it's a new GS, follow new logic - if not can_proceed_with_requested_version(): - return False - else: - # If no requested version specified in the Goal State, follow the old auto-update logic - # Note: If the first Goal State contains a requested version, this timer won't start (i.e. self.last_attempt_time won't be updated). 
- # If any subsequent goal state does not contain requested version, this timer will start then, and we will - # download all versions available in PIR and auto-update to the highest available version on that goal state. - now = time.time() - if not agent_upgrade_time_elapsed(now): - return False - - logger.info("No requested version specified, checking for all versions for agent update (family: {0})", - agent_family_name) - self.last_attempt_time = now - - try: - # If we make it to this point, then either there is a requested version in a new GS (new auto-update model), - # or the 1hr time limit has elapsed for us to check the agent manifest for updates (old auto-update model). - pkg_list = ExtHandlerPackageList() - - # If the requested version is the current version, don't download anything; - # the call to purge() below will delete all other agents from disk - # In this case, no need to even fetch the GA family manifest as we don't need to download any agent. - if requested_version is not None and requested_version == CURRENT_VERSION: - packages_to_download = [] - msg = "The requested version is running as the current version: {0}".format(requested_version) - logger.info(msg) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg) - else: - agent_manifest = self._goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) - pkg_list = agent_manifest.pkg_list - packages_to_download = pkg_list.versions - - # Verify the requested version is in GA family manifest (if specified) - if requested_version is not None and requested_version != CURRENT_VERSION: - for pkg in pkg_list.versions: - if FlexibleVersion(pkg.version) == requested_version: - # Found a matching package, only download that one - packages_to_download = [pkg] - break - else: - msg = "No matching package found in the agent manifest for requested version: {0} in goal state {1}, skipping agent update".format( - requested_version, goal_state_id) - 
report_error(msg, version_=requested_version) - return False - - # Set the agents to those available for download at least as current as the existing agent - # or to the requested version (if specified) - is_fast_track_goal_state = self._goal_state.extensions_goal_state.source == GoalStateSource.FastTrack - agents_to_download = [GuestAgent.from_agent_package(pkg, protocol, is_fast_track_goal_state) for pkg in packages_to_download] - - # Filter out the agents that were downloaded/extracted successfully. If the agent was not installed properly, - # we delete the directory and the zip package from the filesystem - self._set_and_sort_agents([agent for agent in agents_to_download if agent.is_available]) - - # Remove from disk any agent no longer needed in the VM. - # If requested version is provided, this would delete all other agents present on the VM except - - # - the current version and the requested version if requested version != current version - # - only the current version if requested version == current version - # Note: - # The code leaves on disk available, but blacklisted, agents to preserve the state. - # Otherwise, those agents could be downloaded again and inappropriately retried. 
- self._purge_agents() - self._filter_blacklisted_agents() - - # If there are no agents available to upgrade/downgrade to, return False - if len(self.agents) == 0: - return False - - if requested_version is not None: - # In case of requested version, return True if an agent with a different version number than the - # current version is available that is higher than the current daemon version - return self.agents[0].version != base_version and self.agents[0].version > daemon_version - else: - # Else, return True if the highest agent is > base_version (CURRENT_VERSION) - return self.agents[0].version > base_version - - except Exception as err: - msg = u"Exception downloading agents for update: {0}".format(textutil.format_exception(err)) - report_error(msg) - return False - def _write_pid_file(self): pid_files = self._get_pid_files() @@ -1425,58 +1131,6 @@ def _execute_run_command(command): msg = "Error while checking ip table rules:{0}".format(ustr(e)) logger.error(msg) - def __get_next_upgrade_times(self): - """ - Get the next upgrade times - return: Next Normal Upgrade Time, Next Hotfix Upgrade Time - """ - - def get_next_process_time(last_val, frequency): - return now if last_val is None else last_val + frequency - - now = time.time() - next_hotfix_time = get_next_process_time(self._last_hotfix_upgrade_time, conf.get_hotfix_upgrade_frequency()) - next_normal_time = get_next_process_time(self._last_normal_upgrade_time, conf.get_normal_upgrade_frequency()) - - return next_normal_time, next_hotfix_time - - @staticmethod - def __get_agent_upgrade_type(available_agent): - # We follow semantic versioning for the agent, if . is same, then . has changed. - # In this case, we consider it as a Hotfix upgrade. Else we consider it a Normal upgrade. 
- if available_agent.version.major == CURRENT_VERSION.major and available_agent.version.minor == CURRENT_VERSION.minor: - return AgentUpgradeType.Hotfix - return AgentUpgradeType.Normal - - def __upgrade_agent_if_permitted(self): - """ - Check every 4hrs for a Hotfix Upgrade and 24 hours for a Normal upgrade and upgrade the agent if available. - raises: ExitException when a new upgrade is available in the relevant time window, else returns - """ - - next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() - now = time.time() - # Not permitted to update yet for any of the AgentUpgradeModes - if next_hotfix_time > now and next_normal_time > now: - return - - # Update the last upgrade check time even if no new agent is available for upgrade - self._last_hotfix_upgrade_time = now if next_hotfix_time <= now else self._last_hotfix_upgrade_time - self._last_normal_upgrade_time = now if next_normal_time <= now else self._last_normal_upgrade_time - - available_agent = self.get_latest_agent_greater_than_daemon() - if available_agent is None or available_agent.version <= CURRENT_VERSION: - logger.verbose("No agent upgrade discovered") - return - - upgrade_type = self.__get_agent_upgrade_type(available_agent) - upgrade_message = "{0} Agent upgrade discovered, updating to {1} -- exiting".format(upgrade_type, - available_agent.name) - - if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( - upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): - raise AgentUpgradeExitException(upgrade_message) - def _reset_legacy_blacklisted_agents(self): # Reset the state of all blacklisted agents that were blacklisted by legacy agents (i.e. 
not during auto-update) @@ -1489,290 +1143,3 @@ def _reset_legacy_blacklisted_agents(self): agent.clear_error() except Exception as err: logger.warn("Unable to reset legacy blacklisted agents due to: {0}".format(err)) - - -class GuestAgent(object): - def __init__(self, path, pkg, protocol, is_fast_track_goal_state): - """ - If 'path' is given, the object is initialized to the version installed under that path. - - If 'pkg' is given, the version specified in the package information is downloaded and the object is - initialized to that version. - - 'is_fast_track_goal_state' and 'protocol' are used only when a package is downloaded. - - NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly - """ - self._is_fast_track_goal_state = is_fast_track_goal_state - self.pkg = pkg - self._protocol = protocol - version = None - if path is not None: - m = AGENT_DIR_PATTERN.match(path) - if m is None: - raise UpdateError(u"Illegal agent directory: {0}".format(path)) - version = m.group(1) - elif self.pkg is not None: - version = pkg.version - - if version is None: - raise UpdateError(u"Illegal agent version: {0}".format(version)) - self.version = FlexibleVersion(version) - - location = u"disk" if path is not None else u"package" - logger.verbose(u"Loading Agent {0} from {1}", self.name, location) - - self.error = GuestAgentError(self.get_agent_error_file()) - self.error.load() - - try: - self._ensure_downloaded() - self._ensure_loaded() - except Exception as e: - # If we're unable to download/unpack the agent, delete the Agent directory - try: - if os.path.isdir(self.get_agent_dir()): - shutil.rmtree(self.get_agent_dir(), ignore_errors=True) - except Exception as err: - logger.warn("Unable to delete Agent files: {0}".format(err)) - msg = u"Agent {0} install failed with exception:".format( - self.name) - detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e)) - add_event( - AGENT_NAME, - version=self.version, - 
op=WALAEventOperation.Install, - is_success=False, - message=detailed_msg) - - @staticmethod - def from_installed_agent(path): - """ - Creates an instance of GuestAgent using the agent installed in the given 'path'. - """ - return GuestAgent(path, None, None, False) - - @staticmethod - def from_agent_package(package, protocol, is_fast_track_goal_state): - """ - Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed it, it installs it. - """ - return GuestAgent(None, package, protocol, is_fast_track_goal_state) - - @property - def name(self): - return "{0}-{1}".format(AGENT_NAME, self.version) - - def get_agent_cmd(self): - return self.manifest.get_enable_command() - - def get_agent_dir(self): - return os.path.join(conf.get_lib_dir(), self.name) - - def get_agent_error_file(self): - return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE) - - def get_agent_manifest_path(self): - return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE) - - def get_agent_pkg_path(self): - return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip")) - - def clear_error(self): - self.error.clear() - self.error.save() - - @property - def is_available(self): - return self.is_downloaded and not self.is_blacklisted - - @property - def is_blacklisted(self): - return self.error is not None and self.error.is_blacklisted - - @property - def is_downloaded(self): - return self.is_blacklisted or \ - os.path.isfile(self.get_agent_manifest_path()) - - def mark_failure(self, is_fatal=False, reason=''): - try: - if not os.path.isdir(self.get_agent_dir()): - os.makedirs(self.get_agent_dir()) - self.error.mark_failure(is_fatal=is_fatal, reason=reason) - self.error.save() - if self.error.is_blacklisted: - msg = u"Agent {0} is permanently blacklisted".format(self.name) - logger.warn(msg) - add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False, - 
version=self.version) - except Exception as e: - logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) - - def _ensure_downloaded(self): - logger.verbose(u"Ensuring Agent {0} is downloaded", self.name) - - if self.is_downloaded: - logger.verbose(u"Agent {0} was previously downloaded - skipping download", self.name) - return - - if self.pkg is None: - raise UpdateError(u"Agent {0} is missing package and download URIs".format( - self.name)) - - self._download() - - msg = u"Agent {0} downloaded successfully".format(self.name) - logger.verbose(msg) - add_event( - AGENT_NAME, - version=self.version, - op=WALAEventOperation.Install, - is_success=True, - message=msg) - - def _ensure_loaded(self): - self._load_manifest() - self._load_error() - - def _download(self): - try: - self._protocol.client.download_zip_package("agent package", self.pkg.uris, self.get_agent_pkg_path(), self.get_agent_dir(), use_verify_header=self._is_fast_track_goal_state) - except Exception as exception: - msg = "Unable to download Agent {0}: {1}".format(self.name, ustr(exception)) - add_event( - AGENT_NAME, - op=WALAEventOperation.Download, - version=CURRENT_VERSION, - is_success=False, - message=msg) - raise UpdateError(msg) - - def _load_error(self): - try: - self.error = GuestAgentError(self.get_agent_error_file()) - self.error.load() - logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error)) - except Exception as e: - logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e)) - - def _load_manifest(self): - path = self.get_agent_manifest_path() - if not os.path.isfile(path): - msg = u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE) - raise UpdateError(msg) - - with open(path, "r") as manifest_file: - try: - manifests = json.load(manifest_file) - except Exception as e: - msg = u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e)) - raise UpdateError(msg) - if type(manifests) 
is list: - if len(manifests) <= 0: - msg = u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE) - raise UpdateError(msg) - manifest = manifests[0] - else: - manifest = manifests - - try: - self.manifest = HandlerManifest(manifest) # pylint: disable=W0201 - if len(self.manifest.get_enable_command()) <= 0: - raise Exception(u"Manifest is missing the enable command") - except Exception as e: - msg = u"Agent {0} has an illegal {1}: {2}".format( - self.name, - AGENT_MANIFEST_FILE, - ustr(e)) - raise UpdateError(msg) - - logger.verbose( - u"Agent {0} loaded manifest from {1}", - self.name, - self.get_agent_manifest_path()) - logger.verbose(u"Successfully loaded Agent {0} {1}: {2}", - self.name, - AGENT_MANIFEST_FILE, - ustr(self.manifest.data)) - return - - -class GuestAgentError(object): - def __init__(self, path): - self.last_failure = 0.0 - self.was_fatal = False - if path is None: - raise UpdateError(u"GuestAgentError requires a path") - self.path = path - self.failure_count = 0 - self.reason = '' - - self.clear() - return - - def mark_failure(self, is_fatal=False, reason=''): - self.last_failure = time.time() - self.failure_count += 1 - self.was_fatal = is_fatal - self.reason = reason - return - - def clear(self): - self.last_failure = 0.0 - self.failure_count = 0 - self.was_fatal = False - self.reason = '' - return - - @property - def is_blacklisted(self): - return self.was_fatal or self.failure_count >= MAX_FAILURE - - def load(self): - if self.path is not None and os.path.isfile(self.path): - try: - with open(self.path, 'r') as f: - self.from_json(json.load(f)) - except Exception as error: - # The error.json file is only supposed to be written only by the agent. - # If for whatever reason the file is malformed, just delete it to reset state of the errors. - logger.warn( - "Ran into error when trying to load error file {0}, deleting it to clean state. 
Error: {1}".format( - self.path, textutil.format_exception(error))) - try: - os.remove(self.path) - except Exception: - # We try best case efforts to delete the file, ignore error if we're unable to do so - pass - return - - def save(self): - if os.path.isdir(os.path.dirname(self.path)): - with open(self.path, 'w') as f: - json.dump(self.to_json(), f) - return - - def from_json(self, data): - self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0)) - self.failure_count = max(self.failure_count, data.get(u"failure_count", 0)) - self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) - reason = data.get(u"reason", '') - self.reason = reason if reason != '' else self.reason - return - - def to_json(self): - data = { - u"last_failure": self.last_failure, - u"failure_count": self.failure_count, - u"was_fatal": self.was_fatal, - u"reason": ustr(self.reason) - } - return data - - def __str__(self): - return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - self.last_failure, - self.failure_count, - self.was_fatal, - self.reason) diff --git a/makepkg.py b/makepkg.py index 5ec04d5d89..bc4aad4c3b 100755 --- a/makepkg.py +++ b/makepkg.py @@ -8,8 +8,9 @@ import subprocess import sys -from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, AGENT_LONG_VERSION -from azurelinuxagent.ga.update import AGENT_MANIFEST_FILE +from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, \ + AGENT_LONG_VERSION +from azurelinuxagent.ga.guestagent import AGENT_MANIFEST_FILE MANIFEST = '''[{{ "name": "{0}", diff --git a/tests/data/wire/ext_conf_missing_family.xml b/tests/data/wire/ext_conf_missing_family.xml index 058c40a881..9e13d03ace 100644 --- a/tests/data/wire/ext_conf_missing_family.xml +++ b/tests/data/wire/ext_conf_missing_family.xml @@ -4,27 +4,6 @@ xmlns:i="http://www.w3.org/2001/XMLSchema-instance"> - Prod - - - - Test - - 
https://mock-goal-state/rdfepirv2bl2prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr04.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl4prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl4prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr04.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr06.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr09a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - 
https://mock-goal-state/zrdfepirv2bl6prdstr02a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - eastus diff --git a/tests/data/wire/ext_conf_missing_requested_version.xml b/tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml similarity index 100% rename from tests/data/wire/ext_conf_missing_requested_version.xml rename to tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml diff --git a/tests/ga/mocks.py b/tests/ga/mocks.py index 6fbc63d7da..e42dd55458 100644 --- a/tests/ga/mocks.py +++ b/tests/ga/mocks.py @@ -18,6 +18,8 @@ import contextlib from mock import PropertyMock + +from azurelinuxagent.ga.agent_update import AgentUpdateHandler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.remoteaccess import RemoteAccessHandler from azurelinuxagent.ga.update import UpdateHandler, get_update_handler @@ -30,6 +32,7 @@ def mock_update_handler(protocol, on_new_iteration=lambda _: None, exthandlers_handler=None, remote_access_handler=None, + agent_update_handler=None, autoupdate_enabled=False, check_daemon_running=False, start_background_threads=False, @@ -71,6 +74,9 @@ def is_running(*args): # mock for property UpdateHandler.is_running, which cont if remote_access_handler is None: remote_access_handler = RemoteAccessHandler(protocol) + if agent_update_handler is None: + agent_update_handler = AgentUpdateHandler(protocol) + cleanup_functions = [] def patch_object(target, attribute): @@ -80,39 +86,40 @@ def patch_object(target, attribute): try: with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler): - with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): - with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): - with 
patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep: - with patch('sys.exit', side_effect=lambda _: 0) as mock_exit: - if not check_daemon_running: - patch_object(UpdateHandler, "_check_daemon_running") - if not start_background_threads: - patch_object(UpdateHandler, "_start_threads") - if not check_background_threads: - patch_object(UpdateHandler, "_check_threads_running") - - def get_exit_code(): - if mock_exit.call_count == 0: - raise Exception("The UpdateHandler did not exit") - if mock_exit.call_count != 1: - raise Exception("The UpdateHandler exited multiple times ({0})".format(mock_exit.call_count)) - args, _ = mock_exit.call_args - return args[0] - - def get_iterations(): - return iteration_count[0] - - def get_iterations_completed(): - return sleep.call_count - - update_handler = get_update_handler() - update_handler.protocol_util.get_protocol = Mock(return_value=protocol) - update_handler.get_exit_code = get_exit_code - update_handler.get_iterations = get_iterations - update_handler.get_iterations_completed = get_iterations_completed - - yield update_handler + with patch("azurelinuxagent.ga.agent_update.get_agent_update_handler", return_value=agent_update_handler): + with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): + with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): + with patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep: + with patch('sys.exit', side_effect=lambda _: 0) as mock_exit: + if not check_daemon_running: + patch_object(UpdateHandler, "_check_daemon_running") + if not start_background_threads: + patch_object(UpdateHandler, "_start_threads") + if not check_background_threads: + patch_object(UpdateHandler, "_check_threads_running") + + def get_exit_code(): + if 
mock_exit.call_count == 0: + raise Exception("The UpdateHandler did not exit") + if mock_exit.call_count != 1: + raise Exception("The UpdateHandler exited multiple times ({0})".format(mock_exit.call_count)) + args, _ = mock_exit.call_args + return args[0] + + def get_iterations(): + return iteration_count[0] + + def get_iterations_completed(): + return sleep.call_count + + update_handler = get_update_handler() + update_handler.protocol_util.get_protocol = Mock(return_value=protocol) + update_handler.get_exit_code = get_exit_code + update_handler.get_iterations = get_iterations + update_handler.get_iterations_completed = get_iterations_completed + + yield update_handler finally: for f in cleanup_functions: f() diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update.py new file mode 100644 index 0000000000..5386bdaf67 --- /dev/null +++ b/tests/ga/test_agent_update.py @@ -0,0 +1,311 @@ +import contextlib +import json +import os + +from azurelinuxagent.common import conf +from azurelinuxagent.common.event import WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.future import ustr, httpclient +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses + +from azurelinuxagent.common.protocol.util import ProtocolUtil +from azurelinuxagent.common.version import CURRENT_VERSION +from azurelinuxagent.ga.agent_update import get_agent_update_handler +from azurelinuxagent.ga.guestagent import GAUpdateReportState +from tests.ga.test_update import UpdateTestCase +from tests.protocol.HttpRequestPredicates import HttpRequestPredicates +from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse +from tests.protocol.mockwiredata import DATA_FILE +from tests.tools import clear_singleton_instances, load_bin_data, patch + + +class TestAgentUpdate(UpdateTestCase): + + def setUp(self): + UpdateTestCase.setUp(self) + # Since ProtocolUtil is a singleton per thread, we need to 
clear it to ensure that the test cases do not + # reuse a previous state + clear_singleton_instances(ProtocolUtil) + + @contextlib.contextmanager + def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True): + # Default to DATA_FILE of test_data parameter raises the pylint warning + # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) + test_data = DATA_FILE if test_data is None else test_data + + with mock_wire_protocol(test_data) as protocol: + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + protocol.mock_wire_data.call_counts['agentArtifact'] += 1 + return MockHttpResponse(status=httpclient.OK, body=agent_pkg) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + with patch("azurelinuxagent.ga.agent_update.add_event") as mock_telemetry: + agent_update_handler = get_agent_update_handler(protocol) + agent_update_handler._protocol = protocol + yield agent_update_handler, mock_telemetry + + def __assert_agent_directories_available(self, versions): + for version in versions: + self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) + + 
def __assert_agent_directories_exist_and_others_dont_exist(self, versions): + self.__assert_agent_directories_available(versions=versions) + other_agents = [agent_dir for agent_dir in self.agent_dirs() if + agent_dir not in [self.agent_dir(version) for version in versions]] + self.assertFalse(any(other_agents), + "All other agents should be purged from agent dir: {0}".format(other_agents)) + + def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1, version="9.9.9.10"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Goal state incarnation_{0} is requesting a new agent version {1}'.format(inc, version) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent requested version found. Got: {0}".format( + mock_telemetry.call_args_list)) + + def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent package not found. 
Got: {0}".format( + mock_telemetry.call_args_list)) + + def test_it_should_not_update_when_autoupdate_disabled(self): + self.prepare_agents(count=1) + with self.__get_agent_update_handler(autoupdate_enabled=False) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "requesting a new agent version" in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "should not check for requested version") + + def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=False): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + + def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + version = "5.2.0.1" + with self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=10) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) + 
agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + # Now we shouldn't check for download if update is not allowed. This run should not add new logs + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + + def test_it_should_update_to_largest_version_if_requested_version_not_available(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + 
agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + str(CURRENT_VERSION)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "requesting a new agent version" in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "requested version should be same as current version") + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_upgrade_agent_if_requested_version_is_available_greater_than_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + 
downgraded_version = "1.2.0" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=downgraded_version) + self.__assert_agent_directories_exist_and_others_dont_exist( + versions=[downgraded_version, str(CURRENT_VERSION)]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + version = "5.2.0.4" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.assertFalse(os.path.exists(self.agent_dir(version)), + "New agent directory should not be found") + + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + + def test_handles_missing_agent_family(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = 
"wire/ext_conf_missing_family.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + + def test_it_should_report_update_status_with_success(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + GAUpdateReportState.report_error_msg = "" + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + str(CURRENT_VERSION)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Success, vm_agent_update_status.status) + self.assertEqual(0, vm_agent_update_status.code) + self.assertEqual(str(CURRENT_VERSION), vm_agent_update_status.expected_version) + + def test_it_should_report_update_status_with_error_on_download_fail(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + @contextlib.contextmanager + def mock_agent_update_handler(test_data): + with mock_wire_protocol(test_data) as protocol: + + def get_handler(url, 
**kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + protocol.set_http_handlers(http_get_handler=get_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + agent_update_handler_local = get_agent_update_handler(protocol) + yield agent_update_handler_local + + with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + GAUpdateReportState.report_error_msg = "" + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) + self.assertEqual(1, vm_agent_update_status.code) + self.assertEqual("9.9.9.10", vm_agent_update_status.expected_version) + self.assertIn("Unable to download Agent", vm_agent_update_status.message) + + def test_it_should_report_update_status_with_missing_requested_version_error(self): + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + + @contextlib.contextmanager + def mock_agent_update_handler(test_data): + with mock_wire_protocol(test_data) as protocol: + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + protocol.set_http_handlers(http_get_handler=get_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): + with 
patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + agent_update_handler_local = get_agent_update_handler(protocol) + yield agent_update_handler_local + + with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + GAUpdateReportState.report_error_msg = "" + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) + self.assertEqual(1, vm_agent_update_status.code) + self.assertIn("Missing requested version", vm_agent_update_status.message) diff --git a/tests/ga/test_guestagent.py b/tests/ga/test_guestagent.py new file mode 100644 index 0000000000..81e248bb04 --- /dev/null +++ b/tests/ga/test_guestagent.py @@ -0,0 +1,309 @@ +import json +import os + +from azurelinuxagent.common import conf +from azurelinuxagent.common.exception import UpdateError +from azurelinuxagent.ga.guestagent import GuestAgent, AGENT_MANIFEST_FILE, AGENT_ERROR_FILE, GuestAgentError, \ + MAX_FAILURE +from azurelinuxagent.common.future import httpclient +from azurelinuxagent.common.protocol.restapi import ExtHandlerPackage +from azurelinuxagent.common.version import AGENT_NAME +from tests.ga.test_update import UpdateTestCase, EMPTY_MANIFEST, WITH_ERROR, NO_ERROR +from tests.protocol import mockwiredata +from tests.protocol.mocks import MockHttpResponse, mock_wire_protocol +from tests.tools import load_bin_data, patch + + +class TestGuestAgent(UpdateTestCase): + def setUp(self): + UpdateTestCase.setUp(self) + self.copy_agents(self._get_agent_file_path()) + self.agent_path = os.path.join(self.tmp_dir, self._get_agent_name()) + + def test_creation(self): + with self.assertRaises(UpdateError): + GuestAgent.from_installed_agent("A very bad file name") + + with self.assertRaises(UpdateError): + GuestAgent.from_installed_agent("{0}-a.bad.version".format(AGENT_NAME)) + + 
self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertNotEqual(None, agent) + self.assertEqual(self._get_agent_name(), agent.name) + self.assertEqual(self._get_agent_version(), agent.version) + + self.assertEqual(self.agent_path, agent.get_agent_dir()) + + path = os.path.join(self.agent_path, AGENT_MANIFEST_FILE) + self.assertEqual(path, agent.get_agent_manifest_path()) + + self.assertEqual( + os.path.join(self.agent_path, AGENT_ERROR_FILE), + agent.get_agent_error_file()) + + path = ".".join((os.path.join(conf.get_lib_dir(), self._get_agent_name()), "zip")) + self.assertEqual(path, agent.get_agent_pkg_path()) + + self.assertTrue(agent.is_downloaded) + self.assertFalse(agent.is_blacklisted) + self.assertTrue(agent.is_available) + + def test_clear_error(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.mark_failure(is_fatal=True) + + self.assertTrue(agent.error.last_failure > 0.0) + self.assertEqual(1, agent.error.failure_count) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + agent.clear_error() + self.assertEqual(0.0, agent.error.last_failure) + self.assertEqual(0, agent.error.failure_count) + self.assertFalse(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + def test_is_available(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + + self.assertTrue(agent.is_available) + agent.mark_failure(is_fatal=True) + self.assertFalse(agent.is_available) + + def test_is_blacklisted(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertFalse(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + agent.mark_failure(is_fatal=True) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + def 
test_is_downloaded(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(agent.is_downloaded) + + def test_mark_failure(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + + agent.mark_failure() + self.assertEqual(1, agent.error.failure_count) + + agent.mark_failure(is_fatal=True) + self.assertEqual(2, agent.error.failure_count) + self.assertTrue(agent.is_blacklisted) + + def test_load_manifest(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + agent._load_manifest() + self.assertEqual(agent.manifest.get_enable_command(), + agent.get_agent_cmd()) + + def test_load_manifest_missing(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + os.remove(agent.get_agent_manifest_path()) + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_manifest_is_empty(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) + + with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin + json.dump(EMPTY_MANIFEST, file) + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_manifest_is_malformed(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) + + with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin + file.write("This is not JSON data") + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_error(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.error = None + + agent._load_error() + self.assertTrue(agent.error is not None) + + def test_download(self): + self.remove_agents() + self.assertFalse(os.path.isdir(self.agent_path)) + + agent_uri = 
'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' + + def http_get_handler(uri, *_, **__): + if uri == agent_uri: + response = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + return MockHttpResponse(status=httpclient.OK, body=response) + return None + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertTrue(os.path.isdir(agent.get_agent_dir())) + self.assertTrue(agent.is_downloaded) + + def test_download_fail(self): + self.remove_agents() + self.assertFalse(os.path.isdir(self.agent_path)) + + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' + + def http_get_handler(uri, *_, **__): + if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return None + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + with patch("azurelinuxagent.ga.guestagent.add_event") as add_event: + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertFalse(os.path.isfile(self.agent_path)) + + messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] + self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) + self.assertIn('[UpdateError] Unable to download Agent WALinuxAgent-9.9.9.9', messages[0], "The install error does not include the expected message") + + self.assertFalse(agent.is_blacklisted, "Download failures should not blacklist the Agent") + + def 
test_invalid_agent_package_does_not_blacklist_the_agent(self): + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.9' + + def http_get_handler(uri, *_, **__): + if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): + response = load_bin_data("ga/WALinuxAgent-9.9.9.9-no_manifest.zip") + return MockHttpResponse(status=httpclient.OK, body=response) + return None + + pkg = ExtHandlerPackage(version="9.9.9.9") + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") + self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") + + @patch("azurelinuxagent.ga.update.GuestAgent._download") + def test_ensure_download_skips_blacklisted(self, mock_download): + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertEqual(0, mock_download.call_count) + + agent.clear_error() + agent.mark_failure(is_fatal=True) + self.assertTrue(agent.is_blacklisted) + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(None) + # _download is mocked so there will be no http request; passing a None protocol + agent = GuestAgent.from_agent_package(pkg, None, False) + + self.assertEqual(1, agent.error.failure_count) + self.assertTrue(agent.error.was_fatal) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(0, mock_download.call_count) + + +class TestGuestAgentError(UpdateTestCase): + def test_creation(self): + self.assertRaises(TypeError, GuestAgentError) + self.assertRaises(UpdateError, GuestAgentError, None) + + with self.get_error_file(error_data=WITH_ERROR) as path: + err = GuestAgentError(path.name) + err.load() + self.assertEqual(path.name, err.path) + self.assertNotEqual(None, err) 
+ + self.assertEqual(WITH_ERROR["last_failure"], err.last_failure) + self.assertEqual(WITH_ERROR["failure_count"], err.failure_count) + self.assertEqual(WITH_ERROR["was_fatal"], err.was_fatal) + return + + def test_clear(self): + with self.get_error_file(error_data=WITH_ERROR) as path: + err = GuestAgentError(path.name) + err.load() + self.assertEqual(path.name, err.path) + self.assertNotEqual(None, err) + + err.clear() + self.assertEqual(NO_ERROR["last_failure"], err.last_failure) + self.assertEqual(NO_ERROR["failure_count"], err.failure_count) + self.assertEqual(NO_ERROR["was_fatal"], err.was_fatal) + return + + def test_save(self): + err1 = self.create_error() + err1.mark_failure() + err1.mark_failure(is_fatal=True) + + err2 = self.create_error(err1.to_json()) + self.assertEqual(err1.last_failure, err2.last_failure) + self.assertEqual(err1.failure_count, err2.failure_count) + self.assertEqual(err1.was_fatal, err2.was_fatal) + + def test_mark_failure(self): + err = self.create_error() + self.assertFalse(err.is_blacklisted) + + for i in range(0, MAX_FAILURE): # pylint: disable=unused-variable + err.mark_failure() + + # Agent failed >= MAX_FAILURE, it should be blacklisted + self.assertTrue(err.is_blacklisted) + self.assertEqual(MAX_FAILURE, err.failure_count) + return + + def test_mark_failure_permanent(self): + err = self.create_error() + + self.assertFalse(err.is_blacklisted) + + # Fatal errors immediately blacklist + err.mark_failure(is_fatal=True) + self.assertTrue(err.is_blacklisted) + self.assertTrue(err.failure_count < MAX_FAILURE) + return + + def test_str(self): + err = self.create_error(error_data=NO_ERROR) + s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + NO_ERROR["last_failure"], + NO_ERROR["failure_count"], + NO_ERROR["was_fatal"], + NO_ERROR["reason"]) + self.assertEqual(s, str(err)) + + err = self.create_error(error_data=WITH_ERROR) + s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + 
WITH_ERROR["last_failure"], + WITH_ERROR["failure_count"], + WITH_ERROR["was_fatal"], + WITH_ERROR["reason"]) + self.assertEqual(s, str(err)) + return diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index c5a20b5167..f63d1d42dc 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -3,6 +3,7 @@ import json +from azurelinuxagent.ga.agent_update import get_agent_update_handler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.update import get_update_handler from tests.ga.mocks import mock_update_handler @@ -78,21 +79,23 @@ def test_report_status_should_log_errors_only_once_per_goal_state(self): update_handler = get_update_handler() update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. so do that here exthandlers_handler = ExtHandlersHandler(protocol) - update_handler._report_status(exthandlers_handler) + agent_update_handler = get_agent_update_handler(protocol) + agent_update_status = agent_update_handler.get_vmagent_update_status() + update_handler._report_status(exthandlers_handler, agent_update_status) self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] - update_handler._report_status(exthandlers_handler) - update_handler._report_status(exthandlers_handler) - update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler, agent_update_status) + update_handler._report_status(exthandlers_handler, agent_update_status) + update_handler._report_status(exthandlers_handler, agent_update_status) self.assertEqual(1, len(get_warnings()), 
"UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler, agent_update_status) self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index e5f15fbd07..b93d409bb3 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -20,6 +20,8 @@ from datetime import datetime, timedelta from threading import current_thread +from azurelinuxagent.ga.guestagent import GuestAgent, GuestAgentError, \ + AGENT_ERROR_FILE, GAUpdateReportState from tests.common.osutil.test_default import TestOSUtil import azurelinuxagent.common.osutil.default as osutil @@ -27,7 +29,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.event import EVENTS_DIRECTORY, WALAEventOperation -from azurelinuxagent.common.exception import ProtocolError, UpdateError, HttpError, \ +from azurelinuxagent.common.exception import HttpError, \ ExitException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler @@ -41,13 +43,12 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ - AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION, set_daemon_version, \ - __DAEMON_VERSION_ENV_VARIABLE as DAEMON_VERSION_ENV_VARIABLE + 
AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, ExtHandlerInstance, HandlerEnvironment, ExtensionStatusValue -from azurelinuxagent.ga.update import GuestAgent, GuestAgentError, MAX_FAILURE, AGENT_MANIFEST_FILE, \ - get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, AGENT_ERROR_FILE, ORPHAN_WAIT_INTERVAL, \ +from azurelinuxagent.ga.update import \ + get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, ORPHAN_WAIT_INTERVAL, \ CHILD_LAUNCH_RESTART_MAX, CHILD_HEALTH_INTERVAL, GOAL_STATE_PERIOD_EXTENSIONS_DISABLED, UpdateHandler, \ - READONLY_FILE_GLOBS, ExtensionsSummary, AgentUpgradeType + READONLY_FILE_GLOBS, ExtensionsSummary from tests.ga.mocks import mock_update_handler from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT, DATA_FILE_VM_SETTINGS @@ -56,6 +57,7 @@ from tests.protocol import mockwiredata from tests.protocol.HttpRequestPredicates import HttpRequestPredicates + NO_ERROR = { "last_failure": 0.0, "failure_count": 0, @@ -99,7 +101,7 @@ def faux_logger(): @contextlib.contextmanager -def _get_update_handler(iterations=1, test_data=None, protocol=None): +def _get_update_handler(iterations=1, test_data=None, protocol=None, autoupdate_enabled=True): """ This function returns a mocked version of the UpdateHandler object to be used for testing. It will only run the main loop [iterations] no of times. 
@@ -110,10 +112,10 @@ def _get_update_handler(iterations=1, test_data=None, protocol=None): with patch.object(HostPluginProtocol, "is_default_channel", False): if protocol is None: with mock_wire_protocol(test_data) as mock_protocol: - with mock_update_handler(mock_protocol, iterations=iterations, autoupdate_enabled=True) as update_handler: + with mock_update_handler(mock_protocol, iterations=iterations, autoupdate_enabled=autoupdate_enabled) as update_handler: yield update_handler, mock_protocol else: - with mock_update_handler(protocol, iterations=iterations, autoupdate_enabled=True) as update_handler: + with mock_update_handler(protocol, iterations=iterations, autoupdate_enabled=autoupdate_enabled) as update_handler: yield update_handler, protocol @@ -315,302 +317,6 @@ def replicate_agents(self, return dst_v -class TestGuestAgentError(UpdateTestCase): - def test_creation(self): - self.assertRaises(TypeError, GuestAgentError) - self.assertRaises(UpdateError, GuestAgentError, None) - - with self.get_error_file(error_data=WITH_ERROR) as path: - err = GuestAgentError(path.name) - err.load() - self.assertEqual(path.name, err.path) - self.assertNotEqual(None, err) - - self.assertEqual(WITH_ERROR["last_failure"], err.last_failure) - self.assertEqual(WITH_ERROR["failure_count"], err.failure_count) - self.assertEqual(WITH_ERROR["was_fatal"], err.was_fatal) - return - - def test_clear(self): - with self.get_error_file(error_data=WITH_ERROR) as path: - err = GuestAgentError(path.name) - err.load() - self.assertEqual(path.name, err.path) - self.assertNotEqual(None, err) - - err.clear() - self.assertEqual(NO_ERROR["last_failure"], err.last_failure) - self.assertEqual(NO_ERROR["failure_count"], err.failure_count) - self.assertEqual(NO_ERROR["was_fatal"], err.was_fatal) - return - - def test_save(self): - err1 = self.create_error() - err1.mark_failure() - err1.mark_failure(is_fatal=True) - - err2 = self.create_error(err1.to_json()) - self.assertEqual(err1.last_failure, 
err2.last_failure) - self.assertEqual(err1.failure_count, err2.failure_count) - self.assertEqual(err1.was_fatal, err2.was_fatal) - - def test_mark_failure(self): - err = self.create_error() - self.assertFalse(err.is_blacklisted) - - for i in range(0, MAX_FAILURE): # pylint: disable=unused-variable - err.mark_failure() - - # Agent failed >= MAX_FAILURE, it should be blacklisted - self.assertTrue(err.is_blacklisted) - self.assertEqual(MAX_FAILURE, err.failure_count) - return - - def test_mark_failure_permanent(self): - err = self.create_error() - - self.assertFalse(err.is_blacklisted) - - # Fatal errors immediately blacklist - err.mark_failure(is_fatal=True) - self.assertTrue(err.is_blacklisted) - self.assertTrue(err.failure_count < MAX_FAILURE) - return - - def test_str(self): - err = self.create_error(error_data=NO_ERROR) - s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - NO_ERROR["last_failure"], - NO_ERROR["failure_count"], - NO_ERROR["was_fatal"], - NO_ERROR["reason"]) - self.assertEqual(s, str(err)) - - err = self.create_error(error_data=WITH_ERROR) - s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - WITH_ERROR["last_failure"], - WITH_ERROR["failure_count"], - WITH_ERROR["was_fatal"], - WITH_ERROR["reason"]) - self.assertEqual(s, str(err)) - return - - -class TestGuestAgent(UpdateTestCase): - def setUp(self): - UpdateTestCase.setUp(self) - self.copy_agents(self._get_agent_file_path()) - self.agent_path = os.path.join(self.tmp_dir, self._get_agent_name()) - - def test_creation(self): - with self.assertRaises(UpdateError): - GuestAgent.from_installed_agent("A very bad file name") - - with self.assertRaises(UpdateError): - GuestAgent.from_installed_agent("{0}-a.bad.version".format(AGENT_NAME)) - - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertNotEqual(None, agent) - self.assertEqual(self._get_agent_name(), agent.name) - 
self.assertEqual(self._get_agent_version(), agent.version) - - self.assertEqual(self.agent_path, agent.get_agent_dir()) - - path = os.path.join(self.agent_path, AGENT_MANIFEST_FILE) - self.assertEqual(path, agent.get_agent_manifest_path()) - - self.assertEqual( - os.path.join(self.agent_path, AGENT_ERROR_FILE), - agent.get_agent_error_file()) - - path = ".".join((os.path.join(conf.get_lib_dir(), self._get_agent_name()), "zip")) - self.assertEqual(path, agent.get_agent_pkg_path()) - - self.assertTrue(agent.is_downloaded) - self.assertFalse(agent.is_blacklisted) - self.assertTrue(agent.is_available) - - def test_clear_error(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - agent.mark_failure(is_fatal=True) - - self.assertTrue(agent.error.last_failure > 0.0) - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - agent.clear_error() - self.assertEqual(0.0, agent.error.last_failure) - self.assertEqual(0, agent.error.failure_count) - self.assertFalse(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - def test_is_available(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - - self.assertTrue(agent.is_available) - agent.mark_failure(is_fatal=True) - self.assertFalse(agent.is_available) - - def test_is_blacklisted(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertFalse(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - agent.mark_failure(is_fatal=True) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - def test_is_downloaded(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(agent.is_downloaded) - - def test_mark_failure(self): - 
agent = GuestAgent.from_installed_agent(self.agent_path) - - agent.mark_failure() - self.assertEqual(1, agent.error.failure_count) - - agent.mark_failure(is_fatal=True) - self.assertEqual(2, agent.error.failure_count) - self.assertTrue(agent.is_blacklisted) - - def test_load_manifest(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - agent._load_manifest() - self.assertEqual(agent.manifest.get_enable_command(), - agent.get_agent_cmd()) - - def test_load_manifest_missing(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - os.remove(agent.get_agent_manifest_path()) - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_manifest_is_empty(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) - - with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin - json.dump(EMPTY_MANIFEST, file) - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_manifest_is_malformed(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) - - with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin - file.write("This is not JSON data") - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_error(self): - agent = GuestAgent.from_installed_agent(self.agent_path) - agent.error = None - - agent._load_error() - self.assertTrue(agent.error is not None) - - def test_download(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri == agent_uri: - response = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) - return 
MockHttpResponse(status=httpclient.OK, body=response) - return None - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertTrue(os.path.isdir(agent.get_agent_dir())) - self.assertTrue(agent.is_downloaded) - - def test_download_fail(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) - return None - - agent_version = self._get_agent_version() - pkg = ExtHandlerPackage(version=str(agent_version)) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - with patch("azurelinuxagent.ga.update.add_event") as add_event: - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(os.path.isfile(self.agent_path)) - - messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] - self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) - self.assertIn(str.format('[UpdateError] Unable to download Agent WALinuxAgent-{0}', agent_version), messages[0], "The install error does not include the expected message") - - self.assertFalse(agent.is_blacklisted, "Download failures should not blacklist the Agent") - - def test_invalid_agent_package_does_not_blacklist_the_agent(self): - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.9' - - def 
http_get_handler(uri, *_, **__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - response = load_bin_data("ga/WALinuxAgent-9.9.9.9-no_manifest.zip") - return MockHttpResponse(status=httpclient.OK, body=response) - return None - - pkg = ExtHandlerPackage(version="9.9.9.9") - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") - self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") - - @patch("azurelinuxagent.ga.update.GuestAgent._download") - def test_ensure_download_skips_blacklisted(self, mock_download): - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertEqual(0, mock_download.call_count) - - agent.clear_error() - agent.mark_failure(is_fatal=True) - self.assertTrue(agent.is_blacklisted) - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(None) - # _download is mocked so there will be no http request; passing a None protocol - agent = GuestAgent.from_agent_package(pkg, None, False) - - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.error.was_fatal) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(0, mock_download.call_count) - - class TestUpdate(UpdateTestCase): def setUp(self): UpdateTestCase.setUp(self) @@ -628,8 +334,6 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) def test_creation(self): - self.assertEqual(None, self.update_handler.last_attempt_time) - self.assertEqual(0, len(self.update_handler.agents)) self.assertEqual(None, self.update_handler.child_agent) @@ -853,9 +557,6 @@ def test_get_latest_agent(self): def test_get_latest_agent_excluded(self): self.prepare_agent(AGENT_VERSION) - 
self.assertFalse(self._test_upgrade_available( - versions=self.agent_versions(), - count=1)) self.assertEqual(None, self.update_handler.get_latest_agent_greater_than_daemon()) def test_get_latest_agent_no_updates(self): @@ -1192,85 +893,6 @@ def test_shutdown_ignores_exceptions(self): except Exception as e: # pylint: disable=unused-variable self.assertTrue(False, "Unexpected exception") # pylint: disable=redundant-unittest-assert - def _test_upgrade_available( - self, - base_version=FlexibleVersion(AGENT_VERSION), - protocol=None, - versions=None, - count=20): - - if protocol is None: - protocol = self._create_protocol(count=count, versions=versions) - - self.update_handler.protocol_util = protocol - self.update_handler._goal_state = protocol.get_goal_state() - self.update_handler._goal_state.extensions_goal_state.is_outdated = False - conf.get_autoupdate_gafamily = Mock(return_value=protocol.family) - - return self.update_handler._download_agent_if_upgrade_available(protocol, base_version=base_version) - - def test_upgrade_available_returns_true_on_first_use(self): - self.assertTrue(self._test_upgrade_available()) - - def test_upgrade_available_handles_missing_family(self): - data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" - - with mock_wire_protocol(data_file) as protocol: - self.update_handler.protocol_util = protocol - with patch('azurelinuxagent.common.logger.warn') as mock_logger: - with patch('azurelinuxagent.common.protocol.goal_state.GoalState.fetch_agent_manifest', side_effect=ProtocolError): - self.assertFalse(self.update_handler._download_agent_if_upgrade_available(protocol, base_version=CURRENT_VERSION)) - self.assertEqual(0, mock_logger.call_count) - - def test_upgrade_available_includes_old_agents(self): - self.prepare_agents() - - old_version = self.agent_versions()[-1] - old_count = old_version.version[-1] - - self.replicate_agents(src_v=old_version, count=old_count, increment=-1) - all_count = 
len(self.agent_versions()) - - self.assertTrue(self._test_upgrade_available(versions=self.agent_versions())) - self.assertEqual(all_count, len(self.update_handler.agents)) - - def test_upgrade_available_purges_old_agents(self): - self.prepare_agents() - agent_count = self.agent_count() - self.assertEqual(20, agent_count) - - agent_versions = self.agent_versions()[:3] - self.assertTrue(self._test_upgrade_available(versions=agent_versions)) - self.assertEqual(len(agent_versions), len(self.update_handler.agents)) - - # Purging always keeps the running agent - if CURRENT_VERSION not in agent_versions: - agent_versions.append(CURRENT_VERSION) - self.assertEqual(agent_versions, self.agent_versions()) - - def test_upgrade_available_skips_if_too_frequent(self): - conf.get_autoupdate_frequency = Mock(return_value=10000) - self.update_handler.last_attempt_time = time.time() - self.assertFalse(self._test_upgrade_available()) - - def test_upgrade_available_skips_when_no_new_versions(self): - self.prepare_agents() - base_version = self.agent_versions()[0] + 1 - self.assertFalse(self._test_upgrade_available(base_version=base_version)) - - def test_upgrade_available_skips_when_no_versions(self): - self.assertFalse(self._test_upgrade_available(protocol=ProtocolMock())) - - def test_upgrade_available_sorts(self): - self.prepare_agents() - self._test_upgrade_available() - - v = FlexibleVersion("100000") - for a in self.update_handler.agents: - self.assertTrue(v > a.version) - v = a.version - def test_write_pid_file(self): for n in range(1112): fileutil.write_file(os.path.join(self.tmp_dir, str(n) + "_waagent.pid"), ustr(n + 1)) @@ -1295,7 +917,7 @@ def test_update_happens_when_extensions_disabled(self): behavior never changes. 
""" with patch('azurelinuxagent.common.conf.get_extensions_enabled', return_value=False): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', return_value=True) as download_agent: + with patch('azurelinuxagent.ga.agent_update.AgentUpdateHandler.run') as download_agent: with mock_wire_protocol(DATA_FILE) as protocol: with mock_update_handler(protocol, autoupdate_enabled=True) as update_handler: update_handler.run() @@ -1358,7 +980,7 @@ def match_expected_info(): def test_it_should_recreate_handler_env_on_service_startup(self): iterations = 5 - with _get_update_handler(iterations) as (update_handler, protocol): + with _get_update_handler(iterations, autoupdate_enabled=False) as (update_handler, protocol): update_handler.run(debug=True) expected_handler = self._get_test_ext_handler_instance(protocol) @@ -1375,7 +997,7 @@ def test_it_should_recreate_handler_env_on_service_startup(self): # re-runnning the update handler. Then,ensure that the HandlerEnvironment file is recreated with eventsFolder # flag in HandlerEnvironment.json file. 
self._add_write_permission_to_goal_state_files() - with _get_update_handler(iterations=1) as (update_handler, protocol): + with _get_update_handler(iterations=1, autoupdate_enabled=False) as (update_handler, protocol): with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): update_handler.run(debug=True) @@ -1573,7 +1195,7 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): @contextlib.contextmanager def _setup_test_for_ext_event_dirs_retention(self): try: - with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT) as (update_handler, protocol): + with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT, autoupdate_enabled=False) as (update_handler, protocol): with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): update_handler.run(debug=True) expected_events_dirs = glob.glob(os.path.join(conf.get_ext_log_dir(), "*", EVENTS_DIRECTORY)) @@ -1623,62 +1245,69 @@ def test_it_should_recreate_extension_event_directories_for_existing_extensions_ def test_it_should_report_update_status_in_status_blob(self): with mock_wire_protocol(DATA_FILE) as protocol: - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - with patch.object(conf, "get_autoupdate_gafamily", return_value="Prod"): - with patch("azurelinuxagent.common.logger.warn") as patch_warn: - - protocol.aggregate_status = None - protocol.incarnation = 1 - - def mock_http_put(url, *args, **_): - if HttpRequestPredicates.is_host_plugin_status_request(url): - # Skip reading the HostGA request data as its encoded - return MockHttpResponse(status=500) - protocol.aggregate_status = json.loads(args[0]) - return MockHttpResponse(status=201) - - def update_goal_state_and_run_handler(): - protocol.incarnation += 1 - protocol.mock_wire_data.set_incarnation(protocol.incarnation) - self._add_write_permission_to_goal_state_files() - with _get_update_handler(iterations=1, protocol=protocol) as 
(update_handler, _): - update_handler.run(debug=True) - self.assertEqual(0, update_handler.get_exit_code(), - "Exit code should be 0; List of all warnings logged by the agent: {0}".format( - patch_warn.call_args_list)) - - protocol.set_http_handlers(http_put_handler=mock_http_put) - - # Case 1: No requested version in GS; updateStatus should not be reported - update_goal_state_and_run_handler() - self.assertFalse("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should not be reported if not asked in GS") - - # Case 2: Requested version in GS != Current Version; updateStatus should be error - protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be in status blob. Warns: {0}".format(patch_warn.call_args_list)) - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") - self.assertEqual(update_status['expectedVersion'], "9.9.9.10", "incorrect version reported") - self.assertEqual(update_status['code'], 1, "incorrect code reported") - - # Case 3: Requested version in GS == Current Version; updateStatus should be Success - protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be reported if asked in GS") - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") - self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version 
reported") - self.assertEqual(update_status['code'], 0, "incorrect code reported") - - # Case 4: Requested version removed in GS; no updateStatus should be reported - protocol.mock_wire_data.reload() - update_goal_state_and_run_handler() - self.assertFalse("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should not be reported if not asked in GS") + with patch.object(conf, "get_autoupdate_gafamily", return_value="Prod"): + with patch("azurelinuxagent.common.logger.warn") as patch_warn: + + protocol.aggregate_status = None + protocol.incarnation = 1 + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + def update_goal_state_and_run_handler(autoupdate_enabled = True): + protocol.incarnation += 1 + protocol.mock_wire_data.set_incarnation(protocol.incarnation) + self._add_write_permission_to_goal_state_files() + with _get_update_handler(iterations=1, protocol=protocol, autoupdate_enabled=autoupdate_enabled) as (update_handler, _): + GAUpdateReportState.report_error_msg = "" + update_handler.run(debug=True) + self.assertEqual(0, update_handler.get_exit_code(), + "Exit code should be 0; List of all warnings logged by the agent: {0}".format( + patch_warn.call_args_list)) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + + # Case 1: Requested version removed in GS; report missing requested version errr + protocol.mock_wire_data.set_extension_config("wire/ext_conf.xml") + protocol.mock_wire_data.reload() + 
update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['code'], 1, "incorrect code reported") + self.assertIn("Missing requested version", update_status['formattedMessage']['message'], "incorrect message reported") + + # Case 2: Requested version in GS == Current Version; updateStatus should be Success + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported if asked in GS") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") + self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version reported") + self.assertEqual(update_status['code'], 0, "incorrect code reported") + + # Case 3: Requested version in GS != Current Version; update fail and report error + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version("5.2.0.1") + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be in status blob. 
Warns: {0}".format(patch_warn.call_args_list)) + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['expectedVersion'], "5.2.0.1", "incorrect version reported") + self.assertEqual(update_status['code'], 1, "incorrect code reported") def test_it_should_wait_to_fetch_first_goal_state(self): with _get_update_handler() as (update_handler, protocol): @@ -1721,7 +1350,7 @@ def test_it_should_reset_legacy_blacklisted_agents_on_process_start(self): else: self.assertFalse(agent.is_blacklisted, "Agent {0} should not be blacklisted".format(agent.name)) - with _get_update_handler() as (update_handler, _): + with _get_update_handler(autoupdate_enabled=False) as (update_handler, _): update_handler.run(debug=True) self.assertEqual(20, self.agent_count(), "All agents should be available on disk") # Ensure none of the agents are blacklisted @@ -1776,11 +1405,6 @@ def _test_run(self, autoupdate_enabled=False, check_daemon_running=False, expect def test_run(self): self._test_run() - def test_run_stops_if_update_available(self): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', return_value=True): - update_handler = self._test_run(autoupdate_enabled=True) - self.assertEqual(0, update_handler.get_iterations_completed()) - def test_run_stops_if_orphaned(self): with patch('os.getppid', return_value=1): update_handler = self._test_run(check_daemon_running=True) @@ -1791,7 +1415,7 @@ def test_run_clears_sentinel_on_successful_exit(self): self.assertFalse(os.path.isfile(update_handler._sentinel_file_path())) def test_run_leaves_sentinel_on_unsuccessful_exit(self): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', side_effect=Exception): + with patch('azurelinuxagent.ga.agent_update.AgentUpdateHandler.run', 
side_effect=Exception): update_handler = self._test_run(autoupdate_enabled=True,expected_exit_code=1) self.assertTrue(os.path.isfile(update_handler._sentinel_file_path())) @@ -1803,20 +1427,16 @@ def test_run_emits_restart_event(self): class TestAgentUpgrade(UpdateTestCase): @contextlib.contextmanager - def create_conf_mocks(self, hotfix_frequency, normal_frequency): + def create_conf_mocks(self, autoupdate_frequency): # Disabling extension processing to speed up tests as this class deals with testing agent upgrades with patch("azurelinuxagent.common.conf.get_extensions_enabled", return_value=False): - with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): - with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", - return_value=hotfix_frequency): - with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", - return_value=normal_frequency): - with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - yield + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + yield @contextlib.contextmanager - def __get_update_handler(self, iterations=1, test_data=None, hotfix_frequency=1.0, normal_frequency=2.0, - reload_conf=None): + def __get_update_handler(self, iterations=1, test_data=None, + reload_conf=None, autoupdate_frequency=0.001): test_data = DATA_FILE if test_data is None else test_data @@ -1842,32 +1462,23 @@ def put_handler(url, *args, **_): return MockHttpResponse(status=201) protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) - with self.create_conf_mocks(hotfix_frequency, normal_frequency): - with patch("azurelinuxagent.ga.update.add_event") as mock_telemetry: + with self.create_conf_mocks(autoupdate_frequency): + with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: 
update_handler._protocol = protocol yield update_handler, mock_telemetry def __assert_exit_code_successful(self, update_handler): self.assertEqual(0, update_handler.get_exit_code(), "Exit code should be 0") - def __assert_upgrade_telemetry_emitted_for_requested_version(self, mock_telemetry, upgrade=True, version="99999.0.0.0"): + def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade=True, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Exiting current process to {0} to the request Agent version {1}'.format( + 'Agent update found, Exiting current process to {0} to the new Agent version {1}'.format( "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent was upgraded. Got: {0}".format( mock_telemetry.call_args_list)) - def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade_type=AgentUpgradeType.Normal): - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - '{0} Agent upgrade discovered, updating to WALinuxAgent-99999.0.0.0 -- exiting'.format( - upgrade_type) in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade] - self.assertEqual(1, len(upgrade_event_msgs), - "Did not find the event indicating that the agent was upgraded. 
Got: {0}".format( - mock_telemetry.call_args_list)) - def __assert_agent_directories_available(self, versions): for version in versions: self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) @@ -1879,11 +1490,6 @@ def __assert_agent_directories_exist_and_others_dont_exist(self, versions): self.assertFalse(any(other_agents), "All other agents should be purged from agent dir: {0}".format(other_agents)) - def __assert_no_agent_upgrade_telemetry(self, mock_telemetry): - self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - "Agent upgrade discovered, updating to" in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade]), "Unwanted upgrade") - def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VERSION)): self.assertIsNotNone(aggregate_status, "Status should be reported") self.assertEqual(aggregate_status['aggregateStatus']['guestAgentStatus']['version'], version, @@ -1892,128 +1498,64 @@ def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VE "Guest Agent should be reported as Ready") def test_it_should_upgrade_agent_on_process_start_if_auto_upgrade_enabled(self): - with self.__get_update_handler(iterations=10) as (update_handler, mock_telemetry): - + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file, iterations=10) as (update_handler, mock_telemetry): update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) self.assertEqual(1, update_handler.get_iterations(), "Update handler should've exited after the first run") - self.__assert_agent_directories_available(versions=["99999.0.0.0"]) + self.__assert_agent_directories_available(versions=["9.9.9.10"]) self.__assert_upgrade_telemetry_emitted(mock_telemetry) - def 
test_it_should_download_new_agents_and_not_auto_upgrade_if_not_permitted(self): + def test_it_should_not_update_agent_if_last_update_time_not_permitted(self): no_of_iterations = 10 data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations/2: - reload_conf.call_count += 1 - # Ensure the first set of versions were downloaded as part of the first manifest - self.__assert_agent_directories_available(versions=["1.0.0", "1.1.0", "1.2.0"]) - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, hotfix_frequency=10, - normal_frequency=10, reload_conf=reload_conf) as (update_handler, mock_telemetry): + data_file['ext_conf'] = "wire/ext_conf_requested_version.xml" + + self.prepare_agents(1) + test_frequency = 10 + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, + autoupdate_frequency=test_frequency) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_ga_manifest_version_version("5.2.0.1") + update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") self.__assert_exit_code_successful(update_handler) self.assertEqual(no_of_iterations, update_handler.get_iterations(), "Update handler should've run its course") - # Ensure the new agent versions were also downloaded once the manifest was updated - 
self.__assert_agent_directories_available(versions=["2.0.0", "2.1.0", "99999.0.0.0"]) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - - def test_it_should_upgrade_agent_in_given_time_window_if_permitted(self): - data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= 2: - reload_conf.call_count += 1 - # Ensure no new agent available so far - self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - test_normal_frequency = 0.1 - with self.__get_update_handler(iterations=50, test_data=data_file, reload_conf=reload_conf, - normal_frequency=test_normal_frequency) as (update_handler, mock_telemetry): - start_time = time.time() - update_handler.run(debug=True) - diff = time.time() - start_time - - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") - self.__assert_exit_code_successful(update_handler) - self.assertGreaterEqual(update_handler.get_iterations(), 3, - "Update handler should've run at least until the new GA was available") - # A bare-bone check to ensure that the agent waited for the new agent at least for the preset frequency time - self.assertGreater(diff, test_normal_frequency, "The test run should be at least greater than the set frequency") - self.__assert_agent_directories_available(versions=["99999.0.0.0"]) - self.__assert_upgrade_telemetry_emitted(mock_telemetry) + 
self.assertFalse(os.path.exists(self.agent_dir("5.2.0.1")), + "New agent directory should not be found") def test_it_should_not_auto_upgrade_if_auto_update_disabled(self): - with self.__get_update_handler(iterations=10) as (update_handler, mock_telemetry): + with self.__get_update_handler(iterations=10) as (update_handler, _): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) self.assertGreaterEqual(update_handler.get_iterations(), 10, "Update handler should've run 10 times") - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_not_auto_upgrade_if_corresponding_time_not_elapsed(self): - # On Normal upgrade, should not upgrade if Hotfix time elapsed - no_of_iterations = 10 - data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations / 2: - reload_conf.call_count += 1 - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. 
Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, hotfix_frequency=0.01, - normal_frequency=10, reload_conf=reload_conf) as (update_handler, mock_telemetry): + def test_it_should_download_only_requested_version_if_available(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler.run(debug=True) - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") - self.__assert_exit_code_successful(update_handler) - self.assertEqual(no_of_iterations, update_handler.get_iterations(), "Update handler didn't run completely") - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] == WALAEventOperation.AgentUpgrade] - self.assertGreater(len([msg for msg in upgrade_event_msgs if - 'Discovered new {0} upgrade WALinuxAgent-99999.0.0.0; Will upgrade on or after'.format( - AgentUpgradeType.Normal) in msg]), 0, "Error message not propagated properly") + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) - def test_it_should_download_only_requested_version_if_available(self): + def test_it_should_download_largest_version_if_ga_versioning_disabled(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): + with patch.object(conf, 
"get_enable_ga_versioning", return_value=False): update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0"]) def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self): data_file = mockwiredata.DATA_FILE.copy() @@ -2024,37 +1566,36 @@ def test_it_should_cleanup_all_agents_except_requested_version_and_current_versi self.assertEqual(20, self.agent_count(), "Agent directories not set properly") with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): + self.prepare_agents(1) data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_missing_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_requested_version_missing_in_manifest.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with 
patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] - # This will throw if corresponding message not found so not asserting on that - requested_version_found = next(kwarg for kwarg in agent_msgs if - "Found requested version in manifest: 5.2.1.0 for goal state incarnation_1" in kwarg['message']) - self.assertTrue(requested_version_found['is_success'], - "The requested version found op should be reported as a success") - - skipping_update = next(kwarg for kwarg in agent_msgs if - "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation_1, skipping agent update" in kwarg['message']) - self.assertEqual(skipping_update['version'], FlexibleVersion("5.2.1.0"), - "The not found message should be reported from requested agent version") - self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") - - def test_it_should_only_try_downloading_requested_version_on_new_incarnation(self): + self.__assert_exit_code_successful(update_handler) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if + kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] + # This will throw if corresponding message not found so not asserting on that + requested_version_found = next(kwarg for kwarg in agent_msgs if + "Goal state incarnation_1 is requesting a new agent version 5.2.1.0, will update the agent before processing the goal state" in kwarg['message']) + self.assertTrue(requested_version_found['is_success'], + "The requested version 
found op should be reported as a success") + + skipping_update = next(kwarg for kwarg in agent_msgs if + "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation: incarnation_1, skipping agent update" in kwarg['message']) + self.assertEqual(skipping_update['version'], str(CURRENT_VERSION), + "The not found message should be reported from current agent version") + self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") + + def test_it_should_try_downloading_requested_version_on_new_incarnation(self): no_of_iterations = 1000 # Set the test environment by adding 20 random agents to the agent directory @@ -2069,7 +1610,7 @@ def reload_conf(url, protocol): "goalstate"] >= 10 and mock_wire_data.call_counts["goalstate"] < 15: # Ensure we didn't try to download any agents except during the incarnation change - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) # Update the requested version to "99999.0.0.0" update_handler._protocol.mock_wire_data.set_extension_config_requested_version("99999.0.0.0") @@ -2083,23 +1624,21 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=0.01, hotfix_frequency=0.01) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + 
update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.assertGreaterEqual(reload_conf.call_count, 1, "Reload conf not updated as expected") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) self.assertEqual(update_handler._protocol.mock_wire_data.call_counts['agentArtifact'], 1, "only 1 agent should've been downloaded - 1 per incarnation") - self.assertEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, + self.assertGreaterEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, "only 1 agent manifest call should've been made - 1 per incarnation") - def test_it_should_fallback_to_old_update_logic_if_requested_version_not_available(self): + def test_it_should_update_to_largest_version_if_requested_version_not_available(self): no_of_iterations = 100 # Set the test environment by adding 20 random agents to the agent directory @@ -2115,7 +1654,7 @@ def reload_conf(url, protocol): reload_conf.call_count += 1 # By this point, the GS with requested version should've been executed. 
Verify that - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) # Update the ext-conf and incarnation and remove requested versions from GS, # this should download all versions requested in config @@ -2130,20 +1669,17 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=0.001) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted(mock_telemetry) - self.__assert_agent_directories_exist_and_others_dont_exist( - versions=["1.0.0", "1.1.0", "1.2.0", "2.0.0", "2.1.0", "9.9.9.10", "99999.0.0.0", str(CURRENT_VERSION)]) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) - def test_it_should_not_download_anything_if_requested_version_is_current_version_and_delete_all_agents(self): + def 
test_it_should_not_download_anything_if_requested_version_is_current_version(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" @@ -2151,15 +1687,14 @@ def test_it_should_not_download_anything_if_requested_version_is_current_version self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(test_data=data_file) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") def test_it_should_skip_wait_to_update_if_requested_version_available(self): no_of_iterations = 100 @@ -2185,18 +1720,18 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=10, hotfix_frequency=10) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, 
reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_ga_manifest_version_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(20) + update_handler.run(debug=True) self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") self.assertLess(update_handler.get_iterations(), no_of_iterations, "The code should've exited as soon as requested version was found") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") - def test_it_should_blacklist_current_agent_on_downgrade(self): + def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): # Create Agent directory for current agent self.prepare_agents(count=1) self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION))) @@ -2207,53 +1742,19 @@ def test_it_should_blacklist_current_agent_on_downgrade(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) - update_handler._protocol.mock_wire_data.set_incarnation(2) - try: - set_daemon_version("1.0.0.0") - update_handler.run(debug=True) - finally: - os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, upgrade=False, + 
self.__assert_upgrade_telemetry_emitted(mock_telemetry, upgrade=False, version=downgraded_version) current_agent = next(agent for agent in self.agents() if agent.version == CURRENT_VERSION) self.assertTrue(current_agent.is_blacklisted, "The current agent should be blacklisted") - self.assertEqual(current_agent.error.reason, "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " + self.assertEqual(current_agent.error.reason, "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION), "Invalid reason specified for blacklisting agent") - - def test_it_should_not_downgrade_below_daemon_version(self): - data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version("1.0.0.0") - update_handler._protocol.mock_wire_data.set_incarnation(2) - - try: - set_daemon_version("1.2.3.4") - update_handler.run(debug=True) - finally: - os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) - - self.__assert_exit_code_successful(update_handler) - upgrade_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] == WALAEventOperation.AgentUpgrade] - # This will throw if corresponding message not found so not asserting on that - requested_version_found = next(kwarg for kwarg in upgrade_msgs if - "Found requested version in manifest: 1.0.0.0 for goal state incarnation_2" in kwarg[ - 'message']) - self.assertTrue(requested_version_found['is_success'], - "The requested version found op should be reported as a success") - - skipping_update = next(kwarg for kwarg in upgrade_msgs if - "Can't process the upgrade as the requested version: 
1.0.0.0 is < current daemon version: 1.2.3.4" in - kwarg['message']) - self.assertFalse(skipping_update['is_success'], "Failed Event should be reported as a failure") - self.__assert_ga_version_in_status(update_handler._protocol.aggregate_status) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[downgraded_version, str(CURRENT_VERSION)]) @patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler') @@ -2287,12 +1788,13 @@ def iterator(*_, **__): mock_is_running.__get__ = Mock(side_effect=iterator) with patch('azurelinuxagent.ga.exthandlers.get_exthandlers_handler'): with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'): - with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): - with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff - with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): - with patch('time.sleep'): - with patch('sys.exit'): - self.update_handler.run() + with patch('azurelinuxagent.ga.agent_update.get_agent_update_handler'): + with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): + with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff + with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): + with patch('time.sleep'): + with patch('sys.exit'): + self.update_handler.run() def _setup_mock_thread_and_start_test_run(self, mock_thread, is_alive=True, invocations=0): thread = MagicMock() @@ -2622,34 +2124,41 @@ def test_it_should_process_goal_state_only_on_new_goal_state(self): update_handler = _create_update_handler() remote_access_handler = Mock() remote_access_handler.run = Mock() + agent_update_handler = Mock() + agent_update_handler.run = Mock() # process a goal state - update_handler._process_goal_state(exthandlers_handler, 
remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(1, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have been called on the first goal state") self.assertEqual(1, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on the first goal state") self.assertEqual(1, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on the first goal state") + self.assertEqual(1, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the first goal state") # process the same goal state - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(1, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have not been called on the same goal state") self.assertEqual(2, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on the same goal state") self.assertEqual(1, remote_access_handler.run.call_count, "remote_access_handler.run() should not have been called on the same goal state") + self.assertEqual(2, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the same goal state") # process a new goal state exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) exthandlers_handler.protocol.client.update_goal_state() - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(2, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have been called on a new goal state") self.assertEqual(3, 
exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on a new goal state") self.assertEqual(2, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on a new goal state") + self.assertEqual(3, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the new goal state") def test_it_should_write_the_agent_status_to_the_history_folder(self): with _mock_exthandlers_handler() as exthandlers_handler: update_handler = _create_update_handler() remote_access_handler = Mock() remote_access_handler.run = Mock() + agent_update_handler = Mock() + agent_update_handler.run = Mock() - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) incarnation = exthandlers_handler.protocol.get_goal_state().incarnation matches = glob.glob(os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "*_{0}".format(incarnation))) @@ -2850,16 +2359,17 @@ def test_update_handler_should_use_the_initial_goal_state_period_until_the_goal_ with patch('azurelinuxagent.common.conf.get_goal_state_period', return_value=goal_state_period): with _mock_exthandlers_handler([ExtensionStatusValue.transitioning, ExtensionStatusValue.success]) as exthandlers_handler: remote_access_handler = Mock() + agent_update_handler = Mock() update_handler = _create_update_handler() self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period") # the extension is transisioning, so we should still be using the initial goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, 
"Expected the initial goal state period when the extension is transitioning") # the goal state converged (the extension succeeded), so we should switch to the regular goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(goal_state_period, update_handler._goal_state_period, "Expected the regular goal state period after the goal state converged") def test_update_handler_should_switch_to_the_regular_goal_state_period_when_the_goal_state_does_not_converges(self): @@ -2868,17 +2378,18 @@ def test_update_handler_should_switch_to_the_regular_goal_state_period_when_the_ with patch('azurelinuxagent.common.conf.get_goal_state_period', return_value=goal_state_period): with _mock_exthandlers_handler([ExtensionStatusValue.transitioning, ExtensionStatusValue.transitioning]) as exthandlers_handler: remote_access_handler = Mock() + agent_update_handler = Mock() update_handler = _create_update_handler() self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period") # the extension is transisioning, so we should still be using the initial goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period when the extension is transitioning") # a new goal state arrives before the current goal state converged (the extension is transitioning), so we should switch to the regular goal state period exthandlers_handler.protocol.mock_wire_data.set_incarnation(100) - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, 
remote_access_handler, agent_update_handler) self.assertEqual(goal_state_period, update_handler._goal_state_period, "Expected the regular goal state period when the goal state does not converge") diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 196ed32db8..936533e97b 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -462,3 +462,6 @@ def set_extension_config(self, ext_conf_file): def set_extension_config_requested_version(self, version): self.ext_conf = WireProtocolData.replace_xml_element_value(self.ext_conf, "Version", version) + + def set_ga_manifest_version_version(self, version): + self.ga_manifest = WireProtocolData.replace_xml_element_value(self.ga_manifest, "Version", version) From 9943b390b5d46135d45cb7ff00af2b5e61bad28b Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 6 Feb 2023 15:37:18 -0800 Subject: [PATCH 02/14] report GA versioning supported feature. 
(#2752) * control agent updates in e2e tests and fix uts (#2743) * disable agent updates in dcr and fix uts * address comments * fix uts * report GA versioning feature --- .../common/agent_supported_feature.py | 16 +++- azurelinuxagent/common/conf.py | 9 ++ azurelinuxagent/ga/agent_update.py | 4 +- tests/common/test_agent_supported_feature.py | 18 ++++ tests/ga/test_agent_update.py | 10 +-- tests/protocol/test_wire.py | 83 ++++++++++++------- tests/test_agent.py | 1 + 7 files changed, 101 insertions(+), 40 deletions(-) diff --git a/azurelinuxagent/common/agent_supported_feature.py b/azurelinuxagent/common/agent_supported_feature.py index d7f93e2245..8663352a24 100644 --- a/azurelinuxagent/common/agent_supported_feature.py +++ b/azurelinuxagent/common/agent_supported_feature.py @@ -23,6 +23,7 @@ class SupportedFeatureNames(object): MultiConfig = "MultipleExtensionsPerHandler" ExtensionTelemetryPipeline = "ExtensionTelemetryPipeline" FastTrack = "FastTrack" + GAVersioningGovernance = "VersioningGovernance" # Guest Agent Versioning class AgentSupportedFeature(object): @@ -72,9 +73,22 @@ def __init__(self): supported=self.__SUPPORTED) +class _GAVersioningGovernanceFeature(AgentSupportedFeature): + + __NAME = SupportedFeatureNames.GAVersioningGovernance + __VERSION = "1.0" + __SUPPORTED = True + + def __init__(self): + super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME, + version=self.__VERSION, + supported=self.__SUPPORTED) + + # This is the list of features that Agent supports and we advertise to CRP __CRP_ADVERTISED_FEATURES = { - SupportedFeatureNames.MultiConfig: _MultiConfigFeature() + SupportedFeatureNames.MultiConfig: _MultiConfigFeature(), + SupportedFeatureNames.GAVersioningGovernance: _GAVersioningGovernanceFeature() } diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 6554ab3081..cb929e433a 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -129,6 +129,7 @@ def 
load_conf_from_file(conf_file_path, conf=__conf__): "ResourceDisk.EnableSwapEncryption": False, "AutoUpdate.Enabled": True, "EnableOverProvisioning": True, + "GAUpdates.Enabled": True, # # "Debug" options are experimental and may be removed in later # versions of the Agent. @@ -502,6 +503,14 @@ def get_monitor_network_configuration_changes(conf=__conf__): return conf.get_switch("Monitor.NetworkConfigurationChanges", False) +def get_ga_updates_enabled(conf=__conf__): + """ + If True, the agent go through update logic to look for new agents otherwise it will stop agent updates. + NOTE: This option is needed in e2e tests to control agent updates. + """ + return conf.get_switch("GAUpdates.Enabled", True) + + def get_cgroup_check_period(conf=__conf__): """ How often to perform checks on cgroups (are the processes in the cgroups as expected, diff --git a/azurelinuxagent/ga/agent_update.py b/azurelinuxagent/ga/agent_update.py index ba98613247..206a628eaa 100644 --- a/azurelinuxagent/ga/agent_update.py +++ b/azurelinuxagent/ga/agent_update.py @@ -182,8 +182,8 @@ def __log_event(level, msg_, success_=True): def run(self, goal_state): try: - # Ignore new agents if update is disabled - if not self._autoupdate_enabled: + # Ignore new agents if update is disabled. The latter flag only used in e2e tests. 
+ if not self._autoupdate_enabled or not conf.get_ga_updates_enabled(): return self._gs_id = goal_state.extensions_goal_state.id diff --git a/tests/common/test_agent_supported_feature.py b/tests/common/test_agent_supported_feature.py index cf367f90ba..c2d3b1981e 100644 --- a/tests/common/test_agent_supported_feature.py +++ b/tests/common/test_agent_supported_feature.py @@ -53,3 +53,21 @@ def test_it_should_return_extension_supported_features_properly(self): self.assertEqual(SupportedFeatureNames.ExtensionTelemetryPipeline, get_supported_feature_by_name(SupportedFeatureNames.ExtensionTelemetryPipeline).name, "Invalid/Wrong feature returned") + + def test_it_should_return_ga_versioning_governance_feature_properly(self): + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", True): + self.assertIn(SupportedFeatureNames.GAVersioningGovernance, get_agent_supported_features_list_for_crp(), + "GAVersioningGovernance should be fetched in crp_supported_features") + + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", False): + self.assertNotIn(SupportedFeatureNames.GAVersioningGovernance, get_agent_supported_features_list_for_crp(), + "GAVersioningGovernance should not be fetched in crp_supported_features as not supported") + + self.assertEqual(SupportedFeatureNames.GAVersioningGovernance, + get_supported_feature_by_name(SupportedFeatureNames.GAVersioningGovernance).name, + "Invalid/Wrong feature returned") + + # Raise error if feature name not found + with self.assertRaises(NotImplementedError): + get_supported_feature_by_name("ABC") + diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update.py index 5386bdaf67..f484a1dae0 100644 --- a/tests/ga/test_agent_update.py +++ b/tests/ga/test_agent_update.py @@ -117,7 +117,7 @@ def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(se with 
self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=10) as (agent_update_handler, mock_telemetry): agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) - agent_update_handler._protocol.update_goal_state() + agent_update_handler._protocol.client.update_goal_state() agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) @@ -151,7 +151,7 @@ def test_it_should_not_agent_update_if_requested_version_is_same_as_current_vers agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( str(CURRENT_VERSION)) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) - agent_update_handler._protocol.update_goal_state() + agent_update_handler._protocol.client.update_goal_state() agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if "requesting a new agent version" in kwarg['message'] and kwarg[ @@ -187,7 +187,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) - agent_update_handler._protocol.update_goal_state() + agent_update_handler._protocol.client.update_goal_state() with self.assertRaises(AgentUpgradeExitException) as context: agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=downgraded_version) @@ -208,7 +208,7 @@ def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): with 
self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) - agent_update_handler._protocol.update_goal_state() + agent_update_handler._protocol.client.update_goal_state() agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) @@ -245,7 +245,7 @@ def test_it_should_report_update_status_with_success(self): agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( str(CURRENT_VERSION)) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) - agent_update_handler._protocol.update_goal_state() + agent_update_handler._protocol.client.update_goal_state() agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) vm_agent_update_status = agent_update_handler.get_vmagent_update_status() self.assertEqual(VMAgentUpdateStatuses.Success, vm_agent_update_status.status) diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 2a36fc2913..ebe925bc33 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -360,41 +360,60 @@ def mock_http_put(url, *args, **__): exthandlers_handler = get_exthandlers_handler(protocol) with patch("azurelinuxagent.common.agent_supported_feature._MultiConfigFeature.is_supported", True): - exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - - self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") - self.assertIn("supportedFeatures", protocol.aggregate_status, "supported features not reported") - multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) - found = False - for feature in protocol.aggregate_status['supportedFeatures']: - if feature['Key'] == 
multi_config_feature.name and feature['Value'] == multi_config_feature.version: - found = True - break - self.assertTrue(found, "Multi-config name should be present in supportedFeatures") + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", True): + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + + self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") + self.assertIn("supportedFeatures", protocol.aggregate_status, "supported features not reported") + multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == multi_config_feature.name and feature['Value'] == multi_config_feature.version: + found = True + break + self.assertTrue(found, "Multi-config name should be present in supportedFeatures") + + ga_versioning_feature = get_supported_feature_by_name(SupportedFeatureNames.GAVersioningGovernance) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == ga_versioning_feature.name and feature['Value'] == ga_versioning_feature.version: + found = True + break + self.assertTrue(found, "ga versioning name should be present in supportedFeatures") # Feature should not be reported if not present with patch("azurelinuxagent.common.agent_supported_feature._MultiConfigFeature.is_supported", False): - exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - - self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") - if "supportedFeatures" not in protocol.aggregate_status: - # In the case Multi-config was the only feature available, 'supportedFeatures' should not be - # reported in the status blob as its not supported as of now. 
- # Asserting no other feature was available to report back to crp - self.assertEqual(0, len(get_agent_supported_features_list_for_crp()), - "supportedFeatures should be available if there are more features") - return - - # If there are other features available, confirm MultiConfig was not reported - multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) - found = False - for feature in protocol.aggregate_status['supportedFeatures']: - if feature['Key'] == multi_config_feature.name and feature['Value'] == multi_config_feature.version: - found = True - break - self.assertFalse(found, "Multi-config name should be present in supportedFeatures") + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", False): + + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + + self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") + if "supportedFeatures" not in protocol.aggregate_status: + # In the case Multi-config and GA Versioning only features available, 'supportedFeatures' should not be + # reported in the status blob as its not supported as of now. 
+ # Asserting no other feature was available to report back to crp + self.assertEqual(0, len(get_agent_supported_features_list_for_crp()), + "supportedFeatures should be available if there are more features") + return + + # If there are other features available, confirm MultiConfig and GA versioning was not reported + multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == multi_config_feature.name and feature['Value'] == multi_config_feature.version: + found = True + break + self.assertFalse(found, "Multi-config name should not be present in supportedFeatures") + + ga_versioning_feature = get_supported_feature_by_name(SupportedFeatureNames.GAVersioningGovernance) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == ga_versioning_feature.name and feature['Value'] == ga_versioning_feature.version: + found = True + break + self.assertFalse(found, "ga versioning name should not be present in supportedFeatures") @patch("azurelinuxagent.common.utils.restutil.http_request") def test_send_encoded_event(self, mock_http_request, *args): diff --git a/tests/test_agent.py b/tests/test_agent.py index f0f773f059..f5e91405a6 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -51,6 +51,7 @@ Extensions.Enabled = True Extensions.GoalStatePeriod = 6 Extensions.InitialGoalStatePeriod = 6 +GAUpdates.Enabled = True HttpProxy.Host = None HttpProxy.Port = None Lib.Dir = /var/lib/waagent From 23bf29ac427311f03c61c089e01d3a8b11cb6fc6 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 13 Feb 2023 14:39:46 -0800 Subject: [PATCH 03/14] Don't report SF flag if auto update is disabled (#2754) --- azurelinuxagent/common/agent_supported_feature.py | 9 ++++++++- tests/common/test_agent_supported_feature.py | 2 +- 2 files changed, 9 insertions(+), 2
deletions(-) diff --git a/azurelinuxagent/common/agent_supported_feature.py b/azurelinuxagent/common/agent_supported_feature.py index 8663352a24..c3e83c5142 100644 --- a/azurelinuxagent/common/agent_supported_feature.py +++ b/azurelinuxagent/common/agent_supported_feature.py @@ -14,6 +14,7 @@ # # Requires Python 2.6+ and Openssl 1.0+ # +from azurelinuxagent.common import conf class SupportedFeatureNames(object): @@ -74,10 +75,16 @@ def __init__(self): class _GAVersioningGovernanceFeature(AgentSupportedFeature): + """ + CRP would drive the RSM upgrade version if agent reports that it does support RSM upgrades with this flag otherwise CRP fallback to largest version. + Agent doesn't report supported feature flag if auto update is disabled or old version of agent running that doesn't understand GA versioning. + + Note: Especially Windows need this flag to report to CRP that GA doesn't support the updates. So linux adopted same flag to have a common solution. + """ __NAME = SupportedFeatureNames.GAVersioningGovernance __VERSION = "1.0" - __SUPPORTED = True + __SUPPORTED = conf.get_autoupdate_enabled() def __init__(self): super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME, diff --git a/tests/common/test_agent_supported_feature.py b/tests/common/test_agent_supported_feature.py index c2d3b1981e..d8401e4661 100644 --- a/tests/common/test_agent_supported_feature.py +++ b/tests/common/test_agent_supported_feature.py @@ -59,7 +59,7 @@ def test_it_should_return_ga_versioning_governance_feature_properly(self): self.assertIn(SupportedFeatureNames.GAVersioningGovernance, get_agent_supported_features_list_for_crp(), "GAVersioningGovernance should be fetched in crp_supported_features") - with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", False): + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): self.assertNotIn(SupportedFeatureNames.GAVersioningGovernance, 
get_agent_supported_features_list_for_crp(), "GAVersioningGovernance should not be fetched in crp_supported_features as not supported") From 7b7c955e4ac9ba56605b010a610ed35e23897fad Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 14 Feb 2023 17:48:17 -0800 Subject: [PATCH 04/14] fix uts (#2759) --- tests/common/test_agent_supported_feature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common/test_agent_supported_feature.py b/tests/common/test_agent_supported_feature.py index d8401e4661..c2d3b1981e 100644 --- a/tests/common/test_agent_supported_feature.py +++ b/tests/common/test_agent_supported_feature.py @@ -59,7 +59,7 @@ def test_it_should_return_ga_versioning_governance_feature_properly(self): self.assertIn(SupportedFeatureNames.GAVersioningGovernance, get_agent_supported_features_list_for_crp(), "GAVersioningGovernance should be fetched in crp_supported_features") - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", False): self.assertNotIn(SupportedFeatureNames.GAVersioningGovernance, get_agent_supported_features_list_for_crp(), "GAVersioningGovernance should not be fetched in crp_supported_features as not supported") From 23a52069f61b01f582fb5c00b751c9df14782128 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 6 Mar 2023 14:12:20 -0800 Subject: [PATCH 05/14] agent versioning test_suite (#2770) * agent versioning test_suite * address PR comments * fix pylint warning * fix update assertion * fix pylint error --- azurelinuxagent/common/event.py | 1 + azurelinuxagent/ga/update.py | 28 ++- tests_e2e/orchestrator/scripts/install-agent | 3 + tests_e2e/test_suites/agent_update.yml | 5 + tests_e2e/tests/agent_update/__init__.py | 0 tests_e2e/tests/agent_update/rsm_update.py | 175 
++++++++++++++++ tests_e2e/tests/lib/retry.py | 20 ++ tests_e2e/tests/lib/ssh_client.py | 7 + tests_e2e/tests/lib/virtual_machine.py | 210 +++++++++++++++++++ tests_e2e/tests/scripts/agent-python | 38 ++++ tests_e2e/tests/scripts/agent-service | 80 +++++++ tests_e2e/tests/scripts/agent-update-config | 33 +++ 12 files changed, 599 insertions(+), 1 deletion(-) create mode 100644 tests_e2e/test_suites/agent_update.yml create mode 100644 tests_e2e/tests/agent_update/__init__.py create mode 100644 tests_e2e/tests/agent_update/rsm_update.py create mode 100644 tests_e2e/tests/lib/virtual_machine.py create mode 100755 tests_e2e/tests/scripts/agent-python create mode 100755 tests_e2e/tests/scripts/agent-service create mode 100755 tests_e2e/tests/scripts/agent-update-config diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 1f903a9faa..1fdf8f9170 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -127,6 +127,7 @@ class WALAEventOperation: Update = "Update" VmSettings = "VmSettings" VmSettingsSummary = "VmSettingsSummary" + FeatureFlag = "FeatureFlag" SHOULD_ENCODE_MESSAGE_LEN = 80 diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 4af6d069f2..b02aaa9f5b 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -33,7 +33,8 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.protocol.imds import get_imds_client from azurelinuxagent.common.utils import fileutil, textutil -from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames +from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames, \ + get_agent_supported_features_list_for_crp from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.event import add_event, initialize_event_logger_vminfo_common_parameters, \ WALAEventOperation, 
EVENTS_DIRECTORY @@ -150,6 +151,9 @@ def __init__(self): # VM Size is reported via the heartbeat, default it here. self._vm_size = None + # Flag is Used to log if GA supports versioning on agent start + self._agent_supports_versioning_logged = False + # these members are used to avoid reporting errors too frequently self._heartbeat_update_goal_state_error_count = 0 self._update_goal_state_error_count = 0 @@ -527,6 +531,7 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_ # status reporting should be done even when the goal state is not updated agent_update_status = agent_update_handler.get_vmagent_update_status() self._report_status(exthandlers_handler, agent_update_status) + self._log_agent_supports_versioning_or_not() return # check for agent updates @@ -551,6 +556,9 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_ agent_update_status = agent_update_handler.get_vmagent_update_status() self._report_status(exthandlers_handler, agent_update_status) + # Logging after agent reports supported feature flag so this msg in sync with report status + self._log_agent_supports_versioning_or_not() + if self._processing_new_incarnation(): remote_access_handler.run() @@ -614,6 +622,24 @@ def _report_extensions_summary(self, vm_status): logger.warn(msg) add_event(op=WALAEventOperation.GoalState, is_success=False, message=msg) + def _log_agent_supports_versioning_or_not(self): + def _log_event(msg): + logger.info(msg) + add_event(AGENT_NAME, op=WALAEventOperation.FeatureFlag, message=msg) + if not self._agent_supports_versioning_logged: + supports_ga_versioning = False + for _, feature in get_agent_supported_features_list_for_crp().items(): + if feature.name == SupportedFeatureNames.GAVersioningGovernance: + supports_ga_versioning = True + break + if supports_ga_versioning: + msg = "Agent : {0} supports GA Versioning".format(CURRENT_VERSION) + _log_event(msg) + else: + msg = "Agent : {0} doesn't support GA 
Versioning".format(CURRENT_VERSION) + _log_event(msg) + self._agent_supports_versioning_logged = True + def _on_initial_goal_state_completed(self, extensions_summary): fileutil.write_file(self._initial_goal_state_file_path(), ustr(extensions_summary)) if conf.get_extensions_enabled() and self._goal_state_period != conf.get_goal_state_period(): diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 4b0c8f2497..f0fc705805 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -112,6 +112,9 @@ unzip.py "$package" "/var/lib/waagent/WALinuxAgent-$version" if [[ -e /etc/waagent.conf ]]; then sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' /etc/waagent.conf fi +# By default GAUpdates flag set to True, so that agent go through update logic to look for new agents. +# But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9. 
+sed -i '$a GAUpdates.Enabled=n' /etc/waagent.conf # # Restart the service diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml new file mode 100644 index 0000000000..d06ef98e40 --- /dev/null +++ b/tests_e2e/test_suites/agent_update.yml @@ -0,0 +1,5 @@ +name: "AgentUpdate" +tests: + - "agent_update/rsm_update.py" +images: "endorsed" +location: "eastus2euap" \ No newline at end of file diff --git a/tests_e2e/tests/agent_update/__init__.py b/tests_e2e/tests/agent_update/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py new file mode 100644 index 0000000000..94b166da15 --- /dev/null +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# BVT for the agent update scenario +# +# The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. +# For each scenario, we intiaite the rsm request with target version and then verify agent updated to that target version. 
+# +import json + +import requests +from azure.identity import DefaultAzureCredential +from azure.mgmt.compute.models import VirtualMachine + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_not_found +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine import VmMachine + + +class RsmUpdateBvt(AgentTest): + + def __init__(self, context: AgentTestContext): + super().__init__(context) + self._ssh_client = SshClient( + ip_address=self._context.vm_ip_address, + username=self._context.username, + private_key_file=self._context.private_key_file) + + def run(self) -> None: + # Allow agent to send supported feature flag + self._verify_agent_reported_supported_feature_flag() + + log.info("*******Verifying the Agent Downgrade scenario*******") + self._mock_rsm_update("1.3.0.0") + self._prepare_agent() + + # Verify downgrade scenario + self._verify_guest_agent_update("1.3.0.0") + + # Verify upgrade scenario + log.info("*******Verifying the Agent Upgrade scenario*******") + self._mock_rsm_update("1.3.1.0") + self._verify_guest_agent_update("1.3.1.0") + + # verify no version update. There is bug in CRP and will enable once it's fixed + # log.info("*******Verifying the no version update scenario*******") + # self._prepare_rsm_update("1.3.1.0") + # self._verify_guest_agent_update("1.3.1.0") + + def _prepare_agent(self) -> None: + """ + This method is to ensure agent is ready for accepting rsm updates. As part of that we update following flags + 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. + 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. 
+ """ + local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-python" + remote_path = self._context.remote_working_directory/"agent-python" + self._ssh_client.copy(local_path, remote_path) + local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-service" + remote_path = self._context.remote_working_directory/"agent-service" + self._ssh_client.copy(local_path, remote_path) + local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-update-config" + remote_path = self._context.remote_working_directory/"agent-update-config" + self._ssh_client.copy(local_path, remote_path) + self._ssh_client.run_command(f"sudo {remote_path}") + + @staticmethod + def _verify_agent_update_flag_enabled(vm: VmMachine) -> bool: + result: VirtualMachine = vm.get() + flag: bool = result.os_profile.linux_configuration.enable_vm_agent_platform_updates + if flag is None: + return False + return flag + + def _enable_agent_update_flag(self, vm: VmMachine) -> None: + osprofile = { + "location": self._context.vm.location, # location is required field + "properties": { + "osProfile": { + "linuxConfiguration": { + "enableVMAgentPlatformUpdates": True + } + } + } + } + vm.create_or_update(osprofile) + + def _mock_rsm_update(self, requested_version: str) -> None: + """ + This method is to simulate the rsm request. 
+ First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api + """ + vm: VmMachine = VmMachine(self._context.vm) + if not self._verify_agent_update_flag_enabled(vm): + # enable the flag + self._enable_agent_update_flag(vm) + log.info("Set the enableVMAgentPlatformUpdates flag to True") + else: + log.info("Already enableVMAgentPlatformUpdates flag set to True") + + credential = DefaultAzureCredential() + token = credential.get_token("https://management.azure.com/.default") + headers = {'Authorization': 'Bearer ' + token.token, 'Content-Type': 'application/json'} + # Later this api call will be replaced by azure-python-sdk wrapper + # Todo: management endpoints are different for national clouds. we need to change this. + url = "https://management.azure.com/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ + "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, self._context.vm.resource_group, self._context.vm.name) + data = { + "target": "Microsoft.OSTCLinuxAgent.Test", + "targetVersion": requested_version + } + + response = requests.post(url, data=json.dumps(data), headers=headers) + if response.status_code == 202: + log.info("RSM upgrade request accepted") + else: + raise Exception("Error occurred while RSM upgrade request. Status code : {0} and msg: {1}".format(response.status_code, response.content)) + + def _verify_guest_agent_update(self, requested_version: str) -> None: + """ + Verify current agent version running on rsm requested version + """ + def _check_agent_version(requested_version: str) -> bool: + stdout: str = self._ssh_client.run_command("sudo waagent --version") + expected_version = f"Goal state agent: {requested_version}" + if expected_version in stdout: + return True + else: + raise Exception("Guest agent didn't update to requested version {0} but found \n {1}. 
\n " + "To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, stdout)) + + log.info("Verifying agent updated to requested version") + retry_if_not_found(lambda: _check_agent_version(requested_version)) + stdout: str = self._ssh_client.run_command("sudo waagent --version") + log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}") + + def _verify_agent_reported_supported_feature_flag(self): + """ + RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log + """ + def _check_agent_supports_versioning() -> bool: + found: str = self._ssh_client.run_command("grep -q 'Agent.*supports GA Versioning' /var/log/waagent.log && echo true || echo false").rstrip() + return True if found == "true" else False + + log.info("Verifying agent reported supported feature flag") + found: bool = retry_if_not_found(_check_agent_supports_versioning) + + if not found: + raise Exception("Agent failed to report supported feature flag, so skipping agent update validations") + else: + log.info("Successfully verified agent reported supported feature flag") + + +if __name__ == "__main__": + RsmUpdateBvt.run_from_command_line() diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index e399efdda5..705b305648 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -57,3 +57,23 @@ def retry_ssh_run(operation: Callable[[], Any]) -> Any: raise log.warning("The operation failed, retrying in 30 secs.\n%s", e) time.sleep(30) + + +def retry_if_not_found(operation: Callable[[], bool], attempts: int = 5) -> bool: + """ + This method attempts the given operation retrying a few times + (after a short delay) + Note: Method used for operations which are return True or False + """ + found: bool = False + while attempts > 0 and not found: + attempts -= 1 + try: + found = operation() + except 
Exception: + if attempts == 0: + raise + if not found: + log.info("Current execution didn't find it, retrying in 30 secs.") + time.sleep(30) + return found diff --git a/tests_e2e/tests/lib/ssh_client.py b/tests_e2e/tests/lib/ssh_client.py index fda9911d92..8b428f352b 100644 --- a/tests_e2e/tests/lib/ssh_client.py +++ b/tests_e2e/tests/lib/ssh_client.py @@ -83,3 +83,10 @@ def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: command.extend([str(source), str(target)]) shell.run_command(command) + + def copy(self, local_path: Path, remote_path: Path): + """ + Copy file from local to remote machine + """ + destination = f"{self._username}@{self._ip_address}:{remote_path}" + shell.run_command(["scp", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, local_path, destination]) diff --git a/tests_e2e/tests/lib/virtual_machine.py b/tests_e2e/tests/lib/virtual_machine.py new file mode 100644 index 0000000000..9576d401a7 --- /dev/null +++ b/tests_e2e/tests/lib/virtual_machine.py @@ -0,0 +1,210 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This module includes facilities to execute some operations on virtual machines and scale sets (list extensions, restart, etc). 
+# + +from abc import ABC, abstractmethod +from builtins import TimeoutError +from typing import Any, List + +from azure.core.polling import LROPoller +from azure.identity import DefaultAzureCredential +from azure.mgmt.compute import ComputeManagementClient +from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineScaleSetExtension, VirtualMachineInstanceView, VirtualMachineScaleSetInstanceView, VirtualMachine, VirtualMachineScaleSetVM +from azure.mgmt.resource import ResourceManagementClient +from msrestazure.azure_cloud import Cloud + +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS +from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import execute_with_retry + + +class VirtualMachineBaseClass(ABC): + """ + Abstract base class for VirtualMachine and VmScaleSet. + + Defines the interface common to both classes and provides the implementation of some methods in that interface. 
+ """ + def __init__(self, vm: VmIdentifier): + super().__init__() + self._identifier: VmIdentifier = vm + cloud: Cloud = AZURE_CLOUDS[vm.cloud] + credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) + self._compute_client = ComputeManagementClient( + credential=credential, + subscription_id=vm.subscription, + base_url=cloud.endpoints.resource_manager, + credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + self._resource_client = ResourceManagementClient( + credential=credential, + subscription_id=vm.subscription, + base_url=cloud.endpoints.resource_manager, + credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + + @abstractmethod + def get_instance_view(self) -> Any: # Returns VirtualMachineInstanceView or VirtualMachineScaleSetInstanceView + """ + Retrieves the instance view of the virtual machine or scale set + """ + + @abstractmethod + def get_extensions(self) -> Any: # Returns List[VirtualMachineExtension] or List[VirtualMachineScaleSetExtension] + """ + Retrieves the extensions installed on the virtual machine or scale set + """ + + def restart(self, timeout=5 * 60) -> None: + """ + Restarts the virtual machine or scale set + """ + log.info("Initiating restart of %s", self._identifier) + + poller: LROPoller = execute_with_retry(self._begin_restart) + + poller.wait(timeout=timeout) + + if not poller.done(): + raise TimeoutError(f"Failed to restart {self._identifier.name} after {timeout} seconds") + + log.info("Restarted %s", self._identifier.name) + + @abstractmethod + def _begin_restart(self) -> LROPoller: + """ + Derived classes must provide the implementation for this method using their corresponding begin_restart() implementation + """ + + @abstractmethod + def get(self) -> Any: + """ + Retrieves the information about the virtual machine or scale set + """ + + def create_or_update(self, parameters=None, timeout=5 * 60) -> None: + """ + Creates or updates the virtual machine 
or scale set with custom settings + """ + if parameters is None: + parameters = {} + + log.info("Creating/Updating VM for %s", self._identifier) + + poller: LROPoller = execute_with_retry(lambda: self._begin_create_or_update(parameters)) + + poller.wait(timeout=timeout) + + if not poller.done(): + raise TimeoutError(f"Failed to restart {self._identifier.name} after {timeout} seconds") + + @abstractmethod + def _begin_create_or_update(self, parameters) -> Any: + """ + Derived classes must provide the implementation for this method using their corresponding begin_create_or_update() implementation + """ + + def __str__(self): + return f"{self._identifier}" + + +class VmMachine(VirtualMachineBaseClass): + def get_instance_view(self) -> VirtualMachineInstanceView: + log.info("Retrieving instance view for %s", self._identifier) + return execute_with_retry(lambda: self._compute_client.virtual_machines.get( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name, + expand="instanceView" + ).instance_view) + + def get_extensions(self) -> List[VirtualMachineExtension]: + log.info("Retrieving extensions for %s", self._identifier) + return execute_with_retry(lambda: self._compute_client.virtual_machine_extensions.list( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name)) + + def get(self) -> VirtualMachine: + log.info("Retrieving vm information for %s", self._identifier) + return execute_with_retry(lambda: self._compute_client.virtual_machines.get( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name)) + + def _begin_restart(self) -> LROPoller: + return self._compute_client.virtual_machines.begin_restart( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name) + + def _begin_create_or_update(self, parameters) -> LROPoller: + return self._compute_client.virtual_machines.begin_create_or_update(self._identifier.resource_group, self._identifier.name, 
parameters) + + +class VmScaleSet(VirtualMachineBaseClass): + def get_instance_view(self) -> VirtualMachineScaleSetInstanceView: + log.info("Retrieving instance view for %s", self._identifier) + + # TODO: Revisit this implementation. Currently this method returns the instance view of the first VM instance available. + # For the instance view of the complete VMSS, use the compute_client.virtual_machine_scale_sets function + # https://docs.microsoft.com/en-us/python/api/azure-mgmt-compute/azure.mgmt.compute.v2019_12_01.operations.virtualmachinescalesetsoperations?view=azure-python + for vm in execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.list(self._identifier.resource_group, self._identifier.name)): + try: + return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.get_instance_view( + resource_group_name=self._identifier.resource_group, + vm_scale_set_name=self._identifier.name, + instance_id=vm.instance_id)) + except Exception as e: + log.warning("Unable to retrieve instance view for scale set instance %s. 
Trying out other instances.\nError: %s", vm, e) + + raise Exception(f"Unable to retrieve instance view of any instances for scale set {self._identifier}") + + + @property + def vm_func(self): + return self._compute_client.virtual_machine_scale_set_vms + + @property + def extension_func(self): + return self._compute_client.virtual_machine_scale_set_extensions + + def get_extensions(self) -> List[VirtualMachineScaleSetExtension]: + log.info("Retrieving extensions for %s", self._identifier) + return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_extensions.list( + resource_group_name=self._identifier.resource_group, + vm_scale_set_name=self._identifier.name)) + + def get(self) -> List[VirtualMachineScaleSetVM]: + log.info("Retrieving vm information for %s", self._identifier) + vmss_vm_list: List[VirtualMachineScaleSetVM] = [] + for vm in execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.list(self._identifier.resource_group, self._identifier.name)): + try: + vmss_vm: VirtualMachineScaleSetVM = execute_with_retry(self._compute_client.virtual_machine_scale_set_vms.get( + resource_group_name=self._identifier.resource_group, vm_scale_set_name=self._identifier.name, instance_id=vm.instance_id)) + vmss_vm_list.append(vmss_vm) + + except Exception as e: + log.warning("Unable to retrieve vm information for scale set instance %s. 
Trying out other instances.\nError: %s", vm, e) + + return vmss_vm_list + + def _begin_restart(self) -> LROPoller: + return self._compute_client.virtual_machine_scale_sets.begin_restart( + resource_group_name=self._identifier.resource_group, + vm_scale_set_name=self._identifier.name) + + def _begin_create_or_update(self, parameters) -> None: + # TODO: Revisit this implementation + return \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent-python b/tests_e2e/tests/scripts/agent-python new file mode 100755 index 0000000000..786e6bb1ac --- /dev/null +++ b/tests_e2e/tests/scripts/agent-python @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# The script is needed for agent-update tests to find agent python for updating agent version in source file versoon.py +# +set -euo pipefail + +PYTHON="" +# Looking for Agent's Python +shebang=`cat $(which waagent) | head -1 | grep '^#!'` +if [[ -z ${shebang+x} ]]; then + echo "ERROR: Can't determine Agent's Python." 
+ exit 1 +fi +shebang=`echo $shebang | sed 's/^#!//'` +# example /usr/bin/env python3 +# some distros will have like /usr/bin/python3.6 +read -ra strarr <<< "$shebang" +for val in "${strarr[@]}"; do + PYTHON=$val +done +echo $PYTHON diff --git a/tests_e2e/tests/scripts/agent-service b/tests_e2e/tests/scripts/agent-service new file mode 100755 index 0000000000..9dc61367d7 --- /dev/null +++ b/tests_e2e/tests/scripts/agent-service @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -euo pipefail + +# +# The service name is walinuxagent in Ubuntu/debian and waagent elsewhere +# +# TODO: Update install-agent to use this script + +usage() ( + echo "Usage: agent-service command" + exit 1 +) + +if [ "$#" -lt 1 ]; then + usage +fi +cmd=$1 +shift + +if [ "$#" -ne 0 ] || [ -z ${cmd+x} ] ; then + usage +fi + +if command -v systemctl &> /dev/null; then + service-status() { systemctl --no-pager -l status $1; } + service-stop() { systemctl stop $1; } + service-restart() { systemctl restart $1; } + service-start() { systemctl start $1; } +else + service-status() { service $1 status; } + service-stop() { service $1 stop; } + service-restart() { service $1 restart; } + service-start() { service $1 start; } +fi + +if service-status walinuxagent > /dev/null 2>&1;then + service_name="walinuxagent" +else + service_name="waagent" +fi +echo "Service name: $service_name" + +if [[ "$cmd" == "restart" ]]; then + echo "Restarting service..." + service-restart $service_name +fi + +if [[ "$cmd" == "start" ]]; then + echo "Starting service..." + service-start $service_name +fi + +if [[ "$cmd" == "stop" ]]; then + echo "Stopping service..." + service-stop $service_name +fi + +if [[ "$cmd" == "status" ]]; then + echo "Service status..." + service-status $service_name +fi + diff --git a/tests_e2e/tests/scripts/agent-update-config b/tests_e2e/tests/scripts/agent-update-config new file mode 100755 index 0000000000..c1b1432da8 --- /dev/null +++ b/tests_e2e/tests/scripts/agent-update-config @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This script to update necessary flags to make agent ready for rsm updates +# +set -euo pipefail + +AGENT_PYTHON_SCRIPT="agent-python" +AGENT_SERVICE_SCRIPT="agent-service" +PYTHON=$(source $AGENT_PYTHON_SCRIPT) +echo "Agent's Python: $PYTHON" +# some distros return .pyc byte file instead source file .py. So, I retrieve parent directory first. +version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))') +version_file_full_path="$version_file_dir/version.py" +sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '1.0.0.0'/" $version_file_full_path +sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' /etc/waagent.conf +sed -i '$a AutoUpdate.GAFamily=Test' /etc/waagent.conf +source $AGENT_SERVICE_SCRIPT restart \ No newline at end of file From 87f83645082a53b7e8317d8972de95e129130ec8 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 9 Mar 2023 15:30:18 -0800 Subject: [PATCH 06/14] logging manifest type and don't log same error until next period in agent update. 
(#2778) * improve logging and don't log same error until next period * address comments * update comment * update comment --- azurelinuxagent/common/protocol/goal_state.py | 2 +- azurelinuxagent/common/protocol/wire.py | 4 +-- azurelinuxagent/ga/agent_update.py | 27 +++++++++++++------ tests/ga/test_agent_update.py | 26 ++++++++++++++++++ tests/ga/test_extension.py | 2 +- tests/protocol/test_wire.py | 8 +++--- 6 files changed, 53 insertions(+), 16 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 0980ca9d02..3d02268ced 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -185,7 +185,7 @@ def fetch_extension_manifest(self, extension_name, uris): def _fetch_manifest(self, manifest_type, name, uris): try: is_fast_track = self.extensions_goal_state.source == GoalStateSource.FastTrack - xml_text = self._wire_client.fetch_manifest(uris, use_verify_header=is_fast_track) + xml_text = self._wire_client.fetch_manifest(manifest_type, uris, use_verify_header=is_fast_track) self._history.save_manifest(name, xml_text) return ExtensionManifest(xml_text) except Exception as e: diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 38a3e0621d..ea397f0497 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -582,8 +582,8 @@ def call_storage_service(http_req, *args, **kwargs): def fetch_artifacts_profile_blob(self, uri): return self._fetch_content("artifacts profile blob", [uri], use_verify_header=False)[1] # _fetch_content returns a (uri, content) tuple - def fetch_manifest(self, uris, use_verify_header): - uri, content = self._fetch_content("manifest", uris, use_verify_header=use_verify_header) + def fetch_manifest(self, manifest_type, uris, use_verify_header): + uri, content = self._fetch_content("{0} manifest".format(manifest_type), uris, 
use_verify_header=use_verify_header) self.get_host_plugin().update_manifest_uri(uri) return content diff --git a/azurelinuxagent/ga/agent_update.py b/azurelinuxagent/ga/agent_update.py index 206a628eaa..3728e57ed8 100644 --- a/azurelinuxagent/ga/agent_update.py +++ b/azurelinuxagent/ga/agent_update.py @@ -29,6 +29,8 @@ def __init__(self, protocol): self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id self._last_attempted_update_time = datetime.datetime.min self._last_attempted_update_version = FlexibleVersion("0.0.0.0") + self._last_warning = "" + self._last_warning_time = datetime.datetime.min def __should_update_agent(self, requested_version): """ @@ -170,15 +172,24 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] - @staticmethod - def __log_event(level, msg_, success_=True): - if level == LogLevel.WARNING: - logger.warn(msg_) - elif level == LogLevel.ERROR: - logger.error(msg_) - elif level == LogLevel.INFO: + def __log_event(self, level, msg_, success_=True): + if level == LogLevel.INFO: logger.info(msg_) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + else: + msg_ += "[NOTE: Will not log the same error for the next 6 hours]" + # Incarnation may change if we get new goal state that would make whole string unique every time. 
So comparing only the substring until Incarnation if Incarnation included in msg + # Example msg "Unable to update Agent: No manifest links found for agent family: Prod for incarnation: incarnation_1, skipping agent update" + prefix_msg = msg_.split("incarnation", 1)[0] + prefix_last_warning_msg = self._last_warning.split("incarnation", 1)[0] + if prefix_msg != prefix_last_warning_msg or self._last_warning_time == datetime.datetime.min or datetime.datetime.now() >= self._last_warning_time + datetime.timedelta(hours=6): + if level == LogLevel.WARNING: + logger.warn(msg_) + elif level == LogLevel.ERROR: + logger.error(msg_) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + self._last_warning_time = datetime.datetime.now() + self._last_warning = msg_ def run(self, goal_state): try: diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update.py index f484a1dae0..73339d7c3c 100644 --- a/tests/ga/test_agent_update.py +++ b/tests/ga/test_agent_update.py @@ -309,3 +309,29 @@ def get_handler(url, **kwargs): self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) self.assertEqual(1, vm_agent_update_status.code) self.assertIn("Missing requested version", vm_agent_update_status.message) + + def test_it_should_not_log_same_error_next_hours(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in 
mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") \ No newline at end of file diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 10f442749d..5309b80566 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3185,7 +3185,7 @@ def manifest_location_handler(url, **kwargs): wire._DOWNLOAD_TIMEOUT = datetime.timedelta(minutes=0) try: with self.assertRaises(ExtensionDownloadError): - protocol.client.fetch_manifest(ext_handlers[0].manifest_uris, use_verify_header=False) + protocol.client.fetch_manifest("extension", ext_handlers[0].manifest_uris, use_verify_header=False) finally: wire._DOWNLOAD_TIMEOUT = download_timeout diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index ebe925bc33..cc73b0fb45 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -684,7 +684,7 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: HostPluginProtocol.is_default_channel = False - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded') @@ -707,7 +707,7 @@ def http_get_handler(url, *_, **kwargs): HostPluginProtocol.is_default_channel = False try: - manifest = protocol.client.fetch_manifest([manifest_url], 
use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded') @@ -744,7 +744,7 @@ def http_get_handler(url, *_, **kwargs): protocol.client.get_host_plugin() protocol.set_http_handlers(http_get_handler=http_get_handler) - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml) @@ -778,7 +778,7 @@ def http_get_handler(url, *_, **kwargs): protocol.set_http_handlers(http_get_handler=http_get_handler) with self.assertRaises(ExtensionDownloadError): - protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(len(urls), 4, "Unexpected number of HTTP requests: [{0}]".format(urls)) From 5b118d9d2bff4df17db2c3e326abc7bf0a2d0832 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 4 Apr 2023 11:24:37 -0700 Subject: [PATCH 07/14] Added self-update time window. 
(#2794) * Added self-update time window * address comment --- azurelinuxagent/ga/agent_update.py | 105 ++++++++++++++++++++---- tests/data/wire/ga_manifest_no_uris.xml | 39 +++++++++ tests/ga/test_agent_update.py | 35 ++++++++ tests/ga/test_update.py | 91 ++++++++++++++++++-- tests/protocol/mockwiredata.py | 3 + 5 files changed, 250 insertions(+), 23 deletions(-) create mode 100644 tests/data/wire/ga_manifest_no_uris.xml diff --git a/azurelinuxagent/ga/agent_update.py b/azurelinuxagent/ga/agent_update.py index 3728e57ed8..85c9ce53a6 100644 --- a/azurelinuxagent/ga/agent_update.py +++ b/azurelinuxagent/ga/agent_update.py @@ -20,6 +20,27 @@ def get_agent_update_handler(protocol): return AgentUpdateHandler(protocol) +class AgentUpgradeType(object): + """ + Enum for different modes of Agent Upgrade + """ + Hotfix = "Hotfix" + Normal = "Normal" + + +class AgentUpdateHandlerUpdateState(object): + """ + This class is primarily used to maintain the in-memory persistent state for the agent updates. + This state will be persisted throughout the current service run. 
+ """ + def __init__(self): + self.last_attempted_requested_version_update_time = datetime.datetime.min + self.last_attempted_hotfix_update_time = datetime.datetime.min + self.last_attempted_normal_update_time = datetime.datetime.min + self.last_warning = "" + self.last_warning_time = datetime.datetime.min + + class AgentUpdateHandler(object): def __init__(self, protocol): @@ -27,27 +48,73 @@ def __init__(self, protocol): self._ga_family = conf.get_autoupdate_gafamily() self._autoupdate_enabled = conf.get_autoupdate_enabled() self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id - self._last_attempted_update_time = datetime.datetime.min - self._last_attempted_update_version = FlexibleVersion("0.0.0.0") - self._last_warning = "" - self._last_warning_time = datetime.datetime.min + self._is_requested_version_update = True # This is to track the current update type(requested version or self update) + self.persistent_data = AgentUpdateHandlerUpdateState() def __should_update_agent(self, requested_version): """ - check to see if update is allowed once per (as specified in the conf.get_autoupdate_frequency()) - return false when we don't allow updates. + requested version update: + update is allowed once per (as specified in the conf.get_autoupdate_frequency()) + return false when we don't allow updates. + largest version update(self-update): + update is allowed once per (as specified in the conf.get_hotfix_upgrade_frequency() or conf.get_normal_upgrade_frequency()) + return false when we don't allow updates. 
""" now = datetime.datetime.now() - if self._last_attempted_update_time != datetime.datetime.min and self._last_attempted_update_version == requested_version: - next_attempt_time = self._last_attempted_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + if self._is_requested_version_update: + if self.persistent_data.last_attempted_requested_version_update_time != datetime.datetime.min: + next_attempt_time = self.persistent_data.last_attempted_requested_version_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + else: + next_attempt_time = now + + if next_attempt_time > now: + return False + # The time limit elapsed for us to allow updates. + return True else: - next_attempt_time = now + next_hotfix_time, next_normal_time = self.__get_next_upgrade_times(now) + upgrade_type = self.__get_agent_upgrade_type(requested_version) - if next_attempt_time > now: + if next_hotfix_time > now and next_normal_time > now: + return False + + if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( + upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): + return True return False - # The time limit elapsed for us to allow updates. - return True + + def __update_last_attempt_update_times(self): + now = datetime.datetime.now() + if self._is_requested_version_update: + self.persistent_data.last_attempted_requested_version_update_time = now + else: + self.persistent_data.last_attempted_normal_update_time = now + self.persistent_data.last_attempted_hotfix_update_time = now + + @staticmethod + def __get_agent_upgrade_type(requested_version): + # We follow semantic versioning for the agent, if .. is same, then has changed. + # In this case, we consider it as a Hotfix upgrade. Else we consider it a Normal upgrade. 
+ if requested_version.major == CURRENT_VERSION.major and requested_version.minor == CURRENT_VERSION.minor and requested_version.patch == CURRENT_VERSION.patch: + return AgentUpgradeType.Hotfix + return AgentUpgradeType.Normal + + def __get_next_upgrade_times(self, now): + """ + Get the next upgrade times + return: Next Hotfix Upgrade Time, Next Normal Upgrade Time + """ + + def get_next_process_time(last_val, frequency): + return now if last_val == datetime.datetime.min else last_val + datetime.timedelta(seconds=frequency) + + next_hotfix_time = get_next_process_time(self.persistent_data.last_attempted_hotfix_update_time, + conf.get_hotfix_upgrade_frequency()) + next_normal_time = get_next_process_time(self.persistent_data.last_attempted_normal_update_time, + conf.get_normal_upgrade_frequency()) + + return next_hotfix_time, next_normal_time def __get_agent_family_from_last_gs(self, goal_state): """ @@ -180,16 +247,17 @@ def __log_event(self, level, msg_, success_=True): msg_ += "[NOTE: Will not log the same error for the next 6 hours]" # Incarnation may change if we get new goal state that would make whole string unique every time. 
So comparing only the substring until Incarnation if Incarnation included in msg # Example msg "Unable to update Agent: No manifest links found for agent family: Prod for incarnation: incarnation_1, skipping agent update" + now = datetime.datetime.now() prefix_msg = msg_.split("incarnation", 1)[0] - prefix_last_warning_msg = self._last_warning.split("incarnation", 1)[0] - if prefix_msg != prefix_last_warning_msg or self._last_warning_time == datetime.datetime.min or datetime.datetime.now() >= self._last_warning_time + datetime.timedelta(hours=6): + prefix_last_warning_msg = self.persistent_data.last_warning.split("incarnation", 1)[0] + if prefix_msg != prefix_last_warning_msg or self.persistent_data.last_warning_time == datetime.datetime.min or now >= self.persistent_data.last_warning_time + datetime.timedelta(hours=6): if level == LogLevel.WARNING: logger.warn(msg_) elif level == LogLevel.ERROR: logger.error(msg_) add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) - self._last_warning_time = datetime.datetime.now() - self._last_warning = msg_ + self.persistent_data.last_warning_time = now + self.persistent_data.last_warning = msg_ def run(self, goal_state): try: @@ -209,7 +277,9 @@ def run(self, goal_state): GAUpdateReportState.report_error_msg = warn_msg agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) requested_version = self.__get_largest_version(agent_manifest) + self._is_requested_version_update = False else: + self._is_requested_version_update = True # Save the requested version to report back GAUpdateReportState.report_expected_version = requested_version # Remove the missing requested version warning once requested version becomes available @@ -241,8 +311,7 @@ def run(self, goal_state): self.__proceed_with_update(requested_version) finally: - self._last_attempted_update_time = datetime.datetime.now() - self._last_attempted_update_version = requested_version + 
self.__update_last_attempt_update_times() except Exception as err: if isinstance(err, AgentUpgradeExitException): diff --git a/tests/data/wire/ga_manifest_no_uris.xml b/tests/data/wire/ga_manifest_no_uris.xml new file mode 100644 index 0000000000..89573ad63b --- /dev/null +++ b/tests/data/wire/ga_manifest_no_uris.xml @@ -0,0 +1,39 @@ + + + + + 1.0.0 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__1.0.0 + + + + 1.1.0 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__1.1.0 + + + + 1.2.0 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__1.2.0 + + + + 2.0.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.0.0 + + + 2.1.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0 + + + 9.9.9.10 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__99999.0.0.0 + + + + 99999.0.0.0 + + + + diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update.py index 73339d7c3c..dbdf8dab5a 100644 --- a/tests/ga/test_agent_update.py +++ b/tests/ga/test_agent_update.py @@ -108,6 +108,41 @@ def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + def test_it_should_update_to_largest_version_if_time_window_not_elapsed(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") + 
agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_update_to_largest_version_if_time_window_elapsed(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=0.001): + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): self.prepare_agents(count=1) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index b93d409bb3..64cb9bcf07 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1427,16 +1427,18 @@ def test_run_emits_restart_event(self): class 
TestAgentUpgrade(UpdateTestCase): @contextlib.contextmanager - def create_conf_mocks(self, autoupdate_frequency): + def create_conf_mocks(self, autoupdate_frequency, hotfix_frequency, normal_frequency): # Disabling extension processing to speed up tests as this class deals with testing agent upgrades with patch("azurelinuxagent.common.conf.get_extensions_enabled", return_value=False): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): - with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - yield + with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", return_value=hotfix_frequency): + with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=normal_frequency): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + yield @contextlib.contextmanager def __get_update_handler(self, iterations=1, test_data=None, - reload_conf=None, autoupdate_frequency=0.001): + reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0): test_data = DATA_FILE if test_data is None else test_data @@ -1462,7 +1464,7 @@ def put_handler(url, *args, **_): return MockHttpResponse(status=201) protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) - with self.create_conf_mocks(autoupdate_frequency): + with self.create_conf_mocks(autoupdate_frequency, hotfix_frequency, normal_frequency): with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: update_handler._protocol = protocol yield update_handler, mock_telemetry @@ -1679,6 +1681,85 @@ def reload_conf(url, protocol): self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) + def test_it_should_not_update_largest_version_if_time_window_not_elapsed(self): + 
no_of_iterations = 20 + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data + + # This function reloads the conf mid-run to mimic an actual customer scenario + if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ + "goalstate"] >= 5: + reload_conf.call_count += 1 + + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) + + # Update the ga_manifest and incarnation to send largest version manifest + mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" + mock_wire_data.reload() + self._add_write_permission_to_goal_state_files() + reload_conf.incarnation += 1 + mock_wire_data.set_incarnation(reload_conf.incarnation) + + reload_conf.call_count = 0 + reload_conf.incarnation = 2 + + data_file = mockwiredata.DATA_FILE.copy() + # This is to fail the agent update at first attempt so that agent doesn't go through update + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, + hotfix_frequency=10, normal_frequency=10) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) + + self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") + self.__assert_exit_code_successful(update_handler) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_update_largest_version_if_time_window_elapsed(self): + no_of_iterations = 20 + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + def reload_conf(url, protocol): + 
mock_wire_data = protocol.mock_wire_data + + # This function reloads the conf mid-run to mimic an actual customer scenario + if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ + "goalstate"] >= 5: + reload_conf.call_count += 1 + + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) + + # Update the ga_manifest and incarnation to send largest version manifest + mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" + mock_wire_data.reload() + self._add_write_permission_to_goal_state_files() + reload_conf.incarnation += 1 + mock_wire_data.set_incarnation(reload_conf.incarnation) + + reload_conf.call_count = 0 + reload_conf.incarnation = 2 + + data_file = mockwiredata.DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, + hotfix_frequency=0.001, normal_frequency=0.001) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) + + self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) + def test_it_should_not_download_anything_if_requested_version_is_current_version(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 936533e97b..c3beabf566 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -460,6 +460,9 @@ def set_manifest_version(self, version): def set_extension_config(self, ext_conf_file): self.ext_conf = load_data(ext_conf_file) + def set_ga_manifest(self, ga_manifest): 
+ self.ga_manifest = load_data(ga_manifest) + def set_extension_config_requested_version(self, version): self.ext_conf = WireProtocolData.replace_xml_element_value(self.ext_conf, "Version", version) From b8f159243d8fca809c746d4eee552cf123d992c1 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 11 Apr 2023 16:48:35 -0700 Subject: [PATCH 08/14] Wait and retry for rsm goal state (#2801) * wait for rsm goal state * address comments --- tests_e2e/orchestrator/scripts/install-agent | 21 ++--- .../scripts/waagent-version} | 21 +---- tests_e2e/tests/agent_update/rsm_update.py | 38 ++++++--- tests_e2e/tests/lib/ssh_client.py | 7 -- tests_e2e/tests/scripts/agent-service | 80 ------------------- tests_e2e/tests/scripts/agent-update-config | 12 +-- tests_e2e/tests/scripts/rsm_goal_state.py | 66 +++++++++++++++ 7 files changed, 113 insertions(+), 132 deletions(-) rename tests_e2e/{tests/scripts/agent-python => orchestrator/scripts/waagent-version} (55%) delete mode 100755 tests_e2e/tests/scripts/agent-service create mode 100755 tests_e2e/tests/scripts/rsm_goal_state.py diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index f0fc705805..b494ac8e28 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -73,6 +73,7 @@ fi # # Output the initial version of the agent + # python=$(get-agent-python) waagent=$(get-agent-bin-path) @@ -107,25 +108,25 @@ echo "========== Installing Agent ==========" echo "Installing $package as version $version..." unzip.py "$package" "/var/lib/waagent/WALinuxAgent-$version" -# Ensure that AutoUpdate is enabled. some distros, e.g. Flatcar, don't have a waagent.conf -# but AutoUpdate defaults to True so there is no need to do anything in that case. 
-if [[ -e /etc/waagent.conf ]]; then - sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' /etc/waagent.conf -fi +python=$(get-agent-python) +# Ensure that AutoUpdate is enabled. some distros, e.g. Flatcar have a waagent.conf in different path +waagent_conf_path=$($python -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)') +echo "Agent's conf path: $waagent_conf_path" +sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path" # By default GAUpdates flag set to True, so that agent go through update logic to look for new agents. # But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9. -sed -i '$a GAUpdates.Enabled=n' /etc/waagent.conf +sed -i '$a GAUpdates.Enabled=n' "$waagent_conf_path" # # Restart the service # echo "Restarting service..." -service-stop $service_name +agent-service stop # Rename the previous log to ensure the new log starts with the agent we just installed mv /var/log/waagent.log /var/log/waagent."$(date --iso-8601=seconds)".log -service-start $service_name +agent-service start # # Verify that the new agent is running and output its status. 
@@ -137,7 +138,7 @@ check-version() { # We need to wait for the extension handler to start, give it a couple of minutes for i in {1..12} do - if $python "$waagent" --version | grep -E "Goal state agent:\s+$version" > /dev/null; then + if waagent-version | grep -E "Goal state agent:\s+$version" > /dev/null; then return 0 fi sleep 10 @@ -160,6 +161,6 @@ printf "\n" echo "========== Final Status ==========" $python "$waagent" --version printf "\n" -service-status $service_name +agent-service status exit $exit_code diff --git a/tests_e2e/tests/scripts/agent-python b/tests_e2e/orchestrator/scripts/waagent-version similarity index 55% rename from tests_e2e/tests/scripts/agent-python rename to tests_e2e/orchestrator/scripts/waagent-version index 786e6bb1ac..842ae91d29 100755 --- a/tests_e2e/tests/scripts/agent-python +++ b/tests_e2e/orchestrator/scripts/waagent-version @@ -16,23 +16,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -# The script is needed for agent-update tests to find agent python for updating agent version in source file versoon.py +# returns the version of the agent # set -euo pipefail -PYTHON="" -# Looking for Agent's Python -shebang=`cat $(which waagent) | head -1 | grep '^#!'` -if [[ -z ${shebang+x} ]]; then - echo "ERROR: Can't determine Agent's Python." 
- exit 1 -fi -shebang=`echo $shebang | sed 's/^#!//'` -# example /usr/bin/env python3 -# some distros will have like /usr/bin/python3.6 -read -ra strarr <<< "$shebang" -for val in "${strarr[@]}"; do - PYTHON=$val -done -echo $PYTHON +python=$(get-agent-python) +waagent=$(get-agent-bin-path) +$python "$waagent" --version \ No newline at end of file diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 94b166da15..df66fb8679 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -24,6 +24,7 @@ # For each scenario, we intiaite the rsm request with target version and then verify agent updated to that target version. # import json +from typing import List, Dict, Any import requests from azure.identity import DefaultAzureCredential @@ -46,12 +47,27 @@ def __init__(self, context: AgentTestContext): username=self._context.username, private_key_file=self._context.private_key_file) + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # This is expected as we validate the downgrade scenario + # + # WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted + # + { + 'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted" + } + + ] + return ignore_rules + def run(self) -> None: # Allow agent to send supported feature flag self._verify_agent_reported_supported_feature_flag() log.info("*******Verifying the Agent Downgrade scenario*******") self._mock_rsm_update("1.3.0.0") + self._check_rsm_gs("1.3.0.0") self._prepare_agent() # Verify downgrade scenario @@ -60,6 +76,7 @@ def run(self) -> None: # Verify upgrade scenario log.info("*******Verifying the Agent Upgrade scenario*******") self._mock_rsm_update("1.3.1.0") + self._check_rsm_gs("1.3.1.0") self._verify_guest_agent_update("1.3.1.0") # verify no version update. 
There is bug in CRP and will enable once it's fixed @@ -67,22 +84,19 @@ def run(self) -> None: # self._prepare_rsm_update("1.3.1.0") # self._verify_guest_agent_update("1.3.1.0") + def _check_rsm_gs(self, requested_version: str) -> None: + # This checks if RSM GS available to the agent after we mock the rsm update request + output = self._ssh_client.run_command(f"rsm_goal_state.py --version {requested_version}", use_sudo=True) + log.info('Verifying requested version GS available to the agent \n%s', output) + def _prepare_agent(self) -> None: """ This method is to ensure agent is ready for accepting rsm updates. As part of that we update following flags 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. """ - local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-python" - remote_path = self._context.remote_working_directory/"agent-python" - self._ssh_client.copy(local_path, remote_path) - local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-service" - remote_path = self._context.remote_working_directory/"agent-service" - self._ssh_client.copy(local_path, remote_path) - local_path = self._context.test_source_directory/"tests"/"scripts"/"agent-update-config" - remote_path = self._context.remote_working_directory/"agent-update-config" - self._ssh_client.copy(local_path, remote_path) - self._ssh_client.run_command(f"sudo {remote_path}") + output = self._ssh_client.run_command("agent-update-config", use_sudo=True) + log.info('Updating agent update required config \n%s', output) @staticmethod def _verify_agent_update_flag_enabled(vm: VmMachine) -> bool: @@ -141,7 +155,7 @@ def _verify_guest_agent_update(self, requested_version: str) -> None: Verify current agent version running on rsm requested version """ def 
_check_agent_version(requested_version: str) -> bool: - stdout: str = self._ssh_client.run_command("sudo waagent --version") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) expected_version = f"Goal state agent: {requested_version}" if expected_version in stdout: return True @@ -151,7 +165,7 @@ def _check_agent_version(requested_version: str) -> bool: log.info("Verifying agent updated to requested version") retry_if_not_found(lambda: _check_agent_version(requested_version)) - stdout: str = self._ssh_client.run_command("sudo waagent --version") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}") def _verify_agent_reported_supported_feature_flag(self): diff --git a/tests_e2e/tests/lib/ssh_client.py b/tests_e2e/tests/lib/ssh_client.py index 8b428f352b..fda9911d92 100644 --- a/tests_e2e/tests/lib/ssh_client.py +++ b/tests_e2e/tests/lib/ssh_client.py @@ -83,10 +83,3 @@ def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: command.extend([str(source), str(target)]) shell.run_command(command) - - def copy(self, local_path: Path, remote_path: Path): - """ - Copy file from local to remote machine - """ - destination = f"{self._username}@{self._ip_address}:{remote_path}" - shell.run_command(["scp", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, local_path, destination]) diff --git a/tests_e2e/tests/scripts/agent-service b/tests_e2e/tests/scripts/agent-service deleted file mode 100755 index 9dc61367d7..0000000000 --- a/tests_e2e/tests/scripts/agent-service +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash - -# Microsoft Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -euo pipefail - -# -# The service name is walinuxagent in Ubuntu/debian and waagent elsewhere -# -# TODO: Update install-agent to use this script - -usage() ( - echo "Usage: agent-service command" - exit 1 -) - -if [ "$#" -lt 1 ]; then - usage -fi -cmd=$1 -shift - -if [ "$#" -ne 0 ] || [ -z ${cmd+x} ] ; then - usage -fi - -if command -v systemctl &> /dev/null; then - service-status() { systemctl --no-pager -l status $1; } - service-stop() { systemctl stop $1; } - service-restart() { systemctl restart $1; } - service-start() { systemctl start $1; } -else - service-status() { service $1 status; } - service-stop() { service $1 stop; } - service-restart() { service $1 restart; } - service-start() { service $1 start; } -fi - -if service-status walinuxagent > /dev/null 2>&1;then - service_name="walinuxagent" -else - service_name="waagent" -fi -echo "Service name: $service_name" - -if [[ "$cmd" == "restart" ]]; then - echo "Restarting service..." - service-restart $service_name -fi - -if [[ "$cmd" == "start" ]]; then - echo "Starting service..." - service-start $service_name -fi - -if [[ "$cmd" == "stop" ]]; then - echo "Stopping service..." - service-stop $service_name -fi - -if [[ "$cmd" == "status" ]]; then - echo "Service status..." 
- service-status $service_name -fi - diff --git a/tests_e2e/tests/scripts/agent-update-config b/tests_e2e/tests/scripts/agent-update-config index c1b1432da8..f121e6f4b0 100755 --- a/tests_e2e/tests/scripts/agent-update-config +++ b/tests_e2e/tests/scripts/agent-update-config @@ -20,14 +20,14 @@ # set -euo pipefail -AGENT_PYTHON_SCRIPT="agent-python" -AGENT_SERVICE_SCRIPT="agent-service" -PYTHON=$(source $AGENT_PYTHON_SCRIPT) +PYTHON=$(get-agent-python) echo "Agent's Python: $PYTHON" # some distros return .pyc byte file instead source file .py. So, I retrieve parent directory first. version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))') version_file_full_path="$version_file_dir/version.py" sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '1.0.0.0'/" $version_file_full_path -sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' /etc/waagent.conf -sed -i '$a AutoUpdate.GAFamily=Test' /etc/waagent.conf -source $AGENT_SERVICE_SCRIPT restart \ No newline at end of file +waagent_conf_path=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)') +sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' "$waagent_conf_path" +sed -i '$a AutoUpdate.GAFamily=Test' "$waagent_conf_path" +echo "Restarting service..." +agent-service restart \ No newline at end of file diff --git a/tests_e2e/tests/scripts/rsm_goal_state.py b/tests_e2e/tests/scripts/rsm_goal_state.py new file mode 100755 index 0000000000..dd8469df9c --- /dev/null +++ b/tests_e2e/tests/scripts/rsm_goal_state.py @@ -0,0 +1,66 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Verify the latest goal state included rsm requested version and if not, retry +# +import argparse +import sys +import time + +from azurelinuxagent.common.protocol.util import get_protocol_util +from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties + + +def get_requested_version(gs: GoalState) -> str: + agent_families = gs.extensions_goal_state.agent_families + agent_family_manifests = [m for m in agent_families if m.name == "Test" and len(m.uris) > 0] + if len(agent_family_manifests) == 0: + raise Exception( + u"No manifest links found for agent family Test, skipping agent update verification") + manifest = agent_family_manifests[0] + if manifest.is_requested_version_specified and manifest.requested_version is not None: + return str(manifest.requested_version) + return "" + + +try: + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--version', required=True) + args = parser.parse_args() + + protocol = get_protocol_util().get_protocol(init_goal_state=False) + protocol.client.reset_goal_state( + goal_state_properties=GoalStateProperties.ExtensionsGoalState | GoalStateProperties.Certificates) + + attempts = 5 + while attempts > 0: + protocol.client.update_goal_state() + goal_state = protocol.client.get_goal_state() + requested_version = get_requested_version(goal_state) + if requested_version == args.version: + print("Latest GS includes rsm requested version : {0}.".format(requested_version)) + break + print("RSM requested version GS not available yet to the agent, checking again in 30 secs.") + attempts -= 
1 + time.sleep(30) + +except Exception as e: + print(f"{e}", file=sys.stderr) + sys.exit(1) + +sys.exit(0) From a90d7aad3837b7ba767ddd2dcbe3c6196f314d3c Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 21 Apr 2023 19:09:51 -0700 Subject: [PATCH 09/14] Not sharing agent update tests vms and added scenario to daily run (#2809) * add own vm property * add agent_update to daily run --- tests_e2e/pipeline/pipeline.yml | 1 - tests_e2e/test_suites/agent_update.yml | 3 ++- tests_e2e/tests/scripts/rsm_goal_state.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 9a1cd0e4e7..160795e30a 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -10,7 +10,6 @@ parameters: displayName: Test Suites type: string default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned - # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition # files. 
Those parameters are useful for 1-off tests, like testing a VHD or checking if diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index d06ef98e40..77a0144d57 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -2,4 +2,5 @@ name: "AgentUpdate" tests: - "agent_update/rsm_update.py" images: "endorsed" -location: "eastus2euap" \ No newline at end of file +location: "eastus2euap" +owns_vm: true \ No newline at end of file diff --git a/tests_e2e/tests/scripts/rsm_goal_state.py b/tests_e2e/tests/scripts/rsm_goal_state.py index dd8469df9c..be0a4deaf6 100755 --- a/tests_e2e/tests/scripts/rsm_goal_state.py +++ b/tests_e2e/tests/scripts/rsm_goal_state.py @@ -45,7 +45,7 @@ def get_requested_version(gs: GoalState) -> str: protocol = get_protocol_util().get_protocol(init_goal_state=False) protocol.client.reset_goal_state( - goal_state_properties=GoalStateProperties.ExtensionsGoalState | GoalStateProperties.Certificates) + goal_state_properties=GoalStateProperties.ExtensionsGoalState) attempts = 5 while attempts > 0: From f6b07a7121392b518431210054a3919f67c827bb Mon Sep 17 00:00:00 2001 From: nnandigam Date: Tue, 30 May 2023 01:37:43 -0700 Subject: [PATCH 10/14] merge conflicts --- .../orchestrator/lib/agent_test_suite.py | 4 +- tests_e2e/pipeline/pipeline.yml | 2 +- tests_e2e/tests/agent_update/rsm_update.py | 12 +- tests_e2e/tests/lib/virtual_machine.py | 210 ------------------ 4 files changed, 9 insertions(+), 219 deletions(-) delete mode 100644 tests_e2e/tests/lib/virtual_machine.py diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 3cd91ba4cb..caa499b341 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -320,8 +320,8 @@ def _setup_node(self, install_test_agent: bool) -> None: command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' 
*".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path)) log.info("%s\n%s", command, run_command(command, shell=True)) log.info("Adding tests/scripts") - # command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) - # log.info("%s\n%s", command, run_command(command, shell=True)) + command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) + log.info("%s\n%s", command, run_command(command, shell=True)) log.info("Adding tests/lib") command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path)) log.info("%s\n%s", command, run_command(command, shell=True)) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 160795e30a..21d36d0b33 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -9,7 +9,7 @@ parameters: - name: test_suites displayName: Test Suites type: string - default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned + default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition # files. 
Those parameters are useful for 1-off tests, like testing a VHD or checking if diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index df66fb8679..7c676f241c 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -35,7 +35,7 @@ from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry_if_not_found from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.virtual_machine import VmMachine +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient class RsmUpdateBvt(AgentTest): @@ -99,14 +99,14 @@ def _prepare_agent(self) -> None: log.info('Updating agent update required config \n%s', output) @staticmethod - def _verify_agent_update_flag_enabled(vm: VmMachine) -> bool: - result: VirtualMachine = vm.get() + def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: + result: VirtualMachine = vm.get_description() flag: bool = result.os_profile.linux_configuration.enable_vm_agent_platform_updates if flag is None: return False return flag - def _enable_agent_update_flag(self, vm: VmMachine) -> None: + def _enable_agent_update_flag(self, vm: VirtualMachineClient) -> None: osprofile = { "location": self._context.vm.location, # location is required field "properties": { @@ -117,14 +117,14 @@ def _enable_agent_update_flag(self, vm: VmMachine) -> None: } } } - vm.create_or_update(osprofile) + vm.update(osprofile) def _mock_rsm_update(self, requested_version: str) -> None: """ This method is to simulate the rsm request. 
First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api """ - vm: VmMachine = VmMachine(self._context.vm) + vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) if not self._verify_agent_update_flag_enabled(vm): # enable the flag self._enable_agent_update_flag(vm) diff --git a/tests_e2e/tests/lib/virtual_machine.py b/tests_e2e/tests/lib/virtual_machine.py deleted file mode 100644 index 9576d401a7..0000000000 --- a/tests_e2e/tests/lib/virtual_machine.py +++ /dev/null @@ -1,210 +0,0 @@ -# Microsoft Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# -# This module includes facilities to execute some operations on virtual machines and scale sets (list extensions, restart, etc). 
-# - -from abc import ABC, abstractmethod -from builtins import TimeoutError -from typing import Any, List - -from azure.core.polling import LROPoller -from azure.identity import DefaultAzureCredential -from azure.mgmt.compute import ComputeManagementClient -from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineScaleSetExtension, VirtualMachineInstanceView, VirtualMachineScaleSetInstanceView, VirtualMachine, VirtualMachineScaleSetVM -from azure.mgmt.resource import ResourceManagementClient -from msrestazure.azure_cloud import Cloud - -from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS -from tests_e2e.tests.lib.identifiers import VmIdentifier -from tests_e2e.tests.lib.logging import log -from tests_e2e.tests.lib.retry import execute_with_retry - - -class VirtualMachineBaseClass(ABC): - """ - Abstract base class for VirtualMachine and VmScaleSet. - - Defines the interface common to both classes and provides the implementation of some methods in that interface. 
- """ - def __init__(self, vm: VmIdentifier): - super().__init__() - self._identifier: VmIdentifier = vm - cloud: Cloud = AZURE_CLOUDS[vm.cloud] - credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) - self._compute_client = ComputeManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - self._resource_client = ResourceManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - - @abstractmethod - def get_instance_view(self) -> Any: # Returns VirtualMachineInstanceView or VirtualMachineScaleSetInstanceView - """ - Retrieves the instance view of the virtual machine or scale set - """ - - @abstractmethod - def get_extensions(self) -> Any: # Returns List[VirtualMachineExtension] or List[VirtualMachineScaleSetExtension] - """ - Retrieves the extensions installed on the virtual machine or scale set - """ - - def restart(self, timeout=5 * 60) -> None: - """ - Restarts the virtual machine or scale set - """ - log.info("Initiating restart of %s", self._identifier) - - poller: LROPoller = execute_with_retry(self._begin_restart) - - poller.wait(timeout=timeout) - - if not poller.done(): - raise TimeoutError(f"Failed to restart {self._identifier.name} after {timeout} seconds") - - log.info("Restarted %s", self._identifier.name) - - @abstractmethod - def _begin_restart(self) -> LROPoller: - """ - Derived classes must provide the implementation for this method using their corresponding begin_restart() implementation - """ - - @abstractmethod - def get(self) -> Any: - """ - Retrieves the information about the virtual machine or scale set - """ - - def create_or_update(self, parameters=None, timeout=5 * 60) -> None: - """ - Creates or updates the virtual machine 
or scale set with custom settings - """ - if parameters is None: - parameters = {} - - log.info("Creating/Updating VM for %s", self._identifier) - - poller: LROPoller = execute_with_retry(lambda: self._begin_create_or_update(parameters)) - - poller.wait(timeout=timeout) - - if not poller.done(): - raise TimeoutError(f"Failed to restart {self._identifier.name} after {timeout} seconds") - - @abstractmethod - def _begin_create_or_update(self, parameters) -> Any: - """ - Derived classes must provide the implementation for this method using their corresponding begin_create_or_update() implementation - """ - - def __str__(self): - return f"{self._identifier}" - - -class VmMachine(VirtualMachineBaseClass): - def get_instance_view(self) -> VirtualMachineInstanceView: - log.info("Retrieving instance view for %s", self._identifier) - return execute_with_retry(lambda: self._compute_client.virtual_machines.get( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name, - expand="instanceView" - ).instance_view) - - def get_extensions(self) -> List[VirtualMachineExtension]: - log.info("Retrieving extensions for %s", self._identifier) - return execute_with_retry(lambda: self._compute_client.virtual_machine_extensions.list( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name)) - - def get(self) -> VirtualMachine: - log.info("Retrieving vm information for %s", self._identifier) - return execute_with_retry(lambda: self._compute_client.virtual_machines.get( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name)) - - def _begin_restart(self) -> LROPoller: - return self._compute_client.virtual_machines.begin_restart( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name) - - def _begin_create_or_update(self, parameters) -> LROPoller: - return self._compute_client.virtual_machines.begin_create_or_update(self._identifier.resource_group, self._identifier.name, 
parameters) - - -class VmScaleSet(VirtualMachineBaseClass): - def get_instance_view(self) -> VirtualMachineScaleSetInstanceView: - log.info("Retrieving instance view for %s", self._identifier) - - # TODO: Revisit this implementation. Currently this method returns the instance view of the first VM instance available. - # For the instance view of the complete VMSS, use the compute_client.virtual_machine_scale_sets function - # https://docs.microsoft.com/en-us/python/api/azure-mgmt-compute/azure.mgmt.compute.v2019_12_01.operations.virtualmachinescalesetsoperations?view=azure-python - for vm in execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.list(self._identifier.resource_group, self._identifier.name)): - try: - return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.get_instance_view( - resource_group_name=self._identifier.resource_group, - vm_scale_set_name=self._identifier.name, - instance_id=vm.instance_id)) - except Exception as e: - log.warning("Unable to retrieve instance view for scale set instance %s. 
Trying out other instances.\nError: %s", vm, e) - - raise Exception(f"Unable to retrieve instance view of any instances for scale set {self._identifier}") - - - @property - def vm_func(self): - return self._compute_client.virtual_machine_scale_set_vms - - @property - def extension_func(self): - return self._compute_client.virtual_machine_scale_set_extensions - - def get_extensions(self) -> List[VirtualMachineScaleSetExtension]: - log.info("Retrieving extensions for %s", self._identifier) - return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_extensions.list( - resource_group_name=self._identifier.resource_group, - vm_scale_set_name=self._identifier.name)) - - def get(self) -> List[VirtualMachineScaleSetVM]: - log.info("Retrieving vm information for %s", self._identifier) - vmss_vm_list: List[VirtualMachineScaleSetVM] = [] - for vm in execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.list(self._identifier.resource_group, self._identifier.name)): - try: - vmss_vm: VirtualMachineScaleSetVM = execute_with_retry(self._compute_client.virtual_machine_scale_set_vms.get( - resource_group_name=self._identifier.resource_group, vm_scale_set_name=self._identifier.name, instance_id=vm.instance_id)) - vmss_vm_list.append(vmss_vm) - - except Exception as e: - log.warning("Unable to retrieve vm information for scale set instance %s. 
Trying out other instances.\nError: %s", vm, e) - - return vmss_vm_list - - def _begin_restart(self) -> LROPoller: - return self._compute_client.virtual_machine_scale_sets.begin_restart( - resource_group_name=self._identifier.resource_group, - vm_scale_set_name=self._identifier.name) - - def _begin_create_or_update(self, parameters) -> None: - # TODO: Revisit this implementation - return \ No newline at end of file From e9c7fee118d43e7e70ae2350c267cb5fb0a3c590 Mon Sep 17 00:00:00 2001 From: nnandigam Date: Wed, 31 May 2023 11:21:41 -0700 Subject: [PATCH 11/14] address comments --- tests_e2e/test_suites/agent_update.yml | 2 +- tests_e2e/tests/agent_update/rsm_update.py | 63 ++++++++++++------- tests_e2e/tests/lib/retry.py | 6 +- ...ate-config => modify-agent-version-config} | 0 ...al_state.py => wait_for_rsm_goal_state.py} | 31 +++++---- 5 files changed, 62 insertions(+), 40 deletions(-) rename tests_e2e/tests/scripts/{agent-update-config => modify-agent-version-config} (100%) rename tests_e2e/tests/scripts/{rsm_goal_state.py => wait_for_rsm_goal_state.py} (70%) diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 77a0144d57..9d17ac35bb 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -1,6 +1,6 @@ name: "AgentUpdate" tests: - "agent_update/rsm_update.py" -images: "endorsed" +images: "ubuntu_2004" location: "eastus2euap" owns_vm: true \ No newline at end of file diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 7c676f241c..605956ac9a 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -21,19 +21,22 @@ # BVT for the agent update scenario # # The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. 
-# For each scenario, we intiaite the rsm request with target version and then verify agent updated to that target version. + # For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. # import json from typing import List, Dict, Any import requests +from assertpy import assert_that from azure.identity import DefaultAzureCredential from azure.mgmt.compute.models import VirtualMachine +from msrestazure.azure_cloud import Cloud from tests_e2e.tests.lib.agent_test import AgentTest from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS from tests_e2e.tests.lib.logging import log -from tests_e2e.tests.lib.retry import retry_if_not_found +from tests_e2e.tests.lib.retry import retry_if_false from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient @@ -66,27 +69,39 @@ def run(self) -> None: self._verify_agent_reported_supported_feature_flag() log.info("*******Verifying the Agent Downgrade scenario*******") - self._mock_rsm_update("1.3.0.0") - self._check_rsm_gs("1.3.0.0") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm is \n%s", stdout) + downgrade_version: str = "1.3.0.0" + log.info("Attempting downgrade version %s", downgrade_version) + self._request_rsm_update(downgrade_version) + self._check_rsm_gs(downgrade_version) self._prepare_agent() # Verify downgrade scenario - self._verify_guest_agent_update("1.3.0.0") + self._verify_guest_agent_update(downgrade_version) # Verify upgrade scenario log.info("*******Verifying the Agent Upgrade scenario*******") - self._mock_rsm_update("1.3.1.0") - self._check_rsm_gs("1.3.1.0") - self._verify_guest_agent_update("1.3.1.0") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on 
the vm is \n%s", stdout) + upgrade_version: str = "1.3.1.0" + log.info("Attempting upgrade version %s", upgrade_version) + self._request_rsm_update(upgrade_version) + self._check_rsm_gs(upgrade_version) + self._verify_guest_agent_update(upgrade_version) # verify no version update. There is bug in CRP and will enable once it's fixed - # log.info("*******Verifying the no version update scenario*******") - # self._prepare_rsm_update("1.3.1.0") - # self._verify_guest_agent_update("1.3.1.0") + log.info("*******Verifying the no version update scenario*******") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm is \n%s", stdout) + version: str = "1.3.1.0" + log.info("Attempting update version same as current version %s", upgrade_version) + self._request_rsm_update(version) + self._verify_guest_agent_update(version) def _check_rsm_gs(self, requested_version: str) -> None: # This checks if RSM GS available to the agent after we mock the rsm update request - output = self._ssh_client.run_command(f"rsm_goal_state.py --version {requested_version}", use_sudo=True) + output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True) log.info('Verifying requested version GS available to the agent \n%s', output) def _prepare_agent(self) -> None: @@ -95,7 +110,7 @@ def _prepare_agent(self) -> None: 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. 
""" - output = self._ssh_client.run_command("agent-update-config", use_sudo=True) + output = self._ssh_client.run_command("modify-agent-version-config", use_sudo=True) log.info('Updating agent update required config \n%s', output) @staticmethod @@ -119,7 +134,7 @@ def _enable_agent_update_flag(self, vm: VirtualMachineClient) -> None: } vm.update(osprofile) - def _mock_rsm_update(self, requested_version: str) -> None: + def _request_rsm_update(self, requested_version: str) -> None: """ This method is to simulate the rsm request. First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api @@ -127,17 +142,19 @@ def _mock_rsm_update(self, requested_version: str) -> None: vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) if not self._verify_agent_update_flag_enabled(vm): # enable the flag + log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag") self._enable_agent_update_flag(vm) log.info("Set the enableVMAgentPlatformUpdates flag to True") else: log.info("Already enableVMAgentPlatformUpdates flag set to True") - credential = DefaultAzureCredential() - token = credential.get_token("https://management.azure.com/.default") + cloud: Cloud = AZURE_CLOUDS[self._context.vm.cloud] + credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) + token = credential.get_token(cloud.endpoints.resource_manager + "/.default") headers = {'Authorization': 'Bearer ' + token.token, 'Content-Type': 'application/json'} # Later this api call will be replaced by azure-python-sdk wrapper - # Todo: management endpoints are different for national clouds. we need to change this. 
- url = "https://management.azure.com/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ + base_url = cloud.endpoints.resource_manager + url = base_url + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, self._context.vm.resource_group, self._context.vm.name) data = { "target": "Microsoft.OSTCLinuxAgent.Test", @@ -164,7 +181,7 @@ def _check_agent_version(requested_version: str) -> bool: "To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, stdout)) log.info("Verifying agent updated to requested version") - retry_if_not_found(lambda: _check_agent_version(requested_version)) + retry_if_false(lambda: _check_agent_version(requested_version)) stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info(f"Verified agent updated to requested version. 
Current agent version running:\n {stdout}") @@ -177,12 +194,10 @@ def _check_agent_supports_versioning() -> bool: return True if found == "true" else False log.info("Verifying agent reported supported feature flag") - found: bool = retry_if_not_found(_check_agent_supports_versioning) + found: bool = retry_if_false(_check_agent_supports_versioning) - if not found: - raise Exception("Agent failed to report supported feature flag, so skipping agent update validations") - else: - log.info("Successfully verified agent reported supported feature flag") + assert_that(found).is_true().described_as("Agent failed to report supported feature flag, so skipping agent update validations") + log.info("Successfully verified agent reported supported feature flag") if __name__ == "__main__": diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index 705b305648..31ce94cb20 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -59,7 +59,7 @@ def retry_ssh_run(operation: Callable[[], Any]) -> Any: time.sleep(30) -def retry_if_not_found(operation: Callable[[], bool], attempts: int = 5) -> bool: +def retry_if_false(operation: Callable[[], bool], attempts: int = 5, duration: int = 30) -> bool: """ This method attempts the given operation retrying a few times (after a short delay) @@ -74,6 +74,6 @@ def retry_if_not_found(operation: Callable[[], bool], attempts: int = 5) -> bool if attempts == 0: raise if not found: - log.info("Current execution didn't find it, retrying in 30 secs.") - time.sleep(30) + log.info(f"Current execution didn't find it, retrying in {duration} secs.") + time.sleep(duration) return found diff --git a/tests_e2e/tests/scripts/agent-update-config b/tests_e2e/tests/scripts/modify-agent-version-config similarity index 100% rename from tests_e2e/tests/scripts/agent-update-config rename to tests_e2e/tests/scripts/modify-agent-version-config diff --git a/tests_e2e/tests/scripts/rsm_goal_state.py 
b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py similarity index 70% rename from tests_e2e/tests/scripts/rsm_goal_state.py rename to tests_e2e/tests/scripts/wait_for_rsm_goal_state.py index be0a4deaf6..efac62e18c 100755 --- a/tests_e2e/tests/scripts/rsm_goal_state.py +++ b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py @@ -20,10 +20,11 @@ # import argparse import sys -import time from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties +from azurelinuxagent.common.protocol.wire import WireProtocol +from tests_e2e.tests.lib.retry import retry_if_false def get_requested_version(gs: GoalState) -> str: @@ -38,6 +39,16 @@ def get_requested_version(gs: GoalState) -> str: return "" +def verify_rsm_requested_version(protocol: WireProtocol, expected_version: str) -> bool: + protocol.client.update_goal_state() + goal_state = protocol.client.get_goal_state() + requested_version = get_requested_version(goal_state) + if requested_version == expected_version: + return True + else: + return False + + try: parser = argparse.ArgumentParser() parser.add_argument('-v', '--version', required=True) @@ -47,17 +58,13 @@ def get_requested_version(gs: GoalState) -> str: protocol.client.reset_goal_state( goal_state_properties=GoalStateProperties.ExtensionsGoalState) - attempts = 5 - while attempts > 0: - protocol.client.update_goal_state() - goal_state = protocol.client.get_goal_state() - requested_version = get_requested_version(goal_state) - if requested_version == args.version: - print("Latest GS includes rsm requested version : {0}.".format(requested_version)) - break - print("RSM requested version GS not available yet to the agent, checking again in 30 secs.") - attempts -= 1 - time.sleep(30) + found: bool = retry_if_false(lambda: verify_rsm_requested_version(protocol, args.version)) + + if not found: + raise Exception("Latest GS does not include rsm requested version : 
{0}.".format(args.version)) + else: + print("Latest GS includes rsm requested version : {0}.".format(args.version)) + except Exception as e: print(f"{e}", file=sys.stderr) From 7e66d5491aa1ca62e7405bd687267565c79f510a Mon Sep 17 00:00:00 2001 From: nnandigam Date: Mon, 5 Jun 2023 01:06:36 -0700 Subject: [PATCH 12/14] address comments --- azurelinuxagent/common/event.py | 1 - ...gent_update.py => agent_update_handler.py} | 77 ++++++++++--------- azurelinuxagent/ga/update.py | 50 +++++------- tests/ga/mocks.py | 2 +- tests/ga/test_agent_update.py | 2 +- tests/ga/test_report_status.py | 2 +- tests_e2e/test_suites/agent_update.yml | 2 +- 7 files changed, 63 insertions(+), 73 deletions(-) rename azurelinuxagent/ga/{agent_update.py => agent_update_handler.py} (85%) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 1fdf8f9170..1f903a9faa 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -127,7 +127,6 @@ class WALAEventOperation: Update = "Update" VmSettings = "VmSettings" VmSettingsSummary = "VmSettingsSummary" - FeatureFlag = "FeatureFlag" SHOULD_ENCODE_MESSAGE_LEN = 80 diff --git a/azurelinuxagent/ga/agent_update.py b/azurelinuxagent/ga/agent_update_handler.py similarity index 85% rename from azurelinuxagent/ga/agent_update.py rename to azurelinuxagent/ga/agent_update_handler.py index 85c9ce53a6..7182a7463b 100644 --- a/azurelinuxagent/ga/agent_update.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -12,7 +12,7 @@ from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN +from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT from azurelinuxagent.ga.guestagent import GuestAgent, 
GAUpdateReportState @@ -47,9 +47,9 @@ def __init__(self, protocol): self._protocol = protocol self._ga_family = conf.get_autoupdate_gafamily() self._autoupdate_enabled = conf.get_autoupdate_enabled() - self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id + self._gs_id = "unknown" self._is_requested_version_update = True # This is to track the current update type(requested version or self update) - self.persistent_data = AgentUpdateHandlerUpdateState() + self.update_state = AgentUpdateHandlerUpdateState() def __should_update_agent(self, requested_version): """ @@ -63,8 +63,8 @@ def __should_update_agent(self, requested_version): now = datetime.datetime.now() if self._is_requested_version_update: - if self.persistent_data.last_attempted_requested_version_update_time != datetime.datetime.min: - next_attempt_time = self.persistent_data.last_attempted_requested_version_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + if self.update_state.last_attempted_requested_version_update_time != datetime.datetime.min: + next_attempt_time = self.update_state.last_attempted_requested_version_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) else: next_attempt_time = now @@ -76,9 +76,6 @@ def __should_update_agent(self, requested_version): next_hotfix_time, next_normal_time = self.__get_next_upgrade_times(now) upgrade_type = self.__get_agent_upgrade_type(requested_version) - if next_hotfix_time > now and next_normal_time > now: - return False - if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): return True @@ -87,10 +84,10 @@ def __should_update_agent(self, requested_version): def __update_last_attempt_update_times(self): now = datetime.datetime.now() if self._is_requested_version_update: - self.persistent_data.last_attempted_requested_version_update_time = now + 
self.update_state.last_attempted_requested_version_update_time = now else: - self.persistent_data.last_attempted_normal_update_time = now - self.persistent_data.last_attempted_hotfix_update_time = now + self.update_state.last_attempted_normal_update_time = now + self.update_state.last_attempted_hotfix_update_time = now @staticmethod def __get_agent_upgrade_type(requested_version): @@ -109,14 +106,14 @@ def __get_next_upgrade_times(self, now): def get_next_process_time(last_val, frequency): return now if last_val == datetime.datetime.min else last_val + datetime.timedelta(seconds=frequency) - next_hotfix_time = get_next_process_time(self.persistent_data.last_attempted_hotfix_update_time, + next_hotfix_time = get_next_process_time(self.update_state.last_attempted_hotfix_update_time, conf.get_hotfix_upgrade_frequency()) - next_normal_time = get_next_process_time(self.persistent_data.last_attempted_normal_update_time, + next_normal_time = get_next_process_time(self.update_state.last_attempted_normal_update_time, conf.get_normal_upgrade_frequency()) return next_hotfix_time, next_normal_time - def __get_agent_family_from_last_gs(self, goal_state): + def __get_agent_family_manifests(self, goal_state): """ Get the agent_family from last GS for the given family Returns: first entry of Manifest @@ -124,7 +121,17 @@ def __get_agent_family_from_last_gs(self, goal_state): """ family = self._ga_family agent_families = goal_state.extensions_goal_state.agent_families - agent_family_manifests = [m for m in agent_families if m.name == family and len(m.uris) > 0] + family_found = False + agent_family_manifests = [] + for m in agent_families: + if m.name == family: + family_found = True + if len(m.uris) > 0: + agent_family_manifests.append(m) + + if not family_found: + raise Exception(u"Agent family: {0} not found in the goal state, skipping agent update".format(family)) + if len(agent_family_manifests) == 0: raise Exception( u"No manifest links found for agent family: {0} for 
incarnation: {1}, skipping agent update".format( @@ -135,7 +142,7 @@ def __get_agent_family_from_last_gs(self, goal_state): def __get_requested_version(agent_family): """ Get the requested version from agent family - Returns: Requested version if supported and available + Returns: Requested version if supported and available in the GS None if requested version missing or GA versioning not enabled """ if conf.get_enable_ga_versioning() and agent_family.is_requested_version_specified: @@ -184,7 +191,6 @@ def __purge_extra_agents_from_disk(known_agents): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) known_versions = [agent.version for agent in known_agents] - known_versions.append(CURRENT_VERSION) for agent_path in glob.iglob(path): try: @@ -200,8 +206,7 @@ def __purge_extra_agents_from_disk(known_agents): except Exception as e: logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) - @staticmethod - def __proceed_with_update(requested_version): + def __proceed_with_update(self, requested_version): """ If requested version is specified, upgrade/downgrade to the specified version. 
Raises: AgentUpgradeExitException @@ -217,7 +222,7 @@ def __proceed_with_update(requested_version): current_agent = next(agent for agent in agents_on_disk if agent.version == CURRENT_VERSION) msg = "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " \ "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION) - logger.info(msg) + self.__log_event(LogLevel.INFO, msg) current_agent.mark_failure(is_fatal=True, reason=msg) except StopIteration: logger.warn( @@ -239,25 +244,25 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] - def __log_event(self, level, msg_, success_=True): + def __log_event(self, level, msg, success_=True): if level == LogLevel.INFO: - logger.info(msg_) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg, log_event=False) else: - msg_ += "[NOTE: Will not log the same error for the next 6 hours]" + msg += "[NOTE: Will not log the same error for the next 6 hours]" # Incarnation may change if we get new goal state that would make whole string unique every time. 
So comparing only the substring until Incarnation if Incarnation included in msg # Example msg "Unable to update Agent: No manifest links found for agent family: Prod for incarnation: incarnation_1, skipping agent update" now = datetime.datetime.now() - prefix_msg = msg_.split("incarnation", 1)[0] - prefix_last_warning_msg = self.persistent_data.last_warning.split("incarnation", 1)[0] - if prefix_msg != prefix_last_warning_msg or self.persistent_data.last_warning_time == datetime.datetime.min or now >= self.persistent_data.last_warning_time + datetime.timedelta(hours=6): + prefix_msg = msg.split("incarnation", 1)[0] + prefix_last_warning_msg = self.update_state.last_warning.split("incarnation", 1)[0] + if prefix_msg != prefix_last_warning_msg or self.update_state.last_warning_time == datetime.datetime.min or now >= self.update_state.last_warning_time + datetime.timedelta(hours=6): if level == LogLevel.WARNING: - logger.warn(msg_) + logger.warn(msg) elif level == LogLevel.ERROR: - logger.error(msg_) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) - self.persistent_data.last_warning_time = now - self.persistent_data.last_warning = msg_ + logger.error(msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg, log_event=False) + self.update_state.last_warning_time = now + self.update_state.last_warning = msg def run(self, goal_state): try: @@ -266,7 +271,7 @@ def run(self, goal_state): return self._gs_id = goal_state.extensions_goal_state.id - agent_family = self.__get_agent_family_from_last_gs(goal_state) + agent_family = self.__get_agent_family_manifests(goal_state) requested_version = self.__get_requested_version(agent_family) agent_manifest = None # This is to make sure fetch agent manifest once per update @@ -293,9 +298,9 @@ def run(self, goal_state): if not self.__should_update_agent(requested_version): return - msg_ = "Goal state {0} is requesting a new agent version {1}, will update 
the agent before processing the goal state.".format( + msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( self._gs_id, str(requested_version)) - self.__log_event(LogLevel.INFO, msg_) + self.__log_event(LogLevel.INFO, msg) try: agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) @@ -307,7 +312,7 @@ def run(self, goal_state): return # We delete the directory and the zip package from the filesystem except current version and target version - self.__purge_extra_agents_from_disk(known_agents=[agent]) + self.__purge_extra_agents_from_disk(known_agents=[agent, CURRENT_AGENT]) self.__proceed_with_update(requested_version) finally: diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index b02aaa9f5b..6f666156f4 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -53,7 +53,7 @@ from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, AGENT_VERSION, \ CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, get_lis_version, \ has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, get_daemon_version -from azurelinuxagent.ga.agent_update import get_agent_update_handler +from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed from azurelinuxagent.ga.collect_telemetry_events import get_collect_telemetry_events_handler from azurelinuxagent.ga.env import get_env_handler @@ -151,9 +151,6 @@ def __init__(self): # VM Size is reported via the heartbeat, default it here. 
self._vm_size = None - # Flag is Used to log if GA supports versioning on agent start - self._agent_supports_versioning_logged = False - # these members are used to avoid reporting errors too frequently self._heartbeat_update_goal_state_error_count = 0 self._update_goal_state_error_count = 0 @@ -528,10 +525,9 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_ # update self._goal_state if not self._try_update_goal_state(protocol): + agent_update_handler.run(self._goal_state) # status reporting should be done even when the goal state is not updated - agent_update_status = agent_update_handler.get_vmagent_update_status() - self._report_status(exthandlers_handler, agent_update_status) - self._log_agent_supports_versioning_or_not() + self._report_status(exthandlers_handler, agent_update_handler) return # check for agent updates @@ -553,11 +549,7 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_ CGroupConfigurator.get_instance().check_cgroups(cgroup_metrics=[]) # report status before processing the remote access, since that operation can take a long time - agent_update_status = agent_update_handler.get_vmagent_update_status() - self._report_status(exthandlers_handler, agent_update_status) - - # Logging after agent reports supported feature flag so this msg in sync with report status - self._log_agent_supports_versioning_or_not() + self._report_status(exthandlers_handler, agent_update_handler) if self._processing_new_incarnation(): remote_access_handler.run() @@ -586,11 +578,11 @@ def _cleanup_legacy_goal_state_history(): except Exception as exception: logger.warn("Error removing legacy history files: {0}", ustr(exception)) - def _report_status(self, exthandlers_handler, vm_agent_update_status): + def _report_status(self, exthandlers_handler, agent_update_handler): # report_ext_handlers_status does its own error handling and returns None if an error occurred vm_status = 
exthandlers_handler.report_ext_handlers_status( goal_state_changed=self._processing_new_extensions_goal_state(), - vm_agent_update_status=vm_agent_update_status, vm_agent_supports_fast_track=self._supports_fast_track) + vm_agent_update_status=agent_update_handler.get_vmagent_update_status(), vm_agent_supports_fast_track=self._supports_fast_track) if vm_status is not None: self._report_extensions_summary(vm_status) @@ -622,24 +614,6 @@ def _report_extensions_summary(self, vm_status): logger.warn(msg) add_event(op=WALAEventOperation.GoalState, is_success=False, message=msg) - def _log_agent_supports_versioning_or_not(self): - def _log_event(msg): - logger.info(msg) - add_event(AGENT_NAME, op=WALAEventOperation.FeatureFlag, message=msg) - if not self._agent_supports_versioning_logged: - supports_ga_versioning = False - for _, feature in get_agent_supported_features_list_for_crp().items(): - if feature.name == SupportedFeatureNames.GAVersioningGovernance: - supports_ga_versioning = True - break - if supports_ga_versioning: - msg = "Agent : {0} supports GA Versioning".format(CURRENT_VERSION) - _log_event(msg) - else: - msg = "Agent : {0} doesn't support GA Versioning".format(CURRENT_VERSION) - _log_event(msg) - self._agent_supports_versioning_logged = True - def _on_initial_goal_state_completed(self, extensions_summary): fileutil.write_file(self._initial_goal_state_file_path(), ustr(extensions_summary)) if conf.get_extensions_enabled() and self._goal_state_period != conf.get_goal_state_period(): @@ -730,6 +704,16 @@ def log_if_op_disabled(name, value): if not value: log_event("{0} is set to False, not processing the operation".format(name)) + def log_if_agent_versioning_feature_disabled(): + supports_ga_versioning = False + for _, feature in get_agent_supported_features_list_for_crp().items(): + if feature.name == SupportedFeatureNames.GAVersioningGovernance: + supports_ga_versioning = True + break + if not supports_ga_versioning: + msg = "Agent : {0} doesn't support GA 
Versioning".format(CURRENT_VERSION) + log_event(msg) + log_if_int_changed_from_default("Extensions.GoalStatePeriod", conf.get_goal_state_period(), "Changing this value affects how often extensions are processed and status for the VM is reported. Too small a value may report the VM as unresponsive") log_if_int_changed_from_default("Extensions.InitialGoalStatePeriod", conf.get_initial_goal_state_period(), @@ -750,6 +734,8 @@ def log_if_op_disabled(name, value): if conf.get_lib_dir() != "/var/lib/waagent": log_event("lib dir is in an unexpected location: {0}".format(conf.get_lib_dir())) + log_if_agent_versioning_feature_disabled() + except Exception as e: logger.warn("Failed to log changes in configuration: {0}", ustr(e)) diff --git a/tests/ga/mocks.py b/tests/ga/mocks.py index e42dd55458..a264390b31 100644 --- a/tests/ga/mocks.py +++ b/tests/ga/mocks.py @@ -19,7 +19,7 @@ from mock import PropertyMock -from azurelinuxagent.ga.agent_update import AgentUpdateHandler +from azurelinuxagent.ga.agent_update_handler import AgentUpdateHandler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.remoteaccess import RemoteAccessHandler from azurelinuxagent.ga.update import UpdateHandler, get_update_handler diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update.py index dbdf8dab5a..fda4a78c52 100644 --- a/tests/ga/test_agent_update.py +++ b/tests/ga/test_agent_update.py @@ -10,7 +10,7 @@ from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.version import CURRENT_VERSION -from azurelinuxagent.ga.agent_update import get_agent_update_handler +from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.guestagent import GAUpdateReportState from tests.ga.test_update import UpdateTestCase from tests.protocol.HttpRequestPredicates import HttpRequestPredicates diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index 
f63d1d42dc..18a8b9353c 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -3,7 +3,7 @@ import json -from azurelinuxagent.ga.agent_update import get_agent_update_handler +from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.update import get_update_handler from tests.ga.mocks import mock_update_handler diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 9d17ac35bb..77a0144d57 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -1,6 +1,6 @@ name: "AgentUpdate" tests: - "agent_update/rsm_update.py" -images: "ubuntu_2004" +images: "endorsed" location: "eastus2euap" owns_vm: true \ No newline at end of file From 22de2613cc34d702b2b25751910ebd20f4f4e7ac Mon Sep 17 00:00:00 2001 From: nnandigam Date: Thu, 8 Jun 2023 03:19:23 -0700 Subject: [PATCH 13/14] additional comments addressed --- azurelinuxagent/ga/agent_update_handler.py | 43 +++++++-------- tests/data/wire/ext_conf_missing_family.xml | 2 + tests/ga/mocks.py | 2 +- ...update.py => test_agent_update_handler.py} | 2 +- tests/ga/test_report_status.py | 11 ++-- tests/ga/test_update.py | 6 +-- tests_e2e/tests/agent_update/rsm_update.py | 10 ++-- .../scripts/verify_agent_supported_feature.py | 53 +++++++++++++++++++ .../tests/scripts/wait_for_rsm_goal_state.py | 9 ++-- 9 files changed, 91 insertions(+), 47 deletions(-) rename tests/ga/{test_agent_update.py => test_agent_update_handler.py} (99%) create mode 100755 tests_e2e/tests/scripts/verify_agent_supported_feature.py diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 7182a7463b..3acb5b14cf 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -12,7 +12,7 @@ from azurelinuxagent.common.protocol.restapi import 
VMAgentUpdateStatuses, VMAgentUpdateStatus from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT +from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState @@ -183,7 +183,7 @@ def __get_agent_package_to_download(self, agent_manifest, version): "skipping agent update".format(str(version), self._gs_id)) @staticmethod - def __purge_extra_agents_from_disk(known_agents): + def __purge_extra_agents_from_disk(current_version, known_agents): """ Remove from disk all directories and .zip files of unknown agents (without removing the current, running agent). @@ -191,6 +191,7 @@ def __purge_extra_agents_from_disk(known_agents): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) known_versions = [agent.version for agent in known_agents] + known_versions.append(current_version) for agent_path in glob.iglob(path): try: @@ -244,25 +245,15 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] - def __log_event(self, level, msg, success_=True): + @staticmethod + def __log_event(level, msg, success=True): if level == LogLevel.INFO: logger.info(msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg, log_event=False) - else: - msg += "[NOTE: Will not log the same error for the next 6 hours]" - # Incarnation may change if we get new goal state that would make whole string unique every time. 
So comparing only the substring until Incarnation if Incarnation included in msg - # Example msg "Unable to update Agent: No manifest links found for agent family: Prod for incarnation: incarnation_1, skipping agent update" - now = datetime.datetime.now() - prefix_msg = msg.split("incarnation", 1)[0] - prefix_last_warning_msg = self.update_state.last_warning.split("incarnation", 1)[0] - if prefix_msg != prefix_last_warning_msg or self.update_state.last_warning_time == datetime.datetime.min or now >= self.update_state.last_warning_time + datetime.timedelta(hours=6): - if level == LogLevel.WARNING: - logger.warn(msg) - elif level == LogLevel.ERROR: - logger.error(msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg, log_event=False) - self.update_state.last_warning_time = now - self.update_state.last_warning = msg + elif level == LogLevel.WARNING: + logger.warn(msg) + elif level == LogLevel.ERROR: + logger.error(msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success, message=msg, log_event=False) def run(self, goal_state): try: @@ -274,11 +265,10 @@ def run(self, goal_state): agent_family = self.__get_agent_family_manifests(goal_state) requested_version = self.__get_requested_version(agent_family) agent_manifest = None # This is to make sure fetch agent manifest once per update - + warn_msg = "" if requested_version is None: if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id) - self.__log_event(LogLevel.WARNING, warn_msg) GAUpdateReportState.report_error_msg = warn_msg agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) requested_version = self.__get_largest_version(agent_manifest) @@ -298,6 +288,9 @@ def run(self, goal_state): if not self.__should_update_agent(requested_version): return + if 
warn_msg != "": + self.__log_event(LogLevel.WARNING, warn_msg) + msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( self._gs_id, str(requested_version)) self.__log_event(LogLevel.INFO, msg) @@ -305,14 +298,14 @@ def run(self, goal_state): try: agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) - if not agent.is_available: + if agent.is_blacklisted or not agent.is_downloaded: msg = "Downloaded agent version is in bad state : {0} , skipping agent update".format( str(agent.version)) self.__log_event(LogLevel.WARNING, msg) return # We delete the directory and the zip package from the filesystem except current version and target version - self.__purge_extra_agents_from_disk(known_agents=[agent, CURRENT_AGENT]) + self.__purge_extra_agents_from_disk(CURRENT_VERSION, known_agents=[agent]) self.__proceed_with_update(requested_version) finally: @@ -323,7 +316,7 @@ def run(self, goal_state): raise err if "Missing requested version" not in GAUpdateReportState.report_error_msg: GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) - self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success_=False) + self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success=False) def get_vmagent_update_status(self): """ @@ -341,5 +334,5 @@ def get_vmagent_update_status(self): return VMAgentUpdateStatus(expected_version=str(GAUpdateReportState.report_expected_version), status=status, code=code, message=GAUpdateReportState.report_error_msg) except Exception as err: self.__log_event(LogLevel.WARNING, "Unable to report agent update status: {0}".format( - textutil.format_exception(err)), success_=False) + textutil.format_exception(err)), success=False) return None diff --git a/tests/data/wire/ext_conf_missing_family.xml b/tests/data/wire/ext_conf_missing_family.xml index 
9e13d03ace..10760a975b 100644 --- a/tests/data/wire/ext_conf_missing_family.xml +++ b/tests/data/wire/ext_conf_missing_family.xml @@ -4,6 +4,8 @@ xmlns:i="http://www.w3.org/2001/XMLSchema-instance"> + Prod + eastus diff --git a/tests/ga/mocks.py b/tests/ga/mocks.py index a264390b31..588825f780 100644 --- a/tests/ga/mocks.py +++ b/tests/ga/mocks.py @@ -86,7 +86,7 @@ def patch_object(target, attribute): try: with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler): - with patch("azurelinuxagent.ga.agent_update.get_agent_update_handler", return_value=agent_update_handler): + with patch("azurelinuxagent.ga.agent_update_handler.get_agent_update_handler", return_value=agent_update_handler): with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update_handler.py similarity index 99% rename from tests/ga/test_agent_update.py rename to tests/ga/test_agent_update_handler.py index fda4a78c52..5b7800b8c6 100644 --- a/tests/ga/test_agent_update.py +++ b/tests/ga/test_agent_update_handler.py @@ -54,7 +54,7 @@ def put_handler(url, *args, **_): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - with patch("azurelinuxagent.ga.agent_update.add_event") as mock_telemetry: + with patch("azurelinuxagent.ga.agent_update_handler.add_event") as mock_telemetry: agent_update_handler = get_agent_update_handler(protocol) agent_update_handler._protocol = protocol yield agent_update_handler, mock_telemetry diff 
--git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index 18a8b9353c..8f4ce58f4d 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -80,22 +80,21 @@ def test_report_status_should_log_errors_only_once_per_goal_state(self): update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. so do that here exthandlers_handler = ExtHandlersHandler(protocol) agent_update_handler = get_agent_update_handler(protocol) - agent_update_status = agent_update_handler.get_vmagent_update_status() - update_handler._report_status(exthandlers_handler, agent_update_status) + update_handler._report_status(exthandlers_handler, agent_update_handler) self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] - update_handler._report_status(exthandlers_handler, agent_update_status) - update_handler._report_status(exthandlers_handler, agent_update_status) - update_handler._report_status(exthandlers_handler, agent_update_status) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) self.assertEqual(1, len(get_warnings()), "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler, agent_update_status) + update_handler._report_status(exthandlers_handler, 
agent_update_handler) self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 64cb9bcf07..b73ad3db8f 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -917,7 +917,7 @@ def test_update_happens_when_extensions_disabled(self): behavior never changes. """ with patch('azurelinuxagent.common.conf.get_extensions_enabled', return_value=False): - with patch('azurelinuxagent.ga.agent_update.AgentUpdateHandler.run') as download_agent: + with patch('azurelinuxagent.ga.agent_update_handler.AgentUpdateHandler.run') as download_agent: with mock_wire_protocol(DATA_FILE) as protocol: with mock_update_handler(protocol, autoupdate_enabled=True) as update_handler: update_handler.run() @@ -1415,7 +1415,7 @@ def test_run_clears_sentinel_on_successful_exit(self): self.assertFalse(os.path.isfile(update_handler._sentinel_file_path())) def test_run_leaves_sentinel_on_unsuccessful_exit(self): - with patch('azurelinuxagent.ga.agent_update.AgentUpdateHandler.run', side_effect=Exception): + with patch('azurelinuxagent.ga.agent_update_handler.AgentUpdateHandler.run', side_effect=Exception): update_handler = self._test_run(autoupdate_enabled=True,expected_exit_code=1) self.assertTrue(os.path.isfile(update_handler._sentinel_file_path())) @@ -1869,7 +1869,7 @@ def iterator(*_, **__): mock_is_running.__get__ = Mock(side_effect=iterator) with patch('azurelinuxagent.ga.exthandlers.get_exthandlers_handler'): with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'): - with patch('azurelinuxagent.ga.agent_update.get_agent_update_handler'): + with patch('azurelinuxagent.ga.agent_update_handler.get_agent_update_handler'): with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): with 
patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 605956ac9a..b38d6b6da4 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -23,6 +23,7 @@ # The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. # For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. # +import glob import json from typing import List, Dict, Any @@ -189,15 +190,10 @@ def _verify_agent_reported_supported_feature_flag(self): """ RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log """ - def _check_agent_supports_versioning() -> bool: - found: str = self._ssh_client.run_command("grep -q 'Agent.*supports GA Versioning' /var/log/waagent.log && echo true || echo false").rstrip() - return True if found == "true" else False log.info("Verifying agent reported supported feature flag") - found: bool = retry_if_false(_check_agent_supports_versioning) - - assert_that(found).is_true().described_as("Agent failed to report supported feature flag, so skipping agent update validations") - log.info("Successfully verified agent reported supported feature flag") + self._ssh_client.run_command(f"verify_agent_supported_feature.py", use_sudo=True) + log.info("Agent reported VersioningGovernance supported feature flag") if __name__ == "__main__": diff --git a/tests_e2e/tests/scripts/verify_agent_supported_feature.py b/tests_e2e/tests/scripts/verify_agent_supported_feature.py new file mode 100755 index 0000000000..d7ac441e48 --- /dev/null +++ b/tests_e2e/tests/scripts/verify_agent_supported_feature.py @@ -0,0 +1,53 @@ 
+#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Verify if the agent reported supportedfeature VersioningGovernance flag to CRP through status file +# +import glob +import json +import logging +import sys + +from tests_e2e.tests.lib.retry import retry_if_false + + +def check_agent_supports_versioning() -> bool: + agent_status_file = "/var/lib/waagent/history/*/waagent_status.json" + file_paths = glob.glob(agent_status_file, recursive=True) + for file in file_paths: + logging.info("Agent status file found %s", file) + with open(file, 'r') as f: + data = json.load(f) + status = data["__status__"] + supported_features = status["supportedFeatures"] + for supported_feature in supported_features: + if supported_feature["Key"] == "VersioningGovernance": + return True + return False + + +try: + found: bool = retry_if_false(check_agent_supports_versioning) + if not found: + raise Exception("Agent failed to report supported feature flag, so skipping agent update validations") + +except Exception as e: + print(f"{e}", file=sys.stderr) + sys.exit(1) + +sys.exit(0) diff --git a/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py index efac62e18c..e9c67123da 100755 --- a/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py +++ b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py @@ -20,6 +20,7 @@ # import argparse import sys +import 
logging from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties @@ -39,9 +40,9 @@ def get_requested_version(gs: GoalState) -> str: return "" -def verify_rsm_requested_version(protocol: WireProtocol, expected_version: str) -> bool: - protocol.client.update_goal_state() - goal_state = protocol.client.get_goal_state() +def verify_rsm_requested_version(wire_protocol: WireProtocol, expected_version: str) -> bool: + wire_protocol.client.update_goal_state() + goal_state = wire_protocol.client.get_goal_state() requested_version = get_requested_version(goal_state) if requested_version == expected_version: return True @@ -63,7 +64,7 @@ def verify_rsm_requested_version(protocol: WireProtocol, expected_version: str) if not found: raise Exception("Latest GS does not include rsm requested version : {0}.".format(args.version)) else: - print("Latest GS includes rsm requested version : {0}.".format(args.version)) + logging.info("Latest GS includes rsm requested version : {0}.".format(args.version)) except Exception as e: From 7b6d2e3e55f1ce0e511cf9f4cb220c3e15174205 Mon Sep 17 00:00:00 2001 From: nnandigam Date: Thu, 8 Jun 2023 03:31:57 -0700 Subject: [PATCH 14/14] fix pylint warning --- tests_e2e/tests/agent_update/rsm_update.py | 4 +--- tests_e2e/tests/scripts/verify_agent_supported_feature.py | 4 ++-- tests_e2e/tests/scripts/wait_for_rsm_goal_state.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index b38d6b6da4..cfa1a7d18b 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -23,12 +23,10 @@ # The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. 
# For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. # -import glob import json from typing import List, Dict, Any import requests -from assertpy import assert_that from azure.identity import DefaultAzureCredential from azure.mgmt.compute.models import VirtualMachine from msrestazure.azure_cloud import Cloud @@ -192,7 +190,7 @@ def _verify_agent_reported_supported_feature_flag(self): """ log.info("Verifying agent reported supported feature flag") - self._ssh_client.run_command(f"verify_agent_supported_feature.py", use_sudo=True) + self._ssh_client.run_command("verify_agent_supported_feature.py", use_sudo=True) log.info("Agent reported VersioningGovernance supported feature flag") diff --git a/tests_e2e/tests/scripts/verify_agent_supported_feature.py b/tests_e2e/tests/scripts/verify_agent_supported_feature.py index d7ac441e48..b6c1307e2d 100755 --- a/tests_e2e/tests/scripts/verify_agent_supported_feature.py +++ b/tests_e2e/tests/scripts/verify_agent_supported_feature.py @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -# Verify if the agent reported supportedfeature VersioningGovernance flag to CRP through status file +# Verify if the agent reported supportedfeature VersioningGovernance flag to CRP via status file # import glob import json @@ -30,9 +30,9 @@ def check_agent_supports_versioning() -> bool: agent_status_file = "/var/lib/waagent/history/*/waagent_status.json" file_paths = glob.glob(agent_status_file, recursive=True) for file in file_paths: - logging.info("Agent status file found %s", file) with open(file, 'r') as f: data = json.load(f) + logging.info("Agent status file is %s and it's content %s", file, data) status = data["__status__"] supported_features = status["supportedFeatures"] for supported_feature in supported_features: diff --git a/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py index e9c67123da..5905a5fe1d 100755 --- a/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py +++ b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py @@ -64,7 +64,7 @@ def verify_rsm_requested_version(wire_protocol: WireProtocol, expected_version: if not found: raise Exception("Latest GS does not include rsm requested version : {0}.".format(args.version)) else: - logging.info("Latest GS includes rsm requested version : {0}.".format(args.version)) + logging.info("Latest GS includes rsm requested version : %s", args.version) except Exception as e: