From b490692bb63fadb48d333e02c1ca115ebd257791 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 27 Jun 2023 07:38:19 -0700 Subject: [PATCH 1/5] Do not collect LISA logs by default (#2857) Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 468203cd22..dff8985da8 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -46,7 +46,7 @@ parameters: - name: collect_lisa_logs displayName: Collect LISA logs type: boolean - default: true + default: false - name: keep_environment displayName: Keep the test VMs (do not delete them) From faebcdbf57e585bcfe0a299d980ffa019d82a4b6 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 28 Jun 2023 11:04:14 -0700 Subject: [PATCH 2/5] Add check for noexec on Permission denied errors (#2859) * Add check for noexec on Permission denied errors * remove type annotation --------- Co-authored-by: narrieta --- azurelinuxagent/common/event.py | 1 + .../common/utils/extensionprocessutil.py | 54 +++++++++++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 1f903a9faa..4679608067 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -104,6 +104,7 @@ class WALAEventOperation: InitializeHostPlugin = "InitializeHostPlugin" Log = "Log" LogCollection = "LogCollection" + NoExec = "NoExec" OSInfo = "OSInfo" Partition = "Partition" PersistFirewallRules = "PersistFirewallRules" diff --git a/azurelinuxagent/common/utils/extensionprocessutil.py b/azurelinuxagent/common/utils/extensionprocessutil.py index 137f3aa2dd..c24ebf1946 100644 --- a/azurelinuxagent/common/utils/extensionprocessutil.py +++ b/azurelinuxagent/common/utils/extensionprocessutil.py @@ -22,7 +22,9 @@ import signal import time +from azurelinuxagent.common import conf from 
azurelinuxagent.common import logger +from azurelinuxagent.common.event import WALAEventOperation, add_event from azurelinuxagent.common.exception import ExtensionErrorCodes, ExtensionOperationError, ExtensionError from azurelinuxagent.common.future import ustr @@ -74,7 +76,7 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c process_output = read_output(stdout, stderr) if timed_out: - if cpu_cgroup is not None:# Report CPUThrottledTime when timeout happens + if cpu_cgroup is not None: # Report CPUThrottledTime when timeout happens raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output), code=ExtensionErrorCodes.PluginHandlerScriptTimedout) @@ -82,12 +84,58 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c code=ExtensionErrorCodes.PluginHandlerScriptTimedout) if return_code != 0: - raise ExtensionOperationError("Non-zero exit code: {0}, {1}\n{2}".format(return_code, command, process_output), - code=error_code, exit_code=return_code) + noexec_warning = "" + if return_code == 126: # Permission denied + noexec_path = _check_noexec() + if noexec_path is not None: + noexec_warning = "\nWARNING: {0} is mounted with the noexec flag, which can prevent execution of VM Extensions.".format(noexec_path) + raise ExtensionOperationError( + "Non-zero exit code: {0}, {1}{2}\n{3}".format(return_code, command, noexec_warning, process_output), + code=error_code, + exit_code=return_code) return process_output +# +# Collect a sample of errors while checking for the noexec flag. Consider removing this telemetry after a few releases. +# +_COLLECT_NOEXEC_ERRORS = True + + +def _check_noexec(): + """ + Check if /var is mounted with the noexec flag. 
+ """ + try: + agent_dir = conf.get_lib_dir() + with open('/proc/mounts', 'r') as f: + while True: + line = f.readline() + if line == "": # EOF + break + # The mount point is on the second column, and the flags are on the fourth. e.g. + # + # # grep /var /proc/mounts + # /dev/mapper/rootvg-varlv /var xfs rw,seclabel,noexec,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota 0 0 + # + columns = line.split() + mount_point = columns[1] + flags = columns[3] + if agent_dir.startswith(mount_point) and "noexec" in flags: + message = "The noexec flag is set on {0}. This can prevent extensions from executing.".format(mount_point) + logger.warn(message) + add_event(op=WALAEventOperation.NoExec, is_success=False, message=message) + return mount_point + except Exception as e: + message = "Error while checking the noexec flag: {0}".format(e) + logger.warn(message) + if _COLLECT_NOEXEC_ERRORS: + _COLLECT_NOEXEC_ERRORS = False + add_event(op=WALAEventOperation.NoExec, is_success=False, log_event=False, message="Error while checking the noexec flag: {0}".format(e)) + return None + + SAS_TOKEN_RE = re.compile(r'(https://\S+\?)((sv|st|se|sr|sp|sip|spr|sig)=\S+)+', flags=re.IGNORECASE) From 8bd63636b28a19ca20520332923f8cf8606e4b6d Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 29 Jun 2023 15:37:00 -0700 Subject: [PATCH 3/5] Wait for log message in AgentNotProvisioned test (#2861) * Wait for log message in AgentNotProvisioned test * hardcoded value --------- Co-authored-by: narrieta --- .../agent_not_provisioned.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py index 45ff903c39..490fba3b8d 100755 --- a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py +++ b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py @@ -44,8 +44,19 @@ def run(self): log.info("Checking the Agent's log to verify 
that it is disabled.") try: output = ssh_client.run_command(""" - grep -E 'WARNING.*Daemon.*Disabling guest agent in accordance with ovf-env.xml' /var/log/waagent.log || \ - grep -E 'WARNING.*Daemon.*Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart' /var/log/waagent.log + # We need to wait for the agent to start and hit the disable code, give it a few minutes + n=18 + for i in $(seq $n); do + grep -E 'WARNING.*Daemon.*Disabling guest agent in accordance with ovf-env.xml' /var/log/waagent.log || \ + grep -E 'WARNING.*Daemon.*Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart' /var/log/waagent.log + if [[ $? == 0 ]]; then + exit 0 + fi + echo "Did not find the expected message in the agent's log, retrying after sleeping for a few seconds (attempt $i/$n)..." + sleep 10 + done + echo "Did not find the expected message in the agent's log, giving up." + exit 1 """) log.info("The Agent is disabled, log message: [%s]", output.rstrip()) except CommandError as e: From 077f66d1a0d168769cc565f13ae035446f3d7e9f Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 30 Jun 2023 10:41:28 -0700 Subject: [PATCH 4/5] Always collect logs on end-to-end tests (#2863) * Always collect logs * cleanup --------- Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index dff8985da8..2cf0979575 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -46,7 +46,7 @@ parameters: - name: collect_lisa_logs displayName: Collect LISA logs type: boolean - default: false + default: true - name: keep_environment displayName: Keep the test VMs (do not delete them) @@ -124,6 +124,8 @@ jobs: - bash: $(Build.SourcesDirectory)/tests_e2e/pipeline/scripts/collect_artifacts.sh displayName: "Collect test artifacts" + 
# Collect artifacts even if the previous step is cancelled (e.g. timeout) + condition: always() env: COLLECT_LISA_LOGS: ${{ parameters.collect_lisa_logs }} From 2ddd73617c4958e6a8fcda77c97220e036d7d1d3 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 30 Jun 2023 12:30:38 -0700 Subject: [PATCH 5/5] agent publish scenario (#2847) * agent publish * remove vm size * address comments * deamom version fallback * daemon versionfix * address comments * fix pylint error * address comment * added error handling --- azurelinuxagent/common/version.py | 16 ++- azurelinuxagent/ga/agent_update_handler.py | 15 +-- tests/common/test_version.py | 11 +- tests/ga/test_agent_update_handler.py | 10 +- tests/ga/test_extension.py | 3 +- tests/ga/test_report_status.py | 101 ++++++++-------- tests/ga/test_update.py | 18 ++- .../orchestrator/lib/agent_test_loader.py | 14 ++- .../lib/agent_test_suite_combinator.py | 2 +- .../orchestrator/scripts/update-waagent-conf | 29 +++-- tests_e2e/test_suites/agent_publish.yml | 11 ++ tests_e2e/test_suites/agent_update.yml | 2 +- tests_e2e/test_suites/images.yml | 5 - .../tests/agent_publish/agent_publish.py | 100 ++++++++++++++++ tests_e2e/tests/agent_update/__init__.py | 0 tests_e2e/tests/agent_update/rsm_update.py | 61 ++++++---- .../extensions_disabled.py | 2 +- .../scripts/agent_publish-check_update.py | 112 ++++++++++++++++++ ...nfig => agent_update-modify_agent_version} | 12 +- ...ate-verify_agent_reported_update_status.py | 59 +++++++++ ...te-verify_versioning_supported_feature.py} | 0 ...ate.py => agent_update-wait_for_rsm_gs.py} | 0 22 files changed, 456 insertions(+), 127 deletions(-) create mode 100644 tests_e2e/test_suites/agent_publish.yml create mode 100644 tests_e2e/tests/agent_publish/agent_publish.py delete mode 100644 tests_e2e/tests/agent_update/__init__.py create mode 100755 tests_e2e/tests/scripts/agent_publish-check_update.py rename 
tests_e2e/tests/scripts/{modify-agent-version-config => agent_update-modify_agent_version} (75%) create mode 100755 tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py rename tests_e2e/tests/scripts/{verify_agent_supported_feature.py => agent_update-verify_versioning_supported_feature.py} (100%) rename tests_e2e/tests/scripts/{wait_for_rsm_goal_state.py => agent_update-wait_for_rsm_gs.py} (100%) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index ff9c903b93..08c01b5ceb 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -21,6 +21,7 @@ import sys import azurelinuxagent.common.conf as conf +from azurelinuxagent.common import logger import azurelinuxagent.common.utils.shellutil as shellutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.future import ustr, get_linux_distribution @@ -48,12 +49,21 @@ def get_daemon_version(): The value indicates the version of the daemon that started the current agent process or, if the current process is the daemon, the version of the current process. If the variable is not set (because the agent is < 2.2.53, or the process was not started by the daemon and - the process is not the daemon itself) the function returns "0.0.0.0" + the process is not the daemon itself) the function returns the agent version reported by a new process launched with the current Python interpreter (sys.executable), falling back to 2.2.53 if that fails """ if __DAEMON_VERSION_ENV_VARIABLE in os.environ: return FlexibleVersion(os.environ[__DAEMON_VERSION_ENV_VARIABLE]) - return FlexibleVersion("0.0.0.0") - + else: + # The agent process that executes the extensions can have a different version (after upgrades), and importing the version from that process may provide the wrong version for the daemon. + # So launching a new process with sys.executable provides the correct version for the daemon, which is preinstalled in the image. 
+ try: + cmd = ["{0}".format(sys.executable), "-c", "\'from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)\'"] + version = shellutil.run_command(cmd) + return FlexibleVersion(version) + except Exception as e: # Make the best effort to get the daemon version, but don't fail the update if we can't. So default to 2.2.53 as env variable is not set < 2.2.53 + logger.warn("Failed to get the daemon version: {0}", ustr(e)) + return FlexibleVersion("2.2.53") + def get_f5_platform(): """ diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 9200075055..8de6cfd81e 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -9,7 +9,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.logger import LogLevel from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource -from azurelinuxagent.common.protocol.restapi import VERSION_0, VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN @@ -231,7 +231,7 @@ def __proceed_with_update(self, requested_version): # In case of an upgrade, we don't need to exclude anything as the daemon will automatically # start the next available highest version which would be the target version prefix = "upgrade" - raise AgentUpgradeExitException("Agent update found, Exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) + raise AgentUpgradeExitException("Agent update found, exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) @staticmethod def 
__get_available_agents_on_disk(): @@ -243,15 +243,6 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] - @staticmethod - def __get_daemon_version_for_update(): - daemon_version = get_daemon_version() - if daemon_version != FlexibleVersion(VERSION_0): - return daemon_version - # We return 0.0.0.0 if daemon version is not specified. In that case, - # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. - return FlexibleVersion("2.2.53") - @staticmethod def __log_event(level, msg, success=True): if level == LogLevel.INFO: @@ -299,7 +290,7 @@ def run(self, goal_state): self.__log_event(LogLevel.WARNING, warn_msg) try: - daemon_version = self.__get_daemon_version_for_update() + daemon_version = get_daemon_version() if requested_version < daemon_version: # Don't process the update if the requested version is less than daemon version, # as historically we don't support downgrades below daemon versions. So daemon will not pickup that requested version rather start with diff --git a/tests/common/test_version.py b/tests/common/test_version.py index ba1fb76720..625c0bcfc3 100644 --- a/tests/common/test_version.py +++ b/tests/common/test_version.py @@ -136,11 +136,12 @@ def test_get_daemon_version_should_return_the_version_that_was_previously_set(se finally: os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) - def test_get_daemon_version_should_return_zero_when_the_version_has_not_been_set(self): - self.assertEqual( - FlexibleVersion("0.0.0.0"), get_daemon_version(), - "The daemon version should not be defined. 
Environment={0}".format(os.environ) - ) + def test_get_daemon_version_from_fallback_when_the_version_has_not_been_set(self): + with patch("azurelinuxagent.common.utils.shellutil.run_command", return_value=FlexibleVersion("2.2.53")): + self.assertEqual( + FlexibleVersion("2.2.53"), get_daemon_version(), + "The daemon version should not be defined. Environment={0}".format(os.environ) + ) class TestCurrentAgentName(AgentTestCase): diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 9e01d0b6c2..49b0dc7628 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -106,7 +106,7 @@ def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_update_to_largest_version_if_time_window_not_elapsed(self): self.prepare_agents(count=1) @@ -142,7 +142,7 @@ def test_it_should_update_to_largest_version_if_time_window_elapsed(self): agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): 
self.prepare_agents(count=1) @@ -172,7 +172,7 @@ def test_it_should_update_to_largest_version_if_requested_version_not_available( agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self): data_file = DATA_FILE.copy() @@ -207,7 +207,7 @@ def test_it_should_upgrade_agent_if_requested_version_is_available_greater_than_ agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, version="9.9.9.10") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_current_version(self): data_file = DATA_FILE.copy() @@ -228,7 +228,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=downgraded_version) self.__assert_agent_directories_exist_and_others_dont_exist( versions=[downgraded_version, str(CURRENT_VERSION)]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_not_downgrade_below_daemon_version(self): data_file = 
DATA_FILE.copy() diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 5309b80566..76dde881d2 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3209,7 +3209,8 @@ def tearDown(self): AgentTestCase.tearDown(self) @patch('time.gmtime', MagicMock(return_value=time.gmtime(0))) - def test_ext_handler_reporting_status_file(self): + @patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("0.0.0.0")) + def test_ext_handler_reporting_status_file(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: def mock_http_put(url, *args, **_): diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index 8f4ce58f4d..1dcfe33edc 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -3,6 +3,7 @@ import json +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.update import get_update_handler @@ -35,67 +36,69 @@ def on_new_iteration(iteration): exthandlers_handler = ExtHandlersHandler(protocol) with patch.object(exthandlers_handler, "run", wraps=exthandlers_handler.run) as exthandlers_handler_run: with mock_update_handler(protocol, iterations=2, on_new_iteration=on_new_iteration, exthandlers_handler=exthandlers_handler) as update_handler: - update_handler.run(debug=True) - - self.assertEqual(1, exthandlers_handler_run.call_count, "Extensions should have been executed only once.") - self.assertEqual(2, len(protocol.mock_wire_data.status_blobs), "Status should have been reported for the 2 iterations.") - - # - # Verify that we reported status for the extension in the test data - # - first_status = json.loads(protocol.mock_wire_data.status_blobs[0]) - - handler_aggregate_status = first_status.get('aggregateStatus', {}).get("handlerAggregateStatus") - 
self.assertIsNotNone(handler_aggregate_status, "Could not find the handlerAggregateStatus") - self.assertEqual(1, len(handler_aggregate_status), "Expected 1 extension status. Got: {0}".format(handler_aggregate_status)) - extension_status = handler_aggregate_status[0] - self.assertEqual("OSTCExtensions.ExampleHandlerLinux", extension_status["handlerName"], "The status does not correspond to the test data") - - # - # Verify that we reported the same status (minus timestamps) in the 2 iterations - # - second_status = json.loads(protocol.mock_wire_data.status_blobs[1]) - - def remove_timestamps(x): - if isinstance(x, list): - for v in x: - remove_timestamps(v) - elif isinstance(x, dict): - for k, v in x.items(): - if k == "timestampUTC": - x[k] = '' - else: + with patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("2.2.53")): + update_handler.run(debug=True) + + self.assertEqual(1, exthandlers_handler_run.call_count, "Extensions should have been executed only once.") + self.assertEqual(2, len(protocol.mock_wire_data.status_blobs), "Status should have been reported for the 2 iterations.") + + # + # Verify that we reported status for the extension in the test data + # + first_status = json.loads(protocol.mock_wire_data.status_blobs[0]) + + handler_aggregate_status = first_status.get('aggregateStatus', {}).get("handlerAggregateStatus") + self.assertIsNotNone(handler_aggregate_status, "Could not find the handlerAggregateStatus") + self.assertEqual(1, len(handler_aggregate_status), "Expected 1 extension status. 
Got: {0}".format(handler_aggregate_status)) + extension_status = handler_aggregate_status[0] + self.assertEqual("OSTCExtensions.ExampleHandlerLinux", extension_status["handlerName"], "The status does not correspond to the test data") + + # + # Verify that we reported the same status (minus timestamps) in the 2 iterations + # + second_status = json.loads(protocol.mock_wire_data.status_blobs[1]) + + def remove_timestamps(x): + if isinstance(x, list): + for v in x: remove_timestamps(v) + elif isinstance(x, dict): + for k, v in x.items(): + if k == "timestampUTC": + x[k] = '' + else: + remove_timestamps(v) - remove_timestamps(first_status) - remove_timestamps(second_status) + remove_timestamps(first_status) + remove_timestamps(second_status) - self.assertEqual(first_status, second_status) + self.assertEqual(first_status, second_status) def test_report_status_should_log_errors_only_once_per_goal_state(self): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): # skip agent update with patch("azurelinuxagent.ga.update.logger.warn") as logger_warn: - update_handler = get_update_handler() - update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. so do that here - exthandlers_handler = ExtHandlersHandler(protocol) - agent_update_handler = get_agent_update_handler(protocol) - update_handler._report_status(exthandlers_handler, agent_update_handler) - self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") + with patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("2.2.53")): + update_handler = get_update_handler() + update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. 
so do that here + exthandlers_handler = ExtHandlersHandler(protocol) + agent_update_handler = get_agent_update_handler(protocol) + update_handler._report_status(exthandlers_handler, agent_update_handler) + self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") - with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() - get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] + with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() + get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] - update_handler._report_status(exthandlers_handler, agent_update_handler) - update_handler._report_status(exthandlers_handler, agent_update_handler) - update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) - self.assertEqual(1, len(get_warnings()), "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") + self.assertEqual(1, len(get_warnings()), "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") - exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) - update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler, agent_update_handler) - self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") + 
exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) + update_handler._try_update_goal_state(exthandlers_handler.protocol) + update_handler._report_status(exthandlers_handler, agent_update_handler) + self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 5b4babfd14..e342fec7d3 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -43,7 +43,7 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ - AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION + AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION, set_daemon_version, __DAEMON_VERSION_ENV_VARIABLE as DAEMON_VERSION_ENV_VARIABLE from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, ExtHandlerInstance, HandlerEnvironment, ExtensionStatusValue from azurelinuxagent.ga.update import \ get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, ORPHAN_WAIT_INTERVAL, \ @@ -135,11 +135,16 @@ def setUpClass(cls): source = os.path.join(data_dir, "ga", sample_agent_zip) target = os.path.join(UpdateTestCase._agent_zip_dir, test_agent_zip) shutil.copyfile(source, target) + # The update_handler inherently calls agent update handler, which in turn calls daemon version. So now daemon version logic has fallback if env variable is not set. + # The fallback calls popen which is not mocked. So we set the env variable to avoid the fallback. + # This will not change any of the test validations. At the end of all update test validations, we reset the env variable. 
+ set_daemon_version("1.2.3.4") @classmethod def tearDownClass(cls): super(UpdateTestCase, cls).tearDownClass() shutil.rmtree(UpdateTestCase._test_suite_tmp_dir) + os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) @staticmethod def _get_agent_pkgs(in_dir=None): @@ -328,7 +333,6 @@ def setUp(self): self.update_handler._goal_state = Mock() self.update_handler._goal_state.extensions_goal_state = Mock() self.update_handler._goal_state.extensions_goal_state.source = "Fabric" - # Since ProtocolUtil is a singleton per thread, we need to clear it to ensure that the test cases do not reuse # a previous state clear_singleton_instances(ProtocolUtil) @@ -1474,7 +1478,7 @@ def __assert_exit_code_successful(self, update_handler): def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade=True, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Agent update found, Exiting current process to {0} to the new Agent version {1}'.format( + 'Agent update found, exiting current process to {0} to the new Agent version {1}'.format( "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), @@ -2293,9 +2297,11 @@ def test_it_should_clear_the_timestamp_for_the_most_recent_fast_track_goal_state raise Exception("The test setup did not save the Fast Track state") with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): - with mock_wire_protocol(data_file) as protocol: - with mock_update_handler(protocol) as update_handler: - update_handler.run() + with patch("azurelinuxagent.common.version.get_daemon_version", + return_value=FlexibleVersion("2.2.53")): + with mock_wire_protocol(data_file) as protocol: + with mock_update_handler(protocol) as update_handler: + update_handler.run() self.assertEqual(HostPluginProtocol.get_fast_track_timestamp(), timeutil.create_timestamp(datetime.min), "The Fast Track 
state was not cleared") diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index 31c6e52a40..fbd6cfe8f8 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -130,12 +130,19 @@ def _validate(self): """ Performs some basic validations on the data loaded from the YAML description files """ + def _parse_image(image: str) -> str: + """ + Parses a reference to an image or image set and returns the name of the image or image set + """ + match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) + if match is not None: + return match.group('image_set') + return image + for suite in self.test_suites: # Validate that the images the suite must run on are in images.yml for image in suite.images: - match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) - if match is not None: - image = match.group('image_set') + image = _parse_image(image) if image not in self.images: raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml") @@ -146,6 +153,7 @@ def _validate(self): else: continue for suite_image in suite.images: + suite_image = _parse_image(suite_image) for image in self.images[suite_image]: # If the image has a location restriction, validate that it is available on the location the suite must run on if image.locations: diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 5efdf54bf5..be72cc4c70 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -273,7 +273,7 @@ def _get_test_suite_images(suite: TestSuiteInfo, loader: AgentTestLoader) -> Lis count = 1 matching_images = loader.images[match.group('image_set')].copy() random.shuffle(matching_images) - image_list = matching_images[0:count] + image_list = matching_images[0:int(count)] for i in 
image_list: unique[i.urn] = i return [v for k, v in unique.items()] diff --git a/tests_e2e/orchestrator/scripts/update-waagent-conf b/tests_e2e/orchestrator/scripts/update-waagent-conf index 13cfd45401..43dadeee27 100755 --- a/tests_e2e/orchestrator/scripts/update-waagent-conf +++ b/tests_e2e/orchestrator/scripts/update-waagent-conf @@ -18,24 +18,31 @@ # # -# Updates waagent.conf with the specified setting and value and restarts the Agent. +# Updates waagent.conf with the specified setting and value(allows multiple) and restarts the Agent. # set -euo pipefail -if [[ $# -ne 2 ]]; then - echo "Usage: update-waagent-conf " +if [[ $# -lt 1 ]]; then + echo "Usage: update-waagent-conf []" exit 1 fi -name=$1 -value=$2 - PYTHON=$(get-agent-python) waagent_conf=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; print(get_osutil().agent_conf_file_path)') -echo "Setting $name=$value in $waagent_conf" -sed -i -E "/^$name=/d" "$waagent_conf" -sed -i -E "\$a $name=$value" "$waagent_conf" -updated=$(grep "$name" "$waagent_conf") -echo "Updated value: $updated" +for setting_value in "$@"; do + IFS='=' read -r -a setting_value_array <<< "$setting_value" + name=${setting_value_array[0]} + value=${setting_value_array[1]} + + if [[ -z "$name" || -z "$value" ]]; then + echo "Invalid setting=value: $setting_value" + exit 1 + fi + echo "Setting $name=$value in $waagent_conf" + sed -i -E "/^$name=/d" "$waagent_conf" + sed -i -E "\$a $name=$value" "$waagent_conf" + updated=$(grep "$name" "$waagent_conf") + echo "Updated value: $updated" +done agent-service restart \ No newline at end of file diff --git a/tests_e2e/test_suites/agent_publish.yml b/tests_e2e/test_suites/agent_publish.yml new file mode 100644 index 0000000000..9b855f4ce4 --- /dev/null +++ b/tests_e2e/test_suites/agent_publish.yml @@ -0,0 +1,11 @@ +# +# This test is used to verify that the agent will be updated after publishing a new version to the agent update channel. 
+# +name: "AgentPublish" +tests: + - "agent_publish/agent_publish.py" +images: + - "random(endorsed, 10)" + - "random(endorsed-arm64, 2)" +locations: "AzureCloud:centraluseuap" +owns_vm: true \ No newline at end of file diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 865fa89ca5..7ef477e00b 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -1,7 +1,7 @@ name: "AgentUpdate" tests: - "agent_update/rsm_update.py" -images: "endorsed" +images: "random(endorsed, 10)" locations: "AzureCloud:eastus2euap" owns_vm: true skip_on_clouds: diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index a19105710b..5440486c25 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -91,8 +91,6 @@ images: locations: AzureChinaCloud: [] AzureUSGovernment: [] - vm_sizes: - - "Standard_D2pls_v5" mariner_1: urn: "microsoftcblmariner cbl-mariner cbl-mariner-1 latest" locations: @@ -101,11 +99,8 @@ images: mariner_2_arm64: urn: "microsoftcblmariner cbl-mariner cbl-mariner-2-arm64 latest" locations: - AzureCloud: ["eastus"] AzureChinaCloud: [] AzureUSGovernment: [] - vm_sizes: - - "Standard_D2pls_v5" rocky_9: urn: "erockyenterprisesoftwarefoundationinc1653071250513 rockylinux-9 rockylinux-9 latest" locations: diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py new file mode 100644 index 0000000000..d476414414 --- /dev/null +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import uuid +from typing import Any, Dict, List + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient + + +class AgentPublishTest(AgentTest): + """ + This script verifies if the agent update performed in the vm. + """ + + def __init__(self, context: AgentTestContext): + super().__init__(context) + self._ssh_client: SshClient = self._context.create_ssh_client() + + def run(self): + """ + we run the scenario in the following steps: + 1. Print the current agent version before the update + 2. Prepare the agent for the update + 3. Check for agent update from the log + 4. Print the agent version after the update + 5. 
Ensure CSE is working + """ + self._get_agent_info() + self._prepare_agent() + self._check_update() + self._get_agent_info() + self._check_cse() + + def _get_agent_info(self) -> None: + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info('Agent info \n%s', stdout) + + def _prepare_agent(self) -> None: + log.info("Modifying agent update related config flags") + output = self._ssh_client.run_command("update-waagent-conf GAUpdates.Enabled=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('Updated agent-update related config flags \n%s', output) + + def _check_update(self) -> None: + log.info("Verifying for agent update status") + output = self._ssh_client.run_command("agent_publish-check_update.py") + log.info('Checked the agent update \n%s', output) + + def _check_cse(self) -> None: + custom_script_2_1 = VirtualMachineExtensionClient( + self._context.vm, + VmExtensionIdentifier(VmExtensionIds.CustomScript.publisher, VmExtensionIds.CustomScript.type, "2.1"), + resource_name="CustomScript") + + log.info("Installing %s", custom_script_2_1) + message = f"Hello {uuid.uuid4()}!" 
+ custom_script_2_1.enable( + settings={ + 'commandToExecute': f"echo \'{message}\'" + }, + auto_upgrade_minor_version=False + ) + custom_script_2_1.assert_instance_view(expected_version="2.1", expected_message=message) + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # This is expected as latest version can be the less than test version + # + # WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted + # + { + 'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted" + } + + ] + return ignore_rules + + +if __name__ == "__main__": + AgentPublishTest.run_from_command_line() diff --git a/tests_e2e/tests/agent_update/__init__.py b/tests_e2e/tests/agent_update/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 0493efcad1..a6a41ec3d0 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -21,7 +21,7 @@ # BVT for the agent update scenario # # The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. - # For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. +# For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. 
# import json from typing import List, Dict, Any @@ -70,59 +70,71 @@ def run(self) -> None: log.info("*******Verifying the Agent Downgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm is \n%s", stdout) + log.info("Current agent version running on the vm before update is \n%s", stdout) downgrade_version: str = "1.3.0.0" log.info("Attempting downgrade version %s", downgrade_version) self._request_rsm_update(downgrade_version) self._check_rsm_gs(downgrade_version) self._prepare_agent() - # Verify downgrade scenario self._verify_guest_agent_update(downgrade_version) + self._verify_agent_reported_update_status(downgrade_version) + # Verify upgrade scenario log.info("*******Verifying the Agent Upgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm is \n%s", stdout) + log.info("Current agent version running on the vm before update is \n%s", stdout) upgrade_version: str = "1.3.1.0" log.info("Attempting upgrade version %s", upgrade_version) self._request_rsm_update(upgrade_version) self._check_rsm_gs(upgrade_version) self._verify_guest_agent_update(upgrade_version) + self._verify_agent_reported_update_status(upgrade_version) # verify no version update. 
There is bug in CRP and will enable once it's fixed log.info("*******Verifying the no version update scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm is \n%s", stdout) + log.info("Current agent version running on the vm before update is \n%s", stdout) version: str = "1.3.1.0" log.info("Attempting update version same as current version %s", upgrade_version) self._request_rsm_update(version) self._check_rsm_gs(version) self._verify_guest_agent_update(version) - - # verify requested version below daemon version - log.info("*******Verifying requested version below daemon version scenario*******") - stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm before update \n%s", stdout) - version: str = "0.5.0" - log.info("Attempting requested version %s", version) - self._request_rsm_update(version) - self._check_rsm_gs(version) - self._verify_no_guest_agent_update(stdout) + self._verify_agent_reported_update_status(version) + + # disabled until the new daemon version logic is released in test versions + # # verify requested version below daemon version + # log.info("*******Verifying requested version below daemon version scenario*******") + # # changing back to 1.3.1.0 from 1.0.0.0 as there is no pkg below than 1.0.0.0 available in PIR, Otherwise we will get pkg not found error + # self._prepare_agent("1.3.0.1", update_config=False) + # stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + # log.info("Current agent version running on the vm before update \n%s", stdout) + # version: str = "1.3.0.0" + # log.info("Attempting requested version %s", version) + # self._request_rsm_update(version) + # self._check_rsm_gs(version) + # self._verify_no_guest_agent_update(stdout) + # self._verify_agent_reported_update_status(version) def _check_rsm_gs(self, requested_version: str) -> None: # 
This checks if RSM GS available to the agent after we mock the rsm update request - output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True) + output = self._ssh_client.run_command(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) log.info('Verifying requested version GS available to the agent \n%s', output) - def _prepare_agent(self) -> None: + def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: """ This method is to ensure agent is ready for accepting rsm updates. As part of that we update following flags 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. """ - output = self._ssh_client.run_command("modify-agent-version-config", use_sudo=True) - log.info('Updating agent update required config \n%s', output) + log.info('Modifying agent installed version') + output = self._ssh_client.run_command(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) + log.info('Updated agent installed version \n%s', output) + if update_config: + log.info('Modifying agent update config flags') + output = self._ssh_client.run_command("update-waagent-conf GAUpdates.Enabled=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('updated agent update required config \n%s', output) @staticmethod def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: @@ -209,9 +221,18 @@ def _verify_agent_reported_supported_feature_flag(self): """ log.info("Verifying agent reported supported feature flag") - self._ssh_client.run_command("verify_agent_supported_feature.py", use_sudo=True) + self._ssh_client.run_command("agent_update-verify_versioning_supported_feature.py", use_sudo=True) log.info("Agent reported VersioningGovernance supported feature flag") 
+ def _verify_agent_reported_update_status(self, version: str): + """ + Verify if the agent reported update status to CRP after update performed + """ + + log.info("Verifying agent reported update status for version {0}".format(version)) + self._ssh_client.run_command(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) + log.info("Successfully Agent reported update status for version {0}".format(version)) + if __name__ == "__main__": RsmUpdateBvt.run_from_command_line() diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 1ec9e58da3..66cafcfc1c 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -44,7 +44,7 @@ def run(self): # Disable extension processing on the test VM log.info("Disabling extension processing on the test VM [%s]", self._context.vm.name) - output = ssh_client.run_command("update-waagent-conf Extensions.Enabled n", use_sudo=True) + output = ssh_client.run_command("update-waagent-conf Extensions.Enabled=n", use_sudo=True) log.info("Disable completed:\n%s", output) # From now on, extensions will time out; set the timeout to the minimum allowed(15 minutes) diff --git a/tests_e2e/tests/scripts/agent_publish-check_update.py b/tests_e2e/tests/scripts/agent_publish-check_update.py new file mode 100755 index 0000000000..9f8f66c4f2 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_publish-check_update.py @@ -0,0 +1,112 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import re +import sys +import logging + +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.retry import retry_if_false + + +# pylint: disable=W0105 +""" +Post the _LOG_PATTERN_00 changes, the last group sometimes might not have the 'Agent' part at the start of the sentence; thus making it optional. + +> WALinuxAgent-2.2.18 discovered WALinuxAgent-2.2.47 as an update and will exit +(None, 'WALinuxAgent-2.2.18', '2.2.47') +""" +_UPDATE_PATTERN_00 = re.compile(r'(.*Agent\s)?(\S*)\sdiscovered\sWALinuxAgent-(\S*)\sas an update and will exit') + +""" +> Agent WALinuxAgent-2.2.45 discovered update WALinuxAgent-2.2.47 -- exiting +('Agent', 'WALinuxAgent-2.2.45', '2.2.47') +""" +_UPDATE_PATTERN_01 = re.compile(r'(.*Agent)?\s(\S*) discovered update WALinuxAgent-(\S*) -- exiting') + +""" +> Normal Agent upgrade discovered, updating to WALinuxAgent-2.9.1.0 -- exiting +('Normal Agent', WALinuxAgent, '2.9.1.0 ') +""" +_UPDATE_PATTERN_02 = re.compile(r'(.*Agent) upgrade discovered, updating to (WALinuxAgent)-(\S*) -- exiting') + +""" +> Agent update found, exiting current process to downgrade to the new Agent version 1.3.0.0 +(Agent, 'downgrade', '1.3.0.0') +""" +_UPDATE_PATTERN_03 = re.compile(r'(.*Agent) update found, exiting current process to (\S*) to the new Agent version (\S*)') + +""" +> Agent WALinuxAgent-2.2.47 is running as the goal state agent +('2.2.47',) +""" +_RUNNING_PATTERN_00 = re.compile(r'.*Agent\sWALinuxAgent-(\S*)\sis running as the goal state agent') + + +def verify_agent_update_from_log(): + + exit_code = 0 + 
detected_update = False + update_successful = False + update_version = '' + + log = AgentLog() + + for record in log.read(): + if 'TelemetryData' in record.text: + continue + + for p in [_UPDATE_PATTERN_00, _UPDATE_PATTERN_01, _UPDATE_PATTERN_02, _UPDATE_PATTERN_03]: + update_match = re.match(p, record.text) + if update_match: + detected_update = True + update_version = update_match.groups()[2] + logging.info('found the agent update log: %s', record.text) + break + + if detected_update: + running_match = re.match(_RUNNING_PATTERN_00, record.text) + if running_match and update_version == running_match.groups()[0]: + update_successful = True + logging.info('found the agent started new version log: %s', record.text) + + if detected_update: + logging.info('update was detected: %s', update_version) + if update_successful: + logging.info('update was successful') + else: + logging.warning('update was not successful') + exit_code = 1 + else: + logging.warning('update was not detected') + exit_code = 1 + + return exit_code == 0 + + +# This method will trace agent update messages in the agent log and determine if the update was successful or not. 
+try: + logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.DEBUG, stream=sys.stdout) + found: bool = retry_if_false(verify_agent_update_from_log) + if not found: + raise Exception('update was not found in the logs') +except Exception as e: + logging.error(e) + sys.exit(1) + +sys.exit(0) diff --git a/tests_e2e/tests/scripts/modify-agent-version-config b/tests_e2e/tests/scripts/agent_update-modify_agent_version similarity index 75% rename from tests_e2e/tests/scripts/modify-agent-version-config rename to tests_e2e/tests/scripts/agent_update-modify_agent_version index f121e6f4b0..c8011e0094 100755 --- a/tests_e2e/tests/scripts/modify-agent-version-config +++ b/tests_e2e/tests/scripts/agent_update-modify_agent_version @@ -20,14 +20,18 @@ # set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "Usage: agent_update-modify_agent_version " + exit 1 +fi + +version=$1 PYTHON=$(get-agent-python) echo "Agent's Python: $PYTHON" # some distros return .pyc byte file instead source file .py. So, I retrieve parent directory first. version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))') version_file_full_path="$version_file_dir/version.py" -sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '1.0.0.0'/" $version_file_full_path -waagent_conf_path=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)') -sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' "$waagent_conf_path" -sed -i '$a AutoUpdate.GAFamily=Test' "$waagent_conf_path" +sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '$version'/" $version_file_full_path echo "Restarting service..." 
agent-service restart \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py new file mode 100755 index 0000000000..8e8d50a482 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py @@ -0,0 +1,59 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Verify if the agent reported update status to CRP via status file +# +import argparse +import glob +import json +import logging +import sys + +from tests_e2e.tests.lib.retry import retry_if_false + + +def check_agent_reported_update_status(expected_version: str) -> bool: + agent_status_file = "/var/lib/waagent/history/*/waagent_status.json" + file_paths = glob.glob(agent_status_file, recursive=True) + for file in file_paths: + with open(file, 'r') as f: + data = json.load(f) + logging.info("Agent status file is %s and it's content %s", file, data) + status = data["__status__"] + guest_agent_status = status["aggregateStatus"]["guestAgentStatus"] + if "updateStatus" in guest_agent_status.keys(): + if guest_agent_status["updateStatus"]["expectedVersion"] == expected_version: + return True + return False + + +try: + + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--version', required=True) + args = parser.parse_args() + + found: bool = retry_if_false(lambda: check_agent_reported_update_status(args.version)) + if not found: + raise Exception("Agent failed to report update status, so skipping rest of the agent update validations") + +except Exception as e: + print(f"{e}", file=sys.stderr) + sys.exit(1) + +sys.exit(0) diff --git a/tests_e2e/tests/scripts/verify_agent_supported_feature.py b/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py similarity index 100% rename from tests_e2e/tests/scripts/verify_agent_supported_feature.py rename to tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py diff --git a/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py similarity index 100% rename from tests_e2e/tests/scripts/wait_for_rsm_goal_state.py rename to tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py