From dfc4cc8a711a381c88e15b190bc14a4ce3995492 Mon Sep 17 00:00:00 2001 From: Zhidong Peng Date: Tue, 7 Nov 2023 08:16:00 -0800 Subject: [PATCH 1/3] Gathering Guest ProxyAgent Log Files (#2975) --- azurelinuxagent/ga/logcollector_manifests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/azurelinuxagent/ga/logcollector_manifests.py b/azurelinuxagent/ga/logcollector_manifests.py index e77da3d47f..b22316be95 100644 --- a/azurelinuxagent/ga/logcollector_manifests.py +++ b/azurelinuxagent/ga/logcollector_manifests.py @@ -119,4 +119,8 @@ echo,### Gathering Disk Info ### diskinfo, + +echo,### Gathering Guest ProxyAgent Log Files ### +copy,/var/log/proxyagent/* +echo, """ From a539385a97915329169fd20905bdfcbaaa2ae9dc Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 7 Nov 2023 09:44:53 -0800 Subject: [PATCH 2/3] Remove debug info from waagent.status.json (#2971) * Remove debug info from waagent.status.json * pylint warnings * pylint --------- Co-authored-by: narrieta --- azurelinuxagent/ga/exthandlers.py | 30 +------ azurelinuxagent/ga/update.py | 6 +- tests/ga/test_extension.py | 132 +++++++++++++----------------- tests/ga/test_update.py | 1 - 4 files changed, 62 insertions(+), 107 deletions(-) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 3575c3e6d2..a09a81e96c 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -53,8 +53,7 @@ from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION, \ - PY_VERSION_MAJOR, PY_VERSION_MICRO, PY_VERSION_MINOR +from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION _HANDLER_NAME_PATTERN = r'^([^-]+)' _HANDLER_VERSION_PATTERN = r'(\d+(?:\.\d+)*)' @@ -964,33 +963,6 @@ def report_ext_handlers_status(self, goal_state_changed=False, vm_agent_update_s message=msg) return None - def get_ext_handlers_status_debug_info(self, vm_status): - status_blob_text = self.protocol.get_status_blob_data() - if status_blob_text is None: - status_blob_text = "" - - support_multi_config = {} - vm_status_data = get_properties(vm_status) - vm_handler_statuses = vm_status_data.get('vmAgent', {}).get('extensionHandlers') - for handler_status in vm_handler_statuses: - if handler_status.get('name') is not None: - support_multi_config[handler_status.get('name')] = handler_status.get('supports_multi_config') - - debug_text = json.dumps({ - "agentName": AGENT_NAME, - "daemonVersion": str(version.get_daemon_version()), - "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), - "extensionSupportedFeatures": [name for name, _ in get_agent_supported_features_list_for_extensions().items()], - "supportsMultiConfig": support_multi_config - }) - - return '''{{ - "__comment__": "The __status__ property is the actual status reported to CRP", - "__status__": {0}, - "__debug__": {1} -}} -'''.format(status_blob_text, debug_text) - def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed): ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 147402709c..2862f8e906 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -617,8 +617,10 @@ def _report_status(self, exthandlers_handler, agent_update_handler): if vm_status is not None: self._report_extensions_summary(vm_status) if self._goal_state is not None: - agent_status = exthandlers_handler.get_ext_handlers_status_debug_info(vm_status) - self._goal_state.save_to_history(agent_status, AGENT_STATUS_FILE) + status_blob_text = exthandlers_handler.protocol.get_status_blob_data() + if status_blob_text is None: + status_blob_text = "{}" + self._goal_state.save_to_history(status_blob_text, AGENT_STATUS_FILE) if self._goal_state.extensions_goal_state.is_outdated: exthandlers_handler.protocol.client.get_host_plugin().clear_fast_track_state() diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index e3e365d9b6..62bd11099d 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -28,16 +28,14 @@ import unittest from azurelinuxagent.common import conf -from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \ - get_agent_supported_features_list_for_crp +from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_crp from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.datacontract import get_properties from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.fileutil import read_file from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, AGENT_NAME, \ - AGENT_VERSION +from azurelinuxagent.common.version import AGENT_VERSION from azurelinuxagent.common.exception import ResourceGoneError, ExtensionDownloadError, ProtocolError, \ ExtensionErrorCodes, ExtensionError, GoalStateAggregateStatusCodes from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, ExtHandlerStatus, \ @@ -3247,91 +3245,75 @@ def mock_http_put(url, *args, **_): ) expected_status = { - "__comment__": "The __status__ property is the actual status reported to CRP", - "__status__": { - "version": "1.1", - "timestampUTC": "1970-01-01T00:00:00Z", - "aggregateStatus": { - "guestAgentStatus": { - "version": AGENT_VERSION, + "version": "1.1", + "timestampUTC": "1970-01-01T00:00:00Z", + "aggregateStatus": { + "guestAgentStatus": { + "version": AGENT_VERSION, + "status": "Ready", + "formattedMessage": { + "lang": "en-US", + "message": "Guest Agent is running" + } + }, + "handlerAggregateStatus": [ + { + "handlerVersion": "1.0.0", + "handlerName": "OSTCExtensions.ExampleHandlerLinux", "status": "Ready", + "code": 0, + "useExactVersion": True, "formattedMessage": { "lang": "en-US", - "message": "Guest Agent is running" - } - }, - "handlerAggregateStatus": [ - { - "handlerVersion": "1.0.0", - "handlerName": "OSTCExtensions.ExampleHandlerLinux", - "status": "Ready", - "code": 0, - "useExactVersion": True, - "formattedMessage": { - "lang": "en-US", - "message": "Plugin enabled" - }, - "runtimeSettingsStatus": { - "settingsStatus": { - "status": { - "name": "OSTCExtensions.ExampleHandlerLinux", - "configurationAppliedTime": None, - "operation": None, - "status": "success", - "code": 0, - "formattedMessage": { - "lang": "en-US", - "message": None - } - }, - "version": 1.0, - "timestampUTC": "1970-01-01T00:00:00Z" + "message": "Plugin enabled" + }, + "runtimeSettingsStatus": { + "settingsStatus": { + "status": { + "name": "OSTCExtensions.ExampleHandlerLinux", + "configurationAppliedTime": None, + "operation": None, + "status": "success", + "code": 0, + "formattedMessage": { + "lang": "en-US", + "message": None + } }, - "sequenceNumber": 0 - } - } - ], - "vmArtifactsAggregateStatus": { - "goalStateAggregateStatus": { - "formattedMessage": { - "lang": "en-US", - "message": "GoalState executed successfully" + "version": 1.0, + "timestampUTC": "1970-01-01T00:00:00Z" }, - "timestampUTC": "1970-01-01T00:00:00Z", - "inSvdSeqNo": "1", - "status": "Success", - "code": 0 + "sequenceNumber": 0 } } - }, - "guestOSInfo": None, - "supportedFeatures": supported_features - }, - "__debug__": { - "agentName": AGENT_NAME, - "daemonVersion": "0.0.0.0", - "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), - "extensionSupportedFeatures": [name for name, _ in get_agent_supported_features_list_for_extensions().items()], - "supportsMultiConfig": { - "OSTCExtensions.ExampleHandlerLinux": False + ], + "vmArtifactsAggregateStatus": { + "goalStateAggregateStatus": { + "formattedMessage": { + "lang": "en-US", + "message": "GoalState executed successfully" + }, + "timestampUTC": "1970-01-01T00:00:00Z", + "inSvdSeqNo": "1", + "status": "Success", + "code": 0 + } } - } + }, + "guestOSInfo": None, + "supportedFeatures": supported_features } - exthandlers_handler.run() - vm_status = exthandlers_handler.report_ext_handlers_status() - actual_status_json = json.loads(exthandlers_handler.get_ext_handlers_status_debug_info(vm_status)) + exthandlers_handler.report_ext_handlers_status() - # Don't compare the guestOSInfo - status_property = actual_status_json.get("__status__") - self.assertIsNotNone(status_property, "The status file is missing the __status__ property") - self.assertIsNotNone(status_property.get("guestOSInfo"), "The status file is missing the guestOSInfo property") - status_property["guestOSInfo"] = None + actual_status = json.loads(protocol.get_status_blob_data()) - actual_status_json.pop('guestOSInfo', None) + # Don't compare the guestOSInfo + self.assertIsNotNone(actual_status.get("guestOSInfo"), "The status file is missing the guestOSInfo property") + actual_status["guestOSInfo"] = None - self.assertEqual(expected_status, actual_status_json) + self.assertEqual(expected_status, actual_status) def test_it_should_process_extensions_only_if_allowed(self): def assert_extensions_called(exthandlers_handler, expected_call_count=0): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 4c58c850e8..49a4e0befa 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2198,7 +2198,6 @@ def create_vm_status(extension_status): exthandlers_handler.report_ext_handlers_status = Mock(return_value=create_vm_status(ExtensionStatusValue.success)) else: exthandlers_handler.report_ext_handlers_status = Mock(side_effect=[create_vm_status(s) for s in extension_statuses]) - exthandlers_handler.get_ext_handlers_status_debug_info = Mock(return_value='') yield exthandlers_handler From 5542f849723ae86170b5e5f46c11bcad57c400d2 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:27:24 -0800 Subject: [PATCH 3/3] Extension sequencing scenario (#2969) * update tests * cleanup * . * . * . * . * . * . * . * . * . * Add new test cases * Update scenario to support new tests * Scenario should support failing extensions and extensions with no settings * Clean up test * Remove locations from test suite yml * Fix deployment issue * Support creating multiple resource groups for vmss in one run * AzureMonitorLinuxAgent is not supported on flatcar * AzureMonitor is not supported on flatcar * remove agent update * Address PR comments * Fix issue with getting random ssh client * Address PR Comments * Address PR Comments * Address PR comments * Do not keep rg count in runbook * Use try/finally with lock * only check logs after scenario startS * Change to instance member --------- Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_suite.py | 21 +- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/ext_sequencing.yml | 10 + .../ext_sequencing/ext_seq_test_cases.py | 318 ++++++++++++++++++ .../tests/ext_sequencing/ext_sequencing.py | 306 +++++++++++++++++ .../tests/lib/vm_extension_identifier.py | 3 +- .../ext_sequencing-get_ext_enable_time.py | 87 +++++ 7 files changed, 742 insertions(+), 5 deletions(-) create mode 100644 tests_e2e/test_suites/ext_sequencing.yml create mode 100644 tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py create mode 100644 tests_e2e/tests/ext_sequencing/ext_sequencing.py create mode 100755 tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index ca0243044b..c9e177fcc5 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -169,6 +169,14 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._create_scale_set: bool self._delete_scale_set: bool + # + # Test suites within the same runbook may be executed concurrently, and we need to keep track of how many resource + # groups are being created. We use this lock and counter to allow only 1 thread to increment the resource group + # count. + # + _rg_count_lock = RLock() + _rg_count = 0 + def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): """ Initializes the AgentTestSuite from the data passed as arguments by LISA. @@ -231,9 +239,16 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ if isinstance(environment.nodes[0], LocalNode): # We need to create a new VMSS. - # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. Note that we hardcode the resource group - # id to "e0" and the scale set name to "n0" since we are creating a single scale set. - self._resource_group_name = f"lisa-{self._runbook_name}-{RUN_ID}-e0" + # Use the same naming convention as LISA for the scale set name: lisa---e-n0 + # Note that we hardcode the scale set name to "n0" since we are creating a single scale set. + # Resource group name cannot have any uppercase characters, because the publicIP cannot have uppercase + # characters in its domain name label. + AgentTestSuite._rg_count_lock.acquire() + try: + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{AgentTestSuite._rg_count}" + AgentTestSuite._rg_count += 1 + finally: + AgentTestSuite._rg_count_lock.release() self._vmss_name = f"{self._resource_group_name}-n0" self._test_nodes = [] # we'll fill this up when the scale set is created self._create_scale_set = True diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 6422bfb817..336d22cf67 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing" # # Parameters used to create test VMs diff --git a/tests_e2e/test_suites/ext_sequencing.yml b/tests_e2e/test_suites/ext_sequencing.yml new file mode 100644 index 0000000000..1976a85025 --- /dev/null +++ b/tests_e2e/test_suites/ext_sequencing.yml @@ -0,0 +1,10 @@ +# +# Adds extensions with multiple dependencies to VMSS using 'provisionAfterExtensions' property and validates they are +# enabled in order of dependencies. +# +name: "ExtSequencing" +tests: + - "ext_sequencing/ext_sequencing.py" +images: "endorsed" +# This scenario is executed on instances of a scaleset created by the agent test suite. +executes_on_scale_set: true \ No newline at end of file diff --git a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py new file mode 100644 index 0000000000..d1c942d0af --- /dev/null +++ b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py @@ -0,0 +1,318 @@ +def add_one_dependent_ext_without_settings(): + # Dependent extensions without settings should be enabled with dependencies + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def add_two_extensions_with_dependencies(): + # Checks that extensions are enabled in the correct order when there is only one valid sequence + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": [], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["RunCommandLinux", "AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def remove_one_dependent_extension(): + # Checks that remaining extensions with dependencies are enabled in the correct order after removing a dependent + # extension + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def remove_all_dependencies(): + # Checks that extensions are enabled after adding and removing dependencies + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def add_one_dependent_extension(): + # Checks that a valid enable sequence occurs when only one extension has dependencies + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["RunCommandLinux", "CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def add_single_dependencies(): + # Checks that extensions are enabled in the correct order when there is only one valid sequence and each extension + # has no more than one dependency + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": [], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def remove_all_dependent_extensions(): + # Checks that remaining extensions with dependencies are enabled in the correct order after removing all dependent + # extension + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + } + ] + + +def add_failing_dependent_extension_with_one_dependency(): + # This case tests that extensions dependent on a failing extensions are skipped, but extensions that are not + # dependent on the failing extension still get enabled + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "exit 1" + } + } + } + ] + + +def add_failing_dependent_extension_with_two_dependencies(): + # This case tests that all extensions dependent on a failing extensions are skipped + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "exit 1" + } + } + } + ] diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py new file mode 100644 index 0000000000..3af9e64fe6 --- /dev/null +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test adds extensions with multiple dependencies to a VMSS using the 'provisionAfterExtensions' property and +# validates they are enabled in order of dependencies. +# +import copy +import re +import uuid +from datetime import datetime +from typing import List, Dict, Any + +from assertpy import fail +from azure.mgmt.compute.models import VirtualMachineScaleSetVMExtensionsSummary + +from tests_e2e.tests.ext_sequencing.ext_seq_test_cases import add_one_dependent_ext_without_settings, add_two_extensions_with_dependencies, \ + remove_one_dependent_extension, remove_all_dependencies, add_one_dependent_extension, \ + add_single_dependencies, remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies +from tests_e2e.tests.lib.agent_test import AgentVmssTest, TestSkipped +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VmssInstanceIpAddress +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient +from tests_e2e.tests.lib.ssh_client import SshClient + + +class ExtSequencing(AgentVmssTest): + + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._scenario_start = datetime.min + + # Cases to test different dependency scenarios + _test_cases = [ + add_one_dependent_ext_without_settings, + add_two_extensions_with_dependencies, + # remove_one_dependent_extension should only be run after another test case which has RunCommandLinux in the + # model + remove_one_dependent_extension, + # remove_all_dependencies should only be run after another test case which has extension dependencies in the + # model + remove_all_dependencies, + add_one_dependent_extension, + add_single_dependencies, + # remove_all_dependent_extensions should only be run after another test case which has dependent extension in + # the model + remove_all_dependent_extensions, + add_failing_dependent_extension_with_one_dependency, + add_failing_dependent_extension_with_two_dependencies + ] + + @staticmethod + def _get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + dependency_map: Dict[str, Dict[str, Any]] = dict() + + for ext in extensions: + ext_name = ext['name'] + provisioned_after = ext['properties'].get('provisionAfterExtensions') + depends_on = provisioned_after if provisioned_after else [] + # We know an extension should fail if commandToExecute is exactly "exit 1" + ext_settings = ext['properties'].get("settings") + ext_command = ext['properties']['settings'].get("commandToExecute") if ext_settings else None + should_fail = ext_command == "exit 1" + dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": depends_on} + + return dependency_map + + @staticmethod + def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient, test_case_start: datetime) -> List[str]: + # Using VmExtensionIds to get publisher for each ext to be used in remote script + extension_full_names = { + "AzureMonitorLinuxAgent": VmExtensionIds.AzureMonitorLinuxAgent, + "RunCommandLinux": VmExtensionIds.RunCommand, + "CustomScript": VmExtensionIds.CustomScript + } + enabled_times = [] + for ext in extensions: + # Only check extensions which succeeded provisioning + if "succeeded" in ext.statuses_summary[0].code: + enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type '{extension_full_names[ext.name]}' --start_time '{str(test_case_start)}'", use_sudo=True) + enabled_times.append( + { + "name": ext.name, + "enabled_time": datetime.strptime(enabled_time.strip(), u'%Y-%m-%d %H:%M:%S') + } + ) + + # sort the extensions based on their enabled datetime + sorted_extensions = sorted(enabled_times, key=lambda ext_: ext_["enabled_time"]) + log.info("") + log.info("Extensions sorted by time they were enabled: {0}".format( + ', '.join(["{0}: {1}".format(ext["name"], ext["enabled_time"]) for ext in sorted_extensions]))) + sorted_extension_names = [ext["name"] for ext in sorted_extensions] + return sorted_extension_names + + @staticmethod + def _validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str], relax_check: bool): + installed_ext = dict() + + # Iterate through the extensions in the enabled order and validate if their depending extensions are already + # enabled prior to that. + for ext in sorted_extension_names: + # Check if the depending extension are already installed + if ext not in dependency_map: + # There should not be any unexpected extensions on the scale set, even in the case we share the VMSS, + # because we update the scale set model with the extensions. Any extensions that are not in the scale + # set model would be disabled. + fail("Unwanted extension found in VMSS Instance view: {0}".format(ext)) + if dependency_map[ext] is not None: + dependencies = dependency_map[ext].get('depends_on') + for dep in dependencies: + if installed_ext.get(dep) is None: + # The depending extension is not installed prior to the current extension + if relax_check: + log.info("{0} is not installed prior to {1}".format(dep, ext)) + else: + fail("{0} is not installed prior to {1}".format(dep, ext)) + + # Mark the current extension as installed + installed_ext[ext] = ext + + # Validate that only extensions expected to fail, and their dependent extensions, failed + for ext, details in dependency_map.items(): + failing_ext_dependencies = [dep for dep in details['depends_on'] if dependency_map[dep]['should_fail']] + if ext not in installed_ext: + if details['should_fail']: + log.info("Extension {0} failed as expected".format(ext)) + elif failing_ext_dependencies: + log.info("Extension {0} failed as expected because it is dependent on {1}".format(ext, ' and '.join(failing_ext_dependencies))) + else: + fail("{0} unexpectedly failed. Only extensions that are expected to fail or depend on a failing extension should fail".format(ext)) + + log.info("Validated extension sequencing") + + def run(self): + instances_ip_address: List[VmssInstanceIpAddress] = self._context.vmss.get_instances_ip_address() + ssh_clients: Dict[str, SshClient] = dict() + for instance in instances_ip_address: + ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) + + if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(next(iter(ssh_clients.values())).run_command("uname -a")): + raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this distro") + + # This is the base ARM template that's used for deploying extensions for this scenario + base_extension_template = { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json", + "contentVersion": "1.0.0.0", + "resources": [ + { + "type": "Microsoft.Compute/virtualMachineScaleSets", + "name": f"{self._context.vmss.name}", + "location": "[resourceGroup().location]", + "apiVersion": "2018-06-01", + "properties": { + "virtualMachineProfile": { + "extensionProfile": { + "extensions": [] + } + } + } + } + ] + } + + for case in self._test_cases: + test_case_start = datetime.now() + if self._scenario_start == datetime.min: + self._scenario_start = test_case_start + + # Assign unique guid to forceUpdateTag for each extension to make sure they're always unique to force CRP + # to generate a new sequence number each time + test_guid = str(uuid.uuid4()) + extensions = case() + for ext in extensions: + ext["properties"].update({ + "forceUpdateTag": test_guid + }) + + # We update the extension template here with extensions that are specific to the scenario that we want to + # test out + log.info("") + log.info("Test case: {0}".format(case.__name__.replace('_', ' '))) + ext_template = copy.deepcopy(base_extension_template) + ext_template['resources'][0]['properties']['virtualMachineProfile']['extensionProfile'][ + 'extensions'] = extensions + + # Log the dependency map for the extensions in this test case + dependency_map = self._get_dependency_map(extensions) + log.info("") + log.info("The dependency map of the extensions for this test case is:") + for ext, details in dependency_map.items(): + dependencies = details.get('depends_on') + dependency_list = "-" if not dependencies else ' and '.join(dependencies) + log.info("{0} depends on {1}".format(ext, dependency_list)) + + # Deploy updated extension template to the scale set. + log.info("") + log.info("Deploying extensions with the above dependencies to the scale set...") + rg_client = ResourceGroupClient(self._context.vmss.cloud, self._context.vmss.subscription, + self._context.vmss.resource_group, self._context.vmss.location) + try: + rg_client.deploy_template(template=ext_template) + except Exception as e: + # We only expect to catch an exception during deployment if we are forcing one of the extensions to + # fail. We know an extension should fail if "failing" is in the case name. Otherwise, report the + # failure. + deployment_failure_pattern = r"[\s\S]*\"details\": [\s\S]* \"code\": \"(?P.*)\"[\s\S]* \"message\": \"(?P.*)\"[\s\S]*" + msg_pattern = r"Multiple VM extensions failed to be provisioned on the VM. Please see the VM extension instance view for other failures. The first extension failed due to the error: VM Extension '.*' is marked as failed since it depends upon the VM Extension 'CustomScript' which has failed." + deployment_failure_match = re.match(deployment_failure_pattern, str(e)) + if "failing" not in case.__name__: + fail("Extension template deployment unexpectedly failed: {0}".format(e)) + elif not deployment_failure_match or deployment_failure_match.group("code") != "VMExtensionProvisioningError" or not re.match(msg_pattern, deployment_failure_match.group("msg")): + fail("Extension template deployment failed as expected, but with an unexpected error: {0}".format(e)) + + # Get the extensions on the VMSS from the instance view + log.info("") + instance_view_extensions = self._context.vmss.get_instance_view().extensions + + # Validate that the extensions were enabled in the correct order on each instance of the scale set + for instance_name, ssh_client in ssh_clients.items(): + log.info("") + log.info("Validate extension sequencing on {0}:{1}...".format(instance_name, ssh_client.ip_address)) + + # Sort the VM extensions by the time they were enabled + sorted_extension_names = self._get_sorted_extension_names(instance_view_extensions, ssh_client, test_case_start) + + # Validate that the extensions were enabled in the correct order. We relax this check if no settings + # are provided for a dependent extension, since the guest agent currently ignores dependencies in this + # case. + relax_check = True if "settings" in case.__name__ else False + self._validate_extension_sequencing(dependency_map, sorted_extension_names, relax_check) + + log.info("------") + + def get_ignore_errors_before_timestamp(self) -> datetime: + # Ignore errors in the agent log before the first test case starts + return self._scenario_start + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # WARNING ExtHandler ExtHandler Missing dependsOnExtension on extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent + # This message appears when an extension doesn't depend on another extension + # + { + 'message': r"Missing dependsOnExtension on extension .*" + }, + # + # WARNING ExtHandler ExtHandler Extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent does not have any settings. Will ignore dependency (dependency level: 1) + # We currently ignore dependencies for extensions without settings + # + { + 'message': r"Extension .* does not have any settings\. Will ignore dependency \(dependency level: \d\)" + }, + # + # 2023-10-31T17:46:59.675959Z WARNING ExtHandler ExtHandler Dependent extension Microsoft.Azure.Extensions.CustomScript failed or timed out, will skip processing the rest of the extensions + # We intentionally make CustomScript fail to test that dependent extensions are skipped + # + { + 'message': r"Dependent extension Microsoft.Azure.Extensions.CustomScript failed or timed out, will skip processing the rest of the extensions" + }, + # + # 2023-10-31T17:48:13.349214Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension Microsoft.Azure.Extensions.CustomScript did not succeed. Status was error, duration=0 + # We intentionally make CustomScript fail to test that dependent extensions are skipped + # + { + 'message': r"Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension Microsoft.Azure.Extensions.CustomScript did not succeed. Status was error, duration=0" + }, + # + # 2023-10-31T17:47:07.689083Z WARNING ExtHandler ExtHandler [PERIODIC] This status is being reported by the Guest Agent since no status file was reported by extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent: [ExtensionStatusError] Status file /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11/status/6.status does not exist + # We expect extensions that are dependent on a failing extension to not report status + # + { + 'message': r"\[PERIODIC\] This status is being reported by the Guest Agent since no status file was reported by extension .*: \[ExtensionStatusError\] Status file \/var\/lib\/waagent\/.*\/status\/\d.status does not exist" + }, + # + # 2023-10-31T17:48:11.306835Z WARNING ExtHandler ExtHandler A new goal state was received, but not all the extensions in the previous goal state have completed: [('Microsoft.Azure.Extensions.CustomScript', 'error'), ('Microsoft.Azure.Monitor.AzureMonitorLinuxAgent', 'transitioning'), ('Microsoft.CPlat.Core.RunCommandLinux', 'success')] + # This message appears when the previous test scenario had failing extensions due to extension dependencies + # + { + 'message': r"A new goal state was received, but not all the extensions in the previous goal state have completed: \[(\(u?'.*', u?'(error|transitioning|success)'\),?)+\]" + } + ] + return ignore_rules + + +if __name__ == "__main__": + ExtSequencing.run_from_command_line() diff --git a/tests_e2e/tests/lib/vm_extension_identifier.py b/tests_e2e/tests/lib/vm_extension_identifier.py index 26113e445c..fa304cb766 100644 --- a/tests_e2e/tests/lib/vm_extension_identifier.py +++ b/tests_e2e/tests/lib/vm_extension_identifier.py @@ -33,7 +33,8 @@ def __init__(self, publisher: str, ext_type: str, version: str): self.version: str = version unsupported_distros: Dict[str, List[str]] = { - "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"] + "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"], + "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": ["flatcar"] } def supports_distro(self, system_info: str) -> bool: diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py new file mode 100755 index 0000000000..b9b2c66cb5 --- /dev/null +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -0,0 +1,87 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Gets the timestamp for when the provided extension was enabled +# + +import argparse +import re +import sys + +from datetime import datetime +from pathlib import Path + + +def main(): + """ + Returns the timestamp of when the provided extension was enabled + """ + parser = argparse.ArgumentParser() + parser.add_argument("--ext_type", dest='ext_type', required=True) + parser.add_argument("--start_time", dest='start_time', required=True) + args, _ = parser.parse_known_args() + + # Extension enabled time is in extension CommandExecution.log + command_exec_log_path = Path('/var/log/azure/' + args.ext_type + '/CommandExecution.log') + command_exec_log = open(command_exec_log_path, 'r') + enabled_match = None + for line in command_exec_log.readlines(): + line = line.rstrip() + if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": + # AMA logs enable succeeded and its timestamp to the command execution log: + # 2023-11-01T23:22:53.124603Z INFO ExtHandler [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11] Command: ./shim.sh -enable + # [stdout] + # 2023/09/26 04:07:33 [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.5] Enable,success,0,Enable succeeded + enable_pattern = r'.*(?P\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) \[Microsoft\.Azure\.Monitor\.AzureMonitorLinuxAgent\-.*] .*Enable succeeded.*' + match = re.match(enable_pattern, line) + if match: + enabled_match = match + else: + # For RC and CSE, we can determine when enable succeeded from the stdout of the enable command execution from + # the command execution log: + # 2023-09-26T04:07:39.042948Z INFO ExtHandler [Microsoft.CPlat.Core.RunCommandLinux-1.0.5] Command: bin/run-command-shim enable + # [stdout] + # ... + # time=2023-09-26T04:07:37Z version=v1.0.4/git@b3be41d-dirty operation=enable seq=0 event=enabledevent=enabled + enable_pattern = r'time=(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*event=enabled' + match = re.match(enable_pattern, line) + if match: + enabled_match = match + + if not enabled_match: + # Try to get enabled time from extension command execution logs + print("Agent log does not show extension was enabled", file=sys.stderr) + sys.exit(1) + + if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": + enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y/%m/%d %H:%M:%S') + else: + enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y-%m-%dT%H:%M:%SZ') + + start_time = datetime.strptime(args.start_time, u'%Y-%m-%d %H:%M:%S.%f') + if enable_time < start_time: + print("Agent log does not show extension was enabled after this test case started. Last enabled time was {0}. This test case started at {1}".format(enable_time, start_time), file=sys.stderr) + sys.exit(1) + else: + print(enable_time) + + sys.exit(0) + + +if __name__ == "__main__": + main()