diff --git a/README.md b/README.md
index 3d3a824e1f..5a5b126f2f 100644
--- a/README.md
+++ b/README.md
@@ -261,6 +261,30 @@ without the agent. In order to do that, the `provisionVMAgent` flag must be set
provisioning time, via whichever API is being used. We will provide more details on
this on our wiki when it is generally available.
+#### __Extensions.WaitForCloudInit__
+
+_Type: Boolean_
+_Default: n_
+
+Waits for cloud-init to complete (cloud-init status --wait) before executing VM extensions.
+
+Both cloud-init and VM extensions are common ways to customize a VM during initial deployment. By
+default, the agent will start executing extensions while cloud-init may still be in the 'config'
+stage and won't wait for the 'final' stage to complete. Cloud-init and extensions may execute operations
+that conflict with each other (for example, both of them may try to install packages). Setting this option
+to 'y' ensures that VM extensions are executed only after cloud-init has completed all its stages.
+
+Note that using this option requires creating a custom image with the value of this option set to 'y', in
+order to ensure that the wait is performed during the initial deployment of the VM.
+
+#### __Extensions.WaitForCloudInitTimeout__
+
+_Type: Integer_
+_Default: 3600_
+
+Timeout in seconds for the Agent to wait on cloud-init. If the timeout elapses, the Agent will continue
+executing VM extensions. See Extensions.WaitForCloudInit for more details.
+
#### __Extensions.GoalStatePeriod__
_Type: Integer_
@@ -273,19 +297,38 @@ _Note_: setting up this parameter to more than a few minutes can make the state
the VM be reported as unresponsive/unavailable on the Azure portal. Also, this
setting affects how fast the agent starts executing extensions.
-#### __AutoUpdate.Enabled__
+#### __AutoUpdate.UpdateToLatestVersion__
-_Type: Boolean_
+_Type: Boolean_
_Default: y_
-Enables auto-update of the Extension Handler. The Extension Handler is responsible
+Enables auto-update of the Extension Handler. The Extension Handler is responsible
for managing extensions and reporting VM status. The core functionality of the agent
-is contained in the Extension Handler, and we encourage users to enable this option
+is contained in the Extension Handler, and we encourage users to enable this option
in order to maintain an up to date version.
+
+When this option is enabled, the Agent will install new versions when they become
+available. When disabled, the Agent will not install any new versions, but it will use
+the most recent version already installed on the VM.
-On most distros the default value is 'y'.
+_Notes_:
+1. This option was added in version 2.10.0.8 of the Agent. For previous versions, see AutoUpdate.Enabled.
+2. If both options are specified in waagent.conf, AutoUpdate.UpdateToLatestVersion overrides the value set for AutoUpdate.Enabled.
+3. Changing this config option requires a service restart to pick up the updated setting.
+
+For more information on the agent version, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output).
+For more information on the agent update, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#how-auto-update-works-for-extension-handler).
+For more information on the AutoUpdate.UpdateToLatestVersion vs AutoUpdate.Enabled, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion).
+
+#### __AutoUpdate.Enabled__
-For more information on the agent version, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output).
+_Type: Boolean_
+_Default: y_
+
+Enables auto-update of the Extension Handler. This flag is supported for legacy reasons and we strongly recommend using AutoUpdate.UpdateToLatestVersion instead.
+The difference between these 2 flags is that, when set to 'n', AutoUpdate.Enabled will use the version of the Extension Handler that is pre-installed on the image, while AutoUpdate.UpdateToLatestVersion will use the most recent version that has already been installed on the VM (via auto-update).
+
+On most distros the default value is 'y'.
#### __Provisioning.Agent__
diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py
index 57d6c9d280..a13f333576 100644
--- a/azurelinuxagent/common/conf.py
+++ b/azurelinuxagent/common/conf.py
@@ -117,6 +117,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"Logs.Console": True,
"Logs.Collect": True,
"Extensions.Enabled": True,
+ "Extensions.WaitForCloudInit": False,
"Provisioning.AllowResetSysUser": False,
"Provisioning.RegenerateSshHostKeyPair": False,
"Provisioning.DeleteRootPassword": False,
@@ -170,6 +171,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
__INTEGER_OPTIONS__ = {
"Extensions.GoalStatePeriod": 6,
"Extensions.InitialGoalStatePeriod": 6,
+ "Extensions.WaitForCloudInitTimeout": 3600,
"OS.EnableFirewallPeriod": 300,
"OS.RemovePersistentNetRulesPeriod": 30,
"OS.RootDeviceScsiTimeoutPeriod": 30,
@@ -372,6 +374,14 @@ def get_extensions_enabled(conf=__conf__):
return conf.get_switch("Extensions.Enabled", True)
+def get_wait_for_cloud_init(conf=__conf__):
+ """
+ Returns the value of the boolean option Extensions.WaitForCloudInit, passing False as the default to get_switch().
+ """
+ return conf.get_switch("Extensions.WaitForCloudInit", False)
+
+
+def get_wait_for_cloud_init_timeout(conf=__conf__):
+    """
+    Returns the value of Extensions.WaitForCloudInitTimeout, the timeout (in seconds) for the wait on cloud-init.
+    The default is 3600.
+    """
+    # This option is registered in __INTEGER_OPTIONS__, so it is read with get_int(), like the other integer
+    # options (e.g. Extensions.GoalStatePeriod); get_switch() is used for the boolean options.
+    return conf.get_int("Extensions.WaitForCloudInitTimeout", 3600)
+
+
def get_goal_state_period(conf=__conf__):
return conf.get_int("Extensions.GoalStatePeriod", 6)
diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py
index fe313968fe..b010583808 100644
--- a/azurelinuxagent/common/event.py
+++ b/azurelinuxagent/common/event.py
@@ -75,6 +75,7 @@ class WALAEventOperation:
CGroupsCleanUp = "CGroupsCleanUp"
CGroupsDisabled = "CGroupsDisabled"
CGroupsInfo = "CGroupsInfo"
+ CloudInit = "CloudInit"
CollectEventErrors = "CollectEventErrors"
CollectEventUnicodeErrors = "CollectEventUnicodeErrors"
ConfigurationChange = "ConfigurationChange"
diff --git a/azurelinuxagent/common/osutil/redhat.py b/azurelinuxagent/common/osutil/redhat.py
index 05a4b659df..2d8ff3d1e5 100644
--- a/azurelinuxagent/common/osutil/redhat.py
+++ b/azurelinuxagent/common/osutil/redhat.py
@@ -192,3 +192,10 @@ def restart_if(self, ifname, retries=3, wait=5):
time.sleep(wait)
else:
logger.warn("exceeded restart retries")
+
+ def publish_hostname(self, hostname):
+ """
+ Runs "service NetworkManager restart" and then delegates to DefaultOSUtil.publish_hostname().
+ """
+ # RedhatOSUtil was updated to conditionally run NetworkManager restart in response to a race condition between
+ # NetworkManager restart and the agent restarting the network interface during publish_hostname. Keeping the
+ # NetworkManager restart in RedhatOSModernUtil because the issue was not reproduced on these versions.
+ shellutil.run("service NetworkManager restart")
+ DefaultOSUtil.publish_hostname(self, hostname)
diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py
index 50fd4592f1..d2bfd787ed 100644
--- a/azurelinuxagent/common/utils/shellutil.py
+++ b/azurelinuxagent/common/utils/shellutil.py
@@ -18,9 +18,17 @@
#
import os
import subprocess
+import sys
import tempfile
import threading
+if sys.version_info[0] == 2:
+ # TimeoutExpired was introduced on Python 3; define a dummy class for Python 2
+ class TimeoutExpired(Exception):
+ pass
+else:
+ from subprocess import TimeoutExpired
+
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.future import ustr
@@ -206,7 +214,7 @@ def __run_command(command_action, command, log_error, encode_output):
# W0622: Redefining built-in 'input' -- disabled: the parameter name mimics subprocess.communicate()
-def run_command(command, input=None, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, log_error=False, encode_input=True, encode_output=True, track_process=True): # pylint:disable=W0622
+def run_command(command, input=None, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, log_error=False, encode_input=True, encode_output=True, track_process=True, timeout=None): # pylint:disable=W0622
"""
Executes the given command and returns its stdout.
@@ -227,7 +235,9 @@ def run_command(command, input=None, stdin=None, stdout=subprocess.PIPE, stderr=
value for these parameters is anything other than the default (subprocess.PIPE)), then the corresponding
values returned by this function or the CommandError exception will be empty strings.
- Note: This is the preferred method to execute shell commands over `azurelinuxagent.common.utils.shellutil.run` function.
+ NOTE: The 'timeout' parameter is ignored on Python 2
+
+ NOTE: This is the preferred method to execute shell commands over `azurelinuxagent.common.utils.shellutil.run` function.
"""
if input is not None and stdin is not None:
raise ValueError("The input and stdin arguments are mutually exclusive")
@@ -246,7 +256,30 @@ def command_action():
else:
process = subprocess.Popen(command, stdin=popen_stdin, stdout=stdout, stderr=stderr, shell=False)
- command_stdout, command_stderr = process.communicate(input=communicate_input)
+ try:
+ if sys.version_info[0] == 2: # communicate() doesn't support timeout on Python 2
+ command_stdout, command_stderr = process.communicate(input=communicate_input)
+ else:
+ command_stdout, command_stderr = process.communicate(input=communicate_input, timeout=timeout)
+ except TimeoutExpired:
+ if log_error:
+ logger.error(u"Command [{0}] timed out", __format_command(command))
+
+ command_stdout, command_stderr = '', ''
+
+ try:
+ process.kill()
+ # try to get any output from the command, but ignore any errors if we can't
+ try:
+ command_stdout, command_stderr = process.communicate()
+ # W0702: No exception type(s) specified (bare-except)
+ except: # pylint: disable=W0702
+ pass
+ except Exception as exception:
+ if log_error:
+ logger.error(u"Can't terminate timed out process: {0}", ustr(exception))
+ raise CommandError(command=__format_command(command), return_code=-1, stdout=command_stdout, stderr="command timeout\n{0}".format(command_stderr))
+
if track_process:
_on_command_completed(process.pid)
diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py
index 88267b75e2..1a0e362407 100644
--- a/azurelinuxagent/ga/update.py
+++ b/azurelinuxagent/ga/update.py
@@ -149,6 +149,8 @@ def __init__(self):
self._last_check_memory_usage_time = time.time()
self._check_memory_usage_last_error_report = datetime.min
+ self._cloud_init_completed = False # Only used when Extensions.WaitForCloudInit is enabled; note that this variable is always reset on service start.
+
# VM Size is reported via the heartbeat, default it here.
self._vm_size = None
@@ -458,6 +460,22 @@ def _initialize_goal_state(self, protocol):
logger.info("The current Fabric goal state is older than the most recent FastTrack goal state; will skip it.\nFabric: {0}\nFastTrack: {1}",
egs.created_on_timestamp, last_fast_track_timestamp)
+    def _wait_for_cloud_init(self):
+        """
+        If Extensions.WaitForCloudInit is enabled and the wait has not been done yet, runs 'cloud-init status --wait'
+        (bounded by Extensions.WaitForCloudInitTimeout) and reports the outcome to the log and telemetry.
+
+        Errors (including timeouts) are logged and reported but never propagated. The wait is attempted only once
+        per UpdateHandler instance.
+        """
+        if not conf.get_wait_for_cloud_init() or self._cloud_init_completed:
+            return
+
+        start_message = "Waiting for cloud-init to complete..."
+        logger.info(start_message)
+        add_event(op=WALAEventOperation.CloudInit, message=start_message)
+
+        try:
+            timeout = conf.get_wait_for_cloud_init_timeout()
+            output = shellutil.run_command(["cloud-init", "status", "--wait"], timeout=timeout)
+            completed_message = "cloud-init completed\n{0}".format(output)
+            logger.info(completed_message)
+            add_event(op=WALAEventOperation.CloudInit, message=completed_message)
+        except Exception as error:
+            error_message = "An error occurred while waiting for cloud-init; will proceed to execute VM extensions. Extensions that have conflicts with cloud-init may fail.\n{0}".format(ustr(error))
+            logger.error(error_message)
+            add_event(op=WALAEventOperation.CloudInit, message=error_message, is_success=False, log_event=False)
+
+        # Mark as completed even on error since we will proceed to execute extensions
+        self._cloud_init_completed = True
+
def _get_vm_size(self, protocol):
"""
Including VMSize is meant to capture the architecture of the VM (i.e. arm64 VMs will
@@ -562,6 +580,8 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_
# check for agent updates
agent_update_handler.run(self._goal_state, self._processing_new_extensions_goal_state())
+ self._wait_for_cloud_init()
+
try:
if self._processing_new_extensions_goal_state():
if not self._extensions_summary.converged:
diff --git a/tests/common/test_conf.py b/tests/common/test_conf.py
index 972b289a79..1ae951bf9f 100644
--- a/tests/common/test_conf.py
+++ b/tests/common/test_conf.py
@@ -27,6 +27,8 @@ class TestConf(AgentTestCase):
# -- These values *MUST* match those from data/test_waagent.conf
EXPECTED_CONFIGURATION = {
"Extensions.Enabled": True,
+ "Extensions.WaitForCloudInit": False,
+ "Extensions.WaitForCloudInitTimeout": 3600,
"Provisioning.Agent": "auto",
"Provisioning.DeleteRootPassword": True,
"Provisioning.RegenerateSshHostKeyPair": True,
diff --git a/tests/common/utils/test_shell_util.py b/tests/common/utils/test_shell_util.py
index 3c6afc60e6..5eb5a83a6d 100644
--- a/tests/common/utils/test_shell_util.py
+++ b/tests/common/utils/test_shell_util.py
@@ -18,13 +18,14 @@
import os
import signal
import subprocess
+import sys
import tempfile
import threading
import unittest
from azurelinuxagent.common.future import ustr
import azurelinuxagent.common.utils.shellutil as shellutil
-from tests.lib.tools import AgentTestCase, patch
+from tests.lib.tools import AgentTestCase, patch, skip_if_predicate_true
from tests.lib.miscellaneous_tools import wait_for, format_processes
@@ -225,6 +226,12 @@ def test_run_command_should_raise_an_exception_when_it_cannot_execute_the_comman
self.__it_should_raise_an_exception_when_it_cannot_execute_the_command(
lambda: shellutil.run_command("nonexistent_command"))
+ @skip_if_predicate_true(lambda: sys.version_info[0] == 2, "Timeouts are not supported on Python 2")
+ def test_run_command_should_raise_an_exception_when_the_command_times_out(self):
+ # 'sleep 5' outlives the 1-second timeout, so run_command is expected to raise CommandError
+ with self.assertRaises(shellutil.CommandError) as context:
+ shellutil.run_command(["sleep", "5"], timeout=1)
+ # run_command prefixes the stderr of the CommandError with "command timeout" when the command times out
+ self.assertIn("command timeout", context.exception.stderr, "The command did not time out")
+
def test_run_pipe_should_raise_an_exception_when_it_cannot_execute_the_pipe(self):
self.__it_should_raise_an_exception_when_it_cannot_execute_the_command(
lambda: shellutil.run_pipe([["ls", "-ld", "."], ["nonexistent_command"], ["wc", "-l"]]))
diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py
index c25585f143..aa39ccb55a 100644
--- a/tests/ga/test_update.py
+++ b/tests/ga/test_update.py
@@ -38,7 +38,7 @@
ExtHandlerPackage, ExtHandlerPackageList, Extension, VMStatus, ExtHandlerStatus, ExtensionStatus, \
VMAgentUpdateStatuses
from azurelinuxagent.common.protocol.util import ProtocolUtil
-from azurelinuxagent.common.utils import fileutil, textutil, timeutil
+from azurelinuxagent.common.utils import fileutil, textutil, timeutil, shellutil
from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME, AGENT_STATUS_FILE
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules
@@ -980,7 +980,6 @@ def match_expected_info():
match_unexpected_errors() # Match on errors first, they can provide more info.
match_expected_info()
-
def test_it_should_recreate_handler_env_on_service_startup(self):
iterations = 5
@@ -1361,6 +1360,64 @@ def test_it_should_reset_legacy_blacklisted_agents_on_process_start(self):
self.assertFalse(agent.is_blacklisted, "Legacy Agent should not be blacklisted")
+class TestUpdateWaitForCloudInit(AgentTestCase):
+    """
+    Tests for UpdateHandler._wait_for_cloud_init() and for its position in the goal state processing sequence.
+    """
+    @staticmethod
+    @contextlib.contextmanager
+    def create_mock_run_command(delay=None):
+        """
+        Patches shellutil.run_command (as referenced from azurelinuxagent.ga.update) so that
+        'cloud-init status --wait' is not actually executed. When 'delay' is given, the mock first runs
+        'sleep' for that many seconds, forwarding the caller's arguments (including any timeout), to
+        simulate a slow cloud-init. Any other command is forwarded to the original run_command.
+        """
+        def run_command_mock(cmd, *args, **kwargs):
+            if cmd == ["cloud-init", "status", "--wait"]:
+                if delay is not None:
+                    original_run_command(['sleep', str(delay)], *args, **kwargs)
+                return "cloud-init completed"
+            return original_run_command(cmd, *args, **kwargs)
+        original_run_command = shellutil.run_command
+
+        with patch("azurelinuxagent.ga.update.shellutil.run_command", side_effect=run_command_mock) as run_command_patch:
+            yield run_command_patch
+
+    def test_it_should_not_wait_for_cloud_init_by_default(self):
+        update_handler = UpdateHandler()
+        with self.create_mock_run_command() as run_command_patch:
+            update_handler._wait_for_cloud_init()
+            # assertEqual reports the actual call count on failure
+            self.assertEqual(0, run_command_patch.call_count, "'cloud-init status --wait' should not be called by default")
+
+    def test_it_should_wait_for_cloud_init_when_requested(self):
+        update_handler = UpdateHandler()
+        with patch("azurelinuxagent.ga.update.conf.get_wait_for_cloud_init", return_value=True):
+            with self.create_mock_run_command() as run_command_patch:
+                update_handler._wait_for_cloud_init()
+                self.assertEqual(1, run_command_patch.call_count, "'cloud-init status --wait' should have been called once")
+
+    @skip_if_predicate_true(lambda: sys.version_info[0] == 2, "Timeouts are not supported on Python 2")
+    def test_it_should_enforce_timeout_waiting_for_cloud_init(self):
+        update_handler = UpdateHandler()
+        with patch("azurelinuxagent.ga.update.conf.get_wait_for_cloud_init", return_value=True):
+            with patch("azurelinuxagent.ga.update.conf.get_wait_for_cloud_init_timeout", return_value=1):
+                # The mocked cloud-init sleeps for 5 seconds, longer than the 1-second timeout configured above
+                with self.create_mock_run_command(delay=5):
+                    with patch("azurelinuxagent.ga.update.logger.error") as mock_logger:
+                        update_handler._wait_for_cloud_init()
+                        call_args = [args for args, _ in mock_logger.call_args_list if "An error occurred while waiting for cloud-init" in args[0]]
+                        self.assertTrue(
+                            len(call_args) == 1 and len(call_args[0]) == 1 and "command timeout" in call_args[0][0],
+                            "Expected a timeout waiting for cloud-init. Log calls: {0}".format(mock_logger.call_args_list))
+
+    def test_update_handler_should_wait_for_cloud_init_after_agent_update_and_before_extension_processing(self):
+        # Each mocked component records its invocation here, so the list captures the order of the calls
+        method_calls = []
+
+        agent_update_handler = Mock()
+        agent_update_handler.run = lambda *_, **__: method_calls.append("AgentUpdateHandler.run()")
+
+        exthandlers_handler = Mock()
+        exthandlers_handler.run = lambda *_, **__: method_calls.append("ExtHandlersHandler.run()")
+
+        with mock_wire_protocol(DATA_FILE) as protocol:
+            with mock_update_handler(protocol, iterations=1, agent_update_handler=agent_update_handler, exthandlers_handler=exthandlers_handler) as update_handler:
+                with patch('azurelinuxagent.ga.update.UpdateHandler._wait_for_cloud_init', side_effect=lambda *_, **__: method_calls.append("UpdateHandler._wait_for_cloud_init()")):
+                    update_handler.run()
+
+        self.assertListEqual(["AgentUpdateHandler.run()", "UpdateHandler._wait_for_cloud_init()", "ExtHandlersHandler.run()"], method_calls, "Wait for cloud-init should happen after agent update and before extension processing")
+
+
+
class UpdateHandlerRunTestCase(AgentTestCase):
def _test_run(self, autoupdate_enabled=False, check_daemon_running=False, expected_exit_code=0, emit_restart_event=None):
fileutil.write_file(conf.get_agent_pid_file_path(), ustr(42))
diff --git a/tests/lib/mock_update_handler.py b/tests/lib/mock_update_handler.py
index f0b311abe2..03d7a44521 100644
--- a/tests/lib/mock_update_handler.py
+++ b/tests/lib/mock_update_handler.py
@@ -86,9 +86,9 @@ def patch_object(target, attribute):
try:
with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler):
- with patch("azurelinuxagent.ga.agent_update_handler.get_agent_update_handler", return_value=agent_update_handler):
+ with patch("azurelinuxagent.ga.update.get_agent_update_handler", return_value=agent_update_handler):
with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler):
- with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled):
+ with patch("azurelinuxagent.ga.update.conf.get_autoupdate_enabled", return_value=autoupdate_enabled):
with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)):
with patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep:
with patch('sys.exit', side_effect=lambda _: 0) as mock_exit:
diff --git a/tests/test_agent.py b/tests/test_agent.py
index 414faa7266..0da6a2a853 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -53,6 +53,8 @@
Extensions.Enabled = True
Extensions.GoalStatePeriod = 6
Extensions.InitialGoalStatePeriod = 6
+Extensions.WaitForCloudInit = False
+Extensions.WaitForCloudInitTimeout = 3600
HttpProxy.Host = None
HttpProxy.Port = None
Lib.Dir = /var/lib/waagent
diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py
index fbe53a1bdc..4b650e8641 100644
--- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py
+++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py
@@ -159,15 +159,25 @@ def create_environment_list(self) -> List[Dict[str, Any]]:
for image in images_info:
if image in skip_images_info:
continue
- # 'image.urn' can actually be the URL to a VHD if the runbook provided it in the 'image' parameter
+ # 'image.urn' can actually be the URL to a VHD or an image from a gallery if the runbook provided it in the 'image' parameter
if self._is_vhd(image.urn):
marketplace_image = ""
vhd = image.urn
image_name = urllib.parse.urlparse(vhd).path.split('/')[-1] # take the last fragment of the URL's path (e.g. "RHEL_8_Standard-8.3.202006170423.vhd")
+ shared_gallery = ""
+ elif self._is_image_from_gallery(image.urn):
+ marketplace_image = ""
+ vhd = ""
+ image_name = self._get_name_of_image_from_gallery(image.urn)
+ shared_gallery = image.urn
else:
marketplace_image = image.urn
vhd = ""
image_name = self._get_image_name(image.urn)
+ shared_gallery = ""
+
+ if test_suite_info.executes_on_scale_set and (vhd != "" or shared_gallery != ""):
+ raise Exception("VHDS and images from galleries are currently not supported on scale sets.")
location: str = self._get_location(test_suite_info, image)
if location is None:
@@ -194,6 +204,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]:
env_name=f"{image_name}-{test_suite_info.name}",
marketplace_image=marketplace_image,
vhd=vhd,
+ shared_gallery=shared_gallery,
location=location,
vm_size=vm_size,
test_suite_info=test_suite_info)
@@ -206,9 +217,6 @@ def create_environment_list(self) -> List[Dict[str, Any]]:
env["c_test_suites"].append(test_suite_info)
else:
if test_suite_info.executes_on_scale_set:
- # TODO: Add support for VHDs
- if vhd != "":
- raise Exception("VHDS are currently not supported on scale sets.")
env = self.create_vmss_environment(
env_name=env_name,
marketplace_image=marketplace_image,
@@ -220,18 +228,18 @@ def create_environment_list(self) -> List[Dict[str, Any]]:
env_name=env_name,
marketplace_image=marketplace_image,
vhd=vhd,
+ shared_gallery=shared_gallery,
location=location,
vm_size=vm_size,
test_suite_info=test_suite_info)
shared_environments[env_name] = env
- if test_suite_info.template != '':
- vm_tags = env.get("vm_tags")
- if vm_tags is not None:
- if "templates" not in vm_tags:
- vm_tags["templates"] = test_suite_info.template
- else:
- vm_tags["templates"] += "," + test_suite_info.template
+ if test_suite_info.template != '':
+ vm_tags = env["vm_tags"]
+ if "templates" not in vm_tags:
+ vm_tags["templates"] = test_suite_info.template
+ else:
+ vm_tags["templates"] += "," + test_suite_info.template
environments.extend(shared_environments.values())
@@ -330,7 +338,7 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]:
"c_test_suites": loader.test_suites,
}
- def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]:
+ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, shared_gallery: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]:
#
# Custom ARM templates (to create the test VMs) require special handling. These templates are processed by the azure_update_arm_template
# hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "templates" and pass the
@@ -339,11 +347,9 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str,
# share the same test environment. Similarly, we use a dummy VM tag named "allow_ssh" to pass the value of the "allow_ssh" runbook parameter.
#
vm_tags = {}
- if test_suite_info.template != '':
- vm_tags["templates"] = test_suite_info.template
if self.runbook.allow_ssh != '':
vm_tags["allow_ssh"] = self.runbook.allow_ssh
- return {
+ environment = {
"c_platform": [
{
"type": "azure",
@@ -366,6 +372,7 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str,
"azure": {
"marketplace": marketplace_image,
"vhd": vhd,
+ "shared_gallery": shared_gallery,
"location": location,
"vm_size": vm_size
}
@@ -383,6 +390,18 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str,
"vm_tags": vm_tags
}
+ if shared_gallery != '':
+ # Currently all the images in our shared gallery require secure boot
+ environment['c_platform'][0]['requirement']["features"] = {
+ "items": [
+ {
+ "type": "Security_Profile",
+ "security_profile": "secureboot"
+ }
+ ]
+ }
+ return environment
+
def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]:
return {
"c_platform": [
@@ -406,7 +425,8 @@ def create_vmss_environment(self, env_name: str, marketplace_image: str, locatio
"c_location": location,
"c_image": marketplace_image,
"c_is_vhd": False,
- "c_vm_size": vm_size
+ "c_vm_size": vm_size,
+ "vm_tags": {}
}
def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]:
@@ -420,12 +440,12 @@ def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]:
if images is not None:
return images
- # If it is not image or image set, it must be a URN or VHD
- if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image):
- raise Exception(f"The 'image' parameter must be an image, an image set name, a urn, or a vhd: {self.runbook.image}")
+ # If it is not image or image set, it must be a URN, VHD, or an image from a gallery
+ if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image) and not self._is_image_from_gallery(self.runbook.image):
+ raise Exception(f"The 'image' parameter must be an image, image set name, urn, vhd, or an image from a shared gallery: {self.runbook.image}")
i = VmImageInfo()
- i.urn = self.runbook.image # Note that this could be a URN or the URI for a VHD
+ i.urn = self.runbook.image # Note that this could be a URN or the URI for a VHD, or an image from a shared gallery
i.locations = []
i.vm_sizes = []
@@ -536,6 +556,20 @@ def _is_vhd(vhd: str) -> bool:
parsed = urllib.parse.urlparse(vhd)
return parsed.scheme == 'https' and parsed.netloc != "" and parsed.path != ""
+    # Images from a gallery are given as "<image_gallery>/<image_definition>/<image_version>"
+    # (e.g. "gallery/wait-cloud-init/1.0.1"). The named groups capture each of the 3 components.
+    _IMAGE_FROM_GALLERY = re.compile(r"(?P<gallery>[^/]+)/(?P<image>[^/]+)/(?P<version>[^/]+)")
+
+ @staticmethod
+ def _is_image_from_gallery(image: str) -> bool:
+ """
+ Returns True if the given string matches the _IMAGE_FROM_GALLERY pattern; the match is anchored at the start of the string.
+ """
+ return AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image) is not None
+
+    @staticmethod
+    def _get_name_of_image_from_gallery(image: str) -> str:
+        """
+        Returns the value of the 'image' group captured by the _IMAGE_FROM_GALLERY pattern.
+
+        Raises an Exception if the given string does not match the pattern.
+        """
+        match = AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image)
+        if match is None:
+            raise Exception(f"Invalid image from gallery: {image}")
+        return match.group('image')
+
@staticmethod
def _report_test_result(
suite_name: str,
diff --git a/tests_e2e/test_suites/agent_wait_for_cloud_init.yml b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml
new file mode 100644
index 0000000000..727803811e
--- /dev/null
+++ b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml
@@ -0,0 +1,13 @@
+#
+# This test verifies that the Agent waits for cloud-init to complete before it starts processing extensions.
+#
+# NOTE: This test is not fully automated. It requires a custom image where the test Agent has been installed and Extensions.WaitForCloudInit is enabled in waagent.conf.
+# To execute it manually, create a custom image and use the 'image' runbook parameter, for example: "-v: image:gallery/wait-cloud-init/1.0.1".
+#
+name: "AgentWaitForCloudInit"
+tests:
+ - "agent_wait_for_cloud_init/agent_wait_for_cloud_init.py"
+template: "agent_wait_for_cloud_init/add_cloud_init_script.py"
+install_test_agent: false
+# Dummy image, since the parameter is required. The actual image needs to be passed as a parameter to the runbook.
+images: "ubuntu_2204"
diff --git a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py
index 6f0a562cd2..af3bc738a5 100755
--- a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py
+++ b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py
@@ -32,18 +32,11 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None:
#
# NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is
- # under the 'lisa' namespace.
+ # under the 'lisa' namespace. We set 'provisionVMAgent' to False.
#
# "getLinuxConfiguration": {
# "parameters": [
- # {
- # "name": "keyPath",
- # "type": "string"
- # },
- # {
- # "name": "publicKeyData",
- # "type": "string"
- # }
+ # ...
# ],
# "output": {
# "type": "object",
@@ -62,31 +55,9 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None:
# }
# }
#
- # The code below sets template['functions'][i]['members']['getLinuxConfiguration']['output']['value']['provisionVMAgent'] to True,
- # where template['functions'][i] is the 'lisa' namespace.
- #
- functions = template.get("functions")
- if functions is None:
- raise Exception('Cannot find "functions" in the LISA template.')
- for namespace in functions:
- name = namespace.get("namespace")
- if name is None:
- raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}')
- if name == "lisa":
- members = namespace.get('members')
- if members is None:
- raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}')
- get_linux_configuration = members.get('getLinuxConfiguration')
- if get_linux_configuration is None:
- raise Exception(f'Cannot find the "getLinuxConfiguration" function the lisa namespace in the LISA template: {namespace}')
- output = get_linux_configuration.get('output')
- if output is None:
- raise Exception(f'Cannot find the "output" of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}')
- value = output.get('value')
- if value is None:
- raise Exception(f"Cannot find the output's value of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}")
- value['provisionVMAgent'] = False
- break
- else:
- raise Exception(f'Cannot find the "lisa" namespace in the LISA template: {functions}')
+ get_linux_configuration = self.get_lisa_function(template, 'getLinuxConfiguration')
+ output = self.get_function_output(get_linux_configuration)
+ if output.get('customData') is not None:
+ raise Exception(f"The getOSProfile function already has a 'customData'. Won't override it. Definition: {get_linux_configuration}")
+ output['provisionVMAgent'] = False
diff --git a/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py
new file mode 100755
index 0000000000..1fbc60adc4
--- /dev/null
+++ b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+# Microsoft Azure Linux Agent
+#
+# Copyright 2018 Microsoft Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import base64
+
+from typing import Any, Dict
+
+from tests_e2e.tests.agent_wait_for_cloud_init.agent_wait_for_cloud_init import AgentWaitForCloudInit
+from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate
+
+
+class AddCloudInitScript(UpdateArmTemplate):
+    """
+    ARM template customization that passes AgentWaitForCloudInit.CloudInitScript to cloud-init by setting
+    osProfile.customData, which must contain the cloud-init configuration as a base64-encoded string.
+    """
+    def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None:
+        """
+        Adds 'customData' (the base64-encoded script) to the output of LISA's getOSProfile function.
+
+        Raises Exception if the template is not a LISA template, or if getOSProfile already has a 'customData'.
+        """
+        if not is_lisa_template:
+            raise Exception('This test can only customize LISA ARM templates.')
+
+        #
+        # LISA uses the getOSProfile function to generate the value for osProfile. Its output looks like this
+        # (the current LISA template does not define customData, so the check below is not expected to fail):
+        #
+        #     "output": {
+        #         "type": "object",
+        #         "value": {
+        #             "computername": "[parameters('computername')]",
+        #             "adminUsername": "[parameters('admin_username')]",
+        #             "adminPassword": "[if(parameters('has_password'), parameters('admin_password'), json('null'))]",
+        #             "linuxConfiguration": "[if(parameters('has_linux_configuration'), parameters('linux_configuration'), json('null'))]"
+        #         }
+        #     }
+        #
+        os_profile_function = self.get_lisa_function(template, 'getOSProfile')
+        os_profile = self.get_function_output(os_profile_function)
+        if os_profile.get('customData') is not None:
+            raise Exception(f"The getOSProfile function already has a 'customData'. Won't override it. Definition: {os_profile_function}")
+
+        # customData must be a base64-encoded string
+        script = AgentWaitForCloudInit.CloudInitScript.encode('utf-8')
+        os_profile['customData'] = base64.b64encode(script).decode('utf-8')
+
diff --git a/tests_e2e/tests/agent_wait_for_cloud_init/agent_wait_for_cloud_init.py b/tests_e2e/tests/agent_wait_for_cloud_init/agent_wait_for_cloud_init.py
new file mode 100755
index 0000000000..d9b4ecaef1
--- /dev/null
+++ b/tests_e2e/tests/agent_wait_for_cloud_init/agent_wait_for_cloud_init.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+
+# Microsoft Azure Linux Agent
+#
+# Copyright 2018 Microsoft Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+
+from assertpy import fail
+
+from tests_e2e.tests.lib.agent_test import AgentVmTest
+from tests_e2e.tests.lib.logging import log
+from tests_e2e.tests.lib.shell import CommandError
+from tests_e2e.tests.lib.ssh_client import SshClient
+
+
+class AgentWaitForCloudInit(AgentVmTest):
+    """
+    This test verifies that the Agent waits for cloud-init to complete before it starts processing extensions.
+
+    To do this, it adds 'CloudInitScript' in cloud-init's custom data. The script ensures first that the Agent
+    is waiting for cloud-init, and then sleeps for a couple of minutes before completing. The script appends
+    a set of known messages to waagent.log, and the test simply verifies that the messages are present in the
+    log in the expected order, and that they occur before the Agent reports that it is processing extensions.
+    """
+    CloudInitScript = """#!/usr/bin/env bash
+ set -euox pipefail
+
+ echo ">>> $(date) cloud-init script begin" >> /var/log/waagent.log
+ while ! grep 'Waiting for cloud-init to complete' /var/log/waagent.log; do
+ sleep 15
+ done
+ echo ">>> $(date) The Agent is waiting for cloud-init, will pause for a couple of minutes" >> /var/log/waagent.log
+ sleep 120
+ echo ">>> $(date) cloud-init script end" >> /var/log/waagent.log
+ """
+
+    def run(self):
+        """Waits for the Agent to start processing extensions, then verifies the order of the messages in waagent.log."""
+        def indent(lines):
+            return "\n".join([f" {ln}" for ln in lines])
+
+        ssh_client: SshClient = self._context.create_ssh_client()
+
+        log.info("Waiting for Agent to start processing extensions")
+        for _ in range(15):  # up to 15 checks, 60 seconds apart
+            try:
+                ssh_client.run_command("grep 'ProcessExtensionsGoalState started' /var/log/waagent.log")
+                break
+            except CommandError:
+                log.info("The Agent has not started to process extensions, will check again after a short delay")
+                time.sleep(60)
+        else:
+            raise Exception("Timeout while waiting for the Agent to start processing extensions")
+        log.info("The Agent has started to process extensions")
+
+        # Collect, in log order, the script's markers (lines starting with '>>>') and the relevant Agent messages
+        output = ssh_client.run_command(
+            "grep -E '^>>>|"
+            "INFO ExtHandler ExtHandler cloud-init completed|"
+            "INFO ExtHandler ExtHandler ProcessExtensionsGoalState started' /var/log/waagent.log")
+        actual = output.rstrip().splitlines()
+        expected = [
+            'cloud-init script begin',
+            'The Agent is waiting for cloud-init, will pause for a couple of minutes',
+            'cloud-init script end',
+            'cloud-init completed',
+            'ProcessExtensionsGoalState started'
+        ]
+        if len(actual) == len(expected) and all(message in line for message, line in zip(expected, actual)):
+            log.info("The Agent waited for cloud-init before processing extensions.\nLog messages:\n%s", indent(actual))
+        else:
+            fail(f"The Agent did not wait for cloud-init before processing extensions.\nExpected:\n{indent(expected)}\nActual:\n{indent(actual)}")
+
+
+if __name__ == "__main__":  # when executed as a script, hand control to the test's run_from_command_line()
+ AgentWaitForCloudInit.run_from_command_line()
+
diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py
index af69fba048..010178ab9c 100644
--- a/tests_e2e/tests/lib/update_arm_template.py
+++ b/tests_e2e/tests/lib/update_arm_template.py
@@ -55,4 +55,87 @@ def get_resource_by_name(resources: List[Dict[str, Any]], resource_name: str, ty
return item
raise KeyError(f"Cannot find a resource {resource_name} of type {type_name} in the ARM template")
+ @staticmethod
+ def get_lisa_function(template: Dict[str, Any], function_name: str) -> Dict[str, Any]:
+     """
+     Returns the definition of 'function_name' from the "lisa" namespace of the given ARM template.
+
+     LISA declares its user-defined functions under template["functions"], in the entry whose "namespace"
+     is "lisa"; the "members" of that entry map each function name to its definition, for example:
+
+         "functions": [
+             {
+                 "namespace": "lisa",
+                 "members": {
+                     "getOSProfile": {
+                         "parameters": [
+                             { "name": "computername", "type": "string" },
+                             etc.
+                         ],
+                         "output": {
+                             "type": "object",
+                             "value": {
+                                 "computername": "[parameters('computername')]",
+                                 etc.
+                             }
+                         }
+                     }
+                 }
+             }
+         ]
+
+     Raises KeyError if the "lisa" namespace does not define 'function_name'; raises Exception if "functions",
+     the name of a namespace, the "lisa" namespace, or its "members" cannot be found in the template.
+     """
+     namespaces = template.get("functions")
+     if namespaces is None:
+         raise Exception('Cannot find "functions" in the LISA template.')
+     for entry in namespaces:
+         namespace_name = entry.get("namespace")
+         if namespace_name is None:
+             raise Exception(f'Cannot find "namespace" in the LISA template: {entry}')
+         if namespace_name != "lisa":
+             continue
+         members = entry.get('members')
+         if members is None:
+             raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {entry}')
+         definition = members.get(function_name)
+         if definition is None:
+             raise KeyError(f'Cannot find function {function_name} in the lisa namespace in the LISA template: {entry}')
+         return definition
+     raise Exception(f'Cannot find the "lisa" namespace in the LISA template: {namespaces}')
+
+ @staticmethod
+ def get_function_output(function: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Returns the "value" of the "output" in the given function definition (i.e. output["value"]).
+
+     The object returned is the one stored in the definition, not a copy, so callers can customize the
+     function's output by modifying it in place. For example, given this definition:
+
+         {
+             "parameters": [
+                 { "name": "computername", "type": "string" },
+                 etc.
+             ],
+             "output": {
+                 "type": "object",
+                 "value": {
+                     "computername": "[parameters('computername')]",
+                     "adminUsername": "[parameters('admin_username')]",
+                     etc.
+                 }
+             }
+         }
+
+     the method returns the dictionary containing "computername", "adminUsername", etc.
+     Raises Exception if the definition has no "output", or if that output has no "value".
+     """
+     output_definition = function.get('output')
+     if output_definition is None:
+         raise Exception(f'Cannot find the "output" of the given function: {function}')
+     output_value = output_definition.get('value')
+     if output_value is not None:
+         return output_value
+     raise Exception(f"Cannot find the output's value of the given function: {function}")