diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index f0caabdac..661472e8b 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -49,7 +49,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 21d36d0b3..d5d3eaf6d 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -9,7 +9,7 @@ parameters: - name: test_suites displayName: Test Suites type: string - default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update + default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update, fips # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition # files. 
Those parameters are useful for 1-off tests, like testing a VHD or checking if diff --git a/tests_e2e/test_suites/agent_bvt.yml b/tests_e2e/test_suites/agent_bvt.yml index 1f0f91405..8c840670f 100644 --- a/tests_e2e/test_suites/agent_bvt.yml +++ b/tests_e2e/test_suites/agent_bvt.yml @@ -1,8 +1,8 @@ name: "AgentBvt" tests: - - "bvts/extension_operations.py" - - "bvts/run_command.py" - - "bvts/vm_access.py" + - "agent_bvt/extension_operations.py" + - "agent_bvt/run_command.py" + - "agent_bvt/vm_access.py" images: - "endorsed" - "endorsed-arm64" diff --git a/tests_e2e/test_suites/extensions_disabled.yml b/tests_e2e/test_suites/extensions_disabled.yml index 3fbff2ebd..1e98dd9cc 100644 --- a/tests_e2e/test_suites/extensions_disabled.yml +++ b/tests_e2e/test_suites/extensions_disabled.yml @@ -4,6 +4,6 @@ # name: "ExtensionsDisabled" tests: - - "extensions_disabled.py" + - "extensions_disabled/extensions_disabled.py" images: "random(endorsed)" owns_vm: true diff --git a/tests_e2e/test_suites/fips.yml b/tests_e2e/test_suites/fips.yml new file mode 100644 index 000000000..785671d0c --- /dev/null +++ b/tests_e2e/test_suites/fips.yml @@ -0,0 +1,10 @@ +# +# FIPS should not affect extension processing. The test enables FIPS and then executes an extension. +# +# NOTE: Enabling FIPS is very specific to the distro. This test is only executed on RHEL 9.0. +# +name: "FIPS" +tests: + - source: "fips/fips.py" +images: "rhel_90" +owns_vm: true diff --git a/tests_e2e/tests/bvts/extension_operations.py b/tests_e2e/tests/agent_bvt/extension_operations.py similarity index 98% rename from tests_e2e/tests/bvts/extension_operations.py rename to tests_e2e/tests/agent_bvt/extension_operations.py index 081572874..e5c607c1d 100755 --- a/tests_e2e/tests/bvts/extension_operations.py +++ b/tests_e2e/tests/agent_bvt/extension_operations.py @@ -58,7 +58,7 @@ def run(self): log.info("Installing %s", custom_script_2_0) message = f"Hello {uuid.uuid4()}!" 
custom_script_2_0.enable( - settings={ + protected_settings={ 'commandToExecute': f"echo \'{message}\'" }, auto_upgrade_minor_version=False @@ -77,7 +77,7 @@ def run(self): message = f"Hello {uuid.uuid4()}!" custom_script_2_1.enable( - settings={ + protected_settings={ 'commandToExecute': f"echo \'{message}\'" } ) diff --git a/tests_e2e/tests/bvts/run_command.py b/tests_e2e/tests/agent_bvt/run_command.py similarity index 100% rename from tests_e2e/tests/bvts/run_command.py rename to tests_e2e/tests/agent_bvt/run_command.py diff --git a/tests_e2e/tests/bvts/vm_access.py b/tests_e2e/tests/agent_bvt/vm_access.py similarity index 100% rename from tests_e2e/tests/bvts/vm_access.py rename to tests_e2e/tests/agent_bvt/vm_access.py diff --git a/tests_e2e/tests/bvts/__init__.py b/tests_e2e/tests/bvts/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests_e2e/tests/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py similarity index 100% rename from tests_e2e/tests/extensions_disabled.py rename to tests_e2e/tests/extensions_disabled/extensions_disabled.py diff --git a/tests_e2e/tests/fips/fips.py b/tests_e2e/tests/fips/fips.py new file mode 100755 index 000000000..f8c27b900 --- /dev/null +++ b/tests_e2e/tests/fips/fips.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+import uuid
+from assertpy import fail
+from typing import Any, Dict, List
+
+from tests_e2e.tests.lib.agent_test import AgentTest
+from tests_e2e.tests.lib.logging import log
+from tests_e2e.tests.lib.shell import CommandError
+from tests_e2e.tests.lib.ssh_client import SshClient
+from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient
+from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient
+from tests_e2e.tests.lib.identifiers import VmExtensionIds
+
+
+class Fips(AgentTest):
+    """
+    Enables FIPS on the test VM, which is a RHEL 9 VM (see https://access.redhat.com/solutions/137833#rhel9), then executes the CustomScript extension.
+
+    TODO: Investigate whether extensions with protected settings are supported on FIPS-enabled systems. The Agent has issues handling the tenant
+          certificate on those systems (additional configuration on FIPS may be needed).
+    """
+    def run(self):
+        ssh_client: SshClient = self._context.create_ssh_client()
+
+        try:
+            command = "fips-mode-setup --enable"
+            log.info("Enabling FIPS on the test VM [%s]", command)
+            output = ssh_client.run_command(command, use_sudo=True)
+            log.info("Enable FIPS completed\n%s", output)
+        except CommandError as e:
+            raise Exception(f"Failed to enable FIPS: {e}")
+
+        log.info("Restarting test VM")
+        vm: VirtualMachineClient = VirtualMachineClient(self._context.vm)
+        vm.restart(wait_for_boot=True, ssh_client=ssh_client)
+
+        try:
+            command = "fips-mode-setup --check"
+            log.info("Verifying that FIPS is enabled [%s]", command)
+            output = ssh_client.run_command(command).rstrip()
+            if output != "FIPS mode is enabled.":
+                fail(f"FIPS is not enabled - '{command}' returned '{output}'")
+            log.info(output)
+        except CommandError as e:
+            raise Exception(f"Failed to verify that FIPS is enabled: {e}")
+
+        custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript")
+
+        log.info("Installing %s", 
custom_script)
+        message = f"Hello {uuid.uuid4()}!"
+        custom_script.enable(
+            settings={
+                'commandToExecute': f"echo \'{message}\'"
+            },
+            auto_upgrade_minor_version=False
+        )
+        custom_script.assert_instance_view(expected_version="2.0", expected_message=message)
+
+    def get_ignore_error_rules(self) -> List[Dict[str, Any]]:
+        """
+        Some extensions added by policy on the test subscription use protected settings, which produce this error.
+        """
+        return [
+            {'message': r'Failed to decrypt /var/lib/waagent/Certificates.p7m'}
+        ]
+
+
+if __name__ == "__main__":
+    Fips.run_from_command_line()
+
diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py
index 31ce94cb2..3366aecdc 100644
--- a/tests_e2e/tests/lib/retry.py
+++ b/tests_e2e/tests/lib/retry.py
@@ -40,24 +40,24 @@ def execute_with_retry(operation: Callable[[], Any]) -> Any:
             time.sleep(30)
 
 
-def retry_ssh_run(operation: Callable[[], Any]) -> Any:
+def retry_ssh_run(operation: Callable[[], Any], attempts: int, attempt_delay: int) -> Any:
     """
     This method attempts to retry ssh run command a few times if operation failed with connection time out
     """
-    attempts = 3
-    while attempts > 0:
-        attempts -= 1
+    i = 1
+    while i <= attempts:
         try:
             return operation()
         except Exception as e:
             # We raise CommandError on !=0 exit codes in the called method
             if isinstance(e, CommandError):
                 # Instance of 'Exception' has no 'exit_code' member (no-member) - Disabled: e is actually an CommandError
-                if e.exit_code != 255 or attempts == 0:  # pylint: disable=no-member
+                if e.exit_code != 255 or i == attempts:  # pylint: disable=no-member
                     raise
-            log.warning("The operation failed, retrying in 30 secs.\n%s", e)
-            time.sleep(30)
-
+            # The arguments must follow the order of the placeholders: delay, current attempt, total attempts, error
+            log.warning("The SSH operation failed, retrying in %s secs [Attempt %s/%s].\n%s", attempt_delay, i, attempts, e)
+            time.sleep(attempt_delay)
+            i += 1  # Advance the attempt counter; without this 'i == attempts' is never reached and the loop retries forever
 
 def retry_if_false(operation: Callable[[], bool], attempts: int = 5, duration: int = 30) -> bool:
     """
diff --git a/tests_e2e/tests/lib/ssh_client.py 
b/tests_e2e/tests/lib/ssh_client.py index fda9911d9..3e0d7269c 100644 --- a/tests_e2e/tests/lib/ssh_client.py +++ b/tests_e2e/tests/lib/ssh_client.py @@ -23,6 +23,9 @@ from tests_e2e.tests.lib import shell from tests_e2e.tests.lib.retry import retry_ssh_run +ATTEMPTS: int = 3 +ATTEMPT_DELAY: int = 30 + class SshClient(object): def __init__(self, ip_address: str, username: str, private_key_file: Path, port: int = 22): @@ -31,7 +34,7 @@ def __init__(self, ip_address: str, username: str, private_key_file: Path, port: self._private_key_file: Path = private_key_file self._port: int = port - def run_command(self, command: str, use_sudo: bool = False) -> str: + def run_command(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> str: """ Executes the given command over SSH and returns its stdout. If the command returns a non-zero exit code, the function raises a CommandError. @@ -44,9 +47,12 @@ def run_command(self, command: str, use_sudo: bool = False) -> str: # Note that we add ~/bin to the remote PATH, since Python (Pypy) and other test tools are installed there. 
# Note, too, that when using sudo we need to carry over the value of PATH to the sudo session sudo = "sudo env PATH=$PATH PYTHONPATH=$PYTHONPATH" if use_sudo else '' - return retry_ssh_run(lambda: shell.run_command([ - "ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, destination, - f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}"])) + command = [ + "ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, + destination, + f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}" + ] + return retry_ssh_run(lambda: shell.run_command(command), attempts, attempt_delay) @staticmethod def generate_ssh_key(private_key_file: Path): @@ -59,19 +65,19 @@ def generate_ssh_key(private_key_file: Path): def get_architecture(self): return self.run_command("uname -m").rstrip() - def copy_to_node(self, local_path: Path, remote_path: Path, recursive: bool = False) -> None: + def copy_to_node(self, local_path: Path, remote_path: Path, recursive: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: """ File copy to a remote node """ - self._copy(local_path, remote_path, remote_source=False, remote_target=True, recursive=recursive) + self._copy(local_path, remote_path, remote_source=False, remote_target=True, recursive=recursive, attempts=attempts, attempt_delay=attempt_delay) - def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = False) -> None: + def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: """ File copy from a remote node """ - self._copy(remote_path, local_path, remote_source=True, remote_target=False, recursive=recursive) + self._copy(remote_path, local_path, remote_source=True, remote_target=False, recursive=recursive, attempts=attempts, attempt_delay=attempt_delay) - def _copy(self, source: Path, target: 
Path, remote_source: bool, remote_target: bool, recursive: bool) -> None: + def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: bool, recursive: bool, attempts: int, attempt_delay: int) -> None: if remote_source: source = f"{self._username}@{self._ip_address}:{source}" if remote_target: @@ -82,4 +88,4 @@ def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: command.append("-r") command.extend([str(source), str(target)]) - shell.run_command(command) + return retry_ssh_run(lambda: shell.run_command(command), attempts, attempt_delay) diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index f7e67a823..38d35aee5 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -19,6 +19,9 @@ # This module includes facilities to execute operations on virtual machines (list extensions, restart, etc). # +import datetime +import json +import time from typing import Any, Dict, List from azure.identity import DefaultAzureCredential @@ -32,6 +35,8 @@ from tests_e2e.tests.lib.identifiers import VmIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient class VirtualMachineClient(AzureClient): @@ -54,11 +59,11 @@ def __init__(self, vm: VmIdentifier): base_url=cloud.endpoints.resource_manager, credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - def get_description(self) -> VirtualMachine: + def get_model(self) -> VirtualMachine: """ - Retrieves the description of the virtual machine. + Retrieves the model of the virtual machine. 
""" - log.info("Retrieving description for %s", self._identifier) + log.info("Retrieving VM model for %s", self._identifier) return execute_with_retry( lambda: self._compute_client.virtual_machines.get( resource_group_name=self._identifier.resource_group, @@ -103,10 +108,25 @@ def update(self, properties: Dict[str, Any], timeout: int = AzureClient._DEFAULT operation_name=f"Update {self._identifier}", timeout=timeout) - def restart(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def restart( + self, + wait_for_boot, + ssh_client: SshClient = None, + boot_timeout: datetime.timedelta = datetime.timedelta(minutes=5), + timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: """ - Restarts the virtual machine or scale set + Restarts (reboots) the virtual machine. + + NOTES: + * If wait_for_boot is True, an SshClient must be provided in order to verify that the restart was successful. + * 'timeout' is the timeout for the restart operation itself, while 'boot_timeout' is the timeout for waiting + the boot to complete. 
""" + if wait_for_boot and ssh_client is None: + raise ValueError("An SshClient must be provided if wait_for_boot is True") + + before_restart = datetime.datetime.utcnow() + self._execute_async_operation( lambda: self._compute_client.virtual_machines.begin_restart( resource_group_name=self._identifier.resource_group, @@ -114,6 +134,37 @@ def restart(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: operation_name=f"Restart {self._identifier}", timeout=timeout) + if not wait_for_boot: + return + + start = datetime.datetime.utcnow() + while datetime.datetime.utcnow() < start + boot_timeout: + log.info("Waiting for VM %s to boot", self._identifier) + time.sleep(15) # Note that we always sleep at least 1 time, to give the reboot time to start + instance_view = self.get_instance_view() + power_state = [s.code for s in instance_view.statuses if "PowerState" in s.code] + if len(power_state) != 1: + raise Exception(f"Could not find PowerState in the instance view statuses:\n{json.dumps(instance_view.statuses)}") + log.info("VM's Power State: %s", power_state[0]) + if power_state[0] == "PowerState/running": + # We may get an instance view captured before the reboot actually happened; verify + # that the reboot actually happened by checking the system's uptime. + log.info("Verifying VM's uptime to ensure the reboot has completed...") + try: + uptime = ssh_client.run_command("cat /proc/uptime | sed 's/ .*//'", attempts=1).rstrip() # The uptime is the first field in the file + log.info("Uptime: %s", uptime) + boot_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=float(uptime)) + if boot_time > before_restart: + log.info("VM %s completed boot and is running. Boot time: %s", self._identifier, boot_time) + return + log.info("The VM has not rebooted yet. Restart time: %s. 
Boot time: %s", before_restart, boot_time) + except CommandError as e: + if e.exit_code == 255 and "Connection refused" in str(e): + log.info("VM %s is not yet accepting SSH connections", self._identifier) + else: + raise + raise Exception(f"VM {self._identifier} did not boot after {boot_timeout}") + def __str__(self): return f"{self._identifier}"