diff --git a/CHANGELOG.md b/CHANGELOG.md index d963bfe065..7ac3b8be2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ CHANGELOG **BUG FIXES** -- Fix sanity checks with ARM instance types by using alinux2 AMI with correct architecture during dryrun +- Fix sanity checks with ARM instance types by using cluster AMI when performing validation - Fix `enable_efa` parameter validation when using Centos8 and Slurm or ARM instances. - Use non interactive `apt update` command when building custom Ubuntu AMIs. - Fix `encrypted_ephemeral = true` when using Alinux2 or CentOS8 diff --git a/cli/src/pcluster/cluster_model.py b/cli/src/pcluster/cluster_model.py index e514e68b27..807d0e9c22 100644 --- a/cli/src/pcluster/cluster_model.py +++ b/cli/src/pcluster/cluster_model.py @@ -16,8 +16,9 @@ from botocore.exceptions import ClientError from pcluster.utils import ( - error, + Cache, get_availability_zone_of_subnet, + get_installed_version, get_supported_az_for_one_instance_type, is_hit_enabled_cluster, ) @@ -115,20 +116,70 @@ def _ec2_run_instance(self, pcluster_config, **kwargs): # noqa: C901 FIXME!!! "Please double check your cluster configuration.\n{1}".format(kwargs["InstanceType"], message) ) - def _get_latest_alinux_ami_id(self, architecture): - """Get latest alinux ami id.""" + @Cache.cached + def _get_official_image_id(self, os, architecture): + """Return the id of the current official image, for the provided os-architecture combination.""" + ami_id = None try: - alinux_ami_id = ( - boto3.client("ssm") - .get_parameter(Name="/aws/service/ami-amazon-linux-latest/amzn2-ami-minimal-hvm-%s-ebs" % architecture) - .get("Parameter") - .get("Value") - ) + image_prefix = self._get_official_image_name_prefix(os, architecture) + # Look for public ParallelCluster AMI in released version + ami_id = self._get_image_id(image_prefix, "amazon", True) + if not ami_id: + # Look for dev account owner during development + ami_id = self._get_image_id(image_prefix, "self", False) except ClientError as e: - error("Unable to retrieve Amazon Linux 2 AMI id.\n{0}".format(e.response.get("Error").get("Message"))) - raise + raise Exception( + "Unable to retrieve official image id for base_os='{0}' and architecture='{1}': {2}".format( + os, architecture, e.response.get("Error").get("Message") + ) + ) + if not ami_id: + raise Exception( + "No official image id found for base_os='{0}' and architecture='{1}'".format(os, architecture) + ) + + return ami_id + + def _get_image_id(self, image_prefix, owner, public=False): + """Search for the ami with the specified prefix from the given account and returns the id if found.""" + filters = [ + { + "Name": "name", + "Values": ["{0}*".format(image_prefix)], + } + ] + if public: + filters.append({"Name": "is-public", "Values": ["true"]}) + + images = boto3.client("ec2").describe_images(Filters=filters, Owners=[owner]).get("Images") + return images[0].get("ImageId") if images else None + + def _get_official_image_name_prefix(self, os, architecture): + """Return the prefix of the current official image, for the provided os-architecture combination.""" + suffixes = { + "alinux": "amzn-hvm", + "alinux2": "amzn2-hvm", + "centos7": "centos7-hvm", + "centos8": "centos8-hvm", + "ubuntu1604": "ubuntu-1604-lts-hvm", + "ubuntu1804": "ubuntu-1804-lts-hvm", + } + return "aws-parallelcluster-{version}-{suffix}-{arch}".format( + version=get_installed_version(), suffix=suffixes[os], arch=architecture + ) + + def _get_cluster_ami_id(self, pcluster_config): + """Get the image id of the cluster.""" + cluster_ami = None + os = pcluster_config.get_section("cluster").get_param_value("base_os") + architecture = pcluster_config.get_section("cluster").get_param_value("architecture") + custom_ami = pcluster_config.get_section("cluster").get_param_value("custom_ami") + try: + cluster_ami = custom_ami if custom_ami else self._get_official_image_id(os, architecture) + except Exception as e: + pcluster_config.error("Error when resolving cluster ami id: {0}".format(e)) - return alinux_ami_id + return cluster_ami def public_ips_in_compute_subnet(self, pcluster_config, network_interfaces_count): """Tell if public IPs will be used in compute subnet.""" diff --git a/cli/src/pcluster/models/hit/hit_cluster_model.py b/cli/src/pcluster/models/hit/hit_cluster_model.py index 4d508b5342..5c39f27d2c 100644 --- a/cli/src/pcluster/models/hit/hit_cluster_model.py +++ b/cli/src/pcluster/models/hit/hit_cluster_model.py @@ -73,7 +73,7 @@ def test_configuration(self, pcluster_config): ) try: - latest_alinux_ami_id = self._get_latest_alinux_ami_id(cluster_section.get_param_value("architecture")) + cluster_ami_id = self._get_cluster_ami_id(pcluster_config) head_node_network_interfaces = self.build_launch_network_interfaces( network_interfaces_count=int(cluster_section.get_param_value("network_interfaces_count")[0]), @@ -89,7 +89,7 @@ def test_configuration(self, pcluster_config): InstanceType=head_node_instance_type, MinCount=1, MaxCount=1, - ImageId=latest_alinux_ami_id, + ImageId=cluster_ami_id, CpuOptions=head_node_cpu_options, NetworkInterfaces=head_node_network_interfaces, DryRun=True, @@ -112,7 +112,7 @@ def test_configuration(self, pcluster_config): pcluster_config, compute_resource_section, disable_hyperthreading=disable_hyperthreading, - ami_id=latest_alinux_ami_id, + ami_id=cluster_ami_id, subnet=compute_subnet, security_groups_ids=security_groups_ids, placement_group=queue_placement_group, diff --git a/cli/src/pcluster/models/sit/sit_cluster_model.py b/cli/src/pcluster/models/sit/sit_cluster_model.py index b7d29d018c..f03d43e1d7 100644 --- a/cli/src/pcluster/models/sit/sit_cluster_model.py +++ b/cli/src/pcluster/models/sit/sit_cluster_model.py @@ -111,7 +111,7 @@ def test_configuration(self, pcluster_config): ) try: - latest_alinux_ami_id = self._get_latest_alinux_ami_id(cluster_section.get_param_value("architecture")) + cluster_ami_id = self._get_cluster_ami_id(pcluster_config) head_node_network_interfaces = self.build_launch_network_interfaces( network_interfaces_count=int(cluster_section.get_param_value("network_interfaces_count")[0]), @@ -127,7 +127,7 @@ def test_configuration(self, pcluster_config): InstanceType=head_node_instance_type, MinCount=1, MaxCount=1, - ImageId=latest_alinux_ami_id, + ImageId=cluster_ami_id, CpuOptions=head_node_cpu_options, NetworkInterfaces=head_node_network_interfaces, Placement=head_node_placement_group, @@ -153,7 +153,7 @@ def test_configuration(self, pcluster_config): InstanceType=compute_instance_type, MinCount=1, MaxCount=1, - ImageId=latest_alinux_ami_id, + ImageId=cluster_ami_id, CpuOptions=compute_cpu_options, Placement=compute_placement_group, NetworkInterfaces=network_interfaces, diff --git a/cli/tests/pcluster/config/test_pcluster_config.py b/cli/tests/pcluster/config/test_pcluster_config.py index 843cb8e64e..8ee6da99bb 100644 --- a/cli/tests/pcluster/config/test_pcluster_config.py +++ b/cli/tests/pcluster/config/test_pcluster_config.py @@ -13,70 +13,21 @@ from assertpy import assert_that from pytest import fail +from pcluster.utils import get_installed_version from tests.common import MockedBoto3Request from tests.pcluster.config.utils import get_mocked_pcluster_config, init_pcluster_config_from_configparser -@pytest.fixture() -def boto3_stubber_path(): - return "pcluster.cluster_model.boto3" +@pytest.fixture(autouse=True) +def clear_cache(): + from pcluster.utils import Cache + Cache.clear_all() -@pytest.mark.parametrize( - "architecture, boto3_response, expected_message", - [ - ( - "arm64", - { - "Parameter": { - "Name": "/aws/service/ami-amazon-linux-latest/amzn2-ami-minimal-hvm-arm64-ebs", - "Type": "String", - "Value": "ami-0aaf2d8fefcde5893", - "Version": 27, - "LastModifiedDate": 1614231667.121, - "DataType": "text", - } - }, - None, - ), - ( - "x86_64", - { - "Parameter": { - "Name": "/aws/service/ami-amazon-linux-latest/amzn2-ami-minimal-hvm-x86_64-ebs", - "Type": "String", - "Value": "ami-0962afb8e2794cd6e", - "Version": 40, - "LastModifiedDate": 1614231667.235, - "DataType": "text", - } - }, - None, - ), - ("x86_64", "Generic Error", "Unable to retrieve Amazon Linux 2 AMI id"), - ], -) -def test_get_latest_alinux_ami_id(mocker, boto3_stubber, architecture, boto3_response, expected_message): - mocked_requests = [ - MockedBoto3Request( - method="get_parameter", - response=boto3_response, - expected_params={ - "Name": "/aws/service/ami-amazon-linux-latest/amzn2-ami-minimal-hvm-%s-ebs" % architecture - }, - generate_error=True if expected_message else False, - ) - ] - - boto3_stubber("ssm", mocked_requests) - pcluster_config = get_mocked_pcluster_config(mocker) - if expected_message: - with pytest.raises(SystemExit, match=expected_message): - _ = pcluster_config.cluster_model._get_latest_alinux_ami_id(architecture) - else: - latest_linux_ami_id = pcluster_config.cluster_model._get_latest_alinux_ami_id(architecture) - assert_that(latest_linux_ami_id).is_equal_to(boto3_response.get("Parameter").get("Value")) +@pytest.fixture() +def boto3_stubber_path(): + return "pcluster.cluster_model.boto3" @pytest.mark.parametrize( @@ -157,3 +108,126 @@ def test_load_from_file_errors(capsys, config_parser_dict, expected_message): assert_that(e.args[0]).matches(expected_message) else: fail("Unexpected failure when loading file") + + +@pytest.mark.parametrize( + "custom_ami_id, os, architecture, expected_ami_suffix, expected_public_ami_id, expected_self_ami_id, " + "expected_error_message, raise_boto3_error", + [ + # Official ami + (None, "alinux", "x86_64", "amzn-hvm-x86_64", "ami-official", None, None, False), + (None, "alinux2", "x86_64", "amzn2-hvm-x86_64", "ami-official", None, None, False), + (None, "centos7", "x86_64", "centos7-hvm-x86_64", "ami-official", None, None, False), + (None, "centos8", "x86_64", "centos8-hvm-x86_64", "ami-official", None, None, False), + (None, "ubuntu1604", "x86_64", "ubuntu-1604-lts-hvm-x86_64", "ami-official", None, None, False), + (None, "ubuntu1804", "x86_64", "ubuntu-1804-lts-hvm-x86_64", "ami-official", None, None, False), + (None, "alinux", "arm_64", "amzn-hvm-arm_64", "ami-official", None, None, False), + (None, "alinux2", "arm_64", "amzn2-hvm-arm_64", "ami-official", None, None, False), + (None, "centos7", "arm_64", "centos7-hvm-arm_64", "ami-official", None, None, False), + (None, "centos8", "arm_64", "centos8-hvm-arm_64", "ami-official", None, None, False), + (None, "ubuntu1604", "arm_64", "ubuntu-1604-lts-hvm-arm_64", "ami-official", None, None, False), + (None, "ubuntu1804", "arm_64", "ubuntu-1804-lts-hvm-arm_64", "ami-official", None, None, False), + # Custom ami + ("ami-custom", "alinux2", "x86_64", None, "ami-custom", None, None, False), + ("ami-custom", "alinux2", "arm_64", None, "ami-custom", None, None, False), + # Self ami + (None, "ubuntu1804", "arm_64", "ubuntu-1804-lts-hvm-arm_64", None, "ami-self", None, False), + # No ami found + (None, "alinux2", "x86_64", "amzn2-hvm-x86_64", None, None, "No official image id found", False), + # Boto3 error + (None, "alinux2", "x86_64", "amzn2-hvm-x86_64", None, None, "Unable to retrieve official image id", True), + ], +) +def test_get_cluster_ami_id( + mocker, + boto3_stubber, + custom_ami_id, + os, + architecture, + expected_ami_suffix, + expected_public_ami_id, + expected_self_ami_id, + expected_error_message, + raise_boto3_error, +): + if not custom_ami_id: + # Expected request for public ami + mocked_requests = [ + MockedBoto3Request( + method="describe_images", + response={ + "Images": [ + { + "Architecture": architecture, + "CreationDate": "2020-12-22T13:30:33.000Z", + "ImageId": expected_public_ami_id, + } + ] + if expected_public_ami_id + else [] + }, + expected_params={ + "Filters": [ + { + "Name": "name", + "Values": [ + "aws-parallelcluster-{version}-{suffix}*".format( + version=get_installed_version(), suffix=expected_ami_suffix + ) + ], + }, + {"Name": "is-public", "Values": ["true"]}, + ], + "Owners": ["amazon"], + }, + generate_error=raise_boto3_error, + ) + ] + + if not expected_public_ami_id and not raise_boto3_error: + # Expected request for self ami + mocked_requests.append( + MockedBoto3Request( + method="describe_images", + response={ + "Images": [ + { + "Architecture": architecture, + "CreationDate": "2020-12-22T13:30:33.000Z", + "ImageId": expected_self_ami_id, + } + ] + if expected_self_ami_id + else [] + }, + expected_params={ + "Filters": [ + { + "Name": "name", + "Values": [ + "aws-parallelcluster-{version}-{suffix}*".format( + version=get_installed_version(), suffix=expected_ami_suffix + ) + ], + }, + ], + "Owners": ["self"], + }, + generate_error=raise_boto3_error, + ) + ) + + boto3_stubber("ec2", mocked_requests) + + pcluster_config = get_mocked_pcluster_config(mocker) + pcluster_config.get_section("cluster").get_param("custom_ami").value = custom_ami_id + pcluster_config.get_section("cluster").get_param("base_os").value = os + pcluster_config.get_section("cluster").get_param("architecture").value = architecture + + if expected_error_message: + with pytest.raises(SystemExit, match=expected_error_message): + _ = pcluster_config.cluster_model._get_cluster_ami_id(pcluster_config) + else: + expected_ami_id = expected_public_ami_id if expected_public_ami_id else expected_self_ami_id + cluster_ami_id = pcluster_config.cluster_model._get_cluster_ami_id(pcluster_config) + assert_that(cluster_ami_id).is_equal_to(expected_ami_id)