Skip to content

Commit

Permalink
[Test] Include testing for detached retained managed storage to the i…
Browse files Browse the repository at this point in the history
…ntegration tests covering the dynamic file system mounting.

Signed-off-by: Giacomo Marciani <mgiacomo@amazon.com>
  • Loading branch information
gmarciani committed Jan 4, 2023
1 parent c6def5f commit 550806e
Show file tree
Hide file tree
Showing 9 changed files with 396 additions and 3 deletions.
34 changes: 34 additions & 0 deletions tests/integration-tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@

from tests.common.osu_common import run_osu_benchmarks
from tests.common.schedulers_common import get_scheduler_commands
from tests.common.storage.constants import StorageType
from tests.common.storage.ebs_utils import delete_ebs_volume
from tests.common.storage.efs_utils import delete_efs_filesystem
from tests.common.storage.fsx_utils import delete_fsx_filesystem
from tests.common.utils import (
fetch_instance_slots,
get_installed_parallelcluster_version,
Expand Down Expand Up @@ -2012,3 +2016,33 @@ def _add_mount_targets(subnet_ids, efs_ids, security_group, template):
)
)
availability_zones_with_mount_target.add(subnet["AvailabilityZone"])


@pytest.fixture(scope="class")
def delete_storage_on_teardown(request, region):
    """
    Provide a callable to register storage resources that must be deleted at class teardown.

    The fixture yields ``_add_storage(storage_type, storage_id)``; every registered resource is deleted
    after the tests of the requesting class have run, unless the ``no_delete`` option is set.

    Deletion is best effort across resources: a failure to delete one resource is logged and does not
    prevent the deletion of the remaining ones. Once every deletion has been attempted, the first error
    encountered (if any) is re-raised so that the teardown is still reported as failed.
    """
    delete_storage_function = {
        StorageType.STORAGE_EBS: delete_ebs_volume,
        StorageType.STORAGE_EFS: delete_efs_filesystem,
        StorageType.STORAGE_FSX: delete_fsx_filesystem,
    }
    # One set of ids per supported storage type; a set makes repeated registrations harmless.
    storage_resources = {storage_type: set() for storage_type in delete_storage_function}

    def _add_storage(storage_type: StorageType, storage_id: str):
        logging.info(
            f"Adding storage for deletion on teardown: storage of type {storage_type.name} with id {storage_id}"
        )
        storage_resources[storage_type].add(storage_id)

    def _delete_storage_resources():
        logging.info("Deleting storage resource on teardown")
        failures = []
        for storage_type, storage_ids in storage_resources.items():
            for storage_id in storage_ids:
                try:
                    delete_storage_function[storage_type](region, storage_id)
                except Exception as e:
                    # Keep going: aborting here would leak every resource not yet processed.
                    logging.error(f"Failed to delete {storage_type.name} resource {storage_id} on teardown: {e}")
                    failures.append(e)
        if failures:
            # Surface the first failure, i.e. the same error that would have aborted the teardown before.
            raise failures[0]

    yield _add_storage

    if request.config.getoption("no_delete"):
        logging.info("Not deleting storage resources marked for removal because --no-delete option was specified")
    else:
        _delete_storage_resources()
39 changes: 39 additions & 0 deletions tests/integration-tests/tests/common/networking/security_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import logging

import boto3
from botocore.exceptions import ClientError
from retrying import retry
from time_utils import seconds


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def delete_security_group(region: str, security_group_id: str):
    """
    Delete an EC2 Security Group, tolerating the case where it does not exist.

    A ClientError mentioning "InvalidGroup.NotFound" is only logged as a warning; any other error is
    logged and re-raised. The call is wrapped by ``retry`` with at most 3 attempts and a fixed wait
    of ``seconds(5)``.

    :param region: region of the EC2 client used for the deletion.
    :param security_group_id: id of the Security Group to delete.
    """
    logging.info(f"Deleting Security Group {security_group_id}")
    try:
        _ec2(region).delete_security_group(GroupId=security_group_id)
    except Exception as error:
        already_gone = isinstance(error, ClientError) and "InvalidGroup.NotFound" in str(error)
        if not already_gone:
            logging.error(f"Cannot delete Security Group {security_group_id}: {error}")
            raise error
        logging.warning(f"Cannot delete Security Group {security_group_id} because it does not exist")


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def describe_security_groups_for_network_interface(region: str, network_interface_id: str):
    """
    Return the ids of the Security Groups attached to a Network Interface.

    The ids are read from the "Groups" entry of the first interface returned by
    ``describe_network_interfaces``. Any error is logged and re-raised. The call is wrapped by ``retry``
    with at most 3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the EC2 client used for the lookup.
    :param network_interface_id: id of the Network Interface to inspect.
    :return: list of Security Group ids.
    """
    logging.info(f"Describing Security Groups for Network Interface {network_interface_id}")
    try:
        response = _ec2(region).describe_network_interfaces(NetworkInterfaceIds=[network_interface_id])
        attached_groups = response["NetworkInterfaces"][0]["Groups"]
        return [group["GroupId"] for group in attached_groups]
    except Exception as error:
        logging.error(f"Cannot describe Security Groups for Network Interface {network_interface_id}: {error}")
        raise error


def _ec2(region):
    """Build a boto3 EC2 client for the given region."""
    ec2_client = boto3.client("ec2", region_name=region)
    return ec2_client
40 changes: 40 additions & 0 deletions tests/integration-tests/tests/common/storage/assertions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import logging
from typing import List

from assertpy import assert_that

from tests.common.storage.constants import StorageType
from tests.common.storage.ebs_utils import describe_ebs_volume
from tests.common.storage.efs_utils import describe_efs_filesystem
from tests.common.storage.fsx_utils import describe_fsx_filesystem


def assert_storage_existence(
    region: str, storage_type: StorageType, storage_id: str, should_exist: bool, expected_states: List[str] = None
):
    """
    Assert that a storage resource exists (or not) and, optionally, that it is in one of the expected states.

    The resource is described through the helper matching its storage type; a description of None is
    treated as "the resource does not exist". The state check is performed only when the resource is
    expected to exist and ``expected_states`` is non-empty.

    :param region: region where the resource is looked up.
    :param storage_type: type of the storage resource (EBS, EFS or FSX).
    :param storage_id: id of the storage resource.
    :param should_exist: whether the resource is expected to exist.
    :param expected_states: accepted values for the resource state; no state check when empty or None.
    :raises Exception: if the storage type is not one of the supported ones.
    """
    logging.info(
        f"Checking existence for {storage_type.name} resource {storage_id}: "
        f"expected to{' not ' if not should_exist else ' '}exist"
    )

    # For each storage type: the describe helper and the key of the description holding the state.
    describers = {
        StorageType.STORAGE_EBS: (describe_ebs_volume, "State"),
        StorageType.STORAGE_EFS: (describe_efs_filesystem, "LifeCycleState"),
        StorageType.STORAGE_FSX: (describe_fsx_filesystem, "Lifecycle"),
    }
    if storage_type not in describers:
        raise Exception(f"Cannot check existence for storage type {storage_type.name}.")

    describe, state_key = describers[storage_type]
    description = describe(region, storage_id)
    state = description.get(state_key) if description else None

    exists = description is not None
    existence_message = f"The {storage_type.name} resource {storage_id} does{' not ' if not exists else ' '}exist"
    assert_that(exists, existence_message).is_equal_to(should_exist)

    if not (should_exist and expected_states):
        return

    state_message = (
        f"The {storage_type.name} resource {storage_id} is not in the expected state: "
        f"expected states are {expected_states}, but actual is {state}"
    )
    assert_that(expected_states, state_message).contains(state)
9 changes: 9 additions & 0 deletions tests/integration-tests/tests/common/storage/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from enum import Enum


class StorageType(Enum):
    """Enumeration of the storage resource types; each value is the short label of the type."""

    # Names carry a STORAGE_ prefix, values are the bare upper-case labels.
    STORAGE_EBS = "EBS"
    STORAGE_EFS = "EFS"
    STORAGE_FSX = "FSX"
36 changes: 36 additions & 0 deletions tests/integration-tests/tests/common/storage/ebs_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import logging

import boto3
from botocore.exceptions import ClientError
from retrying import retry
from time_utils import seconds


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def describe_ebs_volume(region: str, volume_id: str):
    """
    Describe an EBS Volume.

    The call is wrapped by ``retry`` with at most 3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the EC2 client used for the lookup.
    :param volume_id: id of the EBS Volume.
    :return: the first entry of "Volumes" returned by ``describe_volumes``, or None when the call fails
        with a ClientError mentioning "InvalidVolume.NotFound". Any other error is logged and re-raised.
    """
    logging.info(f"Describing EBS Volume {volume_id}")
    try:
        response = _ec2(region).describe_volumes(VolumeIds=[volume_id])
        return response["Volumes"][0]
    except Exception as error:
        if isinstance(error, ClientError) and "InvalidVolume.NotFound" in str(error):
            return None
        logging.error(f"Cannot describe EBS Volume {volume_id}: {error}")
        raise error


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def delete_ebs_volume(region: str, volume_id: str):
    """
    Delete an EBS Volume, tolerating the case where it does not exist.

    A ClientError mentioning "InvalidVolume.NotFound" is only logged as a warning; any other error is
    logged and re-raised. The call is wrapped by ``retry`` with at most 3 attempts and a fixed wait
    of ``seconds(5)``.

    :param region: region of the EC2 client used for the deletion.
    :param volume_id: id of the EBS Volume to delete.
    """
    logging.info(f"Deleting EBS Volume {volume_id}")
    try:
        _ec2(region).delete_volume(VolumeId=volume_id)
    except Exception as error:
        volume_missing = isinstance(error, ClientError) and "InvalidVolume.NotFound" in str(error)
        if not volume_missing:
            logging.error(f"Cannot delete EBS Volume {volume_id}: {error}")
            raise error
        logging.warning(f"Cannot delete EBS Volume {volume_id} because it does not exist")


def _ec2(region):
    """Build a boto3 EC2 client for the given region."""
    ec2_client = boto3.client("ec2", region_name=region)
    return ec2_client
86 changes: 86 additions & 0 deletions tests/integration-tests/tests/common/storage/efs_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import logging

import boto3
from botocore.exceptions import ClientError
from retrying import retry
from time_utils import seconds

from tests.common.networking.security_groups import delete_security_group


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def describe_efs_filesystem(region: str, file_system_id: str):
    """
    Describe an EFS File System.

    The call is wrapped by ``retry`` with at most 3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the EFS client used for the lookup.
    :param file_system_id: id of the EFS File System.
    :return: the first entry of "FileSystems" returned by ``describe_file_systems``, or None when the call
        fails with a ClientError mentioning "FileSystemNotFound". Any other error is logged and re-raised.
    """
    logging.info(f"Describing EFS File System {file_system_id}")
    try:
        response = _efs(region).describe_file_systems(FileSystemId=file_system_id)
        return response["FileSystems"][0]
    except Exception as error:
        if isinstance(error, ClientError) and "FileSystemNotFound" in str(error):
            return None
        logging.error(f"Cannot describe EFS File System {file_system_id}: {error}")
        raise error


@retry(stop_max_attempt_number=10, wait_fixed=seconds(30))
def delete_efs_filesystem(region: str, file_system_id: str, delete_dependent_resources: bool = True):
    """
    Delete an EFS File System and, optionally, the resources that depend on it.

    When ``delete_dependent_resources`` is True, the steps are executed in this order:
      1. for every mount target of the file system, collect its Security Groups and request its deletion;
      2. wait for every mount target to be gone;
      3. delete the collected Security Groups;
      4. delete the file system.
    When it is False, only the file system deletion is requested.

    A ClientError mentioning "FileSystemNotFound" raised at any step is only logged as a warning; any
    other error is logged and re-raised. The whole function is wrapped by ``retry`` with at most
    10 attempts and a fixed wait of ``seconds(30)``.

    :param region: region of the EFS File System.
    :param file_system_id: id of the EFS File System to delete.
    :param delete_dependent_resources: whether mount targets and their Security Groups must be deleted too.
    """
    logging.info(f"Deleting EFS File System {file_system_id}")
    try:
        if delete_dependent_resources:
            mount_targets = describe_efs_mount_targets(region, file_system_id)
            security_group_ids = set()
            # The Security Groups must be read before the mount target deletion is requested.
            for target in mount_targets:
                target_id = target["MountTargetId"]
                security_group_ids.update(describe_mount_target_security_groups(region, target_id))
                delete_efs_mount_target(region, target_id)
            logging.info(
                "The following Security Groups will be deleted as part of "
                f"the deletion for the EFS File System {file_system_id}: {security_group_ids}"
            )
            # All the deletions have been requested above: now wait for each of them to complete.
            for target in mount_targets:
                wait_for_efs_mount_target_deletion(region, file_system_id, target["MountTargetId"])
            for group_id in security_group_ids:
                delete_security_group(region, group_id)
        _efs(region).delete_file_system(FileSystemId=file_system_id)
    except Exception as error:
        file_system_missing = isinstance(error, ClientError) and "FileSystemNotFound" in str(error)
        if not file_system_missing:
            logging.error(f"Cannot delete EFS File System {file_system_id}: {error}")
            raise error
        logging.warning(f"Cannot delete EFS File System {file_system_id} because it does not exist")


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def describe_efs_mount_targets(region: str, file_system_id: str):
    """
    Return the mount targets of an EFS File System.

    The call is wrapped by ``retry`` with at most 3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the EFS client used for the lookup.
    :param file_system_id: id of the EFS File System.
    :return: the "MountTargets" entry of the response, or an empty list when the entry is missing.
    """
    logging.info(f"Describing Mount Targets for EFS File System {file_system_id}")
    response = _efs(region).describe_mount_targets(FileSystemId=file_system_id)
    return response.get("MountTargets", [])


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def describe_mount_target_security_groups(region: str, mount_target_id: str):
    """
    Return the Security Groups of an EFS Mount Target.

    The call is wrapped by ``retry`` with at most 3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the EFS client used for the lookup.
    :param mount_target_id: id of the EFS Mount Target.
    :return: the "SecurityGroups" entry of the response, or an empty list when the entry is missing.
    """
    logging.info(f"Describing Security Groups for EFS Mount Target {mount_target_id}")
    response = _efs(region).describe_mount_target_security_groups(MountTargetId=mount_target_id)
    return response.get("SecurityGroups", [])


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def delete_efs_mount_target(region: str, mount_target_id: str):
    """
    Request the deletion of an EFS Mount Target.

    Errors are not handled here and propagate to the caller. The call is wrapped by ``retry`` with at most
    3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the EFS client used for the deletion.
    :param mount_target_id: id of the EFS Mount Target to delete.
    """
    logging.info(f"Deleting EFS Mount Target {mount_target_id}")
    efs_client = _efs(region)
    efs_client.delete_mount_target(MountTargetId=mount_target_id)


@retry(stop_max_attempt_number=10, wait_fixed=seconds(60))
def wait_for_efs_mount_target_deletion(region: str, file_system_id: str, mount_target_id: str):
    """
    Wait until an EFS Mount Target is no longer listed among the mount targets of its file system.

    The waiting is implemented through the ``retry`` decorator (at most 10 attempts, fixed wait of
    ``seconds(60)``): an exception is raised as long as the mount target is still listed, which
    triggers a new attempt.

    :param region: region of the EFS File System.
    :param file_system_id: id of the EFS File System owning the mount target.
    :param mount_target_id: id of the EFS Mount Target whose deletion is awaited.
    :raises Exception: if the mount target is still listed.
    """
    logging.info(f"Waiting for deletion of EFS Mount Target {mount_target_id} in EFS File System {file_system_id}")
    remaining_ids = {target["MountTargetId"] for target in describe_efs_mount_targets(region, file_system_id)}
    if mount_target_id not in remaining_ids:
        return
    raise Exception(
        f"EFs Mount Target {mount_target_id} in EFS File System {file_system_id} not deleted, yet. "
        "Sleeping 60 seconds ..."
    )


def _efs(region):
    """Build a boto3 EFS client for the given region."""
    efs_client = boto3.client("efs", region_name=region)
    return efs_client
73 changes: 73 additions & 0 deletions tests/integration-tests/tests/common/storage/fsx_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import logging

import boto3
from botocore.exceptions import ClientError
from retrying import retry
from time_utils import seconds

from tests.common.networking.security_groups import (
delete_security_group,
describe_security_groups_for_network_interface,
)


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def describe_fsx_filesystem(region: str, file_system_id: str):
    """
    Describe an FSx File System.

    The call is wrapped by ``retry`` with at most 3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the FSx client used for the lookup.
    :param file_system_id: id of the FSx File System.
    :return: the first entry of "FileSystems" returned by ``describe_file_systems``, or None when the call
        fails with a ClientError mentioning "FileSystemNotFound". Any other error is logged and re-raised.
    """
    logging.info(f"Describing FSx File System {file_system_id}")
    try:
        response = _fsx(region).describe_file_systems(FileSystemIds=[file_system_id])
        return response["FileSystems"][0]
    except Exception as error:
        if isinstance(error, ClientError) and "FileSystemNotFound" in str(error):
            return None
        logging.error(f"Cannot describe FSx File System {file_system_id}: {error}")
        raise error


@retry(stop_max_attempt_number=10, wait_fixed=seconds(30))
def delete_fsx_filesystem(region: str, file_system_id: str, delete_dependent_resources: bool = True):
    """
    Delete an FSx File System and, optionally, the Security Groups attached to its network interfaces.

    When ``delete_dependent_resources`` is True, the Security Groups are collected before the file system
    deletion is requested, and they are deleted only after the file system is gone. When it is False,
    only the file system deletion is requested.

    A ClientError mentioning "FileSystemNotFound" raised at any step is only logged as a warning; any
    other error is logged and re-raised. The whole function is wrapped by ``retry`` with at most
    10 attempts and a fixed wait of ``seconds(30)``.

    :param region: region of the FSx File System.
    :param file_system_id: id of the FSx File System to delete.
    :param delete_dependent_resources: whether the Security Groups of the file system must be deleted too.
    """
    logging.info(f"Deleting FSx File System {file_system_id}")
    try:
        # The Security Groups must be looked up while the file system can still be described.
        security_group_ids = (
            describe_fsx_filesystem_security_groups(region, file_system_id) if delete_dependent_resources else set()
        )
        _fsx(region).delete_file_system(FileSystemId=file_system_id)
        if delete_dependent_resources:
            logging.info(
                "The following Security Groups will be deleted as part of "
                f"the deletion for the FSx File System {file_system_id}: {security_group_ids}"
            )
            wait_for_fsx_filesystem_deletion(region, file_system_id)
            for group_id in security_group_ids:
                delete_security_group(region, group_id)
    except Exception as error:
        file_system_missing = isinstance(error, ClientError) and "FileSystemNotFound" in str(error)
        if not file_system_missing:
            logging.error(f"Cannot delete FSx File System {file_system_id}: {error}")
            raise error
        logging.warning(f"Cannot delete FSx File System {file_system_id} because it does not exist")


@retry(stop_max_attempt_number=3, wait_fixed=seconds(5))
def describe_fsx_filesystem_security_groups(region: str, file_system_id: str):
    """
    Return the ids of the Security Groups attached to the network interfaces of an FSx File System.

    The network interfaces are read from the "NetworkInterfaceIds" entry of the file system description.
    Errors are not handled here and propagate to the caller. The call is wrapped by ``retry`` with at most
    3 attempts and a fixed wait of ``seconds(5)``.

    :param region: region of the FSx File System.
    :param file_system_id: id of the FSx File System.
    :return: set of Security Group ids.
    """
    logging.info(f"Describing Security Groups for FSx File System {file_system_id}")
    response = _fsx(region).describe_file_systems(FileSystemIds=[file_system_id])
    interface_ids = response["FileSystems"][0]["NetworkInterfaceIds"]
    collected_group_ids = set()
    for interface_id in interface_ids:
        collected_group_ids.update(describe_security_groups_for_network_interface(region, interface_id))
    return collected_group_ids


@retry(stop_max_attempt_number=10, wait_fixed=seconds(60))
def wait_for_fsx_filesystem_deletion(region: str, file_system_id: str):
    """
    Wait until an FSx File System can no longer be described.

    The waiting is implemented through the ``retry`` decorator (at most 10 attempts, fixed wait of
    ``seconds(60)``): an exception is raised as long as a description is still returned, which
    triggers a new attempt.

    :param region: region of the FSx File System.
    :param file_system_id: id of the FSx File System whose deletion is awaited.
    :raises Exception: if the file system can still be described.
    """
    logging.info(f"Waiting for deletion of FSx File System {file_system_id}")
    current_description = describe_fsx_filesystem(region, file_system_id)
    if current_description is None:
        return
    state = current_description.get("Lifecycle")
    raise Exception(f"FSx File System {file_system_id} in state {state} not deleted, yet. Sleeping 60 seconds ...")


def _fsx(region):
    """Build a boto3 FSx client for the given region."""
    fsx_client = boto3.client("fsx", region_name=region)
    return fsx_client
Loading

0 comments on commit 550806e

Please sign in to comment.