Feature issue 1219 (#1221)
* [ISSUE-1219] Add E2E test for auto drive replacement with Storage Group (HDD SC) - One volume per pod

Signed-off-by: Andrzej Zukowski <andrzej.zukowski@dell.com>

* [ISSUE-1219] Check CR removal before event generation

Signed-off-by: Andrzej Zukowski <andrzej.zukowski@dell.com>

* [ISSUE-1219] Conflict resolution

Signed-off-by: Andrzej Zukowski <andrzej.zukowski@dell.com>

* [ISSUE-1219] Function declaration adjustment

Signed-off-by: Andrzej Zukowski <andrzej.zukowski@dell.com>

---------

Signed-off-by: Andrzej Zukowski <andrzej.zukowski@dell.com>
Andrzej-Zukowski committed Jul 25, 2024
1 parent a6443ee commit f835144
Showing 1 changed file with 180 additions and 0 deletions.
180 changes: 180 additions & 0 deletions tests/e2e-test-framework/tests/test_drive_replacement_one_volume.py
@@ -0,0 +1,180 @@
import logging
from typing import Dict
import pytest

import framework.const as const

from framework.sts import STS
from framework.utils import Utils
from framework.drive import DriveUtils


class TestAutoDriveReplacementWithOneVolumePerPod:
@classmethod
@pytest.fixture(autouse=True)
def setup_class(
cls,
namespace: str,
drive_utils_executors: Dict[str, DriveUtils],
utils: Utils,
):
cls.namespace = namespace
cls.name = "test-auto-drive-replacement-one-volume"
cls.timeout = 120
cls.replicas = 1

cls.utils = utils

cls.drive_utils = drive_utils_executors
cls.sts = STS(cls.namespace, cls.name, cls.replicas)
cls.sts.delete()
cls.sts.create(storage_classes=[const.HDD_SC])

yield

cls.sts.delete()

@pytest.mark.hal
def test_5771_auto_drive_replacement_with_one_volume_per_pod(self):
# 1. get volume for deployed pod
assert (
self.sts.verify(self.timeout) is True
), f"STS: {self.name} failed to reach desired number of replicas: {self.replicas}"
pod = self.utils.list_pods(name_prefix=self.name)[0]
node_ip = self.utils.get_pod_node_ip(
pod_name=pod.metadata.name, namespace=self.namespace
)
volumes = self.utils.list_volumes(pod_name=pod.metadata.name)
assert len(volumes) == 1, f"Unexpected number of volumes: {len(volumes)}"
volume = volumes[0]

# get drive
drive = self.utils.get_drive_cr(
volume_name=volume["metadata"]["name"],
namespace=volume["metadata"]["namespace"])

# 2.1 simulate drive failure. Annotate drive used by pod with health=BAD
drive_name = drive["metadata"]["name"]
self.utils.annotate_custom_resource(
resource_name=drive_name,
resource_type="drives",
annotation_key=const.DRIVE_HEALTH_ANNOTATION,
annotation_value=const.DRIVE_HEALTH_BAD_ANNOTATION,
)
logging.info(f"drive: {drive_name} was annotated with health=BAD")

# 2.2 wait until drive health is BAD, status=ONLINE, usage=RELEASING.
drive_name = drive["metadata"]["name"]
logging.info(f"Waiting for drive: {drive_name}")
assert self.utils.wait_drive(
name=drive_name,
expected_status=const.STATUS_ONLINE,
expected_health=const.HEALTH_BAD,
expected_usage=const.USAGE_RELEASING
), f"Drive {drive_name} failed to reach expected Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
logging.info(f"drive {drive_name} went in Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}")

# 2.3. wait until volume health is BAD, status=OPERATIVE, usage=RELEASING.
volume_name = volume["metadata"]["name"]
logging.info(f"Waiting for volume: {volume_name}")
assert self.utils.wait_volume(
name=volume_name,
expected_health=const.HEALTH_BAD,
expected_usage=const.USAGE_RELEASING,
expected_operational_status=const.STATUS_OPERATIVE
), f"Volume {volume_name} failed to reach OperationalStatus: {const.STATUS_OPERATIVE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
logging.info(f"volume {volume_name} went in OperationalStatus: {const.STATUS_OPERATIVE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}")

# 3. check events and locate event related to DriveHealthFailure
drive_name = drive["metadata"]["name"]
assert self.utils.event_in(
resource_name=drive_name,
reason=const.DRIVE_HEALTH_FAILURE,
), f"event {const.DRIVE_HEALTH_FAILURE} for drive {drive_name} not found"

# 6.1. annotate volume with release=done
volume_name = volume["metadata"]["name"]
self.utils.annotate_custom_resource(
resource_name=volume_name,
resource_type="volumes",
annotation_key="release",
annotation_value="done",
namespace=volume['metadata']['namespace']
)
logging.info(f"volume: {volume_name} was annotated with release=done")

# 6.2. check drive usages are RELEASED
assert self.utils.wait_drive(
name=drive['metadata']['name'],
expected_usage=const.USAGE_RELEASED
), f"Drive {drive_name} failed to reach expected Usage: {const.USAGE_RELEASED}"
logging.info(f"drive {drive_name} went in Usage: {const.USAGE_RELEASED}")

# 6.3. check volume is RELEASED
assert self.utils.wait_volume(
name=volume['metadata']['name'],
expected_usage=const.USAGE_RELEASED
), f"Volume {volume_name} failed to reach expected Usage {const.USAGE_RELEASED}"
logging.info(f"volume {volume_name} went in Usage: {const.USAGE_RELEASED}")

# 7. check that the DriveReadyForRemoval event is generated and locate the event related to VolumeBadHealth
drive_name = drive["metadata"]["name"]
assert self.utils.event_in(
resource_name=drive_name,
reason=const.DRIVE_READY_FOR_REMOVAL,
), f"event {const.DRIVE_READY_FOR_REMOVAL} for drive {drive_name} not found"

volume_name = volume["metadata"]["name"]
assert self.utils.event_in(
resource_name=volume_name,
reason=const.VOLUME_BAD_HEALTH,
), f"event {const.VOLUME_BAD_HEALTH} for volume {volume_name} not found"

# 8. delete pod and pvc
self.utils.clear_pvc_and_pod(pod_name=pod.metadata.name, namespace=self.namespace)

# 9. check drive status to be REMOVING or REMOVED
# 10. check LED state to be 1 (if drive supports LED) or 2 (if drive does not support LED)
# 11. check drive status to be ONLINE
assert self.utils.wait_drive(
name=drive['metadata']['name'],
expected_status=const.STATUS_ONLINE,
expected_usage=const.USAGE_REMOVED,
expected_health=const.HEALTH_BAD,
expected_led_state=const.LED_STATE
), f"Drive {drive_name} failed to reach expected Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_REMOVED}, LEDState: {drive["spec"]["LEDState"]}"
logging.info(f"drive {drive_name} went in Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_REMOVED}, LEDState: {drive["spec"]["LEDState"]}")

# 12. check for events: DriveReadyForPhysicalRemoval
drive_name = drive["metadata"]["name"]
assert self.utils.event_in(
resource_name=drive_name,
reason=const.DRIVE_READY_FOR_PHYSICAL_REMOVAL,
), f"event {const.DRIVE_READY_FOR_PHYSICAL_REMOVAL} for drive {drive_name} not found"

# 13. get the node ID on which the drive resides, obtain the path for the affected drive, and identify the node name for the corresponding node ID
drive_name = drive["metadata"]["name"]
drive_path = drive["spec"]["Path"]
assert drive_path, f"Drive path for drive {drive_name} not found"
logging.info(f"drive_path: {drive_path}")

scsi_id = self.drive_utils[node_ip].get_scsi_id(drive_path)
assert scsi_id, f"scsi_id for drive {drive_name} not found"
logging.info(f"scsi_id: {scsi_id}")

# 14. remove drive
self.drive_utils[node_ip].remove(scsi_id)
logging.info(f"drive {drive_path}, {scsi_id} removed")

# 15. check drive CR successfully removed
drive_name = drive["metadata"]["name"]
assert self.utils.check_drive_cr_exist_or_not(
drive_name=drive_name, cr_existence=False
), f"Drive CR {drive_name} still exists"

# 16. check for events DriveSuccessfullyRemoved in kubernetes events
drive_name = drive["metadata"]["name"]
assert self.utils.event_in(
resource_name=drive_name,
reason=const.DRIVE_SUCCESSFULLY_REMOVED,
), f"event {const.DRIVE_SUCCESSFULLY_REMOVED} for drive {drive_name} not found"
