From ca994e166bf4f30d8b9332c44e1d1ad0acded6ca Mon Sep 17 00:00:00 2001 From: Petr Balogh Date: Mon, 16 Oct 2023 13:03:17 +0200 Subject: [PATCH 1/3] Implement additional force destroy functionality for IBM Cloud Signed-off-by: Petr Balogh --- conf/README.md | 2 + conf/ocsci/skip_ocp_destroy.yaml | 4 + ocs_ci/deployment/deployment.py | 5 ++ ocs_ci/deployment/ibmcloud.py | 131 ++++++++++++++++++++++++++++--- ocs_ci/ocs/exceptions.py | 4 + 5 files changed, 137 insertions(+), 9 deletions(-) create mode 100644 conf/ocsci/skip_ocp_destroy.yaml diff --git a/conf/README.md b/conf/README.md index 12a40c7af55..1236fefec6d 100644 --- a/conf/README.md +++ b/conf/README.md @@ -140,6 +140,8 @@ anywhere else. * `customized_deployment_storage_class` - Customize the storage class type in the deployment. * `ibmcloud_disable_addon` - Disable OCS addon * `in_transit_encryption` - Enable in-transit encryption. +* `skip_ocp_installer_destroy` - Skip OCP installer to destroy the cluster - + useful for enforcing force deploy steps only. #### REPORTING diff --git a/conf/ocsci/skip_ocp_destroy.yaml b/conf/ocsci/skip_ocp_destroy.yaml new file mode 100644 index 00000000000..2eeb2d8d714 --- /dev/null +++ b/conf/ocsci/skip_ocp_destroy.yaml @@ -0,0 +1,4 @@ +# Use this file to skip OCP installer to destroy the cluster +--- +DEPLOYMENT: + skip_ocp_installer_destroy: true diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 192939b51fb..91b86d66b41 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -1784,6 +1784,11 @@ def destroy_cluster(self, log_level="DEBUG"): Args: log_level (str): log level for installer (default: DEBUG) """ + if config.DEPLOYMENT.get("skip_ocp_installer_destroy"): + logger.info( + "OCP Destroy is skipped because skip_ocp_installer_destroy was enabled!" 
+ ) + return if self.platform == constants.IBM_POWER_PLATFORM: if not config.ENV_DATA["skip_ocs_deployment"]: self.destroy_ocs() diff --git a/ocs_ci/deployment/ibmcloud.py b/ocs_ci/deployment/ibmcloud.py index 5e57f94a192..98e11ec03fd 100644 --- a/ocs_ci/deployment/ibmcloud.py +++ b/ocs_ci/deployment/ibmcloud.py @@ -15,6 +15,7 @@ from ocs_ci.ocs.exceptions import ( CommandFailed, UnsupportedPlatformVersionError, + LeftoversExistError, VolumesExistError, ) from ocs_ci.utility import ibmcloud, version @@ -177,11 +178,17 @@ def destroy_cluster(self, log_level="DEBUG"): """ self.export_api_key() - logger.info("Destroying the IBM Cloud cluster") - super(IBMCloudIPI, self).destroy_cluster(log_level) - self.delete_service_id() resource_group = self.get_resource_group() + if resource_group: + logger.info("Destroying the IBM Cloud cluster") + super(IBMCloudIPI, self).destroy_cluster(log_level) + else: + logger.warning( + "Resource group for the cluster doesn't exist! Will not run installer to destroy the cluster!" + ) + self.delete_service_id() self.delete_volumes(resource_group) + self.delete_leftover_resources(resource_group) self.delete_resource_group(resource_group) def manually_create_iam_for_vpc(self): @@ -219,19 +226,29 @@ def get_release_image(self): if "release image" in line: return line.split(" ")[2].strip() - def get_resource_group(self): + def get_resource_group(self, return_id=False): """ Retrieve and set the resource group being utilized for the cluster assets. + + Args: + return_id (bool): If True, it will return ID instead of name. + + Returns: + str: name or ID of resource group if found. + None: in case no RG found. 
+ """ cmd = "ibmcloud resource groups --output json" proc = exec_cmd(cmd) logger.info("Retrieving cluster resource group") resource_data = json.loads(proc.stdout) for group in resource_data: - if group["name"].startswith(self.cluster_name): - # TODO: error prone if cluster_name is a substring of another cluster + if group["name"][:-6] == self.cluster_name: logger.info(f"Found resource group: {group['name']}") - return group["name"] + if not return_id: + return group["name"] + else: + return group["id"] logger.info(f"No resource group found with cluster name: {self.cluster_name}") def delete_service_id(self): @@ -250,7 +267,7 @@ def delete_volumes(self, resource_group): Delete the pvc volumes created in IBM Cloud that the openshift installer doesn't remove. Args: - resource_group: Resource group in IBM Cloud that contains the cluster resources. + resource_group (str): Resource group in IBM Cloud that contains the cluster resources. """ @@ -287,12 +304,108 @@ def _verify_volumes_deleted(resourece_group): _verify_volumes_deleted(resource_group) + def delete_leftover_resources(self, resource_group): + """ + Delete leftovers from IBM Cloud. + + Args: + resource_group (str): Resource group in IBM Cloud that contains the cluster resources. + + """ + + def _get_resources(resource_group): + """ + Return a list of leftover resources for the specified Resource Group + """ + cmd = f"ibmcloud resource service-instances --type all -g {resource_group} --output json" + proc = exec_cmd(cmd) + + return json.loads(proc.stdout) + + def _get_reclamations(resource_group): + """ + Get reclamations for resource group. + + Args: + resource_group (str): Resource group name + + Returns: + list: Reclamations for resource group if found. 
+ """ + rg_id = self.get_resource_group(return_id=True) + cmd = "ibmcloud resource reclamations --output json" + proc = exec_cmd(cmd) + reclamations = json.loads(proc.stdout) + rg_reclamations = [] + for reclamation in reclamations: + if reclamation["resource_group_id"] == rg_id: + rg_reclamations.append(reclamation) + return rg_reclamations + + def _delete_reclamations(reclamations): + """ + Delete reclamations + + Args: + reclamations (list): Reclamations to delete + + """ + for reclamation in reclamations: + logger.info(f"Deleting reclamation: {reclamation}") + cmd = ( + f"ibmcloud resource reclamation-delete {reclamation['id']} " + "--comment 'Force deleting leftovers' -f" + ) + exec_cmd(cmd) + + def _delete_resources(resources, ignore_errors=False): + """ + Deleting leftover resources. + + Args: + resources (list): Resource leftover names. + ignore_errors (bool): If True, it will be ignoring errors from ibmcloud cmd. + + """ + for resource in resources: + logger.info(f"Deleting leftover {resource}") + delete_cmd = f"ibmcloud resource service-instance-delete -g {resource_group} -f --recursive {resource}" + if ignore_errors: + try: + exec_cmd(delete_cmd) + except CommandFailed as ex: + logger.debug( + f"Exception will be ignored because ignore_error is set to true! 
Exception: {ex}" + ) + else: + exec_cmd(delete_cmd) + + if resource_group: + leftovers = _get_resources(resource_group) + if not leftovers: + logger.info("No leftovers found") + else: + resource_names = set([r["name"] for r in leftovers]) + logger.info(f"Deleting leftovers {resource_names}") + _delete_resources(resource_names, ignore_errors=True) + reclamations = _get_reclamations(resource_group) + if reclamations: + _delete_reclamations(reclamations) + # Additional check if all resources got really deleted: + if leftovers: + leftovers = _get_resources(resource_group) + if leftovers: + raise LeftoversExistError( + "Leftovers detected, you can use the details below to report support case in IBM Cloud:\n" + f"{leftovers}" + ) + def delete_resource_group(self, resource_group): """ Delete the resource group that contained the cluster assets. Args: - resource_group: Resource group in IBM Cloud that contains the cluster resources. + resource_group (str): Resource group in IBM Cloud that contains the cluster resources. 
""" diff --git a/ocs_ci/ocs/exceptions.py b/ocs_ci/ocs/exceptions.py index fe63ff7b9b8..e230f122256 100644 --- a/ocs_ci/ocs/exceptions.py +++ b/ocs_ci/ocs/exceptions.py @@ -600,6 +600,10 @@ class VolumesExistError(Exception): pass +class LeftoversExistError(Exception): + pass + + class ExternalClusterNodeRoleNotFound(Exception): pass From 2a0758e3f9ed3922bdc44d15acfc038176ce25fd Mon Sep 17 00:00:00 2001 From: Petr Balogh Date: Mon, 16 Oct 2023 15:49:44 +0200 Subject: [PATCH 2/3] Move deletion of volumes before openshift-installer destroy Signed-off-by: Petr Balogh --- ocs_ci/deployment/ibmcloud.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocs_ci/deployment/ibmcloud.py b/ocs_ci/deployment/ibmcloud.py index 98e11ec03fd..33ac98fd612 100644 --- a/ocs_ci/deployment/ibmcloud.py +++ b/ocs_ci/deployment/ibmcloud.py @@ -180,6 +180,10 @@ def destroy_cluster(self, log_level="DEBUG"): self.export_api_key() resource_group = self.get_resource_group() if resource_group: + # Based on docs: + # https://docs.openshift.com/container-platform/4.13/installing/installing_ibm_cloud_public/uninstalling-cluster-ibm-cloud.html + # The volumes should be removed before running openshift-installer for destroy. + self.delete_volumes(resource_group) logger.info("Destroying the IBM Cloud cluster") super(IBMCloudIPI, self).destroy_cluster(log_level) else: @@ -187,7 +191,6 @@ def destroy_cluster(self, log_level="DEBUG"): "Resource group for the cluster doesn't exist! Will not run installer to destroy the cluster!" 
+ ) self.delete_service_id() - self.delete_volumes(resource_group) self.delete_leftover_resources(resource_group) self.delete_resource_group(resource_group) From c52d3424fbaae4f40caf753bdf338989dd883c32 Mon Sep 17 00:00:00 2001 From: Petr Balogh Date: Mon, 16 Oct 2023 17:00:29 +0200 Subject: [PATCH 3/3] Add re-try for leftover delete logic Signed-off-by: Petr Balogh --- ocs_ci/deployment/ibmcloud.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ocs_ci/deployment/ibmcloud.py b/ocs_ci/deployment/ibmcloud.py index 33ac98fd612..95dc32d346d 100644 --- a/ocs_ci/deployment/ibmcloud.py +++ b/ocs_ci/deployment/ibmcloud.py @@ -307,6 +307,7 @@ def _verify_volumes_deleted(resourece_group): _verify_volumes_deleted(resource_group) + @retry((LeftoversExistError, CommandFailed), tries=3, delay=30, backoff=1) def delete_leftover_resources(self, resource_group): """ Delete leftovers from IBM Cloud. @@ -314,6 +315,9 @@ def delete_leftover_resources(self, resource_group): Args: resource_group (str): Resource group in IBM Cloud that contains the cluster resources. + Raises: + LeftoversExistError: In case leftovers still exist after the attempt to clean them up. + """ def _get_resources(resource_group):