Recover from pod restarts during cluster creation during setup #499

Merged · 10 commits · Sep 16, 2024
7 changes: 4 additions & 3 deletions lib/charms/mysql/v0/backups.py
@@ -60,7 +60,8 @@ def is_unit_blocked(self) -> bool:
MySQLDeleteTempRestoreDirectoryError,
MySQLEmptyDataDirectoryError,
MySQLExecuteBackupCommandsError,
MySQLGetMemberStateError,
MySQLUnableToGetMemberStateError,
MySQLNoMemberStateError,
MySQLInitializeJujuOperationsTableError,
MySQLKillSessionError,
MySQLOfflineModeAndHiddenInstanceExistsError,
@@ -99,7 +100,7 @@ def is_unit_blocked(self) -> bool:

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 11
LIBPATCH = 12


if typing.TYPE_CHECKING:
@@ -339,7 +340,7 @@ def _can_unit_perform_backup(self) -> Tuple[bool, Optional[str]]:

try:
state, role = self.charm._mysql.get_member_state()
except MySQLGetMemberStateError:
except (MySQLNoMemberStateError, MySQLUnableToGetMemberStateError):
return False, "Error obtaining member state"

if role == "primary" and self.charm.app.planned_units() > 1:
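A note on the exception split above: MySQLGetMemberStateError is replaced by two narrower exceptions, and call sites such as _can_unit_perform_backup now catch both. Below is a minimal sketch, not part of the PR, of how a caller might tell the two failure modes apart; the report_member_state helper and its mysql argument are illustrative.

from charms.mysql.v0.mysql import (
    MySQLNoMemberStateError,
    MySQLUnableToGetMemberStateError,
)


def report_member_state(mysql) -> str:
    """Illustrative only: distinguish the two failure modes of get_member_state()."""
    try:
        state, role = mysql.get_member_state()
    except MySQLNoMemberStateError:
        # The query succeeded but returned no member rows, e.g. the instance
        # is not (or not yet) part of a Group Replication group.
        return "no member state available"
    except MySQLUnableToGetMemberStateError:
        # The query itself could not be run, e.g. mysqld is down.
        return "unable to query member state"
    return f"{state}/{role}"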
46 changes: 39 additions & 7 deletions lib/charms/mysql/v0/mysql.py
@@ -134,7 +134,7 @@ def wait_until_mysql_connection(self) -> None:
# Increment this major API version when introducing breaking changes
LIBAPI = 0

LIBPATCH = 70
LIBPATCH = 71

UNIT_TEARDOWN_LOCKNAME = "unit-teardown"
UNIT_ADD_LOCKNAME = "unit-add"
@@ -275,8 +275,12 @@ class MySQLGrantPrivilegesToUserError(Error):
"""Exception raised when there is an issue granting privileges to user."""


class MySQLGetMemberStateError(Error):
"""Exception raised when there is an issue getting member state."""
class MySQLNoMemberStateError(Error):
"""Exception raised when there is no member state."""


class MySQLUnableToGetMemberStateError(Error):
"""Exception raised when unable to get member state."""


class MySQLGetClusterEndpointsError(Error):
@@ -619,6 +623,22 @@ def cluster_initialized(self) -> bool:

return False

@property
Contributor: Don't forget the libpatch bump

Contributor (author): updated libpatch - will make sure that these changes get propagated to the libs in the vm repo and published correctly

def only_one_cluster_node_thats_uninitialized(self) -> Optional[bool]:
"""Check if only a single cluster node exists across all units."""
if not self.app_peer_data.get("cluster-name"):
return None

total_cluster_nodes = 0
for unit in self.app_units:
Contributor: I think this method name is misleading. Iterating over all units can lead to instances being counted more than once if more than one unit has been added to the cluster.

I understand the usage, though: if the counts sum to one, it is possible to frame the case.

Since we already have a method for getting the cluster node count, how about changing this to a boolean-returning method that tests for cluster metadata on only a single unit?

Contributor (author): Renamed, and refactored to be more precise in f0daa96 (with the property renamed in a following commit).

total_cluster_nodes += self._mysql.get_cluster_node_count(from_instance=self.get_unit_address(unit))

total_online_cluster_nodes = 0
for unit in self.app_units:
total_online_cluster_nodes += self._mysql.get_cluster_node_count(from_instance=self.get_unit_address(unit), node_status=MySQLMemberState["ONLINE"])

return total_cluster_nodes == 1 and total_online_cluster_nodes == 0
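To make the double-counting concern raised in the review comment above concrete: the property sums get_cluster_node_count over every unit, so the check is only meaningful when exactly one unit holds metadata for a single node that never came online (the pod-restart-during-creation case). A rough sketch with made-up numbers; the helper below is illustrative, not charm code.

def only_one_uninitialized_node(per_unit_total: list, per_unit_online: list) -> bool:
    """Illustrative restatement of the check above."""
    return sum(per_unit_total) == 1 and sum(per_unit_online) == 0


# Leader pod restarted mid-creation: only unit 0 has metadata, and the single
# recorded node never came online.
assert only_one_uninitialized_node([1, 0, 0], [0, 0, 0]) is True

# Healthy two-node cluster: each joined unit reports the full node count, so
# nodes are counted once per answering unit (the reviewer's point), and the
# sums are well above one anyway.
assert only_one_uninitialized_node([2, 2], [2, 2]) is False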

@property
def cluster_fully_initialized(self) -> bool:
"""Returns True if the cluster is fully initialized.
@@ -1699,6 +1719,18 @@ def is_instance_configured_for_innodb(
)
return False

def drop_group_replication_metadata_schema(self) -> None:
"""Drop the group replication metadata schema from current unit."""
commands = (
f"shell.connect('{self.server_config_user}:{self.server_config_password}@{self.instance_address}')",
"dba.drop_metadata_schema()",
)

try:
self._run_mysqlsh_script("\n".join(commands))
except MySQLClientError:
logger.exception("Failed to drop group replication metadata schema")
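For context, this helper exists so that a unit which crashed partway through create_cluster can clear the stale Group Replication metadata and try again; the real flow is in _handle_potential_cluster_crash_scenario in src/charm.py further down. A hedged sketch of that sequence, with an illustrative function name and charm argument:

from ops.model import ActiveStatus


def recover_from_interrupted_cluster_creation(charm) -> None:
    """Illustrative only: clear stale metadata, then recreate the cluster."""
    # Group Replication metadata was written before the pod restart, but no
    # member ever came online, so the metadata is safe to drop.
    charm._mysql.drop_group_replication_metadata_schema()
    # Recreate the cluster from scratch on this unit.
    charm.create_cluster()
    charm.unit.status = ActiveStatus(charm.active_status_message)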

def are_locks_acquired(self, from_instance: Optional[str] = None) -> bool:
"""Report if any topology change is being executed."""
commands = (
@@ -2319,13 +2351,13 @@ def get_member_state(self) -> Tuple[str, str]:
logger.error(
"Failed to get member state: mysqld daemon is down",
)
raise MySQLGetMemberStateError(e.message)
raise MySQLUnableToGetMemberStateError(e.message)

# output is like:
# 'MEMBER_STATE\tMEMBER_ROLE\tMEMBER_ID\t@@server_uuid\nONLINE\tPRIMARY\t<uuid>\t<uuid>\n'
lines = output.strip().lower().split("\n")
if len(lines) < 2:
raise MySQLGetMemberStateError("No member state retrieved")
raise MySQLNoMemberStateError("No member state retrieved")

if len(lines) == 2:
# Instance only knows its own state
@@ -2341,7 +2373,7 @@
# filter server uuid
return results[0], results[1] or "unknown"

raise MySQLGetMemberStateError("No member state retrieved")
raise MySQLNoMemberStateError("No member state retrieved")

def is_cluster_replica(self, from_instance: Optional[str] = None) -> Optional[bool]:
"""Check if this cluster is a replica in a cluster set."""
@@ -2398,7 +2430,7 @@ def hold_if_recovering(self) -> None:
while True:
try:
member_state, _ = self.get_member_state()
except MySQLGetMemberStateError:
except (MySQLNoMemberStateError, MySQLUnableToGetMemberStateError):
break
if member_state == MySQLMemberState.RECOVERING:
logger.debug("Unit is recovering")
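The two exceptions raised above map onto how get_member_state() reads its query result: the raw output is a tab-separated header row plus one row per member the instance knows about. A simplified sketch of the two-row case described in the inline comment; the parse_member_state helper is illustrative, not the library code, which additionally filters rows by server UUID when more members are reported.

RAW = "MEMBER_STATE\tMEMBER_ROLE\tMEMBER_ID\t@@server_uuid\nONLINE\tPRIMARY\t<uuid>\t<uuid>\n"


def parse_member_state(output: str) -> tuple:
    """Illustrative only: header row first, then this instance's own row."""
    lines = output.strip().lower().split("\n")
    if len(lines) < 2:
        raise ValueError("No member state retrieved")
    state, role = lines[1].split("\t")[:2]
    return state, role or "unknown"


print(parse_member_state(RAW))  # ('online', 'primary')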
69 changes: 53 additions & 16 deletions src/charm.py
@@ -28,13 +28,14 @@
MySQLConfigureMySQLUsersError,
MySQLCreateClusterError,
MySQLGetClusterPrimaryAddressError,
MySQLGetMemberStateError,
MySQLGetMySQLVersionError,
MySQLInitializeJujuOperationsTableError,
MySQLLockAcquisitionError,
MySQLNoMemberStateError,
MySQLRebootFromCompleteOutageError,
MySQLServiceNotRunningError,
MySQLSetClusterPrimaryError,
MySQLUnableToGetMemberStateError,
)
from charms.mysql.v0.tls import MySQLTLS
from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider
@@ -700,7 +701,12 @@ def _on_mysql_pebble_ready(self, event) -> None:
# First run setup
self._configure_instance(container)

if not self.unit.is_leader() or self.cluster_initialized:
# We consider cluster initialized only if a primary already exists
# (as there can be metadata in the database but no primary if the pod
# crashes while the cluster is being created)
if not self.unit.is_leader() or (
self.cluster_initialized and self._get_primary_from_online_peer()
):
# Non-leader units try to join cluster
self.unit.status = WaitingStatus("Waiting for instance to join the cluster")
self.unit_peer_data.update({"member-role": "secondary", "member-state": "waiting"})
@@ -710,12 +716,14 @@
try:
# Create the cluster when this is the leader unit
logger.info(f"Creating cluster {self.app_peer_data['cluster-name']}")
self.unit.status = MaintenanceStatus("Creating cluster")
self.create_cluster()
self.unit.status = ops.ActiveStatus(self.active_status_message)

except (
MySQLCreateClusterError,
MySQLGetMemberStateError,
MySQLUnableToGetMemberStateError,
MySQLNoMemberStateError,
MySQLInitializeJujuOperationsTableError,
MySQLCreateClusterError,
):
@@ -728,19 +736,24 @@ def _handle_potential_cluster_crash_scenario(self) -> bool:
Returns:
bool indicating whether the caller should return
"""
if not self.cluster_initialized or not self.unit_peer_data.get("member-role"):
# health checks are only after cluster and members are initialized
if not self._mysql.is_mysqld_running():
return True

if not self._mysql.is_mysqld_running():
only_single_unitialized_node_across_cluster = (
self.only_one_cluster_node_thats_uninitialized
)

if (
not self.cluster_initialized and not only_single_unitialized_node_across_cluster
) or not self.unit_peer_data.get("member-role"):
return True

# retrieve and persist state for every unit
try:
state, role = self._mysql.get_member_state()
self.unit_peer_data["member-state"] = state
self.unit_peer_data["member-role"] = role
except MySQLGetMemberStateError:
except (MySQLNoMemberStateError, MySQLUnableToGetMemberStateError):
logger.error("Error getting member state. Avoiding potential cluster crash recovery")
self.unit.status = MaintenanceStatus("Unable to get member state")
return True
@@ -757,23 +770,33 @@ def _handle_potential_cluster_crash_scenario(self) -> bool:
if state == "recovering":
return True

if state in ["offline"]:
if state == "offline":
# Group Replication is active but the member does not belong to any group
all_states = {
self.peers.data[unit].get("member-state", "unknown") for unit in self.peers.units
}
# Add state for this unit (self.peers.units does not include this unit)
all_states.add("offline")

if all_states == {"offline"} and self.unit.is_leader():
# Add state 'offline' for this unit (self.peers.units does not
# include this unit)
if (all_states | {"offline"} == {"offline"} and self.unit.is_leader()) or (
only_single_unitialized_node_across_cluster and all_states == {"waiting"}
):
# All instances are offline; reboot the cluster from complete outage from the leader unit

logger.info("Attempting reboot from complete outage.")
try:
self._mysql.reboot_from_complete_outage()
# Need condition to avoid rebooting on all units of application
if self.unit.is_leader() or only_single_unitialized_node_across_cluster:
self._mysql.reboot_from_complete_outage()
except MySQLRebootFromCompleteOutageError:
logger.error("Failed to reboot cluster from complete outage.")
self.unit.status = BlockedStatus("failed to recover cluster.")

if only_single_unitialized_node_across_cluster and all_states == {"waiting"}:
self._mysql.drop_group_replication_metadata_schema()
self.create_cluster()
self.unit.status = ActiveStatus(self.active_status_message)
else:
self.unit.status = BlockedStatus("failed to recover cluster.")

return True
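Taken together, the recovery decision added in this branch can be restated roughly as follows; this is a hedged simplification that ignores status updates and exact error handling, and the names (recovery_actions, states) are illustrative rather than charm code.

def recovery_actions(states: set, is_leader: bool, only_single_uninitialized_node: bool) -> list:
    """Illustrative only: actions taken when mysqld is up but this member is 'offline'."""
    actions = []
    full_outage = states | {"offline"} == {"offline"} and is_leader
    interrupted_creation = only_single_uninitialized_node and states == {"waiting"}
    if full_outage or interrupted_creation:
        if is_leader or only_single_uninitialized_node:
            actions.append("reboot_from_complete_outage")
        if interrupted_creation:
            # Pod restarted while the cluster was still being created: drop the
            # stale metadata and create the cluster again on this unit.
            actions += ["drop_group_replication_metadata_schema", "create_cluster"]
    return actions


# Classic full outage observed from the leader: attempt a reboot from outage.
assert recovery_actions({"offline"}, True, False) == ["reboot_from_complete_outage"]

# Interrupted cluster creation: peers are still 'waiting' and exactly one
# uninitialised node exists, so recreate the cluster after the reboot attempt.
assert recovery_actions({"waiting"}, True, True) == [
    "reboot_from_complete_outage",
    "drop_group_replication_metadata_schema",
    "create_cluster",
]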

@@ -785,10 +808,23 @@ def _is_cluster_blocked(self) -> bool:
Returns: a boolean indicating whether the update-status (caller) should
no-op and return.
"""
unit_member_state = self.unit_peer_data.get("member-state")
if unit_member_state in ["waiting", "restarting"]:
# We need to query member state from the server, since member state will
# be 'offline' if the pod was rescheduled during cluster creation, whereas
# member-state in the unit peer databag will still be 'waiting'
member_state_exists = True
try:
member_state, _ = self._mysql.get_member_state()
except MySQLUnableToGetMemberStateError:
logger.error("Error getting member state while checking if cluster is blocked")
self.unit.status = MaintenanceStatus("Unable to get member state")
return True
except MySQLNoMemberStateError:
member_state_exists = False

if not member_state_exists or member_state == "restarting":
# avoid changing status while tls is being set up or charm is being initialized
logger.info(f"Unit state is {unit_member_state}")
logger.info("Unit is waiting or restarting")
logger.debug(f"{member_state_exists=}, {member_state=}")
return True

# avoid changing status while async replication is setting up
@@ -812,6 +848,7 @@ def _on_update_status(self, _: Optional[UpdateStatusEvent]) -> None:

container = self.unit.get_container(CONTAINER_NAME)
if not container.can_connect():
logger.debug("Cannot connect to pebble in the mysql container")
return

if self._handle_potential_cluster_crash_scenario():
2 changes: 1 addition & 1 deletion tests/integration/helpers.py
@@ -581,7 +581,7 @@ async def write_content_to_file_in_unit(
)


async def read_contents_from_file_in_unit(
def read_contents_from_file_in_unit(
ops_test: OpsTest, unit: Unit, path: str, container_name: str = CONTAINER_NAME
) -> str:
"""Read contents from file in the provided unit.
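Side note on the change above: read_contents_from_file_in_unit goes from async def to a plain def, so existing call sites would presumably drop the await; the path in this one-line example is made up for illustration.

content = read_contents_from_file_in_unit(ops_test, unit, "/var/lib/mysql/illustrative.cnf")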
16 changes: 16 additions & 0 deletions tests/integration/high_availability/high_availability_helpers.py
@@ -653,3 +653,19 @@ def delete_pvcs(pvcs: list[PersistentVolumeClaim]) -> None:
namespace=pvc.metadata.namespace,
grace_period=0,
)


def delete_pod(ops_test: OpsTest, unit: Unit) -> None:
"""Delete the provided pod."""
pod_name = unit.name.replace("/", "-")
subprocess.run(
[
"microk8s.kubectl",
"-n",
ops_test.model.info.name,
"delete",
"pod",
pod_name,
],
check=True,
)
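The new delete_pod helper is presumably what lets an integration test knock a pod over while the cluster is still forming and then check that the charm recovers on its own. A hedged sketch of such a test; the test name, application name, import path, and timeout are illustrative and not from this PR.

import pytest
from pytest_operator.plugin import OpsTest

from high_availability_helpers import delete_pod  # illustrative import; adjust to the test layout


@pytest.mark.abort_on_fail
async def test_recovery_after_pod_delete_during_setup(ops_test: OpsTest) -> None:
    mysql_app = ops_test.model.applications["mysql-k8s"]
    unit = mysql_app.units[0]

    # Simulate the pod being rescheduled while the cluster is still being created.
    delete_pod(ops_test, unit)

    # The charm is expected to drop the stale Group Replication metadata,
    # recreate the cluster, and settle back into active/idle.
    await ops_test.model.wait_for_idle(apps=["mysql-k8s"], status="active", timeout=1800)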