-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Recover from pod restarts during cluster creation during setup #499
Changes from 7 commits
2cb54cb
b7d7cde
10d5405
ced154e
f0daa96
ab63d53
915d71d
c8d8c21
45070d5
1f4fb72
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,7 +134,7 @@ def wait_until_mysql_connection(self) -> None: | |
# Increment this major API version when introducing breaking changes | ||
LIBAPI = 0 | ||
|
||
LIBPATCH = 70 | ||
LIBPATCH = 71 | ||
|
||
UNIT_TEARDOWN_LOCKNAME = "unit-teardown" | ||
UNIT_ADD_LOCKNAME = "unit-add" | ||
|
@@ -275,8 +275,12 @@ class MySQLGrantPrivilegesToUserError(Error): | |
"""Exception raised when there is an issue granting privileges to user.""" | ||
|
||
|
||
class MySQLGetMemberStateError(Error): | ||
"""Exception raised when there is an issue getting member state.""" | ||
class MySQLNoMemberStateError(Error): | ||
"""Exception raised when there is no member state.""" | ||
|
||
|
||
class MySQLUnableToGetMemberStateError(Error): | ||
"""Exception raised when unable to get member state.""" | ||
|
||
|
||
class MySQLGetClusterEndpointsError(Error): | ||
|
@@ -619,6 +623,22 @@ def cluster_initialized(self) -> bool: | |
|
||
return False | ||
|
||
@property | ||
def only_one_cluster_node_thats_uninitialized(self) -> Optional[bool]: | ||
"""Check if only a single cluster node exists across all units.""" | ||
if not self.app_peer_data.get("cluster-name"): | ||
return None | ||
|
||
total_cluster_nodes = 0 | ||
for unit in self.app_units: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this method name is misleading. I understand the usage, though: if the node count sums to one, it is possible to identify this case. Since we already have a method for getting the cluster node count, how about changing this to a boolean-returning method that tests for cluster metadata in only a single unit? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Renamed, and refactored to be more precise in f0daa96 (with the property renamed in a following commit). |
||
total_cluster_nodes += self._mysql.get_cluster_node_count(from_instance=self.get_unit_address(unit)) | ||
|
||
total_online_cluster_nodes = 0 | ||
for unit in self.app_units: | ||
total_online_cluster_nodes += self._mysql.get_cluster_node_count(from_instance=self.get_unit_address(unit), node_status=MySQLMemberState["ONLINE"]) | ||
|
||
return total_cluster_nodes == 1 and total_online_cluster_nodes == 0 | ||
|
||
@property | ||
def cluster_fully_initialized(self) -> bool: | ||
"""Returns True if the cluster is fully initialized. | ||
|
@@ -1699,6 +1719,18 @@ def is_instance_configured_for_innodb( | |
) | ||
return False | ||
|
||
def drop_group_replication_metadata_schema(self) -> None: | ||
"""Drop the group replication metadata schema from current unit.""" | ||
commands = ( | ||
f"shell.connect('{self.server_config_user}:{self.server_config_password}@{self.instance_address}')", | ||
"dba.drop_metadata_schema()", | ||
) | ||
|
||
try: | ||
self._run_mysqlsh_script("\n".join(commands)) | ||
except MySQLClientError: | ||
logger.exception("Failed to drop group replication metadata schema") | ||
|
||
def are_locks_acquired(self, from_instance: Optional[str] = None) -> bool: | ||
"""Report if any topology change is being executed.""" | ||
commands = ( | ||
|
@@ -2319,13 +2351,13 @@ def get_member_state(self) -> Tuple[str, str]: | |
logger.error( | ||
"Failed to get member state: mysqld daemon is down", | ||
) | ||
raise MySQLGetMemberStateError(e.message) | ||
raise MySQLUnableToGetMemberStateError(e.message) | ||
|
||
# output is like: | ||
# 'MEMBER_STATE\tMEMBER_ROLE\tMEMBER_ID\t@@server_uuid\nONLINE\tPRIMARY\t<uuid>\t<uuid>\n' | ||
lines = output.strip().lower().split("\n") | ||
if len(lines) < 2: | ||
raise MySQLGetMemberStateError("No member state retrieved") | ||
raise MySQLNoMemberStateError("No member state retrieved") | ||
|
||
if len(lines) == 2: | ||
# Instance just know it own state | ||
|
@@ -2341,7 +2373,7 @@ def get_member_state(self) -> Tuple[str, str]: | |
# filter server uuid | ||
return results[0], results[1] or "unknown" | ||
|
||
raise MySQLGetMemberStateError("No member state retrieved") | ||
raise MySQLNoMemberStateError("No member state retrieved") | ||
|
||
def is_cluster_replica(self, from_instance: Optional[str] = None) -> Optional[bool]: | ||
"""Check if this cluster is a replica in a cluster set.""" | ||
|
@@ -2398,7 +2430,7 @@ def hold_if_recovering(self) -> None: | |
while True: | ||
try: | ||
member_state, _ = self.get_member_state() | ||
except MySQLGetMemberStateError: | ||
except (MySQLNoMemberStateError, MySQLUnableToGetMemberStateError): | ||
break | ||
if member_state == MySQLMemberState.RECOVERING: | ||
logger.debug("Unit is recovering") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't forget the libpatch bump
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated LIBPATCH. I will make sure that these changes are propagated to the libs in the VM repo and published correctly.