-
Notifications
You must be signed in to change notification settings - Fork 26
[DPE-6484] Add scope to promote to primary #850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e0e3bba
ced2bc0
11ebf5b
d287454
8ab236a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,7 +112,7 @@ | |
WORKLOAD_OS_GROUP, | ||
WORKLOAD_OS_USER, | ||
) | ||
from patroni import NotReadyError, Patroni, SwitchoverFailedError | ||
from patroni import NotReadyError, Patroni, SwitchoverFailedError, SwitchoverNotSyncError | ||
from relations.async_replication import ( | ||
REPLICATION_CONSUMER_RELATION, | ||
REPLICATION_OFFER_RELATION, | ||
|
@@ -211,6 +211,7 @@ def __init__(self, *args): | |
self.framework.observe(self.on.stop, self._on_stop) | ||
self.framework.observe(self.on.get_password_action, self._on_get_password) | ||
self.framework.observe(self.on.set_password_action, self._on_set_password) | ||
self.framework.observe(self.on.promote_to_primary_action, self._on_promote_to_primary) | ||
self.framework.observe(self.on.get_primary_action, self._on_get_primary) | ||
self.framework.observe(self.on.update_status, self._on_update_status) | ||
self._storage_path = self.meta.storages["pgdata"].location | ||
|
@@ -1305,6 +1306,26 @@ def _on_set_password(self, event: ActionEvent) -> None: | |
|
||
event.set_results({"password": password}) | ||
|
||
def _on_promote_to_primary(self, event: ActionEvent) -> None: | ||
if event.params.get("scope") == "cluster": | ||
return self.async_replication.promote_to_primary(event) | ||
elif event.params.get("scope") == "unit": | ||
return self.promote_primary_unit(event) | ||
else: | ||
event.fail("Scope should be either cluster or unit") | ||
|
||
def promote_primary_unit(self, event: ActionEvent) -> None: | ||
"""Handles promote to primary for unit scope.""" | ||
if event.params.get("force"): | ||
event.fail("Suprerfluous force flag with unit scope") | ||
else: | ||
try: | ||
self._patroni.switchover(self.unit.name, wait=False) | ||
except SwitchoverNotSyncError: | ||
event.fail("Unit is not sync standby") | ||
except SwitchoverFailedError: | ||
event.fail("Switchover failed or timed out, check the logs for details") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If |
||
|
||
def _on_get_primary(self, event: ActionEvent) -> None: | ||
"""Get primary instance.""" | ||
try: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,10 @@ class SwitchoverFailedError(Exception): | |
"""Raised when a switchover failed for some reason.""" | ||
|
||
|
||
class SwitchoverNotSyncError(SwitchoverFailedError):
    """Raised when a switchover is rejected because the candidate is not a sync standby."""
|
||
|
||
class UpdateSyncNodeCountError(Exception): | ||
"""Raised when updating synchronous_node_count failed for some reason.""" | ||
|
||
|
@@ -612,7 +616,7 @@ def restart_postgresql(self) -> None: | |
timeout=PATRONI_TIMEOUT, | ||
) | ||
|
||
def switchover(self, candidate: str | None = None) -> None: | ||
def switchover(self, candidate: str | None = None, wait: bool = True) -> None: | ||
"""Trigger a switchover.""" | ||
# Try to trigger the switchover. | ||
if candidate is not None: | ||
|
@@ -631,8 +635,18 @@ def switchover(self, candidate: str | None = None) -> None: | |
|
||
# Check whether the switchover was unsuccessful. | ||
if r.status_code != 200: | ||
if ( | ||
r.status_code == 412 | ||
and r.text == "candidate name does not match with sync_standby" | ||
): | ||
logger.debug("Unit is not sync standby") | ||
raise SwitchoverNotSyncError() | ||
logger.warning(f"Switchover call failed with code {r.status_code} {r.text}") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Log Patroni's error code, the body should contain the reason. |
||
raise SwitchoverFailedError(f"received {r.status_code}") | ||
|
||
if not wait: | ||
return | ||
Comment on lines
+647
to
+648
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When running the action we shouldn't wait for the switchover itself to happen. |
||
|
||
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3), reraise=True): | ||
with attempt: | ||
new_primary = self.get_primary() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No RAFT, so no need to force.