-
Notifications
You must be signed in to change notification settings - Fork 26
[MISC] Fix timeouts in 14 to 16 merge #959
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4bf37c0
0f7eaa6
9ca41e7
e1b0c1c
07ff24a
9d438d0
1b556d5
aeb823f
2fc2cc7
ea5a823
7db1967
5d1cf58
709e9f3
f8600d2
3c98a2c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2488,22 +2488,30 @@ def relations_user_databases_map(self) -> dict: | |
if not self.is_cluster_initialised or not self._patroni.member_started: | ||
return {USER: "all", REPLICATION_USER: "all", REWIND_USER: "all"} | ||
user_database_map = {} | ||
for user in self.postgresql.list_users_from_relation( | ||
current_host=self.is_connectivity_enabled | ||
): | ||
user_database_map[user] = ",".join( | ||
self.postgresql.list_accessible_databases_for_user( | ||
user, current_host=self.is_connectivity_enabled | ||
try: | ||
for user in self.postgresql.list_users_from_relation( | ||
current_host=self.is_connectivity_enabled | ||
): | ||
user_database_map[user] = ",".join( | ||
self.postgresql.list_accessible_databases_for_user( | ||
user, current_host=self.is_connectivity_enabled | ||
) | ||
) | ||
) | ||
# Add "landscape" superuser by default to the list when the "db-admin" relation is present. | ||
if any(True for relation in self.client_relations if relation.name == "db-admin"): | ||
user_database_map["landscape"] = "all" | ||
if self.postgresql.list_access_groups(current_host=self.is_connectivity_enabled) != set( | ||
ACCESS_GROUPS | ||
): | ||
user_database_map.update({USER: "all", REPLICATION_USER: "all", REWIND_USER: "all"}) | ||
return user_database_map | ||
# Add "landscape" superuser by default to the list when the "db-admin" relation is present. | ||
if any(True for relation in self.client_relations if relation.name == "db-admin"): | ||
user_database_map["landscape"] = "all" | ||
if self.postgresql.list_access_groups( | ||
current_host=self.is_connectivity_enabled | ||
) != set(ACCESS_GROUPS): | ||
user_database_map.update({ | ||
USER: "all", | ||
REPLICATION_USER: "all", | ||
REWIND_USER: "all", | ||
}) | ||
return user_database_map | ||
except PostgreSQLListUsersError: | ||
logger.debug("relations_user_databases_map: Unable to get users") | ||
return {USER: "all", REPLICATION_USER: "all", REWIND_USER: "all"} | ||
Comment on lines
+2512
to
+2514
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handling in the property, because otherwise we cannot regenerate the configuration. |
||
|
||
def override_patroni_restart_condition( | ||
self, new_condition: str, repeat_cause: str | None | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -495,6 +495,8 @@ def member_started(self) -> bool: | |
True if services is ready False otherwise. Retries over a period of 60 seconds times to | ||
allow server time to start up. | ||
""" | ||
if not self.is_patroni_running(): | ||
return False | ||
Comment on lines
+498
to
+499
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there's no service running, there's no point calling the rest API. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we try to backport this to plain 14/edge. Should it also be included in member_inactive() below (and maybe all other There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If the merge goes well, we should backport both this and the pg_hba check.
We can give it a go, but lets get the 14/edge changes first. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Backport PR: #963. |
||
try: | ||
response = self.get_patroni_health() | ||
except RetryError: | ||
|
@@ -979,6 +981,16 @@ def reload_patroni_configuration(self): | |
timeout=PATRONI_TIMEOUT, | ||
) | ||
|
||
def is_patroni_running(self) -> bool: | ||
"""Check if the Patroni service is running.""" | ||
try: | ||
cache = snap.SnapCache() | ||
selected_snap = cache["charmed-postgresql"] | ||
return selected_snap.services["patroni"]["active"] | ||
except snap.SnapError as e: | ||
logger.debug(f"Failed to check Patroni service: {e}") | ||
return False | ||
|
||
def restart_patroni(self) -> bool: | ||
"""Restart Patroni. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
|
||
import logging | ||
import typing | ||
from datetime import datetime | ||
|
||
from charms.data_platform_libs.v0.data_interfaces import ( | ||
DatabaseProvides, | ||
|
@@ -23,6 +24,7 @@ | |
from ops.charm import RelationBrokenEvent, RelationChangedEvent | ||
from ops.framework import Object | ||
from ops.model import ActiveStatus, BlockedStatus, Relation | ||
from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed | ||
|
||
from constants import ( | ||
ALL_CLIENT_RELATIONS, | ||
|
@@ -71,9 +73,6 @@ def __init__(self, charm: "PostgresqlOperatorCharm", relation_name: str = "datab | |
self.framework.observe( | ||
self.database_provides.on.database_requested, self._on_database_requested | ||
) | ||
self.framework.observe( | ||
charm.on[self.relation_name].relation_changed, self._on_relation_changed | ||
) | ||
|
||
@staticmethod | ||
def _sanitize_extra_roles(extra_roles: str | None) -> list[str]: | ||
|
@@ -153,27 +152,16 @@ def _on_database_requested(self, event: DatabaseRequestedEvent) -> None: | |
) | ||
) | ||
|
||
def _on_relation_changed(self, event: RelationChangedEvent) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Checking for pg_hba changes before releasing |
||
# Check for some conditions before trying to access the PostgreSQL instance. | ||
if not self.charm.is_cluster_initialised: | ||
logger.debug( | ||
"Deferring on_relation_changed: Cluster must be initialized before configuration can be updated with relation users" | ||
) | ||
event.defer() | ||
return | ||
|
||
user = f"relation-{event.relation.id}" | ||
# Try to wait for pg_hba trigger | ||
try: | ||
if user not in self.charm.postgresql.list_users(): | ||
logger.debug("Deferring on_relation_changed: user was not created yet") | ||
event.defer() | ||
return | ||
except PostgreSQLListUsersError: | ||
logger.debug("Deferring on_relation_changed: failed to list users") | ||
event.defer() | ||
return | ||
|
||
self.charm.update_config() | ||
for attempt in Retrying(stop=stop_after_attempt(3), wait=wait_fixed(1)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hold the hook until the user is added to pg_hba or timeout. Locally for me it's happening on the first try, but we may want to increase the timeout to accommodate for variance in hw/storage. On timeout, the topology observer should detect the changes when they become available. |
||
with attempt: | ||
self.charm.postgresql.is_user_in_hba(user) | ||
self.charm.unit_peer_data.update({ | ||
"pg_hba_needs_update_timestamp": str(datetime.now()) | ||
}) | ||
except RetryError: | ||
logger.warning("database requested: Unable to check pg_hba rule update") | ||
|
||
def _on_relation_broken(self, event: RelationBrokenEvent) -> None: | ||
"""Correctly update the status.""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1018,7 +1018,12 @@ async def run_command_on_unit(ops_test: OpsTest, unit_name: str, command: str) - | |
|
||
|
||
async def scale_application( | ||
ops_test: OpsTest, application_name: str, count: int, model: Model = None | ||
ops_test: OpsTest, | ||
application_name: str, | ||
count: int, | ||
model: Model = None, | ||
timeout=2000, | ||
idle_period: int = 30, | ||
Comment on lines
+1025
to
+1026
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Async seems to fit in the timeout/idle periods now, but leaving the parameters exposed in case we want to tweak. |
||
) -> None: | ||
"""Scale a given application to a specific unit count. | ||
|
||
|
@@ -1027,6 +1032,8 @@ async def scale_application( | |
application_name: The name of the application | ||
count: The desired number of units to scale to | ||
model: The model to scale the application in | ||
timeout: timeout period | ||
idle_period: idle period | ||
""" | ||
if model is None: | ||
model = ops_test.model | ||
|
@@ -1039,8 +1046,8 @@ async def scale_application( | |
await model.wait_for_idle( | ||
apps=[application_name], | ||
status="active", | ||
timeout=2000, | ||
idle_period=30, | ||
timeout=timeout, | ||
idle_period=idle_period, | ||
wait_for_exact_units=count, | ||
) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check if a user has been added to pg_hba