Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions actions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,6 @@ create-replication:
default: default
get-primary:
description: Get the unit which is the primary/leader in the replication.
get-password:
description: Get the system user's password, which is used by charm.
It is for internal charm users and SHOULD NOT be used by applications.
params:
username:
type: string
description: The username, the default value 'operator'.
Possible values - operator, replication, rewind, patroni.
list-backups:
description: Lists backups in s3 storage.
pre-upgrade-check:
Expand All @@ -53,17 +45,6 @@ restore:
restore-to-time:
type: string
description: Point-in-time-recovery target in PSQL format.
set-password:
description: Change the system user's password, which is used by charm.
It is for internal charm users and SHOULD NOT be used by applications.
params:
username:
type: string
description: The username, the default value 'operator'.
Possible values - operator, replication, rewind.
password:
type: string
description: The password will be auto-generated if this option is not specified.
set-tls-private-key:
description: Set the private key, which will be used for certificate signing requests (CSR). Run for each unit separately.
params:
Expand Down
8 changes: 8 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,14 @@ options:
Allowed values are: from -1 to 86400.
type: int
default: -1
system-users:
type: secret
description: |
Configure the internal system users and their passwords. The passwords will
be auto-generated if this option is not set. It is for internal use only
and SHOULD NOT be used by applications. This needs to be a Juju Secret URI pointing
to a secret that contains the following content: `<username>: <password>`.
Possible users: backup, monitoring, operator, replication, rewind.
vacuum_autovacuum_analyze_scale_factor:
description: |
Specifies a fraction of the table size to add to autovacuum_vacuum_threshold when
Expand Down
170 changes: 102 additions & 68 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
InstallEvent,
LeaderElectedEvent,
RelationDepartedEvent,
SecretChangedEvent,
StartEvent,
)
from ops.framework import EventBase
Expand All @@ -50,6 +51,7 @@
MaintenanceStatus,
ModelError,
Relation,
SecretNotFoundError,
Unit,
WaitingStatus,
)
Expand Down Expand Up @@ -180,10 +182,10 @@ def __init__(self, *args):
self.framework.observe(self.on.get_primary_action, self._on_get_primary)
self.framework.observe(self.on[PEER].relation_changed, self._on_peer_relation_changed)
self.framework.observe(self.on.secret_changed, self._on_peer_relation_changed)
# add specific handler for updated system-user secrets
self.framework.observe(self.on.secret_changed, self._on_secret_changed)
self.framework.observe(self.on[PEER].relation_departed, self._on_peer_relation_departed)
self.framework.observe(self.on.start, self._on_start)
self.framework.observe(self.on.get_password_action, self._on_get_password)
self.framework.observe(self.on.set_password_action, self._on_set_password)
self.framework.observe(self.on.promote_to_primary_action, self._on_promote_to_primary)
self.framework.observe(self.on.update_status, self._on_update_status)
self.cluster_name = self.app.name
Expand Down Expand Up @@ -328,6 +330,25 @@ def remove_secret(self, scope: Scopes, key: str) -> None:
secret_key = self._translate_field_to_secret_key(key)
self.peer_relation_data(scope).delete_relation_data(peers.id, [secret_key])

def get_secret_from_id(self, secret_id: str) -> dict[str, str]:
    """Resolve the given id of a Juju secret and return the content as a dict.

    This method can be used to retrieve any secret, not just those used via the peer relation.
    If the secret is not owned by the charm, the charm has to be granted access to it.

    Args:
        secret_id (str): The id of the secret.

    Returns:
        dict: The content of the secret.

    Raises:
        SecretNotFoundError: Propagated unchanged from the secret lookup.
        ModelError: Propagated unchanged from the secret lookup.
    """
    # No local try/except: a handler that only re-raises adds nothing, and the
    # callers of this method already catch ModelError / SecretNotFoundError.
    # NOTE(review): refresh=True is presumably what makes the charm see the
    # latest revision of a user-updated secret -- confirm against the ops docs.
    return self.model.get_secret(id=secret_id).get_content(refresh=True)

@property
def is_cluster_initialised(self) -> bool:
"""Returns whether the cluster is already initialised."""
Expand Down Expand Up @@ -718,6 +739,17 @@ def _on_peer_relation_changed(self, event: HookEvent):

self._update_new_unit_status()

def _on_secret_changed(self, event: SecretChangedEvent) -> None:
    """Handle the secret_changed event for the configured system-users secret.

    Nothing happens on non-leader units, when the ``system_users`` config option is
    unset, or when the changed secret is not the one that option points to.
    Otherwise the system user passwords are updated; if that raises
    ``PostgreSQLUpdateUserPasswordError`` the event is deferred.
    """
    if not self.unit.is_leader():
        return

    configured_secret_id = self.config.system_users
    # Ignore secret changes that do not concern the configured system-users secret.
    if not configured_secret_id or configured_secret_id != event.secret.id:
        return

    try:
        self._update_admin_password(configured_secret_id)
    except PostgreSQLUpdateUserPasswordError:
        event.defer()

# Split off into a separate function because of the complexity of _on_peer_relation_changed.
def _start_stop_pgbackrest_service(self, event: HookEvent) -> None:
# Start or stop the pgBackRest TLS server service when the TLS certificate changes.
Expand Down Expand Up @@ -1048,8 +1080,19 @@ def _on_install(self, event: InstallEvent) -> None:

self.unit.status = WaitingStatus("waiting to start PostgreSQL")

def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
def _on_leader_elected(self, event: LeaderElectedEvent) -> None: # noqa: C901
"""Handle the leader-elected event."""
# consider configured system user passwords
system_user_passwords = {}
if admin_secret_id := self.config.system_users:
try:
system_user_passwords = self.get_secret_from_id(secret_id=admin_secret_id)
except (ModelError, SecretNotFoundError) as e:
# only display the error but don't return to make sure all users have passwords
logger.error(f"Error setting internal passwords: {e}")
self.unit.status = BlockedStatus("Password setting for system users failed.")
event.defer()

# The leader sets the needed passwords if they weren't set before.
for key in (
USER_PASSWORD_KEY,
Expand All @@ -1060,7 +1103,14 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
PATRONI_PASSWORD_KEY,
):
if self.get_secret(APP_SCOPE, key) is None:
self.set_secret(APP_SCOPE, key, new_password())
if key in system_user_passwords:
# use provided passwords for system-users if available
self.set_secret(APP_SCOPE, key, system_user_passwords[key])
logger.info(f"Using configured password for {key}")
else:
# generate a password for this user if not provided
self.set_secret(APP_SCOPE, key, new_password())
logger.info(f"Generated new password for {key}")

if self.has_raft_keys():
self._raft_reinitialisation()
Expand Down Expand Up @@ -1134,6 +1184,12 @@ def _on_config_changed(self, event) -> None:
# Enable and/or disable the extensions.
self.enable_disable_extensions()

if admin_secret_id := self.config.system_users:
try:
self._update_admin_password(admin_secret_id)
except PostgreSQLUpdateUserPasswordError:
event.defer()

def enable_disable_extensions(self, database: str | None = None) -> None:
"""Enable/disable PostgreSQL extensions set through config options.

Expand Down Expand Up @@ -1362,57 +1418,21 @@ def _start_replica(self, event) -> None:
# Configure Patroni in the replica but don't start it yet.
self._patroni.configure_patroni_on_unit()

def _on_get_password(self, event: ActionEvent) -> None:
    """Return the password of a charm-managed user as the action result.

    The user is taken from the ``username`` action parameter and falls back to
    ``USER`` when the parameter is absent. The action fails if the requested user
    is not listed in ``PASSWORD_USERS``.
    """
    requested_user = event.params.get("username", USER)

    if requested_user in PASSWORD_USERS:
        stored_password = self.get_secret(APP_SCOPE, f"{requested_user}-password")
        event.set_results({"password": stored_password})
        return

    allowed_users = ", ".join(PASSWORD_USERS)
    event.fail(
        "The action can be run only for users used by the charm or Patroni:"
        f" {allowed_users} not {requested_user}"
    )

def _on_set_password(self, event: ActionEvent) -> None:
"""Set the password for the specified user."""
# Only leader can write the new password into peer relation.
if not self.unit.is_leader():
event.fail("The action can be run only on leader unit")
return

username = event.params.get("username", USER)
if username not in SYSTEM_USERS:
event.fail(
f"The action can be run only for users used by the charm:"
f" {', '.join(SYSTEM_USERS)} not {username}"
)
return

password = event.params.get("password", new_password())

if password == self.get_secret(APP_SCOPE, f"{username}-password"):
event.log("The old and new passwords are equal.")
event.set_results({"password": password})
return

# Ensure all members are ready before trying to reload Patroni
# configuration to avoid errors (like the API not responding in
# one instance because PostgreSQL and/or Patroni are not ready).
def _update_admin_password(self, admin_secret_id: str) -> None:
"""Check if the password of a system user was changed and update it in the database."""
if not self._patroni.are_all_members_ready():
event.fail(
# Ensure all members are ready before reloading Patroni configuration to avoid errors
# e.g. API not responding in one instance because PostgreSQL / Patroni are not ready
raise PostgreSQLUpdateUserPasswordError(
"Failed changing the password: Not all members healthy or finished initial sync."
)
return

replication_offer_relation = self.model.get_relation(REPLICATION_OFFER_RELATION)
other_cluster_primary_ip = ""
if (
replication_offer_relation is not None
and not self.async_replication.is_primary_cluster()
):
# Update the password in the other cluster PostgreSQL primary instance.
other_cluster_endpoints = self.async_replication.get_all_primary_cluster_endpoints()
other_cluster_primary = self._patroni.get_primary(
alternative_endpoints=other_cluster_endpoints
Expand All @@ -1422,37 +1442,51 @@ def _on_set_password(self, event: ActionEvent) -> None:
for unit in replication_offer_relation.units
if unit.name.replace("/", "-") == other_cluster_primary
)
try:
self.postgresql.update_user_password(
username, password, database_host=other_cluster_primary_ip
)
except PostgreSQLUpdateUserPasswordError as e:
logger.exception(e)
event.fail("Failed changing the password.")
return
elif self.model.get_relation(REPLICATION_CONSUMER_RELATION) is not None:
event.fail(
"Failed changing the password: This action can be ran only in the cluster from the offer side."
logger.error(
"Failed changing the password: This can be ran only in the cluster from the offer side."
)
self.unit.status = BlockedStatus("Password update for system users failed.")
return
else:
# Update the password in this cluster PostgreSQL primary instance.
try:
self.postgresql.update_user_password(username, password)
except PostgreSQLUpdateUserPasswordError as e:
logger.exception(e)
event.fail("Failed changing the password.")
return

# Update the password in the secret store.
self.set_secret(APP_SCOPE, f"{username}-password", password)
try:
# get the secret content and check each user configured there
# only SYSTEM_USERS with changed passwords are processed, all others ignored
updated_passwords = self.get_secret_from_id(secret_id=admin_secret_id)
for user, password in list(updated_passwords.items()):
if user not in SYSTEM_USERS:
logger.error(
f"Can only update system users: {', '.join(SYSTEM_USERS)} not {user}"
)
updated_passwords.pop(user)
continue
if password == self.get_secret(APP_SCOPE, f"{user}-password"):
updated_passwords.pop(user)
except (ModelError, SecretNotFoundError) as e:
logger.error(f"Error updating internal passwords: {e}")
self.unit.status = BlockedStatus("Password update for system users failed.")
return

try:
# perform the actual password update for the remaining users
for user, password in updated_passwords.items():
logger.info(f"Updating password for user {user}")
self.postgresql.update_user_password(
user,
password,
database_host=other_cluster_primary_ip if other_cluster_primary_ip else None,
)
# Update the password in the secret store after updating it in the database
self.set_secret(APP_SCOPE, f"{user}-password", password)
except PostgreSQLUpdateUserPasswordError as e:
logger.exception(e)
self.unit.status = BlockedStatus("Password update for system users failed.")
return

# Update and reload Patroni configuration in this unit to use the new password.
# Other units Patroni configuration will be reloaded in the peer relation changed event.
self.update_config()

event.set_results({"password": password})

def _on_promote_to_primary(self, event: ActionEvent) -> None:
if event.params.get("scope") == "cluster":
return self.async_replication.promote_to_primary(event)
Expand Down
1 change: 1 addition & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ class CharmConfig(BaseConfigModel):
storage_default_table_access_method: str | None
storage_gin_pending_list_limit: int | None
storage_old_snapshot_threshold: int | None
system_users: str | None
vacuum_autovacuum_analyze_scale_factor: float | None
vacuum_autovacuum_analyze_threshold: int | None
vacuum_autovacuum_freeze_max_age: int | None
Expand Down
1 change: 1 addition & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
PATRONI_PASSWORD_KEY = "patroni-password" # noqa: S105
SECRET_INTERNAL_LABEL = "internal-secret" # noqa: S105
SECRET_DELETED_LABEL = "None" # noqa: S105
SYSTEM_USERS_PASSWORD_CONFIG = "system-users" # noqa: S105

APP_SCOPE = "app"
UNIT_SCOPE = "unit"
Expand Down
12 changes: 3 additions & 9 deletions tests/integration/ha_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,7 @@ async def loop_wait(ops_test: OpsTest) -> None:
initial_loop_wait = await get_patroni_setting(ops_test, "loop_wait")
yield
# Rollback to the initial configuration.
app = await app_name(ops_test)
patroni_password = await get_password(
ops_test, ops_test.model.applications[app].units[0].name, "patroni"
)
patroni_password = await get_password(ops_test, "patroni")
await change_patroni_setting(
ops_test, "loop_wait", initial_loop_wait, patroni_password, use_random_unit=True
)
Expand All @@ -57,10 +54,7 @@ async def loop_wait(ops_test: OpsTest) -> None:
async def primary_start_timeout(ops_test: OpsTest) -> None:
"""Temporarily change the primary start timeout configuration."""
# Change the parameter that makes the primary reelection faster.
app = await app_name(ops_test)
patroni_password = await get_password(
ops_test, ops_test.model.applications[app].units[0].name, "patroni"
)
patroni_password = await get_password(ops_test, "patroni")
initial_primary_start_timeout = await get_patroni_setting(ops_test, "primary_start_timeout")
await change_patroni_setting(ops_test, "primary_start_timeout", 0, patroni_password)
yield
Expand Down Expand Up @@ -104,7 +98,7 @@ async def wal_settings(ops_test: OpsTest) -> None:
for unit in ops_test.model.applications[app].units:
# Start Patroni if it was previously stopped.
await run_command_on_unit(ops_test, unit.name, "snap start charmed-postgresql.patroni")
patroni_password = await get_password(ops_test, unit.name, "patroni")
patroni_password = await get_password(ops_test, "patroni")

# Rollback to the initial settings.
await change_wal_settings(
Expand Down
Loading
Loading