Skip to content

Commit d7e5a8c

Browse files
authored
[DPE-6189] Manage passwords with user secrets (#833)
backported from `postgresql-k8s-operator` This PR introduces juju user secrets for managing passwords. It contains the following changes: **Functionality:** - no longer support `get-password` and `set-password` actions - new config option `system-users` to configure a secret that includes the system users' password(s) - it is no longer possible to leave out the password parameter to trigger a password rotation as secrets cannot have empty values **Implementation:** - add handler for `secret_changed` event to `charm.py` - add method `get_secret_from_id()` to `charm.py` - trigger updating the system user passwords on `config_changed` - consider pre-configured system user passwords on `leader_elected` - remove `get_password` handler - replace `set_password` handler with `_update_admin_passwords()` method - `_update_admin_passwords()` is responsible for the actual business logic: - retrieving the passwords from the configured secret - checking if password updates need to be performed - calling the `postgresql.update_user_password()` method **Testing:** - remove integration tests that are no longer required, such as testing for empty password or testing if the output of get-password is the same as the output of juju show-secret - adjust integration test helpers for get_password() and set_password to use secrets - remove unit tests for get-/set-password actions
1 parent 528d7d4 commit d7e5a8c

22 files changed

+244
-389
lines changed

actions.yaml

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,6 @@ create-replication:
2020
default: default
2121
get-primary:
2222
description: Get the unit which is the primary/leader in the replication.
23-
get-password:
24-
description: Get the system user's password, which is used by charm.
25-
It is for internal charm users and SHOULD NOT be used by applications.
26-
params:
27-
username:
28-
type: string
29-
description: The username, the default value 'operator'.
30-
Possible values - operator, replication, rewind, patroni.
3123
list-backups:
3224
description: Lists backups in s3 storage.
3325
pre-upgrade-check:
@@ -53,17 +45,6 @@ restore:
5345
restore-to-time:
5446
type: string
5547
description: Point-in-time-recovery target in PSQL format.
56-
set-password:
57-
description: Change the system user's password, which is used by charm.
58-
It is for internal charm users and SHOULD NOT be used by applications.
59-
params:
60-
username:
61-
type: string
62-
description: The username, the default value 'operator'.
63-
Possible values - operator, replication, rewind.
64-
password:
65-
type: string
66-
description: The password will be auto-generated if this option is not specified.
6748
set-tls-private-key:
6849
description: Set the private key, which will be used for certificate signing requests (CSR). Run for each unit separately.
6950
params:

config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,14 @@ options:
761761
Allowed values are: from -1 to 86400.
762762
type: int
763763
default: -1
764+
system-users:
765+
type: secret
766+
description: |
767+
Configure the internal system users and their passwords. The passwords will
768+
be auto-generated if this option is not set. It is for internal use only
769+
and SHOULD NOT be used by applications. This needs to be a Juju Secret URI pointing
770+
to a secret that contains the following content: `<username>: <password>`.
771+
Possible users: backup, monitoring, operator, replication, rewind.
764772
vacuum_autovacuum_analyze_scale_factor:
765773
description: |
766774
Specifies a fraction of the table size to add to autovacuum_vacuum_threshold when

src/charm.py

Lines changed: 102 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
InstallEvent,
4242
LeaderElectedEvent,
4343
RelationDepartedEvent,
44+
SecretChangedEvent,
4445
StartEvent,
4546
)
4647
from ops.framework import EventBase
@@ -50,6 +51,7 @@
5051
MaintenanceStatus,
5152
ModelError,
5253
Relation,
54+
SecretNotFoundError,
5355
Unit,
5456
WaitingStatus,
5557
)
@@ -180,10 +182,10 @@ def __init__(self, *args):
180182
self.framework.observe(self.on.get_primary_action, self._on_get_primary)
181183
self.framework.observe(self.on[PEER].relation_changed, self._on_peer_relation_changed)
182184
self.framework.observe(self.on.secret_changed, self._on_peer_relation_changed)
185+
# add specific handler for updated system-user secrets
186+
self.framework.observe(self.on.secret_changed, self._on_secret_changed)
183187
self.framework.observe(self.on[PEER].relation_departed, self._on_peer_relation_departed)
184188
self.framework.observe(self.on.start, self._on_start)
185-
self.framework.observe(self.on.get_password_action, self._on_get_password)
186-
self.framework.observe(self.on.set_password_action, self._on_set_password)
187189
self.framework.observe(self.on.promote_to_primary_action, self._on_promote_to_primary)
188190
self.framework.observe(self.on.update_status, self._on_update_status)
189191
self.cluster_name = self.app.name
@@ -328,6 +330,25 @@ def remove_secret(self, scope: Scopes, key: str) -> None:
328330
secret_key = self._translate_field_to_secret_key(key)
329331
self.peer_relation_data(scope).delete_relation_data(peers.id, [secret_key])
330332

333+
def get_secret_from_id(self, secret_id: str) -> dict[str, str]:
334+
"""Resolve the given id of a Juju secret and return the content as a dict.
335+
336+
This method can be used to retrieve any secret, not just those used via the peer relation.
337+
If the secret is not owned by the charm, it has to be granted access to it.
338+
339+
Args:
340+
secret_id (str): The id of the secret.
341+
342+
Returns:
343+
dict: The content of the secret.
344+
"""
345+
try:
346+
secret_content = self.model.get_secret(id=secret_id).get_content(refresh=True)
347+
except (SecretNotFoundError, ModelError):
348+
raise
349+
350+
return secret_content
351+
331352
@property
332353
def is_cluster_initialised(self) -> bool:
333354
"""Returns whether the cluster is already initialised."""
@@ -718,6 +739,17 @@ def _on_peer_relation_changed(self, event: HookEvent):
718739

719740
self._update_new_unit_status()
720741

742+
def _on_secret_changed(self, event: SecretChangedEvent) -> None:
743+
"""Handle the secret_changed event."""
744+
if not self.unit.is_leader():
745+
return
746+
747+
if (admin_secret_id := self.config.system_users) and admin_secret_id == event.secret.id:
748+
try:
749+
self._update_admin_password(admin_secret_id)
750+
except PostgreSQLUpdateUserPasswordError:
751+
event.defer()
752+
721753
# Split off into separate function, because of complexity _on_peer_relation_changed
722754
def _start_stop_pgbackrest_service(self, event: HookEvent) -> None:
723755
# Start or stop the pgBackRest TLS server service when TLS certificate change.
@@ -1048,8 +1080,19 @@ def _on_install(self, event: InstallEvent) -> None:
10481080

10491081
self.unit.status = WaitingStatus("waiting to start PostgreSQL")
10501082

1051-
def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
1083+
def _on_leader_elected(self, event: LeaderElectedEvent) -> None: # noqa: C901
10521084
"""Handle the leader-elected event."""
1085+
# consider configured system user passwords
1086+
system_user_passwords = {}
1087+
if admin_secret_id := self.config.system_users:
1088+
try:
1089+
system_user_passwords = self.get_secret_from_id(secret_id=admin_secret_id)
1090+
except (ModelError, SecretNotFoundError) as e:
1091+
# only display the error but don't return to make sure all users have passwords
1092+
logger.error(f"Error setting internal passwords: {e}")
1093+
self.unit.status = BlockedStatus("Password setting for system users failed.")
1094+
event.defer()
1095+
10531096
# The leader sets the needed passwords if they weren't set before.
10541097
for key in (
10551098
USER_PASSWORD_KEY,
@@ -1060,7 +1103,14 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
10601103
PATRONI_PASSWORD_KEY,
10611104
):
10621105
if self.get_secret(APP_SCOPE, key) is None:
1063-
self.set_secret(APP_SCOPE, key, new_password())
1106+
if key in system_user_passwords:
1107+
# use provided passwords for system-users if available
1108+
self.set_secret(APP_SCOPE, key, system_user_passwords[key])
1109+
logger.info(f"Using configured password for {key}")
1110+
else:
1111+
# generate a password for this user if not provided
1112+
self.set_secret(APP_SCOPE, key, new_password())
1113+
logger.info(f"Generated new password for {key}")
10641114

10651115
if self.has_raft_keys():
10661116
self._raft_reinitialisation()
@@ -1134,6 +1184,12 @@ def _on_config_changed(self, event) -> None:
11341184
# Enable and/or disable the extensions.
11351185
self.enable_disable_extensions()
11361186

1187+
if admin_secret_id := self.config.system_users:
1188+
try:
1189+
self._update_admin_password(admin_secret_id)
1190+
except PostgreSQLUpdateUserPasswordError:
1191+
event.defer()
1192+
11371193
def enable_disable_extensions(self, database: str | None = None) -> None:
11381194
"""Enable/disable PostgreSQL extensions set through config options.
11391195
@@ -1362,57 +1418,21 @@ def _start_replica(self, event) -> None:
13621418
# Configure Patroni in the replica but don't start it yet.
13631419
self._patroni.configure_patroni_on_unit()
13641420

1365-
def _on_get_password(self, event: ActionEvent) -> None:
1366-
"""Returns the password for a user as an action response.
1367-
1368-
If no user is provided, the password of the operator user is returned.
1369-
"""
1370-
username = event.params.get("username", USER)
1371-
if username not in PASSWORD_USERS:
1372-
event.fail(
1373-
f"The action can be run only for users used by the charm or Patroni:"
1374-
f" {', '.join(PASSWORD_USERS)} not {username}"
1375-
)
1376-
return
1377-
event.set_results({"password": self.get_secret(APP_SCOPE, f"{username}-password")})
1378-
1379-
def _on_set_password(self, event: ActionEvent) -> None:
1380-
"""Set the password for the specified user."""
1381-
# Only leader can write the new password into peer relation.
1382-
if not self.unit.is_leader():
1383-
event.fail("The action can be run only on leader unit")
1384-
return
1385-
1386-
username = event.params.get("username", USER)
1387-
if username not in SYSTEM_USERS:
1388-
event.fail(
1389-
f"The action can be run only for users used by the charm:"
1390-
f" {', '.join(SYSTEM_USERS)} not {username}"
1391-
)
1392-
return
1393-
1394-
password = event.params.get("password", new_password())
1395-
1396-
if password == self.get_secret(APP_SCOPE, f"{username}-password"):
1397-
event.log("The old and new passwords are equal.")
1398-
event.set_results({"password": password})
1399-
return
1400-
1401-
# Ensure all members are ready before trying to reload Patroni
1402-
# configuration to avoid errors (like the API not responding in
1403-
# one instance because PostgreSQL and/or Patroni are not ready).
1421+
def _update_admin_password(self, admin_secret_id: str) -> None:
1422+
"""Check if the password of a system user was changed and update it in the database."""
14041423
if not self._patroni.are_all_members_ready():
1405-
event.fail(
1424+
# Ensure all members are ready before reloading Patroni configuration to avoid errors
1425+
# e.g. API not responding in one instance because PostgreSQL / Patroni are not ready
1426+
raise PostgreSQLUpdateUserPasswordError(
14061427
"Failed changing the password: Not all members healthy or finished initial sync."
14071428
)
1408-
return
14091429

14101430
replication_offer_relation = self.model.get_relation(REPLICATION_OFFER_RELATION)
1431+
other_cluster_primary_ip = ""
14111432
if (
14121433
replication_offer_relation is not None
14131434
and not self.async_replication.is_primary_cluster()
14141435
):
1415-
# Update the password in the other cluster PostgreSQL primary instance.
14161436
other_cluster_endpoints = self.async_replication.get_all_primary_cluster_endpoints()
14171437
other_cluster_primary = self._patroni.get_primary(
14181438
alternative_endpoints=other_cluster_endpoints
@@ -1422,37 +1442,51 @@ def _on_set_password(self, event: ActionEvent) -> None:
14221442
for unit in replication_offer_relation.units
14231443
if unit.name.replace("/", "-") == other_cluster_primary
14241444
)
1425-
try:
1426-
self.postgresql.update_user_password(
1427-
username, password, database_host=other_cluster_primary_ip
1428-
)
1429-
except PostgreSQLUpdateUserPasswordError as e:
1430-
logger.exception(e)
1431-
event.fail("Failed changing the password.")
1432-
return
14331445
elif self.model.get_relation(REPLICATION_CONSUMER_RELATION) is not None:
1434-
event.fail(
1435-
"Failed changing the password: This action can be ran only in the cluster from the offer side."
1446+
logger.error(
1447+
"Failed changing the password: This can be ran only in the cluster from the offer side."
14361448
)
1449+
self.unit.status = BlockedStatus("Password update for system users failed.")
14371450
return
1438-
else:
1439-
# Update the password in this cluster PostgreSQL primary instance.
1440-
try:
1441-
self.postgresql.update_user_password(username, password)
1442-
except PostgreSQLUpdateUserPasswordError as e:
1443-
logger.exception(e)
1444-
event.fail("Failed changing the password.")
1445-
return
14461451

1447-
# Update the password in the secret store.
1448-
self.set_secret(APP_SCOPE, f"{username}-password", password)
1452+
try:
1453+
# get the secret content and check each user configured there
1454+
# only SYSTEM_USERS with changed passwords are processed, all others ignored
1455+
updated_passwords = self.get_secret_from_id(secret_id=admin_secret_id)
1456+
for user, password in list(updated_passwords.items()):
1457+
if user not in SYSTEM_USERS:
1458+
logger.error(
1459+
f"Can only update system users: {', '.join(SYSTEM_USERS)} not {user}"
1460+
)
1461+
updated_passwords.pop(user)
1462+
continue
1463+
if password == self.get_secret(APP_SCOPE, f"{user}-password"):
1464+
updated_passwords.pop(user)
1465+
except (ModelError, SecretNotFoundError) as e:
1466+
logger.error(f"Error updating internal passwords: {e}")
1467+
self.unit.status = BlockedStatus("Password update for system users failed.")
1468+
return
1469+
1470+
try:
1471+
# perform the actual password update for the remaining users
1472+
for user, password in updated_passwords.items():
1473+
logger.info(f"Updating password for user {user}")
1474+
self.postgresql.update_user_password(
1475+
user,
1476+
password,
1477+
database_host=other_cluster_primary_ip if other_cluster_primary_ip else None,
1478+
)
1479+
# Update the password in the secret store after updating it in the database
1480+
self.set_secret(APP_SCOPE, f"{user}-password", password)
1481+
except PostgreSQLUpdateUserPasswordError as e:
1482+
logger.exception(e)
1483+
self.unit.status = BlockedStatus("Password update for system users failed.")
1484+
return
14491485

14501486
# Update and reload Patroni configuration in this unit to use the new password.
14511487
# Other units Patroni configuration will be reloaded in the peer relation changed event.
14521488
self.update_config()
14531489

1454-
event.set_results({"password": password})
1455-
14561490
def _on_promote_to_primary(self, event: ActionEvent) -> None:
14571491
if event.params.get("scope") == "cluster":
14581492
return self.async_replication.promote_to_primary(event)

src/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ class CharmConfig(BaseConfigModel):
165165
storage_default_table_access_method: str | None
166166
storage_gin_pending_list_limit: int | None
167167
storage_old_snapshot_threshold: int | None
168+
system_users: str | None
168169
vacuum_autovacuum_analyze_scale_factor: float | None
169170
vacuum_autovacuum_analyze_threshold: int | None
170171
vacuum_autovacuum_freeze_max_age: int | None

src/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
PATRONI_PASSWORD_KEY = "patroni-password" # noqa: S105
6767
SECRET_INTERNAL_LABEL = "internal-secret" # noqa: S105
6868
SECRET_DELETED_LABEL = "None" # noqa: S105
69+
SYSTEM_USERS_PASSWORD_CONFIG = "system-users" # noqa: S105
6970

7071
APP_SCOPE = "app"
7172
UNIT_SCOPE = "unit"

tests/integration/ha_tests/conftest.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,7 @@ async def loop_wait(ops_test: OpsTest) -> None:
4444
initial_loop_wait = await get_patroni_setting(ops_test, "loop_wait")
4545
yield
4646
# Rollback to the initial configuration.
47-
app = await app_name(ops_test)
48-
patroni_password = await get_password(
49-
ops_test, ops_test.model.applications[app].units[0].name, "patroni"
50-
)
47+
patroni_password = await get_password(ops_test, "patroni")
5148
await change_patroni_setting(
5249
ops_test, "loop_wait", initial_loop_wait, patroni_password, use_random_unit=True
5350
)
@@ -57,10 +54,7 @@ async def loop_wait(ops_test: OpsTest) -> None:
5754
async def primary_start_timeout(ops_test: OpsTest) -> None:
5855
"""Temporary change the primary start timeout configuration."""
5956
# Change the parameter that makes the primary reelection faster.
60-
app = await app_name(ops_test)
61-
patroni_password = await get_password(
62-
ops_test, ops_test.model.applications[app].units[0].name, "patroni"
63-
)
57+
patroni_password = await get_password(ops_test, "patroni")
6458
initial_primary_start_timeout = await get_patroni_setting(ops_test, "primary_start_timeout")
6559
await change_patroni_setting(ops_test, "primary_start_timeout", 0, patroni_password)
6660
yield
@@ -104,7 +98,7 @@ async def wal_settings(ops_test: OpsTest) -> None:
10498
for unit in ops_test.model.applications[app].units:
10599
# Start Patroni if it was previously stopped.
106100
await run_command_on_unit(ops_test, unit.name, "snap start charmed-postgresql.patroni")
107-
patroni_password = await get_password(ops_test, unit.name, "patroni")
101+
patroni_password = await get_password(ops_test, "patroni")
108102

109103
# Rollback to the initial settings.
110104
await change_wal_settings(

0 commit comments

Comments
 (0)