diff --git a/actions.yaml b/actions.yaml index 7069c4668e..ae1878d638 100644 --- a/actions.yaml +++ b/actions.yaml @@ -20,14 +20,6 @@ create-replication: default: default get-primary: description: Get the unit which is the primary/leader in the replication. -get-password: - description: Get the system user's password, which is used by charm. - It is for internal charm users and SHOULD NOT be used by applications. - params: - username: - type: string - description: The username, the default value 'operator'. - Possible values - operator, replication, rewind, patroni. list-backups: description: Lists backups in s3 storage. pre-upgrade-check: @@ -53,17 +45,6 @@ restore: restore-to-time: type: string description: Point-in-time-recovery target in PSQL format. -set-password: - description: Change the system user's password, which is used by charm. - It is for internal charm users and SHOULD NOT be used by applications. - params: - username: - type: string - description: The username, the default value 'operator'. - Possible values - operator, replication, rewind. - password: - type: string - description: The password will be auto-generated if this option is not specified. set-tls-private-key: description: Set the private key, which will be used for certificate signing requests (CSR). Run for each unit separately. params: diff --git a/config.yaml b/config.yaml index 8819015d79..244a1e97c6 100644 --- a/config.yaml +++ b/config.yaml @@ -761,6 +761,14 @@ options: Allowed values are: from -1 to 86400. type: int default: -1 + system-users: + type: secret + description: | + Configure the internal system users and their passwords. The passwords will + be auto-generated if this option is not set. It is for internal use only + and SHOULD NOT be used by applications. This needs to be a Juju Secret URI pointing + to a secret that contains the following content: `: `. + Possible users: backup, monitoring, operator, replication, rewind. vacuum_autovacuum_analyze_scale_factor: description: | Specifies a fraction of the table size to add to autovacuum_vacuum_threshold when diff --git a/src/charm.py b/src/charm.py index 2cd029c6da..67f47d8108 100755 --- a/src/charm.py +++ b/src/charm.py @@ -41,6 +41,7 @@ InstallEvent, LeaderElectedEvent, RelationDepartedEvent, + SecretChangedEvent, StartEvent, ) from ops.framework import EventBase @@ -50,6 +51,7 @@ MaintenanceStatus, ModelError, Relation, + SecretNotFoundError, Unit, WaitingStatus, ) @@ -180,10 +182,10 @@ def __init__(self, *args): self.framework.observe(self.on.get_primary_action, self._on_get_primary) self.framework.observe(self.on[PEER].relation_changed, self._on_peer_relation_changed) self.framework.observe(self.on.secret_changed, self._on_peer_relation_changed) + # add specific handler for updated system-user secrets + self.framework.observe(self.on.secret_changed, self._on_secret_changed) self.framework.observe(self.on[PEER].relation_departed, self._on_peer_relation_departed) self.framework.observe(self.on.start, self._on_start) - self.framework.observe(self.on.get_password_action, self._on_get_password) - self.framework.observe(self.on.set_password_action, self._on_set_password) self.framework.observe(self.on.promote_to_primary_action, self._on_promote_to_primary) self.framework.observe(self.on.update_status, self._on_update_status) self.cluster_name = self.app.name @@ -328,6 +330,25 @@ def remove_secret(self, scope: Scopes, key: str) -> None: secret_key = self._translate_field_to_secret_key(key) self.peer_relation_data(scope).delete_relation_data(peers.id, [secret_key]) + def get_secret_from_id(self, secret_id: str) -> dict[str, str]: + """Resolve the given id of a Juju secret and return the content as a dict. + + This method can be used to retrieve any secret, not just those used via the peer relation. + If the secret is not owned by the charm, it has to be granted access to it. + + Args: + secret_id (str): The id of the secret. + + Returns: + dict: The content of the secret. + """ + try: + secret_content = self.model.get_secret(id=secret_id).get_content(refresh=True) + except (SecretNotFoundError, ModelError): + raise + + return secret_content + @property def is_cluster_initialised(self) -> bool: """Returns whether the cluster is already initialised.""" @@ -718,6 +739,17 @@ def _on_peer_relation_changed(self, event: HookEvent): self._update_new_unit_status() + def _on_secret_changed(self, event: SecretChangedEvent) -> None: + """Handle the secret_changed event.""" + if not self.unit.is_leader(): + return + + if (admin_secret_id := self.config.system_users) and admin_secret_id == event.secret.id: + try: + self._update_admin_password(admin_secret_id) + except PostgreSQLUpdateUserPasswordError: + event.defer() + # Split off into separate function, because of complexity _on_peer_relation_changed def _start_stop_pgbackrest_service(self, event: HookEvent) -> None: # Start or stop the pgBackRest TLS server service when TLS certificate change. @@ -1048,8 +1080,19 @@ def _on_install(self, event: InstallEvent) -> None: self.unit.status = WaitingStatus("waiting to start PostgreSQL") - def _on_leader_elected(self, event: LeaderElectedEvent) -> None: + def _on_leader_elected(self, event: LeaderElectedEvent) -> None: # noqa: C901 """Handle the leader-elected event.""" + # consider configured system user passwords + system_user_passwords = {} + if admin_secret_id := self.config.system_users: + try: + system_user_passwords = self.get_secret_from_id(secret_id=admin_secret_id) + except (ModelError, SecretNotFoundError) as e: + # only display the error but don't return to make sure all users have passwords + logger.error(f"Error setting internal passwords: {e}") + self.unit.status = BlockedStatus("Password setting for system users failed.") + event.defer() + # The leader sets the needed passwords if they weren't set before. for key in ( USER_PASSWORD_KEY, @@ -1060,7 +1103,14 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None: PATRONI_PASSWORD_KEY, ): if self.get_secret(APP_SCOPE, key) is None: - self.set_secret(APP_SCOPE, key, new_password()) + if key in system_user_passwords: + # use provided passwords for system-users if available + self.set_secret(APP_SCOPE, key, system_user_passwords[key]) + logger.info(f"Using configured password for {key}") + else: + # generate a password for this user if not provided + self.set_secret(APP_SCOPE, key, new_password()) + logger.info(f"Generated new password for {key}") if self.has_raft_keys(): self._raft_reinitialisation() @@ -1134,6 +1184,12 @@ def _on_config_changed(self, event) -> None: # Enable and/or disable the extensions. self.enable_disable_extensions() + if admin_secret_id := self.config.system_users: + try: + self._update_admin_password(admin_secret_id) + except PostgreSQLUpdateUserPasswordError: + event.defer() + def enable_disable_extensions(self, database: str | None = None) -> None: """Enable/disable PostgreSQL extensions set through config options. @@ -1362,57 +1418,21 @@ def _start_replica(self, event) -> None: # Configure Patroni in the replica but don't start it yet. self._patroni.configure_patroni_on_unit() - def _on_get_password(self, event: ActionEvent) -> None: - """Returns the password for a user as an action response. - - If no user is provided, the password of the operator user is returned. - """ - username = event.params.get("username", USER) - if username not in PASSWORD_USERS: - event.fail( - f"The action can be run only for users used by the charm or Patroni:" - f" {', '.join(PASSWORD_USERS)} not {username}" - ) - return - event.set_results({"password": self.get_secret(APP_SCOPE, f"{username}-password")}) - - def _on_set_password(self, event: ActionEvent) -> None: - """Set the password for the specified user.""" - # Only leader can write the new password into peer relation. - if not self.unit.is_leader(): - event.fail("The action can be run only on leader unit") - return - - username = event.params.get("username", USER) - if username not in SYSTEM_USERS: - event.fail( - f"The action can be run only for users used by the charm:" - f" {', '.join(SYSTEM_USERS)} not {username}" - ) - return - - password = event.params.get("password", new_password()) - - if password == self.get_secret(APP_SCOPE, f"{username}-password"): - event.log("The old and new passwords are equal.") - event.set_results({"password": password}) - return - - # Ensure all members are ready before trying to reload Patroni - # configuration to avoid errors (like the API not responding in - # one instance because PostgreSQL and/or Patroni are not ready). + def _update_admin_password(self, admin_secret_id: str) -> None: + """Check if the password of a system user was changed and update it in the database.""" if not self._patroni.are_all_members_ready(): - event.fail( + # Ensure all members are ready before reloading Patroni configuration to avoid errors + # e.g. API not responding in one instance because PostgreSQL / Patroni are not ready + raise PostgreSQLUpdateUserPasswordError( "Failed changing the password: Not all members healthy or finished initial sync." ) - return replication_offer_relation = self.model.get_relation(REPLICATION_OFFER_RELATION) + other_cluster_primary_ip = "" if ( replication_offer_relation is not None and not self.async_replication.is_primary_cluster() ): - # Update the password in the other cluster PostgreSQL primary instance. other_cluster_endpoints = self.async_replication.get_all_primary_cluster_endpoints() other_cluster_primary = self._patroni.get_primary( alternative_endpoints=other_cluster_endpoints @@ -1422,37 +1442,51 @@ def _on_set_password(self, event: ActionEvent) -> None: for unit in replication_offer_relation.units if unit.name.replace("/", "-") == other_cluster_primary ) - try: - self.postgresql.update_user_password( - username, password, database_host=other_cluster_primary_ip - ) - except PostgreSQLUpdateUserPasswordError as e: - logger.exception(e) - event.fail("Failed changing the password.") - return elif self.model.get_relation(REPLICATION_CONSUMER_RELATION) is not None: - event.fail( - "Failed changing the password: This action can be ran only in the cluster from the offer side." + logger.error( + "Failed changing the password: This can be ran only in the cluster from the offer side." ) + self.unit.status = BlockedStatus("Password update for system users failed.") return - else: - # Update the password in this cluster PostgreSQL primary instance. - try: - self.postgresql.update_user_password(username, password) - except PostgreSQLUpdateUserPasswordError as e: - logger.exception(e) - event.fail("Failed changing the password.") - return - # Update the password in the secret store. - self.set_secret(APP_SCOPE, f"{username}-password", password) + try: + # get the secret content and check each user configured there + # only SYSTEM_USERS with changed passwords are processed, all others ignored + updated_passwords = self.get_secret_from_id(secret_id=admin_secret_id) + for user, password in list(updated_passwords.items()): + if user not in SYSTEM_USERS: + logger.error( + f"Can only update system users: {', '.join(SYSTEM_USERS)} not {user}" + ) + updated_passwords.pop(user) + continue + if password == self.get_secret(APP_SCOPE, f"{user}-password"): + updated_passwords.pop(user) + except (ModelError, SecretNotFoundError) as e: + logger.error(f"Error updating internal passwords: {e}") + self.unit.status = BlockedStatus("Password update for system users failed.") + return + + try: + # perform the actual password update for the remaining users + for user, password in updated_passwords.items(): + logger.info(f"Updating password for user {user}") + self.postgresql.update_user_password( + user, + password, + database_host=other_cluster_primary_ip if other_cluster_primary_ip else None, + ) + # Update the password in the secret store after updating it in the database + self.set_secret(APP_SCOPE, f"{user}-password", password) + except PostgreSQLUpdateUserPasswordError as e: + logger.exception(e) + self.unit.status = BlockedStatus("Password update for system users failed.") + return # Update and reload Patroni configuration in this unit to use the new password. # Other units Patroni configuration will be reloaded in the peer relation changed event. self.update_config() - event.set_results({"password": password}) - def _on_promote_to_primary(self, event: ActionEvent) -> None: if event.params.get("scope") == "cluster": return self.async_replication.promote_to_primary(event) diff --git a/src/config.py b/src/config.py index a755ca0f6d..4926851f95 100644 --- a/src/config.py +++ b/src/config.py @@ -165,6 +165,7 @@ class CharmConfig(BaseConfigModel): storage_default_table_access_method: str | None storage_gin_pending_list_limit: int | None storage_old_snapshot_threshold: int | None + system_users: str | None vacuum_autovacuum_analyze_scale_factor: float | None vacuum_autovacuum_analyze_threshold: int | None vacuum_autovacuum_freeze_max_age: int | None diff --git a/src/constants.py b/src/constants.py index a4318ae1ec..afbce4f27a 100644 --- a/src/constants.py +++ b/src/constants.py @@ -66,6 +66,7 @@ PATRONI_PASSWORD_KEY = "patroni-password" # noqa: S105 SECRET_INTERNAL_LABEL = "internal-secret" # noqa: S105 SECRET_DELETED_LABEL = "None" # noqa: S105 +SYSTEM_USERS_PASSWORD_CONFIG = "system-users" # noqa: S105 APP_SCOPE = "app" UNIT_SCOPE = "unit" diff --git a/tests/integration/ha_tests/conftest.py b/tests/integration/ha_tests/conftest.py index 25c91e478f..4aafb2f669 100644 --- a/tests/integration/ha_tests/conftest.py +++ b/tests/integration/ha_tests/conftest.py @@ -44,10 +44,7 @@ async def loop_wait(ops_test: OpsTest) -> None: initial_loop_wait = await get_patroni_setting(ops_test, "loop_wait") yield # Rollback to the initial configuration. - app = await app_name(ops_test) - patroni_password = await get_password( - ops_test, ops_test.model.applications[app].units[0].name, "patroni" - ) + patroni_password = await get_password(ops_test, "patroni") await change_patroni_setting( ops_test, "loop_wait", initial_loop_wait, patroni_password, use_random_unit=True ) @@ -57,10 +54,7 @@ async def loop_wait(ops_test: OpsTest) -> None: async def primary_start_timeout(ops_test: OpsTest) -> None: """Temporary change the primary start timeout configuration.""" # Change the parameter that makes the primary reelection faster. - app = await app_name(ops_test) - patroni_password = await get_password( - ops_test, ops_test.model.applications[app].units[0].name, "patroni" - ) + patroni_password = await get_password(ops_test, "patroni") initial_primary_start_timeout = await get_patroni_setting(ops_test, "primary_start_timeout") await change_patroni_setting(ops_test, "primary_start_timeout", 0, patroni_password) yield @@ -104,7 +98,7 @@ async def wal_settings(ops_test: OpsTest) -> None: for unit in ops_test.model.applications[app].units: # Start Patroni if it was previously stopped. await run_command_on_unit(ops_test, unit.name, "snap start charmed-postgresql.patroni") - patroni_password = await get_password(ops_test, unit.name, "patroni") + patroni_password = await get_password(ops_test, "patroni") # Rollback to the initial settings. await change_wal_settings( diff --git a/tests/integration/ha_tests/helpers.py b/tests/integration/ha_tests/helpers.py index c22cc85c2c..0c56054775 100644 --- a/tests/integration/ha_tests/helpers.py +++ b/tests/integration/ha_tests/helpers.py @@ -27,6 +27,7 @@ APPLICATION_NAME, db_connect, execute_query_on_unit, + get_password, get_patroni_cluster, get_unit_address, run_command_on_unit, @@ -280,7 +281,7 @@ async def count_writes( ) -> tuple[dict[str, int], dict[str, int]]: """Count the number of writes in the database.""" app = await app_name(ops_test) - password = await get_password(ops_test, app, down_unit) + password = await get_password(ops_test, database_app_name=app) members = [] for model in [ops_test.model, extra_model]: if model is None: @@ -514,22 +515,6 @@ async def get_sync_standby(ops_test: OpsTest, model: Model, application_name: st return member["name"] -async def get_password(ops_test: OpsTest, app: str, down_unit: str | None = None) -> str: - """Use the charm action to retrieve the password from provided application. - - Returns: - string with the password stored on the peer relation databag. - """ - # Can retrieve from any unit running unit, so we pick the first. - for unit in ops_test.model.applications[app].units: - if unit.name != down_unit: - unit_name = unit.name - break - action = await ops_test.model.units.get(unit_name).run_action("get-password") - action = await action.wait() - return action.results["password"] - - async def get_unit_ip(ops_test: OpsTest, unit_name: str, model: Model = None) -> str: """Wrapper for getting unit ip. @@ -556,7 +541,7 @@ async def is_connection_possible( ) -> bool: """Test a connection to a PostgreSQL server.""" app = unit_name.split("/")[0] - password = await get_password(ops_test, app, unit_name) + password = await get_password(ops_test, database_app_name=app) address = await ( get_ip_from_inside_the_unit(ops_test, unit_name) if use_ip_from_inside @@ -781,7 +766,7 @@ async def is_secondary_up_to_date( Retries over the period of one minute to give secondary adequate time to copy over data. """ app = await app_name(ops_test) - password = await get_password(ops_test, app) + password = await get_password(ops_test, database_app_name=app) host = await next( ( get_ip_from_inside_the_unit(ops_test, unit.name) @@ -1035,7 +1020,7 @@ async def create_db(ops_test: OpsTest, app: str, db: str) -> None: """Creates database with specified name.""" unit = ops_test.model.applications[app].units[0] unit_address = await unit.get_public_address() - password = await get_password(ops_test, app) + password = await get_password(ops_test, database_app_name=app) conn = db_connect(unit_address, password) conn.autocommit = True @@ -1049,7 +1034,7 @@ async def check_db(ops_test: OpsTest, app: str, db: str) -> bool: """Returns True if database with specified name already exists.""" unit = ops_test.model.applications[app].units[0] unit_address = await unit.get_public_address() - password = await get_password(ops_test, app) + password = await get_password(ops_test, database_app_name=app) assert password is not None diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index b8ed2b30c8..56be2a831b 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -356,7 +356,7 @@ async def test_promote_standby( any_unit = ops_test.model.applications[DATABASE_APP_NAME].units[0].name primary = await get_primary(ops_test, any_unit) address = get_unit_address(ops_test, primary) - password = await get_password(ops_test, primary) + password = await get_password(ops_test) database_name = f"{APPLICATION_NAME.replace('-', '_')}_database" connection = None try: diff --git a/tests/integration/ha_tests/test_replication.py b/tests/integration/ha_tests/test_replication.py index fed67a8019..4d1bf0dcb5 100644 --- a/tests/integration/ha_tests/test_replication.py +++ b/tests/integration/ha_tests/test_replication.py @@ -132,7 +132,7 @@ async def test_no_data_replicated_between_clusters( await check_writes(ops_test) # Verify that the data from the first cluster wasn't replicated to the second cluster. - password = await get_password(ops_test, app=new_cluster_app) + password = await get_password(ops_test, database_app_name=new_cluster_app) for unit in ops_test.model.applications[new_cluster_app].units: try: with ( diff --git a/tests/integration/ha_tests/test_restore_cluster.py b/tests/integration/ha_tests/test_restore_cluster.py index 8a26b15cb5..48c3ef2b44 100644 --- a/tests/integration/ha_tests/test_restore_cluster.py +++ b/tests/integration/ha_tests/test_restore_cluster.py @@ -56,13 +56,19 @@ async def test_build_and_deploy(ops_test: OpsTest, charm) -> None: await ops_test.model.wait_for_idle(status="active", timeout=1500) # TODO have a better way to bootstrap clusters with existing storage - primary = await get_primary( - ops_test, ops_test.model.applications[FIRST_APPLICATION].units[0].name - ) for user in ["monitoring", "operator", "replication", "rewind"]: - password = await get_password(ops_test, primary, user) - second_primary = ops_test.model.applications[SECOND_APPLICATION].units[0].name - await set_password(ops_test, second_primary, user, password) + password = await get_password( + ops_test, database_app_name=FIRST_APPLICATION, username=user + ) + await set_password( + ops_test, database_app_name=SECOND_APPLICATION, username=user, password=password + ) + # wait for the password changes to be processed + await ops_test.model.wait_for_idle( + apps=[SECOND_APPLICATION], status="active", timeout=1500 + ) + + second_primary = ops_test.model.applications[SECOND_APPLICATION].units[0].name await ops_test.model.destroy_unit(second_primary) @@ -72,7 +78,7 @@ async def test_cluster_restore(ops_test): primary = await get_primary( ops_test, ops_test.model.applications[FIRST_APPLICATION].units[0].name ) - password = await get_password(ops_test, primary) + password = await get_password(ops_test, database_app_name=FIRST_APPLICATION) address = get_unit_address(ops_test, primary) logger.info("creating a table in the database") with db_connect(host=address, password=password) as connection: diff --git a/tests/integration/ha_tests/test_self_healing.py b/tests/integration/ha_tests/test_self_healing.py index f3ddc6fe88..3978e99110 100644 --- a/tests/integration/ha_tests/test_self_healing.py +++ b/tests/integration/ha_tests/test_self_healing.py @@ -234,9 +234,7 @@ async def test_full_cluster_restart( """ # Locate primary unit. app = await app_name(ops_test) - patroni_password = await get_password( - ops_test, ops_test.model.applications[app].units[0].name, "patroni" - ) + patroni_password = await get_password(ops_test, "patroni") # Change the loop wait setting to make Patroni wait more time before restarting PostgreSQL. initial_loop_wait = await get_patroni_setting(ops_test, "loop_wait") @@ -354,7 +352,7 @@ async def test_forceful_restart_without_data_and_transaction_logs( # Rotate the WAL segments. files = await list_wal_files(ops_test, app) host = get_unit_address(ops_test, new_primary_name) - password = await get_password(ops_test, new_primary_name) + password = await get_password(ops_test) with db_connect(host, password) as connection: connection.autocommit = True with connection.cursor() as cursor: diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index d777c0bb81..6b7c058d24 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -30,7 +30,7 @@ wait_fixed, ) -from constants import DATABASE_DEFAULT_NAME +from constants import DATABASE_DEFAULT_NAME, PEER, SYSTEM_USERS_PASSWORD_CONFIG CHARM_BASE = "ubuntu@22.04" METADATA = yaml.safe_load(Path("./metadata.yaml").read_text()) @@ -38,6 +38,11 @@ STORAGE_PATH = METADATA["storage"]["pgdata"]["location"] APPLICATION_NAME = "postgresql-test-app" + +class SecretNotFoundError(Exception): + """Raised when a secret is not found.""" + + logger = logging.getLogger(__name__) @@ -139,7 +144,7 @@ async def check_database_users_existence( """ unit = ops_test.model.applications[DATABASE_APP_NAME].units[0] unit_address = await unit.get_public_address() - password = await get_password(ops_test, unit.name) + password = await get_password(ops_test) # Retrieve all users in the database. users_in_db = await execute_query_on_unit( @@ -164,8 +169,7 @@ async def check_databases_creation(ops_test: OpsTest, databases: list[str]) -> N ops_test: The ops test framework databases: List of database names that should have been created """ - unit = ops_test.model.applications[DATABASE_APP_NAME].units[0] - password = await get_password(ops_test, unit.name) + password = await get_password(ops_test) for unit in ops_test.model.applications[DATABASE_APP_NAME].units: unit_address = await unit.get_public_address() @@ -581,7 +585,7 @@ async def get_landscape_api_credentials(ops_test: OpsTest) -> list[str]: ops_test: The ops test framework """ unit = ops_test.model.applications[DATABASE_APP_NAME].units[0] - password = await get_password(ops_test, unit.name) + password = await get_password(ops_test) unit_address = await unit.get_public_address() output = await execute_query_on_unit( @@ -621,21 +625,43 @@ async def get_machine_from_unit(ops_test: OpsTest, unit_name: str) -> str: return raw_hostname.strip() -async def get_password(ops_test: OpsTest, unit_name: str, username: str = "operator") -> str: - """Retrieve a user password using the action. +async def get_password( + ops_test: OpsTest, + username: str = "operator", + database_app_name: str = DATABASE_APP_NAME, +) -> str: + """Retrieve a user password from the secret. Args: ops_test: ops_test instance. - unit_name: the name of the unit. username: the user to get the password. + database_app_name: the app for getting the secret Returns: the user password. """ - unit = ops_test.model.units.get(unit_name) - action = await unit.run_action("get-password", **{"username": username}) - result = await action.wait() - return result.results["password"] + secret = await get_secret_by_label(ops_test, label=f"{PEER}.{database_app_name}.app") + password = secret.get(f"{username}-password") + + return password + + +async def get_secret_by_label(ops_test: OpsTest, label: str) -> dict[str, str]: + secrets_raw = await ops_test.juju("list-secrets") + secret_ids = [ + secret_line.split()[0] for secret_line in secrets_raw[1].split("\n")[1:] if secret_line + ] + + for secret_id in secret_ids: + secret_data_raw = await ops_test.juju( + "show-secret", "--format", "json", "--reveal", secret_id + ) + secret_data = json.loads(secret_data_raw[1]) + + if label == secret_data[secret_id].get("label"): + return secret_data[secret_id]["content"]["Data"] + + raise SecretNotFoundError(f"Secret with label {label} not found") @retry( @@ -722,7 +748,7 @@ async def check_tls(ops_test: OpsTest, unit_name: str, enabled: bool) -> bool: Whether TLS is enabled/disabled. """ unit_address = get_unit_address(ops_test, unit_name) - password = await get_password(ops_test, unit_name) + password = await get_password(ops_test) # Get the IP addresses of the other units to check that they # are connecting to the primary unit (if unit_name is the # primary unit name) using encrypted connections. @@ -787,7 +813,7 @@ async def check_tls_replication(ops_test: OpsTest, unit_name: str, enabled: bool Whether TLS is enabled/disabled. """ unit_address = get_unit_address(ops_test, unit_name) - password = await get_password(ops_test, unit_name) + password = await get_password(ops_test) # Check for the all replicas using encrypted connection output = await execute_query_on_unit( @@ -950,27 +976,39 @@ def restart_patroni(ops_test: OpsTest, unit_name: str, password: str) -> None: async def set_password( - ops_test: OpsTest, unit_name: str, username: str = "operator", password: str | None = None + ops_test: OpsTest, + username: str = "operator", + password: str | None = None, + database_app_name: str = DATABASE_APP_NAME, ): - """Set a user password using the action. + """Set a user password via secret. Args: ops_test: ops_test instance. - unit_name: the name of the unit. username: the user to set the password. password: optional password to use instead of auto-generating + database_app_name: name of the app for the secret Returns: the results from the action. """ - unit = ops_test.model.units.get(unit_name) - parameters = {"username": username} - if password is not None: - parameters["password"] = password - action = await unit.run_action("set-password", **parameters) - result = await action.wait() - return result.results + secret_name = "system_users_secret" + + try: + secret_id = await ops_test.model.add_secret( + name=secret_name, data_args=[f"{username}={password}"] + ) + await ops_test.model.grant_secret(secret_name=secret_name, application=database_app_name) + + # update the application config to include the secret + await ops_test.model.applications[database_app_name].set_config({ + SYSTEM_USERS_PASSWORD_CONFIG: secret_id + }) + except Exception: + await ops_test.model.update_secret( + name=secret_name, data_args=[f"{username}={password}"], new_name=secret_name + ) async def start_machine(ops_test: OpsTest, machine_name: str) -> None: @@ -1119,7 +1157,7 @@ async def backup_operations( break # Write some data. - password = await get_password(ops_test, primary) + password = await get_password(ops_test, database_app_name=database_app_name) address = get_unit_address(ops_test, primary) logger.info("creating a table in the database") with db_connect(host=address, password=password) as connection: diff --git a/tests/integration/test_backups_aws.py b/tests/integration/test_backups_aws.py index a300cdc817..3fccdac748 100644 --- a/tests/integration/test_backups_aws.py +++ b/tests/integration/test_backups_aws.py @@ -63,8 +63,10 @@ async def test_backup_aws(ops_test: OpsTest, aws_cloud_configs: tuple[dict, dict # Ensure replication is working correctly. address = get_unit_address(ops_test, new_unit_name) - password = await get_password(ops_test, new_unit_name) - patroni_password = await get_password(ops_test, new_unit_name, "patroni") + password = await get_password(ops_test, database_app_name=database_app_name) + patroni_password = await get_password( + ops_test, username="patroni", database_app_name=database_app_name + ) with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: cursor.execute( "SELECT EXISTS (SELECT FROM information_schema.tables" diff --git a/tests/integration/test_backups_gcp.py b/tests/integration/test_backups_gcp.py index 68b2b99c3a..b1dd416f06 100644 --- a/tests/integration/test_backups_gcp.py +++ b/tests/integration/test_backups_gcp.py @@ -143,7 +143,7 @@ async def test_restore_on_new_cluster( # Check that the backup was correctly restored by having only the first created table. logger.info("checking that the backup was correctly restored") - password = await get_password(ops_test, unit_name) + password = await get_password(ops_test, database_app_name=database_app_name) address = get_unit_address(ops_test, unit_name) with db_connect(host=address, password=password) as connection, connection.cursor() as cursor: cursor.execute( diff --git a/tests/integration/test_backups_pitr_aws.py b/tests/integration/test_backups_pitr_aws.py index dd685e1b86..068d00aa16 100644 --- a/tests/integration/test_backups_pitr_aws.py +++ b/tests/integration/test_backups_pitr_aws.py @@ -86,7 +86,7 @@ async def pitr_backup_operations( if unit.name != primary: replica = unit.name break - password = await get_password(ops_test, primary) + password = await get_password(ops_test, database_app_name=database_app_name) address = get_unit_address(ops_test, primary) logger.info("1: creating table") diff --git a/tests/integration/test_backups_pitr_gcp.py b/tests/integration/test_backups_pitr_gcp.py index c4696c06d1..fa038cef75 100644 --- a/tests/integration/test_backups_pitr_gcp.py +++ b/tests/integration/test_backups_pitr_gcp.py @@ -86,7 +86,7 @@ async def pitr_backup_operations( if unit.name != primary: replica = unit.name break - password = await get_password(ops_test, primary) + password = await get_password(ops_test, database_app_name=database_app_name) address = get_unit_address(ops_test, primary) logger.info("1: creating table") diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index 743bbdb242..9bcaeade96 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -86,8 +86,7 @@ async def test_exporter_is_up(ops_test: OpsTest, unit_id: int): async def test_settings_are_correct(ops_test: OpsTest, unit_id: int): # Connect to the PostgreSQL instance. # Retrieving the operator user password using the action. - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - password = await get_password(ops_test, any_unit_name) + password = await get_password(ops_test) # Connect to PostgreSQL. host = get_unit_address(ops_test, f"{DATABASE_APP_NAME}/{unit_id}") @@ -192,8 +191,7 @@ async def test_postgresql_parameters_change(ops_test: OpsTest) -> None: "experimental_max_connections": "200", }) await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", idle_period=30) - any_unit_name = ops_test.model.applications[DATABASE_APP_NAME].units[0].name - password = await get_password(ops_test, any_unit_name) + password = await get_password(ops_test) # Connect to PostgreSQL. for unit_id in UNIT_IDS: @@ -255,7 +253,7 @@ async def test_scale_down_and_up(ops_test: OpsTest): leader_unit = await find_unit(ops_test, leader=True, application=DATABASE_APP_NAME) # Trigger a switchover if the primary and the leader are not the same unit. - patroni_password = await get_password(ops_test, primary, "patroni") + patroni_password = await get_password(ops_test, "patroni") if primary != leader_unit.name: switchover(ops_test, primary, patroni_password, leader_unit.name) @@ -341,7 +339,7 @@ async def test_persist_data_through_primary_deletion(ops_test: OpsTest): for attempt in Retrying(stop=stop_after_attempt(3), wait=wait_fixed(5), reraise=True): with attempt: primary = await get_primary(ops_test, any_unit_name) - password = await get_password(ops_test, primary) + password = await get_password(ops_test) # Write data to primary IP. host = get_unit_address(ops_test, primary) diff --git a/tests/integration/test_password_rotation.py b/tests/integration/test_password_rotation.py index 563626b229..f47622079f 100644 --- a/tests/integration/test_password_rotation.py +++ b/tests/integration/test_password_rotation.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 # Copyright 2022 Canonical Ltd. # See LICENSE file for licensing details. -import json import re import time @@ -9,13 +8,11 @@ import pytest from pytest_operator.plugin import OpsTest -from . import markers from .helpers import ( CHARM_BASE, METADATA, check_patroni, db_connect, - get_leader_unit, get_password, get_primary, get_unit_address, @@ -45,67 +42,45 @@ async def test_deploy_active(ops_test: OpsTest, charm): async def test_password_rotation(ops_test: OpsTest): """Test password rotation action.""" # Get the initial passwords set for the system users. - any_unit_name = ops_test.model.applications[APP_NAME].units[0].name - superuser_password = await get_password(ops_test, any_unit_name) - replication_password = await get_password(ops_test, any_unit_name, "replication") - monitoring_password = await get_password(ops_test, any_unit_name, "monitoring") - backup_password = await get_password(ops_test, any_unit_name, "backup") - rewind_password = await get_password(ops_test, any_unit_name, "rewind") - patroni_password = await get_password(ops_test, any_unit_name, "patroni") - - # Get the leader unit name (because passwords can only be set through it). - leader = None - for unit in ops_test.model.applications[APP_NAME].units: - if await unit.is_leader_from_status(): - leader = unit.name - break + superuser_password = await get_password(ops_test) + replication_password = await get_password(ops_test, "replication") + monitoring_password = await get_password(ops_test, "monitoring") + backup_password = await get_password(ops_test, "backup") + rewind_password = await get_password(ops_test, "rewind") + patroni_password = await get_password(ops_test, "patroni") # Change both passwords. - result = await set_password(ops_test, unit_name=leader) - assert "password" in result + await set_password(ops_test, password="test-password") await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) + new_superuser_password = await get_password(ops_test) + assert superuser_password != new_superuser_password # For replication, generate a specific password and pass it to the action. new_replication_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="replication", password=new_replication_password - ) - assert "password" in result + await set_password(ops_test, username="replication", password=new_replication_password) await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) + assert new_replication_password == await get_password(ops_test, "replication") + assert replication_password != new_replication_password # For monitoring, generate a specific password and pass it to the action. new_monitoring_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="monitoring", password=new_monitoring_password - ) - assert "password" in result + await set_password(ops_test, username="monitoring", password=new_monitoring_password) await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) + assert new_monitoring_password == await get_password(ops_test, "monitoring") + assert monitoring_password != new_monitoring_password # For backup, generate a specific password and pass it to the action. new_backup_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="backup", password=new_backup_password - ) - assert "password" in result + await set_password(ops_test, username="backup", password=new_backup_password) await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) + assert new_backup_password == await get_password(ops_test, "backup") + assert backup_password != new_backup_password # For rewind, generate a specific password and pass it to the action. new_rewind_password = "test-password" - result = await set_password( - ops_test, unit_name=leader, username="rewind", password=new_rewind_password - ) - assert "password" in result + await set_password(ops_test, username="rewind", password=new_rewind_password) await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000) - - new_superuser_password = await get_password(ops_test, any_unit_name) - assert superuser_password != new_superuser_password - assert new_replication_password == await get_password(ops_test, any_unit_name, "replication") - assert replication_password != new_replication_password - assert new_monitoring_password == await get_password(ops_test, any_unit_name, "monitoring") - assert monitoring_password != new_monitoring_password - assert new_backup_password == await get_password(ops_test, any_unit_name, "backup") - assert backup_password != new_backup_password - assert new_rewind_password == await get_password(ops_test, any_unit_name, "rewind") + assert new_rewind_password == await get_password(ops_test, "rewind") assert rewind_password != new_rewind_password # Restart Patroni on any non-leader unit and check that @@ -117,44 +92,6 @@ async def test_password_rotation(ops_test: OpsTest): assert check_patroni(ops_test, unit.name, restart_time) -@markers.juju_secrets -async def test_password_from_secret_same_as_cli(ops_test: OpsTest): - """Checking if password is same as returned by CLI. - - I.e. we're manipulating the secret we think we're manipulating. - """ - # - # No way to retrieve a secet by label for now (https://bugs.launchpad.net/juju/+bug/2037104) - # Therefore we take advantage of the fact, that we only have ONE single secret a this point - # So we take the single member of the list - # NOTE: This would BREAK if for instance units had secrets at the start... - # - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - password = await get_password(ops_test, unit_name=leader, username="replication") - complete_command = "list-secrets" - _, stdout, _ = await ops_test.juju(*complete_command.split()) - secret_id = stdout.split("\n")[1].split(" ")[0] - - # Getting back the pw from juju CLI - complete_command = f"show-secret {secret_id} --reveal --format=json" - _, stdout, _ = await ops_test.juju(*complete_command.split()) - data = json.loads(stdout) - assert data[secret_id]["content"]["Data"]["replication-password"] == password - - -async def test_empty_password(ops_test: OpsTest) -> None: - """Test that the password can't be set to an empty string.""" - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - await set_password(ops_test, unit_name=leader, username="replication", password="") - password = await get_password(ops_test, unit_name=leader, username="replication") - # The password is 'None', BUT NOT because of SECRET_DELETED_LABEL - # `get_secret()` returns a None value (as the field in the secret is set to string value "None") - # And this true None value is turned to a string when the event is setting results. - assert password == "None" - - async def test_db_connection_with_empty_password(ops_test: OpsTest): """Test that user can't connect with empty password.""" primary = await get_primary(ops_test, f"{APP_NAME}/0") @@ -165,12 +102,10 @@ async def test_db_connection_with_empty_password(ops_test: OpsTest): async def test_no_password_change_on_invalid_password(ops_test: OpsTest) -> None: """Test that in general, there is no change when password validation fails.""" - leader_unit = await get_leader_unit(ops_test, APP_NAME) - leader = leader_unit.name - password1 = await get_password(ops_test, unit_name=leader, username="replication") + password1 = await get_password(ops_test, username="replication") # The password has to be minimum 3 characters - await set_password(ops_test, unit_name=leader, username="replication", password="ca" * 1000000) - password2 = await get_password(ops_test, unit_name=leader, username="replication") + await set_password(ops_test, username="replication", password="1") + password2 = await get_password(ops_test, username="replication") # The password didn't change assert password1 == password2 diff --git a/tests/integration/test_plugins.py b/tests/integration/test_plugins.py index 4dedc11a7e..21769afe58 100644 --- a/tests/integration/test_plugins.py +++ b/tests/integration/test_plugins.py @@ -171,7 +171,7 @@ def enable_disable_config(enabled: False): # Check that the available plugins are disabled. primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - password = await get_password(ops_test, primary) + password = await get_password(ops_test) address = get_unit_address(ops_test, primary) config = enable_disable_config(False) @@ -214,7 +214,7 @@ def enable_disable_config(enabled: False): async def test_plugin_objects(ops_test: OpsTest) -> None: """Checks if charm gets blocked when trying to disable a plugin in use.""" primary = await get_primary(ops_test, f"{DATABASE_APP_NAME}/0") - password = await get_password(ops_test, primary) + password = await get_password(ops_test) address = get_unit_address(ops_test, primary) logger.info("Creating an index object which depends on the pg_trgm config") diff --git a/tests/integration/test_tls.py b/tests/integration/test_tls.py index 0eb2d23ede..c0a6423542 100644 --- a/tests/integration/test_tls.py +++ b/tests/integration/test_tls.py @@ -82,7 +82,7 @@ async def test_tls_enabled(ops_test: OpsTest) -> None: # Check if TLS enabled for replication assert await check_tls_replication(ops_test, primary, enabled=True) - patroni_password = await get_password(ops_test, primary, "patroni") + patroni_password = await get_password(ops_test, "patroni") # Enable additional logs on the PostgreSQL instance to check TLS # being used in a later step and make the fail-over to happens faster. @@ -113,7 +113,7 @@ async def test_tls_enabled(ops_test: OpsTest) -> None: # Check that the replica was promoted. host = get_unit_address(ops_test, replica) - password = await get_password(ops_test, replica) + password = await get_password(ops_test) with db_connect(host, password) as connection: connection.autocommit = True with connection.cursor() as cursor: @@ -126,7 +126,7 @@ async def test_tls_enabled(ops_test: OpsTest) -> None: # Write some data to the initial primary (this causes a divergence # in the instances' timelines). host = get_unit_address(ops_test, primary) - password = await get_password(ops_test, primary) + password = await get_password(ops_test) with db_connect(host, password) as connection: connection.autocommit = True with connection.cursor() as cursor: diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py index 22237b16a6..982302941d 100644 --- a/tests/unit/test_backups.py +++ b/tests/unit/test_backups.py @@ -507,7 +507,7 @@ def test_execute_command(harness): patch("pwd.getpwnam") as _getpwnam, ): # Test when the command fails. - command = "rm -r /var/lib/postgresql/data/pgdata".split() + command = ["rm", "-r", "/var/lib/postgresql/data/pgdata"] _run.return_value = CompletedProcess(command, 1, b"", b"fake stderr") assert harness.charm.backup._execute_command(command) == (1, "", "fake stderr") _run.assert_called_once_with( diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 3c22af7be5..573abb1af4 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -13,7 +13,6 @@ from charms.postgresql_k8s.v0.postgresql import ( PostgreSQLCreateUserError, PostgreSQLEnableDisableExtensionError, - PostgreSQLUpdateUserPasswordError, ) from ops import Unit from ops.framework import EventBase @@ -86,9 +85,9 @@ def test_on_install(harness): pg_snap.alias.assert_any_call("patronictl") assert _check_call.call_count == 3 - _check_call.assert_any_call("mkdir -p /home/snap_daemon".split()) - _check_call.assert_any_call("chown snap_daemon:snap_daemon /home/snap_daemon".split()) - _check_call.assert_any_call("usermod -d /home/snap_daemon snap_daemon".split()) + _check_call.assert_any_call(["mkdir", "-p", "/home/snap_daemon"]) + _check_call.assert_any_call(["chown", "snap_daemon:snap_daemon", "/home/snap_daemon"]) + _check_call.assert_any_call(["usermod", "-d", "/home/snap_daemon", "snap_daemon"]) # Assert the status set by the event handler. assert isinstance(harness.model.unit.status, WaitingStatus) @@ -774,102 +773,6 @@ def test_on_start_after_blocked_state(harness): assert harness.model.unit.status == initial_status -def test_on_get_password(harness): - with patch("charm.PostgresqlOperatorCharm.update_config"): - rel_id = harness.model.get_relation(PEER).id - # Create a mock event and set passwords in peer relation data. - harness.set_leader(True) - mock_event = MagicMock(params={}) - harness.update_relation_data( - rel_id, - harness.charm.app.name, - { - "operator-password": "test-password", - "replication-password": "replication-test-password", - }, - ) - - # Test providing an invalid username. - mock_event.params["username"] = "user" - harness.charm._on_get_password(mock_event) - mock_event.fail.assert_called_once() - mock_event.set_results.assert_not_called() - - # Test without providing the username option. - mock_event.reset_mock() - del mock_event.params["username"] - harness.charm._on_get_password(mock_event) - mock_event.set_results.assert_called_once_with({"password": "test-password"}) - - # Also test providing the username option. - mock_event.reset_mock() - mock_event.params["username"] = "replication" - harness.charm._on_get_password(mock_event) - mock_event.set_results.assert_called_once_with({"password": "replication-test-password"}) - - -def test_on_set_password(harness): - with ( - patch("charm.PostgresqlOperatorCharm.update_config"), - patch("charm.PostgresqlOperatorCharm.set_secret") as _set_secret, - patch("charm.PostgresqlOperatorCharm.postgresql") as _postgresql, - patch("charm.Patroni.are_all_members_ready") as _are_all_members_ready, - patch("charm.PostgresqlOperatorCharm._on_leader_elected"), - ): - # Create a mock event. - mock_event = MagicMock(params={}) - - # Set some values for the other mocks. - _are_all_members_ready.side_effect = [False, True, True, True, True] - _postgresql.update_user_password = PropertyMock( - side_effect=[PostgreSQLUpdateUserPasswordError, None, None, None] - ) - - # Test trying to set a password through a non leader unit. - harness.charm._on_set_password(mock_event) - mock_event.fail.assert_called_once() - _set_secret.assert_not_called() - - # Test providing an invalid username. - harness.set_leader() - mock_event.reset_mock() - mock_event.params["username"] = "user" - harness.charm._on_set_password(mock_event) - mock_event.fail.assert_called_once() - _set_secret.assert_not_called() - - # Test without providing the username option but without all cluster members ready. - mock_event.reset_mock() - del mock_event.params["username"] - harness.charm._on_set_password(mock_event) - mock_event.fail.assert_called_once() - _set_secret.assert_not_called() - - # Test for an error updating when updating the user password in the database. - mock_event.reset_mock() - harness.charm._on_set_password(mock_event) - mock_event.fail.assert_called_once() - _set_secret.assert_not_called() - - # Test without providing the username option. - harness.charm._on_set_password(mock_event) - assert _set_secret.call_args_list[0][0][1] == "operator-password" - - # Also test providing the username option. - _set_secret.reset_mock() - mock_event.params["username"] = "replication" - harness.charm._on_set_password(mock_event) - assert _set_secret.call_args_list[0][0][1] == "replication-password" - - # And test providing both the username and password options. - _set_secret.reset_mock() - mock_event.params["password"] = "replication-test-password" - harness.charm._on_set_password(mock_event) - _set_secret.assert_called_once_with( - "app", "replication-password", "replication-test-password" - ) - - def test_on_update_status(harness): with ( patch("charm.ClusterTopologyObserver.start_observer") as _start_observer, @@ -2003,35 +1906,6 @@ def test_scope_obj(harness): assert harness.charm._scope_obj("test") is None -def test_on_get_password_secrets(harness): - with ( - patch("charm.PostgresqlOperatorCharm._on_leader_elected"), - ): - # Create a mock event and set passwords in peer relation data. - harness.set_leader() - mock_event = MagicMock(params={}) - harness.charm.set_secret("app", "operator-password", "test-password") - harness.charm.set_secret("app", "replication-password", "replication-test-password") - - # Test providing an invalid username. - mock_event.params["username"] = "user" - harness.charm._on_get_password(mock_event) - mock_event.fail.assert_called_once() - mock_event.set_results.assert_not_called() - - # Test without providing the username option. - mock_event.reset_mock() - del mock_event.params["username"] - harness.charm._on_get_password(mock_event) - mock_event.set_results.assert_called_once_with({"password": "test-password"}) - - # Also test providing the username option. - mock_event.reset_mock() - mock_event.params["username"] = "replication" - harness.charm._on_get_password(mock_event) - mock_event.set_results.assert_called_once_with({"password": "replication-test-password"}) - - @pytest.mark.parametrize("scope,field", [("app", "operator-password"), ("unit", "csr")]) def test_get_secret_secrets(harness, scope, field): with (