diff --git a/src/backups.py b/src/backups.py
index 2bdc776374..d9a1fee86e 100644
--- a/src/backups.py
+++ b/src/backups.py
@@ -989,7 +989,7 @@ def _on_restore_action(self, event):  # noqa: C901
         # Stop the database service before performing the restore.
         logger.info("Stopping database service")
         try:
-            self.container.stop(self.charm._postgresql_service)
+            self.container.stop(self.charm.postgresql_service)
         except ChangeError as e:
             error_message = f"Failed to stop database service with error: {e!s}"
             logger.error(f"Restore failed: {error_message}")
@@ -1047,7 +1047,7 @@ def _on_restore_action(self, event):  # noqa: C901
 
         # Start the database to start the restore process.
         logger.info("Configuring Patroni to restore the backup")
-        self.container.start(self.charm._postgresql_service)
+        self.container.start(self.charm.postgresql_service)
 
         event.set_results({"restore-status": "restore started"})
 
@@ -1221,7 +1221,7 @@ def _restart_database(self) -> None:
         """Removes the restoring backup flag and restart the database."""
         self.charm.app_peer_data.update({"restoring-backup": "", "restore-to-time": ""})
         self.charm.update_config()
-        self.container.start(self.charm._postgresql_service)
+        self.container.start(self.charm.postgresql_service)
 
     def _retrieve_s3_parameters(self) -> tuple[dict, list[str]]:
         """Retrieve S3 parameters from the S3 integrator relation."""
diff --git a/src/charm.py b/src/charm.py
index 9c35d00c8e..3726e34ea9 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -14,6 +14,7 @@
 import time
 from pathlib import Path
 from typing import Literal, get_args
+from urllib.parse import urlparse
 
 # First platform-specific import, will fail on wrong architecture
 try:
@@ -88,6 +89,7 @@
     APP_SCOPE,
     BACKUP_USER,
     DATABASE_DEFAULT_NAME,
+    DATABASE_PORT,
     METRICS_PORT,
     MONITORING_PASSWORD_KEY,
     MONITORING_USER,
@@ -195,10 +197,11 @@ def __init__(self, *args):
             deleted_label=SECRET_DELETED_LABEL,
         )
 
-        self._postgresql_service = "postgresql"
+        self.postgresql_service = "postgresql"
         self.rotate_logs_service = "rotate-logs"
         self.pgbackrest_server_service = "pgbackrest server"
-        self._metrics_service = "metrics_server"
+        self.ldap_sync_service = "ldap-sync"
+        self.metrics_service = "metrics_server"
         self._unit = self.model.unit.name
         self._name = self.model.app.name
         self._namespace = self.model.name
@@ -601,7 +604,7 @@ def _on_peer_relation_changed(self, event: HookEvent) -> None:  # noqa: C901
             logger.debug("on_peer_relation_changed early exit: Unit in blocked status")
             return
 
-        services = container.pebble.get_services(names=[self._postgresql_service])
+        services = container.pebble.get_services(names=[self.postgresql_service])
         if (
             (self.is_cluster_restoring_backup or self.is_cluster_restoring_to_time)
             and len(services) > 0
@@ -1496,7 +1499,7 @@ def _on_update_status(self, _) -> None:
         if not self._on_update_status_early_exit_checks(container):
            return
 
-        services = container.pebble.get_services(names=[self._postgresql_service])
+        services = container.pebble.get_services(names=[self.postgresql_service])
         if len(services) == 0:
             # Service has not been added nor started yet, so don't try to check Patroni API.
logger.debug("on_update_status early exit: Service has not been added nor started yet") @@ -1509,10 +1512,10 @@ def _on_update_status(self, _) -> None: and services[0].current != ServiceStatus.ACTIVE ): logger.warning( - f"{self._postgresql_service} pebble service inactive, restarting service" + f"{self.postgresql_service} pebble service inactive, restarting service" ) try: - container.restart(self._postgresql_service) + container.restart(self.postgresql_service) except ChangeError: logger.exception("Failed to restart patroni") # If service doesn't recover fast, exit and wait for next hook run to re-check @@ -1609,7 +1612,7 @@ def _handle_processes_failures(self) -> bool: # https://github.com/canonical/pebble/issues/149 is resolved. if not self._patroni.member_started and self._patroni.is_database_running: try: - container.restart(self._postgresql_service) + container.restart(self.postgresql_service) logger.info("restarted Patroni because it was not running") except ChangeError: logger.error("failed to restart Patroni after checking that it was not running") @@ -1746,6 +1749,37 @@ def _update_endpoints( endpoints.remove(endpoint) self._peers.data[self.app]["endpoints"] = json.dumps(endpoints) + def _generate_ldap_service(self) -> dict: + """Generate the LDAP service definition.""" + ldap_params = self.get_ldap_parameters() + + ldap_url = urlparse(ldap_params["ldapurl"]) + ldap_host = ldap_url.hostname + ldap_port = ldap_url.port + + ldap_base_dn = ldap_params["ldapbasedn"] + ldap_bind_username = ldap_params["ldapbinddn"] + ldap_bing_password = ldap_params["ldapbindpasswd"] + + return { + "override": "replace", + "summary": "synchronize LDAP users", + "command": "/start-ldap-synchronizer.sh", + "startup": "enabled", + "environment": { + "LDAP_HOST": ldap_host, + "LDAP_PORT": ldap_port, + "LDAP_BASE_DN": ldap_base_dn, + "LDAP_BIND_USERNAME": ldap_bind_username, + "LDAP_BIND_PASSWORD": ldap_bing_password, + "POSTGRES_HOST": "127.0.0.1", + "POSTGRES_PORT": DATABASE_PORT, + "POSTGRES_DATABASE": DATABASE_DEFAULT_NAME, + "POSTGRES_USERNAME": USER, + "POSTGRES_PASSWORD": self.get_secret(APP_SCOPE, USER_PASSWORD_KEY), + }, + } + def _generate_metrics_service(self) -> dict: """Generate the metrics service definition.""" return { @@ -1757,7 +1791,7 @@ def _generate_metrics_service(self) -> dict: if self.get_secret("app", MONITORING_PASSWORD_KEY) is not None else "disabled" ), - "after": [self._postgresql_service], + "after": [self.postgresql_service], "user": WORKLOAD_OS_USER, "group": WORKLOAD_OS_GROUP, "environment": { @@ -1776,7 +1810,7 @@ def _postgresql_layer(self) -> Layer: "summary": "postgresql + patroni layer", "description": "pebble config layer for postgresql + patroni", "services": { - self._postgresql_service: { + self.postgresql_service: { "override": "replace", "summary": "entrypoint of the postgresql + patroni image", "command": f"patroni {self._storage_path}/patroni.yml", @@ -1806,7 +1840,13 @@ def _postgresql_layer(self) -> Layer: "user": WORKLOAD_OS_USER, "group": WORKLOAD_OS_GROUP, }, - self._metrics_service: self._generate_metrics_service(), + self.ldap_sync_service: { + "override": "replace", + "summary": "synchronize LDAP users", + "command": "/start-ldap-synchronizer.sh", + "startup": "disabled", + }, + self.metrics_service: self._generate_metrics_service(), self.rotate_logs_service: { "override": "replace", "summary": "rotate logs", @@ -1815,7 +1855,7 @@ def _postgresql_layer(self) -> Layer: }, }, "checks": { - self._postgresql_service: { + self.postgresql_service: { 
"override": "replace", "level": "ready", "http": { @@ -1918,6 +1958,51 @@ def _restart(self, event: RunWithLock) -> None: # Start or stop the pgBackRest TLS server service when TLS certificate change. self.backup.start_stop_pgbackrest_service() + def _restart_metrics_service(self) -> None: + """Restart the monitoring service if the password was rotated.""" + container = self.unit.get_container("postgresql") + current_layer = container.get_plan() + + metrics_service = current_layer.services[self.metrics_service] + data_source_name = metrics_service.environment.get("DATA_SOURCE_NAME", "") + + if metrics_service and not data_source_name.startswith( + f"user={MONITORING_USER} password={self.get_secret('app', MONITORING_PASSWORD_KEY)} " + ): + container.add_layer( + self.metrics_service, + Layer({"services": {self.metrics_service: self._generate_metrics_service()}}), + combine=True, + ) + container.restart(self.metrics_service) + + def _restart_ldap_sync_service(self) -> None: + """Restart the LDAP sync service in case any configuration changed.""" + if not self._patroni.member_started: + logger.debug("Restart LDAP sync early exit: Patroni has not started yet") + return + + container = self.unit.get_container("postgresql") + sync_service = container.pebble.get_services(names=[self.ldap_sync_service]) + + if not self.is_primary and sync_service[0].is_running(): + logger.debug("Stopping LDAP sync service. It must only run in the primary") + container.stop(self.pg_ldap_sync_service) + + if self.is_primary and not self.is_ldap_enabled: + logger.debug("Stopping LDAP sync service") + container.stop(self.ldap_sync_service) + return + + if self.is_primary and self.is_ldap_enabled: + container.add_layer( + self.ldap_sync_service, + Layer({"services": {self.ldap_sync_service: self._generate_ldap_service()}}), + combine=True, + ) + logger.debug("Starting LDAP sync service") + container.restart(self.ldap_sync_service) + @property def _is_workload_running(self) -> bool: """Returns whether the workload is running (in an active state).""" @@ -1925,7 +2010,7 @@ def _is_workload_running(self) -> bool: if not container.can_connect(): return False - services = container.pebble.get_services(names=[self._postgresql_service]) + services = container.pebble.get_services(names=[self.postgresql_service]) if len(services) == 0: return False @@ -2009,21 +2094,12 @@ def update_config(self, is_creating_backup: bool = False) -> bool: }) self._handle_postgresql_restart_need() + self._restart_metrics_service() - # Restart the monitoring service if the password was rotated - container = self.unit.get_container("postgresql") - current_layer = container.get_plan() - if ( - metrics_service := current_layer.services[self._metrics_service] - ) and not metrics_service.environment.get("DATA_SOURCE_NAME", "").startswith( - f"user={MONITORING_USER} password={self.get_secret('app', MONITORING_PASSWORD_KEY)} " - ): - container.add_layer( - self._metrics_service, - Layer({"services": {self._metrics_service: self._generate_metrics_service()}}), - combine=True, - ) - container.restart(self._metrics_service) + # TODO: Un-comment + # When PostgreSQL-rock wrapping PostgreSQL-snap versions 162 / 163 gets published + # (i.e. snap contains https://github.com/canonical/charmed-postgresql-snap/pull/88) + # self._restart_ldap_sync_service() return True @@ -2102,14 +2178,14 @@ def _update_pebble_layers(self, replan: bool = True) -> None: # Check if there are any changes to layer services. 
         if current_layer.services != new_layer.services:
             # Changes were made, add the new layer.
-            container.add_layer(self._postgresql_service, new_layer, combine=True)
+            container.add_layer(self.postgresql_service, new_layer, combine=True)
             logging.info("Added updated layer 'postgresql' to Pebble plan")
             if replan:
                 container.replan()
                 logging.info("Restarted postgresql service")
         if current_layer.checks != new_layer.checks:
             # Changes were made, add the new layer.
-            container.add_layer(self._postgresql_service, new_layer, combine=True)
+            container.add_layer(self.postgresql_service, new_layer, combine=True)
             logging.info("Updated health checks")
 
     def _unit_name_to_pod_name(self, unit_name: str) -> str:
diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py
index 1700de12b8..9d34409de0 100644
--- a/src/relations/async_replication.py
+++ b/src/relations/async_replication.py
@@ -524,7 +524,7 @@ def _on_async_relation_changed(self, event: RelationChangedEvent) -> None:
 
         if (
             not self.container.can_connect()
-            or len(self.container.pebble.get_services(names=[self.charm._postgresql_service])) == 0
+            or len(self.container.pebble.get_services(names=[self.charm.postgresql_service])) == 0
         ):
             logger.debug("Early exit on_async_relation_changed: container hasn't started yet.")
             event.defer()
@@ -532,7 +532,7 @@ def _on_async_relation_changed(self, event: RelationChangedEvent) -> None:
 
         # Update the asynchronous replication configuration and start the database.
         self.charm.update_config()
-        self.container.start(self.charm._postgresql_service)
+        self.container.start(self.charm.postgresql_service)
 
         self._handle_database_start(event)
 
@@ -694,7 +694,7 @@ def _stop_database(self, event: RelationChangedEvent) -> bool:
             logger.debug("Early exit on_async_relation_changed: following promoted cluster.")
             return False
 
-        self.container.stop(self.charm._postgresql_service)
+        self.container.stop(self.charm.postgresql_service)
 
         if self.charm.unit.is_leader():
             # Remove the "cluster_initialised" flag to avoid self-healing in the update status hook.
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index f8aa050393..f93053c817 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -31,6 +31,7 @@
 POSTGRESQL_CONTAINER = "postgresql"
 POSTGRESQL_SERVICE = "postgresql"
+LDAP_SYNC_SERVICE = "ldap-sync"
 METRICS_SERVICE = "metrics_server"
 PGBACKREST_SERVER_SERVICE = "pgbackrest server"
 ROTATE_LOGS_SERVICE = "rotate-logs"
@@ -956,6 +957,20 @@ def test_postgresql_layer(harness):
                         "PATRONI_SUPERUSER_USERNAME": "operator",
                     },
                 },
+                PGBACKREST_SERVER_SERVICE: {
+                    "override": "replace",
+                    "summary": "pgBackRest server",
+                    "command": PGBACKREST_SERVER_SERVICE,
+                    "startup": "disabled",
+                    "user": "postgres",
+                    "group": "postgres",
+                },
+                LDAP_SYNC_SERVICE: {
+                    "override": "replace",
+                    "summary": "synchronize LDAP users",
+                    "command": "/start-ldap-synchronizer.sh",
+                    "startup": "disabled",
+                },
                 METRICS_SERVICE: {
                     "override": "replace",
                     "summary": "postgresql metrics exporter",
@@ -972,14 +987,6 @@ def test_postgresql_layer(harness):
                         ),
                     },
                 },
-                PGBACKREST_SERVER_SERVICE: {
-                    "override": "replace",
-                    "summary": "pgBackRest server",
-                    "command": PGBACKREST_SERVER_SERVICE,
-                    "startup": "disabled",
-                    "user": "postgres",
-                    "group": "postgres",
-                },
                 ROTATE_LOGS_SERVICE: {
                     "override": "replace",
                     "summary": "rotate logs",
@@ -1655,6 +1662,12 @@ def test_update_config(harness):
         patch(
             "charm.PostgresqlOperatorCharm._handle_postgresql_restart_need"
         ) as _handle_postgresql_restart_need,
+        patch(
+            "charm.PostgresqlOperatorCharm._restart_metrics_service"
+        ) as _restart_metrics_service,
+        patch(
+            "charm.PostgresqlOperatorCharm._restart_ldap_sync_service"
+        ) as _restart_ldap_sync_service,
         patch("charm.Patroni.bulk_update_parameters_controller_by_patroni"),
         patch("charm.Patroni.member_started", new_callable=PropertyMock) as _member_started,
         patch(
             "charm.PostgresqlOperatorCharm._is_workload_running", new_callable=PropertyMock
         ) as _is_workload_running,
         patch("charm.Patroni.render_patroni_yml_file") as _render_patroni_yml_file,
         patch("charm.PostgreSQLUpgrade") as _upgrade,
+        patch("charm.PostgresqlOperatorCharm.is_primary", return_value=False),
         patch(
             "charm.PostgresqlOperatorCharm.is_tls_enabled", new_callable=PropertyMock
         ) as _is_tls_enabled,
@@ -1696,10 +1710,14 @@ def test_update_config(harness):
             parameters={"test": "test"},
         )
         _handle_postgresql_restart_need.assert_called_once()
+        _restart_metrics_service.assert_called_once()
+        # _restart_ldap_sync_service.assert_called_once()
         assert "tls" not in harness.get_relation_data(rel_id, harness.charm.unit.name)
 
         # Test with TLS files available.
         _handle_postgresql_restart_need.reset_mock()
+        _restart_metrics_service.reset_mock()
+        # _restart_ldap_sync_service.reset_mock()
         harness.update_relation_data(
             rel_id, harness.charm.unit.name, {"tls": ""}
         )  # Mock some data in the relation to test that it change.
@@ -1721,6 +1739,8 @@ def test_update_config(harness):
             parameters={"test": "test"},
         )
         _handle_postgresql_restart_need.assert_called_once()
+        _restart_metrics_service.assert_called_once()
+        # _restart_ldap_sync_service.assert_called_once()
         assert "tls" not in harness.get_relation_data(
             rel_id, harness.charm.unit.name
         )  # The "tls" flag is set in handle_postgresql_restart_need.
@@ -1730,8 +1750,12 @@ def test_update_config(harness):
             rel_id, harness.charm.unit.name, {"tls": ""}
         )  # Mock some data in the relation to test that it change.
         _handle_postgresql_restart_need.reset_mock()
+        _restart_metrics_service.reset_mock()
+        # _restart_ldap_sync_service.reset_mock()
         harness.charm.update_config()
         _handle_postgresql_restart_need.assert_not_called()
+        _restart_metrics_service.assert_not_called()
+        # _restart_ldap_sync_service.assert_not_called()
         assert harness.get_relation_data(rel_id, harness.charm.unit.name)["tls"] == "enabled"
 
         # Test with member not started yet.
@@ -1740,6 +1764,8 @@ def test_update_config(harness):
         )  # Mock some data in the relation to test that it doesn't change.
         harness.charm.update_config()
         _handle_postgresql_restart_need.assert_not_called()
+        _restart_metrics_service.assert_not_called()
+        # _restart_ldap_sync_service.assert_not_called()
         assert "tls" not in harness.get_relation_data(rel_id, harness.charm.unit.name)
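Note (outside the patch above): a minimal sketch of the primary/LDAP gating that the new _restart_ldap_sync_service helper applies, condensed into a single check and using only Pebble calls that already appear in the diff (ops.pebble.Layer, Container.add_layer, Container.stop, Container.restart). The reconcile_ldap_sync name and its is_primary / is_ldap_enabled / ldap_service parameters are illustrative stand-ins for the charm's own properties and service definition, not part of the charm's API.

# Sketch only: mirrors the decision flow of _restart_ldap_sync_service, assuming
# `container` is the ops.Container for the "postgresql" workload.
from ops import Container
from ops.pebble import Layer

LDAP_SYNC_SERVICE = "ldap-sync"


def reconcile_ldap_sync(
    container: Container, is_primary: bool, is_ldap_enabled: bool, ldap_service: dict
) -> None:
    """Keep the LDAP synchronizer running only on the primary, and only when LDAP is enabled."""
    services = container.pebble.get_services(names=[LDAP_SYNC_SERVICE])
    running = bool(services) and services[0].is_running()

    if not (is_primary and is_ldap_enabled):
        # Replicas never run the synchronizer, and the primary stops it when LDAP is off.
        if running:
            container.stop(LDAP_SYNC_SERVICE)
        return

    # Primary with LDAP enabled: (re)apply the service definition so configuration
    # changes (for example a rotated bind password) are picked up, then restart it.
    container.add_layer(
        LDAP_SYNC_SERVICE,
        Layer({"services": {LDAP_SYNC_SERVICE: ldap_service}}),
        combine=True,
    )
    container.restart(LDAP_SYNC_SERVICE)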