Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@ def _on_restore_action(self, event): # noqa: C901
# Stop the database service before performing the restore.
logger.info("Stopping database service")
try:
self.container.stop(self.charm._postgresql_service)
self.container.stop(self.charm.postgresql_service)
except ChangeError as e:
error_message = f"Failed to stop database service with error: {e!s}"
logger.error(f"Restore failed: {error_message}")
Expand Down Expand Up @@ -1047,7 +1047,7 @@ def _on_restore_action(self, event): # noqa: C901

# Start the database to start the restore process.
logger.info("Configuring Patroni to restore the backup")
self.container.start(self.charm._postgresql_service)
self.container.start(self.charm.postgresql_service)

event.set_results({"restore-status": "restore started"})

Expand Down Expand Up @@ -1221,7 +1221,7 @@ def _restart_database(self) -> None:
"""Removes the restoring backup flag and restart the database."""
self.charm.app_peer_data.update({"restoring-backup": "", "restore-to-time": ""})
self.charm.update_config()
self.container.start(self.charm._postgresql_service)
self.container.start(self.charm.postgresql_service)

def _retrieve_s3_parameters(self) -> tuple[dict, list[str]]:
"""Retrieve S3 parameters from the S3 integrator relation."""
Expand Down
132 changes: 104 additions & 28 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import time
from pathlib import Path
from typing import Literal, get_args
from urllib.parse import urlparse

# First platform-specific import, will fail on wrong architecture
try:
Expand Down Expand Up @@ -88,6 +89,7 @@
APP_SCOPE,
BACKUP_USER,
DATABASE_DEFAULT_NAME,
DATABASE_PORT,
METRICS_PORT,
MONITORING_PASSWORD_KEY,
MONITORING_USER,
Expand Down Expand Up @@ -195,10 +197,11 @@ def __init__(self, *args):
deleted_label=SECRET_DELETED_LABEL,
)

self._postgresql_service = "postgresql"
self.postgresql_service = "postgresql"
self.rotate_logs_service = "rotate-logs"
self.pgbackrest_server_service = "pgbackrest server"
self._metrics_service = "metrics_server"
self.ldap_sync_service = "ldap-sync"
self.metrics_service = "metrics_server"
self._unit = self.model.unit.name
self._name = self.model.app.name
self._namespace = self.model.name
Expand Down Expand Up @@ -601,7 +604,7 @@ def _on_peer_relation_changed(self, event: HookEvent) -> None: # noqa: C901
logger.debug("on_peer_relation_changed early exit: Unit in blocked status")
return

services = container.pebble.get_services(names=[self._postgresql_service])
services = container.pebble.get_services(names=[self.postgresql_service])
if (
(self.is_cluster_restoring_backup or self.is_cluster_restoring_to_time)
and len(services) > 0
Expand Down Expand Up @@ -1496,7 +1499,7 @@ def _on_update_status(self, _) -> None:
if not self._on_update_status_early_exit_checks(container):
return

services = container.pebble.get_services(names=[self._postgresql_service])
services = container.pebble.get_services(names=[self.postgresql_service])
if len(services) == 0:
# Service has not been added nor started yet, so don't try to check Patroni API.
logger.debug("on_update_status early exit: Service has not been added nor started yet")
Expand All @@ -1509,10 +1512,10 @@ def _on_update_status(self, _) -> None:
and services[0].current != ServiceStatus.ACTIVE
):
logger.warning(
f"{self._postgresql_service} pebble service inactive, restarting service"
f"{self.postgresql_service} pebble service inactive, restarting service"
)
try:
container.restart(self._postgresql_service)
container.restart(self.postgresql_service)
except ChangeError:
logger.exception("Failed to restart patroni")
# If service doesn't recover fast, exit and wait for next hook run to re-check
Expand Down Expand Up @@ -1609,7 +1612,7 @@ def _handle_processes_failures(self) -> bool:
# https://github.com/canonical/pebble/issues/149 is resolved.
if not self._patroni.member_started and self._patroni.is_database_running:
try:
container.restart(self._postgresql_service)
container.restart(self.postgresql_service)
logger.info("restarted Patroni because it was not running")
except ChangeError:
logger.error("failed to restart Patroni after checking that it was not running")
Expand Down Expand Up @@ -1746,6 +1749,37 @@ def _update_endpoints(
endpoints.remove(endpoint)
self._peers.data[self.app]["endpoints"] = json.dumps(endpoints)

def _generate_ldap_service(self) -> dict:
    """Generate the Pebble service definition for the LDAP synchronizer.

    Builds the environment for ``/start-ldap-synchronizer.sh`` from the
    LDAP relation parameters (``get_ldap_parameters()``) and the local
    PostgreSQL connection details.

    Returns:
        dict: a Pebble service definition (override/summary/command/
        startup/environment) for the LDAP sync service.
    """
    ldap_params = self.get_ldap_parameters()

    # Split the LDAP URL into host and port for the synchronizer script.
    # NOTE(review): assumes "ldapurl" is always present and parseable when
    # this is called — confirm the LDAP relation guarantees this.
    ldap_url = urlparse(ldap_params["ldapurl"])
    ldap_host = ldap_url.hostname
    ldap_port = ldap_url.port

    ldap_base_dn = ldap_params["ldapbasedn"]
    ldap_bind_username = ldap_params["ldapbinddn"]
    # Fixed typo: local was previously misspelled "ldap_bing_password".
    ldap_bind_password = ldap_params["ldapbindpasswd"]

    return {
        "override": "replace",
        "summary": "synchronize LDAP users",
        "command": "/start-ldap-synchronizer.sh",
        "startup": "enabled",
        "environment": {
            "LDAP_HOST": ldap_host,
            # NOTE(review): urlparse gives the port as an int (or None);
            # Pebble environment values are normally strings — verify the
            # layer accepts a non-string here.
            "LDAP_PORT": ldap_port,
            "LDAP_BASE_DN": ldap_base_dn,
            "LDAP_BIND_USERNAME": ldap_bind_username,
            "LDAP_BIND_PASSWORD": ldap_bind_password,
            "POSTGRES_HOST": "127.0.0.1",
            "POSTGRES_PORT": DATABASE_PORT,
            "POSTGRES_DATABASE": DATABASE_DEFAULT_NAME,
            "POSTGRES_USERNAME": USER,
            "POSTGRES_PASSWORD": self.get_secret(APP_SCOPE, USER_PASSWORD_KEY),
        },
    }

def _generate_metrics_service(self) -> dict:
"""Generate the metrics service definition."""
return {
Expand All @@ -1757,7 +1791,7 @@ def _generate_metrics_service(self) -> dict:
if self.get_secret("app", MONITORING_PASSWORD_KEY) is not None
else "disabled"
),
"after": [self._postgresql_service],
"after": [self.postgresql_service],
"user": WORKLOAD_OS_USER,
"group": WORKLOAD_OS_GROUP,
"environment": {
Expand All @@ -1776,7 +1810,7 @@ def _postgresql_layer(self) -> Layer:
"summary": "postgresql + patroni layer",
"description": "pebble config layer for postgresql + patroni",
"services": {
self._postgresql_service: {
self.postgresql_service: {
"override": "replace",
"summary": "entrypoint of the postgresql + patroni image",
"command": f"patroni {self._storage_path}/patroni.yml",
Expand Down Expand Up @@ -1806,7 +1840,13 @@ def _postgresql_layer(self) -> Layer:
"user": WORKLOAD_OS_USER,
"group": WORKLOAD_OS_GROUP,
},
self._metrics_service: self._generate_metrics_service(),
self.ldap_sync_service: {
"override": "replace",
"summary": "synchronize LDAP users",
"command": "/start-ldap-synchronizer.sh",
"startup": "disabled",
},
self.metrics_service: self._generate_metrics_service(),
self.rotate_logs_service: {
"override": "replace",
"summary": "rotate logs",
Expand All @@ -1815,7 +1855,7 @@ def _postgresql_layer(self) -> Layer:
},
},
"checks": {
self._postgresql_service: {
self.postgresql_service: {
"override": "replace",
"level": "ready",
"http": {
Expand Down Expand Up @@ -1918,14 +1958,59 @@ def _restart(self, event: RunWithLock) -> None:
# Start or stop the pgBackRest TLS server service when TLS certificate change.
self.backup.start_stop_pgbackrest_service()

def _restart_metrics_service(self) -> None:
    """Restart the monitoring (metrics) service if the password was rotated.

    Compares the ``DATA_SOURCE_NAME`` currently in the Pebble plan against
    the expected monitoring credentials; when they diverge, re-adds the
    metrics service layer and restarts the service.
    """
    container = self.unit.get_container("postgresql")
    current_layer = container.get_plan()

    # Use .get() so a plan that does not (yet) contain the metrics service
    # is a no-op instead of a KeyError — the original indexed the dict and
    # only then truth-tested the (already retrieved) value.
    metrics_service = current_layer.services.get(self.metrics_service)
    if metrics_service is None:
        return

    data_source_name = metrics_service.environment.get("DATA_SOURCE_NAME", "")
    if not data_source_name.startswith(
        f"user={MONITORING_USER} password={self.get_secret('app', MONITORING_PASSWORD_KEY)} "
    ):
        container.add_layer(
            self.metrics_service,
            Layer({"services": {self.metrics_service: self._generate_metrics_service()}}),
            combine=True,
        )
        container.restart(self.metrics_service)

def _restart_ldap_sync_service(self) -> None:
    """Start, restart, or stop the LDAP sync service to match the unit's role.

    The synchronizer must only run on the primary and only while LDAP is
    enabled; any other combination stops the service. Early-exits while
    Patroni has not started yet.
    """
    if not self._patroni.member_started:
        logger.debug("Restart LDAP sync early exit: Patroni has not started yet")
        return

    container = self.unit.get_container("postgresql")
    sync_service = container.pebble.get_services(names=[self.ldap_sync_service])

    # Bug fix: the original called container.stop(self.pg_ldap_sync_service),
    # an attribute that does not exist on the charm (the field is
    # self.ldap_sync_service), raising AttributeError on non-primary units.
    # Also guard against the service not being in the plan before indexing.
    if not self.is_primary and sync_service and sync_service[0].is_running():
        logger.debug("Stopping LDAP sync service. It must only run in the primary")
        container.stop(self.ldap_sync_service)

    if self.is_primary and not self.is_ldap_enabled:
        logger.debug("Stopping LDAP sync service")
        container.stop(self.ldap_sync_service)
        return

    if self.is_primary and self.is_ldap_enabled:
        container.add_layer(
            self.ldap_sync_service,
            Layer({"services": {self.ldap_sync_service: self._generate_ldap_service()}}),
            combine=True,
        )
        logger.debug("Starting LDAP sync service")
        container.restart(self.ldap_sync_service)

@property
def _is_workload_running(self) -> bool:
"""Returns whether the workload is running (in an active state)."""
container = self.unit.get_container("postgresql")
if not container.can_connect():
return False

services = container.pebble.get_services(names=[self._postgresql_service])
services = container.pebble.get_services(names=[self.postgresql_service])
if len(services) == 0:
return False

Expand Down Expand Up @@ -2009,21 +2094,12 @@ def update_config(self, is_creating_backup: bool = False) -> bool:
})

self._handle_postgresql_restart_need()
self._restart_metrics_service()

# Restart the monitoring service if the password was rotated
container = self.unit.get_container("postgresql")
current_layer = container.get_plan()
if (
metrics_service := current_layer.services[self._metrics_service]
) and not metrics_service.environment.get("DATA_SOURCE_NAME", "").startswith(
f"user={MONITORING_USER} password={self.get_secret('app', MONITORING_PASSWORD_KEY)} "
):
container.add_layer(
self._metrics_service,
Layer({"services": {self._metrics_service: self._generate_metrics_service()}}),
combine=True,
)
container.restart(self._metrics_service)
# TODO: Un-comment
# When PostgreSQL-rock wrapping PostgreSQL-snap versions 162 / 163 gets published
# (i.e. snap contains https://github.com/canonical/charmed-postgresql-snap/pull/88)
# self._restart_ldap_sync_service()

return True

Expand Down Expand Up @@ -2102,14 +2178,14 @@ def _update_pebble_layers(self, replan: bool = True) -> None:
# Check if there are any changes to layer services.
if current_layer.services != new_layer.services:
# Changes were made, add the new layer.
container.add_layer(self._postgresql_service, new_layer, combine=True)
container.add_layer(self.postgresql_service, new_layer, combine=True)
logging.info("Added updated layer 'postgresql' to Pebble plan")
if replan:
container.replan()
logging.info("Restarted postgresql service")
if current_layer.checks != new_layer.checks:
# Changes were made, add the new layer.
container.add_layer(self._postgresql_service, new_layer, combine=True)
container.add_layer(self.postgresql_service, new_layer, combine=True)
logging.info("Updated health checks")

def _unit_name_to_pod_name(self, unit_name: str) -> str:
Expand Down
6 changes: 3 additions & 3 deletions src/relations/async_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,15 +524,15 @@ def _on_async_relation_changed(self, event: RelationChangedEvent) -> None:

if (
not self.container.can_connect()
or len(self.container.pebble.get_services(names=[self.charm._postgresql_service])) == 0
or len(self.container.pebble.get_services(names=[self.charm.postgresql_service])) == 0
):
logger.debug("Early exit on_async_relation_changed: container hasn't started yet.")
event.defer()
return

# Update the asynchronous replication configuration and start the database.
self.charm.update_config()
self.container.start(self.charm._postgresql_service)
self.container.start(self.charm.postgresql_service)

self._handle_database_start(event)

Expand Down Expand Up @@ -694,7 +694,7 @@ def _stop_database(self, event: RelationChangedEvent) -> bool:
logger.debug("Early exit on_async_relation_changed: following promoted cluster.")
return False

self.container.stop(self.charm._postgresql_service)
self.container.stop(self.charm.postgresql_service)

if self.charm.unit.is_leader():
# Remove the "cluster_initialised" flag to avoid self-healing in the update status hook.
Expand Down
Loading