Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
5efd7fd
Enable sync mode
dragomirp Mar 14, 2023
848e807
Merge branch 'main' into dpe-1456-sync-mode
dragomirp Mar 23, 2023
359ebd9
Merge branch 'main' into dpe-1456-sync-mode
dragomirp Mar 23, 2023
02ae4ad
Merge branch 'main' into dpe-1456-sync-mode
marceloneppel Mar 29, 2023
426a00c
Remove custom conf
marceloneppel Mar 31, 2023
349beaf
Merge branch 'main' into dpe-1456-sync-mode
marceloneppel Mar 31, 2023
aeec50d
Add new expected k8s endpoint to helper function
marceloneppel Mar 31, 2023
ce1d6d9
Fix TLS test
marceloneppel Apr 5, 2023
4637469
Add pebble health check
marceloneppel Apr 5, 2023
63cb1c2
Fix freeze db test
marceloneppel Apr 5, 2023
ac75151
Enable restart db test
marceloneppel Apr 5, 2023
4fb09d9
Remove commented code
marceloneppel Apr 5, 2023
1ace69c
Fix unit tests
marceloneppel Apr 5, 2023
7dd6f95
Merge branch 'main' into dpe-1456-sync-mode
marceloneppel Apr 6, 2023
0ef7f51
Improve TLS management
marceloneppel Apr 8, 2023
17455b6
Add health check update
marceloneppel Apr 10, 2023
7e60fcb
Remove health check
marceloneppel Apr 10, 2023
f12651e
Add health check
marceloneppel Apr 10, 2023
8dedcce
Prevent multiple primaries
marceloneppel Apr 10, 2023
4efaf74
Remove unused code
marceloneppel Apr 10, 2023
d01c7e8
Check max number written
marceloneppel Apr 11, 2023
3cc2f06
Check writes on all instances
marceloneppel Apr 11, 2023
84d810d
Remove master_start_timeout setting
marceloneppel Apr 11, 2023
2278f80
Improve logs retrieval
marceloneppel Apr 11, 2023
364adae
Add CA chain to trusted certificates
marceloneppel Apr 11, 2023
b79aa19
Revert "Remove master_start_timeout setting"
marceloneppel Apr 11, 2023
ed77452
Use the right PG process in the HA tests
marceloneppel Apr 11, 2023
f89342e
Fix order in test
marceloneppel Apr 11, 2023
c29e5cd
Remove unused call to SIGCONT
marceloneppel Apr 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 52 additions & 35 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
PostgreSQLUpdateUserPasswordError,
)
from charms.postgresql_k8s.v0.postgresql_tls import PostgreSQLTLS
from charms.rolling_ops.v0.rollingops import RollingOpsManager
from charms.rolling_ops.v0.rollingops import RollingOpsManager, RunWithLock
from lightkube import ApiError, Client, codecs
from lightkube.models.core_v1 import ServicePort
from lightkube.resources.core_v1 import Endpoints, Pod, Service
Expand Down Expand Up @@ -76,7 +76,6 @@ def __init__(self, *args):
self._context = {"namespace": self._namespace, "app_name": self._name}
self.cluster_name = f"patroni-{self._name}"

self.framework.observe(self.on.install, self._on_install)
self.framework.observe(self.on.config_changed, self._on_config_changed)
self.framework.observe(self.on.leader_elected, self._on_leader_elected)
self.framework.observe(self.on[PEER].relation_changed, self._on_peer_relation_changed)
Expand Down Expand Up @@ -211,10 +210,6 @@ def _on_peer_relation_departed(self, event: RelationDepartedEvent) -> None:
self.postgresql_client_relation.update_read_only_endpoint()
self._remove_from_endpoints(endpoints_to_remove)

# Update the replication configuration.
self._patroni.render_postgresql_conf_file()
self._patroni.reload_patroni_configuration()

def _on_peer_relation_changed(self, event: RelationChangedEvent) -> None:
"""Reconfigure cluster members."""
# The cluster must be initialized first in the leader unit
Expand Down Expand Up @@ -262,11 +257,6 @@ def _on_peer_relation_changed(self, event: RelationChangedEvent) -> None:

self.unit.status = ActiveStatus()

def _on_install(self, _) -> None:
    """Handle the install event by rendering the custom postgresql.conf file.

    The event argument is ignored; all the work is delegated to the
    Patroni helper stored on ``self._patroni``.
    """
    # Generate the charm-managed PostgreSQL configuration file.
    self._patroni.render_postgresql_conf_file()

def _on_config_changed(self, _) -> None:
"""Handle the config-changed event."""
# TODO: placeholder method to implement logic specific to configuration change.
Expand Down Expand Up @@ -384,13 +374,6 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:

self._add_members(event)

# Update the replication configuration.
self._patroni.render_postgresql_conf_file()
try:
self._patroni.reload_patroni_configuration()
except RetryError:
pass # This error can happen in the first leader election, as Patroni is not running yet.

def _create_pgdata(self, container: Container):
"""Create the PostgreSQL data directory."""
path = f"{self._storage_path}/pgdata"
Expand All @@ -410,9 +393,6 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
# where the volume is mounted with more restrictive permissions.
self._create_pgdata(container)

# Create a new config layer.
new_layer = self._postgresql_layer()

self.unit.set_workload_version(self._patroni.rock_postgresql_version)

# Defer the initialization of the workload in the replicas
Expand All @@ -436,18 +416,8 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
event.defer()
return

# Get the current layer.
current_layer = container.get_plan()
# Check if there are any changes to layer services.
if current_layer.services != new_layer.services:
# Changes were made, add the new layer.
container.add_layer(self._postgresql_service, new_layer, combine=True)
logging.info("Added updated layer 'postgresql' to Pebble plan")
# TODO: move this file generation to on config changed hook
# when adding configs to this charm.
# Restart it and report a new status to Juju.
container.restart(self._postgresql_service)
logging.info("Restarted postgresql service")
# Start the database service.
self._update_pebble_layers()

# Ensure the member is up and running before marking the cluster as initialised.
if not self._patroni.member_started:
Expand Down Expand Up @@ -840,6 +810,15 @@ def _postgresql_layer(self) -> Layer:
"group": WORKLOAD_OS_GROUP,
},
},
"checks": {
self._postgresql_service: {
"override": "replace",
"level": "ready",
"http": {
"url": f"{self._patroni._patroni_url}/health",
},
}
},
}
return Layer(layer_config)

Expand Down Expand Up @@ -878,6 +857,15 @@ def push_tls_files_to_workload(self, container: Container = None) -> None:
user=WORKLOAD_OS_USER,
group=WORKLOAD_OS_GROUP,
)
container.push(
"/usr/local/share/ca-certificates/ca.crt",
ca,
make_dirs=True,
permissions=0o400,
user=WORKLOAD_OS_USER,
group=WORKLOAD_OS_GROUP,
)
container.exec(["update-ca-certificates"]).wait()
if cert is not None:
container.push(
f"{self._storage_path}/{TLS_CERT_FILE}",
Expand All @@ -890,8 +878,13 @@ def push_tls_files_to_workload(self, container: Container = None) -> None:

self.update_config()

def _restart(self, _) -> None:
def _restart(self, event: RunWithLock) -> None:
"""Restart PostgreSQL."""
if not self._patroni.are_all_members_ready():
logger.debug("Early exit _restart: not all members ready yet")
event.defer()
return

try:
self._patroni.restart_postgresql()
except RetryError:
Expand All @@ -900,6 +893,9 @@ def _restart(self, _) -> None:
self.unit.status = BlockedStatus(error_message)
return

# Update health check URL.
self._update_pebble_layers()

# Start or stop the pgBackRest TLS server service when TLS certificate change.
self.backup.start_stop_pgbackrest_service()

Expand All @@ -915,7 +911,6 @@ def update_config(self) -> None:
backup_id=self.app_peer_data.get("restoring-backup"),
stanza=self.app_peer_data.get("stanza"),
)
self._patroni.render_postgresql_conf_file()
if not self._patroni.member_started:
# If Patroni/PostgreSQL has not started yet and TLS relations was initialised,
# then mark TLS as enabled. This commonly happens when the charm is deployed
Expand All @@ -934,6 +929,28 @@ def update_config(self) -> None:
if restart_postgresql:
self.on[self.restart_manager.name].acquire_lock.emit()

def _update_pebble_layers(self) -> None:
    """Update the pebble layers to keep the health check URL up-to-date.

    Compares the current Pebble plan of the "postgresql" container with the
    layer built by ``_postgresql_layer()``. The new layer is added (with
    ``combine=True``) at most once when the services or the checks differ,
    and the service is restarted only when the services changed.
    """
    container = self.unit.get_container("postgresql")

    # Get the current plan and build the layer it is expected to match.
    current_layer = container.get_plan()
    new_layer = self._postgresql_layer()

    services_changed = current_layer.services != new_layer.services
    checks_changed = current_layer.checks != new_layer.checks
    if not (services_changed or checks_changed):
        # Nothing to update: the plan already matches the desired layer.
        return

    # Push the layer a single time, even when both sections changed; the
    # same layer object carries the services and the checks.
    container.add_layer(self._postgresql_service, new_layer, combine=True)

    if services_changed:
        logging.info("Added updated layer 'postgresql' to Pebble plan")
        # Only a change in the service definition requires a restart.
        container.restart(self._postgresql_service)
        logging.info("Restarted postgresql service")
    if checks_changed:
        logging.info("Updated health checks")

def _unit_name_to_pod_name(self, unit_name: str) -> str:
"""Converts unit name to pod name.

Expand Down
17 changes: 2 additions & 15 deletions src/patroni.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def render_patroni_yml_file(
stanza: name of the stanza created by pgBackRest.
backup_id: id of the backup that is being restored.
"""
# Open the template postgresql.conf file.
# Open the template patroni.yml file.
with open("templates/patroni.yml.j2", "r") as file:
template = Template(file.read())
# Render the template file with the correct values.
Expand All @@ -244,24 +244,11 @@ def render_patroni_yml_file(
restoring_backup=backup_id is not None,
backup_id=backup_id,
stanza=stanza,
minority_count=self._members_count // 2,
version=self.rock_postgresql_version.split(".")[0],
)
self._render_file(f"{self._storage_path}/patroni.yml", rendered, 0o644)

def render_postgresql_conf_file(self) -> None:
    """Render the PostgreSQL configuration file from its Jinja2 template.

    Reads ``templates/postgresql.conf.j2``, fills in the logging and
    synchronous replication settings, and hands the result to
    ``_render_file`` to be written as ``postgresql-k8s-operator.conf``
    under the storage path (0o644 is passed as the third argument,
    presumably the file mode).
    """
    # Load the postgresql.conf template.
    with open("templates/postgresql.conf.j2", "r") as template_file:
        conf_template = Template(template_file.read())

    # Synchronous commit is only enabled when there is more than one member.
    sync_commit = "on" if self._members_count > 1 else "off"

    # TODO: add extra configurations here later.
    conf_contents = conf_template.render(
        logging_collector="on",
        synchronous_commit=sync_commit,
        synchronous_standby_names="*",
    )

    target_path = f"{self._storage_path}/postgresql-k8s-operator.conf"
    self._render_file(target_path, conf_contents, 0o644)

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
def reload_patroni_configuration(self) -> None:
"""Reloads the configuration after it was updated in the file."""
Expand Down
8 changes: 7 additions & 1 deletion templates/patroni.yml.j2
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
bootstrap:
dcs:
synchronous_mode: true
synchronous_node_count: {{ minority_count }}
postgresql:
use_pg_rewind: true
remove_data_directory_on_rewind_failure: true
remove_data_directory_on_diverged_timelines: true
bin_dir: /usr/lib/postgresql/{{ version }}/bin
parameters:
synchronous_commit: on
synchronous_standby_names: "*"
{%- if enable_pgbackrest %}
archive_command: 'pgbackrest --stanza={{ stanza }} archive-push %p'
{% else %}
archive_command: /bin/true
{%- endif %}
archive_mode: {{ archive_mode }}
log_filename: 'postgresql.log'
log_directory: '/var/log/postgresql'
logging_collector: 'on'
password_encryption: md5
wal_level: logical
{%- if restoring_backup %}
Expand Down Expand Up @@ -50,7 +57,6 @@ ctl:
pod_ip: '{{ endpoint }}'
postgresql:
connect_address: '{{ endpoint }}:5432'
custom_conf: {{ storage_path }}/postgresql-k8s-operator.conf
data_dir: {{ storage_path }}/pgdata
bin_dir: /usr/lib/postgresql/{{ version }}/bin
listen: 0.0.0.0:5432
Expand Down
7 changes: 0 additions & 7 deletions templates/postgresql.conf.j2

This file was deleted.

4 changes: 4 additions & 0 deletions tests/integration/ha_tests/application-charm/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ requires:
database:
interface: postgresql_client
limit: 1

peers:
application-peers:
interface: application-peers
Loading