Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions actions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ get-password:
username:
type: string
description: The username, the default value 'operator'.
Possible values - operator, replication.
Possible values - operator, replication, rewind.
set-password:
description: Change the system user's password, which is used by charm.
It is for internal charm users and SHOULD NOT be used by applications.
params:
username:
type: string
description: The username, the default value 'operator'.
Possible values - operator, replication.
Possible values - operator, replication rewind.
password:
type: string
description: The password will be auto-generated if this option is not specified.
Expand Down
5 changes: 5 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
PEER,
REPLICATION_PASSWORD_KEY,
REPLICATION_USER,
REWIND_PASSWORD_KEY,
SYSTEM_USERS,
TLS_CA_FILE,
TLS_CERT_FILE,
Expand Down Expand Up @@ -332,6 +333,9 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
if self.get_secret("app", REPLICATION_PASSWORD_KEY) is None:
self.set_secret("app", REPLICATION_PASSWORD_KEY, new_password())

if self.get_secret("app", REWIND_PASSWORD_KEY) is None:
self.set_secret("app", REWIND_PASSWORD_KEY, new_password())

# Create resources and add labels needed for replication.
self._create_resources()

Expand Down Expand Up @@ -621,6 +625,7 @@ def _patroni(self):
self._storage_path,
self.get_secret("app", USER_PASSWORD_KEY),
self.get_secret("app", REPLICATION_PASSWORD_KEY),
self.get_secret("app", REWIND_PASSWORD_KEY),
self.postgresql.is_tls_enabled(check_current_host=True),
)

Expand Down
4 changes: 3 additions & 1 deletion src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
PEER = "database-peers"
REPLICATION_USER = "replication"
REPLICATION_PASSWORD_KEY = "replication-password"
REWIND_USER = "rewind"
REWIND_PASSWORD_KEY = "rewind-password"
TLS_KEY_FILE = "key.pem"
TLS_CA_FILE = "ca.pem"
TLS_CERT_FILE = "cert.pem"
Expand All @@ -15,4 +17,4 @@
WORKLOAD_OS_GROUP = "postgres"
WORKLOAD_OS_USER = "postgres"
# List of system usernames needed for correct work of the charm/workload.
SYSTEM_USERS = [REPLICATION_USER, USER]
SYSTEM_USERS = [REPLICATION_USER, REWIND_USER, USER]
6 changes: 5 additions & 1 deletion src/patroni.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
wait_fixed,
)

from constants import TLS_CA_FILE
from constants import REWIND_USER, TLS_CA_FILE

logger = logging.getLogger(__name__)

Expand All @@ -46,6 +46,7 @@ def __init__(
storage_path: str,
superuser_password: str,
replication_password: str,
rewind_password: str,
tls_enabled: bool,
):
self._endpoint = endpoint
Expand All @@ -56,6 +57,7 @@ def __init__(
self._members_count = len(self._endpoints)
self._superuser_password = superuser_password
self._replication_password = replication_password
self._rewind_password = rewind_password
self._tls_enabled = tls_enabled
# Variable mapping to requests library verify parameter.
# The CA bundle file is used to validate the server certificate when
Expand Down Expand Up @@ -192,6 +194,8 @@ def render_patroni_yml_file(self, enable_tls: bool = False) -> None:
storage_path=self._storage_path,
superuser_password=self._superuser_password,
replication_password=self._replication_password,
rewind_user=REWIND_USER,
rewind_password=self._rewind_password,
)
self._render_file(f"{self._storage_path}/patroni.yml", rendered, 0o644)

Expand Down
9 changes: 9 additions & 0 deletions templates/patroni.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ bootstrap:
dcs:
postgresql:
use_pg_rewind: true
remove_data_directory_on_rewind_failure: true
remove_data_directory_on_diverged_timelines: true
parameters:
archive_command: /bin/true
archive_mode: on
wal_level: logical
initdb:
- auth-host: md5
- auth-local: trust
Expand Down Expand Up @@ -51,6 +57,9 @@ postgresql:
authentication:
replication:
password: {{ replication_password }}
rewind:
username: {{ rewind_user }}
password: {{ rewind_password }}
superuser:
password: {{ superuser_password }}
use_endpoints: true
63 changes: 63 additions & 0 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
RetryError,
Retrying,
retry,
retry_if_exception,
retry_if_result,
stop_after_attempt,
wait_exponential,
Expand Down Expand Up @@ -186,6 +187,21 @@ async def deploy_and_relate_application_with_postgresql(
return relation.id


async def enable_connections_logging(ops_test: OpsTest, unit_name: str) -> None:
"""Turn on the log of all connections made to a PostgreSQL instance.

Args:
ops_test: The ops test framework instance
unit_name: The name of the unit to turn on the connection logs
"""
unit_address = await get_unit_address(ops_test, unit_name)
requests.patch(
f"https://{unit_address}:8008/config",
json={"postgresql": {"parameters": {"log_connections": True}}},
verify=False,
)


async def execute_query_on_unit(
unit_address: str,
password: str,
Expand Down Expand Up @@ -357,6 +373,11 @@ async def get_password(
return result.results[f"{username}-password"]


@retry(
retry=retry_if_exception(KeyError),
stop=stop_after_attempt(10),
wait=wait_exponential(multiplier=1, min=2, max=30),
)
async def get_primary(ops_test: OpsTest, unit_id=0) -> str:
"""Get the primary unit.

Expand Down Expand Up @@ -450,6 +471,28 @@ async def check_tls_patroni_api(ops_test: OpsTest, unit_name: str, enabled: bool
return False


@retry(
retry=retry_if_result(lambda x: not x),
stop=stop_after_attempt(10),
wait=wait_exponential(multiplier=1, min=2, max=30),
)
async def primary_changed(ops_test: OpsTest, old_primary: str) -> bool:
"""Checks whether the primary unit has changed.

Args:
ops_test: The ops test framework instance
old_primary: The name of the unit that was the primary before.
"""
other_unit = [
unit.name
for unit in ops_test.model.applications[DATABASE_APP_NAME].units
if unit.name != old_primary
][0]
unit_id = int(other_unit.split("/")[1])
primary = await get_primary(ops_test, unit_id)
return primary != old_primary


async def restart_patroni(ops_test: OpsTest, unit_name: str) -> None:
"""Restart Patroni on a specific unit.

Expand Down Expand Up @@ -478,6 +521,26 @@ def resource_exists(client: Client, resource: GenericNamespacedResource) -> bool
return False


async def run_command_on_unit(ops_test: OpsTest, unit_name: str, command: str) -> str:
"""Run a command on a specific unit.

Args:
ops_test: The ops test framework instance
unit_name: The name of the unit to run the command on
command: The command to run

Returns:
the command output if it succeeds, otherwise raises an exception.
"""
complete_command = f"ssh --container postgresql {unit_name} {command}"
return_code, stdout, _ = await ops_test.juju(*complete_command.split())
if return_code != 0:
raise Exception(
"Expected command %s to succeed instead it failed: %s", command, return_code
)
return stdout


async def scale_application(ops_test: OpsTest, application_name: str, scale: int) -> None:
"""Scale a given application to a specific unit count.

Expand Down
35 changes: 19 additions & 16 deletions tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import requests
from lightkube import AsyncClient
from lightkube.resources.core_v1 import Pod
from psycopg2 import sql
from pytest_operator.plugin import OpsTest
from tenacity import retry, retry_if_result, stop_after_attempt, wait_exponential

from tests.helpers import METADATA, STORAGE_PATH
from tests.integration.helpers import (
Expand Down Expand Up @@ -100,27 +100,41 @@ async def test_settings_are_correct(ops_test: OpsTest, unit_id: int):
# Here the SQL query gets a key-value pair composed by the name of the setting
# and its value, filtering the retrieved data to return only the settings
# that were set by Patroni.
settings_names = [
"archive_command",
"archive_mode",
"data_directory",
"cluster_name",
"data_checksums",
"listen_addresses",
"wal_level",
]
cursor.execute(
"""SELECT name,setting
FROM pg_settings
WHERE name IN
('data_directory', 'cluster_name', 'data_checksums', 'listen_addresses');"""
sql.SQL("SELECT name,setting FROM pg_settings WHERE name IN ({});").format(
sql.SQL(", ").join(sql.Placeholder() * len(settings_names))
),
settings_names,
)
records = cursor.fetchall()
settings = convert_records_to_dict(records)

# Validate each configuration set by Patroni on PostgreSQL.
assert settings["archive_command"] == "/bin/true"
assert settings["archive_mode"] == "on"
assert settings["cluster_name"] == f"patroni-{APP_NAME}"
assert settings["data_directory"] == f"{STORAGE_PATH}/pgdata"
assert settings["data_checksums"] == "on"
assert settings["listen_addresses"] == "0.0.0.0"
assert settings["wal_level"] == "logical"

# Retrieve settings from Patroni REST API.
result = requests.get(f"http://{host}:8008/config")
settings = result.json()

# Validate configuration exposed by Patroni.
assert settings["postgresql"]["use_pg_rewind"]
assert settings["postgresql"]["remove_data_directory_on_rewind_failure"]
assert settings["postgresql"]["remove_data_directory_on_diverged_timelines"]


@pytest.mark.charm_tests
Expand Down Expand Up @@ -308,17 +322,6 @@ async def test_redeploy_charm_same_model(ops_test: OpsTest):
)


@retry(
retry=retry_if_result(lambda x: not x),
stop=stop_after_attempt(10),
wait=wait_exponential(multiplier=1, min=2, max=30),
)
async def primary_changed(ops_test: OpsTest, old_primary: str) -> bool:
"""Checks whether or not the primary unit has changed."""
primary = await get_primary(ops_test)
return primary != old_primary


async def get_primary(ops_test: OpsTest, unit_id=0) -> str:
"""Get the primary unit.

Expand Down
39 changes: 39 additions & 0 deletions tests/integration/test_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
check_tls,
check_tls_patroni_api,
deploy_and_relate_application_with_postgresql,
enable_connections_logging,
get_primary,
primary_changed,
run_command_on_unit,
)

MATTERMOST_APP_NAME = "mattermost"
Expand Down Expand Up @@ -65,6 +69,41 @@ async def test_mattermost_db(ops_test: OpsTest) -> None:
assert await check_tls(ops_test, unit.name, enabled=True)
assert await check_tls_patroni_api(ops_test, unit.name, enabled=True)

# Test TLS being used by pg_rewind. To accomplish that, get the primary unit
# and a replica that will be promoted to primary (this should trigger a rewind
# operation when the old primary is started again).
primary = await get_primary(ops_test)
replica = [
unit.name
for unit in ops_test.model.applications[DATABASE_APP_NAME].units
if unit.name != primary
][0]

# Enable additional logs on the PostgreSQL instance to check TLS
# being used in a later step.
await enable_connections_logging(ops_test, primary)

# Promote the replica to primary.
await run_command_on_unit(
ops_test,
replica,
'su postgres -c "/usr/lib/postgresql/14/bin/pg_ctl -D /var/lib/postgresql/data/pgdata promote"',
)

# Stop the initial primary.
await run_command_on_unit(ops_test, primary, "/charm/bin/pebble stop postgresql")

# Check that the primary changed.
assert await primary_changed(ops_test, primary), "primary not changed"

# Restart the initial primary and check the logs to ensure TLS is being used by pg_rewind.
await run_command_on_unit(ops_test, primary, "/charm/bin/pebble start postgresql")
logs = await run_command_on_unit(ops_test, replica, "/charm/bin/pebble logs")
assert (
"connection authorized: user=rewind database=postgres"
" SSL enabled (protocol=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384, bits=256)" in logs
), "TLS is not being used on pg_rewind connections"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow so thorough 🤩


# Deploy and check Mattermost user and database existence.
relation_id = await deploy_and_relate_application_with_postgresql(
ops_test, "mattermost-k8s", MATTERMOST_APP_NAME, APPLICATION_UNITS, status="waiting"
Expand Down
9 changes: 8 additions & 1 deletion tests/unit/test_patroni.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jinja2 import Template
from tenacity import RetryError

from constants import REWIND_USER
from patroni import Patroni
from tests.helpers import STORAGE_PATH

Expand All @@ -23,6 +24,7 @@ def setUp(self):
STORAGE_PATH,
"superuser-password",
"replication-password",
"rewind-password",
False,
)

Expand Down Expand Up @@ -87,6 +89,8 @@ def test_render_patroni_yml_file(self, _render_file):
storage_path=self.patroni._storage_path,
superuser_password=self.patroni._superuser_password,
replication_password=self.patroni._replication_password,
rewind_user=REWIND_USER,
rewind_password=self.patroni._rewind_password,
)

# Setup a mock for the `open` method, set returned data to postgresql.conf template.
Expand Down Expand Up @@ -117,6 +121,8 @@ def test_render_patroni_yml_file(self, _render_file):
storage_path=self.patroni._storage_path,
superuser_password=self.patroni._superuser_password,
replication_password=self.patroni._replication_password,
rewind_user=REWIND_USER,
rewind_password=self.patroni._rewind_password,
)
self.assertNotEqual(expected_content_with_tls, expected_content)

Expand Down Expand Up @@ -173,12 +179,13 @@ def test_render_postgresql_conf_file(self, _render_file):
# Also test with multiple planned units (synchronous_commit is turned on).
self.patroni = Patroni(
"postgresql-k8s-0",
"postgresql-k8s-0",
["postgresql-k8s-0", "postgresql-k8s-1"],
"postgresql-k8s-primary.dev.svc.cluster.local",
"test-model",
STORAGE_PATH,
"superuser-password",
"replication-password",
"rewind-password",
False,
)
expected_content = template.render(
Expand Down