Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions lib/charms/postgresql_k8s/v0/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 51
LIBPATCH = 52

# Groups to distinguish HBA access
ACCESS_GROUP_IDENTITY = "identity_access"
Expand Down Expand Up @@ -698,15 +698,22 @@ def list_valid_privileges_and_roles(self) -> Tuple[Set[str], Set[str]]:
"superuser",
}, {role[0] for role in cursor.fetchall() if role[0]}

def set_up_database(self) -> None:
def set_up_database(self, temp_location: Optional[str] = None) -> None:
"""Set up postgres database with the right permissions."""
connection = None
cursor = None
try:
with self._connect_to_database() as connection, connection.cursor() as cursor:
cursor.execute("SELECT TRUE FROM pg_roles WHERE rolname='admin';")
if cursor.fetchone() is not None:
return
connection = self._connect_to_database()
cursor = connection.cursor()

if temp_location is not None:
cursor.execute("SELECT TRUE FROM pg_tablespace WHERE spcname='temp';")
if cursor.fetchone() is None:
cursor.execute(f"CREATE TABLESPACE temp LOCATION '{temp_location}';")
cursor.execute("GRANT CREATE ON TABLESPACE temp TO public;")
Comment on lines +709 to +713
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Configure PostgreSQL to use the temp storage for temporary tablespaces.


cursor.execute("SELECT TRUE FROM pg_roles WHERE rolname='admin';")
if cursor.fetchone() is None:
# Allow access to the postgres database only to the system users.
cursor.execute("REVOKE ALL PRIVILEGES ON DATABASE postgres FROM PUBLIC;")
cursor.execute("REVOKE CREATE ON SCHEMA public FROM PUBLIC;")
Expand All @@ -725,6 +732,8 @@ def set_up_database(self) -> None:
logger.error(f"Failed to set up databases: {e}")
raise PostgreSQLDatabasesSetupError() from e
finally:
if cursor is not None:
cursor.close()
if connection is not None:
connection.close()

Expand Down
23 changes: 21 additions & 2 deletions metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,14 @@ containers:
postgresql:
resource: postgresql-image
mounts:
- storage: pgdata
- storage: archive
location: /var/lib/postgresql/archive
- storage: data
location: /var/lib/postgresql/data
- storage: logs
location: /var/lib/postgresql/logs
- storage: temp
location: /var/lib/postgresql/temp

resources:
postgresql-image:
Expand Down Expand Up @@ -80,9 +86,22 @@ requires:
optional: true

storage:
pgdata:
archive:
type: filesystem
description: Storage mount used for holding local backups (before typically sending them to remote object storage) when relevant/needed.
location: /var/lib/postgresql/archive
data:
type: filesystem
description: Storage mount used for storing all tables, indexes, and so on (except those from temporary tablespaces).
location: /var/lib/postgresql/data
logs:
type: filesystem
description: Storage mount used for storing all the logs that are part of the transactional commit path (WAL files).
location: /var/lib/postgresql/logs
temp:
type: filesystem
description: Storage mount used for storing temporary tablespaces (where typically sort operations happen).
location: /var/lib/postgresql/temp

assumes:
- k8s-api
Expand Down
10 changes: 4 additions & 6 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 21 additions & 3 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,14 @@ def __init__(self, *args):
self.framework.observe(self.on.secret_changed, self._on_secret_changed)
self.framework.observe(self.on[PEER].relation_departed, self._on_peer_relation_departed)
self.framework.observe(self.on.postgresql_pebble_ready, self._on_postgresql_pebble_ready)
self.framework.observe(self.on.pgdata_storage_detaching, self._on_pgdata_storage_detaching)
self.framework.observe(self.on.data_storage_detaching, self._on_pgdata_storage_detaching)
self.framework.observe(self.on.stop, self._on_stop)
self.framework.observe(self.on.promote_to_primary_action, self._on_promote_to_primary)
self.framework.observe(self.on.get_primary_action, self._on_get_primary)
self.framework.observe(self.on.update_status, self._on_update_status)

self._certs_path = "/usr/local/share/ca-certificates"
self._storage_path = self.meta.storages["pgdata"].location
self._storage_path = self.meta.storages["data"].location
self.pgdata_path = f"{self._storage_path}/pgdata"

self.upgrade = PostgreSQLUpgrade(
Expand Down Expand Up @@ -980,11 +980,26 @@ def _create_pgdata(self, container: Container):
self.pgdata_path, permissions=0o750, user=WORKLOAD_OS_USER, group=WORKLOAD_OS_GROUP
)
# Also, fix the permissions from the parent directory.
container.exec([
"chown",
f"{WORKLOAD_OS_USER}:{WORKLOAD_OS_GROUP}",
"/var/lib/postgresql/archive",
]).wait()
container.exec([
"chown",
f"{WORKLOAD_OS_USER}:{WORKLOAD_OS_GROUP}",
self._storage_path,
]).wait()
container.exec([
"chown",
f"{WORKLOAD_OS_USER}:{WORKLOAD_OS_GROUP}",
"/var/lib/postgresql/logs",
]).wait()
container.exec([
"chown",
f"{WORKLOAD_OS_USER}:{WORKLOAD_OS_GROUP}",
"/var/lib/postgresql/temp",
]).wait()
Comment on lines +983 to +1002
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Set ownership of storage volumes folders because on clouds like AWS and GCP, the ownership might be different.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, testing this on clouds will surprise us... different storages on different AZ/NAS/performance tiers... we will have fun :-)

Can we add AWS test here? can be separate backlog task.


def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
"""Event handler for PostgreSQL container on PebbleReadyEvent."""
Expand Down Expand Up @@ -1129,7 +1144,7 @@ def _initialize_cluster(self, event: WorkloadEvent) -> bool:
extra_user_roles=["pg_monitor"],
)

self.postgresql.set_up_database()
self.postgresql.set_up_database(temp_location="/var/lib/postgresql/temp")

access_groups = self.postgresql.list_access_groups()
if access_groups != set(ACCESS_GROUPS):
Expand Down Expand Up @@ -1463,6 +1478,9 @@ def _on_update_status_early_exit_checks(self, container) -> bool:
self.enable_disable_extensions()
return True

logger.error("calling self.fix_leader_annotation()")
self.fix_leader_annotation()

logger.debug("on_update_status early exit: Unit is in Blocked/Waiting status")
return False
return True
Expand Down
10 changes: 8 additions & 2 deletions src/relations/async_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,8 +704,14 @@ def _stop_database(self, event: RelationChangedEvent) -> bool:

# Remove and recreate the pgdata folder to enable replication of the data from the
# primary cluster.
logger.info("Removing and recreating pgdata folder")
self.container.exec(f"rm -r {POSTGRESQL_DATA_PATH}".split()).wait_output()
for path in [
"/var/lib/postgresql/archive",
POSTGRESQL_DATA_PATH,
"/var/lib/postgresql/logs",
"/var/lib/postgresql/temp",
]:
logger.info(f"Removing contents from {path}")
self.container.exec(f"find {path} -mindepth 1 -delete".split()).wait_output()
self.charm._create_pgdata(self.container)

self.charm._peers.data[self.charm.unit].update({"stopped": "True"})
Expand Down
4 changes: 4 additions & 0 deletions templates/patroni.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ bootstrap:
- auth-local: trust
- encoding: UTF8
- data-checksums
- waldir: /var/lib/postgresql/logs
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mapping the WAL directory to the logs storage in the primary.

{%- endif %}
pg_hba:
- {{ 'hostssl' if enable_tls else 'host' }} all all 0.0.0.0/0 md5
Expand Down Expand Up @@ -106,6 +107,8 @@ ctl:
{%- endif %}
pod_ip: '{{ endpoint }}'
postgresql:
basebackup:
- waldir: /var/lib/postgresql/logs
Comment on lines +110 to +111
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mapping the WAL directory to the logs storage in the replicas.

connect_address: '{{ endpoint }}:5432'
data_dir: {{ storage_path }}/pgdata
bin_dir: /usr/lib/postgresql/{{ version }}/bin
Expand All @@ -123,6 +126,7 @@ postgresql:
ssl_cert_file: {{ storage_path }}/cert.pem
ssl_key_file: {{ storage_path }}/key.pem
{%- endif %}
temp_tablespaces: temp
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Configure PostgreSQL to use the temp storage for temporary tablespaces.

{%- if pg_parameters %}
{%- for key, value in pg_parameters.items() %}
{{key}}: {{value}}
Expand Down
2 changes: 1 addition & 1 deletion tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
import yaml

# Parsed charm metadata; tests use it to locate the charm's storage mounts.
METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
# Filesystem location of the "data" storage (renamed from "pgdata" in this
# change-set); it holds the PGDATA directory.
STORAGE_PATH = METADATA["storage"]["data"]["location"]
3 changes: 3 additions & 0 deletions tests/integration/ha_tests/clean-data-dir.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#!/usr/bin/env bash

# Fail fast: -E propagates ERR traps, -e exits on error, -u flags unset vars,
# pipefail surfaces failures inside pipelines.
set -Eeuo pipefail

# Clear the contents of every PostgreSQL storage mount used by the charm.
# `find -mindepth 1 -delete` also removes dotfiles, which `rm -rf dir/*`
# silently misses, and matches the cleanup idiom used in
# src/relations/async_replication.py.
for dir in \
    /var/lib/postgresql/archive \
    /var/lib/postgresql/data/pgdata \
    /var/lib/postgresql/logs \
    /var/lib/postgresql/temp; do
    find "$dir" -mindepth 1 -delete
done
75 changes: 49 additions & 26 deletions tests/integration/ha_tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from kubernetes.client.api import core_v1_api
from kubernetes.stream import stream
from lightkube.core.client import Client, GlobalResource
from lightkube.core.exceptions import ApiError
from lightkube.resources.core_v1 import (
PersistentVolume,
PersistentVolumeClaim,
Expand Down Expand Up @@ -954,14 +955,17 @@ async def clear_continuous_writes(ops_test: OpsTest) -> None:
action = await action.wait()


async def get_storage_id(ops_test: OpsTest, unit_name: str) -> str:
"""Retrieves storage id associated with provided unit.
async def get_storage_ids(ops_test: OpsTest, unit_name: str) -> list[str]:
"""Retrieves storage ids associated with provided unit.

Note: this function exists as a temporary solution until this issue is ported to libjuju 2:
https://github.com/juju/python-libjuju/issues/694
"""
storage_ids = []
model_name = ops_test.model.info.name
proc = subprocess.check_output(f"juju storage --model={model_name}".split())
proc = subprocess.check_output(
f"juju storage --model={ops_test.controller_name}:{model_name}".split()
)
proc = proc.decode("utf-8")
for line in proc.splitlines():
if "Storage" in line:
Expand All @@ -973,8 +977,9 @@ async def get_storage_id(ops_test: OpsTest, unit_name: str) -> str:
if "detached" in line:
continue

if line.split()[0] == unit_name:
return line.split()[1]
if line.split()[0] == unit_name and line.split()[1].startswith("data"):
storage_ids.append(line.split()[1])
return storage_ids


def is_pods_exists(ops_test: OpsTest, unit_name: str) -> bool:
Expand Down Expand Up @@ -1047,18 +1052,26 @@ async def check_db(ops_test: OpsTest, app: str, db: str) -> bool:
return db in query


async def get_detached_storages(ops_test: OpsTest) -> list[str]:
    """Return the names of the currently detached ``data`` storages.

    Runs ``juju storage --format=json`` against the current controller/model
    and collects every storage whose name starts with ``data``, whose status
    is ``detached`` and whose life is ``alive``.

    Args:
        ops_test: the OpsTest fixture providing controller and model info.

    Returns:
        Non-empty list of detached storage names.

    Raises:
        Exception: if the juju call fails or no detached storage is found.
    """
    return_code, storages_list, stderr = await ops_test.juju(
        "storage", "-m", f"{ops_test.controller_name}:{ops_test.model.info.name}", "--format=json"
    )
    if return_code != 0:
        raise Exception(f"failed to get storages info with error: {stderr}")

    parsed_storages_list = json.loads(storages_list)
    detached_storages = []
    for storage_name, storage in parsed_storages_list["storage"].items():
        # BUG FIX: the previous expression `str(storage["life"] == "alive")`
        # stringified the boolean, and a non-empty string is always truthy, so
        # the life check was a no-op. Compare the value directly instead.
        if (
            storage_name.startswith("data")
            and str(storage["status"]["current"]) == "detached"
            and storage["life"] == "alive"
        ):
            detached_storages.append(storage_name)

    if detached_storages:
        return detached_storages

    raise Exception("failed to get detached storage")

Expand All @@ -1078,39 +1091,49 @@ async def check_system_id_mismatch(ops_test: OpsTest, unit_name: str) -> bool:
def delete_pvc(ops_test: OpsTest, pvc: GlobalResource):
    """Delete a PersistentVolumeClaim, tolerating API errors.

    Deletion is best-effort: an ApiError (e.g. the PVC is already gone or the
    API server refuses the request) is logged as a warning instead of failing
    the test run.

    Args:
        ops_test: the OpsTest fixture providing the model/namespace name.
        pvc: the PersistentVolumeClaim resource to delete.
    """
    client = Client(namespace=ops_test.model.name)
    try:
        client.delete(PersistentVolumeClaim, namespace=ops_test.model.name, name=pvc.metadata.name)
    except ApiError as e:
        # Removed the redundant `pass` that followed this log call.
        logger.warning(f"failed to delete pvc {pvc.metadata.name}: {e}")


def get_pvcs(ops_test: OpsTest, unit_name: str):
    """Return every PersistentVolumeClaim in the model belonging to *unit_name*.

    A claim belongs to the unit when the unit's name (with ``/`` replaced by
    ``-``, matching the Kubernetes naming scheme) appears in the claim's name.
    """
    k8s_client = Client(namespace=ops_test.model.name)
    needle = unit_name.replace("/", "-")
    return [
        claim
        for claim in k8s_client.list(PersistentVolumeClaim, namespace=ops_test.model.name)
        if needle in claim.metadata.name
    ]


def get_pvs(ops_test: OpsTest, unit_name: str):
    """Return every PersistentVolume in the model belonging to *unit_name*.

    A volume belongs to the unit when the unit's name (with ``/`` replaced by
    ``-``) appears in the volume's hostPath.
    """
    k8s_client = Client(namespace=ops_test.model.name)
    needle = unit_name.replace("/", "-")
    return [
        volume
        for volume in k8s_client.list(PersistentVolume, namespace=ops_test.model.name)
        if needle in str(volume.spec.hostPath.path)
    ]


def change_pvs_reclaim_policy(ops_test: OpsTest, pvs_configs: list[PersistentVolume], policy: str):
    """Patch the reclaim policy of every given PersistentVolume.

    Args:
        ops_test: the OpsTest fixture providing the model/namespace name.
        pvs_configs: the PersistentVolume resources to patch.
        policy: the new persistentVolumeReclaimPolicy value (e.g. "Retain").

    Returns:
        List of the patched PersistentVolume resources, in input order.
    """
    k8s_client = Client(namespace=ops_test.model.name)
    patch_body = {"spec": {"persistentVolumeReclaimPolicy": f"{policy}"}}
    return [
        k8s_client.patch(
            PersistentVolume,
            pv_config.metadata.name,
            patch_body,
            namespace=ops_test.model.name,
        )
        for pv_config in pvs_configs
    ]


def remove_pv_claimref(ops_test: OpsTest, pv_config: PersistentVolume):
Expand Down
Loading