From d4eaa2bba614a7f9ee6d0740d29b2e07871bf61e Mon Sep 17 00:00:00 2001 From: Mykola Marzhan Date: Sat, 11 May 2024 08:31:51 +0200 Subject: [PATCH] Add Differential backups support --- actions.yaml | 7 +++ src/backups.py | 14 ++++- templates/pgbackrest.conf.j2 | 3 ++ tests/integration/test_backups.py | 85 ++++++++++++++++++++++++++++++- tests/unit/test_backups.py | 16 +++++- 5 files changed, 120 insertions(+), 5 deletions(-) diff --git a/actions.yaml b/actions.yaml index ba46f1108b..a2d7d41c6f 100644 --- a/actions.yaml +++ b/actions.yaml @@ -3,6 +3,13 @@ create-backup: description: Creates a backup to s3 storage in AWS. + params: + type: + type: string + description: The backup type, the default value is 'full'. + Full backup is a full copy of all data. + Differential backup is a copy of only the data changed since the last full backup. + Possible values - full, differential. get-primary: description: Get the unit with is the primary/leader in the replication. get-password: diff --git a/src/backups.py b/src/backups.py index 952658e736..1046215ee6 100644 --- a/src/backups.py +++ b/src/backups.py @@ -454,8 +454,18 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent): self._initialise_stanza() - def _on_create_backup_action(self, event) -> None: + def _on_create_backup_action(self, event) -> None: # noqa: C901 """Request that pgBackRest creates a backup.""" + backup_type = event.params.get("type", "full").lower()[:4] + if backup_type not in ["full", "diff"]: + error_message = ( + f"Invalid backup type: {backup_type}. Possible values: full, differential." 
+ ) + logger.error(f"Backup failed: {error_message}") + event.fail(error_message) + return + + logger.info(f"A {backup_type} backup has been requested on unit") can_unit_perform_backup, validation_message = self._can_unit_perform_backup() if not can_unit_perform_backup: logger.error(f"Backup failed: {validation_message}") @@ -502,7 +512,7 @@ def _on_create_backup_action(self, event) -> None: "pgbackrest", f"--stanza={self.stanza_name}", "--log-level-console=debug", - "--type=full", + f"--type={backup_type}", "backup", ] if self.charm.is_primary: diff --git a/templates/pgbackrest.conf.j2 b/templates/pgbackrest.conf.j2 index 3c2349bf4f..40e540107f 100644 --- a/templates/pgbackrest.conf.j2 +++ b/templates/pgbackrest.conf.j2 @@ -1,6 +1,7 @@ [global] backup-standby=y repo1-retention-full=9999999 +repo1-retention-history=365 repo1-type=s3 repo1-path={{ path }} repo1-s3-region={{ region }} @@ -9,6 +10,8 @@ repo1-s3-bucket={{ bucket }} repo1-s3-uri-style={{ s3_uri_style }} repo1-s3-key={{ access_key }} repo1-s3-key-secret={{ secret_key }} +repo1-block=y +repo1-bundle=y start-fast=y {%- if enable_tls %} tls-server-address=* diff --git a/tests/integration/test_backups.py b/tests/integration/test_backups.py index 261f5fff05..27aabe49cf 100644 --- a/tests/integration/test_backups.py +++ b/tests/integration/test_backups.py @@ -156,7 +156,7 @@ async def test_backup_and_restore(ops_test: OpsTest, cloud_configs: Tuple[Dict, action = await ops_test.model.units.get(replica).run_action("list-backups") await action.wait() backups = action.results.get("backups") - assert backups, "backups not outputted" + assert len(backups.split("\n")) == 1, "full backup is not outputted" await ops_test.model.wait_for_idle(status="active", timeout=1000) # Write some data. 
@@ -166,11 +166,85 @@ async def test_backup_and_restore(ops_test: OpsTest, cloud_configs: Tuple[Dict, connection.cursor().execute("CREATE TABLE backup_table_2 (test_collumn INT );") connection.close() + # Run the "create backup" action. + logger.info("creating a backup") + action = await ops_test.model.units.get(replica).run_action( + "create-backup", **{"type": "diff"} + ) + await action.wait() + backup_status = action.results.get("backup-status") + assert backup_status, "backup hasn't succeeded" + async with ops_test.fast_forward(): + await ops_test.model.wait_for_idle(status="active", timeout=1000) + + # Run the "list backups" action. + logger.info("listing the available backups") + action = await ops_test.model.units.get(replica).run_action("list-backups") + await action.wait() + backups = action.results.get("backups") + assert len(backups.split("\n")) == 2, "differential backup is not outputted" + await ops_test.model.wait_for_idle(status="active", timeout=1000) + + # Write some data. + logger.info("creating a second table in the database") + with db_connect(host=address, password=password) as connection: + connection.autocommit = True + connection.cursor().execute("CREATE TABLE backup_table_3 (test_collumn INT );") + connection.close() # Scale down to be able to restore. async with ops_test.fast_forward(fast_interval="60s"): await scale_application(ops_test, database_app_name, 1) - # Run the "restore backup" action. + # Run the "restore backup" action for differential backup. 
+ for attempt in Retrying( + stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) + ): + with attempt: + logger.info("restoring the backup") + most_recent_backup = backups.split("\n")[-1] + backup_id = most_recent_backup.split()[0] + action = await ops_test.model.units.get(f"{database_app_name}/0").run_action( + "restore", **{"backup-id": backup_id} + ) + await action.wait() + restore_status = action.results.get("restore-status") + assert restore_status, "restore hasn't succeeded" + + # Wait for the restore to complete. + async with ops_test.fast_forward(): + await ops_test.model.wait_for_idle(status="active", timeout=1000) + + # Check that the backup was correctly restored by having only the first two created tables. + logger.info("checking that the backup was correctly restored") + primary = await get_primary(ops_test, database_app_name) + address = await get_unit_address(ops_test, primary) + with db_connect( + host=address, password=password + ) as connection, connection.cursor() as cursor: + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_1');" + ) + assert cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_1' doesn't exist" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_2');" + ) + assert cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_2' doesn't exist" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" + ) + assert not cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_3' exists" + connection.close() + + # Run the "restore backup" action for full backup. 
for attempt in Retrying( stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=30) ): @@ -210,6 +284,13 @@ async def test_backup_and_restore(ops_test: OpsTest, cloud_configs: Tuple[Dict, assert not cursor.fetchone()[ 0 ], "backup wasn't correctly restored: table 'backup_table_2' exists" + cursor.execute( + "SELECT EXISTS (SELECT FROM information_schema.tables" + " WHERE table_schema = 'public' AND table_name = 'backup_table_3');" + ) + assert not cursor.fetchone()[ + 0 + ], "backup wasn't correctly restored: table 'backup_table_3' exists" connection.close() # Run the following steps only in one cloud (it's enough for those checks). diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py index 356505a7e1..9bd19a86a1 100644 --- a/tests/unit/test_backups.py +++ b/tests/unit/test_backups.py @@ -1064,8 +1064,16 @@ def test_on_create_backup_action(harness): patch("charm.PostgreSQLBackups._retrieve_s3_parameters") as _retrieve_s3_parameters, patch("charm.PostgreSQLBackups._can_unit_perform_backup") as _can_unit_perform_backup, ): - # Test when the unit cannot perform a backup. + # Test when the unit cannot perform a backup because of type. mock_event = MagicMock() + mock_event.params = {"type": "wrong"} + harness.charm.backup._on_create_backup_action(mock_event) + mock_event.fail.assert_called_once() + mock_event.set_results.assert_not_called() + + # Test when the unit cannot perform a backup because of preflight check. + mock_event = MagicMock() + mock_event.params = {"type": "full"} _can_unit_perform_backup.return_value = (False, "fake validation message") harness.charm.backup._on_create_backup_action(mock_event) mock_event.fail.assert_called_once() @@ -1073,6 +1081,7 @@ def test_on_create_backup_action(harness): # Test when the charm fails to upload a file to S3. 
mock_event.reset_mock() + mock_event.params = {"type": "full"} _can_unit_perform_backup.return_value = (True, None) mock_s3_parameters = { "bucket": "test-bucket", @@ -1106,6 +1115,7 @@ def test_on_create_backup_action(harness): # Test when the backup fails. mock_event.reset_mock() + mock_event.params = {"type": "full"} _upload_content_to_s3.return_value = True _is_primary.return_value = True _execute_command.side_effect = ExecError( @@ -1122,12 +1132,14 @@ def test_on_create_backup_action(harness): # Test when the backup succeeds but the charm fails to upload the backup logs. mock_event.reset_mock() + mock_event.params = {"type": "full"} _upload_content_to_s3.reset_mock() _upload_content_to_s3.side_effect = [True, False] _execute_command.side_effect = None _execute_command.return_value = "fake stdout", "fake stderr" _list_backups.return_value = {"2023-01-01T09:00:00Z": harness.charm.backup.stanza_name} _update_config.reset_mock() + mock_event.params = {"type": "full"} harness.charm.backup._on_create_backup_action(mock_event) _upload_content_to_s3.assert_has_calls([ call( @@ -1147,6 +1159,7 @@ def test_on_create_backup_action(harness): # Test when the backup succeeds (including the upload of the backup logs). mock_event.reset_mock() + mock_event.params = {"type": "full"} _upload_content_to_s3.reset_mock() _upload_content_to_s3.side_effect = None _upload_content_to_s3.return_value = True @@ -1171,6 +1184,7 @@ def test_on_create_backup_action(harness): # Test when this unit is a replica (the connectivity to the database should be changed). mock_event.reset_mock() + mock_event.params = {"type": "full"} _upload_content_to_s3.reset_mock() _is_primary.return_value = False harness.charm.backup._on_create_backup_action(mock_event)