backup.php improved
* --compressionLevel, --chunkSize and --dbConn parameters added (see
  below)
* compression is now done after the tar step, which allows control over
  the compression method and makes the progress meter track the size of
  the processed repository resources rather than the compressed backup
  file size (which is more meaningful)
* the list of included files is preserved next to the output file,
  which makes it easier to find the file you are searching for when
  compression is used
* the output can now be split into chunks of a roughly defined size
  using the --chunkSize parameter
  (roughly, because resources are not split between chunks and the
  limit is based on the original resource size, not the resulting
  target file size - see the sketch after this list)
* the name of the database connection settings can now be chosen using
  a parameter (--dbConn)
* small fixes to error reporting and cleanup code
* passes phpstan level 6 checks
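
A minimal PHP sketch of the chunking rule (illustration only: the paths and sizes below are made up, and the real script writes tar file lists and shells out to tar, as the diff below shows):

    <?php
    // Resources are never split between chunks and the limit is checked
    // against the ORIGINAL resource size, so a chunk may overshoot the
    // limit by up to one resource (and compressed chunks may end up
    // smaller than the limit).
    $chunkSize = 1 << 20; // --chunkSize 1, i.e. 1 MiB, converted as in the script

    $resources = [ // hypothetical storage paths => sizes in bytes
        'aa/1' => 300 << 10,
        'bb/2' => 900 << 10, // the first chunk overshoots 1 MiB here
        'cc/3' => 512 << 10,
        'dd/4' => 700 << 10,
    ];

    $chunks = [[]];
    $size   = 0;
    foreach ($resources as $path => $fSize) {
        $chunks[array_key_last($chunks)][] = $path;
        $size += $fSize;
        if ($size > $chunkSize) { // rotate only after the limit is crossed
            $chunks[] = [];
            $size     = 0;
        }
    }
    $chunks = array_filter($chunks); // drop a possible trailing empty chunk
    foreach ($chunks as $i => $chunk) {
        echo 'chunk ' . ($i + 1) . ': ' . implode(', ', $chunk) . "\n";
    }
    // prints: chunk 1: aa/1, bb/2
    //         chunk 2: cc/3, dd/4
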
zozlak committed Nov 4, 2024
1 parent 3b70bcf commit 51d63d5
Showing 1 changed file with 115 additions and 61 deletions.
176 changes: 115 additions & 61 deletions backup.php
@@ -1,5 +1,6 @@
 #!/usr/bin/php
 <?php
+
 /*
  * The MIT License
  *
@@ -43,7 +44,7 @@ class BackupException extends Exception {
 $targetFile = $params[1] ?? '';
 if (empty($targetFile) || empty($params[0] ?? '')) {
     exit(<<<AAA
-backup.php [--dateFile path] [--dateFrom yyyy-mm-ddThh:mm:ss] [--dateTo yyyy-mm-ddThh:mm:ss] [--compression method] [--include mode] [--lock mode] repoConfigFile targetFile
+backup.php [--dateFile path] [--dateFrom yyyy-mm-ddThh:mm:ss] [--dateTo yyyy-mm-ddThh:mm:ss] [--compression method] [--compressionLevel level] [--include mode] [--lock mode] [--chunkSize sizeMiB] [--dbConn connectionName] repoConfigFile targetFile
 
 Creates a repository backup.
@@ -53,14 +54,16 @@ class BackupException extends Exception {
 --dateFile path to a file storying the --dateFrom parameter value
            the file content is automatically updated with the --dateTo value after a successful dump
-           whcich provides an easy implementation of incremental backups
+           which provides an easy implementation of incremental backups
            if the file doesn't exist, 1900-01-01T00:00:00 is assumed as the --dateFrom value
            --dateFrom takes precedence over --dateFile content
 --dateFrom, --dateTo only binaries modified within a given time period are included in the dump
            (--dateFrom default is 1900-01-01T00:00:00, --dateTo default is current date and time)
            --dateFrom takes precedence over --dateFile
 --compression (default none) compression method - one of none/bzip2/gzip
+--compressionLevel (default 1) compression level from 1 to 9 to be passed to bzip2/gzip
 --include (default all) set of database tables to include:
+           none - don't include a database dump
            all - include all tables
            skipSearch - skip full text search and spatial tables
            skipHistory - skip metadata history table
@@ -69,6 +72,11 @@ class BackupException extends Exception {
            try - try to acquire a lock on all matching binaries and fail if it's not possible
            wait - wait until it's possible to acquire a lock on all matching binaries
            skip - acquire lock on all matching binaries which are not cuurently locked by other transactions
+--chunkSize maximum size of resources included in a single targetFile.
+           After a resource added to the targetFile exceeds this size a new targetFile with a name suffix
+           is created.
+           If compression is used, the targetSize files can be smaller than this size.
+--dbConn (default backup) name of the database connection parameters in the config file
 AAA
@@ -91,11 +99,12 @@ class BackupException extends Exception {
 $cfg = json_decode(json_encode($cfg));
 
 if (substr($cfg->storage->dir, 0, 1) !== '/') {
-    throw Exception('Storage dir set up as a relative path in the repository config file - can not determine paths');
+    throw new Exception('Storage dir set up as a relative path in the repository config file - can not determine paths');
 }
 
 $pgdumpConnParam = ['host' => '-h', 'port' => '-p', 'dbname' => '', 'user' => '-U'];
-$pdoConnStr = $cfg->dbConnStr->backup ?? 'pgsql:';
+$connName = $params['dbConn'] ?? 'backup';
+$pdoConnStr = $cfg->dbConnStr->$connName ?? 'pgsql:';
 $pgdumpConnStr = 'pg_dump';
 foreach (explode(' ', preg_replace('/ +/', ' ', trim(substr($pdoConnStr, 6)))) as $i) {
     if (!empty($i)) {
@@ -128,9 +137,10 @@ class BackupException extends Exception {
 // BEGINNING TRANSACTION
 echo "Acquiring database locks\n";
 
-$pdo = new PDO($pdoConnStr);
-if ($pdo === false) {
-    throw new Exception('Database connection failed.');
+try {
+    $pdo = new PDO($pdoConnStr);
+} catch (PDOException) {
+    throw new BackupException("Could not connect to the database using the settings '$pdoConnStr'");
 }
 $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
 $pdo->query("SET application_name TO backupscript");
@@ -144,7 +154,7 @@ class BackupException extends Exception {
 $txId = $query->fetchColumn();
 
 $matchQuery = "
-    SELECT id 
+    SELECT id
     FROM
         resources
         JOIN metadata m1 USING (id)
@@ -188,18 +198,20 @@ class BackupException extends Exception {
 $snapshot = $pdo->query("SELECT pg_export_snapshot()")->fetchColumn();
 
 // DATABASE
-$dbDumpCmd = "$pgdumpConnStr -a -T *_seq -T transactions --snapshot $snapshot -f $targetFileSql";
-$dbDumpCmd .= ($params['include'] ?? '') == 'skipSearch' ? ' -T full_text_search -T spatial_search' : '';
-$dbDumpCmd .= ($params['include'] ?? '') == 'skipHistory' ? ' -T metadata_history' : '';
-$dbDumpCmd .= ($params['include'] ?? '') == 'skipSearchHistory' ? ' -T full_text_search -T metadata_history' : '';
-echo "Dumping database with:\n\t$dbDumpCmd\n";
-$out = $ret = null;
-exec($dbDumpCmd, $out, $ret);
-if ($ret !== 0) {
-    throw new Exception("Dumping database failed:\n\n" . $out);
+if ($params['include'] !== 'none') {
+    $dbDumpCmd = "$pgdumpConnStr -a -T *_seq -T transactions --snapshot $snapshot -f $targetFileSql";
+    $dbDumpCmd .= ($params['include']) == 'skipSearch' ? ' -T full_text_search -T spatial_search' : '';
+    $dbDumpCmd .= ($params['include']) == 'skipHistory' ? ' -T metadata_history' : '';
+    $dbDumpCmd .= ($params['include']) == 'skipSearchHistory' ? ' -T full_text_search -T metadata_history' : '';
+    echo "Dumping database with:\n\t$dbDumpCmd\n";
+    $out = $ret = null;
+    exec($dbDumpCmd, $out, $ret);
+    if ($ret !== 0) {
+        throw new Exception("Dumping database failed:\n\n" . implode("\n", $out));
+    }
+    printf("\tdump size: %.3f MB\n", filesize($targetFileSql) / 1024 / 1024);
+    $pdo->commit(); // must be here so the snapshot passed to pg_dump exists
 }
-printf("\tdump size: %.3f MB\n", filesize($targetFileSql) / 1024 / 1024);
-$pdo->commit(); // must be here so the snapshot passed to pg_dump exists
 // BINARIES LIST FILE
 echo "Preparing binary files list\n";
 
@@ -210,48 +222,86 @@ function getStorageDir(int $id, string $path, int $level, int $levelMax): string
     }
     return $path;
 }
+$out = $ret = null;
+exec('pv -h', $out, $ret);
+$pv = $ret === 0 ? " | pv -F ' %b ellapsed: %t cur: %r avg: %a'" : '';
+$level = $params['compressionLevel'] ?? 1;
+$tarCmdTmpl = "tar -c -T $targetFileList $pv";
+$tarCmdTmpl .= match ($params['compression'] ?? '') {
+    'gzip' => " | gzip -$level -c",
+    'bzip2' => " | bzip2 -$level -c",
+    default => '',
+};
+$tarCmdTmpl .= " > %targetFile% ; exit \${PIPESTATUS[0]}";
+$targetFileExt = match ($params['compression'] ?? '') {
+    'gzip' => '.gz',
+    'bzip2' => '.bz',
+    default => '.tar',
+};
+$chunkNo = 0;
+$targetFiles = [];
 
-$query = $pdo->prepare("SELECT id FROM resources WHERE transaction_id = ?");
-$query->execute([$txId]);
-$tfl = fopen($targetFileList, 'w');
-if ($tfl === false) {
-    throw new Exception('Can not create binary files index file');
+/**
+ * @param resource $tflHandle
+ */
+function writeOutput($tflHandle, int $chunkNo): mixed {
+    global $targetFiles, $targetFile, $tarCmdTmpl, $targetFileList, $targetFileExt;
+
+    fclose($tflHandle);
+
+    $tfName = $targetFile . ($chunkNo > 0 ? "_$chunkNo" : '') . $targetFileExt;
+    $targetFiles[] = $tfName;
+    $targetFiles[] = $tfName . '.tmp';
+    $tflName = preg_replace('/[.][^.]+$/', '.list', $tfName);
+    $targetFiles[] = $tflName;
+    $tarCmd = str_replace('%targetFile%', $tfName . '.tmp', $tarCmdTmpl);
+    echo "Creating dump with:\n\t$tarCmd\n";
+    $ret = null;
+    system('bash -c ' . escapeshellarg($tarCmd), $ret); // bash is needed to use the $PIPESTATUS
+    if ($ret !== 0) {
+        throw new Exception("Dump file creation failed");
+    }
+    rename($targetFileList, $tflName);
+    rename($tfName . '.tmp', $tfName);
+    return fopen($targetFileList, 'w') ?: throw new Exception('Can not create binary files index file');
+    ;
 }
-$nStrip = strlen(preg_replace('|/$|', '', $cfg->storage->dir)) + 1;
-$n = $size = 0;
-while ($id = $query->fetchColumn()) {
+$chunkSize = (($params['chunkSize'] ?? 0) << 20) ?: PHP_INT_MAX;
+$nStrip = strlen(preg_replace('|/$|', '', $cfg->storage->dir)) + 1;
+chdir($cfg->storage->dir);
+
+$query = $pdo->prepare("SELECT count(*), sum(value_n) FROM resources JOIN metadata USING (id) WHERE transaction_id = ? AND property = ?");
+$query->execute([$txId, $cfg->schema->binarySize]);
+list($n, $totalSize) = $query->fetch(PDO::FETCH_NUM);
+$size = sprintf('%.3f', $totalSize / 1024 / 1024);
+echo "\tfound $n file(s) with a total size of $size MB\n";
+
+$query = $pdo->prepare("SELECT id FROM resources WHERE transaction_id = ?");
+$query->execute([$txId]);
+$tflHandle = fopen($targetFileList, 'w') ?: throw new Exception('Can not create binary files index file');
+$size = $chunksCount = 0;
+while ($id = $query->fetchColumn()) {
     $path = getStorageDir($id, $cfg->storage->dir, 0, $cfg->storage->levels) . '/' . $id;
-    if (file_exists($path)) {
-        fwrite($tfl, substr($path, $nStrip) . "\n");
-        $n++;
-        $size += filesize($path);
-    } else {
+    if (!file_exists($path)) {
         echo "\twarning - binary $path is missing\n";
+        continue;
     }
+    $fSize = filesize($path);
+    fwrite($tflHandle, substr($path, $nStrip) . "\n");
+    $n++;
+    $size += $fSize;
+    if ($size > $chunkSize) {
+        $chunksCount++;
+        $tflHandle = writeOutput($tflHandle, $chunksCount);
+        $size = 0;
+    }
 }
-$size = sprintf('%.3f', $size / 1024 / 1024);
-echo "\tfound $n file(s) with a total size of $size MB\n";
-
-fwrite($tfl, basename($targetFileSql) . "\n");
-fclose($tfl);
-$tfl = null;
-
-// OUTPUT FILE creation
-chdir($cfg->storage->dir);
-$tarCmd = "tar -c -T $targetFileList";
-$tarCmd .= ($params['compression'] ?? '') === 'gzip' ? ' -z' : '';
-$tarCmd .= ($params['compression'] ?? '') === 'bzip2' ? ' -j' : '';
-$out = $ret = null;
-exec('pv -h', $out, $ret);
-$tarCmd .= $ret === 0 ? " | pv -F ' %b ellapsed: %t cur: %r avg: %a' > $targetFile" : "-f $targetFile";
-$tarCmd .= "; exit \${PIPESTATUS[0]}";
-echo "Creating dump with:\n\t$tarCmd\n";
-$tarCmd = 'bash -c ' . escapeshellarg($tarCmd); // bash is needed to use the $PIPESTATUS
-$ret = null;
-system($tarCmd, $ret);
-if ($ret !== 0) {
-    throw new Exception("Dump file creation failed");
+if (file_exists($targetFileSql)) {
+    fwrite($tflHandle, basename($targetFileSql) . "\n");
 }
+$chunksCount += $chunksCount > 0;
+$tflHandle = writeOutput($tflHandle, $chunksCount);
+fclose($tflHandle);
 
 // FINISHING
 if (isset($params['dateFile'])) {
@@ -261,21 +311,25 @@ function getStorageDir(int $id, string $path, int $level, int $levelMax): string
     echo "Dump completed successfully\n";
 } catch (BackupException $e) {
     // Well-known errors which don't require stack traces
-    if (file_exists($targetFile)) {
-        unlink($targetFile);
+    foreach ($targetFiles ?? [] as $i) {
+        if (file_exists($i)) {
+            unlink($i);
+        }
     }
     echo 'ERROR: ' . $e->getMessage() . "\n";
     $exit = 1;
 } catch (Throwable $e) {
-    if (file_exists($targetFile)) {
-        unlink($targetFile);
+    foreach ($targetFiles ?? [] as $i) {
+        if (file_exists($i)) {
+            unlink($i);
+        }
     }
     throw $e;
 } finally {
-    if ($tfl ?? null) {
-        fclose($tfl);
+    if (is_resource($tflHandle ?? null)) {
+        fclose($tflHandle);
     }
-    foreach ([$targetFileSql, $targetFileList] as $f) {
+    foreach ([$targetFileSql ?? '', $targetFileList ?? ''] as $f) {
         if (file_exists($f)) {
             unlink($f);
         }

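For illustration, a hypothetical invocation combining the new parameters (all paths below are made up) could look like:

    backup.php --compression gzip --compressionLevel 5 --chunkSize 10240 --dbConn backup --dateFile /var/backups/arche.date repo-config.yaml /var/backups/arche

Assuming more than 10 GiB of binaries match, this should produce /var/backups/arche_1.gz, /var/backups/arche_2.gz, etc., each with a matching .list file next to it recording which resources it contains; without --chunkSize, a single arche.gz plus arche.list would be written.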