Skip to content

Commit

Permalink
Refactor configuration, compression and encrypt methods
Browse files Browse the repository at this point in the history
  • Loading branch information
melchor629 committed Nov 29, 2018
1 parent e06de7c commit 04a7537
Show file tree
Hide file tree
Showing 7 changed files with 437 additions and 40 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ This allows you to auto-complete with the elements available in the configuratio
"maxBackupsKept": 7, //If not defined, by default are 7 backups to keep
"env": {
"gpg_passphrase": "If set, it will cypher everything with GPG and this value will be used as passphrase",
"gpg_keys": "If set, the compressed files will be cyphered and compressed using GPG with the keys of the recipient emails given (is an array of strings)",
"docker": "If set, the utilities will run in a docker container instead of using native commands",
"pgnetwork": "[Docker] Defines which network will use to connect to the database (default host)",
"pgimage": "[Docker] Defines which image will use to run the container (default postgres)",
Expand All @@ -64,6 +65,10 @@ This allows you to auto-complete with the elements available in the configuratio
"mysqluser": "The username to connect to the database",
"mysqlpassword": "If defined, sets the password which will be used to connect to the database"
},
"compression": {
"strategy": "gzip|xz",
"level": 8
},
"providers": [
{
"type": "gdrive",
Expand Down Expand Up @@ -105,6 +110,13 @@ You can define as many steps as you wish. The idea is to keep every step as simp

### Predefined utility functions

`compress-encrypt`:
- **Description**: Executes a command and applies the configured compression and encryption strategies to the output it generates.
- **Parameters**:
1. Command to run that will output something
2. Base file name that will be created
- **Example**: `compress-encrypt "cat /dev/random" "random.bin"`

`backup-folder`:
- **Description**: Copies a folder to another that will be inside the backup folder, using `rsync`.
- **Parameters**:
Expand Down
40 changes: 40 additions & 0 deletions config/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@
],
"pattern": "^(.*)$"
},
"gpg_keys": {
"$id": "#/properties/env/properties/gpg_keys",
"type": "array",
"title": "If set, the compressed files will be cyphered and compressed using GPG with the keys of the recipient emails given",
"examples": [
[ "melchor9000@gmail.com" ]
],
"items": {
"$id": "#/properties/env/properties/gpg_keys/items",
"type": "string",
"pattern": "^(.+@.+\\..+)$"
}
},
"docker": {
"$id": "#/properties/env/properties/docker",
"type": "string",
Expand Down Expand Up @@ -180,6 +193,33 @@
},
"required": []
},
"compression": {
"$id": "#/properties/compression",
"type": ["object", "null"],
"title": "Configures the compression of things",
"required": [
"strategy"
],
"properties": {
"strategy": {
"$id": "#/properties/compression/properties/strategy",
"type": "string",
"title": "Selects the compression strategy",
"default": "gzip",
"enum": [
"gzip",
"xz"
]
},
"level": {
"$id": "#/properties/compression/properties/level",
"type": "number",
"title": "Selects the compression level",
"default": 5,
"enum": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]
}
}
},
"providers": {
"$id": "#/properties/providers",
"type": "array",
Expand Down
68 changes: 41 additions & 27 deletions mdbackup/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,51 +22,65 @@
import subprocess
import sys

from .archive import (
archive_folder,
get_compression_strategy,
gpg_passphrase_strategy,
gpg_key_strategy,
gzip_strategy,
)
from .backup import do_backup, get_backup_folders_sorted
from .config import Config
from .storage.drive import GDriveStorage

def main():
#Check if configuration file exists
if not Path('config/config.json').exists():
print('Config file "' + str(Path('config/config.json').absolute()) + '" does not exist')
#Check if configuration file exists and read it
try:
config = Config(Path('config/config.json'))
except (FileNotFoundError, IsADirectoryError, NotADirectoryError) as e:
print(e.args[0])
print('Check the paths and run again the utility')
sys.exit(1)

#Read the configuration
with open('config/config.json') as config_file:
config = json.load(config_file)
except KeyError as e:
print('Configuration is malformed')
print(e.args[0])
sys.exit(2)

logging.basicConfig(format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
level=logging.getLevelName(config.get('logLevel', 'WARNING')))
level=config.log_level)
logger = logging.getLogger('mdbackups')

#Do backups
backups_path = Path(config['backupsPath'])
backup = do_backup(backups_path, config.get('customUtilsScript'), **config['env'])
backups_path = config.backups_path
backup = do_backup(backups_path,
config.custom_utils_script,
**config.env,
compression_strategy=config.compression_strategy,
compression_level=config.compression_level)
final_items = []
items_to_remove = []

#(do the following only if there are any providers defined)
if len(config['providers']) > 0:
if len(config.providers) > 0:
#Compress directories
for item in backup.iterdir():
#Compress if it is a directory
if item.is_dir():
filename = item.parts[-1] + '.tar'
directory = item.relative_to(backup)
logger.info(f'Compressing directory {item} into {filename}')
#If a GPG passphrase is defined, compress and cypher using GPG
if 'gpg_passphrase' in config['env']:
filename += '.gpg'
end_cmd = f'| gpg --output "{filename}" --batch --passphrase "{config["env"]["gpg_passphrase"]}" --symmetric -'
else:
filename += '.gz'
end_cmd = f'| gzip > "{filename}"'
strategies = []

if config.compression_strategy is not None:
strategies.append(get_compression_strategy(
config.compression_strategy,
config.compression_level,
))

if 'gpg_keys' in config.env:
strategies.append(gpg_key_strategy(config.env['gpg_keys']))
elif 'gpg_passphrase' in config.env:
strategies.append(gpg_passphrase_strategy(config.env['gpg_passphrase']))

filename = archive_folder(backup, item, strategies)

#Do the compression
logger.debug(f'Executing command ["bash", "-c", \'tar -c "{str(directory)}" {end_cmd}\']')
_exec = subprocess.run(['bash', '-c', f'tar -c "{str(directory)}" {end_cmd}'],
cwd=str(backup), check=True)
final_items.append(Path(backup, filename))
items_to_remove.append(Path(backup, filename))
else:
Expand All @@ -75,7 +89,7 @@ def main():
try:
#Upload files to storage providers
backup_folder_name = backup.relative_to(backups_path).parts[0]
for prov_config in config['providers']:
for prov_config in config.providers:
gd = None
#Detect provider type and instantiate it
if 'gdrive' == prov_config['type']:
Expand Down Expand Up @@ -109,7 +123,7 @@ def main():
item.unlink()

#Cleanup old backups
max_backups = config.get('maxBackupsKept', 7)
max_backups = config.max_backups_kept
backups_list = get_backup_folders_sorted(backups_path)
logger.debug('List of folders available:\n{}'.format('\n'.join([str(b) for b in backups_list])))
for old in backups_list[0:max(0, len(backups_list)-max_backups)]:
Expand Down
111 changes: 111 additions & 0 deletions mdbackup/archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Small but customizable utility to create backups and store them in
# cloud storage providers
# Copyright (C) 2018 Melchor Alejo Garau Madrigal
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import logging
from pathlib import Path
import subprocess
from typing import List, Optional


# Public API of this module.
# ``__all__`` must list the exported names as strings: bare identifiers would be evaluated here, before the
# functions below are defined, and importing the module would fail with a NameError.
__all__ = [
    'archive_folder',
    'get_compression_strategy',
    'gpg_passphrase_strategy',
    'gpg_key_strategy',
]


def archive_folder(backup_path: Path, folder: Path, strategies: Optional[List] = None) -> str:
    """
    Archive a folder of a backup into a ``tar`` file and, optionally, pipe the archive through a chain of
    strategies (compression, encryption...). By default, no strategy is applied and a plain ``.tar`` is written.

    A strategy must be a callable that accepts no arguments and returns a tuple with the command to execute
    (it reads the stream from stdin and writes the result to stdout) and the extension to add to the file name.
    Strategies are applied in the order given, each one consuming the output of the previous one.

    :param backup_path: Folder of the backup; the command runs with this folder as working directory and the
                        archive is written inside it.
    :param folder: Folder to archive, must be located inside ``backup_path``.
    :param strategies: Strategies to apply to the ``tar`` stream, in order. ``None`` or empty means none.
    :return: The file name (relative to ``backup_path``) of the archived folder.
    :raises ValueError: If ``folder`` is not inside ``backup_path``.
    :raises subprocess.CalledProcessError: If the shell pipeline exits with a non-zero status.
    """
    # Local import: ``shlex`` is not part of this module's top-level imports
    import shlex

    logger = logging.getLogger(__name__)
    filename = folder.parts[-1] + '.tar'
    directory = folder.relative_to(backup_path)

    # ``-f -`` writes the archive to stdout explicitly instead of relying on tar's default archive, and ``--``
    # prevents a folder name that starts with a dash from being parsed as an option.
    pipeline = [f'tar -c -f - -- {shlex.quote(str(directory))}']

    # Chain every strategy: each command becomes one more stage of the pipe and contributes its extension,
    # so the final file name always matches what was really applied to the stream.
    for strategy in strategies or ():
        cmd, ext = strategy()
        pipeline.append(cmd)
        filename += ext

    logger.info(f'Compressing directory {folder} into {filename}')

    # The redirection to the final file is added exactly once, at the end of the pipeline
    command = ' | '.join(pipeline) + ' > ' + shlex.quote(filename)

    #Do the compression
    logger.debug('Executing command ["bash", "-c", %r]', command)
    subprocess.run(['bash', '-c', command], cwd=str(backup_path), check=True)

    return filename


def gzip_strategy(level: int = 5):
    """
    Build a compression strategy that pipes the ``tar`` stream through ``gzip``.

    ``level`` is interpolated as the numeric flag of the command (``gzip -<level>``). The returned callable
    takes no arguments and gives back the command to run together with the ``.gz`` extension.
    """
    extension = '.gz'

    def gzip():
        command = 'gzip -{}'.format(level)
        return command, extension

    return gzip


def xz_strategy(level: int = 5):
    """
    Build a compression strategy that pipes the ``tar`` stream through ``xz``.

    ``level`` is interpolated as the numeric flag of the command (``-<level>``). The returned callable takes
    no arguments and gives back the command to run together with the ``.xz`` extension.
    """
    def xz():
        # Same flags as always, just assembled piece by piece: stdin ("-") in, stdout ("-c") out
        arguments = ['xz', '-z', '-T', '0', f'-{level}', '-c', '-']
        return ' '.join(arguments), '.xz'

    return xz


def get_compression_strategy(strategy_name: str, level: Optional[int] = None):
    """
    Returns the compression strategy that matches the given name, configured with the given level.

    :param strategy_name: Name of the strategy, one of ``gzip`` or ``xz``.
    :param level: Compression level. If ``None``, the default level of the selected strategy is used.
    :return: The strategy callable, ready to be passed to ``archive_folder``.
    :raises ValueError: If the strategy name is not known.
    """
    if strategy_name == 'gzip':
        factory = gzip_strategy
    elif strategy_name == 'xz':
        factory = xz_strategy
    else:
        raise ValueError(f'Unknown compression strategy "{strategy_name}"')

    # Forwarding ``None`` would end up rendered in the command line as "-None", so let the strategy use its
    # own default level in that case.
    if level is None:
        return factory()
    return factory(level)


def gpg_passphrase_strategy(passphrase: str):
    """
    Encryption strategy that uses ``gpg`` (symmetric, using a passphrase) to encrypt the ``tar`` file.

    GPG's own compression is turned off (``--compress-algo 0``), so compressing the stream is left to the
    compression strategy placed before this one.

    :param passphrase: Passphrase used to encrypt the stream.
    :return: The strategy callable, which returns the command to run and the extension to add.
    """
    # Local import: ``shlex`` is not part of this module's top-level imports
    import shlex

    # Quote the passphrase for the shell: spaces, quotes or "$" inside it must reach gpg untouched.
    # NOTE(review): the passphrase still travels in the command line, so it may be visible to other local
    # users through the process list; consider --passphrase-file/--passphrase-fd.
    # NOTE(review): the output is not ASCII-armored (no --armor) but the extension is ".asc" - confirm.
    command = f'gpg --compress-algo 0 --output - --batch --passphrase {shlex.quote(passphrase)} --symmetric -'

    def gpg(filename: Optional[str] = None):
        # ``filename`` is unused and only kept for backwards compatibility: strategies are called
        # without arguments, so it cannot be a required parameter.
        return command, '.asc'

    return gpg


def gpg_key_strategy(recipients: List[str]):
    """
    Encryption strategy that uses ``gpg`` (using the keys of the recipients) to encrypt the ``tar`` file.

    GPG's own compression is turned off (``--compress-algo 0``), so compressing the stream is left to the
    compression strategy placed before this one.

    :param recipients: Emails of the recipients whose keys will be used. A single string is accepted as well
                       and treated as a list with one recipient.
    :return: The strategy callable, which returns the command to run and the extension to add.
    :raises ValueError: If no recipient is given.
    """
    # Local import: ``shlex`` is not part of this module's top-level imports
    import shlex

    # Iterating a bare string would create one recipient per character
    if isinstance(recipients, str):
        recipients = [recipients]
    recipients = list(recipients)
    if not recipients:
        raise ValueError('At least one GPG recipient is required')

    # Quote every recipient so an odd value cannot break (or inject into) the shell pipeline
    recv = ' '.join(f'-r {shlex.quote(email)}' for email in recipients)
    # NOTE(review): the output is not ASCII-armored (no --armor) but the extension is ".asc" - confirm.
    command = f'gpg --compress-algo 0 --output - --encrypt {recv} -'

    def gpg(filename: Optional[str] = None):
        # ``filename`` is unused and only kept for backwards compatibility: strategies are called
        # without arguments, so it cannot be a required parameter.
        return command, '.asc'

    return gpg
1 change: 0 additions & 1 deletion mdbackup/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,3 @@ def get_backup_folders_sorted(backups_folder: Path) -> List[Path]:
folders = [folder for folder in backups_folder.iterdir() if folder.is_dir() and regex.match(folder.name)]
folders.sort()
return [folder.absolute() for folder in folders]

Loading

0 comments on commit 04a7537

Please sign in to comment.