Skip to content

Commit

Permalink
Implement seek-snapshot-contents action
Browse files Browse the repository at this point in the history
- Require core 3.3.0+
- Extract from the backup snapshot the index of share contents
- Search matching file/dir names with the index
- Clean up the index cache after 8 hours
  • Loading branch information
DavidePrincipi committed Nov 13, 2024
1 parent 5ef4445 commit ad07d0c
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 0 deletions.
1 change: 1 addition & 0 deletions build-images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ buildah add "${container}" ns8-user-manager-${user_manager_version}.tar.gz /imag
buildah add "${container}" ui/dist /ui
buildah config \
--label="org.nethserver.max-per-node=1" \
--label="org.nethserver.min-core=3.3.0-0" \
--label "org.nethserver.images=ghcr.io/nethserver/samba-dc:${IMAGETAG:-latest}" \
--label 'org.nethserver.authorizations=node:fwadm ldapproxy@node:accountprovider cluster:accountprovider traefik@node:routeadm' \
--label="org.nethserver.tcp-ports-demand=1" \
Expand Down
74 changes: 74 additions & 0 deletions imageroot/actions/seek-snapshot-contents/50seek_snapshot_contents
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python3

#
# Copyright (C) 2024 Nethesis S.r.l.
# SPDX-License-Identifier: GPL-3.0-or-later
#

import json
import sys
import os
import subprocess
import agent
import hashlib

# Read the action input and set up the state shared with the search helper.
request = json.load(sys.stdin)

limit_reached = False
contents_limit = request.get("limit", 10)

# The plocate cache directory name is derived from the destination/snapshot
# pair. MD5 is used only as a stable, short identifier here.
cache_seed = f"{request['destination']}-{request['snapshot']}"
plocate_cache = hashlib.md5(cache_seed.encode(), usedforsecurity=False).hexdigest()

# Check that the requested share is known to Samba before searching.
podman_exec = ["podman", "exec", "samba-dc"]
pcheck = agent.run_helper(
    *podman_exec,
    "net", "conf", "showshare", request["share"],
    stdout=subprocess.DEVNULL,
)
if pcheck.returncode != 0:
    agent.set_status('validation-failed')
    validation_errors = [
        {
            'field': 'share',
            'parameter': 'share',
            'value': request['share'],
            'error': 'share_not_found',
        },
    ]
    json.dump(validation_errors, fp=sys.stdout)
    sys.exit(2)

def locate_share_content():
    """Search the cached plocate index of the share for the request query.

    Runs ``locate-share-content`` inside the ``samba-dc`` container and
    stores the unique output lines, in order of appearance, in the global
    ``contents`` list. Reading stops as soon as ``contents_limit`` items
    are collected; in that case the global ``limit_reached`` is set.

    Returns the exit code of the helper command. Exit code 3 is what the
    helper script returns when the plocate database file is missing. When
    the limit is reached the output pipe is closed early, so a non-zero
    exit code is expected and must not be treated as a failure.
    """
    global limit_reached, contents, contents_limit
    # Search share paths matching the query:
    plocate_cmd = ['podman', 'exec', 'samba-dc', 'locate-share-content', '-d', f"/var/lib/samba/plocate/{plocate_cache}", '-s', request['share'], "-q", request.get('query', "")]
    contents = []
    seen = set()  # constant-time duplicate detection
    with subprocess.Popen(plocate_cmd, stdout=subprocess.PIPE, stderr=sys.stderr, text=True, errors='replace') as vproc:
        for line in vproc.stdout:
            # Strip only the line terminator: trailing blanks may be part
            # of a file name.
            content = line.rstrip("\n")
            if content in seen:
                continue
            seen.add(content)
            contents.append(content)
            if len(contents) >= contents_limit:
                limit_reached = True
                break
    # Leaving the "with" block closes the pipe and waits for the child.
    # Waiting with a short timeout while the pipe is still open could
    # raise TimeoutExpired if the producer is blocked on a full pipe.
    return vproc.returncode

def purge_plocate_cache():
    """Delete plocate cache directories older than 8 hours (480 minutes).

    The cleanup runs inside the ``samba-dc`` container and only considers
    the first-level directories under /var/lib/samba/plocate/.
    """
    cleanup_script = 'echo Removing old plocate databases ; find /var/lib/samba/plocate/ -mindepth 1 -maxdepth 1 -type d -cmin +480 -print0 | xargs -r -0 -- rm -rvf'
    container_cmd = ['podman', 'exec', 'samba-dc', 'bash', '-c', cleanup_script]
    agent.run_helper(*container_cmd)

# First attempt: search the cached index, if any.
exit_code = locate_share_content()
if exit_code == 3:
    print(agent.SD_INFO + "DB is not cached, fetch it from the backup snapshot", file=sys.stderr)
    purge_plocate_cache()
    # Cache is missing. Extract the .plocate files from the snapshot and
    # store them under a temporary cache directory:
    podman_args = ["--workdir=/srv"] + agent.agent.get_state_volume_args()
    restic_args = [
        "restore",
        f"{request['snapshot']}:volumes/data/backup",
        "--include=*.plocate",
        f"--target=/srv/volumes/data/plocate/{plocate_cache}"
    ]
    agent.run_restic(agent.redis_connect(), request["destination"], request["repopath"], podman_args, restic_args, stdout=sys.stderr).check_returncode()
    # Repeat the search. The exit code is kept in a module-level variable
    # because the Popen object is local to locate_share_content().
    exit_code = locate_share_content()
    # A non-zero exit code is expected when reading stopped at the limit,
    # so limit_reached is checked after the search has run.
    if not limit_reached and exit_code != 0:
        print(agent.SD_ERR + f"locate-share-content failed with exit code {exit_code}.", file=sys.stderr)
        sys.exit(1)

json.dump({
    "request": request,
    "contents": contents,
    "limit_reached": limit_reached,
}, fp=sys.stdout)
56 changes: 56 additions & 0 deletions imageroot/actions/seek-snapshot-contents/validate-input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "seek-snapshot-contents input",
"$id": "http://schema.nethserver.org/samba/seek-snapshot-contents-input.json",
"description": "Locate a file or directory in a backup snapshot",
"examples": [
{
"snapshot": "b9ae143be9a5cf86fccff4bd7907296a3feb9f904457e3d521f215c5445cdac7",
"destination": "86d1a8ac-ef89-557a-8e19-8582ab86b7c4",
"repopath": "samba/8efb6625-e70f-4a5f-9cb5-2836096d5054",
"share": "Complex Shar€ name"
},
{
"snapshot": "b9ae143be9a5cf86fccff4bd7907296a3feb9f904457e3d521f215c5445cdac7",
"destination": "86d1a8ac-ef89-557a-8e19-8582ab86b7c4",
"repopath": "samba/8efb6625-e70f-4a5f-9cb5-2836096d5054",
"share": "Complex Shar€ name",
"limit": 50,
"query": "MYFILE.TXT"
}
],
"type": "object",
"required": [
"destination",
"repopath",
"snapshot",
"share"
],
"properties": {
"destination": {
"type": "string",
"description": "The UUID of the backup destination where the Restic repository resides."
},
"repopath": {
"type": "string",
"description": "Restic repository path, under the backup destination"
},
"snapshot": {
"type": "string",
"description": "Restic snapshot ID to restore"
},
"share": {
"type": "string",
"pattern": "^[^/\\\\:><\"|?*]+$",
"description": "Seek the paths of this Samba share"
},
"query": {
"type": "string",
"description": "Seek matching paths"
},
"limit": {
"type": "integer",
"description": "Limit the number of returned contents"
}
}
}
46 changes: 46 additions & 0 deletions imageroot/actions/seek-snapshot-contents/validate-output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "seek-snapshot-contents output",
"$id": "http://schema.nethserver.org/samba/seek-snapshot-contents-output.json",
"description": "Locate a file or directory in a backup snapshot",
"examples": [
{
"request": {
"destination": "14030a59-a4e6-54cc-b8ea-cd5f97fe44c8",
"repopath": "mail/4372a5d5-0886-45d3-82e7-68d913716a4c",
"snapshot": "latest",
"share": "myshare",
"query": "*.php",
"limit": 100
},
"contents": [
"dir1/file001.php",
"dir1/file002.php",
"Project/NethServer/Main.php"
],
"limit_reached": false
}
],
"type": "object",
"required": [
"contents",
"limit_reached"
],
"properties": {
"contents": {
"type": "array",
"description": "List of share content paths, relative to the share root directory",
"items": {
"type": "string"
}
},
"limit_reached": {
"type": "boolean",
"description": "If true, the query matches more contents than the returned items"
},
"request": {
"type": "object",
"title": "Original request object"
}
}
}
76 changes: 76 additions & 0 deletions samba-dc/usr/local/sbin/locate-share-content
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash

#
# Copyright (C) 2024 Nethesis S.r.l.
# SPDX-License-Identifier: GPL-3.0-or-later
#

# Abort on the first failing command, and make a pipeline fail if any of
# its stages fails.
set -e -o pipefail

# Print an optional error message ($1) followed by the usage help on
# stderr, then terminate the script with exit code 2.
usage_error ()
{
    # From now on, everything written to stdout goes to stderr
    exec 1>&2
    if [ -n "$1" ]; then
        printf "Error: %s\n" "$1"
    fi
    printf '%s\n' \
        "Usage: $0 -d DATABASEDIR -s SHARE -q QUERY" \
        ' OPTIONS' \
        ' -d DATABASEDIR base directory of plocate databases' \
        ' -s SHARE name of the share to search' \
        ' -q QUERY query string to search' \
        ' -h Print this help'
    exit 2
}

# Print a wide-match regular expression built from the query string $1.
#
# Every non-alphanumeric character of the query is dropped; the remaining
# characters are joined with ".*", so that "query" becomes "q.*u.*e.*r.*y".
# If the query has no alphanumeric characters, an empty line is printed.
query_regex ()
{
    # Keep working variables local, to avoid clobbering the caller's scope
    local querymod regexp="" i
    # Ignore non-alphanumeric chars
    querymod=$(tr -c -d '[:alnum:]' <<<"$1")
    # Build a wide-match regexp with remaining chars
    for (( i=0 ; i<${#querymod} ; i++ )); do
        # The ".*" separator is added only between characters
        regexp+="${regexp:+.*}${querymod:${i}:1}"
    done
    printf '%s\n' "${regexp}"
}

# Parse command-line arguments
while getopts "hd:q:s:" optname ; do
    case ${optname} in
        q) query="${OPTARG}" ;;
        d) databasedir="${OPTARG}" ;;
        s) share="${OPTARG}" ;;
        # Print the help without a misleading "Error:" line. For invalid
        # options getopts has already printed its own diagnostic.
        h) usage_error ;;
        *) usage_error ;;
    esac
done
shift $((OPTIND - 1))

# -d and -s must be non-empty. -q must be present, but it can be an empty
# string, which matches every entry of the share.
[ -z "${databasedir}" ] && usage_error "missing -d DATABASEDIR argument"
[ -z "${share}" ] && usage_error "missing -s SHARE argument"
[ -z "${query+x}" ] && usage_error "missing -q QUERY argument"

# Temporary result files are removed on exit. Variables are expanded when
# the trap runs, so unset ones are simply skipped.
trap 'rm -f ${basename_results} ${wholename_results} ${regexp_results}' EXIT

database="${databasedir}/${share}.plocate"
if [ ! -f "${database}" ]; then
    echo "Missing plocate database file: ${database}" 1>&2
    # Exit code 3 tells the caller that the database is missing
    exit 3
fi

# Run the searches in parallel, each one writing to its own file
basename_results=$(mktemp)
plocate -i -b -d "${database}" "${query}" >"${basename_results}" &

if [ -n "${query}" ]; then
    wholename_results=$(mktemp)
    plocate -i -w -d "${database}" "${query}" >"${wholename_results}" &
    # A query without alphanumeric chars produces an empty regexp, which
    # would match every entry: skip the regexp search in that case.
    regexp=$(query_regex "${query}")
    if [ -n "${regexp}" ]; then
        regexp_results=$(mktemp)
        plocate -i -r -d "${database}" "${regexp}" >"${regexp_results}" &
    fi
fi

# Wait for all background searches. Their exit codes are ignored.
wait

stripprefix="${SAMBA_SHARES_DIR}/${share}/"
# Escape regexp metacharacters and the "|" delimiter, so that sed matches
# the prefix literally even if the share name contains special chars.
stripprefix_re=$(printf '%s' "${stripprefix}" | sed 's/[][\.*^$|]/\\&/g')
cat "${basename_results}" "${wholename_results:-/dev/null}" "${regexp_results:-/dev/null}" | sed "s|^${stripprefix_re}||"

0 comments on commit ad07d0c

Please sign in to comment.