Skip to content

Commit

Permalink
Move legal backup to use webdav
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanw committed Jul 18, 2024
1 parent 121d3be commit 42ffa1c
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 106 deletions.
3 changes: 3 additions & 0 deletions fragdenstaat_de/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,9 @@ def GEOIP_PATH(self):
DONATION_BACKUP_URL = env("DONATION_BACKUP_URL")
DONATION_BACKUP_CREDENTIALS = env("DONATION_BACKUP_CREDENTIALS")

# WebDAV base URL for legal backups, and its credentials as "username:password".
FDS_LEGAL_BACKUP_URL = env("FDS_LEGAL_BACKUP_URL")
FDS_LEGAL_BACKUP_CREDENTIALS = env("FDS_LEGAL_BACKUP_CREDENTIALS")

EMAIL_BACKEND = "fragdenstaat_de.theme.email_backend.CustomCeleryEmailBackend"
CELERY_EMAIL_BACKEND = "froide.foirequest.smtp.EmailBackend"
CELERY_EMAIL_TASK_CONFIG = {
Expand Down
123 changes: 53 additions & 70 deletions fragdenstaat_de/theme/legal_backup.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,86 @@
import base64
import io
import json
import logging
import os
from datetime import date, timedelta
from urllib.parse import quote_plus, urlparse
from xml.dom import minidom

from django.conf import settings

import requests

from froide.foirequest.pdf_generator import FoiRequestPDFGenerator

logger = logging.getLogger(__name__)

SCOPES = ["https://www.googleapis.com/auth/drive"]

RETENTION_PERIOD = timedelta(days=365 * 3) # 3 years


def get_drive_service():
    """Build a Google Drive v3 API client from service-account credentials.

    The ``FDS_LEGAL_BACKUP_CREDENTIALS`` environment variable is read as
    base64-encoded, UTF-8 JSON service-account info.

    Raises:
        KeyError: if ``FDS_LEGAL_BACKUP_CREDENTIALS`` is not set.
    """
    # Imported lazily so the Google client libraries are only needed when used.
    from google.oauth2.service_account import Credentials
    from googleapiclient.discovery import build

    encoded_info = os.environ["FDS_LEGAL_BACKUP_CREDENTIALS"]
    decoded_info = base64.b64decode(encoded_info).decode("utf-8")
    account_info = json.loads(decoded_info)
    drive_credentials = Credentials.from_service_account_info(
        account_info, scopes=SCOPES
    )
    return build("drive", "v3", credentials=drive_credentials)
def get_webdav():
    """Return the WebDAV connection details for the legal backup store.

    Reads ``settings.FDS_LEGAL_BACKUP_URL`` and
    ``settings.FDS_LEGAL_BACKUP_CREDENTIALS``; the credentials are expected
    in ``username:password`` form.

    Returns:
        A ``(url, username, password)`` tuple, with one trailing slash
        removed from the URL, or ``None`` when either setting is empty.

    Raises:
        ValueError: if the credentials contain no ``:`` separator.
    """
    webdav_url = settings.FDS_LEGAL_BACKUP_URL
    credentials = settings.FDS_LEGAL_BACKUP_CREDENTIALS
    if not credentials or not webdav_url:
        return None
    # Split on the first colon only, so a password may itself contain ":".
    webdav_username, webdav_password = credentials.split(":", 1)
    if webdav_url.endswith("/"):
        webdav_url = webdav_url[:-1]
    return webdav_url, webdav_username, webdav_password


def make_legal_backup_for_user(user):
# Creates one backup folder for the user and stores a PDF per FOI request in it.
# NOTE(review): this span is a flattened diff. It interleaves the removed
# Google Drive lines with the added WebDAV lines, and indentation is lost;
# the two variants must be read separately.
from googleapiclient.http import MediaIoBaseUpload

logger.info("Creating legal backup of user %s", user.id)

# NOTE(review): get_webdav() returns None when the URL or credentials setting
# is empty, in which case this unpacking raises TypeError; confirm callers
# only run this when the backup is configured.
webdav_url, webdav_username, webdav_password = get_webdav()

# Folder name layout: "<date left>:<pk>:<email>:<full name>".
folder_name = "{date}:{pk}:{email}:{name}".format(
date=user.date_left.date().isoformat(),
pk=user.pk,
email=user.email,
name=user.get_full_name(),
)
file_metadata = {
"name": folder_name,
"parents": [os.environ["FDS_LEGAL_BACKUP_FOLDER_ID"]],
"mimeType": "application/vnd.google-apps.folder",
}
drive_service = get_drive_service()
folder = drive_service.files().create(body=file_metadata, fields="id").execute()
folder_id = folder.get("id")
# quote_plus percent-encodes ":" and turns spaces into "+".
# NOTE(review): cleanup_legal_backups splits the last href segment on a
# literal ":"; confirm the server reports hrefs in a form that still works.
folder_url = f"{webdav_url}/{quote_plus(folder_name)}"
# MKCOL creates the folder (collection) on the WebDAV server.
# NOTE(review): no timeout is passed to any of the requests calls here.
response = requests.request(
"MKCOL", folder_url, auth=(webdav_username, webdav_password)
)
response.raise_for_status()

foirequests = user.foirequest_set.all()
for foirequest in foirequests:
pdf_generator = FoiRequestPDFGenerator(foirequest)

file_metadata = {
"name": "{}-{}.pdf".format(foirequest.pk, foirequest.slug),
"parents": [folder_id],
}
media = MediaIoBaseUpload(
io.BytesIO(pdf_generator.get_pdf_bytes()),
mimetype="application/pdf",
resumable=True,
# WebDAV variant: upload the PDF bytes with PUT as "<pk>-<slug>.pdf" inside the folder.
filename = "{}-{}.pdf".format(foirequest.pk, foirequest.slug)
file_handle = io.BytesIO(pdf_generator.get_pdf_bytes())
r = requests.put(
f"{folder_url}/{quote_plus(filename)}",
data=file_handle,
auth=(webdav_username, webdav_password),
)
drive_service.files().create(
body=file_metadata, media_body=media, fields="id"
).execute()
logger.info("Created legal backup of user %s with drive id %s", user.id, folder_id)
# A failed upload raises here, so the success log below is not reached.
r.raise_for_status()

logger.info("Created legal backup of user %s at %s", user.id, folder_url)


def cleanup_legal_backups():
# Deletes backup folders whose leading ISO date plus RETENTION_PERIOD lies
# before today.
# NOTE(review): this span is a flattened diff. It interleaves the removed
# Google Drive lines with the added WebDAV lines, and indentation is lost;
# the two variants must be read separately.
parent = os.environ["FDS_LEGAL_BACKUP_FOLDER_ID"]
drive_service = get_drive_service()
page_token = None
deleted_any = False

while True:
response = (
drive_service.files()
.list(
q="mimeType='application/vnd.google-apps.folder' and '{parent}' in parents".format(
parent=parent
),
fields="nextPageToken, files(id, name)",
pageToken=page_token,
# NOTE(review): get_webdav() returns None when unconfigured, in which case
# this unpacking raises TypeError.
webdav_url, webdav_username, webdav_password = get_webdav()
webdav_domain = urlparse(webdav_url).netloc

# PROPFIND asks the server to list the backup collection.
# NOTE(review): no Depth header or timeout is sent; confirm the server's
# default depth returns the child folders.
response = requests.request(
"PROPFIND", webdav_url, auth=(webdav_username, webdav_password)
)
response.raise_for_status()
today = date.today()

parser = minidom.parseString(response.text)

# NOTE(review): elements are matched by the literal "d:" prefix, so this
# assumes the server uses exactly that namespace prefix.
for entry in parser.getElementsByTagName("d:response"):
href = entry.getElementsByTagName("d:href")[0].firstChild.data.strip()
# NOTE(review): if an href ends in "/" (or is the listed collection itself),
# or is percent-encoded, name will not start with "<iso date>:" and
# date.fromisoformat raises ValueError; verify against a real response.
name = href.split("/")[-1]
cancel_date = date.fromisoformat(name.split(":")[0])
if cancel_date + RETENTION_PERIOD < today:
logger.info(
"Deleting expired legal backup %s at %s",
name,
href,
)
.execute()
)
today = date.today()
for folder in response.get("files", []):
name = folder["name"]
cancel_date = date.fromisoformat(name.split(":")[0])
if cancel_date + RETENTION_PERIOD < today:
logger.info(
"Deleting expired legal backup %s with drive id %s",
name,
folder["id"],
)
drive_service.files().delete(fileId=folder["id"]).execute()
deleted_any = True

page_token = response.get("nextPageToken", None)
if page_token is None:
break

if deleted_any:
drive_service.files().emptyTrash().execute()
# The delete URL is the configured host plus the href from the listing; the
# scheme is hard-coded to https regardless of the configured URL.
full_url = f"https://{webdav_domain}{href}"
# NOTE(review): the DELETE response is not checked, so a failed deletion
# passes silently.
requests.delete(full_url, auth=(webdav_username, webdav_password))
2 changes: 0 additions & 2 deletions requirements-production.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,3 @@ channels_redis
uvicorn
sentry-sdk
pillow-avif-plugin
google-api-python-client
google-auth-httplib2
36 changes: 2 additions & 34 deletions requirements-production.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ bleach==6.0.0
# froide-govplan
brotli==1.0.9
# via fonttools
cachetools==5.3.0
# via google-auth
celery==5.2.7
# via
# -r requirements.in
Expand Down Expand Up @@ -449,21 +447,7 @@ geopy==2.3.0
# via
# django-amenities
# froide-food
google-api-core==2.11.0
# via google-api-python-client
google-api-python-client==2.86.0
# via -r requirements-production.in
google-auth==2.18.1
# via
# google-api-core
# google-api-python-client
# google-auth-httplib2
google-auth-httplib2==0.1.0
# via
# -r requirements-production.in
# google-api-python-client
googleapis-common-protos==1.59.0
# via google-api-core

gunicorn==20.1.0
# via -r requirements-production.in
h11==0.14.0
Expand All @@ -476,10 +460,6 @@ html5lib==1.1
# weasyprint
htmldocx==0.0.6
# via django-legal-advice-builder
httplib2==0.22.0
# via
# google-api-python-client
# google-auth-httplib2
icalendar==5.0.5
# via
# -r requirements.in
Expand Down Expand Up @@ -586,10 +566,6 @@ prices==1.1.1
# froide-payment
prompt-toolkit==3.0.38
# via click-repl
protobuf==4.23.1
# via
# google-api-core
# googleapis-common-protos
psycopg[binary]==3.1.9
# via
# -r requirements.in
Expand Down Expand Up @@ -621,8 +597,6 @@ pyopenssl==23.2.0
# via -r requirements.in
pyotp==2.8.0
# via django-mfa3
pyparsing==3.0.9
# via httplib2
pypdf==3.14.0
# via
# -r requirements.in
Expand Down Expand Up @@ -693,13 +667,10 @@ requests==2.31.0
# froide-payment
# geocoder
# geoip2
# google-api-core
# stripe
# torchvision
rlpycairo==0.2.0
# via reportlab
rsa==4.9
# via google-auth
sentry-sdk==1.24.0
# via
# -r requirements-production.in
Expand All @@ -711,8 +682,6 @@ six==1.16.0
# django-elasticsearch-dsl
# djangorestframework-csv
# geocoder
# google-auth
# google-auth-httplib2
# html5lib
# python-dateutil
soupsieve==2.4.1
Expand Down Expand Up @@ -762,8 +731,7 @@ uritemplate==4.1.1
# via
# coreapi
# drf-spectacular
# google-api-python-client
urllib3==1.26.16
urllib3==1.26.13
# via
# elastic-transport
# google-auth
Expand Down

0 comments on commit 42ffa1c

Please sign in to comment.