Skip to content

Commit

Permalink
Enable files download offloading (#112)
Browse files Browse the repository at this point in the history
* files: enable download offloading
  • Loading branch information
anikachurilova authored Feb 9, 2024
1 parent 76b79a4 commit 76dd719
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 0 deletions.
13 changes: 13 additions & 0 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ from cds_rdm.permissions import (
CDSCommunitiesPermissionPolicy,
CDSRDMRecordPermissionPolicy
)
from cds_rdm.files import storage_factory
from invenio_app_rdm.config import CELERY_BEAT_SCHEDULE as APP_RDM_CELERY_BEAT_SCHEDULE
from celery.schedules import crontab

Expand Down Expand Up @@ -316,3 +317,15 @@ CERN_AUTHORIZATION_SERVICE_API_GROUP = "Group"
# Permissions: define who can create new communities
# NOTE(review): presumably lists of user e-mails / group names allowed to
# create communities; both are empty here — confirm the expected format
# against the communities permission policy.
CDS_EMAILS_ALLOW_CREATE_COMMUNITIES = []
CDS_GROUPS_ALLOW_CREATE_COMMUNITIES = []


# Invenio-Files-REST
# ==================
XROOTD_ENABLED = False
# Control file download offloading.
# Storage factory imported from cds_rdm.files; it builds storages using
# OffloadFileStorage, whose send_file() reads the settings below.
FILES_REST_STORAGE_FACTORY = storage_factory
# Master switch: send_file() only offloads (non-HEAD requests) when this is
# true; otherwise it falls back to the normal file download.
FILES_REST_XSENDFILE_ENABLED = False
# NOTE(review): not read by cds_rdm/files.py as of this change — confirm
# where (or whether) this flag is consumed.
CDS_EOS_OFFLOAD_ENABLED = False
# Local offloading is applied only when this is true AND the requested
# filename is listed in CDS_LOCAL_OFFLOAD_FILES.
CDS_LOCAL_OFFLOAD_ENABLED = False
CDS_LOCAL_OFFLOAD_FILES = ["file.txt", "file2.txt"]
# Value sent verbatim in the "X-Accel-Redirect" response header for
# offloaded downloads.
CDS_LOCAL_OFFLOAD_STORAGE = ""
104 changes: 104 additions & 0 deletions site/cds_rdm/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2024 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""CDS files utilities."""

import mimetypes
import unicodedata
from urllib.parse import quote

from flask import current_app, make_response, request
from invenio_files_rest.helpers import sanitize_mimetype
from invenio_files_rest.storage.pyfs import pyfs_storage_factory
from invenio_files_rest.storage.pyfs import PyFSFileStorage as BaseFileStorage


class OffloadFileStorage(BaseFileStorage):
    """Offload file downloads to another server.

    Instead of streaming the file content from the application, an empty
    response carrying ``X-Accel-*`` headers is returned so that the fronting
    web server (these are the headers nginx understands) serves the bytes.
    Whenever offloading is not applicable, the regular storage download of
    the parent class is used.
    """

    def send_file(
        self,
        filename,
        mimetype=None,
        restricted=True,
        checksum=None,
        trusted=False,
        chunk_size=None,
        as_attachment=False,
        **kwargs,
    ):
        """Send file, offloading the download when configured to do so.

        Offloading happens only when all of the following hold:

        * the request method is not ``HEAD``;
        * ``FILES_REST_XSENDFILE_ENABLED`` is true;
        * ``CDS_LOCAL_OFFLOAD_ENABLED`` is true;
        * ``filename`` is listed in ``CDS_LOCAL_OFFLOAD_FILES``.

        In every other case (and when building the redirect header fails)
        the call is delegated to the parent implementation with *all* the
        arguments received here.

        :param filename: Name of the file being downloaded.
        :param mimetype: MIME type, forwarded to the parent implementation.
            NOTE(review): on the offload path it is ignored and the type is
            always guessed from ``filename`` — confirm this is intended.
        :param restricted: Forwarded to the parent implementation.
        :param checksum: Forwarded to the parent implementation.
        :param trusted: Forwarded to the parent implementation.
        :param chunk_size: Forwarded to the parent implementation.
        :param as_attachment: Force ``Content-Disposition: attachment``.
        :returns: A Flask response object.
        """
        # Everything the parent needs for a regular download. Forwarding
        # only ``**kwargs`` would silently reset these to the parent's
        # defaults, because the named parameters are bound by this override.
        passthrough = dict(
            mimetype=mimetype,
            restricted=restricted,
            checksum=checksum,
            trusted=trusted,
            chunk_size=chunk_size,
            as_attachment=as_attachment,
            **kwargs,
        )

        # No need to proxy HEAD requests
        offload_enabled = (
            request.method != "HEAD"
            and current_app.config["FILES_REST_XSENDFILE_ENABLED"]
        )

        should_offload_locally = (
            current_app.config["CDS_LOCAL_OFFLOAD_ENABLED"]
            and filename in current_app.config["CDS_LOCAL_OFFLOAD_FILES"]
        )

        if not (offload_enabled and should_offload_locally):
            return super().send_file(filename, **passthrough)

        response = make_response()

        try:
            response.headers["X-Accel-Redirect"] = current_app.config[
                "CDS_LOCAL_OFFLOAD_STORAGE"
            ]
        except Exception as ex:
            # Deliberate best effort: log and serve the file the normal way.
            current_app.logger.exception(ex)
            # fallback to normal file download
            return super().send_file(filename, **passthrough)

        response.headers["X-Accel-Buffering"] = "yes"
        response.headers["X-Accel-Limit-Rate"] = "off"

        # The response type is derived from the file name only.
        guessed_mimetype = mimetypes.guess_type(filename)[0]
        if guessed_mimetype is not None:
            guessed_mimetype = sanitize_mimetype(guessed_mimetype, filename=filename)

        if guessed_mimetype is None:
            guessed_mimetype = "application/octet-stream"

        response.mimetype = guessed_mimetype

        # Force Content-Disposition for application/octet-stream to prevent
        # Content-Type sniffing.
        # (from invenio-files-rest)
        if as_attachment or guessed_mimetype == "application/octet-stream":
            # See https://github.com/pallets/flask/commit/0049922f2e690a6d
            # Option values are passed as ``str`` (not ``bytes``) so that a
            # Werkzeug version that stringifies them cannot emit a
            # ``b'...'`` repr as the file name.
            try:
                filename.encode("latin-1")
            except UnicodeEncodeError:
                # safe = RFC 5987 attr-char
                quoted = quote(filename, safe="!#$&+-.^_`|~")

                filenames = {"filename*": "UTF-8''%s" % quoted}
                # Best-effort latin-1 fallback name for clients that do not
                # understand the RFC 5987 ``filename*`` form.
                fallback_name = (
                    unicodedata.normalize("NFKD", filename)
                    .encode("latin-1", "ignore")
                    .decode("latin-1")
                )
                if fallback_name:
                    filenames["filename"] = fallback_name
            else:
                filenames = {"filename": filename}
            response.headers.set("Content-Disposition", "attachment", **filenames)
        else:
            response.headers.set("Content-Disposition", "inline")

        # Security-related headers for the download (from invenio-files-rest)
        response.headers["Content-Security-Policy"] = "default-src 'none';"
        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["X-Download-Options"] = "noopen"
        response.headers["X-Permitted-Cross-Domain-Policies"] = "none"
        response.headers["X-Frame-Options"] = "deny"
        response.headers["X-XSS-Protection"] = "1; mode=block"
        return response


def storage_factory(**kwargs):
    """Build a file storage through the PyFS factory using ``OffloadFileStorage``.

    All keyword arguments are handed over unchanged to
    ``pyfs_storage_factory``; only the storage class is fixed here so that
    downloads can be offloaded.
    """
    offload_storage = pyfs_storage_factory(
        filestorage_class=OffloadFileStorage, **kwargs
    )
    return offload_storage

0 comments on commit 76dd719

Please sign in to comment.