Skip to content

Commit

Permalink
Extend cleaning for downloaded file names (#6492)
Browse files Browse the repository at this point in the history
Control characters in filenames may conflict with the Content-Disposition header
value restrictions, as it needs to include the downloaded file name. The problem is
that many tools (including sendfile) just check for ascii/unicode conversion,
while there are also ascii chars that can't be used.
Ref: RFC 8178

This PR adds extra cleanup for downloaded file names.

Added a custom replacement for the sendfile() function
  • Loading branch information
zhiltsov-max authored Jul 21, 2023
1 parent be81d28 commit 9659f82
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/opencv/cvat/pull/6506>)
- Server-side validation for attribute specifications
(<https://github.com/opencv/cvat/pull/6447>)
- \[API\] File downloading failures for filenames with special characters l(<https://github.com/opencv/cvat/pull/6492>)

### Security
- TDB
Expand Down
5 changes: 3 additions & 2 deletions cvat/apps/engine/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from rest_framework.renderers import JSONRenderer
from rest_framework.response import Response
from rest_framework.exceptions import ValidationError, PermissionDenied, NotFound
from django_sendfile import sendfile
from distutils.util import strtobool

import cvat.apps.dataset_manager as dm
Expand All @@ -37,7 +36,9 @@
LabeledDataSerializer, SegmentSerializer, SimpleJobSerializer, TaskReadSerializer,
ProjectReadSerializer, ProjectFileSerializer, TaskFileSerializer, RqIdSerializer)
from cvat.apps.engine.utils import (
av_scan_paths, process_failed_job, configure_dependent_job, get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after
av_scan_paths, process_failed_job, configure_dependent_job,
get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after,
sendfile
)
from cvat.apps.engine.models import (
StorageChoice, StorageMethodChoice, DataChoice, Task, Project, Location)
Expand Down
45 changes: 45 additions & 0 deletions cvat/apps/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import subprocess
import os
import urllib.parse
import re
import logging
import platform

Expand All @@ -31,6 +32,7 @@
from multiprocessing import cpu_count

from django.core.exceptions import ValidationError
from django_sendfile import sendfile as _sendfile

Import = namedtuple("Import", ["module", "name", "alias"])

Expand Down Expand Up @@ -284,3 +286,46 @@ def get_cpu_number() -> int:
# the number of cpu cannot be determined
cpu_number = 1
return cpu_number

def make_attachment_file_name(filename: str) -> str:
# Borrowed from sendfile() to minimize changes for users.
# Added whitespace conversion and squashing into a single space
# Added removal of control characters

filename = str(filename).replace("\\", "\\\\").replace('"', r"\"")
filename = re.sub(r"\s+", " ", filename)

# From https://github.com/encode/uvicorn/blob/cd18c3b14aa810a4a6ebb264b9a297d6f8afb9ac/uvicorn/protocols/http/httptools_impl.py#L51
filename = re.sub(r"[\x00-\x1F\x7F]", "", filename)

return filename

def sendfile(
request, filename,
attachment=False, attachment_filename=None, mimetype=None, encoding=None
):
"""
Create a response to send file using backend configured in ``SENDFILE_BACKEND``
``filename`` is the absolute path to the file to send.
If ``attachment`` is ``True`` the ``Content-Disposition`` header will be set accordingly.
This will typically prompt the user to download the file, rather
than view it. But even if ``False``, the user may still be prompted, depending
on the browser capabilities and configuration.
The ``Content-Disposition`` filename depends on the value of ``attachment_filename``:
``None`` (default): Same as ``filename``
``False``: No ``Content-Disposition`` filename
``String``: Value used as filename
If neither ``mimetype`` or ``encoding`` are specified, then they will be guessed via the
filename (using the standard Python mimetypes module)
"""
# A drop-in replacement for sendfile with extra filename cleaning

if attachment_filename:
attachment_filename = make_attachment_file_name(attachment_filename)

return _sendfile(request, filename, attachment, attachment_filename, mimetype, encoding)
3 changes: 1 addition & 2 deletions cvat/apps/engine/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from rest_framework.permissions import SAFE_METHODS
from rest_framework.response import Response
from rest_framework.settings import api_settings
from django_sendfile import sendfile

import cvat.apps.dataset_manager as dm
import cvat.apps.dataset_manager.views # pylint: disable=unused-import
Expand Down Expand Up @@ -75,7 +74,7 @@
from cvat.apps.engine.utils import (
av_scan_paths, process_failed_job, configure_dependent_job,
parse_exception_message, get_rq_job_meta, get_import_rq_id,
import_resource_with_clean_up_after
import_resource_with_clean_up_after, sendfile
)
from cvat.apps.engine import backup
from cvat.apps.engine.mixins import PartialUpdateModelMixin, UploadMixin, AnnotationMixin, SerializeMixin
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/events/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@

from rest_framework import serializers, status
from rest_framework.response import Response
from django_sendfile import sendfile

from cvat.apps.dataset_manager.views import clear_export_cache, log_exception
from cvat.apps.engine.log import slogger
from cvat.apps.engine.utils import sendfile

DEFAULT_CACHE_TTL = timedelta(hours=1)

Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/lambda_manager/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import os
import textwrap
import glob
from django_sendfile import sendfile
from copy import deepcopy
from enum import Enum
from functools import wraps
Expand Down Expand Up @@ -39,6 +38,7 @@
from cvat.apps.lambda_manager.serializers import (
FunctionCallRequestSerializer, FunctionCallSerializer
)
from cvat.apps.engine.utils import sendfile
from cvat.utils.http import make_requests_session
from cvat.apps.iam.permissions import LambdaPermission
from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/opencv/views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import glob
from django.conf import settings
from django_sendfile import sendfile
from cvat.apps.engine.utils import sendfile

def OpenCVLibrary(request):
dirname = os.path.join(settings.STATIC_ROOT, 'opencv', 'js')
Expand Down
21 changes: 21 additions & 0 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,27 @@ def test_can_export_task_to_coco_format(self, admin_user, tid):
assert annotations["tracks"][0]["shapes"][0]["frame"] == 0
assert annotations["tracks"][0]["elements"][0]["shapes"][0]["frame"] == 0

@pytest.mark.usefixtures("restore_db_per_function")
def test_can_download_task_with_special_chars_in_name(self, admin_user):
# Control characters in filenames may conflict with the Content-Disposition header
# value restrictions, as it needs to include the downloaded file name.

task_spec = {
"name": "test_special_chars_{}_in_name".format("".join(chr(c) for c in range(1, 127))),
"labels": [{"name": "cat"}],
}

task_data = {
"image_quality": 75,
"client_files": generate_image_files(1),
}

task_id, _ = create_task(admin_user, task_spec, task_data)

response = self._test_export_task(admin_user, task_id, format="CVAT for images 1.1")
assert response.status == HTTPStatus.OK
assert zipfile.is_zipfile(io.BytesIO(response.data))


@pytest.mark.usefixtures("restore_db_per_function")
@pytest.mark.usefixtures("restore_cvat_data")
Expand Down

0 comments on commit 9659f82

Please sign in to comment.