Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend cleaning for downloaded file names #6492

Merged
merged 9 commits into from
Jul 21, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Memory leak related to unclosed av container (<https://github.com/opencv/cvat/pull/6501>)
- Using initial frame from query parameter to open specific frame in a job
(<https://github.com/opencv/cvat/pull/6506>)
- \[API\] File downloading failures for filenames with special characters l(<https://github.com/opencv/cvat/pull/6492>)

### Security
- TDB
Expand Down
5 changes: 3 additions & 2 deletions cvat/apps/engine/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from rest_framework.renderers import JSONRenderer
from rest_framework.response import Response
from rest_framework.exceptions import ValidationError, PermissionDenied, NotFound
from django_sendfile import sendfile
from distutils.util import strtobool

import cvat.apps.dataset_manager as dm
Expand All @@ -37,7 +36,9 @@
LabeledDataSerializer, SegmentSerializer, SimpleJobSerializer, TaskReadSerializer,
ProjectReadSerializer, ProjectFileSerializer, TaskFileSerializer, RqIdSerializer)
from cvat.apps.engine.utils import (
av_scan_paths, process_failed_job, configure_dependent_job, get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after
av_scan_paths, process_failed_job, configure_dependent_job,
get_rq_job_meta, get_import_rq_id, import_resource_with_clean_up_after,
sendfile
)
from cvat.apps.engine.models import (
StorageChoice, StorageMethodChoice, DataChoice, Task, Project, Location)
Expand Down
45 changes: 45 additions & 0 deletions cvat/apps/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import subprocess
import os
import urllib.parse
import re
import logging
import platform

Expand All @@ -31,6 +32,7 @@
from multiprocessing import cpu_count

from django.core.exceptions import ValidationError
from django_sendfile import sendfile as _sendfile

Import = namedtuple("Import", ["module", "name", "alias"])

Expand Down Expand Up @@ -284,3 +286,46 @@ def get_cpu_number() -> int:
# the number of cpu cannot be determined
cpu_number = 1
return cpu_number

def make_attachment_file_name(filename: str) -> str:
# Borrowed from sendfile() to minimize changes for users.
# Added whitespace conversion and squashing into a single space
# Added removal of control characters

filename = str(filename).replace("\\", "\\\\").replace('"', r"\"")
filename = re.sub(r"\s+", " ", filename)

# From https://github.com/encode/uvicorn/blob/cd18c3b14aa810a4a6ebb264b9a297d6f8afb9ac/uvicorn/protocols/http/httptools_impl.py#L51
filename = re.sub(r"[\x00-\x1F\x7F]", "", filename)

return filename

def sendfile(
request, filename,
attachment=False, attachment_filename=None, mimetype=None, encoding=None
):
"""
Create a response to send file using backend configured in ``SENDFILE_BACKEND``

``filename`` is the absolute path to the file to send.

If ``attachment`` is ``True`` the ``Content-Disposition`` header will be set accordingly.
This will typically prompt the user to download the file, rather
than view it. But even if ``False``, the user may still be prompted, depending
on the browser capabilities and configuration.

The ``Content-Disposition`` filename depends on the value of ``attachment_filename``:

``None`` (default): Same as ``filename``
``False``: No ``Content-Disposition`` filename
``String``: Value used as filename

If neither ``mimetype`` or ``encoding`` are specified, then they will be guessed via the
filename (using the standard Python mimetypes module)
"""
# A drop-in replacement for sendfile with extra filename cleaning

if attachment_filename:
attachment_filename = make_attachment_file_name(attachment_filename)

return _sendfile(request, filename, attachment, attachment_filename, mimetype, encoding)
3 changes: 1 addition & 2 deletions cvat/apps/engine/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from rest_framework.permissions import SAFE_METHODS
from rest_framework.response import Response
from rest_framework.settings import api_settings
from django_sendfile import sendfile

import cvat.apps.dataset_manager as dm
import cvat.apps.dataset_manager.views # pylint: disable=unused-import
Expand Down Expand Up @@ -75,7 +74,7 @@
from cvat.apps.engine.utils import (
av_scan_paths, process_failed_job, configure_dependent_job,
parse_exception_message, get_rq_job_meta, get_import_rq_id,
import_resource_with_clean_up_after
import_resource_with_clean_up_after, sendfile
)
from cvat.apps.engine import backup
from cvat.apps.engine.mixins import PartialUpdateModelMixin, UploadMixin, AnnotationMixin, SerializeMixin
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/events/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@

from rest_framework import serializers, status
from rest_framework.response import Response
from django_sendfile import sendfile

from cvat.apps.dataset_manager.views import clear_export_cache, log_exception
from cvat.apps.engine.log import slogger
from cvat.apps.engine.utils import sendfile

DEFAULT_CACHE_TTL = timedelta(hours=1)

Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/lambda_manager/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import os
import textwrap
import glob
from django_sendfile import sendfile
from copy import deepcopy
from enum import Enum
from functools import wraps
Expand Down Expand Up @@ -39,6 +38,7 @@
from cvat.apps.lambda_manager.serializers import (
FunctionCallRequestSerializer, FunctionCallSerializer
)
from cvat.apps.engine.utils import sendfile
from cvat.utils.http import make_requests_session
from cvat.apps.iam.permissions import LambdaPermission
from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/opencv/views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import glob
from django.conf import settings
from django_sendfile import sendfile
from cvat.apps.engine.utils import sendfile

def OpenCVLibrary(request):
dirname = os.path.join(settings.STATIC_ROOT, 'opencv', 'js')
Expand Down
21 changes: 21 additions & 0 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,27 @@ def test_can_export_task_to_coco_format(self, admin_user, tid):
assert annotations["tracks"][0]["shapes"][0]["frame"] == 0
assert annotations["tracks"][0]["elements"][0]["shapes"][0]["frame"] == 0

@pytest.mark.usefixtures("restore_db_per_function")
def test_can_download_task_with_special_chars_in_name(self, admin_user):
# Control characters in filenames may conflict with the Content-Disposition header
# value restrictions, as it needs to include the downloaded file name.

task_spec = {
"name": "test_special_chars_{}_in_name".format("".join(chr(c) for c in range(1, 127))),
"labels": [{"name": "cat"}],
}

task_data = {
"image_quality": 75,
"client_files": generate_image_files(1),
}

task_id, _ = create_task(admin_user, task_spec, task_data)

response = self._test_export_task(admin_user, task_id, format="CVAT for images 1.1")
assert response.status == HTTPStatus.OK
assert zipfile.is_zipfile(io.BytesIO(response.data))


@pytest.mark.usefixtures("restore_db_per_function")
@pytest.mark.usefixtures("restore_cvat_data")
Expand Down