Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove check of matching claimed and detected MIME type #60

Merged
merged 1 commit into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 5 additions & 34 deletions src/matrix_content_scanner/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,13 +332,8 @@ async def _scan_media(
# If the file is encrypted, we need to decrypt it before we can scan it.
media_content = self._decrypt_file(media_content, metadata)

# Check the file's MIME type to see if it's allowed and, if the file is not
# encrypted, if it matches the Content-Type header the homeserver sent us.
self._check_mimetype(
media_content=media_content,
claimed_mimetype=media.content_type,
encrypted=metadata is not None,
)
# Check the file's MIME type to see if it's allowed.
self._check_mimetype(media_content)

# Write the file to disk.
file_path = self._write_file_to_disk(media_path, media_content)
Expand Down Expand Up @@ -498,42 +493,18 @@ async def _run_scan(self, file_name: str) -> int:

return retcode

def _check_mimetype(
self,
media_content: bytes,
claimed_mimetype: str,
encrypted: bool,
) -> None:
"""Detects the MIME type of the provided bytes, and checks that:
* it matches with the Content-Type header that was received when downloading this
file (if the media isn't encrypted, since otherwise the Content-Type header
is always 'application/octet-stream')
* files with this MIME type are allowed (if an allow list is provided in the
configuration)
def _check_mimetype(self,media_content: bytes) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def _check_mimetype(self,media_content: bytes) -> None:
def _check_mimetype(self, media_content: bytes) -> None:

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should fix the linter error.

"""Detects the MIME type of the provided bytes, and checks that this type is allowed
(if an allow list is provided in the configuration)
Args:
media_content: The file's content. If the file is encrypted, this is its
decrypted content.
claimed_mimetype: The value of the Content-Type header received when
downloading the file.
encrypted: Whether the file was encrypted (in which case we don't want to
check that its MIME type matches with the Content-Type header).
Raises:
FileDirtyError if one of the checks fail.
"""
detected_mimetype = magic.mimetype(media_content)
logger.debug("Detected MIME type for file is %s", detected_mimetype)

# Check if the MIME type is matching the one that's expected, but only if the file
# is not encrypted (because otherwise we'll always have 'application/octet-stream'
# in the Content-Type header regardless of the actual MIME type of the file).
if encrypted is False and detected_mimetype != claimed_mimetype:
logger.error(
"Mismatching MIME type (%s) and Content-Type header (%s)",
detected_mimetype,
claimed_mimetype,
)
raise FileDirtyError("File type not supported")

# If there's an allow list for MIME types, check that the MIME type that's been
# detected for this file is in it.
if (
Expand Down
13 changes: 0 additions & 13 deletions tests/scanner/test_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,19 +247,6 @@ async def test_mimetype_encrypted(self) -> None:
with self.assertRaises(FileDirtyError):
await self.scanner.scan_file(MEDIA_PATH, ENCRYPTED_FILE_METADATA)

async def test_mimetype_content_type_mismatch(self) -> None:
"""Tests that a scan fails if the detected MIME type does not match the value of
the Content-Type header sent by the homeserver.
"""
# Set up the file description to make it look as if the homeserver tried to tell
# us the file is a JPEG (even though it's actually a PNG).
self.downloader_res.content_type = "image/jpeg"

# Check that the scan fails since the file's detected MIME type doesn't match the
# value of the Content-Type header.
with self.assertRaises(FileDirtyError):
await self.scanner.scan_file(MEDIA_PATH)

async def test_dont_cache_exit_codes(self) -> None:
"""Tests that if the configuration specifies exit codes to ignore when running
the scanning script, we don't cache them.
Expand Down
Loading