From 2f3bb1f55aab57c48e8bce97b170fcf9615f9df8 Mon Sep 17 00:00:00 2001 From: Denis Kasak Date: Thu, 10 Feb 2022 11:53:00 +0100 Subject: [PATCH] Switch to using an allow list for URL previewable content types. --- synapse/rest/media/v1/preview_url_resource.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index b52e92305c60..9dd7425929f6 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -403,7 +403,7 @@ async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResu output_stream=output_stream, max_size=self.max_spider_size, headers={"Accept-Language": self.url_preview_accept_language}, - is_allowed_content_type=_is_not_av_media, + is_allowed_content_type=_is_previewable, ) except SynapseError: # Pass SynapseErrors through directly, so that the servlet @@ -764,9 +764,14 @@ def _is_json(content_type: str) -> bool: return content_type.lower().startswith("application/json") -def _is_not_av_media(content_type: bytes) -> bool: - """Returns False if the content type is audio or video.""" +def _is_previewable(content_type: bytes) -> bool: + """Returns True for content types for which we will perform URL preview and False + otherwise.""" + content_type = content_type.lower() - return not content_type.startswith(b"video/") and not content_type.startswith( - b"audio/" + return ( + content_type.startswith(b"text/html") + or content_type.startswith(b"application/xhtml") + or content_type.startswith(b"image/") + or content_type.startswith(b"application/json") )