From d98b560e525d37edde6a3f02e22d952153a13f16 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 28 Nov 2023 15:25:31 -0500 Subject: [PATCH] Follow redirects when fetching from /download. --- changelog.d/16701.feature | 1 + synapse/federation/transport/client.py | 4 ++ synapse/http/matrixfederationclient.py | 77 +++++++++++++++++++------- tests/media/test_media_storage.py | 4 +- 4 files changed, 65 insertions(+), 21 deletions(-) create mode 100644 changelog.d/16701.feature diff --git a/changelog.d/16701.feature b/changelog.d/16701.feature new file mode 100644 index 000000000000..2a66fc932adc --- /dev/null +++ b/changelog.d/16701.feature @@ -0,0 +1 @@ +Follow redirects when downloading media over federation (per [MSC3860](https://github.com/matrix-org/matrix-spec-proposals/pull/3860)). diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 0bb49057ec0a..30c4c50a2fd3 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -850,7 +850,11 @@ async def download_media_v3( # end up with a routing loop. "allow_remote": "false", "timeout_ms": str(max_timeout_ms), + # Matix 1.7 allows for this to redirect to another URL, this should + # just be ignored for an old homeserver, so always provide it. + "allow_redirect": "true", }, + follow_redirects=True, ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index d5013e8e97c1..cc1db763ae4e 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -153,12 +153,18 @@ class MatrixFederationRequest: """Query arguments. """ - txn_id: Optional[str] = None - """Unique ID for this request (for logging) + txn_id: str = attr.ib(init=False) + """Unique ID for this request (for logging), this is autogenerated. """ - uri: bytes = attr.ib(init=False) - """The URI of this request + uri: bytes = b"" + """The URI of this request, usually generated from the above information. + """ + + _generate_uri: bool = True + """True to automatically generate the uri field based on the above information. + + Set to False if manually configuring the URI. """ def __attrs_post_init__(self) -> None: @@ -168,22 +174,23 @@ def __attrs_post_init__(self) -> None: object.__setattr__(self, "txn_id", txn_id) - destination_bytes = self.destination.encode("ascii") - path_bytes = self.path.encode("ascii") - query_bytes = encode_query_args(self.query) - - # The object is frozen so we can pre-compute this. - uri = urllib.parse.urlunparse( - ( - b"matrix-federation", - destination_bytes, - path_bytes, - None, - query_bytes, - b"", + if self._generate_uri: + destination_bytes = self.destination.encode("ascii") + path_bytes = self.path.encode("ascii") + query_bytes = encode_query_args(self.query) + + # The object is frozen so we can pre-compute this. + uri = urllib.parse.urlunparse( + ( + b"matrix-federation", + destination_bytes, + path_bytes, + None, + query_bytes, + b"", + ) ) - ) - object.__setattr__(self, "uri", uri) + object.__setattr__(self, "uri", uri) def get_json(self) -> Optional[JsonDict]: if self.json_callback: @@ -513,6 +520,7 @@ async def _send_request( ignore_backoff: bool = False, backoff_on_404: bool = False, backoff_on_all_error_codes: bool = False, + follow_redirects: bool = False, ) -> IResponse: """ Sends a request to the given server. @@ -555,6 +563,9 @@ async def _send_request( backoff_on_404: Back off if we get a 404 backoff_on_all_error_codes: Back off if we get any error response + follow_redirects: True to follow the Location header of 307/308 redirect + responses. This does not recurse. + Returns: Resolves with the HTTP response object on success. @@ -714,6 +725,26 @@ async def _send_request( response.code, response_phrase, ) + elif ( + response.code in (307, 308) + and follow_redirects + and response.headers.hasHeader("Location") + ): + # The Location header *might* be relative so resolve it. + location = response.headers.getRawHeaders(b"Location")[0] + new_uri = urllib.parse.urljoin(request.uri, location) + + return await self._send_request( + attr.evolve(request, uri=new_uri, generate_uri=False), + retry_on_dns_fail, + timeout, + long_retries, + ignore_backoff, + backoff_on_404, + backoff_on_all_error_codes, + # Do not continue following redirects. + follow_redirects=False, + ) else: logger.info( "{%s} [%s] Got response headers: %d %s", @@ -1383,6 +1414,7 @@ async def get_file( retry_on_dns_fail: bool = True, max_size: Optional[int] = None, ignore_backoff: bool = False, + follow_redirects: bool = False, ) -> Tuple[int, Dict[bytes, List[bytes]]]: """GETs a file from a given homeserver Args: @@ -1392,6 +1424,8 @@ async def get_file( args: Optional dictionary used to create the query string. ignore_backoff: true to ignore the historical backoff data and try the request anyway. + follow_redirects: True to follow the Location header of 307/308 redirect + responses. This does not recurse. Returns: Resolves with an (int,dict) tuple of @@ -1412,7 +1446,10 @@ async def get_file( ) response = await self._send_request( - request, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff + request, + retry_on_dns_fail=retry_on_dns_fail, + ignore_backoff=ignore_backoff, + follow_redirects=follow_redirects, ) headers = dict(response.headers.getAllRawHeaders()) diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index e30b43475f4e..f981d1c0d8dd 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -248,6 +248,7 @@ def get_file( retry_on_dns_fail: bool = True, max_size: Optional[int] = None, ignore_backoff: bool = False, + follow_redirects: bool = False, ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]": """A mock for MatrixFederationHttpClient.get_file.""" @@ -325,7 +326,8 @@ def _req( self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id ) self.assertEqual( - self.fetches[0][3], {"allow_remote": "false", "timeout_ms": "20000"} + self.fetches[0][3], + {"allow_remote": "false", "timeout_ms": "20000", "allow_redirect": "true"}, ) headers = {