Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Request & follow redirects for /media/v3/download #16701

Merged
merged 5 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/16701.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Follow redirects when downloading media over federation (per [MSC3860](https://github.com/matrix-org/matrix-spec-proposals/pull/3860)).
4 changes: 4 additions & 0 deletions synapse/federation/transport/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,11 @@ async def download_media_v3(
# end up with a routing loop.
"allow_remote": "false",
"timeout_ms": str(max_timeout_ms),
# Matix 1.7 allows for this to redirect to another URL, this should
clokep marked this conversation as resolved.
Show resolved Hide resolved
# just be ignored for an old homeserver, so always provide it.
"allow_redirect": "true",
},
follow_redirects=True,
)


Expand Down
77 changes: 57 additions & 20 deletions synapse/http/matrixfederationclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,18 @@ class MatrixFederationRequest:
"""Query arguments.
"""

txn_id: Optional[str] = None
"""Unique ID for this request (for logging)
txn_id: str = attr.ib(init=False)
"""Unique ID for this request (for logging), this is autogenerated.
"""

uri: bytes = attr.ib(init=False)
"""The URI of this request
uri: bytes = b""
"""The URI of this request, usually generated from the above information.
"""

_generate_uri: bool = True
"""True to automatically generate the uri field based on the above information.

Set to False if manually configuring the URI.
"""
Comment on lines +164 to 168
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't figure out a better way to force attrs to do what I want. Using a default/factory works if you're either just creating the instance or evolving it and updating the URI, but not when evolving it and wanting to generate the URI again. I figured being explicit was best.


def __attrs_post_init__(self) -> None:
Expand All @@ -168,22 +174,23 @@ def __attrs_post_init__(self) -> None:

object.__setattr__(self, "txn_id", txn_id)

destination_bytes = self.destination.encode("ascii")
path_bytes = self.path.encode("ascii")
query_bytes = encode_query_args(self.query)

# The object is frozen so we can pre-compute this.
uri = urllib.parse.urlunparse(
(
b"matrix-federation",
destination_bytes,
path_bytes,
None,
query_bytes,
b"",
if self._generate_uri:
destination_bytes = self.destination.encode("ascii")
path_bytes = self.path.encode("ascii")
query_bytes = encode_query_args(self.query)

# The object is frozen so we can pre-compute this.
uri = urllib.parse.urlunparse(
(
b"matrix-federation",
destination_bytes,
path_bytes,
None,
query_bytes,
b"",
)
)
)
object.__setattr__(self, "uri", uri)
object.__setattr__(self, "uri", uri)

def get_json(self) -> Optional[JsonDict]:
if self.json_callback:
Expand Down Expand Up @@ -513,6 +520,7 @@ async def _send_request(
ignore_backoff: bool = False,
backoff_on_404: bool = False,
backoff_on_all_error_codes: bool = False,
follow_redirects: bool = False,
) -> IResponse:
"""
Sends a request to the given server.
Expand Down Expand Up @@ -555,6 +563,9 @@ async def _send_request(
backoff_on_404: Back off if we get a 404
backoff_on_all_error_codes: Back off if we get any error response

follow_redirects: True to follow the Location header of 307/308 redirect
responses. This does not recurse.

Returns:
Resolves with the HTTP response object on success.

Expand Down Expand Up @@ -714,6 +725,26 @@ async def _send_request(
response.code,
response_phrase,
)
elif (
response.code in (307, 308)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about wrapping the agent in a RedirectAgent, but that handles additional codes sadly.

and follow_redirects
and response.headers.hasHeader("Location")
):
# The Location header *might* be relative so resolve it.
location = response.headers.getRawHeaders(b"Location")[0]
new_uri = urllib.parse.urljoin(request.uri, location)

return await self._send_request(
attr.evolve(request, uri=new_uri, generate_uri=False),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's spelled _generate_uri in the struct definition. Is this some attrs way of declaring a private attribute that you can initialise via the constructor?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this some attrs way of declaring a private attribute that you can initialise via the constructor?

Yes, pretty much. attrs strips the preceding underscores to let you have 'private' attributes that can be initialized.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

retry_on_dns_fail,
timeout,
long_retries,
ignore_backoff,
backoff_on_404,
backoff_on_all_error_codes,
# Do not continue following redirects.
follow_redirects=False,
)
else:
logger.info(
"{%s} [%s] Got response headers: %d %s",
Expand Down Expand Up @@ -1383,6 +1414,7 @@ async def get_file(
retry_on_dns_fail: bool = True,
max_size: Optional[int] = None,
ignore_backoff: bool = False,
follow_redirects: bool = False,
) -> Tuple[int, Dict[bytes, List[bytes]]]:
"""GETs a file from a given homeserver
Args:
Expand All @@ -1392,6 +1424,8 @@ async def get_file(
args: Optional dictionary used to create the query string.
ignore_backoff: true to ignore the historical backoff data
and try the request anyway.
follow_redirects: True to follow the Location header of 307/308 redirect
responses. This does not recurse.

Returns:
Resolves with an (int,dict) tuple of
Expand All @@ -1412,7 +1446,10 @@ async def get_file(
)

response = await self._send_request(
request, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff
request,
retry_on_dns_fail=retry_on_dns_fail,
ignore_backoff=ignore_backoff,
follow_redirects=follow_redirects,
)

headers = dict(response.headers.getAllRawHeaders())
Expand Down
4 changes: 3 additions & 1 deletion tests/media/test_media_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ def get_file(
retry_on_dns_fail: bool = True,
max_size: Optional[int] = None,
ignore_backoff: bool = False,
follow_redirects: bool = False,
) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
"""A mock for MatrixFederationHttpClient.get_file."""

Expand Down Expand Up @@ -325,7 +326,8 @@ def _req(
self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id
)
self.assertEqual(
self.fetches[0][3], {"allow_remote": "false", "timeout_ms": "20000"}
self.fetches[0][3],
{"allow_remote": "false", "timeout_ms": "20000", "allow_redirect": "true"},
)

headers = {
Expand Down
2 changes: 1 addition & 1 deletion tests/replication/test_multi_media_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def _get_media_req(
self.assertEqual(request.method, b"GET")
self.assertEqual(
request.path,
f"/_matrix/media/r0/download/{target}/{media_id}".encode(),
f"/_matrix/media/v3/download/{target}/{media_id}".encode(),
)
self.assertEqual(
request.requestHeaders.getRawHeaders(b"host"), [target.encode("utf-8")]
Expand Down
Loading