From 0523511d2fb40f2738f8a8549868f44b96e5dab7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Wed, 17 Mar 2021 12:23:06 -0400
Subject: [PATCH] Limit the size of .well-known lookups.

This is a port of matrix-org/synapse#8950 and matrix-org/synapse#9108.

This is not a straight port, the code is simplified a bit:

* Type hints are removed since sydent supports Python 2.7.
* The size of the response is not returned, only the bytes.
---
 sydent/http/httpcommon.py            | 101 +++++++++++++++++++++++++++
 sydent/http/matrixfederationagent.py |   9 ++-
 2 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/sydent/http/httpcommon.py b/sydent/http/httpcommon.py
index cbcf95f8..9dde0ec1 100644
--- a/sydent/http/httpcommon.py
+++ b/sydent/http/httpcommon.py
@@ -15,8 +15,14 @@
 # limitations under the License.
 
 import logging
+from io import BytesIO
 
 import twisted.internet.ssl
+from twisted.internet import defer, protocol
+from twisted.internet.protocol import connectionDone
+from twisted.web._newclient import ResponseDone
+from twisted.web.http import PotentialDataLoss
+from twisted.web.iweb import UNKNOWN_LENGTH
 
 logger = logging.getLogger(__name__)
 
@@ -62,3 +68,98 @@ def makeTrustRoot(self):
             return twisted.internet._sslverify.OpenSSLCertificateAuthorities([caCert.original])
         else:
             return twisted.internet.ssl.OpenSSLDefaultPaths()
+
+
+
+class BodyExceededMaxSize(Exception):
+    """The maximum allowed size of the HTTP body was exceeded."""
+
+
+class _DiscardBodyWithMaxSizeProtocol(protocol.Protocol):
+    """A protocol which immediately errors upon receiving data."""
+
+    def __init__(self, deferred):
+        self.deferred = deferred
+
+    def _maybe_fail(self):
+        """
+        Report a max size exceeded error and disconnect the first time this is called.
+        """
+        if not self.deferred.called:
+            self.deferred.errback(BodyExceededMaxSize())
+            # Close the connection (forcefully) since all the data will get
+            # discarded anyway.
+            self.transport.abortConnection()
+
+    def dataReceived(self, data):
+        self._maybe_fail()
+
+    def connectionLost(self, reason):
+        self._maybe_fail()
+
+
+class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
+    """A protocol which reads body to a stream, erroring if the body exceeds a maximum size."""
+
+    def __init__(self, deferred, max_size):
+        self.stream = BytesIO()
+        self.deferred = deferred
+        self.length = 0
+        self.max_size = max_size
+
+    def dataReceived(self, data):
+        # If the deferred was called, bail early.
+        if self.deferred.called:
+            return
+
+        self.stream.write(data)
+        self.length += len(data)
+        # The first time the maximum size is exceeded, error and cancel the
+        # connection. dataReceived might be called again if data was received
+        # in the meantime.
+        if self.max_size is not None and self.length >= self.max_size:
+            self.deferred.errback(BodyExceededMaxSize())
+            # Close the connection (forcefully) since all the data will get
+            # discarded anyway.
+            self.transport.abortConnection()
+
+    def connectionLost(self, reason=connectionDone):
+        # If the maximum size was already exceeded, there's nothing to do.
+        if self.deferred.called:
+            return
+
+        if reason.check(ResponseDone):
+            self.deferred.callback(self.stream.getvalue())
+        elif reason.check(PotentialDataLoss):
+            # stolen from https://github.com/twisted/treq/pull/49/files
+            # http://twistedmatrix.com/trac/ticket/4840
+            self.deferred.callback(self.stream.getvalue())
+        else:
+            self.deferred.errback(reason)
+
+
+def read_body_with_max_size(response, max_size):
+    """
+    Read an HTTP response body into memory, optionally enforcing a maximum size.
+
+    If the maximum file size is reached, the returned Deferred will resolve to a
+    Failure with a BodyExceededMaxSize exception.
+
+    Args:
+        response: The HTTP response to read from.
+        max_size: The maximum file size to allow.
+
+    Returns:
+        A Deferred which resolves to the read body (as bytes).
+    """
+    d = defer.Deferred()
+
+    # If the Content-Length header gives a size larger than the maximum allowed
+    # size, do not bother downloading the body.
+    if max_size is not None and response.length != UNKNOWN_LENGTH:
+        if response.length > max_size:
+            response.deliverBody(_DiscardBodyWithMaxSizeProtocol(d))
+            return d
+
+    response.deliverBody(_ReadBodyWithMaxSizeProtocol(d, max_size))
+    return d
diff --git a/sydent/http/matrixfederationagent.py b/sydent/http/matrixfederationagent.py
index f7995c9d..bc4a968f 100644
--- a/sydent/http/matrixfederationagent.py
+++ b/sydent/http/matrixfederationagent.py
@@ -26,11 +26,12 @@
 from twisted.internet import defer
 from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
 from twisted.internet.interfaces import IStreamClientEndpoint
-from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent, readBody
+from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent
 from twisted.web.http import stringToDatetime
 from twisted.web.http_headers import Headers
 from twisted.web.iweb import IAgent
 
+from sydent.http.httpcommon import BodyExceededMaxSize, read_body_with_max_size
 from sydent.http.srvresolver import SrvResolver, pick_server_from_list
 from sydent.util.ttlcache import TTLCache
 
@@ -46,6 +47,9 @@
 # cap for .well-known cache period
 WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600
 
+# The maximum size (in bytes) to allow a well-known file to be.
+WELL_KNOWN_MAX_SIZE = 50 * 1024  # 50 KiB
+
 logger = logging.getLogger(__name__)
 
 well_known_cache = TTLCache('well-known')
@@ -316,7 +320,7 @@ def _do_get_well_known(self, server_name):
         logger.info("Fetching %s", uri_str)
         try:
             response = yield self._well_known_agent.request(b"GET", uri)
-            body = yield readBody(response)
+            body = yield read_body_with_max_size(response, WELL_KNOWN_MAX_SIZE)
             if response.code != 200:
                 raise Exception("Non-200 response %s" % (response.code, ))
 
@@ -334,6 +338,7 @@ def _do_get_well_known(self, server_name):
             cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD
             cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER)
             defer.returnValue((None, cache_period))
+            return
 
         result = parsed_body["m.server"].encode("ascii")
 