From c2ab0e5bb015c2581ed021dc9feaa5384171ea60 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Oct 2021 13:49:02 +0100 Subject: [PATCH 1/7] Move DNS lookups into separate thread pool This is to stop large bursts of lookups starving out other users of the thread pools. Fixes #11049. --- synapse/app/_base.py | 10 ++- synapse/util/gai_resolver.py | 136 +++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 synapse/util/gai_resolver.py diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 2ca2e051e43a..f770d82bbe2e 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -31,6 +31,7 @@ from twisted.internet import defer, error, reactor from twisted.logger import LoggingFile, LogLevel from twisted.protocols.tls import TLSMemoryBIOFactory +from twisted.python.threadpool import ThreadPool import synapse from synapse.api.constants import MAX_PDU_SIZE @@ -48,6 +49,7 @@ from synapse.metrics.jemalloc import setup_jemalloc_stats from synapse.util.caches.lrucache import setup_expire_lru_cache_entries from synapse.util.daemonize import daemonize_process +from synapse.util.gai_resolver import GAIResolver from synapse.util.rlimit import change_resource_limit from synapse.util.versionstring import get_version_string @@ -338,9 +340,10 @@ async def start(hs: "HomeServer"): Args: hs: homeserver instance """ + reactor = hs.get_reactor() + # Set up the SIGHUP machinery. if hasattr(signal, "SIGHUP"): - reactor = hs.get_reactor() @wrap_as_background_process("sighup") def handle_sighup(*args, **kwargs): @@ -371,6 +374,11 @@ def run_sighup(*args, **kwargs): # Start the tracer synapse.logging.opentracing.init_tracer(hs) # type: ignore[attr-defined] # noqa + # We want to use a separate thread pool for the resolver so that large + # numbers of DNS requests don't starve out other users of the threadpool. + resolver_threadpool = ThreadPool(name="gai_resolver") + reactor.installNameResolver(GAIResolver(reactor, getThreadPool=resolver_threadpool)) + # Instantiate the modules so they can register their web resources to the module API # before we start the listeners. module_api = hs.get_module_api() diff --git a/synapse/util/gai_resolver.py b/synapse/util/gai_resolver.py new file mode 100644 index 000000000000..52e19a83e979 --- /dev/null +++ b/synapse/util/gai_resolver.py @@ -0,0 +1,136 @@ +# This is a direct lift from +# https://github.com/twisted/twisted/blob/release-21.2.0-10091/src/twisted/internet/_resolver.py. +# We copy it here as we need to instansiate `GAIResolver` manually, but it is a +# private class. + + +from socket import ( + AF_INET, + AF_INET6, + AF_UNSPEC, + SOCK_DGRAM, + SOCK_STREAM, + gaierror, + getaddrinfo, +) + +from zope.interface import implementer + +from twisted.internet.address import IPv4Address, IPv6Address +from twisted.internet.interfaces import IHostnameResolver, IHostResolution +from twisted.internet.threads import deferToThreadPool + + +@implementer(IHostResolution) +class HostResolution: + """ + The in-progress resolution of a given hostname. + """ + + def __init__(self, name): + """ + Create a L{HostResolution} with the given name. + """ + self.name = name + + def cancel(self): + # IHostResolution.cancel + raise NotImplementedError() + + +_any = frozenset([IPv4Address, IPv6Address]) + +_typesToAF = { + frozenset([IPv4Address]): AF_INET, + frozenset([IPv6Address]): AF_INET6, + _any: AF_UNSPEC, +} + +_afToType = { + AF_INET: IPv4Address, + AF_INET6: IPv6Address, +} + +_transportToSocket = { + "TCP": SOCK_STREAM, + "UDP": SOCK_DGRAM, +} + +_socktypeToType = { + SOCK_STREAM: "TCP", + SOCK_DGRAM: "UDP", +} + + +@implementer(IHostnameResolver) +class GAIResolver: + """ + L{IHostnameResolver} implementation that resolves hostnames by calling + L{getaddrinfo} in a thread. + """ + + def __init__(self, reactor, getThreadPool=None, getaddrinfo=getaddrinfo): + """ + Create a L{GAIResolver}. + @param reactor: the reactor to schedule result-delivery on + @type reactor: L{IReactorThreads} + @param getThreadPool: a function to retrieve the thread pool to use for + scheduling name resolutions. If not supplied, the use the given + C{reactor}'s thread pool. + @type getThreadPool: 0-argument callable returning a + L{twisted.python.threadpool.ThreadPool} + @param getaddrinfo: a reference to the L{getaddrinfo} to use - mainly + parameterized for testing. + @type getaddrinfo: callable with the same signature as L{getaddrinfo} + """ + self._reactor = reactor + self._getThreadPool = ( + reactor.getThreadPool if getThreadPool is None else getThreadPool + ) + self._getaddrinfo = getaddrinfo + + def resolveHostName( + self, + resolutionReceiver, + hostName, + portNumber=0, + addressTypes=None, + transportSemantics="TCP", + ): + """ + See L{IHostnameResolver.resolveHostName} + @param resolutionReceiver: see interface + @param hostName: see interface + @param portNumber: see interface + @param addressTypes: see interface + @param transportSemantics: see interface + @return: see interface + """ + pool = self._getThreadPool() + addressFamily = _typesToAF[ + _any if addressTypes is None else frozenset(addressTypes) + ] + socketType = _transportToSocket[transportSemantics] + + def get(): + try: + return self._getaddrinfo( + hostName, portNumber, addressFamily, socketType + ) + except gaierror: + return [] + + d = deferToThreadPool(self._reactor, pool, get) + resolution = HostResolution(hostName) + resolutionReceiver.resolutionBegan(resolution) + + @d.addCallback + def deliverResults(result): + for family, socktype, _proto, _cannoname, sockaddr in result: + addrType = _afToType[family] + resolutionReceiver.addressResolved( + addrType(_socktypeToType.get(socktype, "TCP"), *sockaddr) + ) + resolutionReceiver.resolutionComplete() + + return resolution From 2b3a8cecb1fa492396e86f7729930f5038810fc4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Oct 2021 13:51:31 +0100 Subject: [PATCH 2/7] Newsfile --- changelog.d/11177.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/11177.bugfix diff --git a/changelog.d/11177.bugfix b/changelog.d/11177.bugfix new file mode 100644 index 000000000000..2ee26a01e1b2 --- /dev/null +++ b/changelog.d/11177.bugfix @@ -0,0 +1 @@ +Fix performance regression when doing large number of hostname lookups at once. Introduced in v1.44.0. From e6e7819160abf2cfafc5f276974c15b4a18b8bbc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Oct 2021 14:01:22 +0100 Subject: [PATCH 3/7] getThreadPool expects a function --- synapse/app/_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index f770d82bbe2e..1e1a5ac54b74 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -377,7 +377,9 @@ def run_sighup(*args, **kwargs): # We want to use a separate thread pool for the resolver so that large # numbers of DNS requests don't starve out other users of the threadpool. resolver_threadpool = ThreadPool(name="gai_resolver") - reactor.installNameResolver(GAIResolver(reactor, getThreadPool=resolver_threadpool)) + reactor.installNameResolver( + GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool) + ) # Instantiate the modules so they can register their web resources to the module API # before we start the listeners. From 6f08f137542cd41d859648522db34145cd0ca0ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Oct 2021 14:18:51 +0100 Subject: [PATCH 4/7] Start threadpool --- synapse/app/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 1e1a5ac54b74..9315b088c3b7 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -377,6 +377,7 @@ def run_sighup(*args, **kwargs): # We want to use a separate thread pool for the resolver so that large # numbers of DNS requests don't starve out other users of the threadpool. resolver_threadpool = ThreadPool(name="gai_resolver") + resolver_threadpool.start() reactor.installNameResolver( GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool) ) From 9fd961d8bc7a063153cc9c6e3c3b9bee4e1feddf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Oct 2021 14:44:01 +0100 Subject: [PATCH 5/7] Update synapse/util/gai_resolver.py Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- synapse/util/gai_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/gai_resolver.py b/synapse/util/gai_resolver.py index 52e19a83e979..a447ce4e5595 100644 --- a/synapse/util/gai_resolver.py +++ b/synapse/util/gai_resolver.py @@ -1,6 +1,6 @@ # This is a direct lift from # https://github.com/twisted/twisted/blob/release-21.2.0-10091/src/twisted/internet/_resolver.py. -# We copy it here as we need to instansiate `GAIResolver` manually, but it is a +# We copy it here as we need to instantiate `GAIResolver` manually, but it is a # private class. From 965944073c64d3687333062626ea1b9184b2726c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Oct 2021 15:27:42 +0100 Subject: [PATCH 6/7] Update changelog.d/11177.bugfix Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/11177.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/11177.bugfix b/changelog.d/11177.bugfix index 2ee26a01e1b2..ca5bc0df28ad 100644 --- a/changelog.d/11177.bugfix +++ b/changelog.d/11177.bugfix @@ -1 +1 @@ -Fix performance regression when doing large number of hostname lookups at once. Introduced in v1.44.0. +Fix a performance regression introduced in v1.44.0 which could cause client requests to time out when making large numbers of outbound requests. From d502181011864132b7f31f0ab157cf2f4b871d71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 26 Oct 2021 12:11:33 +0100 Subject: [PATCH 7/7] Move to top --- synapse/app/_base.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 9315b088c3b7..03627cdcbad3 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -342,6 +342,14 @@ async def start(hs: "HomeServer"): """ reactor = hs.get_reactor() + # We want to use a separate thread pool for the resolver so that large + # numbers of DNS requests don't starve out other users of the threadpool. + resolver_threadpool = ThreadPool(name="gai_resolver") + resolver_threadpool.start() + reactor.installNameResolver( + GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool) + ) + # Set up the SIGHUP machinery. if hasattr(signal, "SIGHUP"): @@ -374,14 +382,6 @@ def run_sighup(*args, **kwargs): # Start the tracer synapse.logging.opentracing.init_tracer(hs) # type: ignore[attr-defined] # noqa - # We want to use a separate thread pool for the resolver so that large - # numbers of DNS requests don't starve out other users of the threadpool. - resolver_threadpool = ThreadPool(name="gai_resolver") - resolver_threadpool.start() - reactor.installNameResolver( - GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool) - ) - # Instantiate the modules so they can register their web resources to the module API # before we start the listeners. module_api = hs.get_module_api()