From 1e736473ee090a94de2e05e903c536e2cccef559 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 9 Jun 2023 16:57:33 +0200
Subject: [PATCH 01/14] Add configuration of the backoff algorithm for the
 federation client

---
 .../configuration/config_documentation.md     |  8 +++++
 synapse/config/federation.py                  | 14 +++++++++
 synapse/util/retryutils.py                    | 29 ++++++++++---------
 tests/storage/test_transactions.py            |  6 ++--
 tests/util/test_retryutils.py                 | 22 +++++++-------
 5 files changed, 53 insertions(+), 26 deletions(-)

diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 8426de04179b..5c511d09956b 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1212,6 +1212,14 @@ like sending a federation transaction.
 * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts.
 * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts.
 
+The following options are related to configuring the backoff parameters used fo a specific destination.
+Unlike previous configuration those values applies across all requests,
+and the state of the backoff is stored on DB.
+
+* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Default to 10mn.
+* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Default to 2.
+* `destination_max_retry_interval`: a cap on the backoff. Default to one day.
+
 Example configuration:
 ```yaml
 federation:
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index d21f7fd02a5e..b2dbec9d32c0 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -59,5 +59,19 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         self.max_long_retries = federation_config.get("max_long_retries", 10)
         self.max_short_retries = federation_config.get("max_short_retries", 3)
 
+        # Allow for the configuration of the backoff algorithm used
+        # when trying to reach an unavailable destination.
+        # Unlike previous configuration those values applies across
+        # multiple requests and the state of the backoff is stored on DB.
+        self.destination_min_retry_interval = federation_config.get(
+            "destination_min_retry_interval", 10 * 60
+        )
+        self.destination_retry_multiplier = federation_config.get(
+            "destination_retry_multiplier", 2
+        )
+        self.destination_max_retry_interval = federation_config.get(
+            "destination_max_retry_interval", 24 * 60 * 60
+        )
+
 
 _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index dcc037b9822e..aceda7bf133e 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -27,15 +27,6 @@
 
 logger = logging.getLogger(__name__)
 
-# the initial backoff, after the first transaction fails
-MIN_RETRY_INTERVAL = 10 * 60 * 1000
-
-# how much we multiply the backoff by after each subsequent fail
-RETRY_MULTIPLIER = 5
-
-# a cap on the backoff. (Essentially none)
-MAX_RETRY_INTERVAL = 2**62
-
 
 class NotRetryingDestination(Exception):
     def __init__(self, retry_last_ts: int, retry_interval: int, destination: str):
@@ -169,6 +160,16 @@ def __init__(
         self.notifier = notifier
         self.replication_client = replication_client
 
+        self.destination_min_retry_interval = (
+            self.store.hs.config.federation.destination_min_retry_interval * 1000
+        )
+        self.destination_retry_multiplier = (
+            self.store.hs.config.federation.destination_retry_multiplier
+        )
+        self.destination_max_retry_interval = (
+            self.store.hs.config.federation.destination_max_retry_interval * 1000
+        )
+
     def __enter__(self) -> None:
         pass
 
@@ -220,13 +221,15 @@ def __exit__(
             # We couldn't connect.
             if self.retry_interval:
                 self.retry_interval = int(
-                    self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)
+                    self.retry_interval
+                    * self.destination_retry_multiplier
+                    * random.uniform(0.8, 1.4)
                 )
 
-                if self.retry_interval >= MAX_RETRY_INTERVAL:
-                    self.retry_interval = MAX_RETRY_INTERVAL
+                if self.retry_interval >= self.destination_max_retry_interval:
+                    self.retry_interval = self.destination_max_retry_interval
             else:
-                self.retry_interval = MIN_RETRY_INTERVAL
+                self.retry_interval = self.destination_min_retry_interval
 
             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index 2fab84a52939..52d5bda31aca 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -17,7 +17,6 @@
 from synapse.server import HomeServer
 from synapse.storage.databases.main.transactions import DestinationRetryTimings
 from synapse.util import Clock
-from synapse.util.retryutils import MAX_RETRY_INTERVAL
 
 from tests.unittest import HomeserverTestCase
 
@@ -57,8 +56,11 @@ def test_initial_set_transactions(self) -> None:
         self.get_success(d)
 
     def test_large_destination_retry(self) -> None:
+        max_retry_interval = (
+            self.hs.config.federation.destination_max_retry_interval * 1000
+        )
         d = self.store.set_destination_retry_timings(
-            "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL
+            "example.com", max_retry_interval, max_retry_interval, max_retry_interval
         )
         self.get_success(d)
 
diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 5f8f4e76b544..4e252d1a616d 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -11,12 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from synapse.util.retryutils import (
-    MIN_RETRY_INTERVAL,
-    RETRY_MULTIPLIER,
-    NotRetryingDestination,
-    get_retry_limiter,
-)
+from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
 
 from tests.unittest import HomeserverTestCase
 
@@ -42,6 +37,11 @@ def test_limiter(self) -> None:
 
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
+        min_retry_interval = (
+            self.hs.config.federation.destination_min_retry_interval * 1000
+        )
+        retry_multiplier = self.hs.config.federation.destination_retry_multiplier
+
         self.pump(1)
         try:
             with limiter:
@@ -57,7 +57,7 @@ def test_limiter(self) -> None:
         assert new_timings is not None
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, failure_ts)
-        self.assertEqual(new_timings.retry_interval, MIN_RETRY_INTERVAL)
+        self.assertEqual(new_timings.retry_interval, min_retry_interval)
 
         # now if we try again we should get a failure
         self.get_failure(
@@ -68,7 +68,7 @@ def test_limiter(self) -> None:
         # advance the clock and try again
         #
 
-        self.pump(MIN_RETRY_INTERVAL)
+        self.pump(min_retry_interval)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
         self.pump(1)
@@ -87,16 +87,16 @@ def test_limiter(self) -> None:
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, retry_ts)
         self.assertGreaterEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5
+            new_timings.retry_interval, min_retry_interval * retry_multiplier * 0.5
         )
         self.assertLessEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0
+            new_timings.retry_interval, min_retry_interval * retry_multiplier * 2.0
         )
 
         #
         # one more go, with success
         #
-        self.reactor.advance(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0)
+        self.reactor.advance(min_retry_interval * retry_multiplier * 2.0)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
         self.pump(1)

From 2f09c03fd970ebdf1d3de0fdccccd242d2559c8d Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 9 Jun 2023 17:01:29 +0200
Subject: [PATCH 02/14] Add changelog

---
 changelog.d/15754.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/15754.misc

diff --git a/changelog.d/15754.misc b/changelog.d/15754.misc
new file mode 100644
index 000000000000..4314d415a372
--- /dev/null
+++ b/changelog.d/15754.misc
@@ -0,0 +1 @@
+Allow for the configuration of the backoff algorithm for federation destinations.

From 35fbd934ce407245d0f5a7221bbcd6a22d8bd738 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Tue, 13 Jun 2023 18:11:11 +0200
Subject: [PATCH 03/14] Apply suggestions from code review

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 docs/usage/configuration/config_documentation.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 5c511d09956b..2feb11016e12 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1212,13 +1212,13 @@ like sending a federation transaction.
 * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts.
 * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts.
 
-The following options are related to configuring the backoff parameters used fo a specific destination.
-Unlike previous configuration those values applies across all requests,
-and the state of the backoff is stored on DB.
+The following options are related to configuring the backoff parameters used for a specific destination.
+Unlike previous configuration options, these values apply across all requests
+and the state of the backoff is stored in the database.
 
-* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Default to 10mn.
-* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Default to 2.
-* `destination_max_retry_interval`: a cap on the backoff. Default to one day.
+* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Defaults to 10m.
+* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2.
+* `destination_max_retry_interval`: a cap on the backoff. Defaults to one day.
 
 Example configuration:
 ```yaml

From 404a471e37e03c7e8313086fea5669af4167d90e Mon Sep 17 00:00:00 2001
From: Mathieu Velten <matmaul@gmail.com>
Date: Tue, 13 Jun 2023 23:33:45 +0200
Subject: [PATCH 04/14] Comments

---
 docs/usage/configuration/config_documentation.md | 5 ++++-
 synapse/config/federation.py                     | 8 ++++----
 synapse/util/retryutils.py                       | 4 ++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 2feb11016e12..93b0ece3e97a 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1216,7 +1216,7 @@ The following options are related to configuring the backoff parameters used for
 Unlike previous configuration options, these values apply across all requests
 and the state of the backoff is stored in the database.
 
-* `destination_min_retry_interval`: the initial backoff, after the first request fails, in seconds. Defaults to 10m.
+* `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m.
 * `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2.
 * `destination_max_retry_interval`: a cap on the backoff. Defaults to one day.
 
@@ -1228,6 +1228,9 @@ federation:
   max_long_retry_delay: 100
   max_short_retries: 5
   max_long_retries: 20
+  destination_min_retry_interval: 30s
+  destination_retry_multiplier: 5
+  destination_max_retry_interval: 12h
 ```
 ---
 ## Caching
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index b2dbec9d32c0..b46ebf30c224 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -63,14 +63,14 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         # when trying to reach an unavailable destination.
         # Unlike previous configuration those values applies across
         # multiple requests and the state of the backoff is stored on DB.
-        self.destination_min_retry_interval = federation_config.get(
-            "destination_min_retry_interval", 10 * 60
+        self.destination_min_retry_interval = Config.parse_duration(
+            federation_config.get("destination_min_retry_interval", "10m")
         )
         self.destination_retry_multiplier = federation_config.get(
             "destination_retry_multiplier", 2
         )
-        self.destination_max_retry_interval = federation_config.get(
-            "destination_max_retry_interval", 24 * 60 * 60
+        self.destination_max_retry_interval = Config.parse_duration(
+            federation_config.get("destination_max_retry_interval", "1d")
         )
 
 
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index aceda7bf133e..fefa31988e35 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -161,13 +161,13 @@ def __init__(
         self.replication_client = replication_client
 
         self.destination_min_retry_interval = (
-            self.store.hs.config.federation.destination_min_retry_interval * 1000
+            self.store.hs.config.federation.destination_min_retry_interval
         )
         self.destination_retry_multiplier = (
             self.store.hs.config.federation.destination_retry_multiplier
         )
         self.destination_max_retry_interval = (
-            self.store.hs.config.federation.destination_max_retry_interval * 1000
+            self.store.hs.config.federation.destination_max_retry_interval
         )
 
     def __enter__(self) -> None:

From 5ec6e48c368bfd1c2ebbc8a4708869bc0ce63a19 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <matmaul@gmail.com>
Date: Tue, 13 Jun 2023 23:58:53 +0200
Subject: [PATCH 05/14] Fix test

---
 tests/util/test_retryutils.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 4e252d1a616d..3174ac5e4364 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -37,9 +37,7 @@ def test_limiter(self) -> None:
 
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
-        min_retry_interval = (
-            self.hs.config.federation.destination_min_retry_interval * 1000
-        )
+        min_retry_interval = self.hs.config.federation.destination_min_retry_interval
         retry_multiplier = self.hs.config.federation.destination_retry_multiplier
 
         self.pump(1)

From 702bc7dd1dbfe378e45e95b9d6d1d91416192c42 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 15 Jun 2023 15:06:22 +0200
Subject: [PATCH 06/14] Add unit

---
 synapse/config/federation.py  | 4 ++--
 synapse/util/retryutils.py    | 4 ++--
 tests/util/test_retryutils.py | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index b46ebf30c224..e88f479c9b1e 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -63,13 +63,13 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         # when trying to reach an unavailable destination.
         # Unlike previous configuration those values applies across
         # multiple requests and the state of the backoff is stored on DB.
-        self.destination_min_retry_interval = Config.parse_duration(
+        self.destination_min_retry_interval_ms = Config.parse_duration(
             federation_config.get("destination_min_retry_interval", "10m")
         )
         self.destination_retry_multiplier = federation_config.get(
             "destination_retry_multiplier", 2
         )
-        self.destination_max_retry_interval = Config.parse_duration(
+        self.destination_max_retry_interval_ms = Config.parse_duration(
             federation_config.get("destination_max_retry_interval", "1d")
         )
 
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index fefa31988e35..99a7257e74cd 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -161,13 +161,13 @@ def __init__(
         self.replication_client = replication_client
 
         self.destination_min_retry_interval = (
-            self.store.hs.config.federation.destination_min_retry_interval
+            self.store.hs.config.federation.destination_min_retry_interval_ms
         )
         self.destination_retry_multiplier = (
             self.store.hs.config.federation.destination_retry_multiplier
         )
         self.destination_max_retry_interval = (
-            self.store.hs.config.federation.destination_max_retry_interval
+            self.store.hs.config.federation.destination_max_retry_interval_ms
         )
 
     def __enter__(self) -> None:
diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 3174ac5e4364..cf95d77c8c76 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -37,7 +37,7 @@ def test_limiter(self) -> None:
 
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
-        min_retry_interval = self.hs.config.federation.destination_min_retry_interval
+        min_retry_interval = self.hs.config.federation.destination_min_retry_interval_ms
         retry_multiplier = self.hs.config.federation.destination_retry_multiplier
 
         self.pump(1)

From 1079dca4ec35233ffa8b716db37f972018943bca Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 21 Jun 2023 14:25:03 +0200
Subject: [PATCH 07/14] Add unit to var names

---
 synapse/util/retryutils.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 99a7257e74cd..4e03e4f2044f 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -160,13 +160,13 @@ def __init__(
         self.notifier = notifier
         self.replication_client = replication_client
 
-        self.destination_min_retry_interval = (
+        self.destination_min_retry_interval_ms = (
             self.store.hs.config.federation.destination_min_retry_interval_ms
         )
         self.destination_retry_multiplier = (
             self.store.hs.config.federation.destination_retry_multiplier
         )
-        self.destination_max_retry_interval = (
+        self.destination_max_retry_interval_ms = (
             self.store.hs.config.federation.destination_max_retry_interval_ms
         )
 
@@ -226,10 +226,10 @@ def __exit__(
                     * random.uniform(0.8, 1.4)
                 )
 
-                if self.retry_interval >= self.destination_max_retry_interval:
-                    self.retry_interval = self.destination_max_retry_interval
+                if self.retry_interval >= self.destination_max_retry_interva_ms:
+                    self.retry_interval = self.destination_max_retry_interva_ms
             else:
-                self.retry_interval = self.destination_min_retry_interval
+                self.retry_interval = self.destination_min_retry_interval_ms
 
             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",

From ab4ccfc9e747370b72c737e10457d61aae7216f3 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 21 Jun 2023 16:00:42 +0200
Subject: [PATCH 08/14] typos

---
 pyproject.toml                     | 2 +-
 synapse/util/retryutils.py         | 4 ++--
 tests/storage/test_transactions.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 90812de019e8..752f333b8a98 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -369,7 +369,7 @@ furo = ">=2022.12.7,<2024.0.0"
 # system changes.
 # We are happy to raise these upper bounds upon request,
 # provided we check that it's safe to do so (i.e. that CI passes).
-requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0"]
+requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 4e03e4f2044f..27e9fc976c10 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -226,8 +226,8 @@ def __exit__(
                     * random.uniform(0.8, 1.4)
                 )
 
-                if self.retry_interval >= self.destination_max_retry_interva_ms:
-                    self.retry_interval = self.destination_max_retry_interva_ms
+                if self.retry_interval >= self.destination_max_retry_interval_ms:
+                    self.retry_interval = self.destination_max_retry_interval_ms
             else:
                 self.retry_interval = self.destination_min_retry_interval_ms
 
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index 52d5bda31aca..0e903e1dca77 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -57,7 +57,7 @@ def test_initial_set_transactions(self) -> None:
 
     def test_large_destination_retry(self) -> None:
         max_retry_interval = (
-            self.hs.config.federation.destination_max_retry_interval * 1000
+            self.hs.config.federation.destination_max_retry_interval_ms * 1000
         )
         d = self.store.set_destination_retry_timings(
             "example.com", max_retry_interval, max_retry_interval, max_retry_interval

From 68ca3a0e735107af27bb7e48b5925a2911a53e07 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Mon, 26 Jun 2023 16:00:10 +0200
Subject: [PATCH 09/14] Update docs/usage/configuration/config_documentation.md

Co-authored-by: Eric Eastwood <erice@element.io>
---
 docs/usage/configuration/config_documentation.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 78edeb812368..f1e3813dce8e 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1212,9 +1212,9 @@ like sending a federation transaction.
 * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts.
 * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts.
 
-The following options are related to configuring the backoff parameters used for a specific destination.
-Unlike previous configuration options, these values apply across all requests
-and the state of the backoff is stored in the database.
+The following options control the retry logic when communicating with a specific homeserver destination.
+Unlike the previous configuration options, these values apply across all requests
+for a given destination and the state of the backoff is stored in the database.
 
 * `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m.
 * `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2.

From e476f3c3f65a9af9f846335c22b370da56b04773 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <matmaul@gmail.com>
Date: Mon, 10 Jul 2023 16:48:50 +0200
Subject: [PATCH 10/14] Remove unneeded added dependency

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3799fee15f45..fc1b8c0dadd2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -367,7 +367,7 @@ furo = ">=2022.12.7,<2024.0.0"
 # system changes.
 # We are happy to raise these upper bounds upon request,
 # provided we check that it's safe to do so (i.e. that CI passes).
-requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0", "wheel"]
+requires = ["poetry-core>=1.1.0,<=1.6.0", "setuptools_rust>=1.3,<=1.6.0"]
 build-backend = "poetry.core.masonry.api"
 
 

From 7f7a5e8c7af6c9e3da6a228a47c2194443ef23a9 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <matmaul@gmail.com>
Date: Tue, 11 Jul 2023 10:53:05 +0200
Subject: [PATCH 11/14] Change destination_max_retry_interval default to a week

---
 docs/usage/configuration/config_documentation.md | 2 +-
 synapse/config/federation.py                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index f5972d1dc348..7fc960297478 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1218,7 +1218,7 @@ for a given destination and the state of the backoff is stored in the database.
 
 * `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m.
 * `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2.
-* `destination_max_retry_interval`: a cap on the backoff. Defaults to one day.
+* `destination_max_retry_interval`: a cap on the backoff. Defaults to a week.
 
 Example configuration:
 ```yaml
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 30397f5ef842..eb6f0b46a320 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -76,7 +76,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
             "destination_retry_multiplier", 2
         )
         self.destination_max_retry_interval_ms = Config.parse_duration(
-            federation_config.get("destination_max_retry_interval", "1d")
+            federation_config.get("destination_max_retry_interval", "7d")
         )
 
 

From a58ef664566f82b116db83e376ac65c4748fde36 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 3 Aug 2023 15:03:31 +0200
Subject: [PATCH 12/14] Clarify units

---
 tests/storage/test_transactions.py |  9 ++++++---
 tests/util/test_retryutils.py      | 14 ++++++++------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index 0e903e1dca77..ef06b50dbb7b 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -56,11 +56,14 @@ def test_initial_set_transactions(self) -> None:
         self.get_success(d)
 
     def test_large_destination_retry(self) -> None:
-        max_retry_interval = (
-            self.hs.config.federation.destination_max_retry_interval_ms * 1000
+        max_retry_interval_ms = (
+            self.hs.config.federation.destination_max_retry_interval_ms
         )
         d = self.store.set_destination_retry_timings(
-            "example.com", max_retry_interval, max_retry_interval, max_retry_interval
+            "example.com",
+            max_retry_interval_ms,
+            max_retry_interval_ms,
+            max_retry_interval_ms,
         )
         self.get_success(d)
 
diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index cf95d77c8c76..1277e1a865ff 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -37,7 +37,9 @@ def test_limiter(self) -> None:
 
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
-        min_retry_interval = self.hs.config.federation.destination_min_retry_interval_ms
+        min_retry_interval_ms = (
+            self.hs.config.federation.destination_min_retry_interval_ms
+        )
         retry_multiplier = self.hs.config.federation.destination_retry_multiplier
 
         self.pump(1)
@@ -55,7 +57,7 @@ def test_limiter(self) -> None:
         assert new_timings is not None
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, failure_ts)
-        self.assertEqual(new_timings.retry_interval, min_retry_interval)
+        self.assertEqual(new_timings.retry_interval, min_retry_interval_ms)
 
         # now if we try again we should get a failure
         self.get_failure(
@@ -66,7 +68,7 @@ def test_limiter(self) -> None:
         # advance the clock and try again
         #
 
-        self.pump(min_retry_interval)
+        self.pump(min_retry_interval_ms)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
         self.pump(1)
@@ -85,16 +87,16 @@ def test_limiter(self) -> None:
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, retry_ts)
         self.assertGreaterEqual(
-            new_timings.retry_interval, min_retry_interval * retry_multiplier * 0.5
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 0.5
         )
         self.assertLessEqual(
-            new_timings.retry_interval, min_retry_interval * retry_multiplier * 2.0
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 2.0
         )
 
         #
         # one more go, with success
         #
-        self.reactor.advance(min_retry_interval * retry_multiplier * 2.0)
+        self.reactor.advance(min_retry_interval_ms * retry_multiplier * 2.0)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
         self.pump(1)

From 033e59f54a2a6ca2ab57b1a737db6e3f6b1785ad Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 3 Aug 2023 15:38:12 +0200
Subject: [PATCH 13/14] Limit retry interval values to avoid overflowing the DB

---
 synapse/config/federation.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index eb6f0b46a320..4c83b6565afb 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -69,14 +69,20 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         # when trying to reach an unavailable destination.
         # Unlike previous configuration those values applies across
         # multiple requests and the state of the backoff is stored on DB.
-        self.destination_min_retry_interval_ms = Config.parse_duration(
-            federation_config.get("destination_min_retry_interval", "10m")
+        self.destination_min_retry_interval_ms = min(
+            Config.parse_duration(
+                federation_config.get("destination_min_retry_interval", "10m")
+            ),
+            2**62,
         )
         self.destination_retry_multiplier = federation_config.get(
             "destination_retry_multiplier", 2
         )
-        self.destination_max_retry_interval_ms = Config.parse_duration(
-            federation_config.get("destination_max_retry_interval", "7d")
+        self.destination_max_retry_interval_ms = min(
+            Config.parse_duration(
+                federation_config.get("destination_max_retry_interval", "7d")
+            ),
+            2**62,
         )
 
 

From 2e633c3f907dcc2ff0bcf47a02838a34fcf6b0be Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 3 Aug 2023 12:32:57 -0400
Subject: [PATCH 14/14] Review comments

---
 synapse/config/federation.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 4c83b6565afb..97636039b8ad 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -69,11 +69,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         # when trying to reach an unavailable destination.
         # Unlike previous configuration those values applies across
         # multiple requests and the state of the backoff is stored on DB.
-        self.destination_min_retry_interval_ms = min(
-            Config.parse_duration(
-                federation_config.get("destination_min_retry_interval", "10m")
-            ),
-            2**62,
+        self.destination_min_retry_interval_ms = Config.parse_duration(
+            federation_config.get("destination_min_retry_interval", "10m")
         )
         self.destination_retry_multiplier = federation_config.get(
             "destination_retry_multiplier", 2
@@ -82,6 +79,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
             Config.parse_duration(
                 federation_config.get("destination_max_retry_interval", "7d")
             ),
+            # Set a hard-limit to not overflow the database column.
             2**62,
         )