This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Catch-up after Federation Outage #8096

Closed
reivilibre wants to merge 38 commits into develop from rei/2528_catchup_fed_outage
Changes from 17 commits

38 commits:
18d900e
Fix wrong type annotation
reivilibre Jul 23, 2020
07a415c
Little type hint
reivilibre Aug 14, 2020
5bc321a
Add data store functions and delta
reivilibre Aug 14, 2020
c60e259
Track destination_rooms entries
reivilibre Aug 14, 2020
20c896a
Add catch-up logic
reivilibre Aug 14, 2020
d232200
Add tests!
reivilibre Aug 14, 2020
967d8c1
Merge branch 'develop' into rei/2528_catchup_fed_outage
reivilibre Aug 14, 2020
d910798
Track async/await and db_pool transitions
reivilibre Aug 14, 2020
74a6f4f
Newsfile
reivilibre Aug 14, 2020
c1b32ae
Antilint
reivilibre Aug 14, 2020
5de9313
Fix up test
reivilibre Aug 17, 2020
759e027
Remove unused method
reivilibre Aug 17, 2020
6c52666
Use Python 3.5-friendly Collection type
reivilibre Aug 17, 2020
9ba56cb
Fix logic bug in prior code
reivilibre Aug 17, 2020
af13948
Update changelog.d/8096.bugfix
reivilibre Aug 20, 2020
558af38
Handle suggestions from review
reivilibre Aug 20, 2020
44765e9
How could we ever forget you, ./scripts-dev/lint.sh?
reivilibre Aug 20, 2020
56aaa17
Apply suggestions from Rich's code review
reivilibre Aug 26, 2020
84dbc43
Apply suggestions from Rich's code review
reivilibre Aug 26, 2020
2c740a7
Foreign key on rooms, SQL comment
reivilibre Aug 26, 2020
d77e444
NOT NULL, foreign key (events)
reivilibre Aug 26, 2020
33874d4
SQL column doc
reivilibre Aug 26, 2020
c1a2b68
Behaviour confirmed reasonable-seeming
reivilibre Aug 26, 2020
16eec5c
The early bird gets the early return
reivilibre Aug 26, 2020
92517e9
Assertion on bug
reivilibre Aug 26, 2020
ef4680d
Last successful stream ordering is about destinations
reivilibre Aug 26, 2020
de5caf0
Catch-up on all cases except federation denial
reivilibre Aug 27, 2020
3e308f9
Don't explicitly store the event_id
reivilibre Aug 27, 2020
b0bdadd
Antilint
reivilibre Aug 27, 2020
843403f
Remove review question
reivilibre Aug 27, 2020
ad7124d
Merge branch 'develop' into rei/2528_catchup_fed_outage
reivilibre Aug 27, 2020
b1fd67b
Fix wrong type signatures (even if str is Iterable[str]…)
reivilibre Aug 27, 2020
e6890c7
Fix the tests after removing event_id column
reivilibre Aug 27, 2020
7cfecf3
Antilint
reivilibre Aug 27, 2020
bf51d2f
Also fix `simple_select_onecol_txn`
reivilibre Aug 27, 2020
7589a03
Antilint again :(
reivilibre Aug 27, 2020
8d9f4ba
Merge remote-tracking branch 'origin/develop' into rei/2528_catchup_f…
reivilibre Sep 1, 2020
b60ad35
Merge remote-tracking branch 'origin/develop' into rei/2528_catchup_f…
reivilibre Sep 1, 2020
1 change: 1 addition & 0 deletions changelog.d/8096.bugfix
@@ -0,0 +1 @@
Send events to homeservers that they may have missed in rooms during a period of unreachability.
14 changes: 12 additions & 2 deletions synapse/federation/sender/__init__.py
@@ -211,7 +211,7 @@ async def handle_event(event: EventBase) -> None:
logger.debug("Sending %s to %r", event, destinations)

if destinations:
self._send_pdu(event, destinations)
await self._send_pdu(event, destinations)

now = self.clock.time_msec()
ts = await self.store.get_received_ts(event.event_id)
@@ -267,7 +267,7 @@ async def handle_room_events(events: Iterable[EventBase]) -> None:
finally:
self._is_processing = False

def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
async def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
# We loop through all destinations to see whether we already have
# a transaction in progress. If we do, stick it in the pending_pdus
# table and we'll get back to it later.
@@ -285,6 +285,16 @@ def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
sent_pdus_destination_dist_total.inc(len(destinations))
sent_pdus_destination_dist_count.inc()

# track the fact that we are enqueuing this PDU for these destinations,
# to allow us to perform catch-up later on if the remote is unreachable
# for a while.
await self.store.store_destination_rooms_entries(
destinations,
pdu.room_id,
pdu.event_id,
pdu.internal_metadata.stream_ordering,
)

for destination in destinations:
self._get_per_destination_queue(destination).send_pdu(pdu, order)

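For orientation, here is a minimal sketch of the bookkeeping that store_destination_rooms_entries has to provide, based on the destination_rooms schema added at the bottom of this diff: one row per (destination, room), always holding the latest event queued for that pair. This is a self-contained sqlite3 toy, not Synapse's actual data-store code; it only borrows the method name for clarity.

    # Toy model of the destination_rooms bookkeeping (NOT Synapse's store code).
    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        """
        CREATE TABLE destination_rooms (
            destination TEXT NOT NULL,
            room_id TEXT NOT NULL,
            stream_ordering INTEGER,
            event_id TEXT NOT NULL,
            PRIMARY KEY (destination, room_id)
        )
        """
    )

    def store_destination_rooms_entries(destinations, room_id, event_id, stream_ordering):
        # keep only the most recent event per (destination, room): newer rows
        # overwrite older ones, so catch-up later only needs the latest event
        # in each room.
        conn.executemany(
            """
            INSERT INTO destination_rooms (destination, room_id, stream_ordering, event_id)
            VALUES (?, ?, ?, ?)
            ON CONFLICT (destination, room_id) DO UPDATE SET
                stream_ordering = excluded.stream_ordering,
                event_id = excluded.event_id
            """,
            [(d, room_id, stream_ordering, event_id) for d in destinations],
        )

    store_destination_rooms_entries(["remote.example.org"], "!room:a", "$event1", 10)
    store_destination_rooms_entries(["remote.example.org"], "!room:a", "$event2", 11)
    print(conn.execute("SELECT * FROM destination_rooms").fetchall())
    # [('remote.example.org', '!room:a', 11, '$event2')]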
149 changes: 147 additions & 2 deletions synapse/federation/sender/per_destination_queue.py
@@ -15,7 +15,7 @@
# limitations under the License.
import datetime
import logging
from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Tuple
from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Tuple, cast

from prometheus_client import Counter

@@ -92,6 +92,18 @@ def __init__(
self._destination = destination
self.transmission_loop_running = False

# True whilst we are sending events that the remote homeserver missed
# because it was unreachable.
# New events will only be sent once this is finished, at which point
# _catching_up is flipped to False.
self._catching_up = True
# the maximum stream order to catch up to (PDUs after this are expected
# to be in the main transmission queue), inclusive
self._catch_up_max_stream_order = None # type: Optional[int]
# Cache of the last successfully-transmitted stream ordering for this
# destination (we are the only updater so this is safe)
self._last_successful_stream_order = None # type: Optional[int]

# a list of tuples of (pending pdu, order)
self._pending_pdus = [] # type: List[Tuple[EventBase, int]]

@@ -137,8 +149,15 @@ def send_pdu(self, pdu: EventBase, order: int) -> None:

Args:
pdu: pdu to send
order
order: an arbitrary order for the PDU — NOT the stream ordering
"""
if (
self._catch_up_max_stream_order
and pdu.internal_metadata.stream_ordering <= self._catch_up_max_stream_order
Member: when would this happen? I'm struggling to imagine how we could end up here with an event with a lower stream ordering than the catch-up max.

Contributor Author: In a (potentially hypothetical?) race condition.

        # track the fact that we have a PDU for these destinations,
        # to allow us to perform catch-up later on if the remote is unreachable
        # for a while.
        await self.store.store_destination_rooms_entries(
            destinations,
            pdu.room_id,
            pdu.event_id,
            pdu.internal_metadata.stream_ordering,
        )

# X <---

        for destination in destinations:
            self._get_per_destination_queue(destination).send_pdu(pdu, order)

I'm not sure if ^ has an opportunity to race or not; but even if it doesn't now, what about if someone innocently comes along and plops an await in there (at the X)?

The uncertainty made me feel like I should try and make this robust and 'just deal with it'.

Contributor Author: N.B. needs scrutiny about whether that needs to check we actually have self._catch_up 🤔

):
# we are in catch-up mode and this PDU is already scheduled to be
# part of the catch-up
return
self._pending_pdus.append((pdu, order))
Member: it seems to me that we shouldn't add new events to the queue while we are catching up?

Contributor Author: As you note later, I set a high-water-mark. Anything after this watermark should be handled in the normal way.

(Part of this is also to reduce the use of /get_missing_events for when we think the remote is online.)

If we are backing off for a long time, it will clear the queue and high-water-mark when it attempts a transaction on the next line.

self.attempt_new_transaction()
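To make the race discussed above concrete: any await point hands control back to the event loop, so another task could run between recording the destination_rooms entry and handing the PDU to a per-destination queue. A toy asyncio illustration (hypothetical names, not Synapse code):

    # Toy illustration of interleaving at an await point (the "X <---" above).
    import asyncio

    log = []

    async def handle_event(label: str, yields_at_x: bool) -> None:
        log.append(f"{label}: destination_rooms entry recorded")
        if yields_at_x:
            # an await here (the X) lets other tasks run before we enqueue
            await asyncio.sleep(0)
        log.append(f"{label}: send_pdu called")

    async def main() -> None:
        await asyncio.gather(handle_event("event A", True), handle_event("event B", False))
        print("\n".join(log))

    asyncio.run(main())
    # event A records its entry, then event B records *and* enqueues before
    # event A reaches send_pdu: the kind of reordering the guard tolerates.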

@@ -219,6 +238,17 @@ async def _transaction_transmission_loop(self) -> None:
# hence why we throw the result away.
await get_retry_limiter(self._destination, self._clock, self._store)

if self._catching_up:
# we're catching up, so we should send old events instead
# in this case, we don't send anything from the new queue
# this keeps the catching-up logic simple
await self._catch_up_transmission_loop()
if self._catching_up:
# XXX if we aren't actually caught up still, shouldn't
# carry on to the main loop
# (but need to consider what we do in a failure...?)
return

pending_pdus = []
while True:
# We have to keep 2 free slots for presence and rr_edus
@@ -326,6 +356,15 @@ async def _transaction_transmission_loop(self) -> None:

self._last_device_stream_id = device_stream_id
self._last_device_list_stream_id = dev_list_id

if pending_pdus:
final_pdu, _ = pending_pdus[-1]
self._last_successful_stream_order = (
final_pdu.internal_metadata.stream_ordering
)
await self._store.set_last_successful_stream_ordering(
self._destination, self._last_successful_stream_order
)
else:
break
except NotRetryingDestination as e:
@@ -338,6 +377,11 @@ async def _transaction_transmission_loop(self) -> None:
),
)

# XXX REVIEW needs scrutiny
# to note: up to 50 pdus can be lost from the
# main queue by a transaction that triggers a backoff — do we
# clear the main queue now? I can see arguments for and against.
Member: What are those arguments?

Contributor Author: D'ow, that'll teach me for not writing my full thoughts whilst I still remember what I was on about.

So, vaguely I may have had two wavelengths of thought here:

  1. We're dropping 50 PDUs, isn't that bad?! Won't that mean we will forget to catch-up some rooms potentially?

If I recall, I appear to have been under the delusion that those PDUs would be lost altogether.

However, now I realise that this will enable catch-up, and those PDUs will be eligible for catch-up if they are the most recent PDUs in their respective rooms (a destination_rooms entry will exist and we won't update our last_successful_stream_ordering on a failure…) — so the remote will still get a chance to hear about them when they next come up. (If they aren't, then the more recent PDUs will be in the main queue so the remote will hear eventually!)

So: all OK here.

  2. Is there any point in dropping 50 but not just dropping the whole queue?
  • Erik mentioned that sometimes, moving on to a different transaction can help a remote recover (i.e. just one particular one causes it to blow up), so 'skipping' is not necessarily bad.
  • I wonder if waiting for retries to exceed an hour before sweeping out the queue could be bad for memory usage (of course, no worse than present-day, but …. is this a chance to make it better?)
  • On the other hand, I don't suppose that /get_missing_events is the most efficient/high-throughput way to get events so forcing catch-up onto destinations too aggressively may be harmful.

Broadly I am tempted to think this is currently fine, but may need tweaking/limiting/aggressifying in the future in case that really does grow too large. I think Erik or Rich will have a more trustworthy sense here, though.

Member: I think it's fine for now, though once we have more confidence in the "catching up" behaviour, I think we may as well drop the whole queue immediately.


if e.retry_interval > 60 * 60 * 1000:
# we won't retry for another hour!
# (this suggests a significant outage)
Expand All @@ -359,6 +403,10 @@ async def _transaction_transmission_loop(self) -> None:
self._pending_edus_keyed = {}
self._pending_presence = {}
self._pending_rrs = {}

self._catching_up = True
# reset max catch up since we have dropped PDUs here
self._catch_up_max_stream_order = None
except FederationDeniedError as e:
logger.info(e)
except HttpResponseException as e:
@@ -368,6 +416,8 @@ async def _transaction_transmission_loop(self) -> None:
e.code,
e,
)

# XXX REVIEW should we be catching up?
Member: What are some typical situations where we could hit this? I could imagine a server is switching between available/not available behind its reverse proxy, such that we occasionally get 502's. In that case we should probably continue to catch up, correct?

Does Synapse purposely return any error codes other than 200 here? (I'm not sure). The spec doesn't seem to say so.

In that case, if it's only things in front of Synapse that would be returning other error codes, it seems sensible to continue trying to catch up.

Contributor Author: I suppose I want to be clearer about which of these exceptions should later trigger another catch-up attempt, and which ones we should treat as 'actually drop the PDUs for good this time'.

After looking with a fresh mind, I think yes, we should enable catch-up on: RequestSendFailed, HttpResponseException

Not sure about: FederationDeniedError, Exception

Thoughts?

Member: yes, I think we should enter "catchup" mode in most of these cases.

We should not do so for FederationDeniedError since that means the remote server is deliberately blocked in our whitelist.

All the other cases should trigger a catchup afaict.
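The rule this thread converges on (and which the later commit "Catch-up on all cases except federation denial" names) can be stated as a one-line predicate. A sketch using stub exception classes standing in for Synapse's real ones:

    # Stub classes stand in for Synapse's real exceptions; only the rule matters.
    class NotRetryingDestination(Exception): ...
    class FederationDeniedError(Exception): ...
    class HttpResponseException(Exception): ...
    class RequestSendFailed(Exception): ...

    def should_enter_catch_up(exc: Exception) -> bool:
        # catch up after any transmission failure except an explicit federation
        # denial, which means we are deliberately not talking to that server.
        return not isinstance(exc, FederationDeniedError)

    for exc in (NotRetryingDestination(), HttpResponseException(),
                RequestSendFailed(), FederationDeniedError(), Exception("boom")):
        print(type(exc).__name__, should_enter_catch_up(exc))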

except RequestSendFailed as e:
logger.warning(
"TX [%s] Failed to send transaction: %s", self._destination, e
@@ -387,6 +437,101 @@ async def _transaction_transmission_loop(self) -> None:
# We want to be *very* sure we clear this after we stop processing
self.transmission_loop_running = False

async def _catch_up_transmission_loop(self) -> None:
if self._last_successful_stream_order is None:
# first catch-up, so get from database
self._last_successful_stream_order = await self._store.get_last_successful_stream_ordering(
self._destination
)

if self._last_successful_stream_order is None:
# if it's still None, then this means we don't have the information
# in our database (oh, the perils of being a new feature).
# So we can't actually do anything here, and in this case, we don't
# know what to catch up, sadly.
# Trying to catch up right now is futile, so let's stop.
self._catching_up = False
return

if self._catch_up_max_stream_order is None:
Member: question: why do we need a high-water-mark at all? why not just keep going until get_catch_up_room_event_ids returns an empty list?

Member: (doing so without care will introduce races though...)

Contributor Author: I suppose we could do that if you are keen — as you say though, needs more thought.

The advantage of this approach is that it's easy to keep the logic in my head. Was keen not to give a chance for any nasty bugs to crawl in, because it'd be nice to have confidence in this.

# this is our first catch-up so we need to determine how much we
# want to catch-up.
if self._pending_pdus:
# we have PDUs already in the main queue so no need to ask the
# database
first_non_catch_up_pdu, _ = self._pending_pdus[0]
# -1 because we wish to exclude that one — we don't need to catch
# it up as it's in our main queue
self._catch_up_max_stream_order = (
first_non_catch_up_pdu.internal_metadata.stream_ordering - 1
)
else:
# we don't have any PDUs in the main queue so instead find out
# the largest stream order that we know of that has, once upon a
# time, been queued for this destination (i.e. this is what we
# *should* have sent if the remote server was reachable).
self._catch_up_max_stream_order = await self._store.get_largest_destination_rooms_stream_order(
self._destination
)
if self._catch_up_max_stream_order is None:
# not enough info to catch up
self._catching_up = False
return

# get at most 50 catchup room/PDUs
while self._last_successful_stream_order < self._catch_up_max_stream_order:
event_ids = await self._store.get_catch_up_room_event_ids(
self._destination,
self._last_successful_stream_order,
self._catch_up_max_stream_order,
)

if not event_ids:
# I don't believe this *should* happen unless someone has been
# tinkering with the database, but I also have limited foresight,
# so let's handle this properly
logger.warning(
"Unexpectedly, no event IDs were found for catch-up: "
"last successful = %d, max catch up = %d",
self._last_successful_stream_order,
self._catch_up_max_stream_order,
)
self._catching_up = False
break

# fetch the relevant events from the event store
# - redacted behaviour of REDACT is fine, since we only send metadata
# of redacted events to the destination.
# - don't need to worry about rejected events as we do not actively
# forward received events over federation.
events = await self._store.get_events_as_list(event_ids)

# zip them together with their stream orderings
catch_up_pdus = [
(event, event.internal_metadata.stream_ordering) for event in events
Member: as noted elsewhere: I think the order is redundant: it might be easier to get rid of it (in a separate PR).

Contributor Author: I'll put it on my list, then :)

]

if not catch_up_pdus:
break
Member: when does this happen, and why is breaking the loop the correct behaviour?

Contributor Author: I suppose this should log an ERROR, since this shouldn't happen.

Contributor Author: break isn't the correct behaviour really, since it disables catch-up when we supposedly should have some to do.

But only break will let us make progress and go to the main loop again.

So I will log an error either way, but is it best to break or return?

Contributor Author: Is this being unnecessarily paranoid?

There will be a foreign key constraint to link our rows to events, so this should darn well be impossible.

Better to assert events or if not events: raise AssertionError(...)?


success = await self._transaction_manager.send_new_transaction(
self._destination, catch_up_pdus, []
)
if success:
sent_transactions_counter.inc()
final_pdu, _ = catch_up_pdus[-1]
self._last_successful_stream_order = cast(
int, final_pdu.internal_metadata.stream_ordering
)
await self._store.set_last_successful_stream_ordering(
self._destination, self._last_successful_stream_order
)
else:
return

# once we have reached this point, catch-up is done!
self._catching_up = False

def _get_rr_edus(self, force_flush: bool) -> Iterable[Edu]:
if not self._pending_rrs:
return
35 changes: 35 additions & 0 deletions (new SQL schema delta file)
@@ -0,0 +1,35 @@
/* Copyright 2020 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-- This schema delta alters the schema to enable 'catching up' remote homeservers
-- after there has been a connectivity problem for any reason.

-- This stores, for each (destination, room) pair, the event_id and stream_ordering
-- of the latest event to be enqueued for transmission to that destination.
CREATE TABLE IF NOT EXISTS destination_rooms (
-- the destination in question
destination TEXT NOT NULL,
Member: I'd kinda like to see the logic shuffled so that this can be a foreign key; however at the very least it needs a comment saying why it can't be one.

Contributor Author (reivilibre, Aug 26, 2020): Sure, would it suffice to INSERT IGNORE a NULL row into the destinations table when upserting a destination_rooms row?

Member: probably. It would be nice to avoid doing so on every PDU (for example by doing it when we first create a PerDestinationQueue for the destination), but that might be fiddly, and is something we can do later.

-- the ID of the room in question
room_id TEXT NOT NULL,
-- the stream_ordering of the event
stream_ordering INTEGER,
-- the event_id of the event
event_id TEXT NOT NULL,
PRIMARY KEY (destination, room_id)
);
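A minimal sqlite3 sketch (not the actual Synapse storage code) of the suggestion in the foreign-key thread above: INSERT-or-IGNORE a stub row into a cut-down destinations table before writing to destination_rooms, so that a foreign key on the destination column can hold.

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("PRAGMA foreign_keys = ON")
    # cut-down stand-in for the real destinations table
    conn.execute(
        "CREATE TABLE destinations (destination TEXT PRIMARY KEY, retry_interval INTEGER)"
    )
    conn.execute(
        """
        CREATE TABLE destination_rooms (
            destination TEXT NOT NULL REFERENCES destinations (destination),
            room_id TEXT NOT NULL,
            stream_ordering INTEGER,
            event_id TEXT NOT NULL,
            PRIMARY KEY (destination, room_id)
        )
        """
    )

    def record_destination_room(destination, room_id, event_id, stream_ordering):
        # ensure the parent row exists first, otherwise the foreign key rejects us;
        # this is the "INSERT IGNORE a NULL row" idea from the thread above
        conn.execute(
            "INSERT OR IGNORE INTO destinations (destination) VALUES (?)", (destination,)
        )
        conn.execute(
            "INSERT OR REPLACE INTO destination_rooms VALUES (?, ?, ?, ?)",
            (destination, room_id, stream_ordering, event_id),
        )

    record_destination_room("remote.example.org", "!room:a", "$event1", 10)
    print(conn.execute("SELECT * FROM destinations").fetchall())
    # -> [('remote.example.org', None)]  (the stub row; other columns stay NULL)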

-- this column tracks the stream_ordering of the event that was most recently
-- successfully transmitted to the destination.
ALTER TABLE destinations
ADD COLUMN last_successful_stream_ordering INTEGER;
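Putting the pieces together, here is a self-contained toy model (plain Python, illustrative names only, not Synapse code) of how destination_rooms plus last_successful_stream_ordering produce the catch-up behaviour described in this PR: during an outage only the bookkeeping is updated, and when the destination recovers we re-send just the newest event of each affected room.

    # Toy model of federation catch-up (illustrative only).

    class CatchUpModel:
        def __init__(self):
            # (destination, room_id) -> (stream_ordering, event_id), newest wins
            self.destination_rooms = {}
            # destination -> stream_ordering of last event successfully sent
            self.last_successful_stream_ordering = {}

        def record_pdu(self, destinations, room_id, event_id, stream_ordering):
            for destination in destinations:
                self.destination_rooms[(destination, room_id)] = (stream_ordering, event_id)

        def note_successful_send(self, destination, stream_ordering):
            self.last_successful_stream_ordering[destination] = stream_ordering

        def catch_up_events(self, destination):
            # events newer than the last successful send, at most one per room
            last = self.last_successful_stream_ordering.get(destination, 0)
            missed = [
                (so, event_id)
                for (dest, _room), (so, event_id) in self.destination_rooms.items()
                if dest == destination and so > last
            ]
            return [event_id for _so, event_id in sorted(missed)]

    model = CatchUpModel()

    # everything fine: event 1 is sent and acknowledged
    model.record_pdu(["remote.example.org"], "!roomA:x", "$1", 1)
    model.note_successful_send("remote.example.org", 1)

    # remote goes down; three more events happen in !roomA and one in !roomB
    model.record_pdu(["remote.example.org"], "!roomA:x", "$2", 2)
    model.record_pdu(["remote.example.org"], "!roomA:x", "$3", 3)
    model.record_pdu(["remote.example.org"], "!roomA:x", "$4", 4)
    model.record_pdu(["remote.example.org"], "!roomB:x", "$5", 5)

    # remote comes back: only the *latest* event per room needs to be sent;
    # $2 and $3 are skipped, and the remote can use /get_missing_events to
    # backfill them if it cares.
    print(model.catch_up_events("remote.example.org"))  # ['$4', '$5']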