From ea33ee005e164de9fa3ba4111f29a6ef3fa4f651 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 5 Aug 2022 22:06:08 +0100 Subject: [PATCH 1/6] Add some miscellaneous comments around sync Signed-off-by: Sean Quah --- changelog.d/13474.misc | 1 + synapse/handlers/sync.py | 48 +++++++++++++++++++++++++++++++++++----- synapse/visibility.py | 4 ++-- 3 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13474.misc diff --git a/changelog.d/13474.misc b/changelog.d/13474.misc new file mode 100644 index 000000000000..d34c661fed0f --- /dev/null +++ b/changelog.d/13474.misc @@ -0,0 +1 @@ +Add some miscellaneous comments to document sync, especially around `compute_state_delta`. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index d827c03ad187..0cb734ab6ff5 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -13,7 +13,17 @@ # limitations under the License. import itertools import logging -from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Dict, + FrozenSet, + List, + Optional, + Sequence, + Set, + Tuple, +) import attr from prometheus_client import Counter @@ -89,7 +99,7 @@ class SyncConfig: @attr.s(slots=True, frozen=True, auto_attribs=True) class TimelineBatch: prev_batch: StreamToken - events: List[EventBase] + events: Sequence[EventBase] limited: bool # A mapping of event ID to the bundled aggregations for the above events. # This is only calculated if limited is true. @@ -852,16 +862,26 @@ async def compute_state_delta( now_token: StreamToken, full_state: bool, ) -> MutableStateMap[EventBase]: - """Works out the difference in state between the start of the timeline - and the previous sync. + """Works out the difference in state between the end of the previous sync and + the start of the timeline. Args: room_id: batch: The timeline batch for the room that will be sent to the user. sync_config: - since_token: Token of the end of the previous batch. May be None. + since_token: Token of the end of the previous batch. May be `None`. now_token: Token of the end of the current batch. full_state: Whether to force returning the full state. + `lazy_load_members` still applies when `full_state` is `True`. + + Returns: + The state to return in the sync response for the room. + + Clients will overlay this onto the state at the end of the previous sync to + arrive at the state at the start of the timeline. + + Clients will then overlay state events in the timeline to arrive at the + state at the end of the timeline, in preparation for the next sync. """ # TODO(mjark) Check if the state events were received by the server # after the previous sync, since we need to include those state @@ -869,7 +889,8 @@ async def compute_state_delta( # TODO(mjark) Check for new redactions in the state events. with Measure(self.clock, "compute_state_delta"): - + # The memberships needed for events in the timeline. + # Only calculated when `lazy_load_members` is on. members_to_fetch = None lazy_load_members = sync_config.filter_collection.lazy_load_members() @@ -897,12 +918,20 @@ async def compute_state_delta( else: state_filter = StateFilter.all() + # The contribution to the room state from state events in the timeline. + # Only contains the last event for any given state key. timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # Now calculate the state to return in the sync response for the room. + # This is more or less the change in state between the end of the previous + # sync's timeline and the start of the current sync's timeline. + # See the docstring above for details. + state_ids: StateMap[str] + if full_state: if batch: current_state_ids = ( @@ -1010,6 +1039,13 @@ async def compute_state_delta( ), ) + # At this point, `state_ids` includes the memberships of all event senders + # in the timeline when `lazy_load_members` is enabled. This is because we + # may not have sent the memberships in a previous sync. + + # When `include_redundant_members` is on, we send all the lazy-loaded + # memberships of event senders. Otherwise we make an effort to prune the set + # of memberships we send: if lazy_load_members and not include_redundant_members: cache_key = (sync_config.user.to_string(), sync_config.device_id) cache = self.get_lazy_loaded_members_cache(cache_key) diff --git a/synapse/visibility.py b/synapse/visibility.py index d947edde66dd..c810a0590773 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -73,8 +73,8 @@ async def filter_events_for_client( * the user is not currently a member of the room, and: * the user has not been a member of the room since the given events - always_include_ids: set of event ids to specifically - include (unless sender is ignored) + always_include_ids: set of event ids to specifically include, if present + in events (unless sender is ignored) filter_send_to_client: Whether we're checking an event that's going to be sent to a client. This might not always be the case since this function can also be called to check whether a user can see the state at a given point. From d9607ef3732e428ebd22683412bde2043ee2e12f Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 8 Aug 2022 19:57:53 +0100 Subject: [PATCH 2/6] Update synapse/handlers/sync.py Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/handlers/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0cb734ab6ff5..374d68685e03 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1044,8 +1044,8 @@ async def compute_state_delta( # may not have sent the memberships in a previous sync. # When `include_redundant_members` is on, we send all the lazy-loaded - # memberships of event senders. Otherwise we make an effort to prune the set - # of memberships we send: + # memberships of event senders. Otherwise we make an effort to limit the set + # of memberships we send to those that we have not already sent to this client. if lazy_load_members and not include_redundant_members: cache_key = (sync_config.user.to_string(), sync_config.device_id) cache = self.get_lazy_loaded_members_cache(cache_key) From 1d2ff88ac558dd7aaa0f47c3385db642f641e2d8 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 8 Aug 2022 19:57:59 +0100 Subject: [PATCH 3/6] Update synapse/handlers/sync.py Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/handlers/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 374d68685e03..74691472a30a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1039,8 +1039,8 @@ async def compute_state_delta( ), ) - # At this point, `state_ids` includes the memberships of all event senders - # in the timeline when `lazy_load_members` is enabled. This is because we + # At this point, if `lazy_load_members` is enabled, `state_ids` includes + # the memberships of all event senders in the timeline. This is because we # may not have sent the memberships in a previous sync. # When `include_redundant_members` is on, we send all the lazy-loaded From c15c94e69836027e585e3d5be3f26f4d0a5139f8 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 8 Aug 2022 20:54:28 +0100 Subject: [PATCH 4/6] Rename state variables in `compute_state_delta` --- synapse/handlers/sync.py | 58 +++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 74691472a30a..3a6dc7b94f81 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -934,30 +934,30 @@ async def compute_state_delta( if full_state: if batch: - current_state_ids = ( + state_at_timeline_end = ( await self._state_storage_controller.get_state_ids_for_event( batch.events[-1].event_id, state_filter=state_filter ) ) - state_ids = ( + state_at_timeline_start = ( await self._state_storage_controller.get_state_ids_for_event( batch.events[0].event_id, state_filter=state_filter ) ) else: - current_state_ids = await self.get_state_at( + state_at_timeline_end = await self.get_state_at( room_id, stream_position=now_token, state_filter=state_filter ) - state_ids = current_state_ids + state_at_timeline_start = state_at_timeline_end state_ids = _calculate_state( timeline_contains=timeline_state, - timeline_start=state_ids, - previous={}, - current=current_state_ids, + timeline_start=state_at_timeline_start, + timeline_end=state_at_timeline_end, + previous_timeline_end={}, lazy_load_members=lazy_load_members, ) elif batch.limited: @@ -997,7 +997,7 @@ async def compute_state_delta( ) if batch: - current_state_ids = ( + state_at_timeline_end = ( await self._state_storage_controller.get_state_ids_for_event( batch.events[-1].event_id, state_filter=state_filter ) @@ -1006,15 +1006,15 @@ async def compute_state_delta( # Its not clear how we get here, but empirically we do # (#5407). Logging has been added elsewhere to try and # figure out where this state comes from. - current_state_ids = await self.get_state_at( + state_at_timeline_end = await self.get_state_at( room_id, stream_position=now_token, state_filter=state_filter ) state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - previous=state_at_previous_sync, - current=current_state_ids, + timeline_end=state_at_timeline_end, + previous_timeline_end=state_at_previous_sync, # we have to include LL members in case LL initial sync missed them lazy_load_members=lazy_load_members, ) @@ -2252,8 +2252,8 @@ def _action_has_highlight(actions: List[JsonDict]) -> bool: def _calculate_state( timeline_contains: StateMap[str], timeline_start: StateMap[str], - previous: StateMap[str], - current: StateMap[str], + timeline_end: StateMap[str], + previous_timeline_end: StateMap[str], lazy_load_members: bool, ) -> StateMap[str]: """Works out what state to include in a sync response. @@ -2261,27 +2261,27 @@ def _calculate_state( Args: timeline_contains: state in the timeline timeline_start: state at the start of the timeline - previous: state at the end of the previous sync (or empty dict + timeline_end: state at the end of the timeline + previous_timeline_end: state at the end of the previous sync (or empty dict if this is an initial sync) - current: state at the end of the timeline lazy_load_members: whether to return members from timeline_start or not. assumes that timeline_start has already been filtered to include only the members the client needs to know about. """ - event_id_to_key = { - e: key - for key, e in itertools.chain( + event_id_to_state_key = { + event_id: state_key + for state_key, event_id in itertools.chain( timeline_contains.items(), - previous.items(), timeline_start.items(), - current.items(), + timeline_end.items(), + previous_timeline_end.items(), ) } - c_ids = set(current.values()) - ts_ids = set(timeline_start.values()) - p_ids = set(previous.values()) - tc_ids = set(timeline_contains.values()) + timeline_end_ids = set(timeline_end.values()) + timeline_start_ids = set(timeline_start.values()) + previous_timeline_end_ids = set(previous_timeline_end.values()) + timeline_contains_ids = set(timeline_contains.values()) # If we are lazyloading room members, we explicitly add the membership events # for the senders in the timeline into the state block returned by /sync, @@ -2293,13 +2293,17 @@ def _calculate_state( # see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: - p_ids.difference_update( + previous_timeline_end_ids.difference_update( e for t, e in timeline_start.items() if t[0] == EventTypes.Member ) - state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids + state_ids = ( + (timeline_end_ids | timeline_start_ids) + - previous_timeline_end_ids + - timeline_contains_ids + ) - return {event_id_to_key[e]: e for e in state_ids} + return {event_id_to_state_key[e]: e for e in state_ids} @attr.s(slots=True, auto_attribs=True) From ccec65ddf49a479dfef9c1588556751203452652 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Mon, 8 Aug 2022 20:56:38 +0100 Subject: [PATCH 5/6] Update stale comment --- synapse/handlers/sync.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 3a6dc7b94f81..9c963cb93896 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1003,9 +1003,8 @@ async def compute_state_delta( ) ) else: - # Its not clear how we get here, but empirically we do - # (#5407). Logging has been added elsewhere to try and - # figure out where this state comes from. + # We can get here if the user has ignored the senders of all + # the recent events. state_at_timeline_end = await self.get_state_at( room_id, stream_position=now_token, state_filter=state_filter ) From bcaa6afd16c8ae3a4c48dad38f388006585a30dd Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Wed, 10 Aug 2022 12:21:59 +0100 Subject: [PATCH 6/6] fixup rename mention of p_ids in comments --- synapse/handlers/sync.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9c963cb93896..3ca01391c9cf 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -2287,8 +2287,9 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. - # In practice, we can do this by removing them from the p_ids list, - # which is the list of relevant state we know we have already sent to the client. + # In practice, we can do this by removing them from the previous_timeline_end_ids + # list, which is the list of relevant state we know we have already sent to the + # client. # see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: