From 079194c54740e5046bb988a1b6d602bdd21044ec Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:03:02 -0500 Subject: [PATCH 01/68] Return some room timeline data in Sliding Sync --- synapse/handlers/sliding_sync.py | 202 ++++++++++++++++++++++++-- synapse/rest/client/sync.py | 89 ++++++++++-- synapse/types/handlers/__init__.py | 7 +- synapse/types/rest/client/__init__.py | 7 - 4 files changed, 275 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 16d94925f54..cf448fa3cdf 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,22 +18,25 @@ # # import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple +import attr from immutabledict import immutabledict -from synapse.api.constants import AccountDataTypes, EventTypes, Membership +from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase from synapse.storage.roommember import RoomsForUser from synapse.types import ( PersistedEventPosition, Requester, RoomStreamToken, + StreamKeyType, StreamToken, UserID, ) from synapse.types.handlers import OperationType, SlidingSyncConfig, SlidingSyncResult -from synapse.types.state import StateFilter +from synapse.types.state import StateFilter, StateKey +from synapse.visibility import filter_events_for_client if TYPE_CHECKING: from synapse.server import HomeServer @@ -82,6 +85,18 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> return membership != Membership.LEAVE or sender != user_id +# We can't freeze this class because we want to update it in place with the +# de-duplicated data. +@attr.s(slots=True, auto_attribs=True) +class RoomSyncConfig: + """ + Holds the config for what data we should fetch for a room in the sync response. 
+ """ + + timeline_limit: int + required_state: Set[StateKey] + + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() @@ -201,6 +216,7 @@ async def current_sync_for_user( # Assemble sliding window lists lists: Dict[str, SlidingSyncResult.SlidingWindowList] = {} + relevant_room_map: Dict[str, RoomSyncConfig] = {} if sync_config.lists: # Get all of the room IDs that the user should be able to see in the sync # response @@ -225,29 +241,66 @@ async def current_sync_for_user( ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: + room_id_set = { + room_id + for room_id, _ in sorted_room_info[range[0] : range[1]] + } + ops.append( SlidingSyncResult.SlidingWindowList.Operation( op=OperationType.SYNC, range=range, - room_ids=[ - room_id - for room_id, _ in sorted_room_info[ - range[0] : range[1] - ] - ], + room_ids=list(room_id_set), ) ) + # Update the relevant room map + for room_id in room_id_set: + if relevant_room_map.get(room_id) is not None: + # Take the highest timeline limit + if ( + relevant_room_map[room_id].timeline_limit + < list_config.timeline_limit + ): + relevant_room_map[room_id].timeline_limit = ( + list_config.timeline_limit + ) + + # Union the required state + relevant_room_map[room_id].required_state.update( + list_config.required_state + ) + else: + relevant_room_map[room_id] = RoomSyncConfig( + timeline_limit=list_config.timeline_limit, + required_state=set(list_config.required_state), + ) + lists[list_key] = SlidingSyncResult.SlidingWindowList( count=len(sorted_room_info), ops=ops, ) + # TODO: if (sync_config.room_subscriptions): + + # Fetch room data + rooms: Dict[str, SlidingSyncResult.RoomResult] = {} + for room_id, room_sync_config in relevant_room_map.items(): + room_sync_result = await self.get_room_sync_data( + user=sync_config.user, + room_id=room_id, + room_sync_config=room_sync_config, + 
rooms_for_user_membership_at_to_token=sync_room_map[room_id], + from_token=from_token, + to_token=to_token, + ) + + rooms[room_id] = room_sync_result + return SlidingSyncResult( next_pos=to_token, lists=lists, - # TODO: Gather room data for rooms in lists and `sync_config.room_subscriptions` - rooms={}, + rooms=rooms, extensions={}, ) @@ -665,3 +718,130 @@ async def sort_rooms( # We want descending order reverse=True, ) + + async def get_room_sync_data( + self, + user: UserID, + room_id: str, + room_sync_config: RoomSyncConfig, + rooms_for_user_membership_at_to_token: RoomsForUser, + from_token: Optional[StreamToken], + to_token: StreamToken, + ) -> SlidingSyncResult.RoomResult: + """ + Fetch room data for a room. + + We fetch data according to the token range (> `from_token` and <= `to_token`). + + Args: + user: User to fetch data for + room_id: The room ID to fetch data for + room_sync_config: Config for what data we should fetch for a room in the + sync response. + rooms_for_user_membership_at_to_token: Membership information for the user + in the room at the time of `to_token`. + from_token: The point in the stream to sync from. + to_token: The point in the stream to sync up to. + """ + + timeline_events: List[EventBase] = [] + limited = False + # We want to use `to_token` (vs `from_token`) because we look backwards from the + # `to_token` up to the `timeline_limit` and we might not reach `from_token` + # before we hit the limit. We will update the room stream position once we've + # fetched the events. 
+ prev_batch_token = to_token + if room_sync_config.timeline_limit > 0: + timeline_events, new_room_key = await self.store.paginate_room_events( + room_id=room_id, + # We're going to paginate backwards from the `to_token` + from_key=to_token.room_key, + to_key=from_token.room_key if from_token is not None else None, + direction=Direction.BACKWARDS, + # We add one so we can determine if there are enough events to saturate + # the limit or not (see `limited`) + limit=room_sync_config.timeline_limit + 1, + event_filter=None, + ) + + # We want to return the events in ascending order (the last event is the + # most recent). + timeline_events.reverse() + + timeline_events = await filter_events_for_client( + self.storage_controllers, + user.to_string(), + timeline_events, + is_peeking=rooms_for_user_membership_at_to_token.membership + != Membership.JOIN, + filter_send_to_client=True, + ) + + # Determine our `limited` status + if len(timeline_events) > room_sync_config.timeline_limit: + limited = True + # Get rid of that extra "+ 1" event because we only used it to determine + # if we hit the limit or not + timeline_events = timeline_events[-room_sync_config.timeline_limit :] + assert timeline_events[0].internal_metadata.stream_ordering + new_room_key = RoomStreamToken( + stream=timeline_events[0].internal_metadata.stream_ordering - 1 + ) + + prev_batch_token = prev_batch_token.copy_and_replace( + StreamKeyType.ROOM, new_room_key + ) + + # Figure out any stripped state events for invite/knocks + stripped_state: List[EventBase] = [] + if rooms_for_user_membership_at_to_token.membership in { + Membership.INVITE, + Membership.KNOCK, + }: + invite_or_knock_event = await self.store.get_event( + rooms_for_user_membership_at_to_token.event_id + ) + + stripped_state = [] + if invite_or_knock_event.membership == Membership.INVITE: + stripped_state = invite_or_knock_event.unsigned.get( + "invite_room_state", [] + ) + elif invite_or_knock_event.membership == Membership.KNOCK: + 
stripped_state = invite_or_knock_event.unsigned.get( + "knock_room_state", [] + ) + + stripped_state.append(invite_or_knock_event) + + return SlidingSyncResult.RoomResult( + # TODO: Dummy value + name="TODO", + # TODO: Dummy value + avatar=None, + # TODO: Dummy value + heroes=None, + # Since we can't determine whether we've already sent a room down this + # Sliding Sync connection before (we plan to add this optimization in the + # future), we're always returning the requested room state instead of + # updates. + initial=True, + # TODO: Dummy value + required_state=[], + timeline=timeline_events, + # TODO: Dummy value + is_dm=False, + stripped_state=stripped_state, + prev_batch=prev_batch_token, + limited=limited, + # TODO: Dummy values + joined_count=0, + invited_count=0, + # TODO: These are just dummy values. We could potentially just remove these + # since notifications can only really be done correctly on the client anyway + # (encrypted rooms). + notification_count=0, + highlight_count=0, + # TODO: Dummy value + num_live=0, + ) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 1b0ac20d94b..b261b2dd88c 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -761,7 +761,6 @@ class SlidingSyncRestServlet(RestServlet): "lists": { "foo-list": { "ranges": [ [0, 99] ], - "sort": [ "by_notification_level", "by_recency", "by_name" ], "required_state": [ ["m.room.join_rules", ""], ["m.room.history_visibility", ""], @@ -771,7 +770,6 @@ class SlidingSyncRestServlet(RestServlet): "filters": { "is_dm": true }, - "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } }, // Room Subscriptions API @@ -779,10 +777,6 @@ class SlidingSyncRestServlet(RestServlet): "!sub1:bar": { "required_state": [ ["*","*"] ], "timeline_limit": 10, - "include_old_rooms": { - "timeline_limit": 1, - "required_state": [ ["m.room.tombstone", ""], ["m.room.create", ""] ], - } } }, // Extensions API @@ -871,10 +865,11 @@ def __init__(self, hs: 
"HomeServer"): super().__init__() self.auth = hs.get_auth() self.store = hs.get_datastores().main + self.clock = hs.get_clock() self.filtering = hs.get_filtering() self.sliding_sync_handler = hs.get_sliding_sync_handler() + self.event_serializer = hs.get_event_client_serializer() - # TODO: Update this to `on_GET` once we figure out how we want to handle params async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user = requester.user @@ -920,13 +915,14 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: logger.info("Client has disconnected; not serializing response.") return 200, {} - response_content = await self.encode_response(sliding_sync_results) + response_content = await self.encode_response(requester, sliding_sync_results) return 200, response_content # TODO: Is there a better way to encode things? async def encode_response( self, + requester: Requester, sliding_sync_result: SlidingSyncResult, ) -> JsonDict: response: JsonDict = defaultdict(dict) @@ -935,7 +931,9 @@ async def encode_response( serialized_lists = self.encode_lists(sliding_sync_result.lists) if serialized_lists: response["lists"] = serialized_lists - response["rooms"] = {} # TODO: sliding_sync_result.rooms + response["rooms"] = await self.encode_rooms( + requester, sliding_sync_result.rooms + ) response["extensions"] = {} # TODO: sliding_sync_result.extensions return response @@ -961,6 +959,79 @@ def encode_operation( return serialized_lists + async def encode_rooms( + self, + requester: Requester, + rooms: Dict[str, SlidingSyncResult.RoomResult], + ) -> JsonDict: + time_now = self.clock.time_msec() + + serialize_options = SerializeEventConfig( + event_format=format_event_for_client_v2_without_room_id, + requester=requester, + ) + + serialized_rooms = {} + for room_id, room_result in rooms.items(): + serialized_timeline = await self.event_serializer.serialize_events( + 
room_result.timeline, + time_now, + config=serialize_options, + # TODO + # bundle_aggregations=room.timeline.bundled_aggregations, + ) + + serialized_required_state = await self.event_serializer.serialize_events( + room_result.required_state, + time_now, + config=serialize_options, + ) + + serialized_rooms[room_id] = { + "name": room_result.name, + "required_state": serialized_required_state, + "timeline": serialized_timeline, + "prev_batch": await room_result.prev_batch.to_string(self.store), + "limited": room_result.limited, + "joined_count": room_result.joined_count, + "invited_count": room_result.invited_count, + "notification_count": room_result.notification_count, + "highlight_count": room_result.highlight_count, + "num_live": room_result.num_live, + } + + if room_result.avatar: + serialized_rooms[room_id]["avatar"] = room_result.avatar + + if room_result.heroes: + serialized_rooms[room_id]["heroes"] = room_result.heroes + + # We should only include the `initial` key if it's `True` to save bandwidth. + # The absense of this flag means `False`. 
+ if room_result.initial: + serialized_rooms[room_id]["initial"] = room_result.initial + + # Field should be absent on non-DM rooms + if room_result.is_dm: + serialized_rooms[room_id]["is_dm"] = room_result.is_dm + + # Stripped state only applies to invite/knock rooms + if room_result.stripped_state: + serialized_stripped_state = ( + await self.event_serializer.serialize_events( + room_result.stripped_state, + time_now, + config=serialize_options, + ) + ) + + # TODO: Would be good to rename this to `stripped_state` so it can be + # shared between invite and knock rooms, see + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 + serialized_rooms[room_id]["invite_state"] = serialized_stripped_state + + return serialized_rooms + def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: SyncRestServlet(hs).register(http_server) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1d65551d5b5..b544398a35d 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -162,8 +162,9 @@ class RoomResult: timeline: Latest events in the room. The last event is the most recent is_dm: Flag to specify whether the room is a direct-message room (most likely between two people). - invite_state: Stripped state events. Same as `rooms.invite.$room_id.invite_state` - in sync v2, absent on joined/left rooms + stripped_state: Stripped state events (for rooms where the usre is + invited/knocked). Same as `rooms.invite.$room_id.invite_state` in sync v2, + absent on joined/left rooms prev_batch: A token that can be passed as a start parameter to the `/rooms//messages` API to retrieve earlier messages. limited: True if their are more events than fit between the given position and now. 
@@ -192,7 +193,7 @@ class RoomResult: required_state: List[EventBase] timeline: List[EventBase] is_dm: bool - invite_state: List[EventBase] + stripped_state: Optional[List[EventBase]] prev_batch: StreamToken limited: bool joined_count: int diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py index e2c79c41064..25fbd772f6f 100644 --- a/synapse/types/rest/client/__init__.py +++ b/synapse/types/rest/client/__init__.py @@ -152,9 +152,6 @@ class CommonRoomParameters(RequestBodyModel): anyway. timeline_limit: The maximum number of timeline events to return per response. (Max 1000 messages) - include_old_rooms: Determines if `predecessor` rooms are included in the - `rooms` response. The user MUST be joined to old rooms for them to show up - in the response. """ class IncludeOldRooms(RequestBodyModel): @@ -167,7 +164,6 @@ class IncludeOldRooms(RequestBodyModel): timeline_limit: int else: timeline_limit: conint(le=1000, strict=True) # type: ignore[valid-type] - include_old_rooms: Optional[IncludeOldRooms] = None class SlidingSyncList(CommonRoomParameters): """ @@ -208,9 +204,6 @@ class SlidingSyncList(CommonRoomParameters): } timeline_limit: The maximum number of timeline events to return per response. - include_old_rooms: Determines if `predecessor` rooms are included in the - `rooms` response. The user MUST be joined to old rooms for them to show up - in the response. include_heroes: Return a stripped variant of membership events (containing `user_id` and optionally `avatar_url` and `displayname`) for the users used to calculate the room name. 
From 3e0f759dbc34cb3be0a1946cd36e617fc3c5a17c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:26:59 -0500 Subject: [PATCH 02/68] Strip invite/knock event itself and avoid mutating event `unsigned` Make sure we don't run into https://github.com/element-hq/synapse/issues/14919 (https://github.com/matrix-org/synapse/issues/14919) --- synapse/events/utils.py | 18 ++++++++++++++++++ synapse/handlers/sliding_sync.py | 14 ++++++++------ synapse/rest/client/sync.py | 10 +--------- .../storage/databases/main/events_worker.py | 12 ++---------- synapse/types/handlers/__init__.py | 4 ++-- 5 files changed, 31 insertions(+), 27 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index b997d82d71f..f937fd46980 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -836,3 +836,21 @@ def maybe_upsert_event_field( del container[key] return upsert_okay + + +def strip_event(event: EventBase) -> JsonDict: + """ + Used for "stripped state" events which provide a simplified view of the state of a + room intended to help a potential joiner identify the room (relevant when the user + is invited or knocked). + + Stripped state events can only have the `sender`, `type`, `state_key` and `content` + properties present. 
+ """ + + return { + "type": event.type, + "state_key": event.state_key, + "content": event.content, + "sender": event.sender, + } diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cf448fa3cdf..23f971c1f78 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -25,8 +25,10 @@ from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase +from synapse.events.utils import strip_event from synapse.storage.roommember import RoomsForUser from synapse.types import ( + JsonDict, PersistedEventPosition, Requester, RoomStreamToken, @@ -793,7 +795,7 @@ async def get_room_sync_data( ) # Figure out any stripped state events for invite/knocks - stripped_state: List[EventBase] = [] + stripped_state: List[JsonDict] = [] if rooms_for_user_membership_at_to_token.membership in { Membership.INVITE, Membership.KNOCK, @@ -804,15 +806,15 @@ async def get_room_sync_data( stripped_state = [] if invite_or_knock_event.membership == Membership.INVITE: - stripped_state = invite_or_knock_event.unsigned.get( - "invite_room_state", [] + stripped_state.extend( + invite_or_knock_event.unsigned.get("invite_room_state", []) ) elif invite_or_knock_event.membership == Membership.KNOCK: - stripped_state = invite_or_knock_event.unsigned.get( - "knock_room_state", [] + stripped_state.extend( + invite_or_knock_event.unsigned.get("knock_room_state", []) ) - stripped_state.append(invite_or_knock_event) + stripped_state.append(strip_event(invite_or_knock_event)) return SlidingSyncResult.RoomResult( # TODO: Dummy value diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index b261b2dd88c..a9be37bbf32 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1017,18 +1017,10 @@ async def encode_rooms( # Stripped state only applies to invite/knock rooms if room_result.stripped_state: - serialized_stripped_state = ( - await 
self.event_serializer.serialize_events( - room_result.stripped_state, - time_now, - config=serialize_options, - ) - ) - # TODO: Would be good to rename this to `stripped_state` so it can be # shared between invite and knock rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 - serialized_rooms[room_id]["invite_state"] = serialized_stripped_state + serialized_rooms[room_id]["invite_state"] = room_result.stripped_state return serialized_rooms diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index e264d36f025..f0f390cec46 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -55,7 +55,7 @@ ) from synapse.events import EventBase, make_event_from_dict from synapse.events.snapshot import EventContext -from synapse.events.utils import prune_event +from synapse.events.utils import prune_event, strip_event from synapse.logging.context import ( PreserveLoggingContext, current_context, @@ -1025,15 +1025,7 @@ async def get_stripped_room_state_from_event_context( state_to_include = await self.get_events(selected_state_ids.values()) - return [ - { - "type": e.type, - "state_key": e.state_key, - "content": e.content, - "sender": e.sender, - } - for e in state_to_include.values() - ] + return [strip_event(e) for e in state_to_include.values()] def _maybe_start_fetch_thread(self) -> None: """Starts an event fetch thread if we are not yet at the maximum number.""" diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index b544398a35d..04b0ab972b8 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,7 +31,7 @@ from pydantic import Extra from synapse.events import EventBase -from synapse.types import JsonMapping, StreamToken, UserID +from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import 
SlidingSyncBody @@ -193,7 +193,7 @@ class RoomResult: required_state: List[EventBase] timeline: List[EventBase] is_dm: bool - stripped_state: Optional[List[EventBase]] + stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken limited: bool joined_count: int From 5e2fd4e93ca2084ee92533b59e6d45b3a914fa89 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:29:44 -0500 Subject: [PATCH 03/68] Add changelog --- changelog.d/17320.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17320.feature diff --git a/changelog.d/17320.feature b/changelog.d/17320.feature new file mode 100644 index 00000000000..1e524f3eca1 --- /dev/null +++ b/changelog.d/17320.feature @@ -0,0 +1 @@ +Add `rooms` data to experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. From 8ce06f145260540f0c81c1594a011556e90f32c8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 17 Jun 2024 18:54:23 -0500 Subject: [PATCH 04/68] Fix sort being lost --- synapse/handlers/sliding_sync.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 23f971c1f78..e61b86d7792 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -243,21 +243,21 @@ async def current_sync_for_user( ops: List[SlidingSyncResult.SlidingWindowList.Operation] = [] if list_config.ranges: for range in list_config.ranges: - room_id_set = { + sliced_room_ids = [ room_id for room_id, _ in sorted_room_info[range[0] : range[1]] - } + ] ops.append( SlidingSyncResult.SlidingWindowList.Operation( op=OperationType.SYNC, range=range, - room_ids=list(room_id_set), + room_ids=sliced_room_ids, ) ) # Update the relevant room map - for room_id in room_id_set: + for room_id in sliced_room_ids: if relevant_room_map.get(room_id) is not None: # Take the highest timeline limit if ( From aa5f54aa135de8ae7fdc201792d548de494cbd40 
Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 08:26:10 -0500 Subject: [PATCH 05/68] Start on required_state --- synapse/handlers/sliding_sync.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e61b86d7792..5b834fe9ef6 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -93,10 +93,16 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> class RoomSyncConfig: """ Holds the config for what data we should fetch for a room in the sync response. + + Attributes: + timeline_limit: The maximum number of events to return in the timeline. + required_state: The minimum set of state events requested for the room. The + values are close to `StateKey` but actually use a syntax where you can provide + `*` and `$LAZY` as the state key part of the tuple (type, state_key). """ timeline_limit: int - required_state: Set[StateKey] + required_state: Set[Tuple[str, str]] class SlidingSyncHandler: @@ -816,6 +822,14 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) + required_state = [] + if len(room_sync_config.required_state) > 0: + required_state = await self.storage_controllers.state.get_state_at( + room_id, + to_token, + state_filter=StateFilter.from_types(TODO), + ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value name="TODO", From 5c175d5488ac7b700906a722ee16404527d8d711 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 09:35:20 -0500 Subject: [PATCH 06/68] Add some notes from pairing --- synapse/handlers/sliding_sync.py | 20 ++++++++++++++++++-- synapse/rest/client/sync.py | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5b834fe9ef6..f9ec4f79610 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py 
@@ -764,6 +764,13 @@ async def get_room_sync_data( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, + # We should always return historical messages (outside token range) in + # these cases because clients want to be able to show a basic screen of + # information: + # - Initial sync (because no `from_token`) + # - When users newly_join + # - TODO: For incremental sync where we haven't sent it down this + # connection before to_key=from_token.room_key if from_token is not None else None, direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate @@ -824,14 +831,23 @@ async def get_room_sync_data( required_state = [] if len(room_sync_config.required_state) > 0: - required_state = await self.storage_controllers.state.get_state_at( + await self.storage_controllers.state.get_current_state( room_id, - to_token, state_filter=StateFilter.from_types(TODO), + await_full_state=False, ) + # TODO: rewind + + # required_state = await self.storage_controllers.state.get_state_at( + # room_id, + # to_token, + # state_filter=StateFilter.from_types(TODO), + # ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value + # TODO: Make this optional because a computed name doesn't make sense for translated cases name="TODO", # TODO: Dummy value avatar=None, diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index a9be37bbf32..0ae31f23e96 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1021,6 +1021,7 @@ async def encode_rooms( # shared between invite and knock rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 serialized_rooms[room_id]["invite_state"] = room_result.stripped_state + # TODO: `knocked_state` but that isn't specced yet return serialized_rooms From 9089bfe4dc505c02739968cdb1b67220e060580d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:06:29 -0500 Subject: [PATCH 
07/68] Remove required_state for now --- synapse/handlers/sliding_sync.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f9ec4f79610..f8fd2c6c5ea 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -96,9 +96,10 @@ class RoomSyncConfig: Attributes: timeline_limit: The maximum number of events to return in the timeline. - required_state: The minimum set of state events requested for the room. The - values are close to `StateKey` but actually use a syntax where you can provide - `*` and `$LAZY` as the state key part of the tuple (type, state_key). + required_state: The set of state events requested for the room. The + values are close to `StateKey` but actually use a syntax where you can + provide `*` wildcard and `$LAZY` for lazy room members as the `state_key` part + of the tuple (type, state_key). """ timeline_limit: int @@ -829,22 +830,6 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) - required_state = [] - if len(room_sync_config.required_state) > 0: - await self.storage_controllers.state.get_current_state( - room_id, - state_filter=StateFilter.from_types(TODO), - await_full_state=False, - ) - - # TODO: rewind - - # required_state = await self.storage_controllers.state.get_state_at( - # room_id, - # to_token, - # state_filter=StateFilter.from_types(TODO), - # ) - return SlidingSyncResult.RoomResult( # TODO: Dummy value # TODO: Make this optional because a computed name doesn't make sense for translated cases From 94279915d4432fefb87b2d210a8cd03fd633c002 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:09:33 -0500 Subject: [PATCH 08/68] Clean up knock_state comments --- synapse/rest/client/sync.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 
0ae31f23e96..db447738249 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -1017,11 +1017,13 @@ async def encode_rooms( # Stripped state only applies to invite/knock rooms if room_result.stripped_state: - # TODO: Would be good to rename this to `stripped_state` so it can be - # shared between invite and knock rooms, see + # TODO: `knocked_state` but that isn't specced yet. + # + # TODO: Instead of adding `knocked_state`, it would be good to rename + # this to `stripped_state` so it can be shared between invite and knock + # rooms, see # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1117629919 serialized_rooms[room_id]["invite_state"] = room_result.stripped_state - # TODO: `knocked_state` but that isn't specced yet return serialized_rooms From 19b22971711da0c8bdbaebed0d2f7a7ccb01e2ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 10:36:50 -0500 Subject: [PATCH 09/68] Calculate `num_live` --- synapse/handlers/sliding_sync.py | 55 ++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f8fd2c6c5ea..1d07e22c914 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -753,6 +753,7 @@ async def get_room_sync_data( to_token: The point in the stream to sync up to. """ + # Assemble the list of timeline events timeline_events: List[EventBase] = [] limited = False # We want to use `to_token` (vs `from_token`) because we look backwards from the @@ -761,18 +762,34 @@ async def get_room_sync_data( # fetched the events. 
prev_batch_token = to_token if room_sync_config.timeline_limit > 0: + newly_joined = False + if ( + from_token is not None + and rooms_for_user_membership_at_to_token.membership == Membership.JOIN + ): + newly_joined = ( + rooms_for_user_membership_at_to_token.event_pos.stream + > from_token.room_key.get_stream_pos_for_instance( + rooms_for_user_membership_at_to_token.event_pos.instance_name + ) + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should always return historical messages (outside token range) in - # these cases because clients want to be able to show a basic screen of - # information: - # - Initial sync (because no `from_token`) - # - When users newly_join - # - TODO: For incremental sync where we haven't sent it down this + # We should return historical messages (outside token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users newly_joined + # - TODO: For an incremental sync where we haven't sent it down this # connection before - to_key=from_token.room_key if from_token is not None else None, + to_key=( + from_token.room_key + if from_token is not None and not newly_joined + else None + ), direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate # the limit or not (see `limited`) @@ -804,6 +821,25 @@ async def get_room_sync_data( stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) + # Determine how many "live" events we have (events within the given token range). + # + # This is mostly useful to determine whether a given @mention event should + # make a noise or not. 
Clients cannot rely solely on the absence of + # `initial: true` to determine live events because if a room not in the + # sliding window bumps into the window because of an @mention it will have + # `initial: true` yet contain a single live event (with potentially other + # old events in the timeline) + num_live = 0 + if from_token is not None: + for timeline_event in timeline_events: + if ( + timeline_event.internal_metadata.stream_ordering + > from_token.room_key.get_stream_pos_for_instance( + timeline_event.internal_metadata.instance_name + ) + ): + num_live += 1 + prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key ) @@ -838,7 +874,7 @@ async def get_room_sync_data( avatar=None, # TODO: Dummy value heroes=None, - # Since we can't determine whether we've already sent a room down this + # TODO: Since we can't determine whether we've already sent a room down this # Sliding Sync connection before (we plan to add this optimization in the # future), we're always returning the requested room state instead of # updates. @@ -859,6 +895,5 @@ async def get_room_sync_data( # (encrypted rooms). 
notification_count=0, highlight_count=0, - # TODO: Dummy value - num_live=0, + num_live=num_live, ) From 81d36f36c1731738b38f0b7842de1ce84a570d74 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 15:28:23 -0500 Subject: [PATCH 10/68] Add tests for `limited` --- synapse/handlers/sliding_sync.py | 22 +++-- tests/rest/client/test_sync.py | 140 ++++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 1d07e22c914..90991031aa8 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -37,7 +37,7 @@ UserID, ) from synapse.types.handlers import OperationType, SlidingSyncConfig, SlidingSyncResult -from synapse.types.state import StateFilter, StateKey +from synapse.types.state import StateFilter from synapse.visibility import filter_events_for_client if TYPE_CHECKING: @@ -764,6 +764,7 @@ async def get_room_sync_data( if room_sync_config.timeline_limit > 0: newly_joined = False if ( + # We can only determine new-ness if we have a `from_token` to define our range from_token is not None and rooms_for_user_membership_at_to_token.membership == Membership.JOIN ): @@ -778,11 +779,11 @@ async def get_room_sync_data( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should return historical messages (outside token range) in the + # We should return historical messages (before token range) in the # following cases because we want clients to be able to show a basic # screen of information: # - Initial sync (because no `from_token` to limit us anyway) - # - When users newly_joined + # - When users `newly_joined` # - TODO: For an incremental sync where we haven't sent it down this # connection before to_key=( @@ -832,12 +833,15 @@ async def get_room_sync_data( num_live = 0 if from_token is not None: for timeline_event in timeline_events: - if ( - 
timeline_event.internal_metadata.stream_ordering - > from_token.room_key.get_stream_pos_for_instance( - timeline_event.internal_metadata.instance_name - ) - ): + # This fields should be present for all persisted events + assert timeline_event.internal_metadata.stream_ordering is not None + assert timeline_event.internal_metadata.instance_name is not None + + persisted_position = PersistedEventPosition( + instance_name=timeline_event.internal_metadata.instance_name, + stream=timeline_event.internal_metadata.stream_ordering, + ) + if persisted_position.persisted_after(from_token.room_key): num_live += 1 prev_batch_token = prev_batch_token.copy_and_replace( diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 2b06767b8a8..5b611cd0962 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -35,7 +35,7 @@ ) from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken, StreamKeyType +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken from synapse.util import Clock from tests import unittest @@ -1282,7 +1282,7 @@ def _create_dm_room( def test_sync_list(self) -> None: """ - Test that room IDs show up in the Sliding Sync lists + Test that room IDs show up in the Sliding Sync `lists` """ alice_user_id = self.register_user("alice", "correcthorse") alice_access_token = self.login(alice_user_id, "correcthorse") @@ -1387,7 +1387,7 @@ def test_wait_for_sync_token(self) -> None: def test_filter_list(self) -> None: """ - Test that filters apply to lists + Test that filters apply to `lists` """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1462,7 +1462,7 @@ def test_filter_list(self) -> None: def test_sort_list(self) -> None: """ - Test that the lists are sorted by `stream_ordering` + Test that the `lists` are sorted by 
`stream_ordering` """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1516,3 +1516,135 @@ def test_sort_list(self) -> None: ], channel.json_body["lists"]["foo-list"], ) + + def test_rooms_limited_initial_sync(self) -> None: + """ + Test that we mark `rooms` as `limited=True` when we saturate the `timeline_limit` + on initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity1", tok=user2_tok) + self.helper.send(room_id1, "activity2", tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity3", tok=user2_tok) + event_pos3 = self.get_success( + self.store.get_position_for_event(event_response3["event_id"]) + ) + event_response4 = self.helper.send(room_id1, "activity4", tok=user2_tok) + event_pos4 = self.get_success( + self.store.get_position_for_event(event_response4["event_id"]) + ) + event_response5 = self.helper.send(room_id1, "activity5", tok=user2_tok) + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We expect to saturate the `timeline_limit` (there are more than 3 messages in the room) + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + # Check to make sure the latest events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response4["event_id"], + 
event_response5["event_id"], + user1_join_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # Check to make sure the `prev_batch` points at the right place + prev_batch_token = self.get_success( + StreamToken.from_string( + self.store, channel.json_body["rooms"][room_id1]["prev_batch"] + ) + ) + prev_batch_room_stream_token_serialized = self.get_success( + prev_batch_token.room_key.to_string(self.store) + ) + # If we use the `prev_batch` token to look backwards, we should see `event3` + # next so make sure the token encompasses it + self.assertEqual( + event_pos3.persisted_after(prev_batch_token.room_key), + False, + f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be >= event_pos3={self.get_success(event_pos3.to_room_stream_token().to_string(self.store))}", + ) + # If we use the `prev_batch` token to look backwards, we shouldn't see `event4` + # anymore since it was just returned in this response. + self.assertEqual( + event_pos4.persisted_after(prev_batch_token.room_key), + True, + f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be < event_pos4={self.get_success(event_pos4.to_room_stream_token().to_string(self.store))}", + ) + + def test_not_limited_initial_sync(self) -> None: + """ + Test that we mark `rooms` as `limited=False` when there are no more events to + paginate to. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity1", tok=user2_tok) + self.helper.send(room_id1, "activity2", tok=user2_tok) + self.helper.send(room_id1, "activity3", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 100, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # The timeline should be `limited=False` because we have all of the events (no + # more to paginate to) + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) + # We're just looking to make sure we got all of the events before hitting the `timeline_limit` + self.assertEqual( + len(channel.json_body["rooms"][room_id1]["timeline"]), + 9, + channel.json_body["rooms"][room_id1]["timeline"], + ) From 9791209a3d5c82ad9975acea06aaacb55de2326a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 18:10:17 -0500 Subject: [PATCH 11/68] Add more tests --- synapse/handlers/sliding_sync.py | 33 ++-- synapse/rest/client/sync.py | 10 +- synapse/types/__init__.py | 3 + tests/rest/client/test_sync.py | 274 ++++++++++++++++++++++++++++++- 4 files changed, 296 insertions(+), 24 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 90991031aa8..c1b0b2153a1 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -769,26 +769,29 @@ async def get_room_sync_data( and rooms_for_user_membership_at_to_token.membership == Membership.JOIN ): newly_joined = ( - 
rooms_for_user_membership_at_to_token.event_pos.stream - > from_token.room_key.get_stream_pos_for_instance( - rooms_for_user_membership_at_to_token.event_pos.instance_name + rooms_for_user_membership_at_to_token.event_pos.persisted_after( + from_token.room_key ) ) + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before + should_limit_timeline_to_token_range = ( + from_token is not None and not newly_joined + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before to_key=( from_token.room_key - if from_token is not None and not newly_joined + if should_limit_timeline_to_token_range else None ), direction=Direction.BACKWARDS, @@ -832,7 +835,7 @@ async def get_room_sync_data( # old events in the timeline) num_live = 0 if from_token is not None: - for timeline_event in timeline_events: + for timeline_event in reversed(timeline_events): # This fields should be present for all persisted events assert timeline_event.internal_metadata.stream_ordering is not None assert timeline_event.internal_metadata.instance_name is not None @@ -843,6 +846,12 @@ async def get_room_sync_data( ) if persisted_position.persisted_after(from_token.room_key): num_live += 1 + else: + # Since we're iterating over the timeline events in 
+ # reverse-chronological order, we can break once we hit an event + # that's not live. In the future, we could potentially optimize + # this more with a binary search (bisect). + break prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index db447738249..434eaa4789a 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -785,7 +785,7 @@ class SlidingSyncRestServlet(RestServlet): Response JSON:: { - "next_pos": "s58_224_0_13_10_1_1_16_0_1", + "pos": "s58_224_0_13_10_1_1_16_0_1", "lists": { "foo-list": { "count": 1337, @@ -824,7 +824,8 @@ class SlidingSyncRestServlet(RestServlet): "joined_count": 41, "invited_count": 1, "notification_count": 1, - "highlight_count": 0 + "highlight_count": 0, + "num_live": 2" }, // rooms from list "!foo:bar": { @@ -849,7 +850,8 @@ class SlidingSyncRestServlet(RestServlet): "joined_count": 4, "invited_count": 0, "notification_count": 54, - "highlight_count": 3 + "highlight_count": 3, + "num_live": 1, }, // ... 
99 more items }, @@ -927,7 +929,7 @@ async def encode_response( ) -> JsonDict: response: JsonDict = defaultdict(dict) - response["next_pos"] = await sliding_sync_result.next_pos.to_string(self.store) + response["pos"] = await sliding_sync_result.next_pos.to_string(self.store) serialized_lists = self.encode_lists(sliding_sync_result.lists) if serialized_lists: response["lists"] = serialized_lists diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 151658df534..b52236d6029 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -1078,6 +1078,9 @@ class PersistedPosition: stream: int def persisted_after(self, token: AbstractMultiWriterStreamToken) -> bool: + """ + Checks whether this position happened after the token + """ return token.get_stream_pos_for_instance(self.instance_name) < self.stream diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 5b611cd0962..d538716e5ac 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -19,6 +19,7 @@ # # import json +import logging from typing import List from parameterized import parameterized, parameterized_class @@ -35,7 +36,7 @@ ) from synapse.rest.client import devices, knock, login, read_marker, receipts, room, sync from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken, UserID from synapse.util import Clock from tests import unittest @@ -44,6 +45,8 @@ ) from tests.server import TimedOutException +logger = logging.getLogger(__name__) + class FilterTestCase(unittest.HomeserverTestCase): user_id = "@apple:test" @@ -1379,11 +1382,9 @@ def test_wait_for_sync_token(self) -> None: channel.await_result(timeout_ms=200) self.assertEqual(channel.code, 200, channel.json_body) - # We expect the `next_pos` in the result to be the same as what we requested + # We expect the next `pos` in the 
result to be the same as what we requested # with because we weren't able to find anything new yet. - self.assertEqual( - channel.json_body["next_pos"], future_position_token_serialized - ) + self.assertEqual(channel.json_body["pos"], future_position_token_serialized) def test_filter_list(self) -> None: """ @@ -1602,7 +1603,15 @@ def test_rooms_limited_initial_sync(self) -> None: f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be < event_pos4={self.get_success(event_pos4.to_room_stream_token().to_string(self.store))}", ) - def test_not_limited_initial_sync(self) -> None: + # With no `from_token` (initial sync), it's all historical since there is no + # "current" range + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_not_limited_initial_sync(self) -> None: """ Test that we mark `rooms` as `limited=False` when there are no more events to paginate to. @@ -1619,6 +1628,7 @@ def test_not_limited_initial_sync(self) -> None: self.helper.join(room_id1, user1_id, tok=user1_tok) # Make the Sliding Sync request + timeline_limit = 100 channel = self.make_request( "POST", self.sync_endpoint, @@ -1627,7 +1637,7 @@ def test_not_limited_initial_sync(self) -> None: "foo-list": { "ranges": [[0, 1]], "required_state": [], - "timeline_limit": 100, + "timeline_limit": timeline_limit, } } }, @@ -1642,9 +1652,257 @@ def test_not_limited_initial_sync(self) -> None: False, channel.json_body["rooms"][room_id1], ) + expected_number_of_events = 9 # We're just looking to make sure we got all of the events before hitting the `timeline_limit` self.assertEqual( len(channel.json_body["rooms"][room_id1]["timeline"]), - 9, + expected_number_of_events, channel.json_body["rooms"][room_id1]["timeline"], ) + self.assertLessEqual(expected_number_of_events, timeline_limit) + + # With no `from_token` (initial sync), it's all historical since there is no + # "live" token range. 
+ self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_incremental_sync(self) -> None: + """ + Test that `rooms` data during an incremental sync after an initial sync. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + self.helper.send(room_id1, "activity before initial sync1", tok=user2_tok) + + # Make an initial Sliding Sync request to grab a token. This is also a sanity + # check that we can go from initial to incremental sync. + sync_params = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + } + channel = self.make_request( + "POST", + self.sync_endpoint, + sync_params, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + next_pos = channel.json_body["pos"] + + # Send some events but don't send enough to saturate the `timeline_limit`. + # We want to later test that we only get the new events since the `next_pos` + event_response2 = self.helper.send(room_id1, "activity after2", tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + + # Make an incremental Sliding Sync request (what we're trying to test) + channel = self.make_request( + "POST", + self.sync_endpoint + f"?pos={next_pos}", + sync_params, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We only expect to see the new events since the last sync which isn't enough to + # fill up the `timeline_limit`. 
+ self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + f'Our `timeline_limit` was {sync_params["lists"]["foo-list"]["timeline_limit"]} ' + + f'and {len(channel.json_body["rooms"][room_id1]["timeline"])} events were returned in the timeline. ' + + str(channel.json_body["rooms"][room_id1]), + ) + # Check to make sure the latest events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + event_response3["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # All events are "live" + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 2, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_newly_joined_incremental_sync(self) -> None: + """ + Test that when we make an incremental sync with a `newly_joined` `rooms`, we are + able to see some historical events before the `from_token`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before token1", tok=user2_tok) + event_response2 = self.helper.send( + room_id1, "activity before token2", tok=user2_tok + ) + + from_token = self.event_sources.get_current_token() + + # Join the room after the `from_token` which will make us consider this room as + # `newly_joined`. + user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Send some events but don't send enough to saturate the `timeline_limit`. 
+ # We want to later test that we only get the new events since the `next_pos` + event_response3 = self.helper.send( + room_id1, "activity after token3", tok=user2_tok + ) + event_response4 = self.helper.send( + room_id1, "activity after token4", tok=user2_tok + ) + + # The `timeline_limit` is set to 4 so we can at least see one historical event + # before the `from_token`. We should see historical events because this is a + # `newly_joined` room. + timeline_limit = 4 + # Make an incremental Sliding Sync request (what we're trying to test) + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": timeline_limit, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see the new events and the rest should be filled with historical + # events which will make us `limited=True` since there are more to paginate to. + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + f"Our `timeline_limit` was {timeline_limit} " + + f'and {len(channel.json_body["rooms"][room_id1]["timeline"])} events were returned in the timeline. 
' + + str(channel.json_body["rooms"][room_id1]), + ) + # Check to make sure that the "live" and historical events are returned + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + user1_join_response["event_id"], + event_response3["event_id"], + event_response4["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + + # Only events after the `from_token` are "live" (join, event3, event4) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 3, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_invite_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` and that + we can't see any timeline events because we haven't joined the room yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after3", tok=user2_tok) + self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Should not see anything (except maybe the invite event) because we haven't + # joined yet (`filter_events_for_client(...)` is doing the work here) + self.assertEqual( + 
channel.json_body["rooms"][room_id1]["timeline"], + [], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # We should have some stripped state so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) From 70ecd4d8d3646ddb1fb55b37cdf9a07612a59d2f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:38:35 -0500 Subject: [PATCH 12/68] Fix lint --- synapse/handlers/sliding_sync.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c1b0b2153a1..7a6ef1a2d98 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -774,24 +774,22 @@ async def get_room_sync_data( ) ) - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before - should_limit_timeline_to_token_range = ( - from_token is not None and not newly_joined - ) - timeline_events, new_room_key = await self.store.paginate_room_events( 
room_id=room_id, # We're going to paginate backwards from the `to_token` from_key=to_token.room_key, to_key=( + # Determine whether we should limit the timeline to the token range. + # + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before from_token.room_key - if should_limit_timeline_to_token_range + if from_token is not None and not newly_joined else None ), direction=Direction.BACKWARDS, From 71eabe5e63fc2d637785866c6e1f471fe67d0966 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:41:41 -0500 Subject: [PATCH 13/68] Make room name optional --- synapse/handlers/sliding_sync.py | 3 +-- synapse/rest/client/sync.py | 4 +++- synapse/types/handlers/__init__.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 7a6ef1a2d98..f2b29ce1d1f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -879,8 +879,7 @@ async def get_room_sync_data( return SlidingSyncResult.RoomResult( # TODO: Dummy value - # TODO: Make this optional because a computed name doesn't make sense for translated cases - name="TODO", + name=None, # TODO: Dummy value avatar=None, # TODO: Dummy value diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 434eaa4789a..da28c2b3a5b 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -990,7 +990,6 @@ async def encode_rooms( ) serialized_rooms[room_id] = { - "name": room_result.name, "required_state": serialized_required_state, "timeline": serialized_timeline, "prev_batch": await room_result.prev_batch.to_string(self.store), @@ -1002,6 +1001,9 @@ async def encode_rooms( 
"num_live": room_result.num_live, } + if room_result.name: + serialized_rooms[room_id]["name"] = room_result.name + if room_result.avatar: serialized_rooms[room_id]["avatar"] = room_result.avatar diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 04b0ab972b8..1b544456a65 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -186,7 +186,7 @@ class RoomResult: (with potentially other old events in the timeline). """ - name: str + name: Optional[str] avatar: Optional[str] heroes: Optional[List[EventBase]] initial: bool From 39b4f10533fded08647c198c80e6b185bc8558e0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 19:55:12 -0500 Subject: [PATCH 14/68] Update comments --- synapse/handlers/sliding_sync.py | 14 +++++++++----- tests/rest/client/test_sync.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f2b29ce1d1f..cb5274d495f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -756,10 +756,10 @@ async def get_room_sync_data( # Assemble the list of timeline events timeline_events: List[EventBase] = [] limited = False - # We want to use `to_token` (vs `from_token`) because we look backwards from the - # `to_token` up to the `timeline_limit` and we might not reach `from_token` - # before we hit the limit. We will update the room stream position once we've - # fetched the events. + # We want to start off using the `to_token` (vs `from_token`) because we look + # backwards from the `to_token` up to the `timeline_limit` and we might not + # reach the `from_token` before we hit the limit. We will update the room stream + # position once we've fetched the events. prev_batch_token = to_token if room_sync_config.timeline_limit > 0: newly_joined = False @@ -803,6 +803,7 @@ async def get_room_sync_data( # most recent). 
timeline_events.reverse() + # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, user.to_string(), @@ -851,11 +852,14 @@ async def get_room_sync_data( # this more with a binary search (bisect). break + # Update the `prev_batch_token` to point to the position that allows us to + # keep paginating backwards from the oldest event we return in the timeline. prev_batch_token = prev_batch_token.copy_and_replace( StreamKeyType.ROOM, new_room_key ) - # Figure out any stripped state events for invite/knocks + # Figure out any stripped state events for invite/knocks. This allows the + # potential joiner to identify the room. stripped_state: List[JsonDict] = [] if rooms_for_user_membership_at_to_token.membership in { Membership.INVITE, diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index d538716e5ac..838ff6e2b41 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1874,7 +1874,7 @@ def test_rooms_invite_sync(self) -> None: [], channel.json_body["rooms"][room_id1]["timeline"], ) - # We should have some stripped state so the potential joiner can identify the + # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). 
self.assertCountEqual( channel.json_body["rooms"][room_id1]["invite_state"], From 9883b0f63f87cf34b50e28390a0fa29d8e014443 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 21:00:26 -0500 Subject: [PATCH 15/68] Add bundled aggregations --- synapse/handlers/sliding_sync.py | 16 +++++++++++++++- synapse/rest/client/sync.py | 5 ++--- synapse/types/handlers/__init__.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cb5274d495f..e418a6e0742 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -114,6 +114,7 @@ def __init__(self, hs: "HomeServer"): self.auth_blocking = hs.get_auth_blocking() self.notifier = hs.get_notifier() self.event_sources = hs.get_event_sources() + self.relations_handler = hs.get_relations_handler() self.rooms_to_exclude_globally = hs.config.server.rooms_to_exclude_from_sync async def wait_for_sync_for_user( @@ -881,6 +882,18 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) + # TODO: Handle timeline gaps (`get_timeline_gaps()`) + + # If the timeline is `limited=True`, the client does not have all events + # necessary to calculate aggregations themselves. 
+ bundled_aggregations = None + if limited: + bundled_aggregations = ( + await self.relations_handler.get_bundled_aggregations( + timeline_events, user.to_string() + ) + ) + return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, @@ -895,7 +908,8 @@ async def get_room_sync_data( initial=True, # TODO: Dummy value required_state=[], - timeline=timeline_events, + timeline_events=timeline_events, + bundled_aggregations=bundled_aggregations, # TODO: Dummy value is_dm=False, stripped_state=stripped_state, diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index da28c2b3a5b..4333ee8c2b6 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -976,11 +976,10 @@ async def encode_rooms( serialized_rooms = {} for room_id, room_result in rooms.items(): serialized_timeline = await self.event_serializer.serialize_events( - room_result.timeline, + room_result.timeline_events, time_now, config=serialize_options, - # TODO - # bundle_aggregations=room.timeline.bundled_aggregations, + bundle_aggregations=room_result.bundled_aggregations, ) serialized_required_state = await self.event_serializer.serialize_events( diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1b544456a65..1ba5ea55c14 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,6 +31,7 @@ from pydantic import Extra from synapse.events import EventBase +from synapse.handlers.relations import BundledAggregations from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody @@ -159,7 +160,11 @@ class RoomResult: entirely and NOT send "initial":false as this is wasteful on bandwidth. The absence of this flag means 'false'. required_state: The current state of the room - timeline: Latest events in the room. The last event is the most recent + timeline: Latest events in the room. The last event is the most recent. 
+ bundled_aggregations: A mapping of event ID to the bundled aggregations for + the timeline events above. This allows clients to show accurate reaction + counts (or edits, threads), even if some of the reaction events were skipped + over in a gappy sync. is_dm: Flag to specify whether the room is a direct-message room (most likely between two people). stripped_state: Stripped state events (for rooms where the usre is @@ -191,7 +196,8 @@ class RoomResult: heroes: Optional[List[EventBase]] initial: bool required_state: List[EventBase] - timeline: List[EventBase] + timeline_events: List[EventBase] + bundled_aggregations: Optional[Dict[str, BundledAggregations]] is_dm: bool stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken From 1c06153a0d3c24039a70b0c770947874bc05c246 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 18 Jun 2024 21:22:40 -0500 Subject: [PATCH 16/68] Determine limited before filtering --- synapse/handlers/sliding_sync.py | 27 ++++++++++++++++----------- tests/rest/client/test_sync.py | 8 ++++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e418a6e0742..fe369949c5e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -804,17 +804,9 @@ async def get_room_sync_data( # most recent). timeline_events.reverse() - # Make sure we don't expose any events that the client shouldn't see - timeline_events = await filter_events_for_client( - self.storage_controllers, - user.to_string(), - timeline_events, - is_peeking=rooms_for_user_membership_at_to_token.membership - != Membership.JOIN, - filter_send_to_client=True, - ) - - # Determine our `limited` status + # Determine our `limited` status based on the timeline. We do this before + # filtering the events so we can accurately determine if there is more to + # paginate even if we filter out some/all events. 
if len(timeline_events) > room_sync_config.timeline_limit: limited = True # Get rid of that extra "+ 1" event because we only used it to determine @@ -825,6 +817,19 @@ async def get_room_sync_data( stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) + # TODO: Does `newly_joined` affect `limited`? It does in sync v2 but I fail + # to understand why. + + # Make sure we don't expose any events that the client shouldn't see + timeline_events = await filter_events_for_client( + self.storage_controllers, + user.to_string(), + timeline_events, + is_peeking=rooms_for_user_membership_at_to_token.membership + != Membership.JOIN, + filter_send_to_client=True, + ) + # Determine how many "live" events we have (events within the given token range). # # This is mostly useful to determine whether a given @mention event should diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 838ff6e2b41..df85c94bd5a 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1874,6 +1874,13 @@ def test_rooms_invite_sync(self) -> None: [], channel.json_body["rooms"][room_id1]["timeline"], ) + # Even though we don't get any timeline events because they are filtered out, + # there is still more to paginate + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) # We should have some `stripped_state` so the potential joiner can identify the # room (we don't care about the order). 
self.assertCountEqual( @@ -1906,3 +1913,4 @@ def test_rooms_invite_sync(self) -> None: ], channel.json_body["rooms"][room_id1]["invite_state"], ) + From c81f3006a5e768e0e3f099dd7e001a7f1768b2c6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 12:54:39 -0500 Subject: [PATCH 17/68] Add better support for leave/ban --- synapse/handlers/sliding_sync.py | 48 ++-- synapse/storage/databases/main/stream.py | 20 ++ tests/rest/client/test_sync.py | 350 ++++++++++++++++++++++- tests/rest/client/utils.py | 4 +- 4 files changed, 399 insertions(+), 23 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fe369949c5e..0d2f4dbfffa 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -775,24 +775,36 @@ async def get_room_sync_data( ) ) + # We're going to paginate backwards from the `to_token` + from_bound = to_token.room_key + # People shouldn't see past their leave/ban event + if rooms_for_user_membership_at_to_token.membership in ( + Membership.LEAVE, + Membership.BAN, + ): + from_bound = ( + rooms_for_user_membership_at_to_token.event_pos.to_room_stream_token() + ) + + # Determine whether we should limit the timeline to the token range. + # + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - TODO: For an incremental sync where we haven't sent it down this + # connection before + to_bound = ( + from_token.room_key + if from_token is not None and not newly_joined + else None + ) + timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, - # We're going to paginate backwards from the `to_token` - from_key=to_token.room_key, - to_key=( - # Determine whether we should limit the timeline to the token range. 
- # - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before - from_token.room_key - if from_token is not None and not newly_joined - else None - ), + from_key=from_bound, + to_key=to_bound, direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate # the limit or not (see `limited`) @@ -867,10 +879,10 @@ async def get_room_sync_data( # Figure out any stripped state events for invite/knocks. This allows the # potential joiner to identify the room. stripped_state: List[JsonDict] = [] - if rooms_for_user_membership_at_to_token.membership in { + if rooms_for_user_membership_at_to_token.membership in ( Membership.INVITE, Membership.KNOCK, - }: + ): invite_or_knock_event = await self.store.get_event( rooms_for_user_membership_at_to_token.event_id ) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ff0d723684d..c21e69ecdab 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1551,6 +1551,9 @@ async def paginate_room_events( ) -> Tuple[List[EventBase], RoomStreamToken]: """Returns list of events before or after a given token. + When Direction.FORWARDS: from_key < x <= to_key + When Direction.BACKWARDS: from_key >= x > to_key + Args: room_id from_key: The token used to stream from @@ -1567,6 +1570,23 @@ async def paginate_room_events( and `to_key`). """ + # We can bail early if we're looking forwards, and our `to_key` is already + # before our `from_key`. 
+ if ( + direction == Direction.FORWARDS + and to_key is not None + and to_key.is_before_or_eq(from_key) + ): + return [], from_key + # Or vice-versa, if we're looking backwards and our `from_key` is already before + # our `to_key`. + elif ( + direction == Direction.BACKWARDS + and to_key is not None + and from_key.is_before_or_eq(to_key) + ): + return [], from_key + rows, token = await self.db_pool.runInteraction( "paginate_room_events", self._paginate_room_events_txn, diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index df85c94bd5a..32542a64e8b 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -31,6 +31,7 @@ AccountDataTypes, EventContentFields, EventTypes, + HistoryVisibility, ReceiptTypes, RelationTypes, ) @@ -1831,10 +1832,11 @@ def test_rooms_newly_joined_incremental_sync(self) -> None: channel.json_body["rooms"][room_id1], ) - def test_rooms_invite_sync(self) -> None: + def test_rooms_invite_shared_history_initial_sync(self) -> None: """ Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because we haven't joined the room yet. + we can't see any timeline events because the history visiblity is `shared` and + we haven't joined the room yet. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1844,6 +1846,16 @@ def test_rooms_invite_sync(self) -> None: user2 = UserID.from_string(user2_id) room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Ensure we're testing with a room with `shared` history visibility which means + # history visible until you actually join the room. 
+ history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.SHARED, + ) + self.helper.send(room_id1, "activity before1", tok=user2_tok) self.helper.send(room_id1, "activity before2", tok=user2_tok) self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) @@ -1868,12 +1880,21 @@ def test_rooms_invite_sync(self) -> None: self.assertEqual(channel.code, 200, channel.json_body) # Should not see anything (except maybe the invite event) because we haven't - # joined yet (`filter_events_for_client(...)` is doing the work here) + # joined yet (history visibility is `shared`) (`filter_events_for_client(...)` + # is doing the work here) self.assertEqual( channel.json_body["rooms"][room_id1]["timeline"], [], channel.json_body["rooms"][room_id1]["timeline"], ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) and no events returned in the timeline anyway so nothing could be + # "live". + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) # Even though we don't get any timeline events because they are filtered out, # there is still more to paginate self.assertEqual( @@ -1914,3 +1935,326 @@ def test_rooms_invite_sync(self) -> None: channel.json_body["rooms"][room_id1]["invite_state"], ) + + def test_rooms_invite_world_readable_history_initial_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` and that + we can't see any timeline events because the history visiblity is `shared` and + we haven't joined the room yet. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, + extra_content={ + "preset": "public_chat", + "initial_state": [ + { + "content": {"history_visibility": HistoryVisibility.WORLD_READABLE}, + "state_key": "", + "type": EventTypes.RoomHistoryVisibility, + } + ], + },) + # Ensure we're testing with a room with `world_readable` history visibility + # which means events are visible to anyone even without membership. + history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.WORLD_READABLE, + ) + + self.helper.send(room_id1, "activity before1", tok=user2_tok) + event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) + use1_invite_response = self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + # Large enough to see the latest events and before the invite + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Should see the last 4 events in the room + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response2["event_id"], + use1_invite_response["event_id"], + event_response3["event_id"], + 
event_response4["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There is still more to paginate + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + + def test_rooms_ban_initial_sync(self) -> None: + """ + Test that `rooms` we are banned from in an intial sync only allows us to see + timeline events up to the ban event. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + + self.helper.send(room_id1, "activity after5", tok=user2_tok) + self.helper.send(room_id1, "activity after6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see events before the ban but not after + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response3["event_id"], + event_response4["event_id"], + user1_ban_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No "live" events in a initial sync (no `from_token` to define the "live" + # range) + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There are more events to paginate to + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + True, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_ban_incremental_sync1(self) -> None: + """ + Test that `rooms` we are banned from during the next incremental sync only + allows us to see 
timeline events up to the ban event. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + from_token = self.event_sources.get_current_token() + + event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) + event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + # The ban is within the token range (between the `from_token` and the sliding + # sync request) + user1_ban_response = self.helper.ban( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) + + self.helper.send(room_id1, "activity after5", tok=user2_tok) + self.helper.send(room_id1, "activity after6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # We should see events before the ban but not after + self.assertEqual( + [ + event["event_id"] + for event in channel.json_body["rooms"][room_id1]["timeline"] + ], + [ + event_response3["event_id"], + event_response4["event_id"], + user1_ban_response["event_id"], + ], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # All live events in the incremental sync + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 3, + channel.json_body["rooms"][room_id1], + ) + # There aren't anymore events to paginate to in this range + self.assertEqual( + 
channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) + + def test_rooms_ban_incremental_sync2(self) -> None: + """ + Test that `rooms` we are banned from before the incremental sync doesn't return + any events in the timeline. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.send(room_id1, "activity before1", tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + self.helper.send(room_id1, "activity after2", tok=user2_tok) + # The ban is before we get our `from_token` + self.helper.ban(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + + self.helper.send(room_id1, "activity after3", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after4", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Nothing to see for this banned user in the room in the token range + self.assertEqual( + channel.json_body["rooms"][room_id1]["timeline"], + [], + channel.json_body["rooms"][room_id1]["timeline"], + ) + # No events returned in the timeline so nothing is "live" + self.assertEqual( + channel.json_body["rooms"][room_id1]["num_live"], + 0, + channel.json_body["rooms"][room_id1], + ) + # There aren't anymore events to paginate to in this range + self.assertEqual( + channel.json_body["rooms"][room_id1]["limited"], + False, + channel.json_body["rooms"][room_id1], + ) diff --git 
a/tests/rest/client/utils.py b/tests/rest/client/utils.py index f0ba40a1f13..e43140720db 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -261,9 +261,9 @@ def ban( targ: str, expect_code: int = HTTPStatus.OK, tok: Optional[str] = None, - ) -> None: + ) -> JsonDict: """A convenience helper: `change_membership` with `membership` preset to "ban".""" - self.change_membership( + return self.change_membership( room=room, src=src, targ=targ, From d801db0d96ef53e1eaa42c7540f744a56de90b59 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 13:24:01 -0500 Subject: [PATCH 18/68] Fix lints --- tests/rest/client/test_sync.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 32542a64e8b..6db6f855baf 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1935,7 +1935,6 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: channel.json_body["rooms"][room_id1]["invite_state"], ) - def test_rooms_invite_world_readable_history_initial_sync(self) -> None: """ Test that `rooms` we are invited to have some stripped `invite_state` and that @@ -1949,17 +1948,22 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: user2_tok = self.login(user2_id, "pass") user2 = UserID.from_string(user2_id) - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, extra_content={ "preset": "public_chat", "initial_state": [ { - "content": {"history_visibility": HistoryVisibility.WORLD_READABLE}, + "content": { + "history_visibility": HistoryVisibility.WORLD_READABLE + }, "state_key": "", "type": EventTypes.RoomHistoryVisibility, } ], - },) + }, + ) # Ensure we're testing with a room with `world_readable` history visibility # which means events are visible to anyone even without membership. 
history_visibility_response = self.helper.get_state( @@ -1972,7 +1976,9 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: self.helper.send(room_id1, "activity before1", tok=user2_tok) event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) - use1_invite_response = self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + use1_invite_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) From 884b44801253c6b97ae07f958744c8443649153e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 19 Jun 2024 13:50:28 -0500 Subject: [PATCH 19/68] Update some wording --- synapse/handlers/sliding_sync.py | 6 +++--- tests/rest/client/test_sync.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0d2f4dbfffa..3e49054e430 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -264,7 +264,7 @@ async def current_sync_for_user( ) ) - # Update the relevant room map + # Take the superset of the `RoomSyncConfig` for each room for room_id in sliced_room_ids: if relevant_room_map.get(room_id) is not None: # Take the highest timeline limit @@ -739,7 +739,7 @@ async def get_room_sync_data( to_token: StreamToken, ) -> SlidingSyncResult.RoomResult: """ - Fetch room data for a room. + Fetch room data for the sync response. We fetch data according to the token range (> `from_token` and <= `to_token`). @@ -760,7 +760,7 @@ async def get_room_sync_data( # We want to start off using the `to_token` (vs `from_token`) because we look # backwards from the `to_token` up to the `timeline_limit` and we might not # reach the `from_token` before we hit the limit. 
We will update the room stream - # position once we've fetched the events. + # position once we've fetched the events to point to the earliest event fetched. prev_batch_token = to_token if room_sync_config.timeline_limit > 0: newly_joined = False diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3213059a784..a55804c96ca 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1607,7 +1607,7 @@ def test_rooms_limited_initial_sync(self) -> None: ) # With no `from_token` (initial sync), it's all historical since there is no - # "current" range + # "live" range self.assertEqual( channel.json_body["rooms"][room_id1]["num_live"], 0, @@ -1674,7 +1674,7 @@ def test_rooms_not_limited_initial_sync(self) -> None: def test_rooms_incremental_sync(self) -> None: """ - Test that `rooms` data during an incremental sync after an initial sync. + Test `rooms` data during an incremental sync after an initial sync. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1889,7 +1889,7 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: [], channel.json_body["rooms"][room_id1]["timeline"], ) - # No "live" events in a initial sync (no `from_token` to define the "live" + # No "live" events in an initial sync (no `from_token` to define the "live" # range) and no events returned in the timeline anyway so nothing could be # "live". 
self.assertEqual( @@ -2016,7 +2016,7 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: ], channel.json_body["rooms"][room_id1]["timeline"], ) - # No "live" events in a initial sync (no `from_token` to define the "live" + # No "live" events in an initial sync (no `from_token` to define the "live" # range) self.assertEqual( channel.json_body["rooms"][room_id1]["num_live"], @@ -2116,7 +2116,7 @@ def test_rooms_ban_initial_sync(self) -> None: ], channel.json_body["rooms"][room_id1]["timeline"], ) - # No "live" events in a initial sync (no `from_token` to define the "live" + # No "live" events in an initial sync (no `from_token` to define the "live" # range) self.assertEqual( channel.json_body["rooms"][room_id1]["num_live"], @@ -2206,7 +2206,7 @@ def test_rooms_ban_incremental_sync1(self) -> None: def test_rooms_ban_incremental_sync2(self) -> None: """ - Test that `rooms` we are banned from before the incremental sync doesn't return + Test that `rooms` we are banned from before the incremental sync don't return any events in the timeline. 
""" user1_id = self.register_user("user1", "pass") From 0eb029472e5410b780156f12db13434b003f42ae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 Jun 2024 14:34:10 -0500 Subject: [PATCH 20/68] Remove unused `IncludeOldRooms` class --- synapse/types/rest/client/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py index 25fbd772f6f..5d453769b5e 100644 --- a/synapse/types/rest/client/__init__.py +++ b/synapse/types/rest/client/__init__.py @@ -154,10 +154,6 @@ class CommonRoomParameters(RequestBodyModel): (Max 1000 messages) """ - class IncludeOldRooms(RequestBodyModel): - timeline_limit: StrictInt - required_state: List[Tuple[StrictStr, StrictStr]] - required_state: List[Tuple[StrictStr, StrictStr]] # mypy workaround via https://github.com/pydantic/pydantic/issues/156#issuecomment-1130883884 if TYPE_CHECKING: From 87fac19fdebd070b09a7a7daae7217ccaa2f2d1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 10:15:15 -0500 Subject: [PATCH 21/68] Fix lints See https://github.com/element-hq/synapse/pull/17320#discussion_r1647701997 ``` synapse/federation/federation_server.py:677: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] synapse/federation/federation_server.py:720: error: Cannot determine type of "_join_rate_per_room_limiter" [has-type] ``` --- synapse/types/handlers/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 1ba5ea55c14..8e097d8b486 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -31,10 +31,12 @@ from pydantic import Extra from synapse.events import EventBase -from synapse.handlers.relations import BundledAggregations from synapse.types import JsonDict, JsonMapping, StreamToken, UserID from synapse.types.rest.client import SlidingSyncBody +if TYPE_CHECKING: + from 
synapse.handlers.relations import BundledAggregations + class ShutdownRoomParams(TypedDict): """ @@ -197,7 +199,7 @@ class RoomResult: initial: bool required_state: List[EventBase] timeline_events: List[EventBase] - bundled_aggregations: Optional[Dict[str, BundledAggregations]] + bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool stripped_state: Optional[List[JsonDict]] prev_batch: StreamToken From 0e71a2f2d1231603d4643f9402dbd7b4f4df226b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 15:56:27 -0500 Subject: [PATCH 22/68] Add TODO for filtering call invites in public rooms --- synapse/handlers/sliding_sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3e49054e430..a6e84cb976e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -841,6 +841,8 @@ async def get_room_sync_data( != Membership.JOIN, filter_send_to_client=True, ) + # TODO: Filter out `EventTypes.CallInvite` in public rooms, + # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 # Determine how many "live" events we have (events within the given token range). # From 21ca02c5ad2b030f3a3d76526690b23f40ef9412 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 16:08:58 -0500 Subject: [PATCH 23/68] `newly_joined` vs `limited` already being tracked in a discussion See https://github.com/element-hq/synapse/pull/17320#discussion_r1646579623 if anything comes out of it. --- synapse/handlers/sliding_sync.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index a6e84cb976e..4d73134e7ff 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -829,9 +829,6 @@ async def get_room_sync_data( stream=timeline_events[0].internal_metadata.stream_ordering - 1 ) - # TODO: Does `newly_joined` affect `limited`? 
It does in sync v2 but I fail - # to understand why. - # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, From 35683119890e06bb65bca24e303154acb4f62a1b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 24 Jun 2024 19:08:18 -0500 Subject: [PATCH 24/68] Fix spelling typo --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 4d73134e7ff..d5390e89459 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -535,7 +535,7 @@ async def get_sync_room_ids_for_user( # 2) ----------------------------------------------------- # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out our newly_left in the following code + # some left rooms that we can figure out are newly_left in the following code # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` membership_change_events_in_from_to_range = [] From 7aea406c22066f061cf537ed25d0dbb00a107308 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 11:18:27 -0500 Subject: [PATCH 25/68] Just stripped_state for invite rooms --- synapse/handlers/sliding_sync.py | 27 +++-- synapse/rest/client/sync.py | 57 ++++++--- synapse/types/handlers/__init__.py | 15 ++- tests/rest/client/test_sync.py | 188 ++++++++++++++++++++++------- 4 files changed, 208 insertions(+), 79 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index d5390e89459..991d32356ee 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -755,14 +755,23 @@ async def get_room_sync_data( """ # Assemble the list of timeline events - timeline_events: List[EventBase] = [] - limited = False - # We want to start off using the `to_token` (vs 
`from_token`) because we look - # backwards from the `to_token` up to the `timeline_limit` and we might not - # reach the `from_token` before we hit the limit. We will update the room stream - # position once we've fetched the events to point to the earliest event fetched. - prev_batch_token = to_token - if room_sync_config.timeline_limit > 0: + timeline_events: Optional[List[EventBase]] = None + limited: Optional[bool] = None + prev_batch_token: Optional[StreamToken] = None + num_live: Optional[int] = None + if ( + room_sync_config.timeline_limit > 0 + # No timeline for invite/knock rooms (just `stripped_state`) + and rooms_for_user_membership_at_to_token.membership + not in (Membership.INVITE, Membership.KNOCK) + ): + limited = False + # We want to start off using the `to_token` (vs `from_token`) because we look + # backwards from the `to_token` up to the `timeline_limit` and we might not + # reach the `from_token` before we hit the limit. We will update the room stream + # position once we've fetched the events to point to the earliest event fetched. + prev_batch_token = to_token + newly_joined = False if ( # We can only determine new-ness if we have a `from_token` to define our range @@ -903,7 +912,7 @@ async def get_room_sync_data( # If the timeline is `limited=True`, the client does not have all events # necessary to calculate aggregations themselves. 
bundled_aggregations = None - if limited: + if limited and timeline_events is not None: bundled_aggregations = ( await self.relations_handler.get_bundled_aggregations( timeline_events, user.to_string() diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index b60af6356ab..1d955a2e893 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -973,31 +973,13 @@ async def encode_rooms( requester=requester, ) - serialized_rooms = {} + serialized_rooms: Dict[str, JsonDict] = {} for room_id, room_result in rooms.items(): - serialized_timeline = await self.event_serializer.serialize_events( - room_result.timeline_events, - time_now, - config=serialize_options, - bundle_aggregations=room_result.bundled_aggregations, - ) - - serialized_required_state = await self.event_serializer.serialize_events( - room_result.required_state, - time_now, - config=serialize_options, - ) - serialized_rooms[room_id] = { - "required_state": serialized_required_state, - "timeline": serialized_timeline, - "prev_batch": await room_result.prev_batch.to_string(self.store), - "limited": room_result.limited, "joined_count": room_result.joined_count, "invited_count": room_result.invited_count, "notification_count": room_result.notification_count, "highlight_count": room_result.highlight_count, - "num_live": room_result.num_live, } if room_result.name: @@ -1014,12 +996,47 @@ async def encode_rooms( if room_result.initial: serialized_rooms[room_id]["initial"] = room_result.initial + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.required_state is not None: + serialized_required_state = ( + await self.event_serializer.serialize_events( + room_result.required_state, + time_now, + config=serialize_options, + ) + ) + serialized_rooms[room_id]["required_state"] = serialized_required_state + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.timeline_events is not None: + serialized_timeline = await 
self.event_serializer.serialize_events( + room_result.timeline_events, + time_now, + config=serialize_options, + bundle_aggregations=room_result.bundled_aggregations, + ) + serialized_rooms[room_id]["timeline"] = serialized_timeline + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.limited is not None: + serialized_rooms[room_id]["limited"] = room_result.limited + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.prev_batch is not None: + serialized_rooms[room_id]["prev_batch"] = ( + await room_result.prev_batch.to_string(self.store) + ) + + # This will omitted for invite/knock rooms with `stripped_state` + if room_result.num_live is not None: + serialized_rooms[room_id]["num_live"] = room_result.num_live + # Field should be absent on non-DM rooms if room_result.is_dm: serialized_rooms[room_id]["is_dm"] = room_result.is_dm # Stripped state only applies to invite/knock rooms - if room_result.stripped_state: + if room_result.stripped_state is not None: # TODO: `knocked_state` but that isn't specced yet. 
# # TODO: Instead of adding `knocked_state`, it would be good to rename diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 8e097d8b486..d50d02bfc60 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -197,18 +197,23 @@ class RoomResult: avatar: Optional[str] heroes: Optional[List[EventBase]] initial: bool - required_state: List[EventBase] - timeline_events: List[EventBase] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + required_state: Optional[List[EventBase]] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + timeline_events: Optional[List[EventBase]] bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool stripped_state: Optional[List[JsonDict]] - prev_batch: StreamToken - limited: bool + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + prev_batch: Optional[StreamToken] + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + limited: Optional[bool] joined_count: int invited_count: int notification_count: int highlight_count: int - num_live: int + # Only optional because it won't be included for invite/knock rooms with `stripped_state` + num_live: Optional[int] @attr.s(slots=True, frozen=True, auto_attribs=True) class SlidingWindowList: diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index a55804c96ca..ad6b29b412a 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1881,27 +1881,134 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: ) self.assertEqual(channel.code, 200, channel.json_body) - # Should not see anything (except maybe the invite event) because we haven't - # joined yet (history visibility is `shared`) (`filter_events_for_client(...)` - # is doing the work here) - self.assertEqual( - 
channel.json_body["rooms"][room_id1]["timeline"], - [], - channel.json_body["rooms"][room_id1]["timeline"], + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], ) - # No "live" events in an initial sync (no `from_token` to define the "live" - # range) and no events returned in the timeline anyway so nothing could be - # "live". - self.assertEqual( - channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) - # Even though we don't get any timeline events because they are filtered out, - # there is still more to paginate + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). 
+ self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + + def test_rooms_invite_shared_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` + + This is an `invite` room so we should only have `stripped_state` (no timeline) + but we also shouldn't see any timeline events because the history visiblity is + `shared` and we haven't joined the room yet. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + # Ensure we're testing with a room with `shared` history visibility which means + # history visible until you actually join the room. 
+ history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + history_visibility_response.get("history_visibility"), + HistoryVisibility.SHARED, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 3, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), 
channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the @@ -1977,12 +2084,10 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: ) self.helper.send(room_id1, "activity before1", tok=user2_tok) - event_response2 = self.helper.send(room_id1, "activity before2", tok=user2_tok) - use1_invite_response = self.helper.invite( - room_id1, src=user2_id, targ=user1_id, tok=user2_tok - ) - event_response3 = self.helper.send(room_id1, "activity after3", tok=user2_tok) - event_response4 = self.helper.send(room_id1, "activity after4", tok=user2_tok) + self.helper.send(room_id1, "activity before2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after3", tok=user2_tok) + self.helper.send(room_id1, "activity after4", tok=user2_tok) # Make the Sliding Sync request channel = self.make_request( @@ -2002,31 +2107,24 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: ) self.assertEqual(channel.code, 200, channel.json_body) - # Should see the last 4 events in the room - self.assertEqual( - [ - event["event_id"] - for event in channel.json_body["rooms"][room_id1]["timeline"] - ], - [ - event_response2["event_id"], - use1_invite_response["event_id"], - event_response3["event_id"], - event_response4["event_id"], - ], - channel.json_body["rooms"][room_id1]["timeline"], + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], ) - # No "live" events in an initial sync (no `from_token` to define the "live" - # range) - self.assertEqual( - channel.json_body["rooms"][room_id1]["num_live"], - 0, + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), 
channel.json_body["rooms"][room_id1], ) - # There is still more to paginate - self.assertEqual( - channel.json_body["rooms"][room_id1]["limited"], - True, + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("prev_batch"), channel.json_body["rooms"][room_id1], ) # We should have some `stripped_state` so the potential joiner can identify the From e3e431fab4ba821b62558ebdffb5bbad2fcc6da3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:35:48 -0500 Subject: [PATCH 26/68] Finish up stripped_state for invite rooms See https://github.com/element-hq/synapse/pull/17320#discussion_r1646581077 --- synapse/handlers/sliding_sync.py | 27 ++--- synapse/types/handlers/__init__.py | 1 + tests/rest/client/test_sync.py | 156 +++++++++++++++++++++++++++-- 3 files changed, 162 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 991d32356ee..e7810804707 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -26,6 +26,7 @@ from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase from synapse.events.utils import strip_event +from synapse.handlers.relations import BundledAggregations from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, @@ -756,6 +757,7 @@ async def get_room_sync_data( # Assemble the list of timeline events timeline_events: Optional[List[EventBase]] = None + bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None prev_batch_token: Optional[StreamToken] = None num_live: Optional[int] = None @@ -848,7 +850,9 
@@ async def get_room_sync_data( filter_send_to_client=True, ) # TODO: Filter out `EventTypes.CallInvite` in public rooms, - # see https://github.com/element-hq/synapse/pull/16908#discussion_r1651598029 + # see https://github.com/element-hq/synapse/issues/17359 + + # TODO: Handle timeline gaps (`get_timeline_gaps()`) # Determine how many "live" events we have (events within the given token range). # @@ -878,6 +882,15 @@ async def get_room_sync_data( # this more with a binary search (bisect). break + # If the timeline is `limited=True`, the client does not have all events + # necessary to calculate aggregations themselves. + if limited: + bundled_aggregations = ( + await self.relations_handler.get_bundled_aggregations( + timeline_events, user.to_string() + ) + ) + # Update the `prev_batch_token` to point to the position that allows us to # keep paginating backwards from the oldest event we return in the timeline. prev_batch_token = prev_batch_token.copy_and_replace( @@ -907,18 +920,6 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) - # TODO: Handle timeline gaps (`get_timeline_gaps()`) - - # If the timeline is `limited=True`, the client does not have all events - # necessary to calculate aggregations themselves. 
- bundled_aggregations = None - if limited and timeline_events is not None: - bundled_aggregations = ( - await self.relations_handler.get_bundled_aggregations( - timeline_events, user.to_string() - ) - ) - return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index d50d02bfc60..3cd3c8fb0fa 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -203,6 +203,7 @@ class RoomResult: timeline_events: Optional[List[EventBase]] bundled_aggregations: Optional[Dict[str, "BundledAggregations"]] is_dm: bool + # Optional because it's only relevant to invite/knock rooms stripped_state: Optional[List[JsonDict]] # Only optional because it won't be included for invite/knock rooms with `stripped_state` prev_batch: Optional[StreamToken] diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index ad6b29b412a..ba7cae8645f 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1836,9 +1836,12 @@ def test_rooms_newly_joined_incremental_sync(self) -> None: def test_rooms_invite_shared_history_initial_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because the history visiblity is `shared` and - we haven't joined the room yet. + Test that `rooms` we are invited to have some stripped `invite_state` during an + initial sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but we also shouldn't see any timeline events because the history visiblity is + `shared` and we haven't joined the room yet. 
""" user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -1936,9 +1939,10 @@ def test_rooms_invite_shared_history_initial_sync(self) -> None: def test_rooms_invite_shared_history_incremental_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` - - This is an `invite` room so we should only have `stripped_state` (no timeline) + Test that `rooms` we are invited to have some stripped `invite_state` during an + incremental sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) but we also shouldn't see any timeline events because the history visiblity is `shared` and we haven't joined the room yet. """ @@ -2046,9 +2050,14 @@ def test_rooms_invite_shared_history_incremental_sync(self) -> None: def test_rooms_invite_world_readable_history_initial_sync(self) -> None: """ - Test that `rooms` we are invited to have some stripped `invite_state` and that - we can't see any timeline events because the history visiblity is `shared` and - we haven't joined the room yet. + Test that `rooms` we are invited to have some stripped `invite_state` during an + initial sync. + + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but depending on the semantics we decide, we could potentially see some + historical events before/after the `from_token` because the history is + `world_readable`. Same situation for events after the `from_token` if the + history visibility was set to `invited`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -2160,6 +2169,135 @@ def test_rooms_invite_world_readable_history_initial_sync(self) -> None: channel.json_body["rooms"][room_id1]["invite_state"], ) + def test_rooms_invite_world_readable_history_incremental_sync(self) -> None: + """ + Test that `rooms` we are invited to have some stripped `invite_state` during an + incremental sync. 
+ + This is an `invite` room so we should only have `stripped_state` (no `timeline`) + but depending on the semantics we decide, we could potentially see some + historical events before/after the `from_token` because the history is + `world_readable`. Same situation for events after the `from_token` if the + history visibility was set to `invited`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user1 = UserID.from_string(user1_id) + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user2 = UserID.from_string(user2_id) + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "preset": "public_chat", + "initial_state": [ + { + "content": { + "history_visibility": HistoryVisibility.WORLD_READABLE + }, + "state_key": "", + "type": EventTypes.RoomHistoryVisibility, + } + ], + }, + ) + # Ensure we're testing with a room with `world_readable` history visibility + # which means events are visible to anyone even without membership. 
+ history_visibility_response = self.helper.get_state( + room_id1, EventTypes.RoomHistoryVisibility, tok=user2_tok + ) + self.assertEqual( + history_visibility_response.get("history_visibility"), + HistoryVisibility.WORLD_READABLE, + ) + + self.helper.send(room_id1, "activity before invite1", tok=user2_tok) + self.helper.send(room_id1, "activity before invite2", tok=user2_tok) + self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + self.helper.send(room_id1, "activity after invite3", tok=user2_tok) + self.helper.send(room_id1, "activity after invite4", tok=user2_tok) + + from_token = self.event_sources.get_current_token() + + self.helper.send(room_id1, "activity after token5", tok=user2_tok) + self.helper.send(room_id1, "activity after toekn6", tok=user2_tok) + + # Make the Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + + f"?pos={self.get_success( + from_token.to_string(self.store) + )}", + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + # Large enough to see the latest events and before the invite + "timeline_limit": 4, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # `timeline` is omitted for `invite` rooms with `stripped_state` + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("timeline"), + channel.json_body["rooms"][room_id1], + ) + # `num_live` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("num_live"), + channel.json_body["rooms"][room_id1], + ) + # `limited` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + channel.json_body["rooms"][room_id1].get("limited"), + channel.json_body["rooms"][room_id1], + ) + # `prev_batch` is omitted for `invite` rooms with `stripped_state` (no timeline anyway) + self.assertIsNone( + 
channel.json_body["rooms"][room_id1].get("prev_batch"), + channel.json_body["rooms"][room_id1], + ) + # We should have some `stripped_state` so the potential joiner can identify the + # room (we don't care about the order). + self.assertCountEqual( + channel.json_body["rooms"][room_id1]["invite_state"], + [ + { + "content": {"creator": user2_id, "room_version": "10"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.create", + }, + { + "content": {"join_rule": "public"}, + "sender": user2_id, + "state_key": "", + "type": "m.room.join_rules", + }, + { + "content": {"displayname": user2.localpart, "membership": "join"}, + "sender": user2_id, + "state_key": user2_id, + "type": "m.room.member", + }, + { + "content": {"displayname": user1.localpart, "membership": "invite"}, + "sender": user2_id, + "state_key": user1_id, + "type": "m.room.member", + }, + ], + channel.json_body["rooms"][room_id1]["invite_state"], + ) + def test_rooms_ban_initial_sync(self) -> None: """ Test that `rooms` we are banned from in an intial sync only allows us to see From 303d834b78a7c93e390da3f426754cafff07c20f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 12:38:09 -0500 Subject: [PATCH 27/68] Add tracking discussion for not optional in the future --- synapse/handlers/sliding_sync.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e7810804707..0538fddf845 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -756,6 +756,11 @@ async def get_room_sync_data( """ # Assemble the list of timeline events + # + # It would be nice to make the `rooms` response more uniform regardless of + # membership. Currently, we have to make all of these optional because + # `invite`/`knock` rooms only have `stripped_state`. 
See + # https://github.com/matrix-org/matrix-spec-proposals/pull/3575#discussion_r1653045932 timeline_events: Optional[List[EventBase]] = None bundled_aggregations: Optional[Dict[str, BundledAggregations]] = None limited: Optional[bool] = None From 4c2213144258cef2b2ac7960f290649a076d1927 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 15:27:43 -0500 Subject: [PATCH 28/68] Start testing for the correct room membership (failing) --- tests/handlers/test_sliding_sync.py | 477 +++++++++++++++++++++++++--- 1 file changed, 432 insertions(+), 45 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0358239c7f4..df262400e4f 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -63,6 +63,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.sliding_sync_handler = self.hs.get_sliding_sync_handler() self.store = self.hs.get_datastores().main self.event_sources = hs.get_event_sources() + self.storage_controllers = hs.get_storage_controllers() def test_no_rooms(self) -> None: """ @@ -90,10 +91,13 @@ def test_get_newly_joined_room(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room_token = self.event_sources.get_current_token() - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -106,6 +110,12 @@ def test_get_newly_joined_room(self) -> None: ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to range) + self.assertEqual( + room_id_results[room_id].event_id, + 
join_response["event_id"], + ) def test_get_already_joined_room(self) -> None: """ @@ -113,8 +123,11 @@ def test_get_already_joined_room(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id, user1_id, tok=user1_tok) after_room_token = self.event_sources.get_current_token() @@ -127,6 +140,12 @@ def test_get_already_joined_room(self) -> None: ) self.assertEqual(room_id_results.keys(), {room_id}) + # It should be pointing to the join event (latest membership event in the + # from/to range) + self.assertEqual( + room_id_results[room_id].event_id, + join_response["event_id"], + ) def test_get_invited_banned_knocked_room(self) -> None: """ @@ -142,14 +161,18 @@ def test_get_invited_banned_knocked_room(self) -> None: # Setup the invited room (user2 invites user1 to the room) invited_room_id = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.invite(invited_room_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + invited_room_id, targ=user1_id, tok=user2_tok + ) # Setup the ban room (user2 bans user1 from the room) ban_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) self.helper.join(ban_room_id, user1_id, tok=user1_tok) - self.helper.ban(ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok) + ban_response = self.helper.ban( + ban_room_id, src=user2_id, targ=user1_id, tok=user2_tok + ) # Setup the knock room (user1 knocks on the room) knock_room_id = self.helper.create_room_as( @@ -162,13 +185,19 @@ def test_get_invited_banned_knocked_room(self) -> None: tok=user2_tok, ) # User1 knocks on the room - channel = self.make_request( + knock_channel = self.make_request( "POST", 
"/_matrix/client/r0/knock/%s" % (knock_room_id,), b"{}", user1_tok, ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(knock_channel.code, 200, knock_channel.result) + knock_room_membership_state_event = self.get_success( + self.storage_controllers.state.get_current_state_event( + knock_room_id, EventTypes.Member, user1_id + ) + ) + assert knock_room_membership_state_event is not None after_room_token = self.event_sources.get_current_token() @@ -189,6 +218,20 @@ def test_get_invited_banned_knocked_room(self) -> None: knock_room_id, }, ) + # It should be pointing to the the respective membership event (latest + # membership event in the from/to range) + self.assertEqual( + room_id_results[invited_room_id].event_id, + invite_response["event_id"], + ) + self.assertEqual( + room_id_results[ban_room_id].event_id, + ban_response["event_id"], + ) + self.assertEqual( + room_id_results[knock_room_id].event_id, + knock_room_membership_state_event.event_id, + ) def test_get_kicked_room(self) -> None: """ @@ -206,7 +249,7 @@ def test_get_kicked_room(self) -> None: ) self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -229,6 +272,11 @@ def test_get_kicked_room(self) -> None: # The kicked room should show up self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + ) def test_forgotten_rooms(self) -> None: """ @@ -329,7 +377,7 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - self.helper.leave(room_id2, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) 
after_room2_token = self.event_sources.get_current_token() @@ -343,6 +391,11 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id2].event_id, + leave_response["event_id"], + ) def test_no_joins_after_to_token(self) -> None: """ @@ -351,16 +404,19 @@ def test_no_joins_after_to_token(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() - # Room join after after our `to_token` shouldn't show up - room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - _ = room_id2 + # Room join after our `to_token` shouldn't show up + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id2, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -371,6 +427,11 @@ def test_no_joins_after_to_token(self) -> None: ) self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -380,15 +441,18 @@ def test_join_during_range_and_left_room_after_to_token(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = 
self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") before_room1_token = self.event_sources.get_current_token() - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -401,6 +465,18 @@ def test_join_during_range_and_left_room_after_to_token(self) -> None: # We should still see the room because we were joined during the # from_token/to_token time period. self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -410,13 +486,16 @@ def test_join_before_range_and_left_room_after_to_token(self) -> None: """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave the room after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response = 
self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -428,6 +507,18 @@ def test_join_before_range_and_left_room_after_to_token(self) -> None: # We should still see the room because we were joined before the `from_token` self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -444,9 +535,9 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: kick_room_id = self.helper.create_room_as( user2_id, tok=user2_tok, is_public=True ) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) + join_response1 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) # Kick user1 from the room - self.helper.change_membership( + kick_response = self.helper.change_membership( room=kick_room_id, src=user2_id, targ=user1_id, @@ -463,8 +554,8 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: # # We have to join before we can leave (leave -> leave isn't a valid transition # or at least it doesn't work in Synapse, 403 forbidden) - self.helper.join(kick_room_id, user1_id, tok=user1_tok) - self.helper.leave(kick_room_id, user1_id, tok=user1_tok) + join_response2 = self.helper.join(kick_room_id, user1_id, tok=user1_tok) + leave_response = self.helper.leave(kick_room_id, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -476,6 +567,20 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: # We shouldn't see the room because it was forgotten 
self.assertEqual(room_id_results.keys(), {kick_room_id}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[kick_room_id].event_id, + kick_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "kick_response": kick_response["event_id"], + "join_response2": join_response2["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -494,14 +599,14 @@ def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -513,6 +618,20 @@ def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the 
opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + } + ), + ) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -531,13 +650,13 @@ def test_newly_left_during_range_and_join_after_to_token(self) -> None: # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join and leave the room during the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Join the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -549,11 +668,24 @@ def test_newly_left_during_range_and_join_after_to_token(self) -> None: # Room should still show up because it's newly_left during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + leave_response1["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + } + ), + ) def test_no_from_token(self) -> None: """ Test that if we don't provide a `from_token`, we get all the rooms that we we're - joined to up to the `to_token`. + joined up to the `to_token`. 
Providing `from_token` only really has the effect that it adds `newly_left` rooms to the response. @@ -569,7 +701,7 @@ def test_no_from_token(self) -> None: room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before the `to_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -590,6 +722,11 @@ def test_no_from_token(self) -> None: # Only rooms we were joined to before the `to_token` should show up self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -609,7 +746,7 @@ def test_from_token_ahead_of_to_token(self) -> None: room_id4 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join room1 before `before_room_token` - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) # Join and leave the room2 before `before_room_token` self.helper.join(room_id2, user1_id, tok=user1_tok) @@ -651,6 +788,11 @@ def test_from_token_ahead_of_to_token(self) -> None: # There won't be any newly_left rooms because the `from_token` is ahead of the # `to_token` and that range will give no membership changes to check. self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response1["event_id"], + ) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -741,16 +883,16 @@ def test_join_leave_multiple_times_during_range_and_after_to_token( # leave and can still re-join. 
room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -762,6 +904,22 @@ def test_join_leave_multiple_times_during_range_and_after_to_token( # Room should show up because it was newly_left and joined during the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -781,16 +939,16 @@ def 
test_join_leave_multiple_times_before_range_and_after_to_token( # leave and can still re-join. room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Join, leave, join back to the room before the from/to range - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) after_room1_token = self.event_sources.get_current_token() # Leave and Join the room multiple times after we already have our tokens - self.helper.leave(room_id1, user1_id, tok=user1_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_response2 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response3 = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response3 = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -802,6 +960,22 @@ def test_join_leave_multiple_times_before_range_and_after_to_token( # Room should show up because we were joined before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "leave_response1": leave_response1["event_id"], + "join_response2": join_response2["event_id"], + "leave_response2": leave_response2["event_id"], + "join_response3": join_response3["event_id"], + "leave_response3": leave_response3["event_id"], + } + ), + ) def 
test_invite_before_range_and_join_leave_after_to_token( self, @@ -821,13 +995,15 @@ def test_invite_before_range_and_join_leave_after_to_token( room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) # Invited to the room before the token - self.helper.invite(room_id1, src=user2_id, targ=user1_id, tok=user2_tok) + invite_response = self.helper.invite( + room_id1, src=user2_id, targ=user1_id, tok=user2_tok + ) after_room1_token = self.event_sources.get_current_token() # Join and leave the room after we already have our tokens - self.helper.join(room_id1, user1_id, tok=user1_tok) - self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_respsonse = self.helper.join(room_id1, user1_id, tok=user1_tok) + leave_response = self.helper.leave(room_id1, user1_id, tok=user1_tok) room_id_results = self.get_success( self.sliding_sync_handler.get_sync_room_ids_for_user( @@ -839,6 +1015,217 @@ def test_invite_before_range_and_join_leave_after_to_token( # Room should show up because we were invited before the from/to range self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + invite_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "invite_response": invite_response["event_id"], + "join_respsonse": join_respsonse["event_id"], + "leave_response": leave_response["event_id"], + } + ), + ) + + def test_display_name_changes( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are multiple `join` membership events in a row indicating + `displayname`/`avatar_url` updates. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + 
"displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_leave_after_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there are multiple `join` membership events in a row indicating + `displayname`/`avatar_url` updates and we leave after the `to_token`. + + See condition "1a)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + # Leave after the token + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + 
) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + + def test_display_name_changes_join_after_token_range( + self, + ) -> None: + """ + Test that multiple `join` membership events (after the `to_token`) in a row + indicating `displayname`/`avatar_url` updates doesn't affect the results (we + joined after the token range so it shouldn't show up) + + See condition "1b)" comments in the `get_sync_room_ids_for_user()` method. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + + after_room1_token = self.event_sources.get_current_token() + + self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname after the token range + self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room shouldn't show up because we joined after the from/to range + self.assertEqual(room_id_results.keys(), set()) def test_multiple_rooms_are_not_confused( self, From 83d6f76606bb7d1eaba9d5e498efc9fa15d13957 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 15:27:49 -0500 Subject: [PATCH 29/68] Describe `current_state_delta_stream` better --- synapse/storage/schema/main/delta/42/current_state_delta.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/42/current_state_delta.sql b/synapse/storage/schema/main/delta/42/current_state_delta.sql index 876b61e6a51..3d2fd694803 100644 --- a/synapse/storage/schema/main/delta/42/current_state_delta.sql +++ b/synapse/storage/schema/main/delta/42/current_state_delta.sql @@ -32,7 +32,10 @@ * limitations under the License. */ - +-- Tracks what the server thinks is the current state of the room as time goes. It does +-- not track how state progresses from the beginning of the room. So for example, when +-- you remotely join a room, the first rows will just be the state when you joined and +-- progress from there. 
CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, From fbd92e1c9da2bc89a555f3fa609bba20a76e4440 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 20:16:50 -0500 Subject: [PATCH 30/68] Add `get_current_state_delta_membership_changes_for_user(...)` (using `current_state_delta_stream`) (still need to add newly_left rooms back) --- synapse/handlers/sliding_sync.py | 351 ++++++++++++++--------- synapse/storage/databases/main/stream.py | 151 +++++++++- tests/handlers/test_sliding_sync.py | 73 ++++- 3 files changed, 428 insertions(+), 147 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0538fddf845..2e24b0c338c 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -27,6 +27,7 @@ from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations +from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, @@ -369,6 +370,9 @@ async def get_sync_room_ids_for_user( # Our working list of rooms that can show up in the sync response sync_room_id_set = { + # Note: The `room_for_user` we're assigning here will need to be fixed up + # (below) because they are potentially from the current snapshot time + # instead from the time of the `to_token`. room_for_user.room_id: room_for_user for room_for_user in room_for_user_list if filter_membership_for_sync( @@ -404,33 +408,10 @@ async def get_sync_room_ids_for_user( instance_map=immutabledict(instance_to_max_stream_ordering_map), ) - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to match the point in - # time of the `to_token`. 
In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # - # Below, we're doing two separate lookups for membership changes. We could - # request everything for both fixups in one range, [`from_token.room_key`, - # `membership_snapshot_token`), but we want to avoid raw `stream_ordering` - # comparison without `instance_name` (which is flawed). We could refactor - # `event.internal_metadata` to include `instance_name` but it might turn out a - # little difficult and a bigger, broader Synapse change than we want to make. - - # 1) ----------------------------------------------------- - - # 1) Fetch membership changes that fall in the range from `to_token` up to - # `membership_snapshot_token` - # - # If our `to_token` is already the same or ahead of the latest room membership - # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # use the room list from the snapshot as a base (nothing has changed) - membership_change_events_after_to_token = [] + current_state_delta_membership_changes_after_to_token = [] if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - membership_change_events_after_to_token = ( - await self.store.get_membership_changes_for_user( + current_state_delta_membership_changes_after_to_token = ( + await self.store.get_current_state_delta_membership_changes_for_user( user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, @@ -438,138 +419,224 @@ async def get_sync_room_ids_for_user( ) ) - # 1) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. 
- last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # We also need the first membership event after the `to_token` so we can step + # We need the first membership event after the `to_token` so we can step # backward to the previous membership that would apply to the from/to range. - first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - for event in membership_change_events_after_to_token: - last_membership_change_by_room_id_after_to_token[event.room_id] = event + first_membership_change_by_room_id_after_to_token: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - event.room_id, event + membership_change.room_id, membership_change ) - # 1) Fixup + # Since we fetched a snapshot of the users room list at some point in time after + # the from/to tokens, we need to revert/rewind some membership changes to match + # the point in time of the `to_token`. + prev_event_ids_in_from_to_range = [] for ( - last_membership_change_after_to_token - ) in last_membership_change_by_room_id_after_to_token.values(): - room_id = last_membership_change_after_to_token.room_id - - # We want to find the first membership change after the `to_token` then step - # backward to know the membership in the from/to range. - first_membership_change_after_to_token = ( - first_membership_change_by_room_id_after_to_token.get(room_id) - ) - assert first_membership_change_after_to_token is not None, ( - "If there was a `last_membership_change_after_to_token` that we're iterating over, " - + "then there should be corresponding a first change. For example, even if there " - + "is only one event after the `to_token`, the first and last event will be same event. 
" - + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - + "/`first_membership_change_by_room_id_after_to_token` dicts above." - ) - # TODO: Instead of reading from `unsigned`, refactor this to use the - # `current_state_delta_stream` table in the future. Probably a new - # `get_membership_changes_for_user()` function that uses - # `current_state_delta_stream` with a join to `room_memberships`. This would - # help in state reset scenarios since `prev_content` is looking at the - # current branch vs the current room state. This is all just data given to - # the client so no real harm to data integrity, but we'd like to be nice to - # the client. Since the `current_state_delta_stream` table is new, it - # doesn't have all events in it. Since this is Sliding Sync, if we ever need - # to, we can signal the client to throw all of their state away by sending - # "operation: RESET". - prev_content = first_membership_change_after_to_token.unsigned.get( - "prev_content", {} - ) - prev_membership = prev_content.get("membership", None) - prev_sender = first_membership_change_after_to_token.unsigned.get( - "prev_sender", None + room_id, + first_membership_change_after_to_token, + ) in first_membership_change_by_room_id_after_to_token.items(): + # One of these should exist to be a valid row in `current_state_delta_stream` + assert ( + first_membership_change_after_to_token.event_id is not None + or first_membership_change_after_to_token.prev_event_id is not None ) - # Check if the previous membership (membership that applies to the from/to - # range) should be included in our `sync_room_id_set` - should_prev_membership_be_included = ( - prev_membership is not None - and prev_sender is not None - and filter_membership_for_sync( - membership=prev_membership, - user_id=user_id, - sender=prev_sender, + # If the membership change was added after the `to_token`, we need to remove + # it + if 
first_membership_change_after_to_token.prev_event_id is None: + sync_room_id_set.pop(room_id, None) + # From the first membership event after the `to_token`, we need to step + # backward to the previous membership that would apply to the from/to range. + else: + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) - ) - # Check if the last membership (membership that applies to our snapshot) was - # already included in our `sync_room_id_set` - was_last_membership_already_included = filter_membership_for_sync( - membership=last_membership_change_after_to_token.membership, + # Fetch the previous membership events that apply to the from/to range and fixup + # our working list. + prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # Update if the membership should be included + if filter_membership_for_sync( + membership=prev_event_in_from_to_range.membership, user_id=user_id, - sender=last_membership_change_after_to_token.sender, - ) - - # 1a) Add back rooms that the user left after the `to_token` - # - # For example, if the last membership event after the `to_token` is a leave - # event, then the room was excluded from `sync_room_id_set` when we first - # crafted it above. We should add these rooms back as long as the user also - # was part of the room before the `to_token`. 
- if ( - not was_last_membership_already_included - and should_prev_membership_be_included + sender=prev_event_in_from_to_range.sender, ): - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_after_to_token + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) ) - # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # - # For example, if the last membership event after the `to_token` is a "join" - # event, then the room was included `sync_room_id_set` when we first crafted - # it above. We should remove these rooms as long as the user also wasn't - # part of the room before the `to_token`. - elif ( - was_last_membership_already_included - and not should_prev_membership_be_included - ): - del sync_room_id_set[room_id] - - # 2) ----------------------------------------------------- - # We fix-up newly_left rooms after the first fixup because it may have removed - # some left rooms that we can figure out are newly_left in the following code + # Otherwise, remove it + else: + sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) - # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - membership_change_events_in_from_to_range = [] - if from_token: - membership_change_events_in_from_to_range = ( - await self.store.get_membership_changes_for_user( - user_id, - from_key=from_token.room_key, - to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, - ) - ) + # TODO: Add back newly_left rooms - # 2) Assemble a list of the last membership events in some given ranges. Someone - # could have left and joined multiple times during the given range but we only - # care about end-result so we grab the last one. 
- last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - for event in membership_change_events_in_from_to_range: - last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. In particular, we need to make these fixups: + # + # - 1a) Remove rooms that the user joined after the `to_token` + # - 1b) Add back rooms that the user left after the `to_token` + # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - # 2) Fixup - for ( - last_membership_change_in_from_to_range - ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id - - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We - # include newly_left rooms because the last event that the user should see - # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range - ) + # # 1) ----------------------------------------------------- + + # # 1) Fetch membership changes that fall in the range from `to_token` up to + # # `membership_snapshot_token` + # # + # # If our `to_token` is already the same or ahead of the latest room membership + # # for the user, we don't need to do any "2)" fix-ups and can just straight-up + # # use the room list from the snapshot as a base (nothing has changed) + # membership_change_events_after_to_token = [] + # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): + # membership_change_events_after_to_token = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=to_token.room_key, + # to_key=membership_snapshot_token, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) 
+ # ) + + # # 1) Assemble a list of the last membership events in some given ranges. Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. + # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # # We also need the first membership event after the `to_token` so we can step + # # backward to the previous membership that would apply to the from/to range. + # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} + # for event in membership_change_events_after_to_token: + # last_membership_change_by_room_id_after_to_token[event.room_id] = event + # # Only set if we haven't already set it + # first_membership_change_by_room_id_after_to_token.setdefault( + # event.room_id, event + # ) + + # # 1) Fixup + # for ( + # last_membership_change_after_to_token + # ) in last_membership_change_by_room_id_after_to_token.values(): + # room_id = last_membership_change_after_to_token.room_id + + # # We want to find the first membership change after the `to_token` then step + # # backward to know the membership in the from/to range. + # first_membership_change_after_to_token = ( + # first_membership_change_by_room_id_after_to_token.get(room_id) + # ) + # assert first_membership_change_after_to_token is not None, ( + # "If there was a `last_membership_change_after_to_token` that we're iterating over, " + # + "then there should be corresponding a first change. For example, even if there " + # + "is only one event after the `to_token`, the first and last event will be same event. " + # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" + # + "/`first_membership_change_by_room_id_after_to_token` dicts above." + # ) + # # TODO: Instead of reading from `unsigned`, refactor this to use the + # # `current_state_delta_stream` table in the future. 
Probably a new + # # `get_membership_changes_for_user()` function that uses + # # `current_state_delta_stream` with a join to `room_memberships`. This would + # # help in state reset scenarios since `prev_content` is looking at the + # # current branch vs the current room state. This is all just data given to + # # the client so no real harm to data integrity, but we'd like to be nice to + # # the client. Since the `current_state_delta_stream` table is new, it + # # doesn't have all events in it. Since this is Sliding Sync, if we ever need + # # to, we can signal the client to throw all of their state away by sending + # # "operation: RESET". + # prev_content = first_membership_change_after_to_token.unsigned.get( + # "prev_content", {} + # ) + # prev_membership = prev_content.get("membership", None) + # prev_sender = first_membership_change_after_to_token.unsigned.get( + # "prev_sender", None + # ) + + # # Check if the previous membership (membership that applies to the from/to + # # range) should be included in our `sync_room_id_set` + # should_prev_membership_be_included = ( + # prev_membership is not None + # and prev_sender is not None + # and filter_membership_for_sync( + # membership=prev_membership, + # user_id=user_id, + # sender=prev_sender, + # ) + # ) + + # # Check if the last membership (membership that applies to our snapshot) was + # # already included in our `sync_room_id_set` + # was_last_membership_already_included = filter_membership_for_sync( + # membership=last_membership_change_after_to_token.membership, + # user_id=user_id, + # sender=last_membership_change_after_to_token.sender, + # ) + + # # 1a) Add back rooms that the user left after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a leave + # # event, then the room was excluded from `sync_room_id_set` when we first + # # crafted it above. We should add these rooms back as long as the user also + # # was part of the room before the `to_token`. 
+ # if ( + # not was_last_membership_already_included + # and should_prev_membership_be_included + # ): + # # TODO: Assign the correct membership event at the `to_token` here + # # (currently we're setting it as the last event after the `to_token`) + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_after_to_token + # ) + # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` + # # + # # For example, if the last membership event after the `to_token` is a "join" + # # event, then the room was included `sync_room_id_set` when we first crafted + # # it above. We should remove these rooms as long as the user also wasn't + # # part of the room before the `to_token`. + # elif ( + # was_last_membership_already_included + # and not should_prev_membership_be_included + # ): + # del sync_room_id_set[room_id] + + # # 2) ----------------------------------------------------- + # # We fix-up newly_left rooms after the first fixup because it may have removed + # # some left rooms that we can figure out are newly_left in the following code + + # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + # membership_change_events_in_from_to_range = [] + # if from_token: + # membership_change_events_in_from_to_range = ( + # await self.store.get_membership_changes_for_user( + # user_id, + # from_key=from_token.room_key, + # to_key=to_token.room_key, + # excluded_rooms=self.rooms_to_exclude_globally, + # ) + # ) + + # # 2) Assemble a list of the last membership events in some given ranges. Someone + # # could have left and joined multiple times during the given range but we only + # # care about end-result so we grab the last one. 
+ # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} + # for event in membership_change_events_in_from_to_range: + # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event + + # # 2) Fixup + # for ( + # last_membership_change_in_from_to_range + # ) in last_membership_change_by_room_id_in_from_to_range.values(): + # room_id = last_membership_change_in_from_to_range.room_id + + # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We + # # include newly_left rooms because the last event that the user should see + # # is their own leave event + # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + # last_membership_change_in_from_to_range + # ) return sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c21e69ecdab..f5de23080d9 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -44,6 +44,7 @@ import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Collection, Dict, @@ -62,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -111,6 +112,24 @@ class _EventsAround: end: RoomStreamToken +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event_id: The "current" membership event ID in this room. May be `None` if the + server is no longer in the room or a state reset happened. + prev_event_id: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. 
+ """ + + event_id: Optional[str] + prev_event_id: Optional[str] + room_id: str + # Could be useful but we're not using it yet. + # event_pos: PersistedEventPosition + + def generate_pagination_where_clause( direction: Direction, column_names: Tuple[str, str], @@ -390,6 +409,42 @@ def _filter_results( return True +def _filter_results_by_stream( + lower_token: Optional[RoomStreamToken], + upper_token: Optional[RoomStreamToken], + instance_name: str, + stream_ordering: int, +) -> bool: + """ + Note: This function only works with "live" tokens with `stream_ordering` only. + + Returns True if the event persisted by the given instance at the given + topological/stream_ordering falls between the two tokens (taking a None + token to mean unbounded). + + Used to filter results from fetching events in the DB against the given + tokens. This is necessary to handle the case where the tokens include + position maps, which we handle by fetching more than necessary from the DB + and then filtering (rather than attempting to construct a complicated SQL + query). + """ + if lower_token: + assert lower_token.topological is None + + # If these are live tokens we compare the stream ordering against the + # writers stream position. + if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): + return False + + if upper_token: + assert upper_token.topological is None + + if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: + return False + + return True + + def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. 
it may create @@ -731,6 +786,94 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: return ret, key + async def get_current_state_delta_membership_changes_for_user( + self, + user_id: str, + from_key: RoomStreamToken, + to_key: RoomStreamToken, + excluded_rooms: Optional[List[str]] = None, + ) -> List[CurrentStateDeltaMembership]: + """ + TODO + + Note: This function only works with "live" tokens with `stream_ordering` only. + + All such events whose stream ordering `s` lies in the range `from_key < s <= + to_key` are returned. Events are sorted by `stream_ordering` ascending. + """ + # Start by ruling out cases where a DB query is not necessary. + if from_key == to_key: + return [] + + if from_key: + has_changed = self._membership_stream_cache.has_entity_changed( + user_id, int(from_key.stream) + ) + if not has_changed: + return [] + + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + # To handle tokens with a non-empty instance_map we fetch more + # results than necessary and then filter down + min_from_id = from_key.stream + max_to_id = to_key.get_max_stream_pos() + + args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + + # TODO: It would be good to assert that the `to_token` is >= + # the first row in `current_state_delta_stream` for the rooms we're + # interested in. Otherwise, we will end up with empty results and not know + # it. + + # Note: There is no index for `(type, state_key)` in + # `current_state_delta_stream`. We also can't just add an index for + # `event_id` and join the `room_memberships` table by `event_id` because it + # may be `null` in `current_state_delta_stream` so nothing will match (it's + # `null` when the server is no longer in the room or a state reset happened + # and it was unset). + sql = """ + SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + FROM current_state_delta_stream AS s + WHERE s.type = ? AND s.state_key = ? + AND s.stream_id > ? 
AND s.stream_id <= ? + ORDER BY s.stream_id ASC + """ + + txn.execute(sql, args) + + return [ + CurrentStateDeltaMembership( + event_id=event_id, + prev_event_id=prev_event_id, + room_id=room_id, + # event_pos=PersistedEventPosition( + # instance_name=instance_name, + # stream=stream_ordering, + # ), + ) + for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn + if _filter_results_by_stream( + from_key, + to_key, + instance_name, + stream_ordering, + ) + ] + + current_state_delta_membership_changes = await self.db_pool.runInteraction( + "get_current_state_delta_membership_changes_for_user", f + ) + + rooms_to_exclude: AbstractSet[str] = set() + if excluded_rooms is not None: + rooms_to_exclude = set(excluded_rooms) + + return [ + membership_change + for membership_change in current_state_delta_membership_changes + if membership_change.room_id not in rooms_to_exclude + ] + @cancellable async def get_membership_changes_for_user( self, @@ -766,10 +909,10 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause = "" if excluded_rooms is not None and len(excluded_rooms) > 0: - ignore_room_clause = "AND e.room_id NOT IN (%s)" % ",".join( - "?" 
for _ in excluded_rooms + ignore_room_clause, ignore_room_args = make_in_list_sql_clause( + txn.database_engine, "e.room_id", excluded_rooms, negative=True ) - args = args + excluded_rooms + args += ignore_room_args sql = """ SELECT m.event_id, instance_name, topological_ordering, stream_ordering diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index df262400e4f..694fd17a023 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -1029,7 +1029,7 @@ def test_invite_before_range_and_join_leave_after_to_token( ), ) - def test_display_name_changes( + def test_display_name_changes_in_token_range( self, ) -> None: """ @@ -1102,6 +1102,77 @@ def test_display_name_changes( ), ) + def test_display_name_changes_before_and_after_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event even though there are no + membership events in the from/range but there are `displayname`/`avatar_url` + changes before/after the token range. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname before the token range + displayname_change_before_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname after the token range + displayname_change_after_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname after token range", + }, + tok=user1_tok, + ) + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because we were joined before the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_before_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_before_token_range_response": displayname_change_before_token_range_response[ + "event_id" + ], + "displayname_change_after_token_range_response": displayname_change_after_token_range_response[ + "event_id" + ], + } + ), + ) + def test_display_name_changes_leave_after_token_range( self, ) -> None: From 6c791a88b34b5646324a22584d5f84d99501ff34 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 21:07:19 -0500 Subject: [PATCH 31/68] WIP: Add back `newly_left` --- 
synapse/handlers/sliding_sync.py | 107 ++++++++++++++++++----- synapse/storage/databases/main/stream.py | 18 +++- 2 files changed, 98 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 2e24b0c338c..5603fdeb383 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -375,11 +375,6 @@ async def get_sync_room_ids_for_user( # instead from the time of the `to_token`. room_for_user.room_id: room_for_user for room_for_user in room_for_user_list - if filter_membership_for_sync( - membership=room_for_user.membership, - user_id=user_id, - sender=room_for_user.sender, - ) } # Get the `RoomStreamToken` that represents the spot we queried up to when we got @@ -408,6 +403,23 @@ async def get_sync_room_ids_for_user( instance_map=immutabledict(instance_to_max_stream_ordering_map), ) + # Since we fetched the users room list at some point in time after the from/to + # tokens, we need to revert/rewind some membership changes to match the point in + # time of the `to_token`. 
In particular, we need to make these fixups: + # + # - 1a) Remove rooms that the user joined after the `to_token` + # - 1b) Add back rooms that the user left after the `to_token` + # - 1c) Update room membership events to the point in time of the `to_token` + # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) + + # 1) ----------------------------------------------------- + + # 1) Fetch membership changes that fall in the range from `to_token` up to + # `membership_snapshot_token` + # + # If our `to_token` is already the same or ahead of the latest room membership + # for the user, we don't need to do any "2)" fix-ups and can just straight-up + # use the room list from the snapshot as a base (nothing has changed) current_state_delta_membership_changes_after_to_token = [] if not membership_snapshot_token.is_before_or_eq(to_token.room_key): current_state_delta_membership_changes_after_to_token = ( @@ -419,8 +431,9 @@ async def get_sync_room_ids_for_user( ) ) - # We need the first membership event after the `to_token` so we can step - # backward to the previous membership that would apply to the from/to range. + # 1) Assemble a list of the first membership event after the `to_token` so we can + # step backward to the previous membership that would apply to the from/to + # range. first_membership_change_by_room_id_after_to_token: Dict[ str, CurrentStateDeltaMembership ] = {} @@ -430,6 +443,8 @@ async def get_sync_room_ids_for_user( membership_change.room_id, membership_change ) + # 1) Fixup part 1 + # # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. 
@@ -444,37 +459,81 @@ async def get_sync_room_ids_for_user( or first_membership_change_after_to_token.prev_event_id is not None ) - # If the membership change was added after the `to_token`, we need to remove - # it + # 1a) Remove rooms that the user joined after the `to_token` if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) - # From the first membership event after the `to_token`, we need to step - # backward to the previous membership that would apply to the from/to range. + # 1b) 1c) From the first membership event after the `to_token`, step backward to the + # previous membership that would apply to the from/to range. else: prev_event_ids_in_from_to_range.append( first_membership_change_after_to_token.prev_event_id ) - # Fetch the previous membership events that apply to the from/to range and fixup - # our working list. + # 1) Fixup part 2 + # + # 1b) 1c) Fetch the previous membership events that apply to the from/to range + # and fixup our working list. 
prev_events_in_from_to_range = await self.store.get_events( prev_event_ids_in_from_to_range ) for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # Update if the membership should be included + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + convert_event_to_rooms_for_user(prev_event_in_from_to_range) + ) + + filtered_sync_room_id_set = { + room_id: room_for_user + for room_id, room_for_user in sync_room_id_set.items() if filter_membership_for_sync( - membership=prev_event_in_from_to_range.membership, + membership=room_for_user.membership, user_id=user_id, - sender=prev_event_in_from_to_range.sender, - ): - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) + sender=room_for_user.sender, + ) + } + + # 2) ----------------------------------------------------- + # We fix-up newly_left rooms after the first fixup because it may have removed + # some left rooms that we can figure out are newly_left in the following code + + # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` + current_state_delta_membership_changes_in_from_to_range = [] + if from_token: + current_state_delta_membership_changes_in_from_to_range = ( + await self.store.get_current_state_delta_membership_changes_for_user( + user_id, + from_key=from_token.room_key, + to_key=to_token.room_key, + excluded_rooms=self.rooms_to_exclude_globally, ) - # Otherwise, remove it - else: - sync_room_id_set.pop(prev_event_in_from_to_range.room_id, None) + ) - # TODO: Add back newly_left rooms + # 2) Assemble a list of the last membership events in some given ranges. Someone + # could have left and joined multiple times during the given range but we only + # care about end-result so we grab the last one. 
+ last_membership_change_by_room_id_in_from_to_range: Dict[ + str, CurrentStateDeltaMembership + ] = {} + for ( + membership_change + ) in current_state_delta_membership_changes_in_from_to_range: + last_membership_change_by_room_id_in_from_to_range[ + membership_change.room_id + ] = membership_change + + # 2) Fixup + for ( + last_membership_change_in_from_to_range + ) in last_membership_change_by_room_id_in_from_to_range.values(): + room_id = last_membership_change_in_from_to_range.room_id + + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We + # include newly_left rooms because the last event that the user should see + # is their own leave event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range + ) # Since we fetched the users room list at some point in time after the from/to # tokens, we need to revert/rewind some membership changes to match the point in @@ -638,7 +697,7 @@ async def get_sync_room_ids_for_user( # last_membership_change_in_from_to_range # ) - return sync_room_id_set + return filtered_sync_room_id_set async def filter_rooms( self, diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f5de23080d9..595245e70ec 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -126,6 +126,7 @@ class CurrentStateDeltaMembership: event_id: Optional[str] prev_event_id: Optional[str] room_id: str + membership: str # Could be useful but we're not using it yet. 
# event_pos: PersistedEventPosition @@ -832,7 +833,13 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # `null` when the server is no longer in the room or a state reset happened # and it was unset). sql = """ - SELECT s.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id + SELECT + s.event_id, + s.prev_event_id, + s.room_id, + s.instance_name, + s.stream_id, + m.membership FROM current_state_delta_stream AS s WHERE s.type = ? AND s.state_key = ? AND s.stream_id > ? AND s.stream_id <= ? @@ -846,12 +853,17 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, + # We can assume that the membership is `LEAVE` as a default. This + # will happen when `current_state_delta_stream.event_id` is null + # because it was unset due to a state reset or the server is no + # longer in the room (everyone on our local server left). + membership=membership if membership else Membership.LEAVE, # event_pos=PersistedEventPosition( # instance_name=instance_name, # stream=stream_ordering, # ), ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering in txn + for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn if _filter_results_by_stream( from_key, to_key, From 27d74b023e1a5679b4fbe6a5b4f6efaada8ec3b0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:20:27 -0500 Subject: [PATCH 32/68] Iterate --- synapse/handlers/sliding_sync.py | 33 ++++--- synapse/storage/databases/main/stream.py | 115 +++++++++-------------- 2 files changed, 68 insertions(+), 80 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5603fdeb383..dbbbbc66bfa 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -443,22 +443,16 @@ async def get_sync_room_ids_for_user( membership_change.room_id, membership_change ) - # 1) Fixup part 1 + # 1) Fixup # # 
Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range = [] + prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): - # One of these should exist to be a valid row in `current_state_delta_stream` - assert ( - first_membership_change_after_to_token.event_id is not None - or first_membership_change_after_to_token.prev_event_id is not None - ) - # 1a) Remove rooms that the user joined after the `to_token` if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) @@ -469,7 +463,7 @@ async def get_sync_room_ids_for_user( first_membership_change_after_to_token.prev_event_id ) - # 1) Fixup part 2 + # 1) Fixup (more) # # 1b) 1c) Fetch the previous membership events that apply to the from/to range # and fixup our working list. @@ -522,18 +516,33 @@ async def get_sync_room_ids_for_user( ] = membership_change # 2) Fixup + last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + sync_room_id_set[room_id] + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). 
We # include newly_left rooms because the last event that the user should see # is their own leave event if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range - ) + # Save the look-up if we already have the `leave` event + if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:: + filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id] + else: + last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id) + + # TODO + # last_membership_events_to_include_in_from_to_range = await self.store.get_events( + # last_membership_event_ids_to_include_in_from_to_range + # ) + # for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # # 1b) 1c) Update the membership with what we found + # sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + # convert_event_to_rooms_for_user(prev_event_in_from_to_range) + # ) # Since we fetched the users room list at some point in time after the from/to # tokens, we need to revert/rewind some membership changes to match the point in diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 595245e70ec..ed571b0de7f 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes, Membership +from synapse.api.constants import Direction from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -116,14 +116,13 @@ class _EventsAround: class CurrentStateDeltaMembership: """ Attributes: - event_id: The "current" membership event ID in this room. 
May be `None` if the - server is no longer in the room or a state reset happened. + event_id: The "current" membership event ID in this room. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. """ - event_id: Optional[str] + event_id: str prev_event_id: Optional[str] room_id: str membership: str @@ -410,42 +409,6 @@ def _filter_results( return True -def _filter_results_by_stream( - lower_token: Optional[RoomStreamToken], - upper_token: Optional[RoomStreamToken], - instance_name: str, - stream_ordering: int, -) -> bool: - """ - Note: This function only works with "live" tokens with `stream_ordering` only. - - Returns True if the event persisted by the given instance at the given - topological/stream_ordering falls between the two tokens (taking a None - token to mean unbounded). - - Used to filter results from fetching events in the DB against the given - tokens. This is necessary to handle the case where the tokens include - position maps, which we handle by fetching more than necessary from the DB - and then filtering (rather than attempting to construct a complicated SQL - query). - """ - if lower_token: - assert lower_token.topological is None - - # If these are live tokens we compare the stream ordering against the - # writers stream position. - if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): - return False - - if upper_token: - assert upper_token.topological is None - - if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: - return False - - return True - - def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. 
it may create @@ -819,58 +782,74 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [EventTypes.Member, user_id, min_from_id, max_to_id] + args: List[Any] = [user_id, min_from_id, max_to_id] # TODO: It would be good to assert that the `to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # Note: There is no index for `(type, state_key)` in - # `current_state_delta_stream`. We also can't just add an index for - # `event_id` and join the `room_memberships` table by `event_id` because it - # may be `null` in `current_state_delta_stream` so nothing will match (it's - # `null` when the server is no longer in the room or a state reset happened - # and it was unset). + # We have to look-up events by `stream_ordering` because + # `current_state_delta_stream.event_id` can be `null` if the server is no + # longer in the room or a state reset happened and it was unset. + # `stream_ordering` is unique across the Synapse instance so this should + # work fine. sql = """ SELECT - s.event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, s.stream_id, + e.topological_ordering, m.membership FROM current_state_delta_stream AS s - WHERE s.type = ? AND s.state_key = ? + INNER JOIN events AS e ON e.stream_ordering = s.stream_id + INNER JOIN room_memberships AS m ON m.event_id = e.event_id + WHERE m.user_id = ? AND s.stream_id > ? AND s.stream_id <= ? ORDER BY s.stream_id ASC """ txn.execute(sql, args) - return [ - CurrentStateDeltaMembership( - event_id=event_id, - prev_event_id=prev_event_id, - room_id=room_id, - # We can assume that the membership is `LEAVE` as a default. 
This - # will happen when `current_state_delta_stream.event_id` is null - # because it was unset due to a state reset or the server is no - # longer in the room (everyone on our local server left). - membership=membership if membership else Membership.LEAVE, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), - ) - for event_id, prev_event_id, room_id, instance_name, stream_ordering, membership in txn - if _filter_results_by_stream( + membership_changes: List[CurrentStateDeltaMembership] = [] + for ( + event_id, + prev_event_id, + room_id, + instance_name, + stream_ordering, + topological_ordering, + membership, + ) in txn: + assert event_id is not None + # `prev_event_id` can be `None` + assert room_id is not None + assert instance_name is not None + assert stream_ordering is not None + assert topological_ordering is not None + assert membership is not None + + if _filter_results( from_key, to_key, instance_name, + topological_ordering, stream_ordering, - ) - ] + ): + membership_changes.append( + CurrentStateDeltaMembership( + event_id=event_id, + prev_event_id=prev_event_id, + room_id=room_id, + membership=membership, + # event_pos=PersistedEventPosition( + # instance_name=instance_name, + # stream=stream_ordering, + # ), + ) + ) current_state_delta_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f From fb8fbd489cb920b6d29282e3b2912a311bade162 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:37:45 -0500 Subject: [PATCH 33/68] Just fetch full events for `get_current_state_delta_membership_changes_for_user(...)` Makes downstream logic simpler and although we may look-up some events we don't use, the lookup is all done in one go instead of fetching events from event_ids in a couple different places. 
--- synapse/handlers/sliding_sync.py | 203 +---------------------- synapse/storage/databases/main/stream.py | 54 +++++- 2 files changed, 51 insertions(+), 206 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index dbbbbc66bfa..5d63099499f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -448,34 +448,20 @@ async def get_sync_room_ids_for_user( # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event_id is None: + if first_membership_change_after_to_token.prev_event is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - prev_event_ids_in_from_to_range.append( - first_membership_change_after_to_token.prev_event_id + sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + first_membership_change_after_to_token.prev_event ) - # 1) Fixup (more) - # - # 1b) 1c) Fetch the previous membership events that apply to the from/to range - # and fixup our working list. 
- prev_events_in_from_to_range = await self.store.get_events( - prev_event_ids_in_from_to_range - ) - for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # 1b) 1c) Update the membership with what we found - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - convert_event_to_rooms_for_user(prev_event_in_from_to_range) - ) - filtered_sync_room_id_set = { room_id: room_for_user for room_id, room_for_user in sync_room_id_set.items() @@ -516,195 +502,18 @@ async def get_sync_room_ids_for_user( ] = membership_change # 2) Fixup - last_membership_event_ids_to_include_in_from_to_range: List[str] = [] for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id - sync_room_id_set[room_id] - # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # Save the look-up if we already have the `leave` event - if sync_room_id_set[room_id].event_id == last_membership_change_in_from_to_range.prev_event_id:: - filtered_sync_room_id_set[room_id] = sync_room_id_set[room_id] - else: - last_membership_event_ids_to_include_in_from_to_range.append(last_membership_change_in_from_to_range.event_id) - - # TODO - # last_membership_events_to_include_in_from_to_range = await self.store.get_events( - # last_membership_event_ids_to_include_in_from_to_range - # ) - # for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # # 1b) 1c) Update the membership with what we found - # sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - # convert_event_to_rooms_for_user(prev_event_in_from_to_range) - # ) - - # Since we fetched the users room list at some point in time after the from/to - # tokens, we need to revert/rewind some membership changes to 
match the point in - # time of the `to_token`. In particular, we need to make these fixups: - # - # - 1a) Remove rooms that the user joined after the `to_token` - # - 1b) Add back rooms that the user left after the `to_token` - # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) - - # # 1) ----------------------------------------------------- - - # # 1) Fetch membership changes that fall in the range from `to_token` up to - # # `membership_snapshot_token` - # # - # # If our `to_token` is already the same or ahead of the latest room membership - # # for the user, we don't need to do any "2)" fix-ups and can just straight-up - # # use the room list from the snapshot as a base (nothing has changed) - # membership_change_events_after_to_token = [] - # if not membership_snapshot_token.is_before_or_eq(to_token.room_key): - # membership_change_events_after_to_token = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=to_token.room_key, - # to_key=membership_snapshot_token, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 1) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. - # last_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # # We also need the first membership event after the `to_token` so we can step - # # backward to the previous membership that would apply to the from/to range. 
- # first_membership_change_by_room_id_after_to_token: Dict[str, EventBase] = {} - # for event in membership_change_events_after_to_token: - # last_membership_change_by_room_id_after_to_token[event.room_id] = event - # # Only set if we haven't already set it - # first_membership_change_by_room_id_after_to_token.setdefault( - # event.room_id, event - # ) - - # # 1) Fixup - # for ( - # last_membership_change_after_to_token - # ) in last_membership_change_by_room_id_after_to_token.values(): - # room_id = last_membership_change_after_to_token.room_id - - # # We want to find the first membership change after the `to_token` then step - # # backward to know the membership in the from/to range. - # first_membership_change_after_to_token = ( - # first_membership_change_by_room_id_after_to_token.get(room_id) - # ) - # assert first_membership_change_after_to_token is not None, ( - # "If there was a `last_membership_change_after_to_token` that we're iterating over, " - # + "then there should be corresponding a first change. For example, even if there " - # + "is only one event after the `to_token`, the first and last event will be same event. " - # + "This is probably a mistake in assembling the `last_membership_change_by_room_id_after_to_token`" - # + "/`first_membership_change_by_room_id_after_to_token` dicts above." - # ) - # # TODO: Instead of reading from `unsigned`, refactor this to use the - # # `current_state_delta_stream` table in the future. Probably a new - # # `get_membership_changes_for_user()` function that uses - # # `current_state_delta_stream` with a join to `room_memberships`. This would - # # help in state reset scenarios since `prev_content` is looking at the - # # current branch vs the current room state. This is all just data given to - # # the client so no real harm to data integrity, but we'd like to be nice to - # # the client. Since the `current_state_delta_stream` table is new, it - # # doesn't have all events in it. 
Since this is Sliding Sync, if we ever need - # # to, we can signal the client to throw all of their state away by sending - # # "operation: RESET". - # prev_content = first_membership_change_after_to_token.unsigned.get( - # "prev_content", {} - # ) - # prev_membership = prev_content.get("membership", None) - # prev_sender = first_membership_change_after_to_token.unsigned.get( - # "prev_sender", None - # ) - - # # Check if the previous membership (membership that applies to the from/to - # # range) should be included in our `sync_room_id_set` - # should_prev_membership_be_included = ( - # prev_membership is not None - # and prev_sender is not None - # and filter_membership_for_sync( - # membership=prev_membership, - # user_id=user_id, - # sender=prev_sender, - # ) - # ) - - # # Check if the last membership (membership that applies to our snapshot) was - # # already included in our `sync_room_id_set` - # was_last_membership_already_included = filter_membership_for_sync( - # membership=last_membership_change_after_to_token.membership, - # user_id=user_id, - # sender=last_membership_change_after_to_token.sender, - # ) - - # # 1a) Add back rooms that the user left after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a leave - # # event, then the room was excluded from `sync_room_id_set` when we first - # # crafted it above. We should add these rooms back as long as the user also - # # was part of the room before the `to_token`. 
- # if ( - # not was_last_membership_already_included - # and should_prev_membership_be_included - # ): - # # TODO: Assign the correct membership event at the `to_token` here - # # (currently we're setting it as the last event after the `to_token`) - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_after_to_token - # ) - # # 1b) Remove rooms that the user joined (hasn't left) after the `to_token` - # # - # # For example, if the last membership event after the `to_token` is a "join" - # # event, then the room was included `sync_room_id_set` when we first crafted - # # it above. We should remove these rooms as long as the user also wasn't - # # part of the room before the `to_token`. - # elif ( - # was_last_membership_already_included - # and not should_prev_membership_be_included - # ): - # del sync_room_id_set[room_id] - - # # 2) ----------------------------------------------------- - # # We fix-up newly_left rooms after the first fixup because it may have removed - # # some left rooms that we can figure out are newly_left in the following code - - # # 2) Fetch membership changes that fall in the range from `from_token` up to `to_token` - # membership_change_events_in_from_to_range = [] - # if from_token: - # membership_change_events_in_from_to_range = ( - # await self.store.get_membership_changes_for_user( - # user_id, - # from_key=from_token.room_key, - # to_key=to_token.room_key, - # excluded_rooms=self.rooms_to_exclude_globally, - # ) - # ) - - # # 2) Assemble a list of the last membership events in some given ranges. Someone - # # could have left and joined multiple times during the given range but we only - # # care about end-result so we grab the last one. 
- # last_membership_change_by_room_id_in_from_to_range: Dict[str, EventBase] = {} - # for event in membership_change_events_in_from_to_range: - # last_membership_change_by_room_id_in_from_to_range[event.room_id] = event - - # # 2) Fixup - # for ( - # last_membership_change_in_from_to_range - # ) in last_membership_change_by_room_id_in_from_to_range.values(): - # room_id = last_membership_change_in_from_to_range.room_id - - # # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We - # # include newly_left rooms because the last event that the user should see - # # is their own leave event - # if last_membership_change_in_from_to_range.membership == Membership.LEAVE: - # sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - # last_membership_change_in_from_to_range - # ) + filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( + last_membership_change_in_from_to_range.event + ) return filtered_sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ed571b0de7f..ce135ededc9 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -113,21 +113,37 @@ class _EventsAround: @attr.s(slots=True, frozen=True, auto_attribs=True) -class CurrentStateDeltaMembership: +class _CurrentStateDeltaMembershipReturn: """ Attributes: event_id: The "current" membership event ID in this room. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. + membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str membership: str - # Could be useful but we're not using it yet. 
- # event_pos: PersistedEventPosition + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class CurrentStateDeltaMembership: + """ + Attributes: + event: The "current" membership event in this room. + prev_event: The previous membership event in this room that was replaced by + the "current" one. May be `None` if there was no previous membership event. + room_id: The room ID of the membership event. + membership: The membership state of the user in the room. + """ + + event: EventBase + prev_event: Optional[EventBase] + room_id: str + membership: str def generate_pagination_where_clause( @@ -776,7 +792,7 @@ async def get_current_state_delta_membership_changes_for_user( if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: + def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -813,7 +829,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: txn.execute(sql, args) - membership_changes: List[CurrentStateDeltaMembership] = [] + membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] for ( event_id, prev_event_id, @@ -839,7 +855,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: stream_ordering, ): membership_changes.append( - CurrentStateDeltaMembership( + _CurrentStateDeltaMembershipReturn( event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, @@ -851,17 +867,37 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: ) ) - current_state_delta_membership_changes = await self.db_pool.runInteraction( + return membership_changes + + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) + # Fetch all events in one go + event_ids = [] + for m in membership_changes: + event_ids.append(m.event_id) + if m.prev_event_id is 
not None: + event_ids.append(m.prev_event_id) + + events = await self.get_events(event_ids, get_prev_content=False) + rooms_to_exclude: AbstractSet[str] = set() if excluded_rooms is not None: rooms_to_exclude = set(excluded_rooms) return [ - membership_change - for membership_change in current_state_delta_membership_changes + CurrentStateDeltaMembership( + event=events[membership_change.event_id], + prev_event=( + events[membership_change.prev_event_id] + if membership_change.prev_event_id + else None + ), + room_id=membership_change.room_id, + membership=membership_change.membership, + ) + for membership_change in membership_changes if membership_change.room_id not in rooms_to_exclude ] From d91aa0018ca082cc88a3b3bfb7e06d1becb74227 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 22:45:59 -0500 Subject: [PATCH 34/68] Remove extras --- synapse/handlers/sliding_sync.py | 15 +++++---- synapse/storage/databases/main/stream.py | 40 +++++++----------------- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5d63099499f..fed663ac364 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -427,7 +427,7 @@ async def get_sync_room_ids_for_user( user_id, from_key=to_token.room_key, to_key=membership_snapshot_token, - excluded_rooms=self.rooms_to_exclude_globally, + excluded_room_ids=self.rooms_to_exclude_globally, ) ) @@ -440,7 +440,7 @@ async def get_sync_room_ids_for_user( for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.room_id, membership_change + membership_change.event.room_id, membership_change ) # 1) Fixup @@ -484,7 +484,7 @@ async def get_sync_room_ids_for_user( user_id, from_key=from_token.room_key, to_key=to_token.room_key, - excluded_rooms=self.rooms_to_exclude_globally, + 
excluded_room_ids=self.rooms_to_exclude_globally, ) ) @@ -498,19 +498,22 @@ async def get_sync_room_ids_for_user( membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.room_id + membership_change.event.room_id ] = membership_change # 2) Fixup for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.room_id + room_id = last_membership_change_in_from_to_range.event.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event - if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + if ( + last_membership_change_in_from_to_range.event.membership + == Membership.LEAVE + ): filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( last_membership_change_in_from_to_range.event ) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ce135ededc9..efc0b88797e 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -120,13 +120,11 @@ class _CurrentStateDeltaMembershipReturn: prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. - membership: The membership state of the user in the room. """ event_id: str prev_event_id: Optional[str] room_id: str - membership: str @attr.s(slots=True, frozen=True, auto_attribs=True) @@ -136,14 +134,10 @@ class CurrentStateDeltaMembership: event: The "current" membership event in this room. prev_event: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. 
- room_id: The room ID of the membership event. - membership: The membership state of the user in the room. """ event: EventBase prev_event: Optional[EventBase] - room_id: str - membership: str def generate_pagination_where_clause( @@ -771,7 +765,7 @@ async def get_current_state_delta_membership_changes_for_user( user_id: str, from_key: RoomStreamToken, to_key: RoomStreamToken, - excluded_rooms: Optional[List[str]] = None, + excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ TODO @@ -817,8 +811,7 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: s.room_id, s.instance_name, s.stream_id, - e.topological_ordering, - m.membership + e.topological_ordering FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -837,7 +830,6 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: instance_name, stream_ordering, topological_ordering, - membership, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -845,7 +837,6 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None - assert membership is not None if _filter_results( from_key, @@ -859,46 +850,39 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: event_id=event_id, prev_event_id=prev_event_id, room_id=room_id, - membership=membership, - # event_pos=PersistedEventPosition( - # instance_name=instance_name, - # stream=stream_ordering, - # ), ) ) return membership_changes - membership_changes = await self.db_pool.runInteraction( + raw_membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) # Fetch all events in one go event_ids = [] - for m in membership_changes: + for m in raw_membership_changes: 
event_ids.append(m.event_id) if m.prev_event_id is not None: event_ids.append(m.prev_event_id) events = await self.get_events(event_ids, get_prev_content=False) - rooms_to_exclude: AbstractSet[str] = set() - if excluded_rooms is not None: - rooms_to_exclude = set(excluded_rooms) + room_ids_to_exclude: AbstractSet[str] = set() + if excluded_room_ids is not None: + room_ids_to_exclude = set(excluded_room_ids) return [ CurrentStateDeltaMembership( - event=events[membership_change.event_id], + event=events[raw_membership_change.event_id], prev_event=( - events[membership_change.prev_event_id] - if membership_change.prev_event_id + events[raw_membership_change.prev_event_id] + if raw_membership_change.prev_event_id else None ), - room_id=membership_change.room_id, - membership=membership_change.membership, ) - for membership_change in membership_changes - if membership_change.room_id not in rooms_to_exclude + for raw_membership_change in raw_membership_changes + if raw_membership_change.room_id not in room_ids_to_exclude ] @cancellable From daa7e3691aa73f2d8a81de1823a0a44b54fe838f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:01:28 -0500 Subject: [PATCH 35/68] Add docstring --- synapse/storage/databases/main/stream.py | 32 ++++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index efc0b88797e..730e55d135c 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -768,12 +768,34 @@ async def get_current_state_delta_membership_changes_for_user( excluded_room_ids: Optional[List[str]] = None, ) -> List[CurrentStateDeltaMembership]: """ - TODO + Fetch membership events (and the previous event that was replaced by that one) + for a given user. - Note: This function only works with "live" tokens with `stream_ordering` only. 
+ We're looking for membership changes in the token range (> `from_key` and <= + `to_key`). - All such events whose stream ordering `s` lies in the range `from_key < s <= - to_key` are returned. Events are sorted by `stream_ordering` ascending. + Please be mindful to only use this with `from_key` and `to_key` tokens that are + recent enough to be after when the first local user joined the room. Otherwise, + the results may be incomplete or too greedy. For example, if you use a token + range before the first local user joined the room, you will see 0 events since + `current_state_delta_stream` tracks what the server thinks is the current state + of the room as time goes. It does not track how state progresses from the + beginning of the room. So for example, when you remotely join a room, the first + rows will just be the state when you joined and progress from there. + + You can probably reasonably use this with `/sync` because the `to_key` passed in + will be the "current" now token and the range will cover when the user joined + the room. + + Args: + user_id: The user ID to fetch membership events for. + from_key: The point in the stream to sync from (fetching events > this point). + to_key: The token to fetch rooms up to (fetching events <= this point). + excluded_room_ids: Optional list of room IDs to exclude from the results. + + Returns: + All membership changes to the current state in the token range. Events are + sorted by `stream_ordering` ascending. """ # Start by ruling out cases where a DB query is not necessary. if from_key == to_key: @@ -794,7 +816,7 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: args: List[Any] = [user_id, min_from_id, max_to_id] - # TODO: It would be good to assert that the `to_token` is >= + # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. 
Otherwise, we will end up with empty results and not know # it. From cccbd15e7ece55ec8aab2632fcb7099215b29c86 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 25 Jun 2024 23:40:55 -0500 Subject: [PATCH 36/68] Refactor back to not pulling out full events --- synapse/handlers/sliding_sync.py | 159 +++++++++++++---------- synapse/storage/databases/main/stream.py | 65 ++++----- 2 files changed, 120 insertions(+), 104 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fed663ac364..c1cfec50008 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -28,7 +28,6 @@ from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.storage.roommember import RoomsForUser from synapse.types import ( JsonDict, PersistedEventPosition, @@ -48,27 +47,6 @@ logger = logging.getLogger(__name__) -def convert_event_to_rooms_for_user(event: EventBase) -> RoomsForUser: - """ - Quick helper to convert an event to a `RoomsForUser` object. 
- """ - # These fields should be present for all persisted events - assert event.internal_metadata.stream_ordering is not None - assert event.internal_metadata.instance_name is not None - - return RoomsForUser( - room_id=event.room_id, - sender=event.sender, - membership=event.membership, - event_id=event.event_id, - event_pos=PersistedEventPosition( - event.internal_metadata.instance_name, - event.internal_metadata.stream_ordering, - ), - room_version_id=event.room_version.identifier, - ) - - def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: """ Returns True if the membership event should be included in the sync response, @@ -108,6 +86,25 @@ class RoomSyncConfig: required_state: Set[Tuple[str, str]] +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _RoomMembershipForUser: + """ + Attributes: + event_id: The event ID of the membership event + event_pos: The stream position of the membership event + membership: The membership state of the user in the room + sender: The person who sent the membership event + newly_joined: Whether the user newly joined the room during the given token + range + """ + + event_id: str + event_pos: PersistedEventPosition + membership: str + sender: str + newly_joined: bool + + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() @@ -302,7 +299,7 @@ async def current_sync_for_user( user=sync_config.user, room_id=room_id, room_sync_config=room_sync_config, - rooms_for_user_membership_at_to_token=sync_room_map[room_id], + rooms_membership_for_user_at_to_token=sync_room_map[room_id], from_token=from_token, to_token=to_token, ) @@ -321,7 +318,7 @@ async def get_sync_room_ids_for_user( user: UserID, to_token: StreamToken, from_token: Optional[StreamToken] = None, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Fetch room IDs that should be listed for this user in the sync response (the full room list that will be filtered, sorted, 
and sliced). @@ -373,7 +370,13 @@ async def get_sync_room_ids_for_user( # Note: The `room_for_user` we're assigning here will need to be fixed up # (below) because they are potentially from the current snapshot time # instead from the time of the `to_token`. - room_for_user.room_id: room_for_user + room_for_user.room_id: _RoomMembershipForUser( + event_id=room_for_user.event_id, + event_pos=room_for_user.event_pos, + membership=room_for_user.membership, + sender=room_for_user.sender, + newly_joined=False, + ) for room_for_user in room_for_user_list } @@ -440,7 +443,7 @@ async def get_sync_room_ids_for_user( for membership_change in current_state_delta_membership_changes_after_to_token: # Only set if we haven't already set it first_membership_change_by_room_id_after_to_token.setdefault( - membership_change.event.room_id, membership_change + membership_change.room_id, membership_change ) # 1) Fixup @@ -448,27 +451,59 @@ async def get_sync_room_ids_for_user( # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. + prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, ) in first_membership_change_by_room_id_after_to_token.items(): # 1a) Remove rooms that the user joined after the `to_token` - if first_membership_change_after_to_token.prev_event is None: + if first_membership_change_after_to_token.prev_event_id is None: sync_room_id_set.pop(room_id, None) # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. 
else: - sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - first_membership_change_after_to_token.prev_event + prev_event_ids_in_from_to_range.append( + first_membership_change_after_to_token.prev_event_id ) + # 1) Fixup (more) + # + # 1b) 1c) Fetch the previous membership events that apply to the from/to range + # and fixup our working list. + prev_events_in_from_to_range = await self.store.get_events( + prev_event_ids_in_from_to_range + ) + for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): + # These fields should be present for all persisted events + assert ( + prev_event_in_from_to_range.internal_metadata.instance_name is not None + ) + assert ( + prev_event_in_from_to_range.internal_metadata.stream_ordering + is not None + ) + + # 1b) 1c) Update the membership with what we found + sync_room_id_set[prev_event_in_from_to_range.room_id] = ( + _RoomMembershipForUser( + event_id=prev_event_in_from_to_range.event_id, + event_pos=PersistedEventPosition( + instance_name=prev_event_in_from_to_range.internal_metadata.instance_name, + stream=prev_event_in_from_to_range.internal_metadata.stream_ordering, + ), + membership=prev_event_in_from_to_range.membership, + sender=prev_event_in_from_to_range.sender, + newly_joined=False, + ) + ) + filtered_sync_room_id_set = { - room_id: room_for_user - for room_id, room_for_user in sync_room_id_set.items() + room_id: room_membership_for_user + for room_id, room_membership_for_user in sync_room_id_set.items() if filter_membership_for_sync( - membership=room_for_user.membership, + membership=room_membership_for_user.membership, user_id=user_id, - sender=room_for_user.sender, + sender=room_membership_for_user.sender, ) } @@ -498,35 +533,38 @@ async def get_sync_room_ids_for_user( membership_change ) in current_state_delta_membership_changes_in_from_to_range: last_membership_change_by_room_id_in_from_to_range[ - membership_change.event.room_id + membership_change.room_id ] = membership_change # 2) 
Fixup for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): - room_id = last_membership_change_in_from_to_range.event.room_id + room_id = last_membership_change_in_from_to_range.room_id # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event - if ( - last_membership_change_in_from_to_range.event.membership - == Membership.LEAVE - ): - filtered_sync_room_id_set[room_id] = convert_event_to_rooms_for_user( - last_membership_change_in_from_to_range.event + if last_membership_change_in_from_to_range.membership == Membership.LEAVE: + filtered_sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=last_membership_change_in_from_to_range.event_id, + event_pos=last_membership_change_in_from_to_range.event_pos, + membership=last_membership_change_in_from_to_range.membership, + sender=last_membership_change_in_from_to_range.sender, + newly_joined=False, ) + # TODO: Figure out `newly_joined` + return filtered_sync_room_id_set async def filter_rooms( self, user: UserID, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], filters: SlidingSyncConfig.SlidingSyncList.Filters, to_token: StreamToken, - ) -> Dict[str, RoomsForUser]: + ) -> Dict[str, _RoomMembershipForUser]: """ Filter rooms based on the sync request. @@ -627,9 +665,9 @@ async def filter_rooms( async def sort_rooms( self, - sync_room_map: Dict[str, RoomsForUser], + sync_room_map: Dict[str, _RoomMembershipForUser], to_token: StreamToken, - ) -> List[Tuple[str, RoomsForUser]]: + ) -> List[Tuple[str, _RoomMembershipForUser]]: """ Sort by `stream_ordering` of the last event that the user should see in the room. `stream_ordering` is unique so we get a stable sort. 
@@ -682,7 +720,7 @@ async def get_room_sync_data( user: UserID, room_id: str, room_sync_config: RoomSyncConfig, - rooms_for_user_membership_at_to_token: RoomsForUser, + rooms_membership_for_user_at_to_token: _RoomMembershipForUser, from_token: Optional[StreamToken], to_token: StreamToken, ) -> SlidingSyncResult.RoomResult: @@ -696,7 +734,7 @@ async def get_room_sync_data( room_id: The room ID to fetch data for room_sync_config: Config for what data we should fetch for a room in the sync response. - rooms_for_user_membership_at_to_token: Membership information for the user + rooms_membership_for_user_at_to_token: Membership information for the user in the room at the time of `to_token`. from_token: The point in the stream to sync from. to_token: The point in the stream to sync up to. @@ -716,7 +754,7 @@ async def get_room_sync_data( if ( room_sync_config.timeline_limit > 0 # No timeline for invite/knock rooms (just `stripped_state`) - and rooms_for_user_membership_at_to_token.membership + and rooms_membership_for_user_at_to_token.membership not in (Membership.INVITE, Membership.KNOCK) ): limited = False @@ -726,27 +764,15 @@ async def get_room_sync_data( # position once we've fetched the events to point to the earliest event fetched. 
prev_batch_token = to_token - newly_joined = False - if ( - # We can only determine new-ness if we have a `from_token` to define our range - from_token is not None - and rooms_for_user_membership_at_to_token.membership == Membership.JOIN - ): - newly_joined = ( - rooms_for_user_membership_at_to_token.event_pos.persisted_after( - from_token.room_key - ) - ) - # We're going to paginate backwards from the `to_token` from_bound = to_token.room_key # People shouldn't see past their leave/ban event - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.LEAVE, Membership.BAN, ): from_bound = ( - rooms_for_user_membership_at_to_token.event_pos.to_room_stream_token() + rooms_membership_for_user_at_to_token.event_pos.to_room_stream_token() ) # Determine whether we should limit the timeline to the token range. @@ -760,7 +786,8 @@ async def get_room_sync_data( # connection before to_bound = ( from_token.room_key - if from_token is not None and not newly_joined + if from_token is not None + and not rooms_membership_for_user_at_to_token.newly_joined else None ) @@ -797,7 +824,7 @@ async def get_room_sync_data( self.storage_controllers, user.to_string(), timeline_events, - is_peeking=rooms_for_user_membership_at_to_token.membership + is_peeking=rooms_membership_for_user_at_to_token.membership != Membership.JOIN, filter_send_to_client=True, ) @@ -852,12 +879,12 @@ async def get_room_sync_data( # Figure out any stripped state events for invite/knocks. This allows the # potential joiner to identify the room. 
stripped_state: List[JsonDict] = [] - if rooms_for_user_membership_at_to_token.membership in ( + if rooms_membership_for_user_at_to_token.membership in ( Membership.INVITE, Membership.KNOCK, ): invite_or_knock_event = await self.store.get_event( - rooms_for_user_membership_at_to_token.event_id + rooms_membership_for_user_at_to_token.event_id ) stripped_state = [] diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 730e55d135c..c5e65379806 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -113,31 +113,24 @@ class _EventsAround: @attr.s(slots=True, frozen=True, auto_attribs=True) -class _CurrentStateDeltaMembershipReturn: +class CurrentStateDeltaMembership: """ Attributes: event_id: The "current" membership event ID in this room. + event_pos: The position of the "current" membership event in the event stream. prev_event_id: The previous membership event in this room that was replaced by the "current" one. May be `None` if there was no previous membership event. room_id: The room ID of the membership event. + membership: The membership state of the user in the room + sender: The person who sent the membership event """ event_id: str + event_pos: PersistedEventPosition prev_event_id: Optional[str] room_id: str - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class CurrentStateDeltaMembership: - """ - Attributes: - event: The "current" membership event in this room. - prev_event: The previous membership event in this room that was replaced by - the "current" one. May be `None` if there was no previous membership event. 
- """ - - event: EventBase - prev_event: Optional[EventBase] + membership: str + sender: str def generate_pagination_where_clause( @@ -808,7 +801,7 @@ async def get_current_state_delta_membership_changes_for_user( if not has_changed: return [] - def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: + def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -833,7 +826,9 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: s.room_id, s.instance_name, s.stream_id, - e.topological_ordering + e.topological_ordering, + m.membership, + e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_id = e.event_id @@ -844,7 +839,7 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: txn.execute(sql, args) - membership_changes: List[_CurrentStateDeltaMembershipReturn] = [] + membership_changes: List[CurrentStateDeltaMembership] = [] for ( event_id, prev_event_id, @@ -852,6 +847,8 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: instance_name, stream_ordering, topological_ordering, + membership, + sender, ) in txn: assert event_id is not None # `prev_event_id` can be `None` @@ -859,6 +856,8 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: assert instance_name is not None assert stream_ordering is not None assert topological_ordering is not None + assert membership is not None + assert sender is not None if _filter_results( from_key, @@ -868,43 +867,33 @@ def f(txn: LoggingTransaction) -> List[_CurrentStateDeltaMembershipReturn]: stream_ordering, ): membership_changes.append( - _CurrentStateDeltaMembershipReturn( + CurrentStateDeltaMembership( event_id=event_id, + event_pos=PersistedEventPosition( + 
instance_name=instance_name, + stream=stream_ordering, + ), prev_event_id=prev_event_id, room_id=room_id, + membership=membership, + sender=sender, ) ) return membership_changes - raw_membership_changes = await self.db_pool.runInteraction( + membership_changes = await self.db_pool.runInteraction( "get_current_state_delta_membership_changes_for_user", f ) - # Fetch all events in one go - event_ids = [] - for m in raw_membership_changes: - event_ids.append(m.event_id) - if m.prev_event_id is not None: - event_ids.append(m.prev_event_id) - - events = await self.get_events(event_ids, get_prev_content=False) - room_ids_to_exclude: AbstractSet[str] = set() if excluded_room_ids is not None: room_ids_to_exclude = set(excluded_room_ids) return [ - CurrentStateDeltaMembership( - event=events[raw_membership_change.event_id], - prev_event=( - events[raw_membership_change.prev_event_id] - if raw_membership_change.prev_event_id - else None - ), - ) - for raw_membership_change in raw_membership_changes - if raw_membership_change.room_id not in room_ids_to_exclude + membership_change + for membership_change in membership_changes + if membership_change.room_id not in room_ids_to_exclude ] @cancellable From 62c6a4e8609f5d563b85f576d0a4d5b764c1f9c2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 01:10:00 -0500 Subject: [PATCH 37/68] Add `newly_joined` support to `get_sync_room_ids_for_user(...)` --- synapse/handlers/sliding_sync.py | 82 +++++++++- tests/handlers/test_sliding_sync.py | 224 +++++++++++++++++++++++++++- 2 files changed, 300 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c1cfec50008..97b04698b2d 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,8 @@ # # import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple +from collections import defaultdict +from typing import TYPE_CHECKING, Any, Dict, List, Optional, 
Set, Tuple import attr from immutabledict import immutabledict @@ -104,6 +105,9 @@ class _RoomMembershipForUser: sender: str newly_joined: bool + def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser": + return attr.evolve(self, **kwds) + class SlidingSyncHandler: def __init__(self, hs: "HomeServer"): @@ -414,6 +418,7 @@ async def get_sync_room_ids_for_user( # - 1b) Add back rooms that the user left after the `to_token` # - 1c) Update room membership events to the point in time of the `to_token` # - 2) Add back newly_left rooms (> `from_token` and <= `to_token`) + # - 3) Figure out which rooms are `newly_joined` # 1) ----------------------------------------------------- @@ -529,19 +534,49 @@ async def get_sync_room_ids_for_user( last_membership_change_by_room_id_in_from_to_range: Dict[ str, CurrentStateDeltaMembership ] = {} + # We also want to assemble a list of the first membership events during the token + # range so we can step backward to the previous membership that would apply to + # before the token range to see if we have `newly_joined` the room. + first_membership_change_by_room_id_in_from_to_range: Dict[ + str, CurrentStateDeltaMembership + ] = {} + non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = ( + defaultdict(list) + ) for ( membership_change ) in current_state_delta_membership_changes_in_from_to_range: - last_membership_change_by_room_id_in_from_to_range[ - membership_change.room_id - ] = membership_change + room_id = membership_change.room_id + + last_membership_change_by_room_id_in_from_to_range[room_id] = ( + membership_change + ) + + # Only set if we haven't already set it + first_membership_change_by_room_id_in_from_to_range.setdefault( + room_id, membership_change + ) + + if membership_change.membership != Membership.JOIN: + non_join_event_ids_by_room_id_in_from_to_range[room_id].append( + membership_change.event_id + ) # 2) Fixup + # + # 3) We also want to assemble a list of possibly newly joined rooms. 
Someone + # could have left and joined multiple times during the given range but we only + # care about whether they are joined at the end of the token range so we are + # working with the last membership even in the token range. + possibly_newly_joined_room_ids = set() for ( last_membership_change_in_from_to_range ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + if last_membership_change_in_from_to_range.membership == Membership.JOIN: + possibly_newly_joined_room_ids.add(room_id) + # 2) Add back newly_left rooms (> `from_token` and <= `to_token`). We # include newly_left rooms because the last event that the user should see # is their own leave event @@ -554,7 +589,44 @@ async def get_sync_room_ids_for_user( newly_joined=False, ) - # TODO: Figure out `newly_joined` + # 3) Figure out `newly_joined` + prev_event_ids_before_token_range: List[str] = [] + for possibly_newly_joined_room_id in possibly_newly_joined_room_ids: + non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[ + possibly_newly_joined_room_id + ] + if len(non_joins_for_room) > 0: + # We found a `newly_joined` room (we left and joined within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) + else: + prev_event_id = first_membership_change_by_room_id_in_from_to_range[ + room_id + ].prev_event_id + + if prev_event_id is None: + # We found a `newly_joined` room (we are joining the room for the + # first time within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) + else: + # Last resort, we need to step back to the previous membership event + # just before the token range to see if we're joined then or not. 
+ prev_event_ids_before_token_range.append(prev_event_id) + + # 3) more + prev_events_before_token_range = await self.store.get_events( + prev_event_ids_before_token_range + ) + for prev_event_before_token_range in prev_events_before_token_range.values(): + if prev_event_before_token_range.membership != Membership.JOIN: + # We found a `newly_joined` room (we left before the token range + # and joined within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) return filtered_sync_room_id_set diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 694fd17a023..c25ca41098d 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -116,6 +116,9 @@ def test_get_newly_joined_room(self) -> None: room_id_results[room_id].event_id, join_response["event_id"], ) + # We should be considered `newly_joined` because we joined during the token + # range + self.assertEqual(room_id_results[room_id].newly_joined, True) def test_get_already_joined_room(self) -> None: """ @@ -146,6 +149,8 @@ def test_get_already_joined_room(self) -> None: room_id_results[room_id].event_id, join_response["event_id"], ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id].newly_joined, False) def test_get_invited_banned_knocked_room(self) -> None: """ @@ -232,6 +237,11 @@ def test_get_invited_banned_knocked_room(self) -> None: room_id_results[knock_room_id].event_id, knock_room_membership_state_event.event_id, ) + # We should *NOT* be `newly_joined` because we were not joined at the time + # of the `to_token`.
+ self.assertEqual(room_id_results[invited_room_id].newly_joined, False) + self.assertEqual(room_id_results[ban_room_id].newly_joined, False) + self.assertEqual(room_id_results[knock_room_id].newly_joined, False) def test_get_kicked_room(self) -> None: """ @@ -277,6 +287,9 @@ def test_get_kicked_room(self) -> None: room_id_results[kick_room_id].event_id, kick_response["event_id"], ) + # We should *NOT* be `newly_joined` because we were not joined at the time + # of the `to_token`. + self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_forgotten_rooms(self) -> None: """ @@ -396,6 +409,8 @@ def test_only_newly_left_rooms_show_up(self) -> None: room_id_results[room_id2].event_id, leave_response["event_id"], ) + # We should *NOT* be `newly_joined` because we are instead `newly_left` + self.assertEqual(room_id_results[room_id2].newly_joined, False) def test_no_joins_after_to_token(self) -> None: """ @@ -432,6 +447,8 @@ def test_no_joins_after_to_token(self) -> None: room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_during_range_and_left_room_after_to_token(self) -> None: """ @@ -477,6 +494,8 @@ def test_join_during_range_and_left_room_after_to_token(self) -> None: } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_before_range_and_left_room_after_to_token(self) -> None: """ @@ -519,6 +538,8 @@ def test_join_before_range_and_left_room_after_to_token(self) -> None: } ), ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_kicked_before_range_and_left_after_to_token(self) -> None: """ @@ -581,6 +602,8 @@ def test_kicked_before_range_and_left_after_to_token(self) -> None: }
), ) + # We should *NOT* be `newly_joined` because we were kicked + self.assertEqual(room_id_results[kick_room_id].newly_joined, False) def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: """ @@ -632,6 +655,8 @@ def test_newly_left_during_range_and_join_leave_after_to_token(self) -> None: } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_newly_left_during_range_and_join_after_to_token(self) -> None: """ @@ -681,6 +706,8 @@ def test_newly_left_during_range_and_join_after_to_token(self) -> None: } ), ) + # We should *NOT* be `newly_joined` because we left during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_no_from_token(self) -> None: """ @@ -727,6 +754,9 @@ def test_no_from_token(self) -> None: room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because there is no `from_token` to + # define a "live" range to compare against + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_from_token_ahead_of_to_token(self) -> None: """ @@ -793,6 +823,8 @@ def test_from_token_ahead_of_to_token(self) -> None: room_id_results[room_id1].event_id, join_response1["event_id"], ) + # We should *NOT* be `newly_joined` because we joined `room1` before either of the tokens + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_leave_before_range_and_join_leave_after_to_token(self) -> None: """ @@ -920,6 +952,8 @@ def test_join_leave_multiple_times_during_range_and_after_to_token( } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_join_leave_multiple_times_before_range_and_after_to_token( self, @@ -976,6 +1010,8 @@ def test_join_leave_multiple_times_before_range_and_after_to_token( } ), ) + # We should *NOT* be 
`newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_invite_before_range_and_join_leave_after_to_token( self, @@ -1028,8 +1064,11 @@ def test_invite_before_range_and_join_leave_after_to_token( } ), ) + # We should *NOT* be `newly_joined` because we were only invited before the + # token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) - def test_display_name_changes_in_token_range( + def test_join_and_display_name_changes_in_token_range( self, ) -> None: """ @@ -1101,6 +1140,68 @@ def test_display_name_changes_in_token_range( } ), ) + # We should be `newly_joined` because we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) + + def test_display_name_changes_in_token_range( + self, + ) -> None: + """ + Test that we point to the correct membership event within the from/to range even + if there is `displayname`/`avatar_url` updates. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. 
+ room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + + after_room1_token = self.event_sources.get_current_token() + + # Update the displayname during the token range + displayname_change_during_token_range_response = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_change1_token = self.event_sources.get_current_token() + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_change1_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response": join_response["event_id"], + "displayname_change_during_token_range_response": displayname_change_during_token_range_response[ + "event_id" + ], + } + ), + ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_display_name_changes_before_and_after_token_range( self, @@ -1172,6 +1273,8 @@ def test_display_name_changes_before_and_after_token_range( } ), ) + # We should *NOT* be `newly_joined` because we joined before the token range + self.assertEqual(room_id_results[room_id1].newly_joined, False) def test_display_name_changes_leave_after_token_range( self, @@ -1250,6 +1353,8 @@ def test_display_name_changes_leave_after_token_range( } ), ) + # We should be `newly_joined` because 
we joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) def test_display_name_changes_join_after_token_range( self, @@ -1298,6 +1403,123 @@ def test_display_name_changes_join_after_token_range( # Room shouldn't show up because we joined after the from/to range self.assertEqual(room_id_results.keys(), set()) + def test_newly_joined_with_leave_join_in_token_range( + self, + ) -> None: + """ + Test that `newly_joined` is set when we leave and join back within the + token range. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + after_room1_token = self.event_sources.get_current_token() + + # Leave and join back during the token range + self.helper.leave(room_id1, user1_id, tok=user1_tok) + join_response2 = self.helper.join(room_id1, user1_id, tok=user1_tok) + + after_more_changes_token = self.event_sources.get_current_token() + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=after_room1_token, + to_token=after_more_changes_token, + ) + ) + + # Room should show up because we were joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + join_response2["event_id"], + ) + # We should be considered `newly_joined` because there is some non-join event in + # between our join events.
+ self.assertEqual(room_id_results[room_id1].newly_joined, True) + + def test_newly_joined_only_joins_during_token_range( + self, + ) -> None: + """ + Test that a join and more joins caused by display name changes, all during the + token range, still count as `newly_joined`. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # We create the room with user2 so the room isn't left with no members when we + # leave and can still re-join. + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok, is_public=True) + # Join, leave, join back to the room before the from/to range + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + # Update the displayname during the token range (looks like another join) + displayname_change_during_token_range_response1 = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + # Update the displayname during the token range (looks like another join) + displayname_change_during_token_range_response2 = self.helper.send_state( + room_id1, + event_type=EventTypes.Member, + state_key=user1_id, + body={ + "membership": Membership.JOIN, + "displayname": "displayname during token range", + }, + tok=user1_tok, + ) + + after_room1_token = self.event_sources.get_current_token() + + room_id_results = self.get_success( + self.sliding_sync_handler.get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_room1_token, + to_token=after_room1_token, + ) + ) + + # Room should show up because it was newly_left and joined during the from/to range + self.assertEqual(room_id_results.keys(), {room_id1}) + # It should be pointing to the latest membership event 
in the from/to range + self.assertEqual( + room_id_results[room_id1].event_id, + displayname_change_during_token_range_response2["event_id"], + "Corresponding map to disambiguate the opaque event IDs: " + + str( + { + "join_response1": join_response1["event_id"], + "displayname_change_during_token_range_response1": displayname_change_during_token_range_response1[ + "event_id" + ], + "displayname_change_during_token_range_response2": displayname_change_during_token_range_response2[ + "event_id" + ], + } + ), + ) + # We should be `newly_joined` because we first joined during the token range + self.assertEqual(room_id_results[room_id1].newly_joined, True) + def test_multiple_rooms_are_not_confused( self, ) -> None: From 39259f66fa8ccd13818b8a5681b81fa020a8d4d2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 01:16:46 -0500 Subject: [PATCH 38/68] Join both tables with stream_ordering --- synapse/storage/databases/main/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c5e65379806..2646dfd9cb9 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -831,7 +831,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id - INNER JOIN room_memberships AS m ON m.event_id = e.event_id + INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id WHERE m.user_id = ? AND s.stream_id > ? AND s.stream_id <= ? 
ORDER BY s.stream_id ASC From c60aca755b35f9e655b2f2c71367ba5806db64e5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:02:34 -0500 Subject: [PATCH 39/68] Fix clause change --- synapse/storage/databases/main/stream.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 2646dfd9cb9..562dc6eacf7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -934,6 +934,7 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause, ignore_room_args = make_in_list_sql_clause( txn.database_engine, "e.room_id", excluded_rooms, negative=True ) + ignore_room_clause = f"AND {ignore_room_clause}" args += ignore_room_args sql = """ @@ -948,6 +949,8 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause, ) + logger.info("get_membership_changes_for_user: %s", sql) + txn.execute(sql, args) rows = [ From 11db1befa2845f89d09be78e32d53b9b4b9bbad4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:05:25 -0500 Subject: [PATCH 40/68] Remove debug log --- synapse/storage/databases/main/stream.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 562dc6eacf7..f6be97698ea 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -949,8 +949,6 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: ignore_room_clause, ) - logger.info("get_membership_changes_for_user: %s", sql) - txn.execute(sql, args) rows = [ From 7395e1042072b3ab9f04898afa3989fda55a0978 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 02:19:31 -0500 Subject: [PATCH 41/68] Fix `builtins.SyntaxError: EOL while scanning string literal (test_sync.py, line 1885)` See 
https://github.com/element-hq/synapse/actions/runs/9675073109/job/26692003103?pr=17320#step:9:5552 Worked fine locally but failed in CI with Python 3.8 --- tests/rest/client/test_sync.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 338149f09a0..bd1e7d521b7 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1882,9 +1882,7 @@ def test_rooms_newly_joined_incremental_sync(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2074,9 +2072,7 @@ def test_rooms_invite_shared_history_incremental_sync(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2323,9 +2319,7 @@ def test_rooms_invite_world_readable_history_incremental_sync(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2493,9 +2487,7 @@ def test_rooms_ban_incremental_sync1(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { @@ -2563,9 +2555,7 @@ def test_rooms_ban_incremental_sync2(self) -> None: channel = self.make_request( "POST", self.sync_endpoint - + f"?pos={self.get_success( - from_token.to_string(self.store) - )}", + + f"?pos={self.get_success(from_token.to_string(self.store))}", { "lists": { "foo-list": { From 
2bf39231ede3a9bcad65ad3f1321e788acfdcd15 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:40:36 -0500 Subject: [PATCH 42/68] Add some tests for `get_current_state_delta_membership_changes_for_user(...)` --- synapse/storage/databases/main/stream.py | 14 +- tests/storage/test_stream.py | 515 +++++++++++++++++++++++ 2 files changed, 523 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index f6be97698ea..e222f36bab7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction +from synapse.api.constants import Direction, EventTypes from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -807,7 +807,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [user_id, min_from_id, max_to_id] + args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -824,16 +824,18 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: e.event_id, s.prev_event_id, s.room_id, - s.instance_name, - s.stream_id, + e.instance_name, + e.stream_ordering, e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s INNER JOIN events AS e ON e.stream_ordering = s.stream_id INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id - WHERE m.user_id = ? - AND s.stream_id > ? AND s.stream_id <= ? + WHERE s.stream_id > ? AND s.stream_id <= ? + AND m.user_id = ? + AND s.state_key = m.user_id + AND s.type = ? 
ORDER BY s.stream_id ASC """ diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index fe1e873e154..64f123987af 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -28,9 +28,12 @@ from synapse.api.constants import Direction, EventTypes, RelationTypes from synapse.api.filtering import Filter +from synapse.api.room_versions import RoomVersions +from synapse.events import make_event_from_dict from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer +from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock @@ -543,3 +546,515 @@ def test_last_event_before_sharded_token(self) -> None: } ), ) + + +class GetCurrentStateDeltaMembershipChangesForUserTestCase(HomeserverTestCase): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` + """ + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.state_handler = self.hs.get_state_handler() + persistence = hs.get_storage_controllers().persistence + assert persistence is not None + self.persistence = persistence + + def test_returns_membership_events(self) -> None: + """ + A basic test that a membership event in the token range is returned for the user. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos = self.get_success( + self.store.get_position_for_event(join_response["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response["event_id"], + event_pos=join_pos, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + def test_server_left_after_us_room(self) -> None: + """ + Test that when probing over part of the DAG where the server left the room *after + us*, we still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + # User1 should leave the room first + leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + # User2 should also leave the room (everyone has left the room which means the + # server is no longer in the room). 
+ self.helper.leave(room_id1, user2_id, tok=user2_tok) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_server_left_room(self) -> None: + """ + Test that when probing over part of the DAG where we leave the room causing the + server to leave the room (because we were the last local user in the room), we + still see the join and leave changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "power_level_content_override": { + "users": { + user2_id: 100, + # Allow user1 to send state in the room + user1_id: 100, + } + } + }, + ) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + # Make sure random other non-member state that happens to have a state_key + # matching the user ID doesn't mess with things. + self.helper.send_state( + room_id1, + event_type="foobarbazdummy", + state_key=user1_id, + body={"foo": "bar"}, + tok=user1_tok, + ) + + # User2 should leave the room first. + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + # User1 (the person we're testing with) should also leave the room (everyone has + # left the room which means the server is no longer in the room). 
+ leave_response1 = self.helper.leave(room_id1, user1_id, tok=user1_tok) + leave_pos1 = self.get_success( + self.store.get_position_for_event(leave_response1["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=leave_response1["event_id"], + event_pos=leave_pos1, + prev_event_id=join_response1["event_id"], + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events being processed at once. This will result in all + of the memberships being stored in the `current_state_delta_stream` table with + the same `stream_ordering` even though the individual events have different + `stream_ordering`s. 
+ """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + user3_id = self.register_user("user3", "pass") + _user3_tok = self.login(user3_id, "pass") + user4_id = self.register_user("user4", "pass") + _user4_tok = self.login(user4_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist the user1, user3, and user4 join events in the same batch so they all + # end up in the `current_state_delta_stream` table with the same + # stream_ordering. + join_event1 = make_event_from_dict( + { + "sender": user1_id, + "type": EventTypes.Member, + "state_key": user1_id, + "content": {"membership": "join"}, + "room_id": room_id1, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "auth_events": [], + }, + room_version=RoomVersions.V10, + ) + join_event_context1 = self.get_success( + self.state_handler.compute_event_context(join_event1) + ) + join_event3 = make_event_from_dict( + { + "sender": user3_id, + "type": EventTypes.Member, + "state_key": user3_id, + "content": {"membership": "join"}, + "room_id": room_id1, + "depth": 1, + "origin_server_ts": 1, + "prev_events": [], + "auth_events": [], + }, + room_version=RoomVersions.V10, + ) + join_event_context3 = self.get_success( + self.state_handler.compute_event_context(join_event3) + ) + join_event4 = make_event_from_dict( + { + "sender": user4_id, + "type": EventTypes.Member, + "state_key": user4_id, + "content": {"membership": "join"}, + "room_id": room_id1, + "depth": 2, + "origin_server_ts": 2, + "prev_events": [], + "auth_events": [], + }, + room_version=RoomVersions.V10, + ) + join_event_context4 = self.get_success( + self.state_handler.compute_event_context(join_event4) + ) 
+ self.get_success( + self.persistence.persist_events( + [ + (join_event1, join_event_context1), + (join_event3, join_event_context3), + (join_event4, join_event_context4), + ] + ) + ) + + after_room1_token = self.event_sources.get_current_token() + + # Let's get membership changes from user3's perspective because it was in the + # middle of the batch. This way, if rows in` current_state_delta_stream` are + # stored with the first or last event's `stream_ordering`, we will still catch + # bugs. + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user3_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + join_pos3 = self.get_success( + self.store.get_position_for_event(join_event3.event_id) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_event3.event_id, + event_pos=join_pos3, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + ], + ) + + # TODO: Test remote join where the first rows will just be the state when you joined + + # TODO: Test state reset where the user gets removed from the room (when there is no + # corresponding leave event) + + def test_excluded_room_ids(self) -> None: + """ + Test that the `excluded_room_ids` option excludes changes from the specified rooms. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response2 = self.helper.join(room_id2, user1_id, tok=user1_tok) + join_pos2 = self.get_success( + self.store.get_position_for_event(join_response2["event_id"]) + ) + + after_room1_token = self.event_sources.get_current_token() + + # First test the the room is returned without the `excluded_room_ids` option + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ), + CurrentStateDeltaMembership( + event_id=join_response2["event_id"], + event_pos=join_pos2, + prev_event_id=None, + room_id=room_id2, + membership="join", + sender=user1_id, + ), + ], + ) + + # The test that `excluded_room_ids` excludes room2 as expected + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + excluded_room_ids=[room_id2], + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( 
+ event_id=join_response1["event_id"], + event_pos=join_pos1, + prev_event_id=None, + room_id=room_id1, + membership="join", + sender=user1_id, + ) + ], + ) + + +# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): +# """ +# TODO +# """ + +# servlets = [ +# admin.register_servlets_for_client_rest_resource, +# room.register_servlets, +# login.register_servlets, +# ] + +# def default_config(self) -> dict: +# conf = super().default_config() +# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] +# return conf + +# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: +# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() +# self.store = self.hs.get_datastores().main +# self.event_sources = hs.get_event_sources() + + +# def test_sharded_event_persisters(self) -> None: +# """ +# TODO +# """ +# user1_id = self.register_user("user1", "pass") +# user1_tok = self.login(user1_id, "pass") +# user2_id = self.register_user("user2", "pass") +# user2_tok = self.login(user2_id, "pass") + +# remote_hs = self.make_worker_hs("synapse.app.generic_worker") + +# channel = make_request( +# self.reactor, +# self._hs_to_site[hs], +# "GET", +# f"/_matrix/media/r0/download/{target}/{media_id}", +# shorthand=False, +# access_token=self.access_token, +# await_result=False, +# ) + +# remote_hs + +# worker_store2 = worker_hs2.get_datastores().main +# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) +# actx = worker_store2._stream_id_gen.get_next() + +# self.assertEqual( +# room_id_results.keys(), +# { +# room_id1, +# # room_id2 shouldn't show up because we left before the from/to range +# # and the join event during the range happened while worker2 was stuck. +# # This means that from the perspective of the master, where the +# # `stuck_activity_token` is generated, the stream position for worker2 +# # wasn't advanced to the join yet. 
Looking at the `instance_map`, the +# # join technically comes after `stuck_activity_token``. +# # +# # room_id2, +# room_id3, +# }, +# ) From ec2d8dc1e3c602dadb4fac289bcd38b211f6b34d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 18:52:24 -0500 Subject: [PATCH 43/68] Create events using helper --- tests/storage/test_stream.py | 76 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 64f123987af..39cb5a25c54 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -37,6 +37,7 @@ from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken from synapse.util import Clock +from tests.test_utils.event_injection import create_event from tests.unittest import HomeserverTestCase logger = logging.getLogger(__name__) @@ -809,56 +810,35 @@ def test_membership_persisted_in_same_batch(self) -> None: # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. 
- join_event1 = make_event_from_dict( - { - "sender": user1_id, - "type": EventTypes.Member, - "state_key": user1_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 0, - "origin_server_ts": 0, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context1 = self.get_success( - self.state_handler.compute_event_context(join_event1) - ) - join_event3 = make_event_from_dict( - { - "sender": user3_id, - "type": EventTypes.Member, - "state_key": user3_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 1, - "origin_server_ts": 1, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, - ) - join_event_context3 = self.get_success( - self.state_handler.compute_event_context(join_event3) + join_event1, join_event_context1 = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event4 = make_event_from_dict( - { - "sender": user4_id, - "type": EventTypes.Member, - "state_key": user4_id, - "content": {"membership": "join"}, - "room_id": room_id1, - "depth": 2, - "origin_server_ts": 2, - "prev_events": [], - "auth_events": [], - }, - room_version=RoomVersions.V10, + join_event3, join_event_context3 = self.get_success( + create_event( + self.hs, + sender=user3_id, + type=EventTypes.Member, + state_key=user3_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) - join_event_context4 = self.get_success( - self.state_handler.compute_event_context(join_event4) + join_event4, join_event_context4 = self.get_success( + create_event( + self.hs, + sender=user4_id, + type=EventTypes.Member, + state_key=user4_id, + content={"membership": "join"}, + room_id=room_id1, + ) ) self.get_success( self.persistence.persist_events( From 0b9a903ca12831e431b596daacf127e53ecbd050 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 19:35:18 
-0500 Subject: [PATCH 44/68] Add test that remotely joins room --- tests/storage/test_stream.py | 259 +++++++++++++++++++++++++---------- 1 file changed, 188 insertions(+), 71 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 39cb5a25c54..3b825dbbbef 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -21,24 +21,32 @@ import logging from typing import List, Tuple +from unittest.mock import AsyncMock, patch from immutabledict import immutabledict from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import Direction, EventTypes, RelationTypes +from synapse.api.constants import Direction, EventTypes, Membership, RelationTypes from synapse.api.filtering import Filter -from synapse.api.room_versions import RoomVersions -from synapse.events import make_event_from_dict +from synapse.crypto.event_signing import add_hashes_and_signatures +from synapse.events import FrozenEventV3 +from synapse.federation.federation_client import SendJoinResult from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.stream import CurrentStateDeltaMembership -from synapse.types import JsonDict, PersistedEventPosition, RoomStreamToken +from synapse.types import ( + JsonDict, + PersistedEventPosition, + RoomStreamToken, + UserID, + create_requester, +) from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -884,8 +892,6 @@ def test_membership_persisted_in_same_batch(self) -> None: ], ) - # TODO: Test remote join where the first rows will just be the state when you joined - # TODO: Test state reset where the user gets removed from the room (when there is no # corresponding leave event) @@ -974,67 +980,178 @@ def 
test_excluded_room_ids(self) -> None: ) -# class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase(BaseMultiWorkerStreamTestCase): -# """ -# TODO -# """ - -# servlets = [ -# admin.register_servlets_for_client_rest_resource, -# room.register_servlets, -# login.register_servlets, -# ] - -# def default_config(self) -> dict: -# conf = super().default_config() -# conf["federation_custom_ca_list"] = [get_test_ca_cert_file()] -# return conf - -# def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: -# self.sliding_sync_handler = self.hs.get_sliding_sync_handler() -# self.store = self.hs.get_datastores().main -# self.event_sources = hs.get_event_sources() - - -# def test_sharded_event_persisters(self) -> None: -# """ -# TODO -# """ -# user1_id = self.register_user("user1", "pass") -# user1_tok = self.login(user1_id, "pass") -# user2_id = self.register_user("user2", "pass") -# user2_tok = self.login(user2_id, "pass") - -# remote_hs = self.make_worker_hs("synapse.app.generic_worker") - -# channel = make_request( -# self.reactor, -# self._hs_to_site[hs], -# "GET", -# f"/_matrix/media/r0/download/{target}/{media_id}", -# shorthand=False, -# access_token=self.access_token, -# await_result=False, -# ) - -# remote_hs - -# worker_store2 = worker_hs2.get_datastores().main -# assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) -# actx = worker_store2._stream_id_gen.get_next() - -# self.assertEqual( -# room_id_results.keys(), -# { -# room_id1, -# # room_id2 shouldn't show up because we left before the from/to range -# # and the join event during the range happened while worker2 was stuck. -# # This means that from the perspective of the master, where the -# # `stuck_activity_token` is generated, the stream position for worker2 -# # wasn't advanced to the join yet. Looking at the `instance_map`, the -# # join technically comes after `stuck_activity_token``. 
-# # -# # room_id2, -# room_id3, -# }, -# ) +class GetCurrentStateDeltaMembershipChangesForUserFederationTestCase( + FederatingHomeserverTestCase +): + """ + Test `get_current_state_delta_membership_changes_for_user(...)` when joining remote federated rooms. + """ + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.sliding_sync_handler = self.hs.get_sliding_sync_handler() + self.store = self.hs.get_datastores().main + self.event_sources = hs.get_event_sources() + self.room_member_handler = hs.get_room_member_handler() + + def test_remote_join(self) -> None: + """ + Test remote join where the first rows will just be the state when you joined + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" + + # Remotely join a room on another homeserver. + # + # To do this we have to mock the responses from the remote homeserver. We also + # patch out a bunch of event checks on our end. 
+ create_event_source = { + "auth_events": [], + "content": { + "creator": f"@creator:{self.OTHER_SERVER_NAME}", + "room_version": self.hs.config.server.default_room_version.identifier, + }, + "depth": 0, + "origin_server_ts": 0, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": "", + "type": EventTypes.Create, + } + self.add_hashes_and_signatures_from_other_server( + create_event_source, + self.hs.config.server.default_room_version, + ) + create_event = FrozenEventV3( + create_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + creator_join_event_source = { + "auth_events": [create_event.event_id], + "content": { + "membership": "join", + }, + "depth": 1, + "origin_server_ts": 1, + "prev_events": [], + "room_id": intially_unjoined_room_id, + "sender": f"@creator:{self.OTHER_SERVER_NAME}", + "state_key": f"@creator:{self.OTHER_SERVER_NAME}", + "type": EventTypes.Member, + } + self.add_hashes_and_signatures_from_other_server( + creator_join_event_source, + self.hs.config.server.default_room_version, + ) + creator_join_event = FrozenEventV3( + creator_join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + # Our local user is going to remote join the room + join_event_source = { + "auth_events": [create_event.event_id], + "content": {"membership": "join"}, + "depth": 1, + "origin_server_ts": 100, + "prev_events": [creator_join_event.event_id], + "sender": user1_id, + "state_key": user1_id, + "room_id": intially_unjoined_room_id, + "type": EventTypes.Member, + } + add_hashes_and_signatures( + self.hs.config.server.default_room_version, + join_event_source, + self.hs.hostname, + self.hs.signing_key, + ) + join_event = FrozenEventV3( + join_event_source, + self.hs.config.server.default_room_version, + {}, + None, + ) + + mock_make_membership_event = AsyncMock( + return_value=( + self.OTHER_SERVER_NAME, + join_event, + 
self.hs.config.server.default_room_version, + ) + ) + mock_send_join = AsyncMock( + return_value=SendJoinResult( + join_event, + self.OTHER_SERVER_NAME, + state=[create_event, creator_join_event], + auth_chain=[create_event, creator_join_event], + partial_state=False, + servers_in_room=frozenset(), + ) + ) + + with patch.object( + self.room_member_handler.federation_handler.federation_client, + "make_membership_event", + mock_make_membership_event, + ), patch.object( + self.room_member_handler.federation_handler.federation_client, + "send_join", + mock_send_join, + ), patch( + "synapse.event_auth._is_membership_change_allowed", + return_value=None, + ), patch( + "synapse.handlers.federation_event.check_state_dependent_auth_rules", + return_value=None, + ): + self.get_success( + self.room_member_handler.update_membership( + requester=create_requester(user1_id), + target=UserID.from_string(user1_id), + room_id=intially_unjoined_room_id, + action=Membership.JOIN, + remote_room_hosts=[self.OTHER_SERVER_NAME], + ) + ) + + events_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="events", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump events", + ) + ) + + logger.info("events_db_dump: %s", events_db_dump) + + current_state_delta_stream_db_dump = self.get_success( + self.store.db_pool.simple_select_list( + table="current_state_delta_stream", + keyvalues={}, + retcols=[ + "*", + ], + desc="debug dump current_state_delta_stream", + ) + ) + + logger.info( + "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + ) From 48d0acfbcda30f956d79cef873fa762f88530341 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 19:52:12 -0500 Subject: [PATCH 45/68] Actually test `get_current_state_delta_membership_changes_for_user(...)` in remote join test --- tests/storage/test_stream.py | 59 ++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git 
a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 3b825dbbbef..dfca17db64b 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1001,10 +1001,13 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: def test_remote_join(self) -> None: """ - Test remote join where the first rows will just be the state when you joined + Test remote join where the first rows in `current_state_delta_stream` will just + be the state when you joined the remote room. """ user1_id = self.register_user("user1", "pass") - user1_tok = self.login(user1_id, "pass") + _user1_tok = self.login(user1_id, "pass") + + before_join_token = self.event_sources.get_current_token() intially_unjoined_room_id = f"!example:{self.OTHER_SERVER_NAME}" @@ -1128,30 +1131,40 @@ def test_remote_join(self) -> None: ) ) - events_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="events", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump events", + after_join_token = self.event_sources.get_current_token() + + # Get the membership changes for the user at this point, the + # `current_state_delta_stream` table should look like: + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_join_token.room_key, + to_key=after_join_token.room_key, ) ) - logger.info("events_db_dump: %s", events_db_dump) - - 
current_state_delta_stream_db_dump = self.get_success( - self.store.db_pool.simple_select_list( - table="current_state_delta_stream", - keyvalues={}, - retcols=[ - "*", - ], - desc="debug dump current_state_delta_stream", - ) + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) ) - logger.info( - "current_state_delta_stream_db_dump: %s", current_state_delta_stream_db_dump + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=join_event.event_id, + event_pos=join_pos, + prev_event_id=None, + room_id=intially_unjoined_room_id, + membership="join", + sender=user1_id, + ), + ], ) From 2a944ffcef16744ade6b0172fcb98c7eeb281766 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:20:17 -0500 Subject: [PATCH 46/68] Add state of the db in each situation --- tests/storage/test_stream.py | 68 ++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index dfca17db64b..2ac88f18eaf 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -673,6 +673,29 @@ def test_server_left_after_us_room(self) -> None: after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. 
+ # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------| + # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | 'master' | + # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | 'master' | + # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | 'master' | + # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | 'master' | + # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | 'master' | + # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | 'master' | + # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | 'master' | + # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | 'master' | + # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | 'master' | + # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -761,6 +784,29 @@ def test_server_left_room(self) -> None: after_room1_token = self.event_sources.get_current_token() + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. 
+ # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -864,6 +910,21 @@ def test_membership_persisted_in_same_batch(self) -> None: # middle of the batch. This way, if rows in` current_state_delta_stream` are # stored with the first or last event's `stream_ordering`, we will still catch # bugs. 
+ # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | + # |-----------|-----------|----------------------------|------------------|----------|---------------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1133,8 +1194,11 @@ def test_remote_join(self) -> None: after_join_token = self.event_sources.get_current_token() - # Get the membership changes for the user at this point, the - # `current_state_delta_stream` table should look like: + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. 
Notice that all of the events are at the same `stream_id` because + # the current state starts out where we remotely joined: # # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| From 8df39d1baff8cac6aa446c8b71b3a64a8bf29a1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 20:22:03 -0500 Subject: [PATCH 47/68] Remove redundant `instance_name` column --- tests/storage/test_stream.py | 98 ++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 2ac88f18eaf..840f9803440 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -679,23 +679,23 @@ def test_server_left_after_us_room(self) -> None: # following. When the server leaves a room, it will insert new rows with # `event_id = null` for all current state. 
# - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|----------|-----------------------------|----------------|----------|---------------|---------------| - # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | 'master' | - # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | 'master' | - # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | 'master' | - # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | 'master' | - # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | 'master' | - # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | 'master' | - # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | 'master' | - # | 9 | !x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | 'master' | - # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | 'master' | - # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | 'master' | - # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | 'master' | - # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|----------|-----------------------------|----------------|----------|---------------| + # | 2 | !x:test | 'm.room.create' | '' | $xxx | None | + # | 3 | !x:test | 'm.room.member' | '@user2:test' | $aaa | None | + # | 4 | !x:test | 'm.room.history_visibility' | '' | $xxx | None | + # | 4 | !x:test | 'm.room.join_rules' | '' | $xxx | None | + # | 4 | !x:test | 'm.room.power_levels' | '' | $xxx | None | + # | 7 | !x:test | 'm.room.member' | '@user1:test' | $ooo | None | + # | 8 | !x:test | 'foobarbazdummy' | '@user1:test' | $xxx | None | + # | 9 | 
!x:test | 'm.room.member' | '@user1:test' | $ppp | $ooo | + # | 10 | !x:test | 'foobarbazdummy' | '@user1:test' | None | $xxx | + # | 10 | !x:test | 'm.room.create' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.history_visibility' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.join_rules' | '' | None | $xxx | + # | 10 | !x:test | 'm.room.member' | '@user1:test' | None | $ppp | + # | 10 | !x:test | 'm.room.member' | '@user2:test' | None | $aaa | + # | 10 | !x:test | 'm.room.power_levels' | | None | $xxx | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -790,23 +790,23 @@ def test_server_left_room(self) -> None: # following. When the server leaves a room, it will insert new rows with # `event_id = null` for all current state. # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|-----------|-----------------------------|---------------|----------|---------------|---------------| - # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | 'master' | - # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | 'master' | - # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | 'master' | - # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | 'master' | - # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | 'master' | - # | 10 | 
'!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | 'master' | - # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | 'master' | - # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|-----------------------------|---------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$aaa' | None | + # | 4 | '!x:test' | 'm.room.history_visibility' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$ooo' | None | + # | 8 | '!x:test' | 'foobarbazdummy' | '@user1:test' | '$xxx' | None | + # | 9 | '!x:test' | 'm.room.member' | '@user2:test' | '$bbb' | '$aaa' | + # | 10 | '!x:test' | 'foobarbazdummy' | '@user1:test' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.create' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.history_visibility' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.join_rules' | '' | None | '$xxx' | + # | 10 | '!x:test' | 'm.room.member' | '@user1:test' | None | '$ooo' | + # | 10 | '!x:test' | 'm.room.member' | '@user2:test' | None | '$bbb' | + # | 10 | '!x:test' | 'm.room.power_levels' | '' | None | '$xxx' | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, @@ -915,16 +915,16 @@ def test_membership_persisted_in_same_batch(self) -> None: # those three memberships at the end with `stream_id=7` because we persisted # them in the same batch): # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|-----------|----------------------------|------------------|----------|---------------|---------------| - # | 2 | '!x:test' | 
'm.room.create' | '' | '$xxx' | None | 'master' | - # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | 'master' | - # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|-----------|----------------------------|------------------|----------|---------------| + # | 2 | '!x:test' | 'm.room.create' | '' | '$xxx' | None | + # | 3 | '!x:test' | 'm.room.member' | '@user2:test' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.history_visibility'| '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.join_rules' | '' | '$xxx' | None | + # | 4 | '!x:test' | 'm.room.power_levels' | '' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user3:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user3_id, @@ -1200,11 +1200,11 @@ def test_remote_join(self) -> None: # following. 
Notice that all of the events are at the same `stream_id` because # the current state starts out where we remotely joined: # - # | stream_id | room_id | type | state_key | event_id | prev_event_id | instance_name | - # |-----------|------------------------------|-----------------|------------------------------|----------|---------------|----------------| - # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | 'master' | - # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | 'master' | + # | stream_id | room_id | type | state_key | event_id | prev_event_id | + # |-----------|------------------------------|-----------------|------------------------------|----------|---------------| + # | 2 | '!example:other.example.com' | 'm.room.member' | '@user1:test' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.create' | '' | '$xxx' | None | + # | 2 | '!example:other.example.com' | 'm.room.member' | '@creator:other.example.com' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( user1_id, From b7914e76769ea330cdfa99e18fd7695f8301b02b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:01:54 -0500 Subject: [PATCH 48/68] Add skipped test for state resets --- tests/storage/test_stream.py | 92 ++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 840f9803440..04a0e24154d 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, 
HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -839,6 +839,7 @@ def test_server_left_room(self) -> None: ], ) + @skip_unless(False, "We don't support this yet") def test_membership_persisted_in_same_batch(self) -> None: """ Test batch of membership events being processed at once. This will result in all @@ -948,13 +949,96 @@ def test_membership_persisted_in_same_batch(self) -> None: prev_event_id=None, room_id=room_id1, membership="join", - sender=user1_id, + sender=user3_id, ), ], ) - # TODO: Test state reset where the user gets removed from the room (when there is no - # corresponding leave event) + @skip_unless(False, "We don't support this yet") + def test_state_reset(self) -> None: + """ + Test a state reset scenario where the user gets removed from the room (when + there is no corresponding leave event) + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) + + before_reset_token = self.event_sources.get_current_token() + + # Send another state event which we will cause the reset at + dummy_state_response = self.helper.send_state( + room_id1, + event_type="foobarbaz", + state_key="", + body={"foo": "bar"}, + tok=user2_tok, + ) + dummy_state_pos = self.get_success( + self.store.get_position_for_event(dummy_state_response["event_id"]) + ) + + # Mock a state reset removing the membership for user1 in the current state + self.get_success( + self.store.db_pool.simple_delete( + table="current_state_events", + keyvalues={ + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": user1_id, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + 
self.get_success( + self.store.db_pool.simple_insert( + table="current_state_delta_stream", + values={ + "stream_id": dummy_state_pos.stream, + "room_id": room_id1, + "type": EventTypes.Member, + "state_key": user1_id, + "event_id": None, + # FIXME: I'm not sure if a state reset should have a prev_event_id + "prev_event_id": None, + "instance_name": dummy_state_pos.instance_name, + }, + desc="state reset user in current_state_delta_stream", + ) + ) + + after_reset_token = self.event_sources.get_current_token() + + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_reset_token.room_key, + to_key=after_reset_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=TODO, + event_pos=TODO, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) def test_excluded_room_ids(self) -> None: """ From 7eb1806ee3279f6581996b029f80251f8aaf3d69 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Jun 2024 21:06:05 -0500 Subject: [PATCH 49/68] Fix lints --- tests/storage/test_stream.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 04a0e24154d..5b30d7106f7 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -966,10 +966,7 @@ def test_state_reset(self) -> None: user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) - join_pos1 = self.get_success( - self.store.get_position_for_event(join_response1["event_id"]) - ) + self.helper.join(room_id1, user1_id, tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1028,16 +1025,19 @@ def test_state_reset(self) -> 
None: self.maxDiff = None self.assertEqual( membership_changes, - [ - CurrentStateDeltaMembership( - event_id=TODO, - event_pos=TODO, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], + # TODO: Uncomment the expected membership. We just have a `False` value + # here so the test expectation fails and you look here. + False, + # [ + # CurrentStateDeltaMembership( + # event_id=TODO, + # event_pos=TODO, + # prev_event_id=None, + # room_id=room_id1, + # membership="leave", + # sender=user1_id, + # ), + # ], ) def test_excluded_room_ids(self) -> None: From 935b98c474f030f92bdd28cd69fcf20f3d6045fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 00:48:17 -0500 Subject: [PATCH 50/68] All `get_current_state_delta_membership_changes_for_user(...)` tests passing --- synapse/storage/databases/main/stream.py | 80 ++++++++++++++++-------- tests/storage/test_stream.py | 39 ++++++------ 2 files changed, 75 insertions(+), 44 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index e222f36bab7..9ae1fe6c152 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -63,7 +63,7 @@ from twisted.internet import defer -from synapse.api.constants import Direction, EventTypes +from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background @@ -125,12 +125,12 @@ class CurrentStateDeltaMembership: sender: The person who sent the membership event """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition prev_event_id: Optional[str] room_id: str membership: str - sender: str + sender: Optional[str] def generate_pagination_where_clause( @@ -819,22 +819,32 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # longer in the room or a 
state reset happened and it was unset. # `stream_ordering` is unique across the Synapse instance so this should # work fine. + # + # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer + # the source of truth from the events table. This gives slightly more + # accurate results when available since `current_state_delta_stream` only + # tracks that the current state is at this stream position (not what stream + # position the state event was added) and batches events at the same + # `stream_id` in certain cases. + # + # TODO: We need to add indexes for `current_state_delta_stream.event_id` and + # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` + # for this to be efficient. sql = """ SELECT e.event_id, s.prev_event_id, s.room_id, - e.instance_name, - e.stream_ordering, + COALESCE(e.instance_name, s.instance_name), + COALESCE(e.stream_ordering, s.stream_id), e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s - INNER JOIN events AS e ON e.stream_ordering = s.stream_id - INNER JOIN room_memberships AS m ON m.event_stream_ordering = s.stream_id + LEFT JOIN events AS e ON e.event_id = s.event_id + LEFT JOIN room_memberships AS m ON m.event_id = s.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND m.user_id = ? - AND s.state_key = m.user_id + AND s.state_key = ? AND s.type = ? 
ORDER BY s.stream_id ASC """ @@ -842,6 +852,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: txn.execute(sql, args) membership_changes: List[CurrentStateDeltaMembership] = [] + membership_change_map: Dict[str, CurrentStateDeltaMembership] = {} for ( event_id, prev_event_id, @@ -852,36 +863,55 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership, sender, ) in txn: - assert event_id is not None - # `prev_event_id` can be `None` assert room_id is not None assert instance_name is not None assert stream_ordering is not None - assert topological_ordering is not None - assert membership is not None - assert sender is not None if _filter_results( from_key, to_key, instance_name, + # TODO: This isn't always filled now topological_ordering, stream_ordering, ): - membership_changes.append( - CurrentStateDeltaMembership( - event_id=event_id, - event_pos=PersistedEventPosition( - instance_name=instance_name, - stream=stream_ordering, - ), - prev_event_id=prev_event_id, - room_id=room_id, - membership=membership, - sender=sender, + # When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. This means we might + # already have a row for the leave event and then another for the + # same leave where the `event_id=null` but the `prev_event_id` is + # pointing back at the earlier leave event. Since we're assuming the + # `event_id = null` row is a `leave` and we don't want duplicate + # membership changes in our results, let's get rid of those + # (deduplicate) (see `test_server_left_after_us_room`). 
+ if event_id is None: + already_tracked_membership_change = membership_change_map.get( + prev_event_id ) + if ( + already_tracked_membership_change is not None + and already_tracked_membership_change.membership + == Membership.LEAVE + ): + continue + + membership_change = CurrentStateDeltaMembership( + event_id=event_id, + event_pos=PersistedEventPosition( + instance_name=instance_name, + stream=stream_ordering, + ), + prev_event_id=prev_event_id, + room_id=room_id, + membership=( + membership if membership is not None else Membership.LEAVE + ), + sender=sender, ) + membership_changes.append(membership_change) + if event_id: + membership_change_map[event_id] = membership_change + return membership_changes membership_changes = await self.db_pool.runInteraction( diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 5b30d7106f7..ffa763bff2f 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -829,17 +829,16 @@ def test_server_left_room(self) -> None: sender=user1_id, ), CurrentStateDeltaMembership( - event_id=leave_response1["event_id"], + event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=user1_id, + sender=None, # user1_id, ), ], ) - @skip_unless(False, "We don't support this yet") def test_membership_persisted_in_same_batch(self) -> None: """ Test batch of membership events being processed at once. 
This will result in all
@@ -954,7 +953,6 @@ def test_membership_persisted_in_same_batch(self) -> None:
             ],
         )
 
-    @skip_unless(False, "We don't support this yet")
     def test_state_reset(self) -> None:
         """
         Test a state reset scenario where the user gets removed from the room (when
@@ -970,7 +968,7 @@ def test_state_reset(self) -> None:
 
         before_reset_token = self.event_sources.get_current_token()
 
-        # Send another state event which we will cause the reset at
+        # Send another state event to make a position for the state reset to happen at
         dummy_state_response = self.helper.send_state(
             room_id1,
             event_type="foobarbaz",
@@ -1011,6 +1009,12 @@ def test_state_reset(self) -> None:
             )
         )
 
+        # Manually bust the cache since we're just manually messing with the database
+        # and not causing an actual state reset.
+        self.store._membership_stream_cache.entity_has_changed(
+            user1_id, dummy_state_pos.stream
+        )
+
         after_reset_token = self.event_sources.get_current_token()
 
         membership_changes = self.get_success(
@@ -1025,19 +1029,16 @@ def test_state_reset(self) -> None:
         self.maxDiff = None
         self.assertEqual(
             membership_changes,
-            # TODO: Uncomment the expected membership. We just have a `False` value
-            # here so the test expectation fails and you look here. 
- False, - # [ - # CurrentStateDeltaMembership( - # event_id=TODO, - # event_pos=TODO, - # prev_event_id=None, - # room_id=room_id1, - # membership="leave", - # sender=user1_id, - # ), - # ], + [ + CurrentStateDeltaMembership( + event_id=None, + event_pos=dummy_state_pos, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=None, # user1_id, + ), + ], ) def test_excluded_room_ids(self) -> None: From f163fcf08a435ea96de334b1f88bd99a0ccbcc25 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 01:20:42 -0500 Subject: [PATCH 51/68] Remove need for topological_ordering --- synapse/storage/databases/main/stream.py | 45 +++++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 9ae1fe6c152..9e94cb08f63 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -412,6 +412,43 @@ def _filter_results( return True +def _filter_results_by_stream( + lower_token: Optional[RoomStreamToken], + upper_token: Optional[RoomStreamToken], + instance_name: str, + stream_ordering: int, +) -> bool: + """ + This function only works with "live" tokens with `stream_ordering` only. See + `_filter_results(...)` if you want to work with all tokens. + + Returns True if the event persisted by the given instance at the given + stream_ordering falls between the two tokens (taking a None + token to mean unbounded). + + Used to filter results from fetching events in the DB against the given + tokens. This is necessary to handle the case where the tokens include + position maps, which we handle by fetching more than necessary from the DB + and then filtering (rather than attempting to construct a complicated SQL + query). + """ + if lower_token: + assert lower_token.topological is None + + # If these are live tokens we compare the stream ordering against the + # writers stream position. 
+ if stream_ordering <= lower_token.get_stream_pos_for_instance(instance_name): + return False + + if upper_token: + assert upper_token.topological is None + + if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering: + return False + + return True + + def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: # NB: This may create SQL clauses that don't optimise well (and we don't # have indices on all possible clauses). E.g. it may create @@ -764,6 +801,8 @@ async def get_current_state_delta_membership_changes_for_user( Fetch membership events (and the previous event that was replaced by that one) for a given user. + Note: This function only works with "live" tokens with `stream_ordering` only. + We're looking for membership changes in the token range (> `from_key` and <= `to_key`). @@ -837,7 +876,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: s.room_id, COALESCE(e.instance_name, s.instance_name), COALESCE(e.stream_ordering, s.stream_id), - e.topological_ordering, m.membership, e.sender FROM current_state_delta_stream AS s @@ -859,7 +897,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: room_id, instance_name, stream_ordering, - topological_ordering, membership, sender, ) in txn: @@ -867,12 +904,10 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: assert instance_name is not None assert stream_ordering is not None - if _filter_results( + if _filter_results_by_stream( from_key, to_key, instance_name, - # TODO: This isn't always filled now - topological_ordering, stream_ordering, ): # When the server leaves a room, it will insert new rows with From 956f20ef748b6e3caf76f91623e72b9a617ae235 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 01:24:15 -0500 Subject: [PATCH 52/68] (currently failing) Add test to make sure membership changes don't re-appear if the server leaves the room later --- tests/storage/test_stream.py | 63 
+++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index ffa763bff2f..00821324744 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -619,7 +619,7 @@ def test_returns_membership_events(self) -> None: ], ) - def test_server_left_after_us_room(self) -> None: + def test_server_left_room_after_us(self) -> None: """ Test that when probing over part of the DAG where the server left the room *after us*, we still see the join and leave changes. @@ -652,7 +652,7 @@ def test_server_left_after_us_room(self) -> None: join_pos1 = self.get_success( self.store.get_position_for_event(join_response1["event_id"]) ) - # Make sure random other non-member state that happens to have a state_key + # Make sure that random other non-member state that happens to have a `state_key` # matching the user ID doesn't mess with things. self.helper.send_state( room_id1, @@ -728,7 +728,62 @@ def test_server_left_after_us_room(self) -> None: ], ) - def test_server_left_room(self) -> None: + def test_server_left_room_after_us_later(self) -> None: + """ + Test when the user leaves the room, then sometime later, everyone else leaves + the room, causing the server to leave the room, we shouldn't see any membership + changes. + + This is to make sure we play nicely with this behavior: When the server leaves a + room, it will insert new rows with `event_id = null` into the + `current_state_delta_stream` table for all current state. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + # User1 should leave the room first + self.helper.leave(room_id1, user1_id, tok=user1_tok) + + after_user1_leave_token = self.event_sources.get_current_token() + + # User2 should also leave the room (everyone has left the room which means the + # server is no longer in the room). + self.helper.leave(room_id1, user2_id, tok=user2_tok) + + after_server_leave_token = self.event_sources.get_current_token() + + # Join another room as user1 just to advance the stream_ordering and bust + # `_membership_stream_cache` + room_id2 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id2, user1_id, tok=user1_tok) + + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like the + # following. When the server leaves a room, it will insert new rows with + # `event_id = null` for all current state. + # + # TODO: Add DB rows to better see what's going on. 
+ membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=after_user1_leave_token.room_key, + to_key=after_server_leave_token.room_key, + ) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [], + ) + + def test_we_cause_server_left_room(self) -> None: """ Test that when probing over part of the DAG where we leave the room causing the server to leave the room (because we were the last local user in the room), we @@ -762,7 +817,7 @@ def test_server_left_room(self) -> None: join_pos1 = self.get_success( self.store.get_position_for_event(join_response1["event_id"]) ) - # Make sure random other non-member state that happens to have a state_key + # Make sure that random other non-member state that happens to have a `state_key` # matching the user ID doesn't mess with things. self.helper.send_state( room_id1, From 830e09d2defc6ae742dce30bdc822dcaf9a74092 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 10:13:02 -0500 Subject: [PATCH 53/68] Grab `prev_membership` to see whether the server left the room (fixes tests) See https://github.com/element-hq/synapse/pull/17320#discussion_r1657170493 `prev_membership` helps determine whether we should include the `event_id=null` row because we can check whether we have already left. 
- When we leave the room causing the server to leave the room, the `prev_event_id` will be our join event - When the server leaves the room after us, the `prev_event_id` will be leave event - In the state reset case, `prev_event_id` will be our join event --- synapse/storage/databases/main/stream.py | 20 ++++++-------------- tests/storage/test_stream.py | 7 +++---- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 9e94cb08f63..d94b9366ab7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -877,10 +877,12 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: COALESCE(e.instance_name, s.instance_name), COALESCE(e.stream_ordering, s.stream_id), m.membership, - e.sender + e.sender, + m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id + LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id WHERE s.stream_id > ? AND s.stream_id <= ? AND s.state_key = ? AND s.type = ? @@ -890,7 +892,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: txn.execute(sql, args) membership_changes: List[CurrentStateDeltaMembership] = [] - membership_change_map: Dict[str, CurrentStateDeltaMembership] = {} for ( event_id, prev_event_id, @@ -899,6 +900,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: stream_ordering, membership, sender, + prev_membership, ) in txn: assert room_id is not None assert instance_name is not None @@ -918,16 +920,8 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # `event_id = null` row is a `leave` and we don't want duplicate # membership changes in our results, let's get rid of those # (deduplicate) (see `test_server_left_after_us_room`). 
- if event_id is None: - already_tracked_membership_change = membership_change_map.get( - prev_event_id - ) - if ( - already_tracked_membership_change is not None - and already_tracked_membership_change.membership - == Membership.LEAVE - ): - continue + if event_id is None and prev_membership == Membership.LEAVE: + continue membership_change = CurrentStateDeltaMembership( event_id=event_id, @@ -944,8 +938,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: ) membership_changes.append(membership_change) - if event_id: - membership_change_map[event_id] = membership_change return membership_changes diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 00821324744..1342794d377 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -1019,7 +1019,7 @@ def test_state_reset(self) -> None: user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.join(room_id1, user1_id, tok=user1_tok) + join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) before_reset_token = self.event_sources.get_current_token() @@ -1056,8 +1056,7 @@ def test_state_reset(self) -> None: "type": EventTypes.Member, "state_key": user1_id, "event_id": None, - # FIXME: I'm not sure if a state reset should have a prev_event_id - "prev_event_id": None, + "prev_event_id": join_response1["event_id"], "instance_name": dummy_state_pos.instance_name, }, desc="state reset user in current_state_delta_stream", @@ -1088,7 +1087,7 @@ def test_state_reset(self) -> None: CurrentStateDeltaMembership( event_id=None, event_pos=dummy_state_pos, - prev_event_id=None, + prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", sender=None, # user1_id, From 15fcead2a5df17ee10278f1c0cdd16dbba26c76d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 11:33:41 -0500 Subject: [PATCH 54/68] Slight clean-up --- synapse/storage/databases/main/stream.py | 
12 +++++------- tests/storage/test_stream.py | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index d94b9366ab7..ab592dcf150 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -846,7 +846,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, user_id, EventTypes.Member] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -874,7 +874,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: e.event_id, s.prev_event_id, s.room_id, - COALESCE(e.instance_name, s.instance_name), + s.instance_name, COALESCE(e.stream_ordering, s.stream_id), m.membership, e.sender, @@ -884,8 +884,8 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id WHERE s.stream_id > ? AND s.stream_id <= ? - AND s.state_key = ? AND s.type = ? + AND s.state_key = ? ORDER BY s.stream_id ASC """ @@ -916,10 +916,8 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # `event_id = null` for all current state. This means we might # already have a row for the leave event and then another for the # same leave where the `event_id=null` but the `prev_event_id` is - # pointing back at the earlier leave event. Since we're assuming the - # `event_id = null` row is a `leave` and we don't want duplicate - # membership changes in our results, let's get rid of those - # (deduplicate) (see `test_server_left_after_us_room`). + # pointing back at the earlier leave event. 
We don't want to report + # the leave, if we already have a leave event. if event_id is None and prev_membership == Membership.LEAVE: continue diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 1342794d377..5a054d7f2ed 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -785,9 +785,9 @@ def test_server_left_room_after_us_later(self) -> None: def test_we_cause_server_left_room(self) -> None: """ - Test that when probing over part of the DAG where we leave the room causing the - server to leave the room (because we were the last local user in the room), we - still see the join and leave changes. + Test that when probing over part of the DAG where the user leaves the room + causing the server to leave the room (because we were the last local user in the + room), we still see the join and leave changes. This is to make sure we play nicely with this behavior: When the server leaves a room, it will insert new rows with `event_id = null` into the From 81c06bec20d2f6732100672853a140a6e19ff67d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 11:50:18 -0500 Subject: [PATCH 55/68] Detect state resets --- synapse/storage/databases/main/stream.py | 51 +++++++++++++++++------- tests/storage/test_stream.py | 15 ++++++- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ab592dcf150..19dba00a0fa 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,6 +123,8 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. membership: The membership state of the user in the room sender: The person who sent the membership event + state_reset: Whether the membership in the room was changed without a + corresponding event (state reset). 
""" event_id: Optional[str] @@ -131,6 +133,7 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] + state_reset: bool def generate_pagination_where_clause( @@ -846,7 +849,15 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] + args: List[Any] = [ + EventTypes.Member, + user_id, + user_id, + min_from_id, + max_to_id, + EventTypes.Member, + user_id, + ] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're @@ -859,30 +870,35 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # `stream_ordering` is unique across the Synapse instance so this should # work fine. # - # We `COALESCE` the `instance_name` and `stream_ordering` because we prefer - # the source of truth from the events table. This gives slightly more - # accurate results when available since `current_state_delta_stream` only - # tracks that the current state is at this stream position (not what stream - # position the state event was added) and batches events at the same - # `stream_id` in certain cases. + # We `COALESCE` the `stream_ordering` because we prefer the source of truth + # from the `events` table. This gives slightly more accurate results when + # available since `current_state_delta_stream` only tracks that the current + # state is at this stream position (not what stream position the state event + # was added) and uses the *minimum* stream position for batches of events. # - # TODO: We need to add indexes for `current_state_delta_stream.event_id` and - # `current_state_delta_stream.state_key`/`current_state_delta_stream.type` - # for this to be efficient. + # The extra `LEFT JOIN` by stream position are only needed to tell a state + # reset from the server leaving the room. 
Both cases have `event_id = null` + # but if we can find a corresponding event at that stream position, then we + # know it was just the server leaving the room. sql = """ SELECT - e.event_id, + COALESCE(e.event_id, e_by_stream.event_id) AS event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, s.stream_id), - m.membership, - e.sender, + COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, + COALESCE(m.membership, m_by_stream.membership) AS membership, + COALESCE(e.sender, e_by_stream.sender) AS sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id + LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id + AND e_by_stream.type = ? + AND e_by_stream.state_key = ? + LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id + AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? @@ -921,6 +937,12 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: if event_id is None and prev_membership == Membership.LEAVE: continue + # We can detect a state reset if there was a membership change + # without a corresponding event. 
+ state_reset = False + if event_id is None and membership != prev_membership: + state_reset = True + membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -933,6 +955,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership if membership is not None else Membership.LEAVE ), sender=sender, + state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 5a054d7f2ed..acb2f0e429b 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,6 +615,7 @@ def test_returns_membership_events(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -716,6 +717,7 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -724,6 +726,7 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="leave", sender=user1_id, + state_reset=False, ), ], ) @@ -882,14 +885,16 @@ def test_we_cause_server_left_room(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( - event_id=None, # leave_response1["event_id"], + event_id=leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=None, # user1_id, + sender=user1_id, + state_reset=False, ), ], ) @@ -1004,6 +1009,7 @@ def test_membership_persisted_in_same_batch(self) -> None: room_id=room_id1, membership="join", sender=user3_id, + state_reset=False, ), ], ) @@ -1091,6 +1097,7 @@ def test_state_reset(self) -> None: room_id=room_id1, membership="leave", sender=None, # user1_id, + state_reset=True, ), ], ) @@ -1141,6 +1148,7 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, 
membership="join", sender=user1_id, + state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1149,6 +1157,7 @@ def test_excluded_room_ids(self) -> None: room_id=room_id2, membership="join", sender=user1_id, + state_reset=False, ), ], ) @@ -1175,6 +1184,7 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, membership="join", sender=user1_id, + state_reset=False, ) ], ) @@ -1368,6 +1378,7 @@ def test_remote_join(self) -> None: room_id=intially_unjoined_room_id, membership="join", sender=user1_id, + state_reset=False, ), ], ) From eb159c11cd7bcc0a72983da46a728282fdbed8e7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 14:38:55 -0500 Subject: [PATCH 56/68] Don't worry about `state_reset` for now See: - Why no `COALESCE` https://github.com/element-hq/synapse/pull/17320#discussion_r1657435662 - Don't worry about `state_reset` for now, https://github.com/element-hq/synapse/pull/17320#discussion_r1657562645 --- synapse/storage/databases/main/stream.py | 53 ++++++------------------ tests/storage/test_stream.py | 47 +++++++++------------ 2 files changed, 32 insertions(+), 68 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 19dba00a0fa..c128eb5d5b4 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -123,8 +123,6 @@ class CurrentStateDeltaMembership: room_id: The room ID of the membership event. membership: The membership state of the user in the room sender: The person who sent the membership event - state_reset: Whether the membership in the room was changed without a - corresponding event (state reset). 
""" event_id: Optional[str] @@ -133,7 +131,6 @@ class CurrentStateDeltaMembership: room_id: str membership: str sender: Optional[str] - state_reset: bool def generate_pagination_where_clause( @@ -849,56 +846,37 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: min_from_id = from_key.stream max_to_id = to_key.get_max_stream_pos() - args: List[Any] = [ - EventTypes.Member, - user_id, - user_id, - min_from_id, - max_to_id, - EventTypes.Member, - user_id, - ] + args: List[Any] = [min_from_id, max_to_id, EventTypes.Member, user_id] # TODO: It would be good to assert that the `from_token`/`to_token` is >= # the first row in `current_state_delta_stream` for the rooms we're # interested in. Otherwise, we will end up with empty results and not know # it. - # We have to look-up events by `stream_ordering` because - # `current_state_delta_stream.event_id` can be `null` if the server is no - # longer in the room or a state reset happened and it was unset. - # `stream_ordering` is unique across the Synapse instance so this should - # work fine. + # We could `COALESCE(e.stream_ordering, s.stream_id)` to get more accurate + # stream positioning when available but given our usages, we can avoid the + # complexity. Between two (valid) stream tokens, we will still get all of + # the state changes. Since those events are persisted in a batch, valid + # tokens will either be before or after the batch of events. # - # We `COALESCE` the `stream_ordering` because we prefer the source of truth - # from the `events` table. This gives slightly more accurate results when - # available since `current_state_delta_stream` only tracks that the current + # `stream_ordering` from the `events` table is more accurate when available + # since the `current_state_delta_stream` table only tracks that the current # state is at this stream position (not what stream position the state event # was added) and uses the *minimum* stream position for batches of events. 
- # - # The extra `LEFT JOIN` by stream position are only needed to tell a state - # reset from the server leaving the room. Both cases have `event_id = null` - # but if we can find a corresponding event at that stream position, then we - # know it was just the server leaving the room. sql = """ SELECT - COALESCE(e.event_id, e_by_stream.event_id) AS event_id, + e.event_id, s.prev_event_id, s.room_id, s.instance_name, - COALESCE(e.stream_ordering, e_by_stream.stream_ordering, s.stream_id) AS stream_ordering, - COALESCE(m.membership, m_by_stream.membership) AS membership, - COALESCE(e.sender, e_by_stream.sender) AS sender, + s.stream_id, + m.membership, + e.sender, m_prev.membership AS prev_membership FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id - LEFT JOIN events AS e_by_stream ON e_by_stream.stream_ordering = s.stream_id - AND e_by_stream.type = ? - AND e_by_stream.state_key = ? - LEFT JOIN room_memberships AS m_by_stream ON m_by_stream.event_stream_ordering = s.stream_id - AND m_by_stream.user_id = ? WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? @@ -937,12 +915,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: if event_id is None and prev_membership == Membership.LEAVE: continue - # We can detect a state reset if there was a membership change - # without a corresponding event. 
- state_reset = False - if event_id is None and membership != prev_membership: - state_reset = True - membership_change = CurrentStateDeltaMembership( event_id=event_id, event_pos=PersistedEventPosition( @@ -955,7 +927,6 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership if membership is not None else Membership.LEAVE ), sender=sender, - state_reset=state_reset, ) membership_changes.append(membership_change) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index acb2f0e429b..4f8f919a24e 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -615,7 +615,6 @@ def test_returns_membership_events(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -717,7 +716,6 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=leave_response1["event_id"], @@ -726,7 +724,6 @@ def test_server_left_room_after_us(self) -> None: room_id=room_id1, membership="leave", sender=user1_id, - state_reset=False, ), ], ) @@ -885,16 +882,14 @@ def test_we_cause_server_left_room(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( - event_id=leave_response1["event_id"], + event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, prev_event_id=join_response1["event_id"], room_id=room_id1, membership="leave", - sender=user1_id, - state_reset=False, + sender=None, # user1_id, ), ], ) @@ -924,22 +919,25 @@ def test_membership_persisted_in_same_batch(self) -> None: # Persist the user1, user3, and user4 join events in the same batch so they all # end up in the `current_state_delta_stream` table with the same # stream_ordering. 
- join_event1, join_event_context1 = self.get_success( + join_event3, join_event_context3 = self.get_success( create_event( self.hs, - sender=user1_id, + sender=user3_id, type=EventTypes.Member, - state_key=user1_id, + state_key=user3_id, content={"membership": "join"}, room_id=room_id1, ) ) - join_event3, join_event_context3 = self.get_success( + # We want to put user1 in the middle of the batch. This way, regardless of the + # implementation that inserts rows into current_state_delta_stream` (whether it + # be minimum/maximum of stream position of the batch), we will still catch bugs. + join_event1, join_event_context1 = self.get_success( create_event( self.hs, - sender=user3_id, + sender=user1_id, type=EventTypes.Member, - state_key=user3_id, + state_key=user1_id, content={"membership": "join"}, room_id=room_id1, ) @@ -957,8 +955,8 @@ def test_membership_persisted_in_same_batch(self) -> None: self.get_success( self.persistence.persist_events( [ - (join_event1, join_event_context1), (join_event3, join_event_context3), + (join_event1, join_event_context1), (join_event4, join_event_context4), ] ) @@ -966,10 +964,7 @@ def test_membership_persisted_in_same_batch(self) -> None: after_room1_token = self.event_sources.get_current_token() - # Let's get membership changes from user3's perspective because it was in the - # middle of the batch. This way, if rows in` current_state_delta_stream` are - # stored with the first or last event's `stream_ordering`, we will still catch - # bugs. + # Get the membership changes for the user. 
# # At this point, the `current_state_delta_stream` table should look like (notice # those three memberships at the end with `stream_id=7` because we persisted @@ -987,7 +982,7 @@ def test_membership_persisted_in_same_batch(self) -> None: # | 7 | '!x:test' | 'm.room.member' | '@user4:test' | '$xxx' | None | membership_changes = self.get_success( self.store.get_current_state_delta_membership_changes_for_user( - user3_id, + user1_id, from_key=before_room1_token.room_key, to_key=after_room1_token.room_key, ) @@ -1003,13 +998,16 @@ def test_membership_persisted_in_same_batch(self) -> None: membership_changes, [ CurrentStateDeltaMembership( - event_id=join_event3.event_id, + event_id=join_event1.event_id, + # Ideally, this would be `join_pos1` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. event_pos=join_pos3, prev_event_id=None, room_id=room_id1, membership="join", - sender=user3_id, - state_reset=False, + sender=user1_id, ), ], ) @@ -1097,7 +1095,6 @@ def test_state_reset(self) -> None: room_id=room_id1, membership="leave", sender=None, # user1_id, - state_reset=True, ), ], ) @@ -1148,7 +1145,6 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ), CurrentStateDeltaMembership( event_id=join_response2["event_id"], @@ -1157,7 +1153,6 @@ def test_excluded_room_ids(self) -> None: room_id=room_id2, membership="join", sender=user1_id, - state_reset=False, ), ], ) @@ -1184,7 +1179,6 @@ def test_excluded_room_ids(self) -> None: room_id=room_id1, membership="join", sender=user1_id, - state_reset=False, ) ], ) @@ -1378,7 +1372,6 @@ def test_remote_join(self) -> None: room_id=intially_unjoined_room_id, membership="join", sender=user1_id, - state_reset=False, ), ], ) From ba56350642d33332d5ab3f3a94005e408cb9f433 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 
Jun 2024 15:31:18 -0500 Subject: [PATCH 57/68] Passing current tests --- synapse/handlers/sliding_sync.py | 44 +++++++++++++++++++---------- tests/handlers/test_sliding_sync.py | 9 ++++-- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3ce10d3ea70..b327e340ff1 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,7 +18,6 @@ # # import logging -from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import attr @@ -48,7 +47,9 @@ logger = logging.getLogger(__name__) -def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> bool: +def filter_membership_for_sync( + *, membership: str, user_id: str, sender: Optional[str] +) -> bool: """ Returns True if the membership event should be included in the sync response, otherwise False. @@ -65,6 +66,11 @@ def filter_membership_for_sync(*, membership: str, user_id: str, sender: str) -> # # This logic includes kicks (leave events where the sender is not the same user) and # can be read as "anything that isn't a leave or a leave with a different sender". + # + # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset + # happened that removed the user from the room, or the user was the last person + # locally to leave the room which caused the server to leave the room. 
In both + # cases, TODO return membership != Membership.LEAVE or sender != user_id @@ -99,10 +105,10 @@ class _RoomMembershipForUser: range """ - event_id: str + event_id: Optional[str] event_pos: PersistedEventPosition membership: str - sender: str + sender: Optional[str] newly_joined: bool def copy_and_replace(self, **kwds: Any) -> "_RoomMembershipForUser": @@ -540,9 +546,11 @@ async def get_sync_room_ids_for_user( first_membership_change_by_room_id_in_from_to_range: Dict[ str, CurrentStateDeltaMembership ] = {} - non_join_event_ids_by_room_id_in_from_to_range: Dict[str, List[str]] = ( - defaultdict(list) - ) + # Keep track if the room has a non-join event in the token range so we can later + # tell if it was a `newly_joined` room. If the last membership event in the + # token range is a join and there is also some non-join in the range, we know + # they `newly_joined`. + has_non_join_event_by_room_id_in_from_to_range: Dict[str, bool] = {} for ( membership_change ) in current_state_delta_membership_changes_in_from_to_range: @@ -551,16 +559,13 @@ async def get_sync_room_ids_for_user( last_membership_change_by_room_id_in_from_to_range[room_id] = ( membership_change ) - # Only set if we haven't already set it first_membership_change_by_room_id_in_from_to_range.setdefault( room_id, membership_change ) if membership_change.membership != Membership.JOIN: - non_join_event_ids_by_room_id_in_from_to_range[room_id].append( - membership_change.event_id - ) + has_non_join_event_by_room_id_in_from_to_range[room_id] = True # 2) Fixup # @@ -574,6 +579,7 @@ async def get_sync_room_ids_for_user( ) in last_membership_change_by_room_id_in_from_to_range.values(): room_id = last_membership_change_in_from_to_range.room_id + # 3) if last_membership_change_in_from_to_range.membership == Membership.JOIN: possibly_newly_joined_room_ids.add(room_id) @@ -592,10 +598,14 @@ async def get_sync_room_ids_for_user( # 3) Figure out `newly_joined` prev_event_ids_before_token_range: List[str] = [] 
for possibly_newly_joined_room_id in possibly_newly_joined_room_ids: - non_joins_for_room = non_join_event_ids_by_room_id_in_from_to_range[ - possibly_newly_joined_room_id - ] - if len(non_joins_for_room) > 0: + has_non_join_in_from_to_range = ( + has_non_join_event_by_room_id_in_from_to_range.get( + possibly_newly_joined_room_id, False + ) + ) + # If the last membership event in the token range is a join and there is + # also some non-join in the range, we know they `newly_joined`. + if has_non_join_in_from_to_range: # We found a `newly_joined` room (we left and joined within the token range) filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ room_id @@ -968,6 +978,10 @@ async def get_room_sync_data( Membership.INVITE, Membership.KNOCK, ): + # This should never happen. If someone is invited/knocked on room, then + # there should be an event for it. + assert rooms_membership_for_user_at_to_token.event_id is not None + invite_or_knock_event = await self.store.get_event( rooms_membership_for_user_at_to_token.event_id ) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 7339cb460e0..a751fef1df5 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -390,7 +390,7 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Leave during the from_token/to_token range (newly_left) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) - leave_response = self.helper.leave(room_id2, user1_id, tok=user1_tok) + _leave_response2 = self.helper.leave(room_id2, user1_id, tok=user1_tok) after_room2_token = self.event_sources.get_current_token() @@ -404,10 +404,13 @@ def test_only_newly_left_rooms_show_up(self) -> None: # Only the newly_left room should show up self.assertEqual(room_id_results.keys(), {room_id2}) - # It should be pointing to the latest membership event in the from/to range + # It should be pointing to the latest membership event in the from/to range but + # the 
`event_id` is `None` because we left the room causing the server to leave + # the room because no other local users are in it (quirk of the + # `current_state_delta_stream` table that we source things from) self.assertEqual( room_id_results[room_id2].event_id, - leave_response["event_id"], + None, # _leave_response2["event_id"], ) # We should *NOT* be `newly_joined` because we are instead `newly_left` self.assertEqual(room_id_results[room_id2].newly_joined, False) From f77403251cd2faf65689b785eba0a6af5366b5bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:39:43 -0500 Subject: [PATCH 58/68] Add better comments --- synapse/handlers/sliding_sync.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b327e340ff1..3dd32ae1f15 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -70,8 +70,9 @@ def filter_membership_for_sync( # When `sender=None` and `membership=Membership.LEAVE`, it means that a state reset # happened that removed the user from the room, or the user was the last person # locally to leave the room which caused the server to leave the room. In both - # cases, TODO - return membership != Membership.LEAVE or sender != user_id + # cases, we can just remove the rooms since they are no longer relevant to the user. + # They could still be added back later if they are `newly_left`. 
+ return membership != Membership.LEAVE or sender not in (user_id, None) # We can't freeze this class because we want to update it in place with the @@ -508,6 +509,8 @@ async def get_sync_room_ids_for_user( ) ) + # Filter the rooms that that we have updated room membership events to the point + # in time of the `to_token` (from the "1)" fixups) filtered_sync_room_id_set = { room_id: room_membership_for_user for room_id, room_membership_for_user in sync_room_id_set.items() From 325856e14b97aa6eca59d4d5d3b4145d050adfe0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 15:57:01 -0500 Subject: [PATCH 59/68] Inclusive ranges --- synapse/handlers/sliding_sync.py | 3 +- tests/rest/client/test_sync.py | 92 ++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3dd32ae1f15..db5dd75d044 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -263,7 +263,8 @@ async def current_sync_for_user( for range in list_config.ranges: sliced_room_ids = [ room_id - for room_id, _ in sorted_room_info[range[0] : range[1]] + # Both sides of range are inclusive + for room_id, _ in sorted_room_info[range[0] : range[1] + 1] ] ops.append( diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index bd1e7d521b7..3f4f88c3d14 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1616,6 +1616,98 @@ def test_sort_list(self) -> None: channel.json_body["lists"]["foo-list"], ) + def test_sliced_windows(self) -> None: + """ + Test that the `lists` `ranges` are sliced correctly. Both sides of each range + are inclusive. 
+ """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + + # Make the Sliding Sync request for a single room + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 0]], + "required_state": [ + ["m.room.join_rules", ""], + ["m.room.history_visibility", ""], + ["m.space.child", "*"], + ], + "timeline_limit": 1, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Make sure it has the foo-list we requested + self.assertListEqual( + list(channel.json_body["lists"].keys()), + ["foo-list"], + channel.json_body["lists"].keys(), + ) + # Make sure the list is sorted in the way we expect + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 0], + "room_ids": [room_id3], + } + ], + channel.json_body["lists"]["foo-list"], + ) + + # Make the Sliding Sync request for the first two rooms + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + ["m.room.join_rules", ""], + ["m.room.history_visibility", ""], + ["m.space.child", "*"], + ], + "timeline_limit": 1, + } + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Make sure it has the foo-list we requested + self.assertListEqual( + list(channel.json_body["lists"].keys()), + ["foo-list"], + channel.json_body["lists"].keys(), + ) + # Make sure the list is sorted in the way we expect + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 1], + "room_ids": [room_id3, room_id2], + } + 
], + channel.json_body["lists"]["foo-list"], + ) + def test_rooms_limited_initial_sync(self) -> None: """ Test that we mark `rooms` as `limited=True` when we saturate the `timeline_limit` From 63c7b5017ad82ee20bc2ae5898b051a2660cf188 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 16:34:41 -0500 Subject: [PATCH 60/68] (doesn't work) Add test for batch persisting multiple member events for the same user --- tests/storage/test_stream.py | 121 +++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 6 deletions(-) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 4f8f919a24e..53a58bd82a9 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless logger = logging.getLogger(__name__) @@ -894,12 +894,12 @@ def test_we_cause_server_left_room(self) -> None: ], ) - def test_membership_persisted_in_same_batch(self) -> None: + def test_different_user_membership_persisted_in_same_batch(self) -> None: """ - Test batch of membership events being processed at once. This will result in all - of the memberships being stored in the `current_state_delta_stream` table with - the same `stream_ordering` even though the individual events have different - `stream_ordering`s. + Test batch of membership events from different users being processed at once. + This will result in all of the memberships being stored in the + `current_state_delta_stream` table with the same `stream_ordering` even though + the individual events have different `stream_ordering`s. 
""" user1_id = self.register_user("user1", "pass") _user1_tok = self.login(user1_id, "pass") @@ -1012,6 +1012,115 @@ def test_membership_persisted_in_same_batch(self) -> None: ], ) + @skip_unless( + False, + "persist code does not support multiple membership events for the same user in the same batch", + ) + def test_membership_persisted_in_same_batch(self) -> None: + """ + Test batch of membership events for the same user being processed at once. + + This *should* (doesn't happen currently) result in all of the memberships being + stored in the `current_state_delta_stream` table with the same `stream_ordering` + even though the individual events have different `stream_ordering`s. + + FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` + table. + """ + user1_id = self.register_user("user1", "pass") + _user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + before_room1_token = self.event_sources.get_current_token() + + # User2 is just the designated person to create the room (we do this across the + # tests to be consistent) + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + + # Persist a timeline event sandwiched between two membership events so they end + # up in the `current_state_delta_stream` table with the same `stream_id`. 
+ join_event, join_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "join"}, + room_id=room_id1, + ) + ) + timeline_event, timeline_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Message, + state_key=user1_id, + content={"body": "foo bar", "msgtype": "m.text"}, + room_id=room_id1, + ) + ) + leave_event, leave_event_context = self.get_success( + create_event( + self.hs, + sender=user1_id, + type=EventTypes.Member, + state_key=user1_id, + content={"membership": "leave"}, + room_id=room_id1, + ) + ) + self.get_success( + self.persistence.persist_events( + [ + (join_event, join_event_context), + (timeline_event, timeline_event_context), + (leave_event, leave_event_context), + ] + ) + ) + + after_room1_token = self.event_sources.get_current_token() + + # Get the membership changes for the user. + # + # At this point, the `current_state_delta_stream` table should look like (notice + # those three memberships at the end with `stream_id=7` because we persisted + # them in the same batch): + # + # TODO: DB rows to better see what's going on. + membership_changes = self.get_success( + self.store.get_current_state_delta_membership_changes_for_user( + user1_id, + from_key=before_room1_token.room_key, + to_key=after_room1_token.room_key, + ) + ) + + join_pos = self.get_success( + self.store.get_position_for_event(join_event.event_id) + ) + + # Let the whole diff show on failure + self.maxDiff = None + self.assertEqual( + membership_changes, + [ + CurrentStateDeltaMembership( + event_id=leave_event.event_id, + # Ideally, this would be `leave_pos` (to match the `event_id`) but + # when events are persisted in a batch, they are all stored in the + # `current_state_delta_stream` table with the minimum + # `stream_ordering` from the batch. 
+ event_pos=join_pos, # leave_pos, + prev_event_id=None, + room_id=room_id1, + membership="leave", + sender=user1_id, + ), + ], + ) + def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 1158058aa52e47d0463b44f115222e0e122e045e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 17:58:31 -0500 Subject: [PATCH 61/68] Opt for tackling more batch scenarios in future PRs --- tests/rest/client/test_sync.py | 2 +- tests/storage/test_stream.py | 111 +-------------------------------- 2 files changed, 2 insertions(+), 111 deletions(-) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3f4f88c3d14..766c8850d0c 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -1624,7 +1624,7 @@ def test_sliced_windows(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) + _room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) room_id3 = self.helper.create_room_as(user1_id, tok=user1_tok, is_public=True) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 53a58bd82a9..e420e680e27 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -46,7 +46,7 @@ from synapse.util import Clock from tests.test_utils.event_injection import create_event -from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase, skip_unless +from tests.unittest import FederatingHomeserverTestCase, HomeserverTestCase logger = logging.getLogger(__name__) @@ -1012,115 +1012,6 @@ def test_different_user_membership_persisted_in_same_batch(self) -> None: ], ) - @skip_unless( - False, - "persist code does not support multiple membership events for the same user in the same batch", - ) - 
def test_membership_persisted_in_same_batch(self) -> None: - """ - Test batch of membership events for the same user being processed at once. - - This *should* (doesn't happen currently) result in all of the memberships being - stored in the `current_state_delta_stream` table with the same `stream_ordering` - even though the individual events have different `stream_ordering`s. - - FIXME: Currently, only the `join_event` is recorded in the `current_state_delta_stream` - table. - """ - user1_id = self.register_user("user1", "pass") - _user1_tok = self.login(user1_id, "pass") - user2_id = self.register_user("user2", "pass") - user2_tok = self.login(user2_id, "pass") - - before_room1_token = self.event_sources.get_current_token() - - # User2 is just the designated person to create the room (we do this across the - # tests to be consistent) - room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - - # Persist a timeline event sandwiched between two membership events so they end - # up in the `current_state_delta_stream` table with the same `stream_id`. 
- join_event, join_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "join"}, - room_id=room_id1, - ) - ) - timeline_event, timeline_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Message, - state_key=user1_id, - content={"body": "foo bar", "msgtype": "m.text"}, - room_id=room_id1, - ) - ) - leave_event, leave_event_context = self.get_success( - create_event( - self.hs, - sender=user1_id, - type=EventTypes.Member, - state_key=user1_id, - content={"membership": "leave"}, - room_id=room_id1, - ) - ) - self.get_success( - self.persistence.persist_events( - [ - (join_event, join_event_context), - (timeline_event, timeline_event_context), - (leave_event, leave_event_context), - ] - ) - ) - - after_room1_token = self.event_sources.get_current_token() - - # Get the membership changes for the user. - # - # At this point, the `current_state_delta_stream` table should look like (notice - # those three memberships at the end with `stream_id=7` because we persisted - # them in the same batch): - # - # TODO: DB rows to better see what's going on. - membership_changes = self.get_success( - self.store.get_current_state_delta_membership_changes_for_user( - user1_id, - from_key=before_room1_token.room_key, - to_key=after_room1_token.room_key, - ) - ) - - join_pos = self.get_success( - self.store.get_position_for_event(join_event.event_id) - ) - - # Let the whole diff show on failure - self.maxDiff = None - self.assertEqual( - membership_changes, - [ - CurrentStateDeltaMembership( - event_id=leave_event.event_id, - # Ideally, this would be `leave_pos` (to match the `event_id`) but - # when events are persisted in a batch, they are all stored in the - # `current_state_delta_stream` table with the minimum - # `stream_ordering` from the batch. 
- event_pos=join_pos, # leave_pos, - prev_event_id=None, - room_id=room_id1, - membership="leave", - sender=user1_id, - ), - ], - ) - def test_state_reset(self) -> None: """ Test a state reset scenario where the user gets removed from the room (when From 32b8b68df67c6ef4a11921704c570236d2d08592 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 Jun 2024 18:13:34 -0500 Subject: [PATCH 62/68] Add TODO to handle state resets See https://github.com/element-hq/synapse/pull/17320#discussion_r1656548733 --- synapse/handlers/sliding_sync.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index db5dd75d044..5dc98679071 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1002,6 +1002,12 @@ async def get_room_sync_data( stripped_state.append(strip_event(invite_or_knock_event)) + # TODO: Handle state resets. For example, if we see + # `rooms_membership_for_user_at_to_token.membership = Membership.LEAVE` but + # `required_state` doesn't include it, we should indicate to the client that a + # state reset happened. Perhaps we should indicate this by setting `initial: + # True` and empty `required_state`. 
+ return SlidingSyncResult.RoomResult( # TODO: Dummy value name=None, From 9e53336a71f3567c451456d778e0606cef19cde1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 13:44:00 -0500 Subject: [PATCH 63/68] Avoid fetching full events for `prev_event_ids` See https://github.com/element-hq/synapse/pull/17320#discussion_r1658832755 --- synapse/handlers/sliding_sync.py | 72 ++++++------------------ synapse/storage/databases/main/stream.py | 51 +++++++++++++---- 2 files changed, 59 insertions(+), 64 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5dc98679071..c9285d23c02 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -464,7 +464,6 @@ async def get_sync_room_ids_for_user( # Since we fetched a snapshot of the users room list at some point in time after # the from/to tokens, we need to revert/rewind some membership changes to match # the point in time of the `to_token`. - prev_event_ids_in_from_to_range: List[str] = [] for ( room_id, first_membership_change_after_to_token, @@ -475,40 +474,13 @@ async def get_sync_room_ids_for_user( # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. else: - prev_event_ids_in_from_to_range.append( - first_membership_change_after_to_token.prev_event_id - ) - - # 1) Fixup (more) - # - # 1b) 1c) Fetch the previous membership events that apply to the from/to range - # and fixup our working list. 
- prev_events_in_from_to_range = await self.store.get_events( - prev_event_ids_in_from_to_range - ) - for prev_event_in_from_to_range in prev_events_in_from_to_range.values(): - # These fields should be present for all persisted events - assert ( - prev_event_in_from_to_range.internal_metadata.instance_name is not None - ) - assert ( - prev_event_in_from_to_range.internal_metadata.stream_ordering - is not None - ) - - # 1b) 1c) Update the membership with what we found - sync_room_id_set[prev_event_in_from_to_range.room_id] = ( - _RoomMembershipForUser( - event_id=prev_event_in_from_to_range.event_id, - event_pos=PersistedEventPosition( - instance_name=prev_event_in_from_to_range.internal_metadata.instance_name, - stream=prev_event_in_from_to_range.internal_metadata.stream_ordering, - ), - membership=prev_event_in_from_to_range.membership, - sender=prev_event_in_from_to_range.sender, + sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=first_membership_change_after_to_token.prev_event_id, + event_pos=first_membership_change_after_to_token.prev_event_pos, + membership=first_membership_change_after_to_token.prev_membership, + sender=first_membership_change_after_to_token.prev_sender, newly_joined=False, ) - ) # Filter the rooms that that we have updated room membership events to the point # in time of the `to_token` (from the "1)" fixups) @@ -600,12 +572,9 @@ async def get_sync_room_ids_for_user( ) # 3) Figure out `newly_joined` - prev_event_ids_before_token_range: List[str] = [] - for possibly_newly_joined_room_id in possibly_newly_joined_room_ids: + for room_id in possibly_newly_joined_room_ids: has_non_join_in_from_to_range = ( - has_non_join_event_by_room_id_in_from_to_range.get( - possibly_newly_joined_room_id, False - ) + has_non_join_event_by_room_id_in_from_to_range.get(room_id, False) ) # If the last membership event in the token range is a join and there is # also some non-join in the range, we know they `newly_joined`. 
@@ -618,6 +587,9 @@ async def get_sync_room_ids_for_user( prev_event_id = first_membership_change_by_room_id_in_from_to_range[ room_id ].prev_event_id + prev_membership = first_membership_change_by_room_id_in_from_to_range[ + room_id + ].prev_membership if prev_event_id is None: # We found a `newly_joined` room (we are joining the room for the @@ -625,22 +597,14 @@ async def get_sync_room_ids_for_user( filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ room_id ].copy_and_replace(newly_joined=True) - else: - # Last resort, we need to step back to the previous membership event - # just before the token range to see if we're joined then or not. - prev_event_ids_before_token_range.append(prev_event_id) - - # 3) more - prev_events_before_token_range = await self.store.get_events( - prev_event_ids_before_token_range - ) - for prev_event_before_token_range in prev_events_before_token_range.values(): - if prev_event_before_token_range.membership != Membership.JOIN: - # We found a `newly_joined` room (we left before the token range - # and joined within the token range) - filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ - room_id - ].copy_and_replace(newly_joined=True) + # Last resort, we need to step back to the previous membership event + # just before the token range to see if we're joined then or not. 
+ elif prev_membership != Membership.JOIN: + # We found a `newly_joined` room (we left before the token range + # and joined within the token range) + filtered_sync_room_id_set[room_id] = filtered_sync_room_id_set[ + room_id + ].copy_and_replace(newly_joined=True) return filtered_sync_room_id_set diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index c128eb5d5b4..29f675ae441 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -125,12 +125,17 @@ class CurrentStateDeltaMembership: sender: The person who sent the membership event """ + room_id: str + # Event event_id: Optional[str] event_pos: PersistedEventPosition - prev_event_id: Optional[str] - room_id: str membership: str sender: Optional[str] + # Prev event + prev_event_id: Optional[str] + prev_event_pos: Optional[PersistedEventPosition] + prev_membership: Optional[str] + prev_sender: Optional[str] def generate_pagination_where_clause( @@ -865,18 +870,22 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: # was added) and uses the *minimum* stream position for batches of events. sql = """ SELECT - e.event_id, - s.prev_event_id, s.room_id, + e.event_id, s.instance_name, s.stream_id, m.membership, e.sender, - m_prev.membership AS prev_membership + s.prev_event_id, + e_prev.instance_name AS prev_instance_name, + e_prev.stream_ordering AS prev_stream_ordering, + m_prev.membership AS prev_membership, + e_prev.sender AS prev_sender FROM current_state_delta_stream AS s LEFT JOIN events AS e ON e.event_id = s.event_id LEFT JOIN room_memberships AS m ON m.event_id = s.event_id - LEFT JOIN room_memberships AS m_prev ON s.prev_event_id = m_prev.event_id + LEFT JOIN events AS e_prev ON e_prev.event_id = s.prev_event_id + LEFT JOIN room_memberships AS m_prev ON m_prev.event_id = s.prev_event_id WHERE s.stream_id > ? AND s.stream_id <= ? AND s.type = ? AND s.state_key = ? 
@@ -887,14 +896,17 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: membership_changes: List[CurrentStateDeltaMembership] = [] for ( - event_id, - prev_event_id, room_id, + event_id, instance_name, stream_ordering, membership, sender, + prev_event_id, + prev_instance_name, + prev_stream_ordering, prev_membership, + prev_sender, ) in txn: assert room_id is not None assert instance_name is not None @@ -916,17 +928,36 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: continue membership_change = CurrentStateDeltaMembership( + room_id=room_id, + # Event event_id=event_id, event_pos=PersistedEventPosition( instance_name=instance_name, stream=stream_ordering, ), - prev_event_id=prev_event_id, - room_id=room_id, membership=( membership if membership is not None else Membership.LEAVE ), sender=sender, + # Prev event + prev_event_id=prev_event_id, + prev_event_pos=( + PersistedEventPosition( + instance_name=prev_instance_name, + stream=prev_stream_ordering, + ) + if ( + prev_instance_name is not None + and prev_stream_ordering is not None + ) + else None + ), + prev_membership=( + prev_membership + if prev_membership is not None + else Membership.LEAVE + ), + prev_sender=prev_sender, ) membership_changes.append(membership_change) From a4263bf92513ecb395fc646dd783badecd2b0c3a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 18:56:45 -0500 Subject: [PATCH 64/68] Update stream tests with prev event info --- synapse/storage/databases/main/stream.py | 24 +++---- tests/storage/test_stream.py | 80 +++++++++++++++++------- 2 files changed, 71 insertions(+), 33 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 29f675ae441..7e6beb52395 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -918,12 +918,13 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: instance_name, stream_ordering, 
): - # When the server leaves a room, it will insert new rows with - # `event_id = null` for all current state. This means we might - # already have a row for the leave event and then another for the - # same leave where the `event_id=null` but the `prev_event_id` is - # pointing back at the earlier leave event. We don't want to report - # the leave, if we already have a leave event. + # When the server leaves a room, it will insert new rows into the + # `current_state_delta_stream` table with `event_id = null` for all + # current state. This means we might already have a row for the + # leave event and then another for the same leave where the + # `event_id=null` but the `prev_event_id` is pointing back at the + # earlier leave event. We don't want to report the leave, if we + # already have a leave event. if event_id is None and prev_membership == Membership.LEAVE: continue @@ -935,6 +936,11 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: instance_name=instance_name, stream=stream_ordering, ), + # When `s.event_id = null`, we won't be able to get respective + # `room_membership` but can assume the user has left the room + # because this only happens when the server leaves a room + # (meaning everyone locally left) or a state reset which removed + # the person from the room. 
membership=( membership if membership is not None else Membership.LEAVE ), @@ -952,11 +958,7 @@ def f(txn: LoggingTransaction) -> List[CurrentStateDeltaMembership]: ) else None ), - prev_membership=( - prev_membership - if prev_membership is not None - else Membership.LEAVE - ), + prev_membership=prev_membership, prev_sender=prev_sender, ) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index e420e680e27..aad46b1b445 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -609,12 +609,15 @@ def test_returns_membership_events(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response["event_id"], event_pos=join_pos, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ) ], ) @@ -710,20 +713,26 @@ def test_server_left_room_after_us(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id1, event_id=leave_response1["event_id"], event_pos=leave_pos1, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -876,20 +885,26 @@ def test_we_cause_server_left_room(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), 
CurrentStateDeltaMembership( + room_id=room_id1, event_id=None, # leave_response1["event_id"], event_pos=leave_pos1, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=None, # user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -998,16 +1013,19 @@ def test_different_user_membership_persisted_in_same_batch(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_event1.event_id, # Ideally, this would be `join_pos1` (to match the `event_id`) but # when events are persisted in a batch, they are all stored in the # `current_state_delta_stream` table with the minimum # `stream_ordering` from the batch. event_pos=join_pos3, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) @@ -1024,6 +1042,9 @@ def test_state_reset(self) -> None: room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) join_response1 = self.helper.join(room_id1, user1_id, tok=user1_tok) + join_pos1 = self.get_success( + self.store.get_position_for_event(join_response1["event_id"]) + ) before_reset_token = self.event_sources.get_current_token() @@ -1089,12 +1110,15 @@ def test_state_reset(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=None, event_pos=dummy_state_pos, - prev_event_id=join_response1["event_id"], - room_id=room_id1, membership="leave", sender=None, # user1_id, + prev_event_id=join_response1["event_id"], + prev_event_pos=join_pos1, + prev_membership="join", + prev_sender=user1_id, ), ], ) @@ -1139,20 +1163,26 @@ def test_excluded_room_ids(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", 
sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), CurrentStateDeltaMembership( + room_id=room_id2, event_id=join_response2["event_id"], event_pos=join_pos2, - prev_event_id=None, - room_id=room_id2, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) @@ -1173,12 +1203,15 @@ def test_excluded_room_ids(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=room_id1, event_id=join_response1["event_id"], event_pos=join_pos1, - prev_event_id=None, - room_id=room_id1, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ) ], ) @@ -1366,12 +1399,15 @@ def test_remote_join(self) -> None: membership_changes, [ CurrentStateDeltaMembership( + room_id=intially_unjoined_room_id, event_id=join_event.event_id, event_pos=join_pos, - prev_event_id=None, - room_id=intially_unjoined_room_id, membership="join", sender=user1_id, + prev_event_id=None, + prev_event_pos=None, + prev_membership=None, + prev_sender=None, ), ], ) From 10d78d66b7f2e28c8391da7fc479b329eeddf3cd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 19:04:46 -0500 Subject: [PATCH 65/68] Protect for non-existent prev events --- synapse/handlers/sliding_sync.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c9285d23c02..8622ef84726 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -474,13 +474,26 @@ async def get_sync_room_ids_for_user( # 1b) 1c) From the first membership event after the `to_token`, step backward to the # previous membership that would apply to the from/to range. 
else: - sync_room_id_set[room_id] = _RoomMembershipForUser( - event_id=first_membership_change_after_to_token.prev_event_id, - event_pos=first_membership_change_after_to_token.prev_event_pos, - membership=first_membership_change_after_to_token.prev_membership, - sender=first_membership_change_after_to_token.prev_sender, - newly_joined=False, - ) + # We don't expect these fields to be `None` if we have a `prev_event_id` + # but we're being defensive since it's possible that the prev event was + # culled from the database. + if ( + first_membership_change_after_to_token.prev_event_pos is not None + and first_membership_change_after_to_token.prev_membership + is not None + ): + sync_room_id_set[room_id] = _RoomMembershipForUser( + event_id=first_membership_change_after_to_token.prev_event_id, + event_pos=first_membership_change_after_to_token.prev_event_pos, + membership=first_membership_change_after_to_token.prev_membership, + sender=first_membership_change_after_to_token.prev_sender, + newly_joined=False, + ) + else: + # If we can't find the previous membership event, we shouldn't + # include the room in the sync response since we can't determine the + # exact membership state and shouldn't rely on the current snapshot. 
+ sync_room_id_set.pop(room_id, None) # Filter the rooms that that we have updated room membership events to the point # in time of the `to_token` (from the "1)" fixups) From b8687e771cef14ac936bf4c401c83470fae1d8e7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 21:42:11 -0500 Subject: [PATCH 66/68] Select `to_key if to_key else from_key` See https://github.com/element-hq/synapse/pull/17320#discussion_r1646591886 --- synapse/storage/databases/main/stream.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 7e6beb52395..f96032c9533 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1827,7 +1827,9 @@ async def paginate_room_events( and to_key is not None and to_key.is_before_or_eq(from_key) ): - return [], from_key + # Token selection matches what we do in `_paginate_room_events_txn` if there + # are no rows + return [], to_key if to_key else from_key # Or vice-versa, if we're looking backwards and our `from_key` is already before # our `to_key`. 
elif ( @@ -1835,7 +1837,9 @@ async def paginate_room_events( and to_key is not None and from_key.is_before_or_eq(to_key) ): - return [], from_key + # Token selection matches what we do in `_paginate_room_events_txn` if there + # are no rows + return [], to_key if to_key else from_key rows, token = await self.db_pool.runInteraction( "paginate_room_events", From 7c9513ccb1d6e4ab296395fe171318a9a128d052 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 21:49:41 -0500 Subject: [PATCH 67/68] Add missing test description --- tests/handlers/test_sliding_sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index a751fef1df5..3d37a696d5b 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -1410,7 +1410,8 @@ def test_newly_joined_with_leave_join_in_token_range( self, ) -> None: """ - Test that `newly_joined` TODO + Test that even though we're joined before the token range, if we leave and join + within the token range, it's still counted as `newly_joined`. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") From 8b73185094657beb02c8e3d1c678fb8832ad53c6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 1 Jul 2024 22:41:08 -0500 Subject: [PATCH 68/68] Trigger CI again