From 4bb26c95a931e0be79d6ab9649e4338f7467a987 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 10 Mar 2023 15:31:25 +0000 Subject: [PATCH] Refactor `filter_events_for_server` (#15240) * Tweak docstring and type hint * Flip logic and provide better name * Separate decision from action * Track a set of strings, not EventBases * Require explicit boolean options from callers * Add explicit option for partial state rooms * Changelog * Rename param --- changelog.d/15240.misc | 1 + .../sender/per_destination_queue.py | 2 + synapse/handlers/federation.py | 29 ++++++-- synapse/visibility.py | 67 +++++++++++++------ tests/test_visibility.py | 40 +++++++++-- 5 files changed, 109 insertions(+), 30 deletions(-) create mode 100644 changelog.d/15240.misc diff --git a/changelog.d/15240.misc b/changelog.d/15240.misc new file mode 100644 index 000000000000..2b7edf916ec5 --- /dev/null +++ b/changelog.d/15240.misc @@ -0,0 +1 @@ +Refactor `filter_events_for_server`. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index ffc9d95ee703..478187ce449a 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -547,6 +547,8 @@ async def _catch_up_transmission_loop(self) -> None: self._server_name, new_pdus, redact=False, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) # If we've filtered out all the extremities, fall back to diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5f2057269dac..80156ef343aa 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -392,7 +392,7 @@ async def _maybe_backfill_inner( get_prev_content=False, ) - # We set `check_history_visibility_only` as we might otherwise get false + # We unset `filter_out_erased_senders` as we might otherwise get false # positives from users having been erased. 
filtered_extremities = await filter_events_for_server( self._storage_controllers, @@ -400,7 +400,8 @@ async def _maybe_backfill_inner( self.server_name, events_to_check, redact=False, - check_history_visibility_only=True, + filter_out_erased_senders=False, + filter_out_remote_partial_state_events=False, ) if filtered_extremities: extremities_to_request.append(bp.event_id) @@ -1331,7 +1332,13 @@ async def on_backfill_request( ) events = await filter_events_for_server( - self._storage_controllers, origin, self.server_name, events + self._storage_controllers, + origin, + self.server_name, + events, + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) return events @@ -1362,7 +1369,13 @@ async def get_persisted_pdu( await self._event_auth_handler.assert_host_in_room(event.room_id, origin) events = await filter_events_for_server( - self._storage_controllers, origin, self.server_name, [event] + self._storage_controllers, + origin, + self.server_name, + [event], + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) event = events[0] return event @@ -1390,7 +1403,13 @@ async def on_get_missing_events( ) missing_events = await filter_events_for_server( - self._storage_controllers, origin, self.server_name, missing_events + self._storage_controllers, + origin, + self.server_name, + missing_events, + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) return missing_events diff --git a/synapse/visibility.py b/synapse/visibility.py index e442de31739e..468e22f8f64e 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -14,7 +14,17 @@ # limitations under the License. 
import logging from enum import Enum, auto -from typing import Collection, Dict, FrozenSet, List, Optional, Tuple +from typing import ( + Collection, + Dict, + FrozenSet, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, +) import attr from typing_extensions import Final @@ -565,29 +575,43 @@ async def filter_events_for_server( storage: StorageControllers, target_server_name: str, local_server_name: str, - events: List[EventBase], - redact: bool = True, - check_history_visibility_only: bool = False, + events: Sequence[EventBase], + *, + redact: bool, + filter_out_erased_senders: bool, + filter_out_remote_partial_state_events: bool, ) -> List[EventBase]: - """Filter a list of events based on whether given server is allowed to + """Filter a list of events based on whether the target server is allowed to see them. + For a fully stated room, the target server is allowed to see an event E if: + - the state at E has world readable or shared history vis, OR + - the state at E says that the target server is in the room. + + For a partially stated room, the target server is allowed to see E if: + - E was created by this homeserver, AND: + - the partial state at E has world readable or shared history vis, OR + - the partial state at E says that the target server is in the room. + + TODO: state before or state after? + Args: storage - server_name + target_server_name + local_server_name events - redact: Whether to return a redacted version of the event, or - to filter them out entirely. - check_history_visibility_only: Whether to only check the - history visibility, rather than things like if the sender has been + redact: Controls what to do with events which have been filtered out. + If True, include their redacted forms; if False, omit them entirely. + filter_out_erased_senders: If true, also filter out events whose sender has been erased. This is used e.g. during pagination to decide whether to backfill or not. 
- + filter_out_remote_partial_state_events: If True, also filter out events + created by other homeservers in partial state rooms. Returns The filtered events. """ - def is_sender_erased(event: EventBase, erased_senders: Dict[str, bool]) -> bool: + def is_sender_erased(event: EventBase, erased_senders: Mapping[str, bool]) -> bool: if erased_senders and erased_senders[event.sender]: logger.info("Sender of %s has been erased, redacting", event.event_id) return True @@ -616,7 +640,7 @@ def check_event_is_visible( # server has no users in the room: redact return False - if not check_history_visibility_only: + if filter_out_erased_senders: erased_senders = await storage.main.are_users_erased(e.sender for e in events) else: # We don't want to check whether users are erased, which is equivalent @@ -631,15 +655,15 @@ def check_event_is_visible( # otherwise a room could be fully joined after we retrieve those, which would then bypass # this check but would base the filtering on an outdated view of the membership events. - partial_state_invisible_events = set() - if not check_history_visibility_only: + partial_state_invisible_event_ids: Set[str] = set() + if filter_out_remote_partial_state_events: for e in events: sender_domain = get_domain_from_id(e.sender) if ( sender_domain != local_server_name and await storage.main.is_partial_state_room(e.room_id) ): - partial_state_invisible_events.add(e) + partial_state_invisible_event_ids.add(e.event_id) # Let's check to see if all the events have a history visibility # of "shared" or "world_readable". 
If that's the case then we don't @@ -658,17 +682,20 @@ def check_event_is_visible( target_server_name, ) - to_return = [] - for e in events: + def include_event_in_output(e: EventBase) -> bool: erased = is_sender_erased(e, erased_senders) visible = check_event_is_visible( event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {}) ) - if e in partial_state_invisible_events: + if e.event_id in partial_state_invisible_event_ids: visible = False - if visible and not erased: + return visible and not erased + + to_return = [] + for e in events: + if include_event_in_output(e): to_return.append(e) elif redact: to_return.append(prune_event(e)) diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 2801a950a85c..9ed330f55497 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -63,7 +63,13 @@ def test_filtering(self) -> None: filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "test_server", "hs", events_to_filter + self._storage_controllers, + "test_server", + "hs", + events_to_filter, + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) ) @@ -85,7 +91,13 @@ def test_filter_outlier(self) -> None: self.assertEqual( self.get_success( filter_events_for_server( - self._storage_controllers, "remote_hs", "hs", [outlier] + self._storage_controllers, + "remote_hs", + "hs", + [outlier], + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) ), [outlier], @@ -96,7 +108,13 @@ def test_filter_outlier(self) -> None: filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "remote_hs", "local_hs", [outlier, evt] + self._storage_controllers, + "remote_hs", + "local_hs", + [outlier, evt], + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) ) self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}") @@ -108,7 +126,13 @@ def 
test_filter_outlier(self) -> None: # be redacted) filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "other_server", "local_hs", [outlier, evt] + self._storage_controllers, + "other_server", + "local_hs", + [outlier, evt], + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) ) self.assertEqual(filtered[0], outlier) @@ -143,7 +167,13 @@ def test_erased_user(self) -> None: # ... and the filtering happens. filtered = self.get_success( filter_events_for_server( - self._storage_controllers, "test_server", "local_hs", events_to_filter + self._storage_controllers, + "test_server", + "local_hs", + events_to_filter, + redact=True, + filter_out_erased_senders=True, + filter_out_remote_partial_state_events=True, ) )