Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Remove cached wrap on _get_joined_users_from_context method #13569

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13569.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove redundant `_get_joined_users_from_context` cache. Contributed by Nick @ Beeper (@fizzadar).
2 changes: 1 addition & 1 deletion synapse/state/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ async def get_current_user_ids_in_room(
logger.debug("calling resolve_state_groups from get_current_user_ids_in_room")
entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids)
state = await entry.get_state(self._state_storage_controller, StateFilter.all())
return await self.store.get_joined_user_ids_from_state(room_id, state, entry)
return await self.store.get_joined_user_ids_from_state(room_id, state)

async def get_hosts_in_room_at_events(
self, room_id: str, event_ids: Collection[str]
Expand Down
122 changes: 38 additions & 84 deletions synapse/storage/databases/main/roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import attr

from synapse.api.constants import EventTypes, Membership
from synapse.events import EventBase
from synapse.metrics import LaterGauge
from synapse.metrics.background_process_metrics import (
run_as_background_process,
Expand Down Expand Up @@ -862,96 +861,51 @@ async def get_mutual_rooms_between_users(
return shared_room_ids or frozenset()

async def get_joined_user_ids_from_state(
self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry"
self, room_id: str, state: StateMap[str]
) -> Set[str]:
state_group: Union[object, int] = state_entry.state_group
if not state_group:
# If state_group is None it means it has yet to be assigned a
# state group, i.e. we need to make sure that calls with a state_group
# of None don't hit previous cached calls with a None state_group.
# To do this we set the state_group to a new object as object() != object()
state_group = object()

assert state_group is not None
with Measure(self._clock, "get_joined_users_from_state"):
return await self._get_joined_user_ids_from_context(
room_id, state_group, state, context=state_entry
)
"""
For a given set of state IDs, get a set of user IDs in the room.

@cached(num_args=2, iterable=True, max_entries=100000)
async def _get_joined_user_ids_from_context(
self,
room_id: str,
state_group: Union[object, int],
current_state_ids: StateMap[str],
event: Optional[EventBase] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

context: this also seems to have been unused since #13267.

context: Optional["_StateCacheEntry"] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note to self/for posterity: context has always been not-None since #8070 removed the only call that doesn't provide it (https://github.com/matrix-org/synapse/pull/8070/files#diff-54aaace5758538a90a9ed3a020eb9b260139269ee33c10ee8b2caf850c8a3e1dL290)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also: #13267 changed things so that it was always a _StateCacheEntry. Previously (since time immemorial) it could have been either an EventContext or a _StateCacheEntry. Both classes have properties like delta_ids and prev_group, so this worked, though it's a miracle we never broke it.

) -> Set[str]:
# We don't use `state_group`, it's there so that we can cache based
# on it. However, it's important that it's never None, since two current_states
# with a state_group of None are likely to be different.
assert state_group is not None
This method checks the local event cache, before calling
`_get_user_ids_from_membership_event_ids` for any uncached events.
"""

users_in_room = set()
member_event_ids = [
e_id
for key, e_id in current_state_ids.items()
if key[0] == EventTypes.Member
]

if context is not None:
# If we have a context with a delta from a previous state group,
# check if we also have the result from the previous group in cache.
# If we do then we can reuse that result and simply update it with
# any membership changes in `delta_ids`
if context.prev_group and context.delta_ids:
prev_res = self._get_joined_user_ids_from_context.cache.get_immediate(
(room_id, context.prev_group), None
)
if prev_res and isinstance(prev_res, set):
users_in_room = prev_res
member_event_ids = [
e_id
for key, e_id in context.delta_ids.items()
if key[0] == EventTypes.Member
]
for etype, state_key in context.delta_ids:
if etype == EventTypes.Member:
users_in_room.discard(state_key)

# We check if we have any of the member event ids in the event cache
# before we ask the DB

# We don't update the event cache hit ratio as it completely throws off
# the hit ratio counts. After all, we don't populate the cache if we
# miss it here
event_map = self._get_events_from_local_cache(
member_event_ids, update_metrics=False
)
with Measure(self._clock, "get_joined_user_ids_from_state"):
users_in_room = set()
member_event_ids = [
e_id for key, e_id in state.items() if key[0] == EventTypes.Member
]

missing_member_event_ids = []
for event_id in member_event_ids:
ev_entry = event_map.get(event_id)
if ev_entry and not ev_entry.event.rejected_reason:
if ev_entry.event.membership == Membership.JOIN:
users_in_room.add(ev_entry.event.state_key)
else:
missing_member_event_ids.append(event_id)
# We check if we have any of the member event ids in the event cache
# before we ask the DB

if missing_member_event_ids:
event_to_memberships = await self._get_user_ids_from_membership_event_ids(
missing_member_event_ids
)
users_in_room.update(
user_id for user_id in event_to_memberships.values() if user_id
# We don't update the event cache hit ratio as it completely throws off
# the hit ratio counts. After all, we don't populate the cache if we
# miss it here
event_map = self._get_events_from_local_cache(
member_event_ids, update_metrics=False
)

if event is not None and event.type == EventTypes.Member:
if event.membership == Membership.JOIN:
if event.event_id in member_event_ids:
users_in_room.add(event.state_key)
missing_member_event_ids = []
for event_id in member_event_ids:
ev_entry = event_map.get(event_id)
if ev_entry and not ev_entry.event.rejected_reason:
if ev_entry.event.membership == Membership.JOIN:
users_in_room.add(ev_entry.event.state_key)
else:
missing_member_event_ids.append(event_id)

if missing_member_event_ids:
event_to_memberships = (
await self._get_user_ids_from_membership_event_ids(
missing_member_event_ids
)
)
users_in_room.update(
user_id for user_id in event_to_memberships.values() if user_id
)

return users_in_room
return users_in_room

@cached(
max_entries=10000,
Expand Down Expand Up @@ -1151,7 +1105,7 @@ async def _get_joined_hosts(
# The cache doesn't match the state group or prev state group,
# so we calculate the result from first principles.
joined_user_ids = await self.get_joined_user_ids_from_state(
room_id, state, state_entry
room_id, state
)

cache.hosts_to_joined_users = {}
Expand Down