Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Faster joins: add issue links to the TODOs (#13004)
Browse files Browse the repository at this point in the history
... to help us keep track of these things
  • Loading branch information
richvdh authored Jun 9, 2022
1 parent 97053c9 commit 7c6b220
Show file tree
Hide file tree
Showing 9 changed files with 27 additions and 2 deletions.
1 change: 1 addition & 0 deletions changelog.d/13004.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Faster joins: add issue links to the TODO comments in the code.
13 changes: 12 additions & 1 deletion synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ async def do_invite_join(
if ret.partial_state:
# TODO(faster_joins): roll this back if we don't manage to start the
# background resync (eg process_remote_join fails)
# https://github.com/matrix-org/synapse/issues/12998
await self.store.store_partial_state_room(room_id, ret.servers_in_room)

max_stream_id = await self._federation_event_handler.process_remote_join(
Expand Down Expand Up @@ -1506,14 +1507,17 @@ async def _sync_partial_state_room(
# TODO(faster_joins): do we need to lock to avoid races? What happens if other
# worker processes kick off a resync in parallel? Perhaps we should just elect
# a single worker to do the resync.
# https://github.com/matrix-org/synapse/issues/12994
#
# TODO(faster_joins): what happens if we leave the room during a resync? if we
# really leave, that might mean we have difficulty getting the room state over
# federation.
# https://github.com/matrix-org/synapse/issues/12802
#
# TODO(faster_joins): we need some way of prioritising which homeservers in
# `other_destinations` to try first, otherwise we'll spend ages trying dead
# homeservers for large rooms.
# https://github.com/matrix-org/synapse/issues/12999

if initial_destination is None and len(other_destinations) == 0:
raise ValueError(
Expand Down Expand Up @@ -1543,9 +1547,11 @@ async def _sync_partial_state_room(
# all the events are updated, so we can update current state and
# clear the lazy-loading flag.
logger.info("Updating current state for %s", room_id)
# TODO(faster_joins): support workers
# https://github.com/matrix-org/synapse/issues/12994
assert (
self._storage_controllers.persistence is not None
), "TODO(faster_joins): support for workers"
), "worker-mode deployments not currently supported here"
await self._storage_controllers.persistence.update_current_state(
room_id
)
Expand All @@ -1559,13 +1565,17 @@ async def _sync_partial_state_room(
)

# TODO(faster_joins) update room stats and user directory?
# https://github.com/matrix-org/synapse/issues/12814
# https://github.com/matrix-org/synapse/issues/12815
return

# we raced against more events arriving with partial state. Go round
# the loop again. We've already logged a warning, so no need for more.
# TODO(faster_joins): there is still a race here, whereby incoming events which raced
# with us will fail to be persisted after the call to `clear_partial_state_room` due to
# having partial state.
# https://github.com/matrix-org/synapse/issues/12988
#
continue

events = await self.store.get_events_as_list(
Expand All @@ -1588,6 +1598,7 @@ async def _sync_partial_state_room(
# indefinitely is also not the right thing to do if we can
# reach all homeservers and they all claim they don't have
# the state we want.
# https://github.com/matrix-org/synapse/issues/13000
logger.error(
"Failed to get state for %s at %s from %s because %s, "
"giving up!",
Expand Down
2 changes: 2 additions & 0 deletions synapse/handlers/federation_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ async def update_state_for_partial_state_event(
#
# TODO(faster_joins): we probably need to be more intelligent, and
# exclude partial-state prev_events from consideration
# https://github.com/matrix-org/synapse/issues/13001
logger.warning(
"%s still has partial state: can't de-partial-state it yet",
event.event_id,
Expand Down Expand Up @@ -777,6 +778,7 @@ async def _process_pulled_event(
state_ids = await self._resolve_state_at_missing_prevs(origin, event)
# TODO(faster_joins): make sure that _resolve_state_at_missing_prevs does
# not return partial state
# https://github.com/matrix-org/synapse/issues/13002

await self._process_received_pdu(
origin, event, state_ids=state_ids, backfilled=backfilled
Expand Down
1 change: 1 addition & 0 deletions synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,7 @@ async def create_new_client_event(
#
# TODO(faster_joins): figure out how this works, and make sure that the
# old state is complete.
# https://github.com/matrix-org/synapse/issues/13003
metadata = await self.store.get_metadata_for_events(state_event_ids)

state_map_for_event: MutableStateMap[str] = {}
Expand Down
5 changes: 4 additions & 1 deletion synapse/storage/controllers/persist_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,13 @@ async def update_current_state(self, room_id: str) -> None:

# TODO(faster_joins): get a real stream ordering, to make this work correctly
# across workers.
# https://github.com/matrix-org/synapse/issues/12994
#
# TODO(faster_joins): this can race against event persistence, in which case we
# will end up with incorrect state. Perhaps we should make this a job we
# farm out to the event persister, somehow.
# farm out to the event persister thread, somehow.
# https://github.com/matrix-org/synapse/issues/13007
#
stream_id = self.main_store.get_room_max_stream_ordering()
await self.persist_events_store.update_current_state(room_id, delta, stream_id)

Expand Down
3 changes: 3 additions & 0 deletions synapse/storage/controllers/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,9 @@ async def get_current_state_deltas(
up to date.
"""
# FIXME(faster_joins): what do we do here?
# https://github.com/matrix-org/synapse/issues/12814
# https://github.com/matrix-org/synapse/issues/12815
# https://github.com/matrix-org/synapse/issues/13008

return await self.stores.main.get_partial_current_state_deltas(
prev_stream_id, max_stream_id
Expand Down
2 changes: 2 additions & 0 deletions synapse/storage/databases/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,13 +1112,15 @@ async def clear_partial_state_room(self, room_id: str) -> bool:
# this can race with incoming events, so we watch out for FK errors.
# TODO(faster_joins): this still doesn't completely fix the race, since the persist process
# is not atomic. I fear we need an application-level lock.
# https://github.com/matrix-org/synapse/issues/12988
try:
await self.db_pool.runInteraction(
"clear_partial_state_room", self._clear_partial_state_room_txn, room_id
)
return True
except self.db_pool.engine.module.DatabaseError as e:
# TODO(faster_joins): how do we distinguish between FK errors and other errors?
# https://github.com/matrix-org/synapse/issues/12988
logger.warning(
"Exception while clearing lazy partial-state-room %s, retrying: %s",
room_id,
Expand Down
1 change: 1 addition & 0 deletions synapse/storage/databases/main/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ def _update_state_for_partial_state_event_txn(
)

# TODO(faster_joins): need to do something about workers here
# https://github.com/matrix-org/synapse/issues/12994
txn.call_after(self.is_partial_state_event.invalidate, (event.event_id,))
txn.call_after(
self._get_state_group_for_event.prefill,
Expand Down
1 change: 1 addition & 0 deletions synapse/storage/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool:
# the sender of a piece of state wasn't actually in the room, then clearly that
# state shouldn't have been returned.
# We should at least add some tests around this to see what happens.
# https://github.com/matrix-org/synapse/issues/13006

# if we haven't requested membership events, then it depends on the value of
# 'include_others'
Expand Down

0 comments on commit 7c6b220

Please sign in to comment.