From 83e1d8896de3006a02ade34f2237756010bd4231 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Feb 2020 11:38:48 +0000 Subject: [PATCH 1/2] Reduce auth chains fetched during v2 state res. The state res v2 algorithm only cares about the difference between auth chains, so we can pass in the known common state to the `get_auth_chain` storage function so that it can ignore those events. --- synapse/state/__init__.py | 15 ++++++---- synapse/state/v2.py | 2 +- .../data_stores/main/event_federation.py | 28 +++++++++++++++---- tests/state/test_v2.py | 6 ++-- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index fdd6bef6b4ce..df7a4f6a893a 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -16,7 +16,7 @@ import logging from collections import namedtuple -from typing import Dict, Iterable, List, Optional +from typing import Dict, Iterable, List, Optional, Set from six import iteritems, itervalues @@ -662,7 +662,7 @@ def get_events(self, event_ids, allow_rejected=False): allow_rejected=allow_rejected, ) - def get_auth_chain(self, event_ids): + def get_auth_chain(self, event_ids: List[str], ignore_events: Set[str]): """Gets the full auth chain for a set of events (including rejected events). @@ -674,11 +674,16 @@ def get_auth_chain(self, event_ids): presence of rejected events Args: - event_ids (list): The event IDs of the events to fetch the auth - chain for. Must be state events. + event_ids: The event IDs of the events to fetch the auth chain for. + Must be state events. + ignore_events: Set of events to exclude from the returned auth + chain. + Returns: Deferred[list[str]]: List of event IDs of the auth chain. """ - return self.store.get_auth_chain_ids(event_ids, include_given=True) + return self.store.get_auth_chain_ids( + event_ids, include_given=True, ignore_events=ignore_events, + ) diff --git a/synapse/state/v2.py b/synapse/state/v2.py index 531018c6a5fa..75fe58305a46 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py @@ -248,7 +248,7 @@ def _get_auth_chain_difference(state_sets, event_map, state_res_store): and eid not in common ) - auth_chain = yield state_res_store.get_auth_chain(auth_ids) + auth_chain = yield state_res_store.get_auth_chain(auth_ids, common) auth_ids.update(auth_chain) auth_sets.append(auth_ids) diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py index 60c67457b4d1..e16da2577d76 100644 --- a/synapse/storage/data_stores/main/event_federation.py +++ b/synapse/storage/data_stores/main/event_federation.py @@ -14,6 +14,7 @@ # limitations under the License. import itertools import logging +from typing import List, Optional, Set from six.moves import range from six.moves.queue import Empty, PriorityQueue @@ -46,21 +47,37 @@ def get_auth_chain(self, event_ids, include_given=False): event_ids, include_given=include_given ).addCallback(self.get_events_as_list) - def get_auth_chain_ids(self, event_ids, include_given=False): + def get_auth_chain_ids( + self, + event_ids: List[str], + include_given: bool = False, + ignore_events: Optional[Set[str]] = None, + ): """Get auth events for given event_ids. The events *must* be state events. Args: - event_ids (list): state events - include_given (bool): include the given events in result + event_ids: state events + include_given: include the given events in result + ignore_events: Set of events to exclude from the returned auth + chain. This is useful if the caller will just discard the + given events anyway, and saves us from figuring out their auth + chains if not required. Returns: list of event_ids """ return self.db.runInteraction( - "get_auth_chain_ids", self._get_auth_chain_ids_txn, event_ids, include_given + "get_auth_chain_ids", + self._get_auth_chain_ids_txn, + event_ids, + include_given, + ignore_events, ) - def _get_auth_chain_ids_txn(self, txn, event_ids, include_given): + def _get_auth_chain_ids_txn(self, txn, event_ids, include_given, ignore_events): + if ignore_events is None: + ignore_events = set() + if include_given: results = set(event_ids) else: @@ -80,6 +97,7 @@ def _get_auth_chain_ids_txn(self, txn, event_ids, include_given): txn.execute(base_sql + clause, list(args)) new_front.update([r[0] for r in txn]) + new_front -= ignore_events new_front -= results front = new_front diff --git a/tests/state/test_v2.py b/tests/state/test_v2.py index 5bafad9f19f6..5059ade8503f 100644 --- a/tests/state/test_v2.py +++ b/tests/state/test_v2.py @@ -603,7 +603,7 @@ def get_events(self, event_ids, allow_rejected=False): return {eid: self.event_map[eid] for eid in event_ids if eid in self.event_map} - def get_auth_chain(self, event_ids): + def get_auth_chain(self, event_ids, ignore_events): """Gets the full auth chain for a set of events (including rejected events). @@ -617,6 +617,8 @@ def get_auth_chain(self, event_ids): Args: event_ids (list): The event IDs of the events to fetch the auth chain for. Must be state events. + ignore_events: Set of events to exclude from the returned auth + chain. Returns: Deferred[list[str]]: List of event IDs of the auth chain. @@ -627,7 +629,7 @@ def get_auth_chain(self, event_ids): stack = list(event_ids) while stack: event_id = stack.pop() - if event_id in result: + if event_id in result or event_id in ignore_events: continue result.add(event_id) From 7cc4d0cd1f64e35d49c59699bc7aadb87fef95d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Feb 2020 11:41:15 +0000 Subject: [PATCH 2/2] Newsfile --- changelog.d/6952.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6952.misc diff --git a/changelog.d/6952.misc b/changelog.d/6952.misc new file mode 100644 index 000000000000..e26dc5cab881 --- /dev/null +++ b/changelog.d/6952.misc @@ -0,0 +1 @@ +Improve perf of v2 state res for large rooms.