From c857f5ef9b77b07179555a63973f11b8aa8c1752 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Sep 2018 12:48:10 +0100 Subject: [PATCH 1/5] Make purge history slightly faster Don't pull out events that are outliers and won't be deleted, as nothing should happen to them. --- synapse/storage/events.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 30ff87a4c464..1c8e01a47ea3 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1930,15 +1930,22 @@ def _purge_history_txn( should_delete_params = () if not delete_local_events: should_delete_expr += " AND event_id NOT LIKE ?" - should_delete_params += ("%:" + self.hs.hostname, ) + should_delete_params += ( + "%:" + self.hs.hostname, + "%:" + self.hs.hostname, + ) should_delete_params += (room_id, token.topological) + # Note that we insert events that are outliers and aren't going to be + # deleted, as nothing will happen to them. txn.execute( "INSERT INTO events_to_purge" " SELECT event_id, %s" " FROM events AS e LEFT JOIN state_events USING (event_id)" - " WHERE e.room_id = ? AND topological_ordering < ?" % ( + " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?" + % ( + should_delete_expr, should_delete_expr, ), should_delete_params, From e7cd7cb0f02f1f918387e4b3061b4a251c5d68fc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Sep 2018 12:55:40 +0100 Subject: [PATCH 2/5] Newsfile --- changelog.d/3856.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3856.misc diff --git a/changelog.d/3856.misc b/changelog.d/3856.misc new file mode 100644 index 000000000000..36c311eb3db9 --- /dev/null +++ b/changelog.d/3856.misc @@ -0,0 +1 @@ +Speed up purge history for rooms that have been previously purged From 9dbe38ea7de66f6e5dfab1570dd3a5217a4883eb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Sep 2018 15:05:52 +0100 Subject: [PATCH 3/5] Create indices after insertion --- synapse/storage/events.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1c8e01a47ea3..622f2ababf1e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1890,20 +1890,6 @@ def _purge_history_txn( ")" ) - # create an index on should_delete because later we'll be looking for - # the should_delete / shouldn't_delete subsets - txn.execute( - "CREATE INDEX events_to_purge_should_delete" - " ON events_to_purge(should_delete)", - ) - - # We do joins against events_to_purge for e.g. calculating state - # groups to purge, etc., so lets make an index. - txn.execute( - "CREATE INDEX events_to_purge_id" - " ON events_to_purge(event_id)", - ) - # First ensure that we're not about to delete all the forward extremeties txn.execute( "SELECT e.event_id, e.depth FROM events as e " @@ -1950,6 +1936,24 @@ def _purge_history_txn( ), should_delete_params, ) + + # We create the indices *after* insertion as that's a lot faster. + + # create an index on should_delete because later we'll be looking for + # the should_delete / shouldn't_delete subsets + txn.execute( + "CREATE INDEX events_to_purge_should_delete" + " ON events_to_purge(should_delete)", + ) + + # We do joins against events_to_purge for e.g. calculating state + # groups to purge, etc., so lets make an index. + txn.execute( + "CREATE INDEX events_to_purge_id" + " ON events_to_purge(event_id)", + ) + + txn.execute( "SELECT event_id, should_delete FROM events_to_purge" ) From 9cbd0094f0dcf03676b3234896e73af80cb75021 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Sep 2018 15:15:35 +0100 Subject: [PATCH 4/5] pep8 --- synapse/storage/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 622f2ababf1e..bc1c18023b13 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1953,7 +1953,6 @@ def _purge_history_txn( " ON events_to_purge(event_id)", ) - txn.execute( "SELECT event_id, should_delete FROM events_to_purge" ) From ed5331a6272280efd4fc98ae425711acec8c6be5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 13 Sep 2018 16:10:56 +0100 Subject: [PATCH 5/5] comment --- synapse/storage/events.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bc1c18023b13..e7487311ce9b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1916,6 +1916,8 @@ def _purge_history_txn( should_delete_params = () if not delete_local_events: should_delete_expr += " AND event_id NOT LIKE ?" + + # We include the parameter twice since we use the expression twice should_delete_params += ( "%:" + self.hs.hostname, "%:" + self.hs.hostname,