Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Make purge history slightly faster #3856

Merged
merged 6 commits into from
Sep 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/3856.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up purge history for rooms that have been previously purged
44 changes: 28 additions & 16 deletions synapse/storage/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -1890,20 +1890,6 @@ def _purge_history_txn(
")"
)

# create an index on should_delete because later we'll be looking for
# the should_delete / shouldn't_delete subsets
txn.execute(
"CREATE INDEX events_to_purge_should_delete"
" ON events_to_purge(should_delete)",
)

# We do joins against events_to_purge for e.g. calculating state
# groups to purge, etc., so lets make an index.
txn.execute(
"CREATE INDEX events_to_purge_id"
" ON events_to_purge(event_id)",
)

# First ensure that we're not about to delete all the forward extremeties
txn.execute(
"SELECT e.event_id, e.depth FROM events as e "
Expand All @@ -1930,19 +1916,45 @@ def _purge_history_txn(
should_delete_params = ()
if not delete_local_events:
should_delete_expr += " AND event_id NOT LIKE ?"
should_delete_params += ("%:" + self.hs.hostname, )

# We include the parameter twice since we use the expression twice
should_delete_params += (
"%:" + self.hs.hostname,
"%:" + self.hs.hostname,
)

should_delete_params += (room_id, token.topological)

# Note that we insert events that are outliers and aren't going to be
# deleted, as nothing will happen to them.
txn.execute(
"INSERT INTO events_to_purge"
" SELECT event_id, %s"
" FROM events AS e LEFT JOIN state_events USING (event_id)"
" WHERE e.room_id = ? AND topological_ordering < ?" % (
" WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
% (
should_delete_expr,
should_delete_expr,
),
should_delete_params,
)

# We create the indices *after* insertion as that's a lot faster.

# create an index on should_delete because later we'll be looking for
# the should_delete / shouldn't_delete subsets
txn.execute(
"CREATE INDEX events_to_purge_should_delete"
" ON events_to_purge(should_delete)",
)

# We do joins against events_to_purge for e.g. calculating state
# groups to purge, etc., so lets make an index.
txn.execute(
"CREATE INDEX events_to_purge_id"
" ON events_to_purge(event_id)",
)

txn.execute(
"SELECT event_id, should_delete FROM events_to_purge"
)
Expand Down