Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Remove some more joins on room_memberships #5752

Merged
merged 6 commits into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/5752.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce database IO usage by optimising queries for current membership.
135 changes: 100 additions & 35 deletions synapse/storage/roommember.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,19 +179,27 @@ def _get_room_summary_txn(txn):

# we order by membership and then fairly arbitrarily by event_id so
# heroes are consistent
sql = """
SELECT m.user_id, m.membership, m.event_id
FROM room_memberships as m
INNER JOIN current_state_events as c
ON m.event_id = c.event_id
AND m.room_id = c.room_id
AND m.user_id = c.state_key
WHERE c.type = 'm.room.member' AND c.room_id = ?
ORDER BY
CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
m.event_id ASC
LIMIT ?
"""
if self._current_state_events_membership_up_to_date:
sql = """
SELECT state_key, membership, event_id
FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
ORDER BY
CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
event_id ASC
LIMIT ?
"""
else:
sql = """
SELECT c.state_key, m.membership, c.event_id
FROM room_memberships as m
INNER JOIN current_state_events as c USING (room_id, event_id)
WHERE c.type = 'm.room.member' AND c.room_id = ?
ORDER BY
CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
c.event_id ASC
LIMIT ?
"""

# 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
Expand Down Expand Up @@ -256,28 +264,35 @@ def get_invite_for_user_in_room(self, user_id, room_id):
defer.returnValue(invite)
defer.returnValue(None)

@defer.inlineCallbacks
def get_rooms_for_user_where_membership_is(self, user_id, membership_list):
""" Get all the rooms for this user where the membership for this user
matches one in the membership list.

Filters out forgotten rooms.

Args:
user_id (str): The user ID.
membership_list (list): A list of synapse.api.constants.Membership
values which the user must be in.

Returns:
A list of dictionary objects, with room_id, membership and sender
defined.
Deferred[list[RoomsForUser]]
"""
if not membership_list:
return defer.succeed(None)

return self.runInteraction(
rooms = yield self.runInteraction(
"get_rooms_for_user_where_membership_is",
self._get_rooms_for_user_where_membership_is_txn,
user_id,
membership_list,
)

# Now we filter out forgotten rooms
forgotten_rooms = yield self.get_forgotten_rooms_for_user(user_id)
return [room for room in rooms if room.room_id not in forgotten_rooms]

def _get_rooms_for_user_where_membership_is_txn(
self, txn, user_id, membership_list
):
Expand All @@ -287,26 +302,33 @@ def _get_rooms_for_user_where_membership_is_txn(

results = []
if membership_list:
where_clause = "user_id = ? AND (%s) AND forgotten = 0" % (
" OR ".join(["m.membership = ?" for _ in membership_list]),
)

args = [user_id]
args.extend(membership_list)
if self._current_state_events_membership_up_to_date:
sql = """
SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering
FROM current_state_events AS c
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND state_key = ?
AND c.membership IN (%s)
""" % (
",".join("?" * len(membership_list))
)
else:
sql = """
SELECT room_id, e.sender, m.membership, event_id, e.stream_ordering
FROM current_state_events AS c
INNER JOIN room_memberships AS m USING (room_id, event_id)
INNER JOIN events AS e USING (room_id, event_id)
WHERE
c.type = 'm.room.member'
AND state_key = ?
AND m.membership IN (%s)
""" % (
",".join("?" * len(membership_list))
)

sql = (
"SELECT m.room_id, m.sender, m.membership, m.event_id, e.stream_ordering"
" FROM current_state_events as c"
" INNER JOIN room_memberships as m"
" ON m.event_id = c.event_id"
" INNER JOIN events as e"
" ON e.event_id = c.event_id"
" AND m.room_id = c.room_id"
" AND m.user_id = c.state_key"
" WHERE c.type = 'm.room.member' AND %s"
) % (where_clause,)

txn.execute(sql, args)
txn.execute(sql, (user_id, *membership_list))
results = [RoomsForUser(**r) for r in self.cursor_to_dict(txn)]

if do_invite:
Expand Down Expand Up @@ -639,6 +661,39 @@ def f(txn):
count = yield self.runInteraction("did_forget_membership", f)
defer.returnValue(count == 0)

@cached()
def get_forgotten_rooms_for_user(self, user_id):
"""Gets all rooms the user has forgotten.

Args:
user_id (str)

Returns:
Deferred[set[str]]
"""

def _get_forgotten_rooms_for_user_txn(txn):
# This is a slightly convoluted query that first looks up all rooms
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so is this in aid of trying to support unforgetting of rooms? is that a thing that's possible?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, you can just rejoin the room (I believe)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually could you update the comment to make this a bit clearer?

# that the user has forgotten in the past, then rechecks that list
# to see if any have subsequently been updated. This is done so that
# we can use a partial index on `forgotten = 1` on the assumption
# that few users will actually forget many rooms.
sql = """
SELECT room_id, (
SELECT count(*) FROM room_memberships
WHERE room_id = m.room_id AND user_id = m.user_id AND forgotten = 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

won't this include membership events that happened before the "forget"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, the non-obvious way forgetting works is to mark all room memberships with forgotten = 1, so to check if a room is forgotten or not is a case of checking if any memberships have forgotten = 0

) AS count
FROM room_memberships AS m
WHERE user_id = ? AND forgotten = 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is going to be very painful until that background reindex completes, isn't it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It takes about ~500ms to 900ms for large accounts, small accounts take a few ms.

The query plan does a bitmap scan across room_id and user_id indices:

matrix=> explain analyze SELECT room_id, (
                    SELECT count(*) FROM room_memberships
                    WHERE room_id = m.room_id AND user_id = m.user_id AND forgotten = 0
                ) AS count
                FROM room_memberships AS m
                WHERE user_id = '@e:matrix.org' AND forgotten = 1
                GROUP BY room_id, user_id;
                                                                         QUERY PLAN                                                                          
-------------------------------------------------------------------------------------------------------------------------------------------------------------
 Group  (cost=2518.90..4262.71 rows=4 width=68) (actual time=0.412..0.412 rows=0 loops=1)
   Group Key: m.room_id, m.user_id
   ->  Sort  (cost=2518.90..2518.91 rows=4 width=60) (actual time=0.411..0.411 rows=0 loops=1)
         Sort Key: m.room_id
         Sort Method: quicksort  Memory: 25kB
         ->  Index Scan using room_memberships_user_id on room_memberships m  (cost=0.57..2518.86 rows=4 width=60) (actual time=0.407..0.407 rows=0 loops=1)
               Index Cond: (user_id = '@e:matrix.org'::text)
               Filter: (forgotten = 1)
               Rows Removed by Filter: 46
   SubPlan 1
     ->  Aggregate  (cost=435.94..435.95 rows=1 width=8) (never executed)
           ->  Bitmap Heap Scan on room_memberships  (cost=433.92..435.93 rows=1 width=0) (never executed)
                 Recheck Cond: ((user_id = m.user_id) AND (room_id = m.room_id))
                 Filter: (forgotten = 0)
                 ->  BitmapAnd  (cost=433.92..433.92 rows=1 width=0) (never executed)
                       ->  Bitmap Index Scan on room_memberships_user_id  (cost=0.00..32.24 rows=1289 width=0) (never executed)
                             Index Cond: (user_id = m.user_id)
                       ->  Bitmap Index Scan on room_memberships_room_id  (cost=0.00..401.43 rows=17981 width=0) (never executed)
                             Index Cond: (room_id = m.room_id)
 Planning time: 0.144 ms
 Execution time: 0.468 ms
(21 rows)

GROUP BY room_id, user_id;
"""
txn.execute(sql, (user_id,))
return set(row[0] for row in txn if row[1] == 0)

return self.runInteraction(
"get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn
)

@defer.inlineCallbacks
def get_rooms_user_has_been_in(self, user_id):
"""Get all rooms that the user has ever been in.
Expand Down Expand Up @@ -670,6 +725,13 @@ def __init__(self, db_conn, hs):
_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
self._background_current_state_membership,
)
self.register_background_index_update(
"room_membership_forgotten_idx",
index_name="room_memberships_user_room_forgotten",
table="room_memberships",
columns=["user_id", "room_id"],
where_clause="forgotten = 1",
)

def _store_room_members_txn(self, txn, events, backfilled):
"""Store a room member in the database.
Expand Down Expand Up @@ -771,6 +833,9 @@ def f(txn):
txn.execute(sql, (user_id, room_id))

self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
self._invalidate_cache_and_stream(
txn, self.get_forgotten_rooms_for_user, (user_id,)
)

return self.runInteraction("forget_membership", f)

Expand Down
25 changes: 25 additions & 0 deletions synapse/storage/schema/delta/56/room_membership_idx.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/* Copyright 2019 The Matrix.org Foundation C.I.C.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

-- We add membership to current state so that we don't need to join against
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this comment looks a bit lost?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Err, yes :/

-- room_memberships, which can be surprisingly costly (we do such queries
-- very frequently).
-- This will be null for non-membership events and the content.membership key
-- for membership events. (Will also be null for membership events until the
-- background update job has finished).

-- Adds an index on room_memberships for fetching all forgotten rooms for a user
INSERT INTO background_updates (update_name, progress_json) VALUES
('room_membership_forgotten_idx', '{}');