-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Experimental support to include bundled aggregations in search results (MSC3666) #11837
Changes from 5 commits
bbd10e3
4b3aebe
1de1e58
3bd4e78
adfdc64
58ebd31
3c2336b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Experimental support for [MSC3666](https://github.com/matrix-org/matrix-doc/pull/3666): including bundled aggregations in server side search results. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,8 @@ def __init__(self, hs: "HomeServer"): | |
self.state_store = self.storage.state | ||
self.auth = hs.get_auth() | ||
|
||
self._msc3666_enabled = hs.config.experimental.msc3666_enabled | ||
|
||
async def get_old_rooms_from_upgraded_room(self, room_id: str) -> Iterable[str]: | ||
"""Retrieves room IDs of old rooms in the history of an upgraded room. | ||
|
||
|
@@ -238,8 +240,6 @@ async def search( | |
|
||
results = search_result["results"] | ||
|
||
results_map = {r["event"].event_id: r for r in results} | ||
|
||
rank_map.update({r["event"].event_id: r["rank"] for r in results}) | ||
|
||
filtered_events = await search_filter.filter([r["event"] for r in results]) | ||
|
@@ -420,12 +420,29 @@ async def search( | |
|
||
time_now = self.clock.time_msec() | ||
|
||
aggregations = None | ||
if self._msc3666_enabled: | ||
aggregations = await self.store.get_bundled_aggregations( | ||
# Generate an iterable of EventBase for all the events that will be | ||
# returned, including contextual events. | ||
itertools.chain( | ||
# The events_before and events_after for each context. | ||
itertools.chain.from_iterable( | ||
itertools.chain(context["events_before"], context["events_after"]) # type: ignore[arg-type] | ||
for context in contexts.values() | ||
), | ||
# The returned events. | ||
allowed_events, | ||
), | ||
Comment on lines
+426
to
+436
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hrm. I think it's quite likely that we'll end up with some duplicates in here. Maybe we could have There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, the results are cached so hopefully we would just immediately hit the cache, but depending on that seems not very nice. Good call! 👍 |
||
user.to_string(), | ||
) | ||
|
||
for context in contexts.values(): | ||
context["events_before"] = self._event_serializer.serialize_events( | ||
context["events_before"], time_now # type: ignore[arg-type] | ||
context["events_before"], time_now, bundle_aggregations=aggregations # type: ignore[arg-type] | ||
) | ||
context["events_after"] = self._event_serializer.serialize_events( | ||
context["events_after"], time_now # type: ignore[arg-type] | ||
context["events_after"], time_now, bundle_aggregations=aggregations # type: ignore[arg-type] | ||
) | ||
|
||
state_results = {} | ||
|
@@ -442,7 +459,9 @@ async def search( | |
results.append( | ||
{ | ||
"rank": rank_map[e.event_id], | ||
"result": self._event_serializer.serialize_event(e, time_now), | ||
"result": self._event_serializer.serialize_event( | ||
e, time_now, bundle_aggregations=aggregations | ||
), | ||
"context": contexts.get(e.event_id, {}), | ||
} | ||
) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -682,10 +682,20 @@ async def get_bundled_aggregations( | |
A map of event ID to the bundled aggregation for the event. Not all | ||
events may have bundled aggregations in the results. | ||
""" | ||
# The already processed event IDs. Tracked separately from the result | ||
# since the result omits events which do not have bundled aggregations. | ||
seen_events = set() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was very much hoping to be able to do |
||
|
||
# TODO Parallelize. | ||
results = {} | ||
for event in events: | ||
# De-duplicate events by ID to handle the same event requested multiple | ||
# times. The caches that _get_bundled_aggregation_for_event use should | ||
# capture this, but best to reduce work. | ||
if event.event_id in seen_events: | ||
continue | ||
seen_events.add(event.event_id) | ||
|
||
event_result = await self._get_bundled_aggregation_for_event(event, user_id) | ||
if event_result: | ||
results[event.event_id] = event_result | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ugh. If this function wasn't 50000 lines long, we'd be able to follow this sort of thing much more easily.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, PyCharm noticed it and declared it unused. 😢
I can refactor this method a bit first if you'd like. I think moving the
context
calculation would be easy enough. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it would certainly be nice to refactor it if you have a few tuits. Suggest doing as a followup though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I took a look at doing this and it quickly spiraled. Will do as a follow-up.