Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Experimental support to include bundled aggregations in search results (MSC3666) #11837

Merged
merged 7 commits into from
Feb 8, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/11837.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Experimental support for [MSC3666](https://github.com/matrix-org/matrix-doc/pull/3666): including bundled aggregations in server side search results.
2 changes: 2 additions & 0 deletions synapse/config/experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def read_config(self, config: JsonDict, **kwargs):

# MSC3440 (thread relation)
self.msc3440_enabled: bool = experimental.get("msc3440_enabled", False)
# MSC3666: including bundled aggregations in /search.
self.msc3666_enabled: bool = experimental.get("msc3666_enabled", False)

# MSC3026 (busy presence state)
self.msc3026_enabled: bool = experimental.get("msc3026_enabled", False)
Expand Down
29 changes: 24 additions & 5 deletions synapse/handlers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def __init__(self, hs: "HomeServer"):
self.state_store = self.storage.state
self.auth = hs.get_auth()

self._msc3666_enabled = hs.config.experimental.msc3666_enabled

async def get_old_rooms_from_upgraded_room(self, room_id: str) -> Iterable[str]:
"""Retrieves room IDs of old rooms in the history of an upgraded room.

Expand Down Expand Up @@ -238,8 +240,6 @@ async def search(

results = search_result["results"]

results_map = {r["event"].event_id: r for r in results}

Comment on lines -241 to -242
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ugh. If this function wasn't 50000 lines long, we'd be able to follow this sort of thing much more easily.

Copy link
Member Author

@clokep clokep Feb 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, PyCharm noticed it and declared it unused. 😢

I can refactor this method a bit first if you'd like. I think moving the context calculation would be easy enough.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would certainly be nice to refactor it if you can find the time. I'd suggest doing it as a follow-up, though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took a look at doing this and it quickly spiraled. Will do as a follow-up.

rank_map.update({r["event"].event_id: r["rank"] for r in results})

filtered_events = await search_filter.filter([r["event"] for r in results])
Expand Down Expand Up @@ -420,12 +420,29 @@ async def search(

time_now = self.clock.time_msec()

aggregations = None
if self._msc3666_enabled:
aggregations = await self.store.get_bundled_aggregations(
# Generate an iterable of EventBase for all the events that will be
# returned, including contextual events.
itertools.chain(
# The events_before and events_after for each context.
itertools.chain.from_iterable(
itertools.chain(context["events_before"], context["events_after"]) # type: ignore[arg-type]
for context in contexts.values()
),
# The returned events.
allowed_events,
),
Comment on lines +426 to +436
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hrm. I think it's quite likely that we'll end up with some duplicates in here. Maybe we could have get_bundled_aggregations deduplicate on event_id?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, the results are cached so hopefully we would just immediately hit the cache, but depending on that seems not very nice. Good call! 👍

user.to_string(),
)

for context in contexts.values():
context["events_before"] = self._event_serializer.serialize_events(
context["events_before"], time_now # type: ignore[arg-type]
context["events_before"], time_now, bundle_aggregations=aggregations # type: ignore[arg-type]
)
context["events_after"] = self._event_serializer.serialize_events(
context["events_after"], time_now # type: ignore[arg-type]
context["events_after"], time_now, bundle_aggregations=aggregations # type: ignore[arg-type]
)

state_results = {}
Expand All @@ -442,7 +459,9 @@ async def search(
results.append(
{
"rank": rank_map[e.event_id],
"result": self._event_serializer.serialize_event(e, time_now),
"result": self._event_serializer.serialize_event(
e, time_now, bundle_aggregations=aggregations
),
"context": contexts.get(e.event_id, {}),
}
)
Expand Down
10 changes: 10 additions & 0 deletions synapse/storage/databases/main/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,10 +682,20 @@ async def get_bundled_aggregations(
A map of event ID to the bundled aggregation for the event. Not all
events may have bundled aggregations in the results.
"""
# The already processed event IDs. Tracked separately from the result
# since the result omits events which do not have bundled aggregations.
seen_events = set()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was very much hoping to be able to do events = set(events), but I don't think that would work as events aren't comparable as far as I can tell.


# TODO Parallelize.
results = {}
for event in events:
# De-duplicate events by ID to handle the same event requested multiple
# times. The caches that _get_bundled_aggregation_for_event uses should
# capture this, but best to reduce work.
if event.event_id in seen_events:
continue
seen_events.add(event.event_id)

event_result = await self._get_bundled_aggregation_for_event(event, user_id)
if event_result:
results[event.event_id] = event_result
Expand Down
39 changes: 38 additions & 1 deletion tests/rest/client/test_relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,9 @@ def test_aggregation_must_be_annotation(self):
)
self.assertEquals(400, channel.code, channel.json_body)

@unittest.override_config({"experimental_features": {"msc3440_enabled": True}})
@unittest.override_config(
{"experimental_features": {"msc3440_enabled": True, "msc3666_enabled": True}}
)
def test_bundled_aggregations(self):
"""
Test that annotations, references, and threads get correctly bundled.
Expand Down Expand Up @@ -579,6 +581,23 @@ def assert_bundle(event_json: JsonDict) -> None:
self.assertTrue(room_timeline["limited"])
assert_bundle(self._find_event_in_chunk(room_timeline["events"]))

# Request search.
channel = self.make_request(
"POST",
"/search",
# Search term matches the parent message.
content={"search_categories": {"room_events": {"search_term": "Hi"}}},
access_token=self.user_token,
)
self.assertEquals(200, channel.code, channel.json_body)
chunk = [
result["result"]
for result in channel.json_body["search_categories"]["room_events"][
"results"
]
]
assert_bundle(self._find_event_in_chunk(chunk))

def test_aggregation_get_event_for_annotation(self):
"""Test that annotations do not get bundled aggregations included
when directly requested.
Expand Down Expand Up @@ -759,6 +778,7 @@ def test_ignore_invalid_room(self):
self.assertEquals(200, channel.code, channel.json_body)
self.assertNotIn("m.relations", channel.json_body["unsigned"])

@unittest.override_config({"experimental_features": {"msc3666_enabled": True}})
def test_edit(self):
"""Test that a simple edit works."""

Expand Down Expand Up @@ -825,6 +845,23 @@ def assert_bundle(event_json: JsonDict) -> None:
self.assertTrue(room_timeline["limited"])
assert_bundle(self._find_event_in_chunk(room_timeline["events"]))

# Request search.
channel = self.make_request(
"POST",
"/search",
# Search term matches the parent message.
content={"search_categories": {"room_events": {"search_term": "Hi"}}},
access_token=self.user_token,
)
self.assertEquals(200, channel.code, channel.json_body)
chunk = [
result["result"]
for result in channel.json_body["search_categories"]["room_events"][
"results"
]
]
assert_bundle(self._find_event_in_chunk(chunk))

def test_multi_edit(self):
"""Test that multiple edits, including attempts by people who
shouldn't be allowed, are correctly handled.
Expand Down