From 845721ab08649e9f7e4ce6d4b4952190287b9b15 Mon Sep 17 00:00:00 2001 From: mattc Date: Mon, 23 May 2022 14:26:07 +1000 Subject: [PATCH 1/4] Add - storage method to fetch the monthly active users (and their appservices) within an optionally specified time range. --- .../databases/main/monthly_active_users.py | 43 ++++++++++ tests/storage/test_monthly_active_users.py | 83 +++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index 5beb8f1d4bc8..cfaba39011d7 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -122,6 +122,49 @@ def _count_users_by_service(txn: LoggingTransaction) -> Dict[str, int]: "count_users_by_service", _count_users_by_service ) + async def get_monthly_active_users_by_service( + self, start_timestamp: Optional[int] = None, end_timestamp: Optional[int] = None + ) -> List[Tuple[str, str]]: + """Generates list of monthly active users and their services. + Please see "get_monthly_active_count_by_service" docstring for more details + about services. + + Arguments: + start_timestamp: If specified, only include users that were first active + at or after this point + end_timestamp: If specified, only include users that were first active + at or before this point + + Returns: + A list of tuples (appservice_id, user_id) + + """ + if start_timestamp is not None and end_timestamp is not None: + where_clause = 'WHERE "timestamp" >= ? and "timestamp" <= ?' + query_params = [start_timestamp, end_timestamp] + elif start_timestamp is not None: + where_clause = 'WHERE "timestamp" >= ?' + query_params = [start_timestamp] + elif end_timestamp is not None: + where_clause = 'WHERE "timestamp" <= ?' + query_params = [end_timestamp] + else: + where_clause = "" + query_params = [] + + def _list_users(txn: LoggingTransaction) -> List[Tuple[str, str]]: + sql = f""" + SELECT COALESCE(appservice_id, 'native'), user_id + FROM monthly_active_users + LEFT JOIN users ON monthly_active_users.user_id=users.name + {where_clause}; + """ + + txn.execute(sql, query_params) + return cast(List[Tuple[str, str]], txn.fetchall()) + + return await self.db_pool.runInteraction("list_users", _list_users) + async def get_registered_reserved_users(self) -> List[str]: """Of the reserved threepids defined in config, retrieve those that are associated with registered users diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 4c29ad79b643..e8b4a5644bf7 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -407,3 +407,86 @@ def test_get_monthly_active_count_by_service(self): self.assertEqual(result[service1], 2) self.assertEqual(result[service2], 1) self.assertEqual(result[native], 1) + + def test_get_monthly_active_users_by_service(self): + # (No users, no filtering) -> empty result + result = self.get_success(self.store.get_monthly_active_users_by_service()) + + self.assertEqual(len(result), 0) + + # (Some users, no filtering) -> non-empty result + appservice1_user1 = "@appservice1_user1:example.com" + appservice2_user1 = "@appservice2_user1:example.com" + service1 = "service1" + service2 = "service2" + self.get_success( + self.store.register_user( + user_id=appservice1_user1, password_hash=None, appservice_id=service1 + ) + ) + self.get_success(self.store.upsert_monthly_active_user(appservice1_user1)) + self.get_success( + self.store.register_user( + user_id=appservice2_user1, password_hash=None, appservice_id=service2 + ) + ) + self.get_success(self.store.upsert_monthly_active_user(appservice2_user1)) + + result = self.get_success(self.store.get_monthly_active_users_by_service()) + + self.assertEqual(len(result), 2) + self.assertIn((service1, appservice1_user1), result) + self.assertIn((service2, appservice2_user1), result) + + # (Some users, end-timestamp filtering) -> non-empty result + appservice1_user2 = "@appservice1_user2:example.com" + timestamp1 = self.reactor.seconds() + self.reactor.advance(5) + timestamp2 = self.reactor.seconds() + self.get_success( + self.store.register_user( + user_id=appservice1_user2, password_hash=None, appservice_id=service1 + ) + ) + self.get_success(self.store.upsert_monthly_active_user(appservice1_user2)) + + result = self.get_success( + self.store.get_monthly_active_users_by_service( + end_timestamp=round(timestamp1 * 1000) + ) + ) + + self.assertEqual(len(result), 2) + self.assertNotIn((service1, appservice1_user2), result) + + # (Some users, start-timestamp filtering) -> non-empty result + result = self.get_success( + self.store.get_monthly_active_users_by_service( + start_timestamp=round(timestamp2 * 1000) + ) + ) + + self.assertEqual(len(result), 1) + self.assertIn((service1, appservice1_user2), result) + + # (Some users, full-timestamp filtering) -> non-empty result + native_user1 = "@native_user1:example.com" + native = "native" + timestamp3 = self.reactor.seconds() + self.reactor.advance(100) + self.get_success( + self.store.register_user( + user_id=native_user1, password_hash=None, appservice_id=native + ) + ) + self.get_success(self.store.upsert_monthly_active_user(native_user1)) + + result = self.get_success( + self.store.get_monthly_active_users_by_service( + start_timestamp=round(timestamp2 * 1000), + end_timestamp=round(timestamp3 * 1000), + ) + ) + + self.assertEqual(len(result), 1) + self.assertIn((service1, appservice1_user2), result) From 0023f5da11cce6f54f7a6675bd9248e44f3f79c4 Mon Sep 17 00:00:00 2001 From: mattc Date: Mon, 23 May 2022 14:26:47 +1000 Subject: [PATCH 2/4] Add - module API method to fetch the monthly active users (and their appservices) within an optionally specified time range. --- synapse/module_api/__init__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 73f92d2df8d6..b7d526f375d0 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -1419,6 +1419,26 @@ async def set_push_rule_action( user_id, spec, {"actions": actions} ) + async def get_monthly_active_users_by_service( + self, start_timestamp: Optional[int] = None, end_timestamp: Optional[int] = None + ) -> List[Tuple[str, str]]: + """Generates list of monthly active users and their services. + Please see corresponding storage docstring for more details. + + Arguments: + start_timestamp: If specified, only include users that were first active + at or after this point + end_timestamp: If specified, only include users that were first active + at or before this point + + Returns: + A list of tuples (appservice_id, user_id) + + """ + return await self._store.get_monthly_active_users_by_service( + start_timestamp, end_timestamp + ) + class PublicRoomListManager: """Contains methods for adding to, removing from and querying whether a room From 451d7becda6c3e4971771bf12bce6aaba27a2315 Mon Sep 17 00:00:00 2001 From: mattc Date: Mon, 23 May 2022 13:58:38 +1000 Subject: [PATCH 3/4] Document - Add changelog entry for 12838. --- changelog.d/12838.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/12838.feature diff --git a/changelog.d/12838.feature b/changelog.d/12838.feature new file mode 100644 index 000000000000..b24489aaad6c --- /dev/null +++ b/changelog.d/12838.feature @@ -0,0 +1 @@ +Add storage and module API methods to get monthly active users (and their corresponding appservices) within an optionally specified time range. From 95accd8fecc2ecb6c4aa25afd609d0b0661c054b Mon Sep 17 00:00:00 2001 From: MattC Date: Thu, 26 May 2022 10:11:58 +1000 Subject: [PATCH 4/4] Refactor - Add comment about what appservice will be emitted for native Matrix users. --- synapse/storage/databases/main/monthly_active_users.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index cfaba39011d7..9a63f953fb5e 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -136,7 +136,9 @@ async def get_monthly_active_users_by_service( at or before this point Returns: - A list of tuples (appservice_id, user_id) + A list of tuples (appservice_id, user_id). "native" is emitted as the + appservice for users that don't come from appservices (i.e. native Matrix + users). """ if start_timestamp is not None and end_timestamp is not None: