Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add a background job to automatically delete stale devices #12855

Merged
merged 11 commits into from
May 27, 2022
1 change: 1 addition & 0 deletions changelog.d/12855.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a configurable background job to delete stale devices.
12 changes: 12 additions & 0 deletions docs/usage/configuration/config_documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,18 @@ Example configuration:
dummy_events_threshold: 5
```
---
Config option `delete_stale_devices_after`

An optional duration. If set, Synapse will run a daily background task to log out and
delete any device that hasn't been accessed for more than the specified amount of time.

Defaults to no duration, which means devices are never pruned.

Example configuration:
```yaml
delete_stale_devices_after: 1y
```

## Homeserver blocking ##
Useful options for Synapse admins.

Expand Down
11 changes: 11 additions & 0 deletions synapse/config/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,17 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
config.get("exclude_rooms_from_sync") or []
)

delete_stale_devices_after: Optional[str] = (
config.get("delete_stale_devices_after") or None
)
babolivier marked this conversation as resolved.
Show resolved Hide resolved

if delete_stale_devices_after is not None:
self.delete_stale_devices_after: Optional[int] = self.parse_duration(
delete_stale_devices_after
)
else:
self.delete_stale_devices_after = None

def has_tls_listener(self) -> bool:
return any(listener.tls for listener in self.listeners)

Expand Down
33 changes: 32 additions & 1 deletion synapse/handlers/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
logger = logging.getLogger(__name__)

MAX_DEVICE_DISPLAY_NAME_LEN = 100
DELETE_STALE_DEVICES_INTERVAL_MS = 24 * 60 * 60 * 1000


class DeviceWorkerHandler:
Expand Down Expand Up @@ -292,6 +293,19 @@ def __init__(self, hs: "HomeServer"):
# On start up check if there are any updates pending.
hs.get_reactor().callWhenRunning(self._handle_new_device_update_async)

self._delete_stale_devices_after = hs.config.server.delete_stale_devices_after

# Ideally we would run this on a worker and condition this on the
# "run_background_tasks_on" setting, but this would mean making the notification
# of device list changes over federation work on workers, which is nontrivial.
if self._delete_stale_devices_after is not None:
self.clock.looping_call(
run_as_background_process,
DELETE_STALE_DEVICES_INTERVAL_MS,
"delete_stale_devices",
self._delete_stale_devices,
)

def _check_device_name_length(self, name: Optional[str]) -> None:
"""
Checks whether a device name is longer than the maximum allowed length.
Expand Down Expand Up @@ -367,6 +381,22 @@ async def check_device_registered(

raise errors.StoreError(500, "Couldn't generate a device ID.")

async def _delete_stale_devices(self) -> None:
"""Background task that deletes devices which haven't been accessed for more than
a configured time period.
"""
# We should only be running this job if the config option is defined.
assert self._delete_stale_devices_after is not None
now_ms = self.clock.time_msec()
since_ms = now_ms - self._delete_stale_devices_after
devices = await self.store.get_devices_not_accessed_since(since_ms)

for user_id, user_devices in devices.items():
# The devices table can contain devices for remote users, and we don't want
# e.g. break encryption by accidentally removing them.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a glance it looks like this is only the case for hidden devices, but I'd rather be overly safe here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment seems to say that we shouldn't have hidden devices for remote users?

# The cross-signing keys need to occupy the same namespace as devices,
# since signatures are identified by device ID. So add an entry to the
# device table to make sure that we don't have a collision with device
# IDs.
# We only need to do this for local users, since remote servers should be
# responsible for checking this for their own users.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see this now, but interestingly my homeserver's database seems to think otherwise:

synapse=# select count(*) from devices where hidden is TRUE and user_id like '%abolivier.bzh';
 count 
-------
    30
(1 row)

synapse=# select count(*) from devices where hidden is TRUE and user_id not like '%abolivier.bzh';
 count 
-------
   813
(1 row)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe due to #6956? That talks a bit about adding a background update for this, but chose not to?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, would we want this check in get_devices_not_accessed_since instead of here? I guess it doesn't make too much of a difference, but I don't think remote devices are ever "stale"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that's a good point.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe due to #6956? That talks a bit about adding a background update for this, but chose not to?

Right, yes, that looks like it's the culprit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I filed #12855 for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@clokep I think you mean #12898?

if self.hs.is_mine_id(user_id):
await self.delete_devices(user_id, user_devices)

@trace
async def delete_device(self, user_id: str, device_id: str) -> None:
"""Delete the given device
Expand Down Expand Up @@ -689,7 +719,8 @@ async def _handle_new_device_update_async(self) -> None:
)
# TODO: when called, this isn't in a logging context.
# This leads to log spam, sentry event spam, and massive
# memory usage. See #12552.
# memory usage.
# See https://github.com/matrix-org/synapse/issues/12552.
# log_kv(
# {"message": "sent device update to host", "host": host}
# )
Expand Down
38 changes: 38 additions & 0 deletions synapse/storage/databases/main/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,44 @@ def _prune_txn(txn: LoggingTransaction) -> None:
_prune_txn,
)

async def get_devices_not_accessed_since(
self, since_ms: int
) -> Dict[str, List[str]]:
"""Retrieves a list of all devices that haven't been accessed since a given date.
babolivier marked this conversation as resolved.
Show resolved Hide resolved

Args:
since_ms: the timestamp to select on, every device with a last access date
from before that time is returned.

Returns:
A dictionary with an entry for each user with at least one device matching
the request, which value is a list of the device ID(s) for the corresponding
device(s).
"""

def get_devices_not_accessed_since_txn(
txn: LoggingTransaction,
) -> List[Dict[str, str]]:
sql = """
SELECT user_id, device_id
FROM devices WHERE last_seen < ? AND hidden = FALSE
"""
txn.execute(sql, (since_ms,))
return self.db_pool.cursor_to_dict(txn)

rows = await self.db_pool.runInteraction(
"get_devices_not_accessed_since",
get_devices_not_accessed_since_txn,
)

devices: Dict[str, List[str]] = {}
for row in rows:
user_devices = devices.get(row["user_id"], [])
user_devices.append(row["device_id"])
devices[row["user_id"]] = user_devices
babolivier marked this conversation as resolved.
Show resolved Hide resolved

return devices


class DeviceBackgroundUpdateStore(SQLBaseStore):
def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@
# limitations under the License.
from http import HTTPStatus

from twisted.internet.testing import MemoryReactor

from synapse.api.errors import NotFoundError
from synapse.rest import admin, devices, room, sync
from synapse.rest.client import account, login, register
from synapse.server import HomeServer
from synapse.util import Clock

from tests import unittest

Expand Down Expand Up @@ -157,3 +162,41 @@ def test_not_receiving_local_device_list_changes(self) -> None:
self.assertNotIn(
alice_user_id, changed_device_lists, bob_sync_channel.json_body
)


class DevicesTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
sync.register_servlets,
]

def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.handler = hs.get_device_handler()

@unittest.override_config({"delete_stale_devices_after": 72000000})
def test_delete_stale_devices(self) -> None:
"""Tests that stale devices are automatically removed after a set time of
inactivity.
The configuration is set to delete devices that haven't been used in the past 20h.
"""
# Register a user and creates 2 devices for them.
user_id = self.register_user("user", "password")
tok1 = self.login("user", "password", device_id="abc")
tok2 = self.login("user", "password", device_id="def")

# Sync them so they have a last_seen value.
babolivier marked this conversation as resolved.
Show resolved Hide resolved
self.make_request("GET", "/sync", access_token=tok1)
self.make_request("GET", "/sync", access_token=tok2)

# Advance half a day and sync again with one of the devices, so that the next
# time the background job runs we don't delete this device (since it will look
# for devices that haven't been used for over an hour).
self.reactor.advance(43200)
self.make_request("GET", "/sync", access_token=tok1)

# Advance another half a day, and check that the device that has synced still
# exists but the one that hasn't has been removed.
self.reactor.advance(43200)
self.get_success(self.handler.get_device(user_id, "abc"))
self.get_failure(self.handler.get_device(user_id, "def"), NotFoundError)