Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add config options for media retention #12732

Merged
merged 14 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,39 @@ media_store_path: "DATADIR/media_store"
# height: 600
# method: scale

# Configure media retention settings. Media will be purged if it
# has not been accessed in at least this amount of time. If the
# media has never been accessed, the media's creation time is used
# instead. Both thumbnails and the original media will be removed.
#
# Media is 'accessed' when loaded in a room in a client, or
# otherwise downloaded by a local or remote user.
#
media_retention:
# Whether media retention settings should apply. Defaults to
# false.
#
# Uncomment to enable media retention on this homeserver.
#
#enabled: true

# How long to keep local media since its last access. Local
# media that is removed will be permanently deleted.
#
# If this option is not set, local media will not have a
# retention policy applied.
#
#local_media_lifetime: 30d

# How long to keep downloaded remote media since its last
# access. Remote media will be downloaded again from the
# originating server on demand.
#
# If this option is not set, remote media will not have a
# retention policy applied.
#
remote_media_lifetime: 7d

# Is the preview URL API enabled?
#
# 'false' by default: uncomment the following to enable it (and specify a
Expand Down
50 changes: 50 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,23 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
"url_preview_accept_language"
) or ["en"]

media_retention = config.get("media_retention") or {}
self.media_retention_enabled = media_retention.get("enabled", False)

self.media_retention_local_media_lifetime_ms = None
local_media_lifetime = media_retention.get("local_media_lifetime")
if local_media_lifetime is not None:
self.media_retention_local_media_lifetime_ms = self.parse_duration(
local_media_lifetime
)

self.media_retention_remote_media_lifetime_ms = None
remote_media_lifetime = media_retention.get("remote_media_lifetime")
if remote_media_lifetime is not None:
self.media_retention_remote_media_lifetime_ms = self.parse_duration(
remote_media_lifetime
)

def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
assert data_dir_path is not None
media_store = os.path.join(data_dir_path, "media_store")
Expand Down Expand Up @@ -289,6 +306,39 @@ def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
#thumbnail_sizes:
%(formatted_thumbnail_sizes)s

# Configure media retention settings. Media will be purged if it
# has not been accessed in at least this amount of time. If the
# media has never been accessed, the media's creation time is used
# instead. Both thumbnails and the original media will be removed.
#
# Media is 'accessed' when loaded in a room in a client, or
# otherwise downloaded by a local or remote user.
#
media_retention:
# Whether media retention settings should apply. Defaults to
# false.
#
# Uncomment to enable media retention on this homeserver.
#
#enabled: true

# How long to keep local media since its last access. Local
# media that is removed will be permanently deleted.
#
# If this option is not set, local media will not have a
# retention policy applied.
#
#local_media_lifetime: 30d

# How long to keep downloaded remote media since its last
# access. Remote media will be downloaded again from the
# originating server on demand.
#
# If this option is not set, remote media will not have a
# retention policy applied.
#
remote_media_lifetime: 7d

# Is the preview URL API enabled?
#
# 'false' by default: uncomment the following to enable it (and specify a
Expand Down
70 changes: 69 additions & 1 deletion synapse/rest/media/v1/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,12 @@
logger = logging.getLogger(__name__)


# How often to run the background job to update the "recently accessed"
# attribute of local and remote media.
# NOTE: despite the `_TS` suffix, this is an interval in milliseconds, not a
# timestamp.
UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000  # 1 minute
# How often to run the background job that purges local and remote media
# according to the configured media retention rules.
APPLY_MEDIA_RETENTION_RULES_PERIOD_MS = 60 * 60 * 1000  # 1 hour


class MediaRepository:
Expand Down Expand Up @@ -122,11 +127,32 @@ def __init__(self, hs: "HomeServer"):
self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS
)

# Media retention configuration options
self._media_retention_local_media_lifetime_ms = (
hs.config.media.media_retention_local_media_lifetime_ms
)
self._media_retention_remote_media_lifetime_ms = (
hs.config.media.media_retention_remote_media_lifetime_ms
)

if hs.config.media.media_retention_enabled:
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
# Run the background job to apply media retention rules every
# $APPLY_MEDIA_RETENTION_RULES_PERIOD_MS milliseconds.
self.clock.looping_call(
self._start_apply_media_retention_rules,
APPLY_MEDIA_RETENTION_RULES_PERIOD_MS,
)

def _start_update_recently_accessed(self) -> Deferred:
    """
    Hand `_update_recently_accessed` to `run_as_background_process` under the
    name "update_recently_accessed_media" and return the resulting Deferred.
    """
    job = self._update_recently_accessed
    return run_as_background_process("update_recently_accessed_media", job)

def _start_apply_media_retention_rules(self) -> Deferred:
    """
    Hand `_apply_media_retention_rules` to `run_as_background_process` under
    the name "apply_media_retention_rules" and return the resulting Deferred.
    """
    job = self._apply_media_retention_rules
    return run_as_background_process("apply_media_retention_rules", job)

async def _update_recently_accessed(self) -> None:
remote_media = self.recently_accessed_remotes
self.recently_accessed_remotes = set()
Expand Down Expand Up @@ -835,6 +861,48 @@ async def _generate_thumbnails(

return {"width": m_width, "height": m_height}

async def _apply_media_retention_rules(self) -> None:
    """
    Purge old local and remote media according to the media retention rules
    defined in the homeserver config.

    Remote media is processed first, then local media. Each kind is only
    purged when a lifetime has been configured for it, i.e. when the
    corresponding `_media_retention_*_media_lifetime_ms` attribute is not
    None. If neither is set, this method does nothing.
    """
    # Purge remote media
    if self._media_retention_remote_media_lifetime_ms is not None:
        # Calculate a threshold timestamp derived from the configured lifetime. Any
        # media that has not been accessed since this timestamp will be removed.
        remote_media_threshold_timestamp_ms = (
            self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms
        )

        # Pass the value as a logging argument so formatting is deferred until
        # the record is actually emitted.
        logger.info(
            "Purging remote media last accessed before %s",
            remote_media_threshold_timestamp_ms,
        )

        await self.delete_old_remote_media(
            before_ts=remote_media_threshold_timestamp_ms
        )

    # And now do the same for local media
    if self._media_retention_local_media_lifetime_ms is not None:
        # This works the same as the remote media threshold. The clock is read
        # again here rather than reusing the earlier reading.
        local_media_threshold_timestamp_ms = (
            self.clock.time_msec() - self._media_retention_local_media_lifetime_ms
        )

        logger.info(
            "Purging local media last accessed before %s",
            local_media_threshold_timestamp_ms,
        )

        # NOTE(review): keep_profiles=True presumably spares media that is in
        # use as a profile avatar - confirm against delete_old_local_media.
        await self.delete_old_local_media(
            before_ts=local_media_threshold_timestamp_ms,
            keep_profiles=True,
        )

async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
old_media = await self.store.get_remote_media_before(before_ts)

Expand Down