Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add an admin api to delete local media. #8519

Merged
merged 8 commits into from
Oct 26, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 63 additions & 8 deletions docs/admin_api/media_admin_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,16 @@ Response:
}
```

# Draft: Delete local media
This API deletes the *local* media from the disc of your own server.
# Delete local media
This API deletes the *local* media from the disk of your own server.
This includes any local thumbnails and copies of media downloaded from
remote homeservers.
This API will not affect media that has been uploaded to external
media repositories (e.g https://github.com/turt2live/matrix-media-repo/).
See also [purge_remote_media.rst](purge_remote_media.rst).

## Delete a specific local media
Delete a specific ``media_id``.

anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
Request:

Expand All @@ -114,13 +122,60 @@ DELETE /_synapse/admin/v1/media/<server_name>/<media_id>

URL Parameters

* `server_name` - The name of your local server (e.g `matrix.org`)
* `media_id` - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`)
* ``server_name``: string - The name of your local server (e.g ``matrix.org``)
* ``media_id``: string - The ID of the media (e.g ``abcdefghijklmnopqrstuvwx``)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although it renders fine, we typically use single backticks in markdown files.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have done an update.


Response:

```json
{
"deleted_media":[
dklimpel marked this conversation as resolved.
Show resolved Hide resolved
"abcdefghijklmnopqrstuvwx"
],
"total": 1
}
```
{
"deleted": 1 # The number of media items successfully deleted
}
```

The following fields are returned in the JSON response body:

* ``deleted_media``: list of strings - List of deleted ``media_id``
dklimpel marked this conversation as resolved.
Show resolved Hide resolved
* ``total``: integer - Total number of deleted ``media_id``

## Delete local media by date or size

Request:

```
POST /_synapse/admin/v1/media/<server_name>/delete?before_ts=<before_ts>

{}
```

URL Parameters

* ``server_name``: string - The name of your local server (e.g ``matrix.org``)
dklimpel marked this conversation as resolved.
Show resolved Hide resolved
* ``before_ts``: string representing a positive integer - Unix timestamp in ms.
Files that were last used before this timestamp will be deleted. It is the timestamp of
last access and not the timestamp of creation.
* ``size_gt``: Optional - string representing a positive integer - Size of the media in bytes.
Files that are larger will be deleted. Defaults to ``0``.
* ``keep_profiles``: Optional - string representing a boolean - Switch to also delete files
dklimpel marked this conversation as resolved.
Show resolved Hide resolved
that are still used in image data (e.g. user profile, room avatar).
If ``false`` these files will be deleted. Defaults to ``true``.
dklimpel marked this conversation as resolved.
Show resolved Hide resolved

Response:

```json
{
"deleted_media":[
dklimpel marked this conversation as resolved.
Show resolved Hide resolved
"abcdefghijklmnopqrstuvwx",
"abcdefghijklmnopqrstuvwz"
],
"total": 2
}
```

The following fields are returned in the JSON response body:

* ``deleted_media``: list of strings - List of deleted ``media_id``
dklimpel marked this conversation as resolved.
Show resolved Hide resolved
* ``total``: integer - Total number of deleted ``media_id``
55 changes: 50 additions & 5 deletions synapse/rest/admin/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

import logging

from synapse.api.errors import AuthError, NotFoundError, SynapseError
from synapse.http.servlet import RestServlet, parse_integer
from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
from synapse.http.servlet import RestServlet, parse_boolean, parse_integer
from synapse.rest.admin._base import (
admin_patterns,
assert_requester_is_admin,
Expand Down Expand Up @@ -155,7 +155,7 @@ class DeleteMediaByID(RestServlet):
"""Delete local media by a given ID. Removes it from this server.
"""

PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)", "v1")
PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)")

def __init__(self, hs):
self.store = hs.get_datastore()
Expand All @@ -174,8 +174,52 @@ async def on_DELETE(self, request, server_name: str, media_id: str):

logging.info("Deleting local media by ID: %s", media_id)

ret = await self.media_repository.delete_local_media(media_id)
return 200, {"deleted": ret}
deleted_media, total = await self.media_repository.delete_local_media(media_id)
return 200, {"deleted_media": deleted_media, "total": total}


class DeleteMediaByDateSize(RestServlet):
    """Admin servlet that bulk-deletes local media from this server.

    Media is selected by a last-access timestamp and, optionally, by a
    minimum size. Handles ``POST`` on ``/media/<server_name>/delete``.
    """

    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/delete")

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()
        self.server_name = hs.hostname
        self.media_repository = hs.get_media_repository()

    async def on_POST(self, request, server_name: str):
        """Delete local media matching the query parameters.

        Query parameters read from ``request``:
            before_ts: required integer, must not be negative.
            size_gt: integer, defaults to 0, must not be negative.
            keep_profiles: boolean, defaults to True.

        Args:
            request: The incoming request.
            server_name: Server name taken from the URL path; it has to be
                equal to this homeserver's own name.
        Returns:
            A tuple of (200, JSON body with "deleted_media" and "total").
        Raises:
            SynapseError: 400 with Codes.INVALID_PARAM if before_ts or size_gt
                is negative; 400 if server_name is not this server.
        """
        await assert_requester_is_admin(self.auth, request)

        before_ts = parse_integer(request, "before_ts", required=True)
        size_gt = parse_integer(request, "size_gt", default=0)
        keep_profiles = parse_boolean(request, "keep_profiles", default=True)

        # Reject negative numbers, checking before_ts first and size_gt second.
        range_checks = (
            (
                before_ts,
                "Query parameter before_ts must be a string representing a positive integer.",
            ),
            (
                size_gt,
                "Query parameter size_gt must be a string representing a positive integer.",
            ),
        )
        for value, message in range_checks:
            if value < 0:
                raise SynapseError(400, message, errcode=Codes.INVALID_PARAM)

        if self.server_name != server_name:
            raise SynapseError(400, "Can only delete local media")

        logging.info("Deleting local media by timestamp: %s", before_ts)

        result = await self.media_repository.delete_old_local_media(
            before_ts, size_gt, keep_profiles
        )
        deleted_media, total = result
        return 200, {"deleted_media": deleted_media, "total": total}


def register_servlets_for_media_repo(hs, http_server):
Expand All @@ -188,3 +232,4 @@ def register_servlets_for_media_repo(hs, http_server):
QuarantineMediaByUser(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
DeleteMediaByID(hs).register(http_server)
DeleteMediaByDateSize(hs).register(http_server)
78 changes: 61 additions & 17 deletions synapse/rest/media/v1/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
import os
import shutil
from typing import IO, Dict, Optional, Tuple
from typing import IO, Dict, List, Optional, Tuple

import twisted.internet.error
import twisted.web.http
Expand Down Expand Up @@ -767,35 +767,79 @@ async def delete_old_remote_media(self, before_ts):

return {"deleted": deleted}

async def delete_local_media(self, media_id: str) -> int:
async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]:
"""
Delete the given media_id from this server
dklimpel marked this conversation as resolved.
Show resolved Hide resolved

Args:
media_id: The media ID to delete.
Returns:
Number of deleted files.
In this case 1 or 0
List of deleted media_id
Number of deleted media_id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably word this something more like "A tuple of (list of deleted media IDs, total deleted media IDs)."

Likewise below.

"""
logger.info("Deleting local media: %s", media_id)

full_path = self.filepaths.local_media_filepath(media_id)
try:
os.remove(full_path)
except OSError as e:
logger.warning("Failed to remove file: %r: %s", full_path, e)
if e.errno != errno.ENOENT:
return 0
return await self._remove_local_media_from_disk([media_id])

async def delete_old_local_media(
    self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True,
) -> Tuple[List[str], int]:
    """
    Delete local media from this server, selected by age and size

    The candidate media IDs come from the store's get_local_media_before
    and are then handed to _remove_local_media_from_disk.

    Args:
        before_ts: Unix timestamp in ms.
            Files that were last used before this timestamp will be deleted
        size_gt: Size of the media in bytes. Files that are larger will be deleted
        keep_profiles: Switch to also delete files that are still used in image
            data (e.g. user profile, room avatar).
            If false these files will be deleted
    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
    """
    media_ids = await self.store.get_local_media_before(
        before_ts, size_gt, keep_profiles,
    )
    logger.info("Deleting local media: %s", media_ids)

    removal_result = await self._remove_local_media_from_disk(media_ids)
    return removal_result

async def _remove_local_media_from_disk(
    self, media_ids: List[str]
) -> Tuple[List[str], int]:
    """
    Delete the given media from this server

    For every media ID, in order: the media file is removed, its thumbnail
    directory is removed (errors ignored), and the store's
    delete_remote_media, delete_url_cache and delete_url_cache_media are
    called for it.

    Args:
        media_ids: List of media_id to delete
    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
        A media ID whose file could not be removed for a reason other than
        the file not existing is skipped and not included.
    """
    deleted_ids = []
    for media_id in media_ids:
        logger.info("Deleting: %s", media_id)

        media_path = self.filepaths.local_media_filepath(media_id)
        try:
            os.remove(media_path)
        except OSError as err:
            logger.warning("Failed to remove file: %r: %s", media_path, err)
            # A file that is already gone is fine: carry on with the clean-up.
            # Any other failure means this media ID is left alone.
            if err.errno != errno.ENOENT:
                continue

        shutil.rmtree(
            self.filepaths.local_media_thumbnail_dir(media_id), ignore_errors=True
        )

        await self.store.delete_remote_media(self.server_name, media_id)

        id_tuple = (media_id,)
        await self.store.delete_url_cache(id_tuple)
        await self.store.delete_url_cache_media(id_tuple)

        deleted_ids.append(media_id)

    return deleted_ids, len(deleted_ids)


class MediaRepositoryResource(Resource):
Expand Down
48 changes: 48 additions & 0 deletions synapse/storage/databases/main/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):

def __init__(self, database: DatabasePool, db_conn, hs):
    super().__init__(database, db_conn, hs)
    # Kept so queries can build this server's "mxc://<server_name>/" media
    # URI prefix (used by get_local_media_before).
    self.server_name = hs.hostname

async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]:
"""Get the metadata for a local piece of media
Expand All @@ -115,6 +116,53 @@ async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]:
desc="get_local_media",
)

async def get_local_media_before(
    self, before_ts: int, size_gt: int, keep_profiles: bool,
) -> List[str]:
    """Get the IDs of local media that are old and large enough to delete.

    Args:
        before_ts: Unix timestamp in ms. Media last accessed before this is
            selected. Media that has never been accessed (NULL
            last_access_ts) is selected if it was created before this.
        size_gt: Only media whose media_length is greater than this (in
            bytes) is selected.
        keep_profiles: If true, media whose mxc URI is referenced as an
            avatar in profiles, groups, room_memberships, user_directory or
            room_stats_state is excluded from the result.
    Returns:
        A list of matching media IDs (empty if nothing matches).
    """

    # A comparison against NULL is never true in SQL, so a plain
    # "last_access_ts < ?" would silently skip media that was uploaded but
    # never accessed. Fall back to created_ts for those rows.
    sql = """
        SELECT media_id
        FROM local_media_repository AS lmr
        WHERE
            ( last_access_ts < ?
            OR ( created_ts < ? AND last_access_ts IS NULL ) )
            AND media_length > ?
    """

    if keep_profiles:
        # NOTE: the mxc prefix is interpolated into the SQL text rather than
        # bound as a parameter; it is built from self.server_name only.
        sql_keep = """
            AND (
                NOT EXISTS
                    (SELECT 1
                     FROM profiles
                     WHERE profiles.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM groups
                     WHERE groups.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM room_memberships
                     WHERE room_memberships.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM user_directory
                     WHERE user_directory.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM room_stats_state
                     WHERE room_stats_state.avatar = '{media_prefix}' || lmr.media_id)
            )
        """.format(
            media_prefix="mxc://%s/" % (self.server_name,),
        )
        sql += sql_keep

    def _get_local_media_before_txn(txn):
        # before_ts is bound twice: once for last_access_ts, once for the
        # created_ts fallback.
        txn.execute(sql, (before_ts, before_ts, size_gt))
        return [row[0] for row in txn]

    return await self.db_pool.runInteraction(
        "get_local_media_before", _get_local_media_before_txn
    )

async def store_local_media(
self,
media_id,
Expand Down
Loading