-
Notifications
You must be signed in to change notification settings - Fork 214
/
Copy pathextension.py
130 lines (104 loc) · 4.68 KB
/
extension.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import asyncio
import mimetypes
from os.path import splitext
from urllib.parse import urlparse
from django.conf import settings
import aiohttp
import django_redis
import structlog
from asgiref.sync import sync_to_async
from redis.exceptions import ConnectionError
from api.utils.aiohttp import get_aiohttp_session
from api.utils.image_proxy.dataclasses import MediaInfo
from api.utils.image_proxy.exception import UpstreamThumbnailException
logger = structlog.get_logger(__name__)

# Timeout applied to the HEAD request that probes an image's content type.
_HEAD_TIMEOUT = aiohttp.ClientTimeout(
    total=settings.THUMBNAIL_EXTENSION_REQUEST_TIMEOUT
)

# Network failures seen during extension checks that are logged as warnings
# rather than errors (errors are what end up as, e.g., a Sentry issue).
# Only list exceptions here whose outcome we don't think we can control,
# whether now or in the future; anything actionable must stay out of this
# tuple so that it keeps being reported as an error.
# Some of these, like SSL errors and timeouts, may be used to inform liveness
# checks or otherwise useful catalog information in the future.
# For now, we just need to avoid filling up our error backlog with things
# we aren't able to actually do anything about right now.
_NON_ACTIONABLE_NETWORK_EXCEPTIONS = (
    aiohttp.ClientConnectorCertificateError,
    aiohttp.ClientConnectorSSLError,
    aiohttp.ClientConnectorError,
    aiohttp.ServerDisconnectedError,
    aiohttp.ServerTimeoutError,
    aiohttp.ClientOSError,
    asyncio.TimeoutError,
)
async def get_image_extension(media_info: MediaInfo) -> str | None:
    """
    Determine the file extension of the image described by ``media_info``.

    The extension is looked up from progressively more expensive sources,
    stopping at the first one that yields a value:

    1. the path of the image URL;
    2. the Redis cache entry for this media item;
    3. the ``Content-Type`` header of a HEAD request to the image URL.

    The outcome of step 3 is written back to the cache. When no extension
    could be derived, the placeholder ``"unknown"`` is cached instead, so a
    later call served from the cache returns that placeholder.

    :param media_info: the media item whose image extension is wanted
    :return: the extension without its leading dot, the cached placeholder
        ``"unknown"``, or ``None`` when the upstream response did not reveal
        a usable content type
    :raises UpstreamThumbnailException: if the HEAD request, the handling of
        its response, or the cache write-back fails; the original exception
        is attached as the cause
    """
    image_url = media_info.image_url
    cache = django_redis.get_redis_connection("default")
    key = f"media:{media_info.media_identifier}:thumb_type"

    ext = _get_file_extension_from_url(image_url)

    if not ext:
        # If the extension is not present in the URL, try to get it from the redis cache
        try:
            ext = await sync_to_async(cache.get)(key)
            # The cached value is decoded from bytes; a miss leaves ``ext`` as None
            ext = ext.decode("utf-8") if ext else None
        except ConnectionError:
            # Best effort only: without the cache we fall through to the HEAD request
            logger.warning("Redis connect failed, cannot get cached image extension.")

    if not ext:
        # If the extension is still not present, try getting it from the content type
        try:
            session = await get_aiohttp_session()
            async with session.head(
                image_url,
                raise_for_status=True,
                timeout=_HEAD_TIMEOUT,
                trace_request_ctx={
                    "timing_event_name": "thumbnail_extension_request_timing",
                    "timing_event_ctx": {"provider": media_info.media_provider},
                },
            ) as response:
                if response.headers and "Content-Type" in response.headers:
                    content_type = response.headers["Content-Type"]
                    ext = _get_file_extension_from_content_type(content_type)
                else:
                    ext = None

                # Cache the outcome (even a negative one) to avoid repeating the request
                await _cache_extension(cache, key, ext)
        except Exception as exc:
            # Aside from client errors, the timeout defined for `get_image_extension`
            # is generous, and if the head request exceeds it, we're comfortable saying
            # we'll skip generating this thumbnail. In the future, we might adjust
            # timeouts with per-provider granularity, but for now, we just have to
            # accept they will happen and are part of the set of non-actionable
            # networking errors that we don't need to report as errors to Sentry.
            if not isinstance(exc, asyncio.TimeoutError):
                if isinstance(exc, _NON_ACTIONABLE_NETWORK_EXCEPTIONS):
                    log = logger.warning
                else:
                    log = logger.error

                log("upstream_thumbnail_exception", exc=exc, exc_info=True)

            # Chain explicitly so the upstream failure is recorded as the cause
            raise UpstreamThumbnailException(
                "Failed to render thumbnail due to inability to check media "
                f"type. {exc}"
            ) from exc

    return ext
@sync_to_async
def _cache_extension(cache, key, ext):
    """
    Store ``ext`` in the cache under ``key``, or ``"unknown"`` if ``ext`` is falsy.

    A failure to connect to Redis is logged as a warning and otherwise ignored.
    """
    value = ext or "unknown"
    try:
        cache.set(key, value)
    except ConnectionError:
        logger.warning("Redis connect failed, cannot cache image extension.")
def _get_file_extension_from_url(image_url: str) -> str:
"""Return the image extension if present in the URL."""
parsed = urlparse(image_url)
_, ext = splitext(parsed.path)
return ext[1:].lower() # remove the leading dot
def _get_file_extension_from_content_type(content_type: str) -> str | None:
"""
Return the image extension if present in the Response's content type
header.
"""
if (
content_type
and "/" in content_type
and (ext := mimetypes.guess_extension(content_type.split(";")[0], strict=False))
):
return ext.strip(".")
return None