Skip to content

Commit

Permalink
WebPush reliability improvements (#212)
Browse files Browse the repository at this point in the history
Part of #186:
 - Only reject push key on 404 or 410 response codes
 - Trim message body so we don't exceed 4096 characters
 - Log when returned TTL header is lower than request
 - Support events_only flag on push data, to avoid bogus notifications in browser.
  • Loading branch information
bwindels authored Apr 8, 2021
1 parent d8e5397 commit 6b2a7c1
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 11 deletions.
1 change: 1 addition & 0 deletions changelog.d/212.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Prevent the push key from being rejected for temporary errors and oversized payloads, add TTL logging, and support events_only push data flag.
6 changes: 6 additions & 0 deletions docs/applications.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,12 @@ any properties set in it will be present in the push messages you receive,
so it can be used to pass identifiers specific to your client
(like which account the notification is for).

##### events_only

As of the time of writing, all webpush-supporting browsers require you to set `userVisibleOnly: true` when calling (`pushManager.subscribe`)[https://developer.mozilla.org/en-US/docs/Web/API/PushManager/subscribe], to (prevent abusing webpush to track users)[https://goo.gl/yqv4Q4] without their knowledge. With this (mandatory) flag, the browser will show a "site has been updated in the background" notification if no notifications are visible after your service worker processes a `push` event. This can easily happen when sygnal sends a push message to clear the unread count, which is not specific to an event. With `events_only: true` in the pusher data, sygnal won't forward any push message without a event id. This prevents your service worker being forced to show a notification to push messages that clear the unread count.

##### Multiple pushers on one origin

Also note that because you can only have one push subscription per service worker,
and hence per origin, you might create pushers for different accounts with the same
p256dh push key. To prevent the server from removing other pushers with the same
Expand Down
87 changes: 76 additions & 11 deletions sygnal/webpushpushkin.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@

DEFAULT_MAX_CONNECTIONS = 20
DEFAULT_TTL = 15 * 60 # in seconds
# Max payload size is 4096
MAX_BODY_LENGTH = 1000
MAX_CIPHERTEXT_LENGTH = 2000


class WebpushPushkin(ConcurrencyLimitedPushkin):
Expand Down Expand Up @@ -133,6 +136,11 @@ async def _dispatch_notification_unlimited(self, n, device, context):
)
return [device.pushkey]

# drop notifications without an event id if requested,
# see https://github.com/matrix-org/sygnal/issues/186
if device.data.get("events_only") is True and not n.event_id:
return []

endpoint = device.data.get("endpoint")
auth = device.data.get("auth")
endpoint_domain = urlparse(endpoint).netloc
Expand Down Expand Up @@ -175,7 +183,6 @@ async def _dispatch_notification_unlimited(self, n, device, context):
with QUEUE_TIME_HISTOGRAM.time():
with PENDING_REQUESTS_GAUGE.track_inprogress():
await self.connection_semaphore.acquire()

try:
with SEND_TIME_HISTOGRAM.time():
with ACTIVE_REQUESTS_GAUGE.track_inprogress():
Expand All @@ -192,15 +199,10 @@ async def _dispatch_notification_unlimited(self, n, device, context):
finally:
self.connection_semaphore.release()

# assume 4xx is permanent and 5xx is temporary
if 400 <= response.code < 500:
logger.warn(
"Rejecting pushkey %s; gateway %s failed with %d: %s",
device.pushkey,
endpoint_domain,
response.code,
response_text,
)
reject_pushkey = self._handle_response(
response, response_text, device.pushkey, endpoint_domain
)
if reject_pushkey:
return [device.pushkey]
return []

Expand Down Expand Up @@ -233,7 +235,6 @@ def _build_payload(n, device):
"sender_display_name",
"user_is_target",
"type",
"content",
]:
value = getattr(n, attr, None)
if value:
Expand All @@ -246,8 +247,72 @@ def _build_payload(n, device):
if count_value is not None:
payload[attr] = count_value

if n.content and isinstance(n.content, dict):
content = n.content.copy()
# we can't show formatted_body in a notification anyway on web
# so remove it
content.pop("formatted_body", None)
body = content.get("body")
# make some attempts to not go over the max payload length
if isinstance(body, str) and len(body) > MAX_BODY_LENGTH:
content["body"] = body[0 : MAX_BODY_LENGTH - 1] + "…"
ciphertext = content.get("ciphertext")
if isinstance(ciphertext, str) and len(ciphertext) > MAX_CIPHERTEXT_LENGTH:
content.pop("ciphertext", None)
payload["content"] = content

return payload

def _handle_response(self, response, response_text, pushkey, endpoint_domain):
"""
Logs and determines the outcome of the response
Returns:
Boolean whether the puskey should be rejected
"""
ttl_response_headers = response.headers.getRawHeaders(b"TTL")
if ttl_response_headers:
try:
ttl_given = int(ttl_response_headers[0])
if ttl_given != self.ttl:
logger.info(
"requested TTL of %d to endpoint %s but got %d",
self.ttl,
endpoint_domain,
ttl_given,
)
except ValueError:
pass
# permanent errors
if response.code == 404 or response.code == 410:
logger.warn(
"Rejecting pushkey %s; subscription is invalid on %s: %d: %s",
pushkey,
endpoint_domain,
response.code,
response_text,
)
return True
# and temporary ones
if response.code >= 400:
logger.warn(
"webpush request failed for pushkey %s; %s responded with %d: %s",
pushkey,
endpoint_domain,
response.code,
response_text,
)
elif response.code != 201:
logger.info(
"webpush request for pushkey %s didn't respond with 201; "
+ "%s responded with %d: %s",
pushkey,
endpoint_domain,
response.code,
response_text,
)
return False


class HttpAgentWrapper:
"""
Expand Down

0 comments on commit 6b2a7c1

Please sign in to comment.