-
-
Notifications
You must be signed in to change notification settings - Fork 127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement the github-filter worker in the API #1164
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from unittest import mock | ||
|
||
from django.urls import reverse | ||
from rest_framework.test import APITestCase | ||
|
||
|
||
class GitHubWebhookFilterAPITests(APITestCase): | ||
def test_ignores_bot_sender(self): | ||
url = reverse('api:github-webhook-filter', args=('id', 'token')) | ||
payload = {'sender': {'login': 'limette', 'type': 'bot'}} | ||
headers = {'X-GitHub-Event': 'pull_request_review'} | ||
response = self.client.post(url, data=payload, headers=headers) | ||
self.assertEqual(response.status_code, 203) | ||
|
||
def test_accepts_interesting_events(self): | ||
url = reverse('api:github-webhook-filter', args=('id', 'token')) | ||
payload = { | ||
'ref': 'refs/heads/master', | ||
'pull_request': { | ||
'user': { | ||
'login': "lemon", | ||
} | ||
}, | ||
'review': { | ||
'state': 'commented', | ||
'body': "Amazing!!!" | ||
}, | ||
'repository': { | ||
'name': 'black', | ||
'owner': { | ||
'login': 'psf', | ||
} | ||
} | ||
} | ||
headers = {'X-GitHub-Event': 'pull_request_review'} | ||
|
||
with mock.patch('urllib.request.urlopen') as urlopen: | ||
urlopen.return_value = mock.MagicMock() | ||
context_mock = urlopen.return_value.__enter__.return_value | ||
context_mock.status = 299 | ||
context_mock.getheaders.return_value = [('X-Clacks-Overhead', 'Joe Armstrong')] | ||
context_mock.read.return_value = b'{"status": "ok"}' | ||
|
||
response = self.client.post(url, data=payload, headers=headers) | ||
self.assertEqual(response.status_code, context_mock.status) | ||
self.assertEqual(response.headers.get('X-Clacks-Overhead'), 'Joe Armstrong') |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,8 @@ | ||
import json | ||
import urllib.request | ||
from collections.abc import Mapping | ||
|
||
from rest_framework import status | ||
from rest_framework.exceptions import ParseError | ||
from rest_framework.request import Request | ||
from rest_framework.response import Response | ||
|
@@ -226,3 +231,102 @@ def get( | |
"error": str(e), | ||
"requested_resource": f"{owner}/{repo}/{sha}/{action_name}/{artifact_name}" | ||
}, status=e.status) | ||
|
||
|
||
class GitHubWebhookFilterView(APIView): | ||
""" | ||
Filters uninteresting events from webhooks sent by GitHub to Discord. | ||
|
||
## Routes | ||
### POST /github/webhook-filter/:webhook_id/:webhook_token | ||
Takes the GitHub webhook payload as the request body, documented on here: | ||
https://docs.github.com/en/webhooks/webhook-events-and-payloads. The endpoint | ||
will then determine whether the sent webhook event is of interest, | ||
and if so, will forward it to Discord. The response from Discord is | ||
then returned back to the client of this website, including the original | ||
status code and headers (excluding `Content-Type`). | ||
|
||
## Authentication | ||
Does not require any authentication nor permissions on its own, however, | ||
Discord will validate that the webhook originates from GitHub and respond | ||
with a 403 forbidden error if not. | ||
""" | ||
|
||
authentication_classes = () | ||
permission_classes = () | ||
|
||
def post(self, request: Request, *, webhook_id: str, webhook_token: str) -> Response: | ||
"""Filter a webhook POST from GitHub before sending it to Discord.""" | ||
sender = request.data.get('sender', {}) | ||
sender_name = sender.get('login', '') | ||
event = request.headers.get('X-GitHub-Event') | ||
repository = request.data.get('repository', {}) | ||
|
||
is_coveralls = 'coveralls' in sender_name | ||
is_github_bot = sender.get('type') == 'bot' | ||
is_sentry = 'sentry-io' in sender_name | ||
is_dependabot_branch_deletion = ( | ||
'dependabot' in request.data.get('ref', '') | ||
and event == 'delete' | ||
) | ||
is_bot_pr_approval = ( | ||
'[bot]' in request.data.get('pull_request', {}).get('user', {}).get('login', '') | ||
and event == 'pull_request_review' | ||
) | ||
is_empty_review = ( | ||
request.data.get('review', {}).get('state') == 'commented' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be better to reduce the verbosity of all of these dictionary accesses we're doing by assing them to variables. e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've changed this for some things that were used multiple times, but e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, we want to avoid repetition, so if it's not repeated, it's fine. |
||
and event == 'pull_request_review' | ||
and request.data.get('review', {}).get('body') is None | ||
) | ||
is_black_non_main_push = ( | ||
request.data.get('ref') != 'refs/heads/main' | ||
and repository.get('name') == 'black' | ||
and repository.get('owner', {}).get('login') == 'psf' | ||
and event == 'push' | ||
) | ||
|
||
is_bot_payload = ( | ||
is_coveralls | ||
or (is_github_bot and not is_sentry) | ||
or is_dependabot_branch_deletion | ||
or is_bot_pr_approval | ||
) | ||
is_noisy_user_action = is_empty_review | ||
should_ignore = is_bot_payload or is_noisy_user_action or is_black_non_main_push | ||
|
||
if should_ignore: | ||
return Response( | ||
{'message': "Ignored by github-filter endpoint"}, | ||
status=status.HTTP_203_NON_AUTHORITATIVE_INFORMATION, | ||
) | ||
|
||
(response_status, headers, body) = self.send_webhook( | ||
webhook_id, webhook_token, request.data, dict(request.headers), | ||
) | ||
headers.pop('Connection', None) | ||
headers.pop('Content-Length', None) | ||
return Response(data=body, headers=headers, status=response_status) | ||
|
||
def send_webhook( | ||
self, | ||
webhook_id: str, | ||
webhook_token: str, | ||
data: dict, | ||
headers: Mapping[str, str], | ||
) -> tuple[int, dict[str, str], bytes]: | ||
"""Execute a webhook on Discord's GitHub webhook endpoint.""" | ||
payload = json.dumps(data).encode() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm just curious, why use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like urllib. It works perfectly fine, it's included with the standard library, and the risk of it being unmaintained is very very very close to 0. Also, this plays into my long-term plan of removing dependencies we don't need like |
||
headers.pop('Content-Length', None) | ||
headers.pop('Content-Type', None) | ||
headers.pop('Host', None) | ||
request = urllib.request.Request( # noqa: S310 | ||
f'https://discord.com/api/webhooks/{webhook_id}/{webhook_token}/github?wait=1', | ||
data=payload, | ||
headers={'Content-Type': 'application/json', **headers}, | ||
) | ||
|
||
try: | ||
with urllib.request.urlopen(request) as response: # noqa: S310 | ||
return (response.status, dict(response.getheaders()), response.read()) | ||
except urllib.error.HTTPError as err: # pragma: no cover | ||
return (err.code, dict(err.headers), err.fp.read()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a
user
has atype
property, which would be equal tobot
if it's a bot, wouldn't it be better to use that ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've copied the way the Cloudflare worker does it, but we can change it.
Is there documentation about this? On https://docs.github.com/en/webhooks/webhook-events-and-payloads I only found the very helpful
object
. But I've added it to the amended commit.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It says here that the
sender
is an instnace ofGithub User
If you look at the response schema of the Get User response, you'll see it has a
type
property.It's the same info that's carried over.