From 182c1356833d30f77bd1b83b138d7e5e57d63dd6 Mon Sep 17 00:00:00 2001 From: Johannes Christ Date: Sun, 10 Dec 2023 15:39:11 +0100 Subject: [PATCH] Implement the github-filter worker in the API The current github-filter worker, found at https://github.com/python-discord/workers/blob/main/github-filter/src/index.ts, fails to work at present because Discord's webhook endpoints block Cloudflare's IP ranges from accessing this endpoint. Whilst they use Cloudflare to guard themselves, it seems they do not wish others to use it. Implement it on the site to circumvent IP restrictions and allow to modify the code in Python. --- .../api/tests/test_github_webhook_filter.py | 46 ++++++++ pydis_site/apps/api/urls.py | 12 +- pydis_site/apps/api/views.py | 104 ++++++++++++++++++ 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 pydis_site/apps/api/tests/test_github_webhook_filter.py diff --git a/pydis_site/apps/api/tests/test_github_webhook_filter.py b/pydis_site/apps/api/tests/test_github_webhook_filter.py new file mode 100644 index 000000000..2c9f59e52 --- /dev/null +++ b/pydis_site/apps/api/tests/test_github_webhook_filter.py @@ -0,0 +1,46 @@ +from unittest import mock + +from django.urls import reverse +from rest_framework.test import APITestCase + + +class GitHubWebhookFilterAPITests(APITestCase): + def test_ignores_bot_sender(self): + url = reverse('api:github-webhook-filter', args=('id', 'token')) + payload = {'sender': {'login': 'limette', 'type': 'bot'}} + headers = {'X-GitHub-Event': 'pull_request_review'} + response = self.client.post(url, data=payload, headers=headers) + self.assertEqual(response.status_code, 203) + + def test_accepts_interesting_events(self): + url = reverse('api:github-webhook-filter', args=('id', 'token')) + payload = { + 'ref': 'refs/heads/master', + 'pull_request': { + 'user': { + 'login': "lemon", + } + }, + 'review': { + 'state': 'commented', + 'body': "Amazing!!!" 
+ }, + 'repository': { + 'name': 'black', + 'owner': { + 'login': 'psf', + } + } + } + headers = {'X-GitHub-Event': 'pull_request_review'} + + with mock.patch('urllib.request.urlopen') as urlopen: + urlopen.return_value = mock.MagicMock() + context_mock = urlopen.return_value.__enter__.return_value + context_mock.status = 299 + context_mock.getheaders.return_value = [('X-Clacks-Overhead', 'Joe Armstrong')] + context_mock.read.return_value = b'{"status": "ok"}' + + response = self.client.post(url, data=payload, headers=headers) + self.assertEqual(response.status_code, context_mock.status) + self.assertEqual(response.headers.get('X-Clacks-Overhead'), 'Joe Armstrong') diff --git a/pydis_site/apps/api/urls.py b/pydis_site/apps/api/urls.py index f872ba920..80d4edc29 100644 --- a/pydis_site/apps/api/urls.py +++ b/pydis_site/apps/api/urls.py @@ -1,7 +1,12 @@ from django.urls import include, path from rest_framework.routers import DefaultRouter -from .views import GitHubArtifactsView, HealthcheckView, RulesView +from .views import ( + GitHubArtifactsView, + GitHubWebhookFilterView, + HealthcheckView, + RulesView, +) from .viewsets import ( AocAccountLinkViewSet, AocCompletionistBlockViewSet, @@ -101,4 +106,9 @@ GitHubArtifactsView.as_view(), name="github-artifacts" ), + path( + 'github/webhook-filter/<str:webhook_id>/<str:webhook_token>', + GitHubWebhookFilterView.as_view(), + name='github-webhook-filter' + ), ) diff --git a/pydis_site/apps/api/views.py b/pydis_site/apps/api/views.py index 829086e76..8a9eebd77 100644 --- a/pydis_site/apps/api/views.py +++ b/pydis_site/apps/api/views.py @@ -1,3 +1,8 @@ +import json +import urllib.request +from collections.abc import Mapping + +from rest_framework import status from rest_framework.exceptions import ParseError from rest_framework.request import Request from rest_framework.response import Response @@ -226,3 +231,102 @@ def get( "error": str(e), "requested_resource": f"{owner}/{repo}/{sha}/{action_name}/{artifact_name}" }, status=e.status) + + +class 
class GitHubWebhookFilterView(APIView):
    """
    Filters uninteresting events from webhooks sent by GitHub to Discord.

    ## Routes
    ### POST /github/webhook-filter/:webhook_id/:webhook_token
    Takes the GitHub webhook payload as the request body, documented here:
    https://docs.github.com/en/webhooks/webhook-events-and-payloads. The endpoint
    will then determine whether the sent webhook event is of interest,
    and if so, will forward it to Discord. The response from Discord is
    then returned back to the client of this website, including the original
    status code and headers (excluding `Content-Type`, `Content-Length` and
    hop-by-hop headers such as `Connection`).

    Events that are filtered out are answered with a 203 status and are not
    sent to Discord. A payload that is not a JSON object is rejected with a
    parse error.

    ## Authentication
    Does not require any authentication nor permissions on its own, however,
    Discord will validate that the webhook originates from GitHub and respond
    with a 403 forbidden error if not.
    """

    authentication_classes = ()
    permission_classes = ()

    # Seconds to wait on Discord before giving up, so that a stalled upstream
    # connection cannot block the serving worker indefinitely.
    DISCORD_TIMEOUT_SECONDS = 10

    # Incoming request headers (lowercase) that describe the original body and
    # target host; they are set afresh on the request sent to Discord.
    _UNFORWARDED_REQUEST_HEADERS = frozenset({'content-length', 'content-type', 'host'})

    # Discord response headers (lowercase) that are not relayed to the client:
    # hop-by-hop headers only apply to the connection between this site and
    # Discord, and the body is rendered again by `Response`, so the upstream
    # length and type no longer describe it.
    _UNRELAYED_RESPONSE_HEADERS = frozenset({
        'connection',
        'keep-alive',
        'proxy-authenticate',
        'proxy-authorization',
        'te',
        'trailer',
        'transfer-encoding',
        'upgrade',
        'content-length',
        'content-type',
    })

    def post(self, request: Request, *, webhook_id: str, webhook_token: str) -> Response:
        """
        Filter a webhook POST from GitHub before sending it to Discord.

        Returns a 203 response when the event is filtered out, otherwise the
        status, headers and body that Discord answered with.
        """
        payload = request.data
        if not isinstance(payload, Mapping):
            # Every filter below reads keys from the payload; reject anything
            # else up front instead of failing with a server error.
            raise ParseError("Expected a JSON object as the webhook payload")

        event = request.headers.get('X-GitHub-Event')
        if self._should_ignore(event, payload):
            return Response(
                {'message': "Ignored by github-filter endpoint"},
                status=status.HTTP_203_NON_AUTHORITATIVE_INFORMATION,
            )

        (response_status, upstream_headers, body) = self.send_webhook(
            webhook_id, webhook_token, payload, dict(request.headers),
        )
        relayed_headers = {
            name: value
            for name, value in upstream_headers.items()
            if name.lower() not in self._UNRELAYED_RESPONSE_HEADERS
        }
        return Response(data=body, headers=relayed_headers, status=response_status)

    @staticmethod
    def _should_ignore(event: 'str | None', payload: Mapping) -> bool:
        """
        Return whether the given GitHub event should not be sent to Discord.

        `event` is the value of the `X-GitHub-Event` header (or None when it
        is absent) and `payload` is the decoded webhook body.
        """
        def section(source: Mapping, key: str) -> Mapping:
            # A key may be missing, null or of an unexpected type; all of
            # these are treated as an empty object.
            value = source.get(key)
            return value if isinstance(value, Mapping) else {}

        def text(source: Mapping, key: str) -> str:
            # Same leniency for string fields used in substring checks.
            value = source.get(key)
            return value if isinstance(value, str) else ''

        sender = section(payload, 'sender')
        sender_name = text(sender, 'login')
        repository = section(payload, 'repository')
        review = section(payload, 'review')
        pr_author = text(section(section(payload, 'pull_request'), 'user'), 'login')

        is_coveralls = 'coveralls' in sender_name
        # GitHub spells the account type "Bot"; compare case-insensitively so
        # both that and a lowercase "bot" are recognised.
        is_github_bot = text(sender, 'type').lower() == 'bot'
        is_sentry = 'sentry-io' in sender_name
        is_dependabot_branch_deletion = (
            event == 'delete'
            and 'dependabot' in text(payload, 'ref')
        )
        is_bot_pr_approval = (
            event == 'pull_request_review'
            and '[bot]' in pr_author
        )
        is_empty_review = (
            event == 'pull_request_review'
            and review.get('state') == 'commented'
            and review.get('body') is None
        )
        is_black_non_main_push = (
            event == 'push'
            and payload.get('ref') != 'refs/heads/main'
            and repository.get('name') == 'black'
            and section(repository, 'owner').get('login') == 'psf'
        )

        is_bot_payload = (
            is_coveralls
            or (is_github_bot and not is_sentry)
            or is_dependabot_branch_deletion
            or is_bot_pr_approval
        )
        is_noisy_user_action = is_empty_review
        return is_bot_payload or is_noisy_user_action or is_black_non_main_push

    def send_webhook(
        self,
        webhook_id: str,
        webhook_token: str,
        data: dict,
        headers: Mapping[str, str],
    ) -> tuple[int, dict[str, str], bytes]:
        """
        Execute a webhook on Discord's GitHub webhook endpoint.

        `data` is serialised as JSON and sent together with `headers`, minus
        the headers describing the original body and host. `headers` itself
        is not modified.

        Returns a `(status, headers, body)` tuple of Discord's response; HTTP
        error statuses are returned the same way rather than raised. Other
        failures raised by `urlopen` (for instance when Discord cannot be
        reached within the timeout) propagate to the caller.
        """
        # Imported explicitly instead of relying on `import urllib.request`
        # making these submodules reachable as a side effect.
        from urllib.error import HTTPError
        from urllib.parse import quote

        payload = json.dumps(data).encode()
        forwarded_headers = {
            name: value
            for name, value in headers.items()
            if name.lower() not in self._UNFORWARDED_REQUEST_HEADERS
        }
        # Both parts come from the request path; escape them so they cannot
        # add path segments or a query string to the Discord URL.
        safe_id = quote(webhook_id, safe='')
        safe_token = quote(webhook_token, safe='')
        request = urllib.request.Request(  # noqa: S310
            f'https://discord.com/api/webhooks/{safe_id}/{safe_token}/github?wait=1',
            data=payload,
            headers={'Content-Type': 'application/json', **forwarded_headers},
        )

        try:
            with urllib.request.urlopen(  # noqa: S310
                request, timeout=self.DISCORD_TIMEOUT_SECONDS,
            ) as response:
                return (response.status, dict(response.getheaders()), response.read())
        except HTTPError as err:  # pragma: no cover
            return (err.code, dict(err.headers), err.fp.read())