Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the github-filter worker in the API #1164

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions pydis_site/apps/api/tests/test_github_webhook_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from unittest import mock

from django.urls import reverse
from rest_framework.test import APITestCase


class GitHubWebhookFilterAPITests(APITestCase):
    """Exercise the `api:github-webhook-filter` endpoint through the test client."""

    def setUp(self):
        """Resolve the endpoint URL with placeholder webhook credentials."""
        super().setUp()
        self.url = reverse('api:github-webhook-filter', args=('id', 'token'))

    def test_ignores_bot_sender(self):
        """A payload whose sender has type `bot` is answered with a 203."""
        response = self.client.post(
            self.url,
            data={'sender': {'login': 'limette', 'type': 'bot'}},
            headers={'X-GitHub-Event': 'pull_request_review'},
        )

        self.assertEqual(response.status_code, 203)

    def test_accepts_interesting_events(self):
        """The status and headers of the stubbed upstream reply are relayed."""
        pull_request = {'user': {'login': "lemon"}}
        review = {'state': 'commented', 'body': "Amazing!!!"}
        repository = {'name': 'black', 'owner': {'login': 'psf'}}
        payload = {
            'ref': 'refs/heads/master',
            'pull_request': pull_request,
            'review': review,
            'repository': repository,
        }

        # Stand-in for the object yielded by `with urllib.request.urlopen(...)`.
        upstream_reply = mock.MagicMock()
        upstream_reply.status = 299
        upstream_reply.getheaders.return_value = [('X-Clacks-Overhead', 'Joe Armstrong')]
        upstream_reply.read.return_value = b'{"status": "ok"}'

        connection = mock.MagicMock()
        connection.__enter__.return_value = upstream_reply

        with mock.patch('urllib.request.urlopen', return_value=connection):
            response = self.client.post(
                self.url,
                data=payload,
                headers={'X-GitHub-Event': 'pull_request_review'},
            )
            self.assertEqual(response.status_code, upstream_reply.status)
            self.assertEqual(response.headers.get('X-Clacks-Overhead'), 'Joe Armstrong')
12 changes: 11 additions & 1 deletion pydis_site/apps/api/urls.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from django.urls import include, path
from rest_framework.routers import DefaultRouter

from .views import GitHubArtifactsView, HealthcheckView, RulesView
from .views import (
GitHubArtifactsView,
GitHubWebhookFilterView,
HealthcheckView,
RulesView,
)
from .viewsets import (
AocAccountLinkViewSet,
AocCompletionistBlockViewSet,
Expand Down Expand Up @@ -101,4 +106,9 @@
GitHubArtifactsView.as_view(),
name="github-artifacts"
),
path(
'github/webhook-filter/<str:webhook_id>/<str:webhook_token>',
GitHubWebhookFilterView.as_view(),
name='github-webhook-filter'
),
)
104 changes: 104 additions & 0 deletions pydis_site/apps/api/views.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import json
import urllib.request
from collections.abc import Mapping

from rest_framework import status
from rest_framework.exceptions import ParseError
from rest_framework.request import Request
from rest_framework.response import Response
Expand Down Expand Up @@ -226,3 +231,102 @@ def get(
"error": str(e),
"requested_resource": f"{owner}/{repo}/{sha}/{action_name}/{artifact_name}"
}, status=e.status)


class GitHubWebhookFilterView(APIView):
"""
Filters uninteresting events from webhooks sent by GitHub to Discord.

## Routes
### POST /github/webhook-filter/:webhook_id/:webhook_token
Takes the GitHub webhook payload as the request body, documented here:
https://docs.github.com/en/webhooks/webhook-events-and-payloads. The endpoint
will then determine whether the sent webhook event is of interest,
and if so, will forward it to Discord. The response from Discord is
then returned back to the client of this website, including the original
status code and headers (excluding `Content-Type`).

## Authentication
Does not require any authentication nor permissions on its own, however,
Discord will validate that the webhook originates from GitHub and respond
with a 403 forbidden error if not.
"""

authentication_classes = ()
permission_classes = ()

def post(self, request: Request, *, webhook_id: str, webhook_token: str) -> Response:
"""Filter a webhook POST from GitHub before sending it to Discord."""
sender = request.data.get('sender', {})
sender_name = sender.get('login', '')
event = request.headers.get('X-GitHub-Event')
repository = request.data.get('repository', {})

is_coveralls = 'coveralls' in sender_name
is_github_bot = sender.get('type') == 'bot'
is_sentry = 'sentry-io' in sender_name
is_dependabot_branch_deletion = (
'dependabot' in request.data.get('ref', '')
and event == 'delete'
)
is_bot_pr_approval = (
'[bot]' in request.data.get('pull_request', {}).get('user', {}).get('login', '')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A user has a `type` property, which would be equal to `bot` if it's a bot. Wouldn't it be better to use that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've copied the way the Cloudflare worker does it, but we can change it.
Is there documentation about this? On https://docs.github.com/en/webhooks/webhook-events-and-payloads I only found the very helpful object. But I've added it to the amended commit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It says here that the sender is an instance of GitHub User.
image

If you look at the response schema of the Get User response, you'll see it has a type property.

It's the same info that's carried over.

{
  "oneOf": [
    {
      "title": "Private User",
      "description": "Private User",
      "type": "object",
      "properties": {
        "login": {
          "type": "string",
          "examples": [
            "octocat"
          ]
        },
        "id": {
          "type": "integer",
          "examples": [
            1
          ]
        },
        "node_id": {
          "type": "string",
          "examples": [
            "MDQ6VXNlcjE="
          ]
        },
        "avatar_url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://github.com/images/error/octocat_happy.gif"
          ]
        },
        "gravatar_id": {
          "type": [
            "string",
            "null"
          ],
          "examples": [
            "41d064eb2195891e12d0413f63227ea7"
          ]
        },
        "url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://api.github.com/users/octocat"
          ]
        },
        "html_url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://github.com/octocat"
          ]
        },
        "followers_url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://api.github.com/users/octocat/followers"
          ]
        },
        "following_url": {
          "type": "string",
          "examples": [
            "https://api.github.com/users/octocat/following{/other_user}"
          ]
        },
        "gists_url": {
          "type": "string",
          "examples": [
            "https://api.github.com/users/octocat/gists{/gist_id}"
          ]
        },
        "starred_url": {
          "type": "string",
          "examples": [
            "https://api.github.com/users/octocat/starred{/owner}{/repo}"
          ]
        },
        "subscriptions_url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://api.github.com/users/octocat/subscriptions"
          ]
        },
        "organizations_url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://api.github.com/users/octocat/orgs"
          ]
        },
        "repos_url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://api.github.com/users/octocat/repos"
          ]
        },
        "events_url": {
          "type": "string",
          "examples": [
            "https://api.github.com/users/octocat/events{/privacy}"
          ]
        },
        "received_events_url": {
          "type": "string",
          "format": "uri",
          "examples": [
            "https://api.github.com/users/octocat/received_events"
          ]
        },
        "type": {
          "type": "string",
          "examples": [
            "User"
          ]
        },
        "site_admin": {
          "type": "boolean"
        },
        "name": {
          "type": [
            "string",
            "null"
          ],
          "examples": [
            "monalisa octocat"
          ]
        },
        "company": {
          "type": [
            "string",
            "null"
          ],
          "examples": [
            "GitHub"
          ]
        },
        "blog": {
          "type": [
            "string",
            "null"
          ],
          "examples": [
            "https://github.com/blog"
          ]
        },
        "location": {
          "type": [
            "string",
            "null"
          ],
          "examples": [
            "San Francisco"
          ]
        },
        "email": {
          "type": [
            "string",
            "null"
          ],
          "format": "email",
          "examples": [
            "octocat@github.com"
          ]
        },
        "hireable": {
          "type": [
            "boolean",
            "null"
          ]
        },
        "bio": {
          "type": [
            "string",
            "null"
          ],
          "examples": [
            "There once was..."
          ]
        },
        "twitter_username": {
          "type": [
            "string",
            "null"
          ],
          "examples": [
            "monalisa"
          ]
        },
        "public_repos": {
          "type": "integer",
          "examples": [
            2
          ]
        },
        "public_gists": {
          "type": "integer",
          "examples": [
            1
          ]
        },
        "followers": {
          "type": "integer",
          "examples": [
            20
          ]
        },
        "following": {
          "type": "integer",
          "examples": [
            0
          ]
        },
        "created_at": {
          "type": "string",
          "format": "date-time",
          "examples": [
            "2008-01-14T04:33:35Z"
          ]
        },
        "updated_at": {
          "type": "string",
          "format": "date-time",
          "examples": [
            "2008-01-14T04:33:35Z"
          ]
        },
        "private_gists": {
          "type": "integer",
          "examples": [
            81
          ]
        },
        "total_private_repos": {
          "type": "integer",
          "examples": [
            100
          ]
        },
        "owned_private_repos": {
          "type": "integer",
          "examples": [
            100
          ]
        },
        "disk_usage": {
          "type": "integer",
          "examples": [
            10000
          ]
        },
        "collaborators": {
          "type": "integer",
          "examples": [
            8
          ]
        },
        "two_factor_authentication": {
          "type": "boolean",
          "examples": [
            true
          ]
        },
        "plan": {
          "type": "object",
          "properties": {
            "collaborators": {
              "type": "integer"
            },
            "name": {
              "type": "string"
            },
            "space": {
              "type": "integer"
            },
            "private_repos": {
              "type": "integer"
            }
          },
          "required": [
            "collaborators",
            "name",
            "space",
            "private_repos"
          ]
        },
        "suspended_at": {
          "type": [
            "string",
            "null"
          ],
          "format": "date-time"
        },
        "business_plus": {
          "type": "boolean"
        },
        "ldap_dn": {
          "type": "string"
        }
      },
      "required": [
        "avatar_url",
        "events_url",
        "followers_url",
        "following_url",
        "gists_url",
        "gravatar_id",
        "html_url",
        "id",
        "node_id",
        "login",
        "organizations_url",
        "received_events_url",
        "repos_url",
        "site_admin",
        "starred_url",
        "subscriptions_url",
        "type",
        "url",
        "bio",
        "blog",
        "company",
        "email",
        "followers",
        "following",
        "hireable",
        "location",
        "name",
        "public_gists",
        "public_repos",
        "created_at",
        "updated_at",
        "collaborators",
        "disk_usage",
        "owned_private_repos",
        "private_gists",
        "total_private_repos",
        "two_factor_authentication"
      ]
    },
    {
      "title": "Public User",
      "description": "Public User",
      "type": "object",
      "properties": {
        "login": {
          "type": "string"
        },
        "id": {
          "type": "integer"
        },
        "node_id": {
          "type": "string"
        },
        "avatar_url": {
          "type": "string",
          "format": "uri"
        },
        "gravatar_id": {
          "type": [
            "string",
            "null"
          ]
        },
        "url": {
          "type": "string",
          "format": "uri"
        },
        "html_url": {
          "type": "string",
          "format": "uri"
        },
        "followers_url": {
          "type": "string",
          "format": "uri"
        },
        "following_url": {
          "type": "string"
        },
        "gists_url": {
          "type": "string"
        },
        "starred_url": {
          "type": "string"
        },
        "subscriptions_url": {
          "type": "string",
          "format": "uri"
        },
        "organizations_url": {
          "type": "string",
          "format": "uri"
        },
        "repos_url": {
          "type": "string",
          "format": "uri"
        },
        "events_url": {
          "type": "string"
        },
        "received_events_url": {
          "type": "string",
          "format": "uri"
        },
        "type": {
          "type": "string"
        },
        "site_admin": {
          "type": "boolean"
        },
        "name": {
          "type": [
            "string",
            "null"
          ]
        },
        "company": {
          "type": [
            "string",
            "null"
          ]
        },
        "blog": {
          "type": [
            "string",
            "null"
          ]
        },
        "location": {
          "type": [
            "string",
            "null"
          ]
        },
        "email": {
          "type": [
            "string",
            "null"
          ],
          "format": "email"
        },
        "hireable": {
          "type": [
            "boolean",
            "null"
          ]
        },
        "bio": {
          "type": [
            "string",
            "null"
          ]
        },
        "twitter_username": {
          "type": [
            "string",
            "null"
          ]
        },
        "public_repos": {
          "type": "integer"
        },
        "public_gists": {
          "type": "integer"
        },
        "followers": {
          "type": "integer"
        },
        "following": {
          "type": "integer"
        },
        "created_at": {
          "type": "string",
          "format": "date-time"
        },
        "updated_at": {
          "type": "string",
          "format": "date-time"
        },
        "plan": {
          "type": "object",
          "properties": {
            "collaborators": {
              "type": "integer"
            },
            "name": {
              "type": "string"
            },
            "space": {
              "type": "integer"
            },
            "private_repos": {
              "type": "integer"
            }
          },
          "required": [
            "collaborators",
            "name",
            "space",
            "private_repos"
          ]
        },
        "suspended_at": {
          "type": [
            "string",
            "null"
          ],
          "format": "date-time"
        },
        "private_gists": {
          "type": "integer",
          "examples": [
            1
          ]
        },
        "total_private_repos": {
          "type": "integer",
          "examples": [
            2
          ]
        },
        "owned_private_repos": {
          "type": "integer",
          "examples": [
            2
          ]
        },
        "disk_usage": {
          "type": "integer",
          "examples": [
            1
          ]
        },
        "collaborators": {
          "type": "integer",
          "examples": [
            3
          ]
        }
      },
      "required": [
        "avatar_url",
        "events_url",
        "followers_url",
        "following_url",
        "gists_url",
        "gravatar_id",
        "html_url",
        "id",
        "node_id",
        "login",
        "organizations_url",
        "received_events_url",
        "repos_url",
        "site_admin",
        "starred_url",
        "subscriptions_url",
        "type",
        "url",
        "bio",
        "blog",
        "company",
        "email",
        "followers",
        "following",
        "hireable",
        "location",
        "name",
        "public_gists",
        "public_repos",
        "created_at",
        "updated_at"
      ],
      "additionalProperties": false
    }
  ]
}

and event == 'pull_request_review'
)
is_empty_review = (
request.data.get('review', {}).get('state') == 'commented'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to reduce the verbosity of all of these dictionary accesses we're doing by assigning them to variables.

e.g. review = request.data.get('review', {}), etc, etc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed this for some things that were used multiple times, but e.g. ref or pull_request are only used once, so I'm not sure it's worth it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we want to avoid repetition, so if it's not repeated, it's fine.

and event == 'pull_request_review'
and request.data.get('review', {}).get('body') is None
)
is_black_non_main_push = (
request.data.get('ref') != 'refs/heads/main'
and repository.get('name') == 'black'
and repository.get('owner', {}).get('login') == 'psf'
and event == 'push'
)

is_bot_payload = (
is_coveralls
or (is_github_bot and not is_sentry)
or is_dependabot_branch_deletion
or is_bot_pr_approval
)
is_noisy_user_action = is_empty_review
should_ignore = is_bot_payload or is_noisy_user_action or is_black_non_main_push

if should_ignore:
return Response(
{'message': "Ignored by github-filter endpoint"},
status=status.HTTP_203_NON_AUTHORITATIVE_INFORMATION,
)

(response_status, headers, body) = self.send_webhook(
webhook_id, webhook_token, request.data, dict(request.headers),
)
headers.pop('Connection', None)
headers.pop('Content-Length', None)
return Response(data=body, headers=headers, status=response_status)

def send_webhook(
self,
webhook_id: str,
webhook_token: str,
data: dict,
headers: Mapping[str, str],
) -> tuple[int, dict[str, str], bytes]:
"""Execute a webhook on Discord's GitHub webhook endpoint."""
payload = json.dumps(data).encode()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just curious: why use urllib, encode the data yourself, etc., while httpx is listed in our dependencies and would shorten the work that needs to be done?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like urllib. It works perfectly fine, it's included with the standard library, and the risk of it being unmaintained is very very very close to 0. Also, this plays into my long-term plan of removing dependencies we don't need like httpx.

headers.pop('Content-Length', None)
headers.pop('Content-Type', None)
headers.pop('Host', None)
request = urllib.request.Request( # noqa: S310
f'https://discord.com/api/webhooks/{webhook_id}/{webhook_token}/github?wait=1',
data=payload,
headers={'Content-Type': 'application/json', **headers},
)

try:
with urllib.request.urlopen(request) as response: # noqa: S310
return (response.status, dict(response.getheaders()), response.read())
except urllib.error.HTTPError as err: # pragma: no cover
return (err.code, dict(err.headers), err.fp.read())