Skip to content

Commit

Permalink
GITC-478: Caching github user object. (#9804)
Browse files Browse the repository at this point in the history
* GITC-478: Caching github user object.

Have created model to store the serialized github entities. Have also reworked code, mainly in the `git/utils.py` file to make use of this cache.

* GITC-478: remove logs for sensitive data

* GITC-478: Adding tests
  • Loading branch information
nutrina authored Dec 8, 2021
1 parent c7c0ab7 commit 10fa4fc
Show file tree
Hide file tree
Showing 9 changed files with 295 additions and 6 deletions.
29 changes: 29 additions & 0 deletions app/git/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
"""Define the Grant admin layout.
Copyright (C) 2021 Gitcoin Core
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
from django.contrib import admin
from git.models import GitCache

class GitCacheAdmin(admin.ModelAdmin):
list_display = ['pk', 'category', 'handle']
search_fields = [
'id', 'handle'
]

admin.site.register(GitCache, GitCacheAdmin)
29 changes: 29 additions & 0 deletions app/git/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 2.2.24 on 2021-12-01 11:50

from django.db import migrations, models
import economy.models


class Migration(migrations.Migration):

initial = True

dependencies = [
]

operations = [
migrations.CreateModel(
name='GitCache',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created_on', models.DateTimeField(db_index=True, default=economy.models.get_time)),
('modified_on', models.DateTimeField(default=economy.models.get_time)),
('handle', models.CharField(blank=True, max_length=200)),
('category', models.CharField(blank=True, choices=[('user', 'User'), ('repo', 'Repository')], max_length=20)),
('data', models.BinaryField()),
],
options={
'unique_together': {('handle', 'category')},
},
),
]
73 changes: 73 additions & 0 deletions app/git/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@


# -*- coding: utf-8 -*-
"""Define models.
Copyright (C) 2021 Gitcoin Core
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import logging

from django.db import models

from economy.models import SuperModel

logger = logging.getLogger(__name__)


class GitCache(SuperModel):
"""Model used for storing serialized pygithub objects.
Attributes:
handle (str): The unique (within a category) handle for the data
category (str): the category of the data (user, repo, ...).
data (binary): The serialized object data.
"""

class Category:
USER = "user"
REPO = "repo"

CATEGORY_CHOICES = [
(Category.USER, 'User'),
(Category.REPO, 'Repository'),
]

handle = models.CharField(max_length=200, null=False, blank=True)
category = models.CharField(max_length=20, null=False, blank=True, choices=CATEGORY_CHOICES)
data = models.BinaryField()

class Meta:
unique_together = ["handle", "category"]

def __str__(self):
"""Return the string representation of a model."""
return f"[{self.category}] {self.handle}"

@classmethod
def get_user(self, handle):
"""Utility function to retreive a user object"""
try:
return self.objects.get(category=GitCache.Category.USER, handle=handle)
except self.DoesNotExist:
raise

def update_data(self, data):
"""Update the data field if it has changed."""
if self.data != data:
self.data = data
self.save()
Empty file.
19 changes: 19 additions & 0 deletions app/git/tests/factories/git_cache_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import factory
import pytest
from git.models import GitCache


@pytest.mark.django_db
class GitCacheFactory(factory.django.DjangoModelFactory):
class Meta:
model = GitCache

# Unique user handle
handle = factory.Sequence(lambda n: f"user_handle_{n}")

# Cycle through the choices and select one
category = factory.Sequence(lambda n: GitCache.CATEGORY_CHOICES[n % len(GitCache.CATEGORY_CHOICES)][0])

# Generate binary data depending on n
data = factory.Sequence(lambda n: ("{n}" * 100).encode("utf-8"))

Empty file.
40 changes: 40 additions & 0 deletions app/git/tests/models/test_git_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from git.tests.factories.git_cache_factory import GitCacheFactory
import pytest
from git.models import GitCache


@pytest.mark.django_db
class TestGitCache:
"""Test CLRMatch model."""

def test_creation(self):
"""Test GitCache returned by factory is valid."""

git_cache = GitCacheFactory()

assert isinstance(git_cache, GitCache)

def test_get_user(self):
"""Test get_user helper function."""

git_cache = GitCacheFactory()
git_cache.category = GitCache.Category.USER
handle = git_cache.handle
git_cache.save()

saved = GitCache.get_user(handle)
assert git_cache.id == saved.id

def test_update_data(self):
"""Test update_data helper function."""

git_cache = GitCacheFactory()
git_cache.category = GitCache.Category.USER
handle = git_cache.handle
git_cache.save()

new_data = "This is updated data".encode("utf-8")
git_cache.update_data(new_data)

saved = GitCache.get_user(handle)
assert new_data == saved.data.tobytes()
64 changes: 62 additions & 2 deletions app/git/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,21 @@
"""
from datetime import timedelta
from urllib.parse import quote_plus, urlencode
from unittest.mock import MagicMock, patch
from github import NamedUser

from django.conf import settings
from django.test.utils import override_settings
from django.utils import timezone
from git.models import GitCache
from git.tests.factories.git_cache_factory import GitCacheFactory
from faker import Faker

import responses
from git.utils import (
HEADERS, TOKEN_URL, build_auth_dict, delete_issue_comment, get_github_emails, get_github_primary_email,
get_issue_comments, get_issue_timeline_events, is_github_token_valid, org_name, patch_issue_comment,
post_issue_comment, post_issue_comment_reaction, repo_url, reset_token, revoke_token,
get_issue_comments, get_issue_timeline_events, github_connect, is_github_token_valid, org_name, patch_issue_comment,
post_issue_comment, post_issue_comment_reaction, repo_url, reset_token, revoke_token, _get_user
)
from test_plus.test import TestCase

Expand Down Expand Up @@ -230,3 +235,58 @@ def test_is_github_token_valid(self):
# post_issue_comment_reaction(owner, repo, comment_id, 'A comment.')

# assert responses.calls[0].request.url == url

@responses.activate
@patch('git.utils.github_connect')
def test_get_user_caching(self, mock_github_connect):
"""Test the github utility _get_user method."""
fake = Faker()

# Create a dummy handle and some binary data that is supposed to be the serialized user
user_handle = fake.user_name()
user_binary_data = fake.text().encode('utf-8')

def dump_mock(user_obj, file_obj):
file_obj.write(user_binary_data)

gh_user = MagicMock(spec=NamedUser)
gh_user.update = MagicMock()

# Mock the gh client
gh_client = mock_github_connect()
gh_client.load = MagicMock(return_value=gh_user)
gh_client.dump = dump_mock
gh_client.get_user = MagicMock(return_value=gh_user)

# Step 1: Make the call
_get_user(gh_client, user_handle)

# Verify what was called and that the user has been cached:
# - expected: get_user
# - not expected: loaded, update - because user is new
gh_client.get_user.assert_called_once_with(user_handle)
gh_client.load.assert_not_called()
gh_user.update.assert_not_called()

# Verify that user has been cached
saved_user = GitCache.get_user(user_handle)

assert saved_user.handle == user_handle
assert saved_user.data.tobytes() == user_binary_data

# Step 2: Repeat the call, user should be leaded from DB
gh_client.reset_mock()
gh_client.load.reset_mock()
gh_client.get_user.reset_mock()

loaded_user = _get_user(gh_client, user_handle)

# Verify what was called and that the user has been cached:
# - expected: load and update (for the user loaded from DB)
# - not expected: get_user (because user is already in DB)
gh_client.load.assert_called_once()
assert gh_client.load.call_args[0][0].getbuffer() == user_binary_data
gh_client.get_user.assert_not_called()
gh_user.update.assert_called_once()

assert loaded_user == gh_user
47 changes: 43 additions & 4 deletions app/git/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
from urllib.parse import quote_plus, urlencode

from django.conf import settings
from io import BytesIO
from django.utils import timezone

import dateutil.parser
import requests
from .models import GitCache
from github import Github
from github.GithubException import BadCredentialsException, GithubException, UnknownObjectException
from requests.exceptions import ConnectionError
Expand Down Expand Up @@ -62,7 +64,7 @@ def get_issue_details(org, repo, issue_num, token=None):
details = {'keywords': []}
try:
gh_client = github_connect(token)
org_user = gh_client.get_user(login=org)
org_user = _get_user(gh_client, org)
repo_obj = org_user.get_repo(repo)
issue_details = repo_obj.get_issue(issue_num)
langs = repo_obj.get_languages()
Expand All @@ -82,7 +84,7 @@ def get_issue_details(org, repo, issue_num, token=None):

def get_issue_state(org, repo, issue_num):
gh_client = github_connect()
org_user = gh_client.get_user(login=org)
org_user = _get_user(gh_client, org)
repo_obj = org_user.get_repo(repo)
issue_details = repo_obj.get_issue(issue_num)
return issue_details.state
Expand Down Expand Up @@ -257,7 +259,7 @@ def get_github_event_emails(oauth_token, username):

try:
gh_client = github_connect(oauth_token)
events = gh_client.get_user(username).get_public_events()
events = _get_user(gh_client, username).get_public_events()
for event in events:
payload = event.payload if event.payload else {}
for commit in payload.get('commits', []):
Expand Down Expand Up @@ -514,11 +516,48 @@ def get_interested_actions(github_url, username, email=''):
return actions_by_interested_party


def _get_user(gh_client, user=None):
"""Internal function to retrieve users
This function will attempt to retrieve the user from cache and update it.
If that fails, the user will be retreived via API
"""
ret = None
cached_user = None

# We only attempt reading from the cache table if a user handle is provided
if user:
# We'll attempt to load the users data from the cache, deserialize the data and update it
try:
cached_user = GitCache.get_user(user)
ret = gh_client.load(BytesIO(cached_user.data))
ret.update()
except GitCache.DoesNotExist:
logger.debug("User not found in cache")
except Exception:
logger.error("Failed to load user rom cache", exc_info=True)

# If no user has been retreived (either no handle or not in cache yet) we get the user
if not ret:
ret = gh_client.get_user(user) if user else gh_client.get_user()

# Cache the data if a user handle is provided
if user and ret:
if not cached_user:
cached_user = GitCache(handle=user, category=GitCache.Category.USER)

user_dump = BytesIO()
gh_client.dump(ret, user_dump)
cached_user.update_data(user_dump.getbuffer())

return ret


def get_user(user=None, token=None):
"""Get the github user details."""
try:
gh_client = github_connect(token)
return gh_client.get_user(user) if user else gh_client.get_user()
return _get_user(gh_client, user)
except GithubException as e:
# Do not log exception for github users which are deleted
if e.data.get("message") != 'Not Found':
Expand Down

0 comments on commit 10fa4fc

Please sign in to comment.