From 806e422bdf59bbefd995f243be275b9661d78d2f Mon Sep 17 00:00:00 2001 From: Olivier Jacques Date: Wed, 1 Nov 2023 15:42:37 +0100 Subject: [PATCH] Fix #38 - now use GraphQL to fetch authors information --- README.md | 28 +++++- mkdocs_git_committers_plugin_2/plugin.py | 104 ++++++++++++++++------- requirements.txt | 3 - setup.py | 2 +- 4 files changed, 96 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 2df3b3c..e94f6b8 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,18 @@ # mkdocs-git-committers-plugin-2 -This is a plugin which is a fork from the original [`mkdocs-git-committers-plugin`](https://github.com/byrnereese/mkdocs-git-committers-plugin) by @byrnereese. +MkDocs plugin for displaying a list of committers associated with a file in +mkdocs. The plugin uses [GitHub's GraphQL +API](https://docs.github.com/en/graphql) to fetch the list of contributors for +each page. -MkDocs plugin for displaying a list of committers associated with a file in mkdocs. +Other MkDocs plugins that fetch author information: + +- [`mkdocs-git-authors-plugin`](https://github.com/timvink/mkdocs-git-authors-plugin) for displaying user names and the number of lines contributed (uses local Git information) +- [`mkdocs-git-committers-plugin`](https://github.com/byrnereese/mkdocs-git-committers-plugin) for displaying contributors for a page (uses local Git information, completed with REST GitHub API v3) + +## History + +This is a fork from the original [`mkdocs-git-committers-plugin`](https://github.com/byrnereese/mkdocs-git-committers-plugin) by @byrnereese. I had to create this fork so that it could be uploaded and distributed through PyPi. The package has been renamed to `mkdocs-git-committers-plugin-2`. 
@@ -12,12 +22,16 @@ This "v2" differs from the original by: - Eliminate the need to match git commit logs with entries in GitHub, and thus GitHub API calls - No more risk of matching the incorrect contributor as the information comes directly from GitHub - last_commit_date is now populated with local git info -- No need for GitHub personal access token, as there are no more GitHub GraphQL API calls +- Use a cache file to speed up following builds: authors are fetched from GitHub for a page only if that page has changed since the last build -All of the above massively improves accuracy and performances. +All of the above improves accuracy and performance. Note: the plugin configuration in `mkdocs.yml` still uses the original `git-committers` sections. +## material for mkdocs theme + +This plugin is integrated in the [material for mkdocs](https://squidfunk.github.io/mkdocs-material/) theme by [Martin Donath](https://github.com/squidfunk). + ## Limitations - Getting the contributors relies on what is available on GitHub. This means that for new files, the build will report no contributors (and informed you with a 404 error which can be ignored) @@ -37,8 +51,13 @@ plugins: - git-committers: repository: organization/repository branch: main + token: !ENV ["MKDOCS_GIT_COMMITTERS_APIKEY"] ``` +If the token is not set in `mkdocs.yml` it will be read from the `MKDOCS_GIT_COMMITTERS_APIKEY` environment variable. + +**Change in 2.0.0: if no token is present, the plugin will NOT provide git committers.** + > **Note:** If you have no `plugins` entry in your config file yet, you'll likely also want to add the `search` plugin. MkDocs enables it by default if there is no `plugins` entry set, but now you have to enable it explicitly. More information about plugins in the [MkDocs documentation][mkdocs-plugins]. @@ -48,6 +67,7 @@ More information about plugins in the [MkDocs documentation][mkdocs-plugins]. - `enabled` - Disables plugin if set to `False` for e.g. 
local builds (default: `True`) - `repository` - The name of the repository, e.g. 'ojacques/mkdocs-git-committers-plugin-2' - `branch` - The name of the branch to get contributors from. Example: 'master' (default) +- `token` - A github fine-grained token for GitHub GraphQL API calls (classic tokens work too). The token does not need any scope: uncheck everything when creating the GitHub Token at [github.com/settings/personal-access-tokens/new](https://github.com/settings/personal-access-tokens/new), unless you access private repositories. - `enterprise_hostname` - For GitHub enterprise: the enterprise hostname. - `docs_path` - the path to the documentation folder. Defaults to `docs`. - `cache_dir` - The path which holds the authors cache file to speed up documentation builds. Defaults to `.cache/plugin/git-committers/`. The cache file is named `page-authors.json.json`. diff --git a/mkdocs_git_committers_plugin_2/plugin.py b/mkdocs_git_committers_plugin_2/plugin.py index b960a7c..5035c12 100644 --- a/mkdocs_git_committers_plugin_2/plugin.py +++ b/mkdocs_git_committers_plugin_2/plugin.py @@ -1,5 +1,4 @@ import os -import sys import logging from pprint import pprint from timeit import default_timer as timer @@ -13,9 +12,6 @@ import requests, json from requests.exceptions import HTTPError import time -import hashlib -import re -from bs4 import BeautifulSoup as bs from mkdocs_git_committers_plugin_2.exclude import exclude @@ -31,6 +27,7 @@ class GitCommittersPlugin(BasePlugin): ('enabled', config_options.Type(bool, default=True)), ('cache_dir', config_options.Type(str, default='.cache/plugin/git-committers')), ("exclude", config_options.Type(list, default=[])), + ('token', config_options.Type(str, default='')), ) def __init__(self): @@ -49,19 +46,83 @@ def on_config(self, config): return config LOG.info("git-committers plugin ENABLED") + if not self.config['token'] and 'MKDOCS_GIT_COMMITTERS_APIKEY' in os.environ: + self.config['token'] = 
os.environ['MKDOCS_GIT_COMMITTERS_APIKEY'] + if self.config['token'] and self.config['token'] != '': + self.auth_header = {'Authorization': 'token ' + self.config['token'] } + else: + LOG.warning("git-committers plugin now requires a GitHub token. Set it under 'token' mkdocs.yml config or MKDOCS_GIT_COMMITTERS_APIKEY environment variable.") if not self.config['repository']: LOG.error("git-committers plugin: repository not specified") return config if self.config['enterprise_hostname'] and self.config['enterprise_hostname'] != '': - self.githuburl = "https://" + self.config['enterprise_hostname'] + "/" + self.githuburl = "https://" + self.config['enterprise_hostname'] + "/api/graphql" else: - self.githuburl = "https://github.com/" + self.githuburl = "https://api.github.com/graphql" self.localrepo = Repo(".") self.branch = self.config['branch'] self.excluded_pages = self.config['exclude'] return config + # Get unique contributors for a given path using GitHub GraphQL API + def get_contributors_to_path(self, path): + # Query GraphQL API, and get a list of unique authors + query = { + "query": """ + { + repository(owner: "%s", name: "%s") { + object(expression: "%s") { + ... 
on Commit { + history(first: 100, path: "%s") { + nodes { + author { + user { + login + name + url + avatarUrl + } + } + } + } + } + } + } + } + """ % (self.config['repository'].split('/')[0], self.config['repository'].split('/')[1], self.branch, path) + } + authors = [] + if not hasattr(self, 'auth_header'): + # No auth token provided: return now + return None + LOG.info("git-committers: fetching contributors for " + path) + LOG.debug(" from " + self.githuburl) + r = requests.post(url=self.githuburl, json=query, headers=self.auth_header) + res = r.json() + #print(res) + if r.status_code == 200: + if res.get('data'): + if res['data']['repository']['object']['history']['nodes']: + for node in res['data']['repository']['object']['history']['nodes']: + # If user is not None (GitHub user was deleted) + if node['author']['user']: + login = node['author']['user']['login'] + if login not in [author['login'] for author in authors]: + authors.append({'login': node['author']['user']['login'], + 'name': node['author']['user']['name'], + 'url': node['author']['user']['url'], + 'avatar': node['author']['user']['avatarUrl']}) + return authors + else: + return [] + else: + LOG.warning("git-committers: Error from GitHub GraphQL call: " + res['errors'][0]['message']) + return [] + else: + return [] + return [] + def list_contributors(self, path): if exclude(path.lstrip(self.config['docs_path']), self.excluded_pages): return None, None @@ -78,38 +139,15 @@ def list_contributors(self, path): last_commit_date = datetime.now().strftime("%Y-%m-%d") return [], last_commit_date - # Try to leverage the cache + # Use the cache if present if cache date is newer than last commit date if path in self.cache_page_authors: if self.cache_date and time.strptime(last_commit_date, "%Y-%m-%d") < time.strptime(self.cache_date, "%Y-%m-%d"): return self.cache_page_authors[path]['authors'], self.cache_page_authors[path]['last_commit_date'] - url_contribs = self.githuburl + self.config['repository'] + 
"/contributors-list/" + self.config['branch'] + "/" + path - LOG.info("git-committers: fetching contributors for " + path) - LOG.debug(" from " + url_contribs) authors=[] - try: - response = requests.get(url_contribs) - response.raise_for_status() - except HTTPError as http_err: - LOG.error(f'git-committers: HTTP error occurred: {http_err}\n(404 is normal if file is not on GitHub yet or Git submodule)') - except Exception as err: - LOG.error(f'git-committers: Other error occurred: {err}') - else: - html = response.text - # Parse the HTML - soup = bs(html, "lxml") - lis = soup.find_all('li') - for li in lis: - a_tags = li.find_all('a') - login = a_tags[0]['href'].replace("/", "") - url = self.githuburl + login - name = login - img_tags = li.find_all('img') - avatar = img_tags[0]['src'] - avatar = re.sub(r'\?.*$', '', avatar) - authors.append({'login':login, 'name': name, 'url': url, 'avatar': avatar}) - # Update global cache_page_authors - self.cache_page_authors[path] = {'last_commit_date': last_commit_date, 'authors': authors} + authors = self.get_contributors_to_path(path) + + self.cache_page_authors[path] = {'last_commit_date': last_commit_date, 'authors': authors} return authors, last_commit_date diff --git a/requirements.txt b/requirements.txt index 024a3f5..1a97bd7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,2 @@ -beautifulsoup4 -gitpython -lxml>=4.9 mkdocs>=1.0.3 requests diff --git a/setup.py b/setup.py index 28754d3..8289153 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def load_requirements(requirements_files: Union[Path, list[Path]]) -> list: setup( name='mkdocs-git-committers-plugin-2', - version='1.2.0', + version='2.0.0', description='An MkDocs plugin to create a list of contributors on the page. The git-committers plugin will seed the template context with a list of github committers and other useful GIT info such as last modified date', long_description=README, long_description_content_type="text/markdown",