From 1c2f23628e7f36e83981e4ef652cda203a824b29 Mon Sep 17 00:00:00 2001 From: Mateusz Szostok Date: Wed, 21 Aug 2024 16:29:57 +0200 Subject: [PATCH] ci: update MAINTAINERS.yaml for each CODEOWNERS file change (#1315) Co-authored-by: Lukasz Gornicki --- .github/scripts/maintainers/.gitignore | 1 + .github/scripts/maintainers/README.md | 58 +++++++ .github/scripts/maintainers/cache.js | 64 ++++++++ .github/scripts/maintainers/gh_calls.js | 131 +++++++++++++++ .github/scripts/maintainers/index.js | 190 ++++++++++++++++++++++ .github/scripts/maintainers/summary.js | 99 +++++++++++ .github/workflows/update-maintainers.yaml | 130 +++++++++++++++ 7 files changed, 673 insertions(+) create mode 100644 .github/scripts/maintainers/.gitignore create mode 100644 .github/scripts/maintainers/README.md create mode 100644 .github/scripts/maintainers/cache.js create mode 100644 .github/scripts/maintainers/gh_calls.js create mode 100644 .github/scripts/maintainers/index.js create mode 100644 .github/scripts/maintainers/summary.js create mode 100644 .github/workflows/update-maintainers.yaml diff --git a/.github/scripts/maintainers/.gitignore b/.github/scripts/maintainers/.gitignore new file mode 100644 index 000000000..60923f546 --- /dev/null +++ b/.github/scripts/maintainers/.gitignore @@ -0,0 +1 @@ +github.api.cache.json diff --git a/.github/scripts/maintainers/README.md b/.github/scripts/maintainers/README.md new file mode 100644 index 000000000..6d82e01e4 --- /dev/null +++ b/.github/scripts/maintainers/README.md @@ -0,0 +1,58 @@ +# Maintainers + +The ["Update MAINTAINERS.yaml file"](../../workflows/update-maintainers.yaml) workflow, defined in the `community` repository performs a complete refresh by fetching all public repositories under AsyncAPI and their respective `CODEOWNERS` files. + +## Workflow Execution + +The "Update MAINTAINERS.yaml file" workflow is executed in the following scenarios: + +1. **Weekly Schedule**: The workflow runs automatically every week. It is useful, e.g. when some repositories are archived, renamed, or when a GitHub user account is removed. +2. **On Change**: When a `CODEOWNERS` file is changed in any repository under the AsyncAPI organization, the related repository triggers the workflow by emitting the `trigger-maintainers-update` event. +3. **Manual Trigger**: Users can manually trigger the workflow as needed. + +### Workflow Steps + +1. **Load Cache**: Attempt to read previously cached data from `github.api.cache.json` to optimize API calls. +2. **List All Repositories**: Retrieve a list of all public repositories under the AsyncAPI organization, skipping any repositories specified in the `IGNORED_REPOSITORIES` environment variable. +3. **Fetch `CODEOWNERS` Files**: For each repository: + - Detect the default branch (e.g., `main`, `master`, or a custom branch). + - Check for `CODEOWNERS` files in all valid locations as specified in the [GitHub documentation](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#codeowners-file-location). +4. **Process `CODEOWNERS` Files**: + 1. Extract GitHub usernames from each `CODEOWNERS` file, excluding emails, team names, and users specified by the `IGNORED_USERS` environment variable. + 2. Retrieve profile information for each unique GitHub username. + 3. Collect a fresh list of repositories currently owned by each GitHub user. +5. **Refresh Maintainers List**: Iterate through the existing maintainers list: + - Delete the entry if it: + - Refers to a deleted GitHub account. + - Was not found in any `CODEOWNERS` file across all repositories in the AsyncAPI organization. + - Otherwise, update **only** the `repos` property. +6. **Add New Maintainers**: Append any new maintainers not present in the previous list. +7. **Changes Summary**: Provide details on why a maintainer was removed or changed directly on the GitHub Action [summary page](https://github.blog/2022-05-09-supercharging-github-actions-with-job-summaries/). +8. **Save Cache**: Save retrieved data in `github.api.cache.json`. + +## Job Details + +- **Concurrency**: Ensures the workflow does not run multiple times concurrently to avoid conflicts. +- **Wait for PRs to be Merged**: The workflow waits for pending pull requests to be merged before execution. If the merged pull request addresses all necessary fixes, it prevents unnecessary executions. + +## Handling Conflicts + +Since the job performs a full refresh each time, resolving conflicts is straightforward: + +1. Close the pull request with conflicts. +2. Navigate to the "Update MAINTAINERS.yaml file" workflow. +3. Trigger it manually by clicking "Run workflow". + +## Caching Mechanism + +Each execution of this action performs a full refresh through the following API calls: + +``` +ListRepos(AsyncAPI) # 1 call using GraphQL - not cached. + for each Repo + GetCodeownersFile(Repo) # N calls using REST API - all are cached. N refers to the number of public repositories under AsyncAPI. + for each codeowner + GetGitHubProfile(owner) # Y calls using REST API - all are cached. Y refers to unique GitHub users found across all CODEOWNERS files. +``` + +To avoid hitting the GitHub API rate limits, [conditional requests](https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28#use-conditional-requests-if-appropriate) are used via `if-modified-since`. The API responses are saved into a `github.api.cache.json` file, which is later uploaded as a GitHub action cache item. diff --git a/.github/scripts/maintainers/cache.js b/.github/scripts/maintainers/cache.js new file mode 100644 index 000000000..0a52b4b7e --- /dev/null +++ b/.github/scripts/maintainers/cache.js @@ -0,0 +1,64 @@ +const fs = require("fs"); + +module.exports = { + fetchWithCache, + saveCache, + loadCache, + printAPICallsStats, +}; + +const CODEOWNERS_CACHE_PATH = "./.github/scripts/maintainers/github.api.cache.json"; + +let cacheEntries = {}; + +let numberOfFullFetches = 0; +let numberOfCacheHits = 0; + +function loadCache(core) { + try { + cacheEntries = JSON.parse(fs.readFileSync(CODEOWNERS_CACHE_PATH, "utf8")); + } catch (error) { + core.warning(`Cache was not restored: ${error}`); + } +} + +function saveCache() { + fs.writeFileSync(CODEOWNERS_CACHE_PATH, JSON.stringify(cacheEntries)); +} + +async function fetchWithCache(cacheKey, fetchFn, core) { + const cachedResp = cacheEntries[cacheKey]; + + try { + const { data, headers } = await fetchFn({ + headers: { + "if-modified-since": cachedResp?.lastModified ?? "", + }, + }); + + cacheEntries[cacheKey] = { + // last modified header is more reliable than etag while executing calls on GitHub Action + lastModified: headers["last-modified"], + data, + }; + + numberOfFullFetches++; + return data; + } catch (error) { + if (error.status === 304) { + numberOfCacheHits++; + core.debug(`Returning cached data for ${cacheKey}`); + return cachedResp.data; + } + throw error; + } +} + +function printAPICallsStats(core) { + core.startGroup("API calls statistic"); + core.info( + `Number of API calls count against rate limit: ${numberOfFullFetches}`, + ); + core.info(`Number of cache hits: ${numberOfCacheHits}`); + core.endGroup(); +} diff --git a/.github/scripts/maintainers/gh_calls.js b/.github/scripts/maintainers/gh_calls.js new file mode 100644 index 000000000..f10b5c2eb --- /dev/null +++ b/.github/scripts/maintainers/gh_calls.js @@ -0,0 +1,131 @@ +const { fetchWithCache } = require("./cache"); + +module.exports = { getGitHubProfile, getAllCodeownersFiles, getRepositories }; + +async function getRepositories(github, owner, ignoredRepos, core) { + core.startGroup( + `Getting list of all public, non-archived repositories owned by ${owner}`, + ); + + const query = ` + query repos($cursor: String, $owner: String!) { + organization(login: $owner) { + repositories(first: 100 after: $cursor visibility: PUBLIC isArchived: false orderBy: {field: CREATED_AT, direction: ASC} ) { + nodes { + name + } + pageInfo { + hasNextPage + endCursor + } + } + } + }`; + + const repos = []; + let cursor = null; + + do { + const result = await github.graphql(query, { owner, cursor }); + const { nodes, pageInfo } = result.organization.repositories; + repos.push(...nodes); + + cursor = pageInfo.hasNextPage ? pageInfo.endCursor : null; + } while (cursor); + + core.debug(`List of repositories for ${owner}:`); + core.debug(JSON.stringify(repos, null, 2)); + core.endGroup(); + + return repos.filter((repo) => !ignoredRepos.includes(repo.name)); +} + +async function getGitHubProfile(github, login, core) { + try { + const profile = await fetchWithCache( + `profile:${login}`, + async ({ headers }) => { + return github.rest.users.getByUsername({ + username: login, + headers, + }); + }, + core, + ); + return removeNulls({ + name: profile.name ?? login, + github: login, + twitter: profile.twitter_username, + availableForHire: profile.hireable, + isTscMember: false, + repos: [], + githubID: profile.id, + }); + } catch (error) { + if (error.status === 404) { + return null; + } + throw error; + } +} + +// Checks for all valid locations according to: +// https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#codeowners-file-location +// +// Detect the repository default branch automatically. +async function getCodeownersFile(github, owner, repo, core) { + const paths = ["CODEOWNERS", "docs/CODEOWNERS", ".github/CODEOWNERS"]; + + for (const path of paths) { + try { + core.debug( + `[repo: ${owner}/${repo}]: Fetching CODEOWNERS file at ${path}`, + ); + return await fetchWithCache( + `owners:${owner}/${repo}`, + async ({ headers }) => { + return github.rest.repos.getContent({ + owner, + repo, + path, + headers: { + Accept: "application/vnd.github.raw+json", + ...headers, + }, + }); + }, + core, + ); + } catch (error) { + core.warning( + `[repo: ${owner}/${repo}]: Failed to fetch CODEOWNERS file at ${path}: ${error.message}`, + ); + } + } + + core.error( + `[repo: ${owner}/${repo}]: CODEOWNERS file not found in any of the expected locations.`, + ); + return null; +} + +async function getAllCodeownersFiles(github, owner, repos, core) { + core.startGroup(`Fetching CODEOWNERS files for ${repos.length} repositories`); + const files = []; + for (const repo of repos) { + const data = await getCodeownersFile(github, owner, repo.name, core); + if (!data) { + continue; + } + files.push({ + repo: repo.name, + content: data, + }); + } + core.endGroup(); + return files; +} + +function removeNulls(obj) { + return Object.fromEntries(Object.entries(obj).filter(([_, v]) => v != null)); +} diff --git a/.github/scripts/maintainers/index.js b/.github/scripts/maintainers/index.js new file mode 100644 index 000000000..be32d8da5 --- /dev/null +++ b/.github/scripts/maintainers/index.js @@ -0,0 +1,190 @@ +const yaml = require("js-yaml"); +const fs = require("fs"); +const { saveCache, loadCache, printAPICallsStats } = require("./cache"); +const { summarizeChanges } = require("./summary"); +const { + getAllCodeownersFiles, + getGitHubProfile, + getRepositories, +} = require("./gh_calls"); + +module.exports = async ({ github, context, core }) => { + try { + await run(github, context, core); + } catch (error) { + console.log(error); + core.setFailed(`An error occurred: ${error}`); + } +}; + +const config = { + ghToken: process.env.GH_TOKEN, + ignoredRepos: getCommaSeparatedInputList(process.env.IGNORED_REPOSITORIES), + ignoredUsers: getCommaSeparatedInputList(process.env.IGNORED_USERS), + maintainersFilePath: process.env.MAINTAINERS_FILE_PATH, +}; + +function getCommaSeparatedInputList(list) { + return ( + list + ?.split(",") + .map((item) => item.trim()) + .filter((item) => item !== "") ?? [] + ); +} + +function splitByWhitespace(line) { + return line.trim().split(/\s+/); +} + +function extractGitHubUsernames(codeownersContent, core) { + if (!codeownersContent) return []; + + const uniqueOwners = new Set(); + + for (const line of codeownersContent.split("\n")) { + // split by '#' to process comments separately + const [ownersLine, comment = ""] = line.split("#"); + + // 1. Check AsyncAPI custom owners + const triagers = comment.split(/docTriagers:|codeTriagers:/)[1] + if (triagers) { + const owners = splitByWhitespace(triagers) + owners.forEach(owner => uniqueOwners.add(owner)) + } + + // 2. Check GitHub native codeowners + const owners = splitByWhitespace(ownersLine); + + // the 1st element is the file location, we don't need it, so we start with 2nd item + for (const owner of owners.slice(1)) { + if (!owner.startsWith("@") || owner.includes("/")) { + core.warning(`Skipping '${owner}' as emails and teams are not supported yet`); + continue; + } + uniqueOwners.add(owner.slice(1)); // remove the '@' + } + } + + return uniqueOwners; +} + +async function collectCurrentMaintainers(codeownersFiles, github, core) { + core.startGroup(`Fetching GitHub profile information for each codeowner`); + + const currentMaintainers = {}; + for (const codeowners of codeownersFiles) { + const owners = extractGitHubUsernames(codeowners.content, core); + + for (const owner of owners) { + if (config.ignoredUsers.includes(owner)) { + core.debug( + `[repo: ${codeowners.repo}]: The user '${owner}' is on the ignore list. Skipping...`, + ); + continue; + } + const key = owner.toLowerCase(); + if (!currentMaintainers[key]) { + // Fetching GitHub profile is useful to ensure that all maintainers are valid (e.g., their GitHub accounts haven't been deleted). + const profile = await getGitHubProfile(github, owner, core); + if (!profile) { + core.warning( + `[repo: ${codeowners.repo}]: GitHub profile not found for ${owner}.`, + ); + continue; + } + + currentMaintainers[key] = { ...profile, repos: [] }; + } + + currentMaintainers[key].repos.push(codeowners.repo); + } + } + + core.endGroup(); + return currentMaintainers; +} + +function refreshPreviousMaintainers( + previousMaintainers, + currentMaintainers, + core, +) { + core.startGroup(`Refreshing previous maintainers list`); + + const updatedMaintainers = []; + + // 1. Iterate over the list of previous maintainers to: + // - Remove any maintainers who are not listed in any current CODEOWNERS files. + // - Update the repos list, ensuring that other properties (e.g., 'linkedin', 'slack', etc.) remain unchanged. + for (const previousEntry of previousMaintainers) { + const key = previousEntry.github.toLowerCase(); + const currentMaintainer = currentMaintainers[key]; + if (!currentMaintainer) { + core.info( + `The previous ${previousEntry.github} maintainer was not found in any CODEOWNERS file. Removing...`, + ); + continue; + } + delete currentMaintainers[key]; + + updatedMaintainers.push({ + ...previousEntry, + repos: currentMaintainer.repos, + githubID: currentMaintainer.githubID, + }); + } + + // 2. Append new codeowners who are not present in the previous Maintainers file. + const newMaintainers = Object.values(currentMaintainers); + updatedMaintainers.push(...newMaintainers); + + core.endGroup(); + return updatedMaintainers; +} + +async function run(github, context, core) { + if (!config.maintainersFilePath) { + core.setFailed("The MAINTAINERS_FILE_PATH is not defined"); + return; + } + loadCache(core); + + const repos = await getRepositories( + github, + context.repo.owner, + config.ignoredRepos, + core, + ); + const codeownersFiles = await getAllCodeownersFiles( + github, + context.repo.owner, + repos, + core, + ); + + const previousMaintainers = yaml.load( + fs.readFileSync(config.maintainersFilePath, "utf8"), + ); + + // 1. Collect new maintainers from all current CODEOWNERS files found across all repositories. + const currentMaintainers = await collectCurrentMaintainers( + codeownersFiles, + github, + core, + ); + + // 2. Refresh the repository list for existing maintainers and add any new maintainers to the list. + const refreshedMaintainers = refreshPreviousMaintainers( + previousMaintainers, + currentMaintainers, + core, + ); + + fs.writeFileSync(config.maintainersFilePath, yaml.dump(refreshedMaintainers)); + + printAPICallsStats(core); + + await summarizeChanges(previousMaintainers, refreshedMaintainers, core); + saveCache(); +} diff --git a/.github/scripts/maintainers/summary.js b/.github/scripts/maintainers/summary.js new file mode 100644 index 000000000..e07d03fd4 --- /dev/null +++ b/.github/scripts/maintainers/summary.js @@ -0,0 +1,99 @@ +module.exports = { summarizeChanges }; + +async function summarizeChanges(oldMaintainers, newMaintainers, core) { + const outOfSync = []; + const noLongerActive = []; + + const newMaintainersByGitHubName = new Map(); + for (const newMaintainer of newMaintainers) { + newMaintainersByGitHubName.set(newMaintainer.github, newMaintainer); + } + + for (const oldEntry of oldMaintainers) { + const newEntry = newMaintainersByGitHubName.get(oldEntry.github); + + if (!newEntry) { + noLongerActive.push([oldEntry.github, repositoriesLinks(oldEntry.repos)]); + continue; + } + + const { newOwnedRepos, noLongerOwnedRepos } = compareRepos( + oldEntry.repos, + newEntry.repos, + ); + + if (newOwnedRepos.length > 0 || noLongerOwnedRepos.length > 0) { + outOfSync.push([ + profileLink(oldEntry.github), + repositoriesLinks(newOwnedRepos), + repositoriesLinks(noLongerOwnedRepos), + ]); + } + } + + if (outOfSync.length > 0) { + core.summary.addHeading("⚠️ Out of Sync Maintainers", "2"); + core.summary.addTable([ + [ + { data: "Name", header: true }, + { data: "Newly added to CODEOWNERS", header: true }, + { data: "No longer in CODEOWNERS", header: true }, + ], + ...outOfSync, + ]); + core.summary.addBreak(); + } + + if (noLongerActive.length > 0) { + core.summary.addHeading( + "👻 Inactive Maintainers (not listed in any repositories)", + "2", + ); + + core.summary.addTable([ + [ + { data: "Name", header: true }, + { data: "Previously claimed ownership in repos", header: true }, + ], + ...noLongerActive, + ]); + + core.summary.addBreak(); + } + + await core.summary.write({ overwrite: true }); +} + +function compareRepos(oldRepos, newRepos) { + const newOwnedRepositories = []; + const noLongerOwnedRepositories = []; + + for (const repo of newRepos) { + if (!oldRepos.includes(repo)) { + newOwnedRepositories.push(repo); + } + } + + for (const repo of oldRepos) { + if (!newRepos.includes(repo)) { + noLongerOwnedRepositories.push(repo); + } + } + + return { + newOwnedRepos: newOwnedRepositories, + noLongerOwnedRepos: noLongerOwnedRepositories, + }; +} + +function repositoriesLinks(repos) { + return repos + .map((repo) => { + return `${repo}`; + }) + .join(", "); +} + +function profileLink(login) { + return `${login}`; +} diff --git a/.github/workflows/update-maintainers.yaml b/.github/workflows/update-maintainers.yaml new file mode 100644 index 000000000..858eb02aa --- /dev/null +++ b/.github/workflows/update-maintainers.yaml @@ -0,0 +1,130 @@ +# This action updates the `MAINTAINERS.yaml` file based on `CODEOWNERS` files in all organization repositories. +# It is triggered when a `CODEOWNERS` file is changed; the related repository triggers this workflow by emitting the `trigger-maintainers-update` event. +# It can also be triggered manually. + +name: Update MAINTAINERS.yaml file + +on: + push: + branches: [ master ] + paths: + - 'CODEOWNERS' + - '.github/scripts/maintainers/**' + - '.github/workflows/update-maintainers.yaml' + + schedule: + - cron: "0 10 * * SUN" # Runs at 10:00 AM UTC every Sunday. + + workflow_dispatch: + + repository_dispatch: + types: [ trigger-maintainers-update ] + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +env: + IGNORED_REPOSITORIES: "shape-up-process" + IGNORED_USERS: "asyncapi-bot-eve" + + BRANCH_NAME: "bot/update-maintainers-${{ github.run_id }}" + PR_TITLE: "docs(maintainers): update MAINTAINERS.yaml file with the latest CODEOWNERS changes" + +jobs: + update-maintainers: + name: Update MAINTAINERS.yaml based on CODEOWNERS files in all organization repositories + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + # If an action pushes code using the repository’s GITHUB_TOKEN, a pull request workflow will not run. + token: ${{ secrets.GH_TOKEN }} + + - name: Wait for active pull requests to be merged + env: + GH_TOKEN: ${{ github.token }} + TIMEOUT: 300 # Timeout in seconds + INTERVAL: 5 # Check interval in seconds + run: | + check_active_prs() { + ACTIVE_PULL_REQUESTS=$(gh -R $GITHUB_REPOSITORY pr list --search "is:pr ${PR_TITLE} in:title" --json id) + if [ "$ACTIVE_PULL_REQUESTS" == "[]" ]; then + return 1 # No active PRs + else + return 0 # Active PRs found + fi + } + + # Loop with timeout + elapsed_time=0 + while [ $elapsed_time -lt $TIMEOUT ]; do + if check_active_prs; then + echo "There is an active pull request. Waiting for it to be merged..." + else + echo "There is no active pull request. Proceeding with updating MAINTAINERS file." + git pull + exit 0 + fi + + sleep $INTERVAL + elapsed_time=$((elapsed_time + INTERVAL)) + done + + echo "Timeout reached. Proceeding with updating MAINTAINERS.yaml file with active pull request(s) present. It may result in merge conflict." + exit 0 + + - name: Restore cached GitHub API calls + uses: actions/cache/restore@v4 + with: + path: ./.github/scripts/maintainers/github.api.cache.json + key: github-api-cache + restore-keys: | + github-api-cache- + + - name: Installing Module + shell: bash + run: npm install js-yaml@4 --no-save + + - name: Run script updating MAINTAINERS.yaml + uses: actions/github-script@v7 + env: + GH_TOKEN: ${{ github.token }} + MAINTAINERS_FILE_PATH: "${{ github.workspace }}/MAINTAINERS.yaml" + with: + script: | + const script = require('./.github/scripts/maintainers/index.js') + await script({github, context, core}) + + - name: Save cached GitHub API calls + uses: actions/cache/save@v4 + with: + path: ./.github/scripts/maintainers/github.api.cache.json + # re-evaluate the key, so we update cache when file changes + key: github-api-cache-${{ hashfiles('./.github/scripts/maintainers/github.api.cache.json') }} + + - name: Create PR with latest changes + uses: peter-evans/create-pull-request@c5a7806660adbe173f04e3e038b0ccdcd758773c # https://github.com/peter-evans/create-pull-request/releases/tag/v6.1.0 + with: + token: ${{ secrets.GH_TOKEN }} + commit-message: ${{ env.PR_TITLE }} + committer: asyncapi-bot + author: asyncapi-bot + title: ${{ env.PR_TITLE }} + branch: ${{ env.BRANCH_NAME }} + body: | + **Description** + - Update MAINTAINERS.yaml based on CODEOWNERS files across all repositories in the organization. + + For details on why a maintainer was removed or changed, refer to the [Job summary page](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}). + + - name: Report workflow run status to Slack + uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 # https://github.com/rtCamp/action-slack-notify/releases/tag/v2.3.0 + if: failure() + env: + SLACK_WEBHOOK: ${{secrets.SLACK_CI_FAIL_NOTIFY}} + SLACK_TITLE: 🚨 Update MAINTAINERS.yaml file Workflow failed 🚨 + SLACK_MESSAGE: Failed to auto update MAINTAINERS.yaml file. + MSG_MINIMAL: true