Add updated scripts for checking all repos for a SECURITY.md and then adding one if missing #317

Merged: 1 commit, Apr 11, 2024
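The two scripts are designed to run in sequence: list_github_age_upstream_contrib.py builds the repos JSON, and add_security_md_to_json_list.py consumes it. A minimal usage sketch, assuming the environment variable names used by the scripts; the token value and the timestamped output file name are placeholders:

import os
import subprocess

env = os.environ.copy()
env["GITHUB_AUTH_TOKEN"] = "<personal-access-token>"  # used by the list script's GraphQL calls
env["ORG_NAME"] = "cloud-gov"                         # default org in add_security_md_to_json_list.py

# Step 1: build the inventory of repos and whether each has SECURITY.md.
subprocess.run(["python", "github/list_github_age_upstream_contrib.py"], env=env, check=True)

# Step 2: point the add script at the JSON the list script just wrote
# (named <script_name>_<timestamp>.json) and open the PRs.
# The add script also expects an authenticated gh CLI (gh auth login).
env["REPOS_JSON_PATH"] = "list_github_age_upstream_contrib_20240411120000.json"  # placeholder
subprocess.run(["python", "github/add_security_md_to_json_list.py"], env=env, check=True)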
119 changes: 62 additions & 57 deletions github/add_security_md_to_json_list.py
@@ -1,3 +1,8 @@
# Use this script to add SECURITY.md files to all repos in an org that are missing one.
# Replace ADD_GITHUB_USERNAME with valid usernames from the GitHub org.
# This script requires a JSON file created by list_github_age_upstream_contrib.py to work.
#

import os
import json
import logging
@@ -10,12 +15,16 @@
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Configuration
# Configuration variables
ORG_NAME = os.getenv("ORG_NAME", "cloud-gov")
REPOS_JSON_PATH = os.getenv("REPOS_JSON_PATH", "repos.json")
PRIMARY_REVIEWER = "ADD_GITHUB_USERNAME"
ASSIGNEE = "ADD_GITHUB_USERNAME"
FALLBACK_REVIEWER = "ADD_GITHUB_USERNAME"
current_time = datetime.now().strftime("%Y%m%d%H%M%S")
BASE_PATH = os.path.expanduser(f"~/Downloads/repos_{current_time}")

# Security.md file content
SECURITY_MD_CONTENT = """
**Reporting Security Issues**

@@ -37,8 +46,8 @@
"""


# Function to run shell commands
def run_command(cmd, cwd=None, ignore_error=False):
"""Run a command and return its output, handling errors."""
try:
result = subprocess.run(
cmd,
@@ -57,6 +66,7 @@ def run_command(cmd, cwd=None, ignore_error=False):
return None


# Check GitHub CLI authentication
def check_gh_auth():
"""Check if 'gh' CLI is authenticated."""
if run_command(["gh", "auth", "status"], ignore_error=True):
@@ -65,6 +75,7 @@ def check_gh_auth():
logging.error("GitHub CLI is not authenticated. Please check the setup.")


# Fetch the default branch of the repository
def get_default_branch(repo_name):
"""Fetch the default branch for the repository using 'gh'."""
try:
@@ -79,6 +90,7 @@ def get_default_branch(repo_name):
return None


# Check if SECURITY.md exists
def security_md_exists(repo_name):
"""Check if SECURITY.md exists in the repository's default branch."""
try:
@@ -98,6 +110,7 @@ def security_md_exists(repo_name):
return None


# Clone the repository and prepare for adding SECURITY.md
def clone_and_prepare_repo(repo_name):
"""Clone a repository and prepare it for adding SECURITY.md."""
repo_path = os.path.join(BASE_PATH, repo_name)
@@ -120,94 +133,85 @@ def clone_and_prepare_repo(repo_name):
return None, None


# Add, commit, and push SECURITY
def add_commit_push_security_md(repo_path, branch_name):
"""Add SECURITY.md, commit, and push it."""
"""Add SECURITY.md, commit with signature, and push it."""
try:
security_md_path = os.path.join(repo_path, "SECURITY.md")
with open(security_md_path, "w") as file:
file.write(SECURITY_MD_CONTENT)
repo = git.Repo(repo_path)
repo.index.add(["SECURITY.md"])
repo.index.commit("Add SECURITY.md")
# Commit with signing
repo.git.commit("-S", "-m", "Add SECURITY.md")
origin = repo.remote(name="origin")
origin.push(refspec=f"{branch_name}:{branch_name}")
logging.info(
f"SECURITY.md added, committed, and pushed to {branch_name} in {repo_path}."
f"SECURITY.md added, signed commit, and pushed to {branch_name} in {repo_path}."
)
except Exception as e:
logging.error(
f"Failed to add, commit, and push SECURITY.md for {repo_path}: {e}"
f"Failed to add, sign commit, and push SECURITY.md for {repo_path}: {e}"
)
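The switch from repo.index.commit(...) to repo.git.commit("-S", ...) assumes the clone can produce signed commits. A minimal sketch of that prerequisite with GitPython, where the repo path and GPG key ID are placeholders:

import git

repo = git.Repo("/path/to/clone")  # placeholder path
with repo.config_writer() as cfg:
    cfg.set_value("user", "signingkey", "ABCDEF1234567890")  # placeholder GPG key ID
    cfg.set_value("commit", "gpgsign", "true")               # sign every commit made in this clone
# With the key present in the local keyring, repo.git.commit("-S", "-m", "Add SECURITY.md")
# produces a signed commit that GitHub can mark as verified.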


# Create a pull request
def create_pull_request(repo_path, branch_name, default_branch):
"""Create a pull request for the branch and attempt to add reviewers."""
"""Create a pull request for the branch, attempt to add reviewers, and assign 'wz-gsa'."""
original_dir = os.getcwd() # Save the current directory
try:
os.chdir(repo_path) # Change to the repo's directory
pr_body = """## Changes proposed in this pull request:

- added Security.md
- Added Security.md

## Things to check

- Ensure everything looks correct

## Security considerations

Improves security by adding Security.md"""
# Attempt to create the pull request and add the primary reviewer
result = run_command(
[
"gh",
"pr",
"create",
"--title",
"Add SECURITY.md",
"--body",
pr_body,
"--base",
default_branch,
"--head",
f"{ORG_NAME}:{branch_name}",
"--reviewer",
"cloud-gov/platform-ops",
], # Primary reviewer
ignore_error=True,
)

# If the primary reviewer addition fails, try the secondary reviewer
if "could not be requested" in result:
- Improves security by adding Security.md"""
# Create the pull request, assign 'ASSIGNEE', and add the primary reviewer
command = [
"gh",
"pr",
"create",
"--title",
"Add SECURITY.md",
"--body",
pr_body,
"--base",
default_branch,
"--head",
f"{ORG_NAME}:{branch_name}",
"--reviewer",
PRIMARY_REVIEWER,
"--assignee",
ASSIGNEE,
]

result = run_command(command, ignore_error=True) or ""  # Guard against None when the command fails

if (
"Reviewers could not be requested" in result
or "Assignee could not be added" in result
):
logging.warning(
f"Failed to add cloud-gov/platform-ops as a reviewer, trying cloud-gov-pages-operations."
)
result = run_command(
[
"gh",
"pr",
"create",
"--title",
"Add SECURITY.md",
"--body",
pr_body,
"--base",
default_branch,
"--head",
f"{ORG_NAME}:{branch_name}",
"--reviewer",
"drewbo",
], # Secondary reviewer
ignore_error=True,
"Attempting to add 'cloud-gov-pages-operations' as a fallback reviewer."
)
command[12] = FALLBACK_REVIEWER  # Fallback reviewer (index 12 is the value after "--reviewer")
result = run_command(command, ignore_error=True) or ""

if "could not be requested" in result:
logging.error(
f"Failed to add drewbo as a reviewer as well."
)
if "Reviewers could not be requested" in result:
logging.error("Failed to add any reviewers.")
else:
logging.info(
f"Pull request created for {branch_name}. Reviewer added successfully."
)
logging.info("Reviewer successfully added.")

if "Assignee could not be added" in result:
logging.error("Failed to add 'wz-gsa' as the assignee.")
else:
logging.info("'wz-gsa' successfully assigned to the PR.")

except Exception as e:
logging.error(
@@ -217,6 +221,7 @@ def create_pull_request(repo_path, branch_name, default_branch):
os.chdir(original_dir) # Restore the original directory
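Patching the reviewer by a hard-coded list index is brittle if the gh pr create arguments are ever reordered. A position-independent variant of the same fallback step, reusing the names defined above, could look like this sketch:

# Locate the value that follows the "--reviewer" flag instead of hard-coding its index.
reviewer_pos = command.index("--reviewer") + 1
command[reviewer_pos] = FALLBACK_REVIEWER
result = run_command(command, ignore_error=True) or ""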


# Main function
def main():
check_gh_auth()
if not os.path.exists(BASE_PATH):
62 changes: 24 additions & 38 deletions github/list_github_age_upstream_contrib.py
@@ -1,33 +1,3 @@
"""
GitHub Organization Repository Analyzer

This script communicates with the GitHub GraphQL API to analyze repositories within a specified GitHub organization.
It is designed to fetch details about each repository, including its name, last update timestamp, fork status, and the existence of critical files (README.md, SECURITY.md, LICENSE.md).
Additionally, it compiles a list of unique contributors for each repository.

Key Features:
- Fetches a list of repositories from the specified organization, excluding archived and private repositories to focus on active and public projects.
- Checks for the presence of README.md, SECURITY.md, and LICENSE.md in each repository to assess basic documentation and security policy adherence.
- Gathers a unique list of contributors for each repository, providing insight into community or team engagement.
- Implements pagination to handle organizations with more than 100 repositories, ensuring comprehensive analysis without hitting the GitHub API's first-page data limit.
- Outputs the collected data in both JSON and CSV formats, providing flexibility for further analysis or reporting. The JSON output offers a structured view, ideal for applications requiring detailed data processing. The CSV format is suitable for spreadsheets and other tools that support CSV, offering a straightforward way to view or share the analysis results.

Output Files:
- A JSON file named '<script_name>_<current_date_time>.json', containing detailed data about each repository in a structured format.
- A CSV file named '<script_name>_<current_date_time>.csv', with columns for repository details and rows for each repository, including a concatenated list of contributors.

Requirements:
- A GitHub Personal Access Token set as an environment variable 'GITHUB_AUTH_TOKEN' with sufficient permissions to query repository and organization details.
- The 'requests' Python package for making API requests.

Usage:
- Ensure the 'GITHUB_AUTH_TOKEN' environment variable is set with your GitHub Personal Access Token.
- Update the 'ORG_NAME' variable in the script with the target organization's name.
- Run the script. The output files will be saved in the current directory.

Note: The script assumes all repositories have a similar structure for the fetched data. If a repository lacks certain details (like a default branch), the script handles these cases gracefully, marking contributors as 'No contributors or commit history' when applicable.
"""

import requests
import json
import os
@@ -45,18 +15,24 @@
ORG_NAME = "cloud-gov"
print(f"Organization set to {ORG_NAME}.")


def run_query(query, max_retries=5):
"""Execute the GraphQL query with error handling for rate limits and network issues."""
headers = {"Authorization": f"Bearer {GITHUB_TOKEN}"}
for attempt in range(max_retries):
response = requests.post("https://api.github.com/graphql", json={"query": query}, headers=headers)
response = requests.post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
)
if response.status_code == 200:
return response.json()
elif attempt < max_retries - 1:
print(f"Attempt {attempt + 1} failed, retrying...")
continue
else:
raise Exception(f"Query failed after {max_retries} retries with status code {response.status_code}. {response.text}")
raise Exception(
f"Query failed after {max_retries} retries with status code {response.status_code}. {response.text}"
)
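For reference, a small self-contained call showing how run_query is used; the query is illustrative and relies only on the organization and repositories fields of GitHub's GraphQL schema:

sample_query = """
{
  organization(login: "cloud-gov") {
    repositories(first: 1) {
      totalCount
    }
  }
}
"""
result = run_query(sample_query)
print(result["data"]["organization"]["repositories"]["totalCount"])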


def fetch_repositories():
"""Fetch all repositories including checks for README.md, SECURITY.md, and LICENSE.md with pagination."""
@@ -65,7 +41,7 @@ def fetch_repositories():
has_next_page = True

while has_next_page:
after_cursor = f', after: "{end_cursor}"' if end_cursor else ''
after_cursor = f', after: "{end_cursor}"' if end_cursor else ""
query = f"""
{{
organization(login: "{ORG_NAME}") {{
@@ -132,18 +108,27 @@ def fetch_repositories():

return all_edges
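The cursor bookkeeping that keeps the while loop going is collapsed in this view. The standard GraphQL pagination pattern it relies on looks roughly like the following generic sketch, not the file's exact lines:

# Inside the while loop, after each run_query call:
repos = result["data"]["organization"]["repositories"]
all_edges.extend(repos["edges"])
has_next_page = repos["pageInfo"]["hasNextPage"]
end_cursor = repos["pageInfo"]["endCursor"]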


def main():
edges = fetch_repositories()
data_for_json = []
for edge in edges:
repo = edge["node"]
repo_url = repo["url"]
has_readme = 'Yes' if repo.get("readme") else 'No'
has_security = 'Yes' if repo.get("security") else 'No'
has_license = 'Yes' if repo.get("license") else 'No'
has_readme = "Yes" if repo.get("readme") else "No"
has_security = "Yes" if repo.get("security") else "No"
has_license = "Yes" if repo.get("license") else "No"

# Skip repositories that have SECURITY.md
if has_security == "Yes":
continue

contributors_set = set()
if repo.get("defaultBranchRef") and repo["defaultBranchRef"].get("target") and repo["defaultBranchRef"]["target"].get("history"):
if (
repo.get("defaultBranchRef")
and repo["defaultBranchRef"].get("target")
and repo["defaultBranchRef"]["target"].get("history")
):
contributors_set = {
edge["node"]["author"]["user"]["login"]
for edge in repo["defaultBranchRef"]["target"]["history"]["edges"]
@@ -176,13 +161,14 @@ def main():
json.dump(data_for_json, f_json, indent=2)
print(f"Data successfully written to {json_filename}")

with open(csv_filename, 'w', newline='', encoding='utf-8') as f_csv:
with open(csv_filename, "w", newline="", encoding="utf-8") as f_csv:
csv_columns = data_for_json[0].keys()
writer = csv.DictWriter(f_csv, fieldnames=csv_columns)
writer.writeheader()
for data in data_for_json:
writer.writerow(data)
print(f"Data successfully written to {csv_filename}")


if __name__ == "__main__":
main()