Add updated scripts for checking all repos for a SECURITY.md and then adding one if missing #317

Merged: 1 commit, Apr 11, 2024
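The two scripts are designed to run in sequence: list_github_age_upstream_contrib.py builds the repos JSON, and add_security_md_to_json_list.py consumes it. A minimal usage sketch, assuming the environment variable names used by the scripts; the token value and the timestamped output file name are placeholders:

import os
import subprocess

env = os.environ.copy()
env["GITHUB_AUTH_TOKEN"] = "<personal-access-token>"  # used by the list script's GraphQL calls
env["ORG_NAME"] = "cloud-gov"                         # default org in add_security_md_to_json_list.py

# Step 1: build the inventory of repos and whether each has SECURITY.md.
subprocess.run(["python", "github/list_github_age_upstream_contrib.py"], env=env, check=True)

# Step 2: point the add script at the JSON the list script just wrote
# (named <script_name>_<timestamp>.json) and open the PRs.
# The add script also expects an authenticated gh CLI (gh auth login).
env["REPOS_JSON_PATH"] = "list_github_age_upstream_contrib_20240411120000.json"  # placeholder
subprocess.run(["python", "github/add_security_md_to_json_list.py"], env=env, check=True)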
119 changes: 62 additions & 57 deletions github/add_security_md_to_json_list.py
@@ -1,3 +1,8 @@
# Use this script to add SECURITY.md files to all repos in an org that are missing one.
# Replace ADD_GITHUB_USERNAME with valid usernames from the GitHub org.
# This script requires a JSON file created by list_github_age_upstream_contrib.py to work.
#

import os
import json
import logging
@@ -10,12 +15,16 @@
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Configuration
# Configuration variables
ORG_NAME = os.getenv("ORG_NAME", "cloud-gov")
REPOS_JSON_PATH = os.getenv("REPOS_JSON_PATH", "repos.json")
PRIMARY_REVIEWER = "ADD_GITHUB_USERNAME"
ASSIGNEE = "ADD_GITHUB_USERNAME"
FALLBACK_REVIEWER = "ADD_GITHUB_USERNAME"
current_time = datetime.now().strftime("%Y%m%d%H%M%S")
BASE_PATH = os.path.expanduser(f"~/Downloads/repos_{current_time}")

# Security.md file content
SECURITY_MD_CONTENT = """
**Reporting Security Issues**

@@ -37,8 +46,8 @@
"""


# Function to run shell commands
def run_command(cmd, cwd=None, ignore_error=False):
"""Run a command and return its output, handling errors."""
try:
result = subprocess.run(
cmd,
@@ -57,6 +66,7 @@ def run_command(cmd, cwd=None, ignore_error=False):
return None


# Check GitHub CLI authentication
def check_gh_auth():
"""Check if 'gh' CLI is authenticated."""
if run_command(["gh", "auth", "status"], ignore_error=True):
@@ -65,6 +75,7 @@ def check_gh_auth():
logging.error("GitHub CLI is not authenticated. Please check the setup.")


# Fetch the default branch of the repository
def get_default_branch(repo_name):
"""Fetch the default branch for the repository using 'gh'."""
try:
@@ -79,6 +90,7 @@ def get_default_branch(repo_name):
return None


# Check if SECURITY.md exists
def security_md_exists(repo_name):
"""Check if SECURITY.md exists in the repository's default branch."""
try:
@@ -98,6 +110,7 @@ def security_md_exists(repo_name):
return None


# Clone the repository and prepare for adding SECURITY.md
def clone_and_prepare_repo(repo_name):
"""Clone a repository and prepare it for adding SECURITY.md."""
repo_path = os.path.join(BASE_PATH, repo_name)
@@ -120,94 +133,85 @@ def clone_and_prepare_repo(repo_name):
return None, None


# Add, commit, and push SECURITY
def add_commit_push_security_md(repo_path, branch_name):
"""Add SECURITY.md, commit, and push it."""
"""Add SECURITY.md, commit with signature, and push it."""
try:
security_md_path = os.path.join(repo_path, "SECURITY.md")
with open(security_md_path, "w") as file:
file.write(SECURITY_MD_CONTENT)
repo = git.Repo(repo_path)
repo.index.add(["SECURITY.md"])
repo.index.commit("Add SECURITY.md")
# Commit with signing
repo.git.commit("-S", "-m", "Add SECURITY.md")
origin = repo.remote(name="origin")
origin.push(refspec=f"{branch_name}:{branch_name}")
logging.info(
f"SECURITY.md added, committed, and pushed to {branch_name} in {repo_path}."
f"SECURITY.md added, signed commit, and pushed to {branch_name} in {repo_path}."
)
except Exception as e:
logging.error(
f"Failed to add, commit, and push SECURITY.md for {repo_path}: {e}"
f"Failed to add, sign commit, and push SECURITY.md for {repo_path}: {e}"
)
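The switch from repo.index.commit(...) to repo.git.commit("-S", ...) assumes the clone can produce signed commits. A minimal sketch of that prerequisite with GitPython, where the repo path and GPG key ID are placeholders:

import git

repo = git.Repo("/path/to/clone")  # placeholder path
with repo.config_writer() as cfg:
    cfg.set_value("user", "signingkey", "ABCDEF1234567890")  # placeholder GPG key ID
    cfg.set_value("commit", "gpgsign", "true")               # sign every commit made in this clone
# With the key present in the local keyring, repo.git.commit("-S", "-m", "Add SECURITY.md")
# produces a signed commit that GitHub can mark as verified.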


# Create a pull request
def create_pull_request(repo_path, branch_name, default_branch):
"""Create a pull request for the branch and attempt to add reviewers."""
"""Create a pull request for the branch, attempt to add reviewers, and assign 'wz-gsa'."""
original_dir = os.getcwd() # Save the current directory
try:
os.chdir(repo_path) # Change to the repo's directory
pr_body = """## Changes proposed in this pull request:

- added Security.md
- Added Security.md

## Things to check

- Ensure everything looks correct

## Security considerations

Improves security by adding Security.md"""
# Attempt to create the pull request and add the primary reviewer
result = run_command(
[
"gh",
"pr",
"create",
"--title",
"Add SECURITY.md",
"--body",
pr_body,
"--base",
default_branch,
"--head",
f"{ORG_NAME}:{branch_name}",
"--reviewer",
"cloud-gov/platform-ops",
], # Primary reviewer
ignore_error=True,
)

# If the primary reviewer addition fails, try the secondary reviewer
if "could not be requested" in result:
- Improves security by adding Security.md"""
# Create the pull request, assign 'ASSIGNEE', and add the primary reviewer
command = [
"gh",
"pr",
"create",
"--title",
"Add SECURITY.md",
"--body",
pr_body,
"--base",
default_branch,
"--head",
f"{ORG_NAME}:{branch_name}",
"--reviewer",
PRIMARY_REVIEWER,
"--assignee",
ASSIGNEE,
]

result = run_command(command, ignore_error=True) or ""  # Guard against None when the command fails

if (
"Reviewers could not be requested" in result
or "Assignee could not be added" in result
):
logging.warning(
f"Failed to add cloud-gov/platform-ops as a reviewer, trying cloud-gov-pages-operations."
)
result = run_command(
[
"gh",
"pr",
"create",
"--title",
"Add SECURITY.md",
"--body",
pr_body,
"--base",
default_branch,
"--head",
f"{ORG_NAME}:{branch_name}",
"--reviewer",
"drewbo",
], # Secondary reviewer
ignore_error=True,
"Attempting to add 'cloud-gov-pages-operations' as a fallback reviewer."
)
command[12] = FALLBACK_REVIEWER  # Fallback reviewer (index 12 is the value after "--reviewer")
result = run_command(command, ignore_error=True) or ""

if "could not be requested" in result:
logging.error(
f"Failed to add drewbo as a reviewer as well."
)
if "Reviewers could not be requested" in result:
logging.error("Failed to add any reviewers.")
else:
logging.info(
f"Pull request created for {branch_name}. Reviewer added successfully."
)
logging.info("Reviewer successfully added.")

if "Assignee could not be added" in result:
logging.error("Failed to add 'wz-gsa' as the assignee.")
else:
logging.info("'wz-gsa' successfully assigned to the PR.")

except Exception as e:
logging.error(
@@ -217,6 +221,7 @@ def create_pull_request(repo_path, branch_name, default_branch):
os.chdir(original_dir) # Restore the original directory
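Patching the reviewer by a hard-coded list index is brittle if the gh pr create arguments are ever reordered. A position-independent variant of the same fallback step, reusing the names defined above, could look like this sketch:

# Locate the value that follows the "--reviewer" flag instead of hard-coding its index.
reviewer_pos = command.index("--reviewer") + 1
command[reviewer_pos] = FALLBACK_REVIEWER
result = run_command(command, ignore_error=True) or ""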


# Main function
def main():
check_gh_auth()
if not os.path.exists(BASE_PATH):
62 changes: 24 additions & 38 deletions github/list_github_age_upstream_contrib.py
@@ -1,33 +1,3 @@
"""
GitHub Organization Repository Analyzer

This script communicates with the GitHub GraphQL API to analyze repositories within a specified GitHub organization.
It is designed to fetch details about each repository, including its name, last update timestamp, fork status, and the existence of critical files (README.md, SECURITY.md, LICENSE.md).
Additionally, it compiles a list of unique contributors for each repository.

Key Features:
- Fetches a list of repositories from the specified organization, excluding archived and private repositories to focus on active and public projects.
- Checks for the presence of README.md, SECURITY.md, and LICENSE.md in each repository to assess basic documentation and security policy adherence.
- Gathers a unique list of contributors for each repository, providing insight into community or team engagement.
- Implements pagination to handle organizations with more than 100 repositories, ensuring comprehensive analysis without hitting the GitHub API's first-page data limit.
- Outputs the collected data in both JSON and CSV formats, providing flexibility for further analysis or reporting. The JSON output offers a structured view, ideal for applications requiring detailed data processing. The CSV format is suitable for spreadsheets and other tools that support CSV, offering a straightforward way to view or share the analysis results.

Output Files:
- A JSON file named '<script_name>_<current_date_time>.json', containing detailed data about each repository in a structured format.
- A CSV file named '<script_name>_<current_date_time>.csv', with columns for repository details and rows for each repository, including a concatenated list of contributors.

Requirements:
- A GitHub Personal Access Token set as an environment variable 'GITHUB_AUTH_TOKEN' with sufficient permissions to query repository and organization details.
- The 'requests' Python package for making API requests.

Usage:
- Ensure the 'GITHUB_AUTH_TOKEN' environment variable is set with your GitHub Personal Access Token.
- Update the 'ORG_NAME' variable in the script with the target organization's name.
- Run the script. The output files will be saved in the current directory.

Note: The script assumes all repositories have a similar structure for the fetched data. If a repository lacks certain details (like a default branch), the script handles these cases gracefully, marking contributors as 'No contributors or commit history' when applicable.
"""

import requests
import json
import os
@@ -45,18 +15,24 @@
ORG_NAME = "cloud-gov"
print(f"Organization set to {ORG_NAME}.")


def run_query(query, max_retries=5):
"""Execute the GraphQL query with error handling for rate limits and network issues."""
headers = {"Authorization": f"Bearer {GITHUB_TOKEN}"}
for attempt in range(max_retries):
response = requests.post("https://api.github.com/graphql", json={"query": query}, headers=headers)
response = requests.post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
)
if response.status_code == 200:
return response.json()
elif attempt < max_retries - 1:
print(f"Attempt {attempt + 1} failed, retrying...")
continue
else:
raise Exception(f"Query failed after {max_retries} retries with status code {response.status_code}. {response.text}")
raise Exception(
f"Query failed after {max_retries} retries with status code {response.status_code}. {response.text}"
)
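For reference, a small self-contained call showing how run_query is used; the query is illustrative and relies only on the organization and repositories fields of GitHub's GraphQL schema:

sample_query = """
{
  organization(login: "cloud-gov") {
    repositories(first: 1) {
      totalCount
    }
  }
}
"""
result = run_query(sample_query)
print(result["data"]["organization"]["repositories"]["totalCount"])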


def fetch_repositories():
"""Fetch all repositories including checks for README.md, SECURITY.md, and LICENSE.md with pagination."""
@@ -65,7 +41,7 @@ def fetch_repositories():
has_next_page = True

while has_next_page:
after_cursor = f', after: "{end_cursor}"' if end_cursor else ''
after_cursor = f', after: "{end_cursor}"' if end_cursor else ""
query = f"""
{{
organization(login: "{ORG_NAME}") {{
@@ -132,18 +108,27 @@ def fetch_repositories():

return all_edges
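The cursor bookkeeping that keeps the while loop going is collapsed in this view. The standard GraphQL pagination pattern it relies on looks roughly like the following generic sketch, not the file's exact lines:

# Inside the while loop, after each run_query call:
repos = result["data"]["organization"]["repositories"]
all_edges.extend(repos["edges"])
has_next_page = repos["pageInfo"]["hasNextPage"]
end_cursor = repos["pageInfo"]["endCursor"]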


def main():
edges = fetch_repositories()
data_for_json = []
for edge in edges:
repo = edge["node"]
repo_url = repo["url"]
has_readme = 'Yes' if repo.get("readme") else 'No'
has_security = 'Yes' if repo.get("security") else 'No'
has_license = 'Yes' if repo.get("license") else 'No'
has_readme = "Yes" if repo.get("readme") else "No"
has_security = "Yes" if repo.get("security") else "No"
has_license = "Yes" if repo.get("license") else "No"

# Skip repositories that have SECURITY.md
if has_security == "Yes":
continue

contributors_set = set()
if repo.get("defaultBranchRef") and repo["defaultBranchRef"].get("target") and repo["defaultBranchRef"]["target"].get("history"):
if (
repo.get("defaultBranchRef")
and repo["defaultBranchRef"].get("target")
and repo["defaultBranchRef"]["target"].get("history")
):
contributors_set = {
edge["node"]["author"]["user"]["login"]
for edge in repo["defaultBranchRef"]["target"]["history"]["edges"]
@@ -176,13 +161,14 @@ def main():
json.dump(data_for_json, f_json, indent=2)
print(f"Data successfully written to {json_filename}")

with open(csv_filename, 'w', newline='', encoding='utf-8') as f_csv:
with open(csv_filename, "w", newline="", encoding="utf-8") as f_csv:
csv_columns = data_for_json[0].keys()
writer = csv.DictWriter(f_csv, fieldnames=csv_columns)
writer.writeheader()
for data in data_for_json:
writer.writerow(data)
print(f"Data successfully written to {csv_filename}")


if __name__ == "__main__":
main()