153 changes: 79 additions & 74 deletions .github/scripts/article_reviewer.py
@@ -8,6 +8,7 @@
import json
import sys
import re
import base64
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
@@ -54,7 +55,11 @@ class ReviewCriteria:


class ArticleReviewer:
"""Main class for AI-powered article review."""
"""Main class for AI-powered article review.

Updated to fetch file contents directly from GitHub PR via API instead of
using local filesystem, resolving file access issues in CI/CD environments.
"""

def __init__(self):
# Validate required environment variables
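For context: the self.repo and self.pr_number attributes used throughout this diff are created during initialization, which is collapsed here. A minimal sketch of that setup with PyGithub, assuming illustrative environment variable names (GITHUB_TOKEN, GITHUB_REPOSITORY, PR_NUMBER) that may differ from the actual script:

import os
from github import Github  # PyGithub

# Assumed setup; variable names are illustrative, not taken from this PR.
gh = Github(os.environ["GITHUB_TOKEN"])
repo = gh.get_repo(os.environ["GITHUB_REPOSITORY"])  # e.g. "owner/name"
pr_number = int(os.environ["PR_NUMBER"])
pr = repo.get_pull(pr_number)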
@@ -309,34 +314,52 @@ def _is_non_text_file(self, filename: str) -> bool:

return file_ext in non_text_extensions

def get_file_content_from_pr(self, file_path: str) -> Optional[str]:
"""Get file content directly from the PR using GitHub API."""
try:
pr = self.repo.get_pull(self.pr_number)

# Get the head SHA from the PR
head_sha = pr.head.sha

# Get the file content from the specific commit
file_content = self.repo.get_contents(file_path, ref=head_sha)

# GitHub API always returns base64 encoded content
decoded_bytes = base64.b64decode(file_content.content)

# Try UTF-8 decoding first, fallback to latin-1 if it fails
try:
content = decoded_bytes.decode("utf-8")
except UnicodeDecodeError:
try:
content = decoded_bytes.decode("latin-1")
print(f"⚠️ Used latin-1 encoding fallback for {file_path}")
except UnicodeDecodeError:
print(f"❌ Could not decode file {file_path} with UTF-8 or latin-1")
return None

return content

except Exception as e:
print(f"❌ Error getting file content from PR for {file_path}: {e}")
return None

def extract_article_content(self, file_path: str) -> Dict[str, Any]:
"""Extract and analyze article content."""
"""Extract and analyze article content from PR."""
try:
# Check if file exists
if not os.path.exists(file_path):
print(f"⚠️ File not found: {file_path}")
return None
print(f"📖 Getting content from GitHub API for: {file_path}")

# Check if it's a text file we can process
if self._is_non_text_file(file_path.lower()):
print(f"⚠️ Skipping non-text file: {file_path}")
return None

# Read the file with proper encoding handling
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
except UnicodeDecodeError:
# Try with different encoding if UTF-8 fails
try:
with open(file_path, "r", encoding="latin-1") as f:
content = f.read()
print(f"ℹ️ Used latin-1 encoding for: {file_path}")
except Exception as e:
print(
f"❌ Could not read file with any encoding: {file_path} - {e}"
)
return None
# Get file content directly from the PR
content = self.get_file_content_from_pr(file_path)
if not content:
print(f"❌ Could not get content for: {file_path}")
return None

# Skip empty files
if not content.strip():
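A usage note on get_file_content_from_pr above: PyGithub also exposes a decoded_content property that performs the base64 step itself, so an equivalent fetch could be written roughly as follows (a sketch assuming a repo object and PR number are already available; not part of this PR):

# Assumed simplification using PyGithub's decoded_content helper
pr = repo.get_pull(pr_number)
file_obj = repo.get_contents(file_path, ref=pr.head.sha)
content = file_obj.decoded_content.decode("utf-8")  # decoded_content returns the raw bytes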
@@ -574,6 +597,25 @@ def analyze_with_ai(self, article_data: Dict[str, Any]) -> Dict[str, Any]:
"code_quality_issues": [],
}

def check_file_exists_in_pr(self, file_path: str) -> bool:
"""Check if a file exists in the PR using GitHub API."""
try:
pr = self.repo.get_pull(self.pr_number)
head_sha = pr.head.sha
self.repo.get_contents(file_path, ref=head_sha)
return True
except Exception:
return False

def _normalize_github_path(self, article_dir: str, filename: str) -> str:
"""Normalize file path for GitHub API by handling root directory correctly."""
if article_dir == ".":
# For root directory, return just the filename
return filename
else:
# For subdirectories, join normally and convert backslashes
return os.path.join(article_dir, filename).replace("\\", "/")

def check_requirements_compliance(self, file_path: str) -> Dict[str, Any]:
"""Check if article follows the repository guidelines."""
compliance_issues = []
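To illustrate the path handling above, _normalize_github_path keeps root-level lookups as a bare filename rather than "./filename" before the path is passed to the contents API; the directory name below is hypothetical:

# Assumed usage; "articles/my-post" is a made-up directory
reviewer._normalize_github_path(".", "README.md")                  # -> "README.md"
reviewer._normalize_github_path("articles/my-post", "README.md")   # -> "articles/my-post/README.md"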
@@ -593,18 +635,21 @@ def check_requirements_compliance(self, file_path: str) -> Dict[str, Any]:
"or have 'article' in the directory/filename"
)

# Check for README.md in the same directory
# Check for README.md in the same directory using GitHub API
article_dir = os.path.dirname(file_path)
# Handle root-level files where dirname returns empty string
if not article_dir:
article_dir = "."

# Check for various case variations of README files
readme_variants = ["README.md", "readme.md", "Readme.md", "ReadMe.md"]
readme_exists = any(
os.path.exists(os.path.join(article_dir, variant))
for variant in readme_variants
)
readme_exists = False

for variant in readme_variants:
readme_path = self._normalize_github_path(article_dir, variant)
if self.check_file_exists_in_pr(readme_path):
readme_exists = True
break

# Ensure we're not checking if the file itself is README.md
file_name = os.path.basename(file_path)
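A possible design note on the README check above (not implemented in this PR): each check_file_exists_in_pr call is its own API request, so the variants could instead be matched against a single directory listing; in PyGithub, get_contents on a directory returns a list of entries. A hedged sketch under that assumption:

# Assumed alternative: one directory listing instead of one request per variant
listing_path = "" if article_dir == "." else article_dir
entries = repo.get_contents(listing_path, ref=head_sha)  # returns a list for directories
readme_exists = any(e.name.lower() == "readme.md" for e in entries)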
@@ -613,11 +658,13 @@ def check_requirements_compliance(self, file_path: str) -> Dict[str, Any]:
if article_dir != ".":
compliance_issues.append("Missing README.md file in article directory")

# Check for src directory if code is included
src_path = os.path.join(article_dir, "src")
if os.path.exists(src_path):
requirements_path = os.path.join(article_dir, "requirements.txt")
if not os.path.exists(requirements_path):
# Check for src directory and requirements.txt using GitHub API
src_path = self._normalize_github_path(article_dir, "src")
if self.check_file_exists_in_pr(src_path):
requirements_path = self._normalize_github_path(
article_dir, "requirements.txt"
)
if not self.check_file_exists_in_pr(requirements_path):
compliance_issues.append(
"Missing requirements.txt file for code examples"
)
@@ -644,50 +691,8 @@ def run_review(self) -> None:
return

print(f"Found {len(changed_files)} article file(s) to review:")

# Validate file accessibility
accessible_files = []
for file in changed_files:
if os.path.exists(file):
accessible_files.append(file)
print(f" ✅ {file}")
else:
print(f" ❌ {file} (file not found)")

if not accessible_files:
print("❌ No accessible files found for review.")
# Create error response
error_response = {
"overall_score": 0,
"detailed_feedback": {
"file_access_error": {
"score": 0,
"feedback": f"None of the {len(changed_files)} identified files could be accessed. This may indicate a file path issue or the files may not exist in the current working directory.",
}
},
"suggestions": [
"Check that the working directory is correct",
"Ensure all files exist in the repository",
"Verify file paths are correct",
],
"technical_accuracy_notes": "Could not access any files for review",
"review_metadata": {
"reviewed_files": 0,
"total_files": len(changed_files),
"inaccessible_files": len(changed_files),
"review_timestamp": datetime.now(timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%SZ"
),
"pr_number": self.pr_number,
"repository": self.repository_name,
},
}

with open("review_results.json", "w") as f:
json.dump(error_response, f, indent=2)
return

changed_files = accessible_files # Only process accessible files
print(f" 📄 {file}")

all_reviews = {}
