/**
 * Parses a single firewall log line.
 *
 * Expected layout (whitespace separated, the user agent may be double-quoted):
 *   timestamp client_ip:port domain dest_ip:port proto method status decision url user_agent
 *
 * Blank lines, lines starting with "#", lines with fewer than 10 fields and
 * lines whose first field is not a numeric timestamp are rejected. Only the
 * timestamp is validated; all other fields are returned verbatim.
 *
 * @param {string} line - Log line to parse
 * @returns {object|null} Parsed entry, or null when the line is not a valid log entry
 */
function parseFirewallLogLine(line) {
  // Non-string input is just another kind of invalid line: report null instead of throwing.
  if (typeof line !== "string") {
    return null;
  }

  const trimmed = line.trim();
  if (!trimmed || trimmed.startsWith("#")) {
    return null;
  }

  // Tokenize on whitespace while keeping double-quoted segments in one piece.
  const fields = trimmed.match(/(?:[^\s"]+|"[^"]*")+/g);
  if (!fields || fields.length < 10) {
    return null;
  }

  const [timestamp, clientIpPort, domain, destIpPort, proto, method, status, decision, url, rawUserAgent] = fields;

  // The timestamp is the only field validated; it is what identifies the log format.
  if (!/^\d+(\.\d+)?$/.test(timestamp)) {
    return null;
  }

  return {
    timestamp,
    clientIpPort,
    domain,
    destIpPort,
    proto,
    method,
    status,
    decision,
    url,
    // Drop one leading/trailing quote; an empty user agent is reported as "-".
    userAgent: rawUserAgent.replace(/^"|"$/g, "") || "-",
  };
}

/**
 * Determines if a request was blocked based on decision and status.
 *
 * Checks run in this order, first match wins:
 *   1. status 403 or 407                               -> blocked
 *   2. decision contains NONE_NONE or TCP_DENIED       -> blocked
 *   3. status 200, 206 or 304                          -> allowed
 *   4. decision contains TCP_TUNNEL, TCP_HIT, TCP_MISS -> allowed
 *   5. anything else                                   -> blocked (safe default)
 *
 * @param {string} decision - Decision field (e.g., TCP_TUNNEL:HIER_DIRECT, NONE_NONE:HIER_NONE)
 * @param {string} status - Status code (e.g., 200, 403, 0)
 * @returns {boolean} True if request was blocked
 */
function isRequestBlocked(decision, status) {
  // A missing or non-string decision matches no marker, so the status and the safe default decide.
  const decisionText = typeof decision === "string" ? decision : "";
  const statusCode = Number.parseInt(status, 10);
  const decisionHas = marker => decisionText.includes(marker);

  if (statusCode === 403 || statusCode === 407) {
    return true;
  }

  if (["NONE_NONE", "TCP_DENIED"].some(decisionHas)) {
    return true;
  }

  if ([200, 206, 304].includes(statusCode)) {
    return false;
  }

  if (["TCP_TUNNEL", "TCP_HIT", "TCP_MISS"].some(decisionHas)) {
    return false;
  }

  // Default to blocked for safety
  return true;
}

/**
 * Extracts the base domain from a domain:port string and sanitizes it.
 *
 * Everything from the last colon onward is dropped (when the colon is not the
 * first character) and the remainder is passed through sanitizeDomainName.
 * Empty, non-string and "-" placeholder inputs yield an empty string.
 *
 * @param {string} domainWithPort - Domain with port (e.g., "example.com:443")
 * @returns {string} Sanitized base domain (e.g., "example.com"), or "" when there is no domain
 */
function extractAndSanitizeDomain(domainWithPort) {
  if (typeof domainWithPort !== "string" || !domainWithPort || domainWithPort === "-") {
    return "";
  }

  // Remove port by taking everything before the last colon
  const lastColonIndex = domainWithPort.lastIndexOf(":");
  const domain = lastColonIndex > 0 ? domainWithPort.slice(0, lastColonIndex) : domainWithPort;

  // Sanitize the domain using the same function as content sanitization
  return sanitizeDomainName(domain);
}
/**
 * Reads firewall logs and extracts blocked domains.
 *
 * Exactly one directory is scanned: `logsDir` when provided (e.g. a directory of
 * downloaded log artifacts), otherwise /tmp/gh-aw/sandbox/firewall/logs/.
 * Every `*.log` file directly inside it is read and parsed line by line. The
 * operation is best effort: a missing or unreadable directory yields an empty
 * result and unreadable files are skipped.
 *
 * @param {string} [logsDir] - Path to firewall logs directory. Defaults to /tmp/gh-aw/sandbox/firewall/logs/
 * @returns {string[]} Array of unique blocked domains (sanitized, sorted)
 */
function getBlockedDomains(logsDir) {
  const squidLogsDir = logsDir || "/tmp/gh-aw/sandbox/firewall/logs/";

  // readdirSync already fails for a missing directory, so a separate existence
  // check would only add a race between the check and the read.
  let logFiles;
  try {
    logFiles = fs.readdirSync(squidLogsDir).filter(name => name.endsWith(".log"));
  } catch {
    return [];
  }

  const blockedDomainsSet = new Set();

  for (const name of logFiles) {
    let content;
    try {
      content = fs.readFileSync(path.join(squidLogsDir, name), "utf8");
    } catch {
      // Skip files we can't read
      continue;
    }

    for (const line of content.split("\n")) {
      // Blank, comment and malformed lines come back as null from the parser.
      const entry = parseFirewallLogLine(line);
      if (!entry || !isRequestBlocked(entry.decision, entry.status)) {
        continue;
      }

      const sanitizedDomain = extractAndSanitizeDomain(entry.domain);
      if (sanitizedDomain && sanitizedDomain !== "-") {
        blockedDomainsSet.add(sanitizedDomain);
      }
    }
  }

  return Array.from(blockedDomainsSet).sort();
}

/**
 * Generates an HTML details/summary section for blocked domains, wrapped in a
 * GitHub warning alert.
 *
 * Every emitted line is prefixed with ">" so the whole section stays inside the
 * alert blockquote. The output starts with two newlines so it can be appended
 * directly to existing footer text, and ends with a single newline.
 *
 * @param {string[]} blockedDomains - Array of blocked domain names
 * @returns {string} GitHub warning alert with details section, or empty string if no blocked domains
 */
function generateBlockedDomainsSection(blockedDomains) {
  if (!blockedDomains || blockedDomains.length === 0) {
    return "";
  }

  const domainCount = blockedDomains.length;
  const isSingle = domainCount === 1;
  const domainWord = isSingle ? "domain" : "domains";
  const verb = isSingle ? "was" : "were";

  const lines = [
    "> [!WARNING]",
    "> <details>",
    `> <summary>⚠️ Firewall blocked ${domainCount} ${domainWord}</summary>`,
    // Empty quoted line so the markdown after the summary is rendered as markdown
    ">",
    `> The following ${domainWord} ${verb} blocked by the firewall during workflow execution:`,
    ">",
    // List domains as bullet points (within the alert)
    ...Array.from(blockedDomains, domain => `> - \`${domain}\``),
    ">",
    "> </details>",
  ];

  return `\n\n${lines.join("\n")}\n`;
}
expect(result.domain).toBe("blocked.example.com:443"); + expect(result.status).toBe("403"); + expect(result.decision).toBe("NONE_NONE:HIER_NONE"); + }); + + it("should parse valid firewall log line with allowed request", () => { + const line = '1761332530.474 172.30.0.20:35288 api.github.com:443 140.82.112.22:443 1.1 CONNECT 200 TCP_TUNNEL:HIER_DIRECT api.github.com:443 "-"'; + const result = parseFirewallLogLine(line); + + expect(result).not.toBeNull(); + expect(result.timestamp).toBe("1761332530.474"); + expect(result.domain).toBe("api.github.com:443"); + expect(result.status).toBe("200"); + expect(result.decision).toBe("TCP_TUNNEL:HIER_DIRECT"); + }); + + it("should return null for empty line", () => { + expect(parseFirewallLogLine("")).toBeNull(); + expect(parseFirewallLogLine(" ")).toBeNull(); + }); + + it("should return null for comment line", () => { + expect(parseFirewallLogLine("# This is a comment")).toBeNull(); + }); + + it("should return null for invalid timestamp", () => { + const line = 'invalid 172.30.0.20:35288 blocked.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked.example.com:443 "-"'; + expect(parseFirewallLogLine(line)).toBeNull(); + }); + + it("should return null for lines with fewer than 10 fields", () => { + expect(parseFirewallLogLine("1761332530.474 172.30.0.20:35288 blocked.example.com:443")).toBeNull(); + }); + }); + + describe("isRequestBlocked", () => { + it("should identify blocked request with 403 status", () => { + expect(isRequestBlocked("NONE_NONE:HIER_NONE", "403")).toBe(true); + }); + + it("should identify blocked request with 407 status", () => { + expect(isRequestBlocked("NONE_NONE:HIER_NONE", "407")).toBe(true); + }); + + it("should identify blocked request with NONE_NONE decision", () => { + expect(isRequestBlocked("NONE_NONE:HIER_NONE", "0")).toBe(true); + }); + + it("should identify blocked request with TCP_DENIED decision", () => { + expect(isRequestBlocked("TCP_DENIED:HIER_NONE", 
"0")).toBe(true); + }); + + it("should identify allowed request with 200 status", () => { + expect(isRequestBlocked("TCP_TUNNEL:HIER_DIRECT", "200")).toBe(false); + }); + + it("should identify allowed request with TCP_TUNNEL decision", () => { + expect(isRequestBlocked("TCP_TUNNEL:HIER_DIRECT", "200")).toBe(false); + }); + + it("should identify allowed request with TCP_HIT decision", () => { + expect(isRequestBlocked("TCP_HIT:HIER_DIRECT", "200")).toBe(false); + }); + + it("should default to blocked for ambiguous requests", () => { + expect(isRequestBlocked("UNKNOWN:UNKNOWN", "999")).toBe(true); + }); + }); + + describe("extractAndSanitizeDomain", () => { + it("should extract and sanitize domain from domain:port format", () => { + expect(extractAndSanitizeDomain("example.com:443")).toBe("example.com"); + expect(extractAndSanitizeDomain("api.github.com:443")).toBe("api.github.com"); + expect(extractAndSanitizeDomain("sub.domain.example.com:8080")).toBe("sub.domain.example..."); + }); + + it("should handle placeholder domain", () => { + expect(extractAndSanitizeDomain("-")).toBe(""); + }); + + it("should handle empty or null input", () => { + expect(extractAndSanitizeDomain("")).toBe(""); + expect(extractAndSanitizeDomain(null)).toBe(""); + }); + + it("should sanitize special characters in domain", () => { + expect(extractAndSanitizeDomain("ex@mple.com:443")).toBe("exmple.com"); + expect(extractAndSanitizeDomain("test_site.com:443")).toBe("testsite.com"); + }); + + it("should handle domain without port", () => { + expect(extractAndSanitizeDomain("example.com")).toBe("example.com"); + }); + }); + + describe("getBlockedDomains", () => { + it("should return empty array when logs directory does not exist", () => { + const nonExistentDir = path.join(testDir, "nonexistent"); + const result = getBlockedDomains(nonExistentDir); + + expect(result).toEqual([]); + }); + + it("should return empty array when no log files exist", () => { + const emptyDir = path.join(testDir, 
"empty"); + fs.mkdirSync(emptyDir, { recursive: true }); + + const result = getBlockedDomains(emptyDir); + + expect(result).toEqual([]); + }); + + it("should extract blocked domains from single log file", () => { + const logsDir = path.join(testDir, "logs1"); + fs.mkdirSync(logsDir, { recursive: true }); + + const logContent = [ + '1761332530.474 172.30.0.20:35288 blocked1.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked1.example.com:443 "-"', + '1761332530.475 172.30.0.20:35289 api.github.com:443 140.82.112.22:443 1.1 CONNECT 200 TCP_TUNNEL:HIER_DIRECT api.github.com:443 "-"', + '1761332530.476 172.30.0.20:35290 blocked2.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked2.example.com:443 "-"', + ].join("\n"); + + fs.writeFileSync(path.join(logsDir, "access.log"), logContent); + + const result = getBlockedDomains(logsDir); + + expect(result).toEqual(["blocked1.example.com", "blocked2.example.com"]); + }); + + it("should deduplicate blocked domains", () => { + const logsDir = path.join(testDir, "logs2"); + fs.mkdirSync(logsDir, { recursive: true }); + + const logContent = [ + '1761332530.474 172.30.0.20:35288 blocked.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked.example.com:443 "-"', + '1761332530.475 172.30.0.20:35289 blocked.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked.example.com:443 "-"', + '1761332530.476 172.30.0.20:35290 blocked.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked.example.com:443 "-"', + ].join("\n"); + + fs.writeFileSync(path.join(logsDir, "access.log"), logContent); + + const result = getBlockedDomains(logsDir); + + expect(result).toEqual(["blocked.example.com"]); + }); + + it("should aggregate blocked domains from multiple log files", () => { + const logsDir = path.join(testDir, "logs3"); + fs.mkdirSync(logsDir, { recursive: true }); + + const log1Content = '1761332530.474 172.30.0.20:35288 
blocked1.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked1.example.com:443 "-"'; + const log2Content = '1761332530.475 172.30.0.20:35289 blocked2.example.com:443 140.82.112.22:443 1.1 CONNECT 407 TCP_DENIED:HIER_NONE blocked2.example.com:443 "-"'; + + fs.writeFileSync(path.join(logsDir, "access1.log"), log1Content); + fs.writeFileSync(path.join(logsDir, "access2.log"), log2Content); + + const result = getBlockedDomains(logsDir); + + expect(result).toEqual(["blocked1.example.com", "blocked2.example.com"]); + }); + + it("should sort blocked domains alphabetically", () => { + const logsDir = path.join(testDir, "logs4"); + fs.mkdirSync(logsDir, { recursive: true }); + + const logContent = [ + '1761332530.474 172.30.0.20:35288 zebra.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE zebra.example.com:443 "-"', + '1761332530.475 172.30.0.20:35289 alpha.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE alpha.example.com:443 "-"', + '1761332530.476 172.30.0.20:35290 mike.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE mike.example.com:443 "-"', + ].join("\n"); + + fs.writeFileSync(path.join(logsDir, "access.log"), logContent); + + const result = getBlockedDomains(logsDir); + + expect(result).toEqual(["alpha.example.com", "mike.example.com", "zebra.example.com"]); + }); + + it("should filter out placeholder domains", () => { + const logsDir = path.join(testDir, "logs5"); + fs.mkdirSync(logsDir, { recursive: true }); + + const logContent = [ + '1761332530.474 172.30.0.20:35288 - 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE - "-"', + '1761332530.475 172.30.0.20:35289 blocked.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked.example.com:443 "-"', + ].join("\n"); + + fs.writeFileSync(path.join(logsDir, "access.log"), logContent); + + const result = getBlockedDomains(logsDir); + + expect(result).toEqual(["blocked.example.com"]); + }); + + it("should 
handle invalid log lines gracefully", () => { + const logsDir = path.join(testDir, "logs6"); + fs.mkdirSync(logsDir, { recursive: true }); + + const logContent = ["# Comment line", "invalid line", '1761332530.474 172.30.0.20:35288 blocked.example.com:443 140.82.112.22:443 1.1 CONNECT 403 NONE_NONE:HIER_NONE blocked.example.com:443 "-"', "", "short"].join("\n"); + + fs.writeFileSync(path.join(logsDir, "access.log"), logContent); + + const result = getBlockedDomains(logsDir); + + expect(result).toEqual(["blocked.example.com"]); + }); + }); + + describe("generateBlockedDomainsSection", () => { + it("should return empty string when no blocked domains", () => { + expect(generateBlockedDomainsSection([])).toBe(""); + expect(generateBlockedDomainsSection(null)).toBe(""); + expect(generateBlockedDomainsSection(undefined)).toBe(""); + }); + + it("should generate details section for single blocked domain", () => { + const result = generateBlockedDomainsSection(["blocked.example.com"]); + + expect(result).toContain("> [!WARNING]"); + expect(result).toContain(">
"); + expect(result).toContain(">
"); + expect(result).toContain("> ⚠️ Firewall blocked 1 domain"); + expect(result).toContain("> - `blocked.example.com`"); + expect(result).toContain("> The following domain was blocked by the firewall during workflow execution:"); + }); + + it("should generate details section for multiple blocked domains", () => { + const domains = ["alpha.example.com", "beta.example.com", "gamma.example.com"]; + const result = generateBlockedDomainsSection(domains); + + expect(result).toContain("> [!WARNING]"); + expect(result).toContain(">
"); + expect(result).toContain(">
"); + expect(result).toContain("> ⚠️ Firewall blocked 3 domains"); + expect(result).toContain("> - `alpha.example.com`"); + expect(result).toContain("> - `beta.example.com`"); + expect(result).toContain("> - `gamma.example.com`"); + }); + + it("should use correct singular/plural form", () => { + const singleResult = generateBlockedDomainsSection(["single.com"]); + expect(singleResult).toContain("1 domain"); + expect(singleResult).toContain("domain was blocked"); + + const multiResult = generateBlockedDomainsSection(["one.com", "two.com"]); + expect(multiResult).toContain("2 domains"); + expect(multiResult).toContain("domains were blocked"); + }); + + it("should format domains with backticks", () => { + const result = generateBlockedDomainsSection(["example.com"]); + expect(result).toMatch(/> - `example\.com`/); + }); + + it("should start with double newline and warning alert", () => { + const result = generateBlockedDomainsSection(["example.com"]); + expect(result).toMatch(/^\n\n> \[!WARNING\]/); + }); + }); +}); diff --git a/actions/setup/js/generate_footer.cjs b/actions/setup/js/generate_footer.cjs index c782734023..dc88551386 100644 --- a/actions/setup/js/generate_footer.cjs +++ b/actions/setup/js/generate_footer.cjs @@ -3,6 +3,7 @@ const fs = require("fs"); const { getMissingInfoSections } = require("./missing_messages_helper.cjs"); +const { getBlockedDomains, generateBlockedDomainsSection } = require("./firewall_blocked_domains.cjs"); /** * Generates a standalone workflow-id XML comment marker for searchability. 
@@ -118,6 +119,13 @@ function generateFooter(workflowName, runUrl, workflowSource, workflowSourceURL, footer += missingInfoSections; } + // Add firewall blocked domains section if any domains were blocked + const blockedDomains = getBlockedDomains(); + const blockedDomainsSection = generateBlockedDomainsSection(blockedDomains); + if (blockedDomainsSection) { + footer += blockedDomainsSection; + } + // Add XML comment marker for traceability footer += "\n\n" + generateXMLMarker(workflowName, runUrl); diff --git a/actions/setup/js/generate_footer.test.cjs b/actions/setup/js/generate_footer.test.cjs index 75f50ae121..f2012d3ffb 100644 --- a/actions/setup/js/generate_footer.test.cjs +++ b/actions/setup/js/generate_footer.test.cjs @@ -190,6 +190,13 @@ describe("generate_footer.cjs", () => { expect(result).toContain("model: gpt-5"); expect(result).toContain("run: https://github.com/test/repo/actions/runs/123 -->"); }); + + it("should not include blocked domains section when no firewall logs exist", () => { + const result = generateFooter("Test Workflow", "https://github.com/test/repo/actions/runs/123", "", "", undefined, undefined, undefined); + + expect(result).not.toContain("⚠️ Firewall blocked"); + expect(result).not.toContain("
"); + }); }); describe("generateXMLMarker", () => { diff --git a/actions/setup/js/messages_footer.cjs b/actions/setup/js/messages_footer.cjs index 24e3c3a555..75d48df91a 100644 --- a/actions/setup/js/messages_footer.cjs +++ b/actions/setup/js/messages_footer.cjs @@ -10,6 +10,7 @@ const { getMessages, renderTemplate, toSnakeCase } = require("./messages_core.cjs"); const { getMissingInfoSections } = require("./missing_messages_helper.cjs"); +const { getBlockedDomains, generateBlockedDomainsSection } = require("./firewall_blocked_domains.cjs"); /** * @typedef {Object} FooterContext @@ -250,6 +251,13 @@ function generateFooterWithMessages(workflowName, runUrl, workflowSource, workfl footer += missingInfoSections; } + // Add firewall blocked domains section if any domains were blocked + const blockedDomains = getBlockedDomains(); + const blockedDomainsSection = generateBlockedDomainsSection(blockedDomains); + if (blockedDomainsSection) { + footer += blockedDomainsSection; + } + // Add XML comment marker for traceability footer += "\n\n" + generateXMLMarker(workflowName, runUrl); diff --git a/pkg/cli/workflows/test-firewall-blocked-domains-footer.md b/pkg/cli/workflows/test-firewall-blocked-domains-footer.md new file mode 100644 index 0000000000..140484c02e --- /dev/null +++ b/pkg/cli/workflows/test-firewall-blocked-domains-footer.md @@ -0,0 +1,61 @@ +--- +on: + workflow_dispatch: +engine: copilot +source: github/gh-aw/workflows/test-firewall-blocked-domains-footer.md@main +permissions: + contents: read + issues: read + pull-requests: read +network: + firewall: true + allowed: + - defaults + - github + blocked: + - pypi.org + - npmjs.org +safe-outputs: + create-issue: + title-prefix: "[test] " + expires: 2h +timeout-minutes: 5 +tools: + github: + bash: + - "*" +--- + +# Test Firewall Blocked Domains Footer + +This workflow tests that the footer includes a collapsed details section showing blocked domains when the firewall blocks access to certain domains. 
+ +## Test Steps + +1. **Attempt to access blocked domains** - Try to access `pypi.org` and `npmjs.org` which are configured as blocked in this workflow +2. **Attempt to access allowed domains** - Access `api.github.com` to verify allowed domains still work +3. **Create a test issue** - Create an issue to verify the footer contains the blocked domains section + +## Output + +Create an issue with: +- Title: "Firewall Blocked Domains Footer Test - Run {{ github.run_id }}" +- Body: A brief summary of the test results: + - Which domains were blocked (pypi.org, npmjs.org) + - Which domains were allowed (api.github.com) + - Confirmation that the test passed + +The footer should automatically include a collapsed `<details>` section showing the blocked domains. + +## Test Commands + +Run these commands to trigger the firewall: + +```bash +# Try to access blocked domains (these should fail) +curl -I https://pypi.org 2>&1 | head -5 || echo "pypi.org blocked as expected" +curl -I https://npmjs.org 2>&1 | head -5 || echo "npmjs.org blocked as expected" + +# Try to access allowed domain (this should succeed) +curl -I https://api.github.com 2>&1 | head -5 +```