diff --git a/AGENTS.md b/AGENTS.md index 16b96003..7436d9bc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -217,9 +217,12 @@ The codebase follows a modular architecture with clear separation of concerns: **Agent Execution Container** (`containers/agent/`) - Based on `ubuntu:22.04` with iptables, curl, git, nodejs, npm -- Mounts entire host filesystem at `/host` and user home directory for full access +- Mounts entire host filesystem at `/host` **read-only** for security - `NET_ADMIN` capability required for iptables setup during initialization -- **Security:** `NET_ADMIN` is dropped via `capsh --drop=cap_net_admin` before executing user commands, preventing malicious code from modifying iptables rules +- **Security:** + - Host filesystem mounted read-only (`/:/host:ro`) prevents accidental or malicious writes + - `NET_ADMIN` is dropped via `capsh --drop=cap_net_admin` before executing user commands, preventing malicious code from modifying iptables rules + - Use `--enable-chroot` flag if you need to run host binaries not available in the container - Two-stage entrypoint: 1. `setup-iptables.sh`: Configures iptables NAT rules to redirect HTTP/HTTPS traffic to Squid (agent container only) 2. `entrypoint.sh`: Drops NET_ADMIN capability, then executes user command as non-root user diff --git a/containers/agent/entrypoint.sh b/containers/agent/entrypoint.sh index cd41182e..70aede87 100644 --- a/containers/agent/entrypoint.sh +++ b/containers/agent/entrypoint.sh @@ -232,9 +232,11 @@ AWFEOF # Java needs LD_LIBRARY_PATH to find libjli.so and other shared libs echo "export LD_LIBRARY_PATH=\"${AWF_JAVA_HOME}/lib:${AWF_JAVA_HOME}/lib/server:\$LD_LIBRARY_PATH\"" >> "/host${SCRIPT_FILE}" fi - # Add GOROOT if provided (required for Go on GitHub Actions with trimmed binaries) + # Add GOROOT/bin to PATH if provided (required for Go on GitHub Actions with trimmed binaries) + # This ensures the correct Go version is found even if AWF_HOST_PATH has wrong ordering if [ -n "${AWF_GOROOT}" ]; then - echo "[entrypoint] Using host GOROOT for chroot: ${AWF_GOROOT}" + echo "[entrypoint] Adding GOROOT/bin to PATH: ${AWF_GOROOT}/bin" + echo "export PATH=\"${AWF_GOROOT}/bin:\$PATH\"" >> "/host${SCRIPT_FILE}" echo "export GOROOT=\"${AWF_GOROOT}\"" >> "/host${SCRIPT_FILE}" fi else @@ -251,9 +253,11 @@ export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" # Add Cargo bin for Rust (common in development) [ -d "$HOME/.cargo/bin" ] && export PATH="$HOME/.cargo/bin:$PATH" AWFEOF - # Add GOROOT if provided (required for Go on GitHub Actions with trimmed binaries) + # Add GOROOT/bin to PATH if provided (required for Go on GitHub Actions with trimmed binaries) + # This ensures the correct Go version is found even if PATH has wrong ordering if [ -n "${AWF_GOROOT}" ]; then - echo "[entrypoint] Using host GOROOT for chroot: ${AWF_GOROOT}" + echo "[entrypoint] Adding GOROOT/bin to PATH: ${AWF_GOROOT}/bin" + echo "export PATH=\"${AWF_GOROOT}/bin:\$PATH\"" >> "/host${SCRIPT_FILE}" echo "export GOROOT=\"${AWF_GOROOT}\"" >> "/host${SCRIPT_FILE}" fi fi @@ -288,14 +292,17 @@ AWFEOF exec capsh --drop=${CAPS_TO_DROP} --user=${HOST_USER} -- -c 'exec ${SCRIPT_FILE}' " else - # Original behavior - run in container filesystem + # Non-chroot mode - run in container filesystem with read-only host mount # Drop capabilities and privileges, then execute the user command - # This prevents malicious code from modifying iptables rules or using chroot + # This prevents malicious code from modifying iptables rules # Security note: capsh --drop removes capabilities from the bounding set, # preventing any process (even if it escalates to root) from acquiring them # The order of operations: # 1. capsh drops capabilities from the bounding set (cannot be regained) # 2. gosu switches to awfuser (drops root privileges) # 3. exec replaces the current process with the user command + # + # Note: Host filesystem is mounted read-only at /host for security. + # If you need to run host binaries, use --enable-chroot flag instead. exec capsh --drop=$CAPS_TO_DROP -- -c "exec gosu awfuser $(printf '%q ' "$@")" fi diff --git a/containers/agent/setup-iptables.sh b/containers/agent/setup-iptables.sh index f2cf67c8..8186819a 100644 --- a/containers/agent/setup-iptables.sh +++ b/containers/agent/setup-iptables.sh @@ -36,7 +36,8 @@ SQUID_PORT="${SQUID_PROXY_PORT:-3128}" echo "[iptables] Squid proxy: ${SQUID_HOST}:${SQUID_PORT}" # Resolve Squid hostname to IP -SQUID_IP=$(getent hosts "$SQUID_HOST" | awk '{ print $1 }' | head -n 1) +# Use awk's NR to get first line to avoid host binary dependency in chroot mode +SQUID_IP=$(getent hosts "$SQUID_HOST" | awk 'NR==1 { print $1 }') if [ -z "$SQUID_IP" ]; then echo "[iptables] ERROR: Could not resolve Squid proxy hostname: $SQUID_HOST" exit 1 diff --git a/docs-site/src/content/docs/reference/cli-reference.md b/docs-site/src/content/docs/reference/cli-reference.md index d5e0db21..cb4eb177 100644 --- a/docs-site/src/content/docs/reference/cli-reference.md +++ b/docs-site/src/content/docs/reference/cli-reference.md @@ -19,7 +19,7 @@ awf [options] -- | Option | Type | Default | Description | |--------|------|---------|-------------| -| `--allow-domains ` | string | — | Comma-separated list of allowed domains (required unless `--allow-domains-file` used) | +| `--allow-domains ` | string | — | Comma-separated list of allowed domains (optional; if not specified, all network access is blocked) | | `--allow-domains-file ` | string | — | Path to file containing allowed domains | | `--block-domains ` | string | — | Comma-separated list of blocked domains (takes precedence over allowed) | | `--block-domains-file ` | string | — | Path to file containing blocked domains | @@ -32,6 +32,7 @@ awf [options] -- | `--build-local` | flag | `false` | Build containers locally instead of pulling from registry | | `--image-registry ` | string | `ghcr.io/github/gh-aw-firewall` | Container image registry | | `--image-tag ` | string | `latest` | Container image tag | +| `--skip-pull` | flag | `false` | Use local images without pulling from registry | | `-e, --env ` | string | `[]` | Environment variable (repeatable) | | `--env-all` | flag | `false` | Pass all host environment variables | | `-v, --mount ` | string | `[]` | Volume mount (repeatable) | @@ -47,9 +48,15 @@ awf [options] -- Comma-separated list of allowed domains. Domains automatically match all subdomains. Supports wildcard patterns and protocol-specific filtering. +**If no domains are specified, all network access is blocked.** This is useful for running commands that should have no network access. + ```bash +# Allow specific domains --allow-domains github.com,npmjs.org --allow-domains '*.github.com,api-*.example.com' + +# No network access (empty or omitted) +awf -- echo "offline command" ``` #### Protocol-Specific Filtering @@ -181,6 +188,31 @@ Custom container image registry URL. Container image tag to use. +### `--skip-pull` + +Use local images without pulling from the registry. This is useful for: + +- **Air-gapped environments** where registry access is unavailable +- **CI systems with pre-warmed image caches** to avoid unnecessary network calls +- **Local development** when images are already cached + +```bash +# Pre-pull images first +docker pull ghcr.io/github/gh-aw-firewall/squid:latest +docker pull ghcr.io/github/gh-aw-firewall/agent:latest + +# Use with --skip-pull to avoid re-pulling +sudo awf --skip-pull --allow-domains github.com -- curl https://api.github.com +``` + +:::caution[Image Verification] +When using `--skip-pull`, you are responsible for verifying image authenticity. The firewall cannot verify that locally cached images haven't been tampered with. See [Image Verification](/gh-aw-firewall/docs/image-verification/) for cosign verification instructions. +::: + +:::note[Incompatible with --build-local] +The `--skip-pull` flag cannot be used with `--build-local` since building images requires pulling base images from the registry. +::: + ### `-e, --env ` Pass environment variable to container. Can be specified multiple times. diff --git a/docs/usage.md b/docs/usage.md index bf6c59ad..a029a778 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,7 +6,8 @@ sudo awf [options] Options: - --allow-domains Comma-separated list of allowed domains (required) + --allow-domains Comma-separated list of allowed domains (optional) + If not specified, all network access is blocked Example: github.com,api.github.com,arxiv.org --allow-domains-file Path to file containing allowed domains --block-domains Comma-separated list of blocked domains diff --git a/package-lock.json b/package-lock.json index a807c94c..c37455db 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@github/agentic-workflow-firewall", - "version": "0.13.1", + "version": "0.13.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@github/agentic-workflow-firewall", - "version": "0.13.1", + "version": "0.13.4", "license": "MIT", "dependencies": { "chalk": "^4.1.2", diff --git a/package.json b/package.json index cf918c5a..bc3f3e15 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@github/agentic-workflow-firewall", - "version": "0.13.1", + "version": "0.13.4", "description": "Network firewall for agentic workflows with domain whitelisting", "main": "dist/cli.js", "bin": { diff --git a/src/cli-workflow.ts b/src/cli-workflow.ts index 4551d328..62ad1511 100644 --- a/src/cli-workflow.ts +++ b/src/cli-workflow.ts @@ -4,7 +4,7 @@ export interface WorkflowDependencies { ensureFirewallNetwork: () => Promise<{ squidIp: string }>; setupHostIptables: (squidIp: string, port: number, dnsServers: string[]) => Promise; writeConfigs: (config: WrapperConfig) => Promise; - startContainers: (workDir: string, allowedDomains: string[], proxyLogsDir?: string) => Promise; + startContainers: (workDir: string, allowedDomains: string[], proxyLogsDir?: string, skipPull?: boolean) => Promise; runAgentCommand: ( workDir: string, allowedDomains: string[], @@ -51,7 +51,7 @@ export async function runMainWorkflow( await dependencies.writeConfigs(config); // Step 2: Start containers - await dependencies.startContainers(config.workDir, config.allowedDomains, config.proxyLogsDir); + await dependencies.startContainers(config.workDir, config.allowedDomains, config.proxyLogsDir, config.skipPull); onContainersStarted?.(); // Step 3: Wait for agent to complete diff --git a/src/cli.test.ts b/src/cli.test.ts index ea697c90..c3a3d435 100644 --- a/src/cli.test.ts +++ b/src/cli.test.ts @@ -1,5 +1,5 @@ import { Command } from 'commander'; -import { parseEnvironmentVariables, parseDomains, parseDomainsFile, escapeShellArg, joinShellArgs, parseVolumeMounts, isValidIPv4, isValidIPv6, parseDnsServers, validateAgentImage, isAgentImagePreset, AGENT_IMAGE_PRESETS, processAgentImageOption } from './cli'; +import { parseEnvironmentVariables, parseDomains, parseDomainsFile, escapeShellArg, joinShellArgs, parseVolumeMounts, isValidIPv4, isValidIPv6, parseDnsServers, validateAgentImage, isAgentImagePreset, AGENT_IMAGE_PRESETS, processAgentImageOption, validateSkipPullWithBuildLocal } from './cli'; import { redactSecrets } from './redact-secrets'; import * as fs from 'fs'; import * as path from 'path'; @@ -666,6 +666,7 @@ describe('cli', () => { expect(result.invalidMount).toBe('invalid-mount'); } }); + }); describe('IPv4 validation', () => { @@ -1140,4 +1141,48 @@ describe('cli', () => { }); }); }); + + describe('validateSkipPullWithBuildLocal', () => { + it('should return valid when both flags are false', () => { + const result = validateSkipPullWithBuildLocal(false, false); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should return valid when both flags are undefined', () => { + const result = validateSkipPullWithBuildLocal(undefined, undefined); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should return valid when only skipPull is true', () => { + const result = validateSkipPullWithBuildLocal(true, false); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should return valid when only buildLocal is true', () => { + const result = validateSkipPullWithBuildLocal(false, true); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should return invalid when both skipPull and buildLocal are true', () => { + const result = validateSkipPullWithBuildLocal(true, true); + expect(result.valid).toBe(false); + expect(result.error).toContain('--skip-pull cannot be used with --build-local'); + }); + + it('should return valid when skipPull is true and buildLocal is undefined', () => { + const result = validateSkipPullWithBuildLocal(true, undefined); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should return valid when skipPull is undefined and buildLocal is true', () => { + const result = validateSkipPullWithBuildLocal(undefined, true); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + }); }); diff --git a/src/cli.ts b/src/cli.ts index 2ddc7aec..08a876f8 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -243,6 +243,35 @@ export function processAgentImageOption( }; } +/** + * Result of validating flag combinations + */ +export interface FlagValidationResult { + /** Whether the validation passed */ + valid: boolean; + /** Error message if validation failed */ + error?: string; +} + +/** + * Validates that --skip-pull is not used with --build-local + * @param skipPull - Whether --skip-pull flag was provided + * @param buildLocal - Whether --build-local flag was provided + * @returns FlagValidationResult with validation status and error message + */ +export function validateSkipPullWithBuildLocal( + skipPull: boolean | undefined, + buildLocal: boolean | undefined +): FlagValidationResult { + if (skipPull && buildLocal) { + return { + valid: false, + error: '--skip-pull cannot be used with --build-local. Building images requires pulling base images from the registry.', + }; + } + return { valid: true }; +} + /** * Parses and validates DNS servers from a comma-separated string * @param input - Comma-separated DNS server string (e.g., "8.8.8.8,1.1.1.1") @@ -507,6 +536,11 @@ program 'Container image tag', 'latest' ) + .option( + '--skip-pull', + 'Use local images without pulling from registry (requires images to be pre-downloaded)', + false + ) .option( '-e, --env ', 'Additional environment variables to pass to container (can be specified multiple times)', @@ -631,10 +665,9 @@ program } } - // Ensure at least one domain is specified + // Log when no domains are specified (all network access will be blocked) if (allowedDomains.length === 0) { - logger.error('At least one domain must be specified with --allow-domains or --allow-domains-file'); - process.exit(1); + logger.debug('No allowed domains specified - all network access will be blocked'); } // Remove duplicates (in case domains appear in both sources) @@ -788,6 +821,7 @@ program tty: options.tty || false, workDir: options.workDir, buildLocal: options.buildLocal, + skipPull: options.skipPull, agentImage, imageRegistry: options.imageRegistry, imageTag: options.imageTag, @@ -816,6 +850,13 @@ program process.exit(1); } + // Error if --skip-pull is used with --build-local (incompatible flags) + const skipPullValidation = validateSkipPullWithBuildLocal(config.skipPull, config.buildLocal); + if (!skipPullValidation.valid) { + logger.error(`❌ ${skipPullValidation.error}`); + process.exit(1); + } + // Warn if --enable-host-access is used with host.docker.internal in allowed domains if (config.enableHostAccess) { const hasHostDomain = allowedDomains.some(d => diff --git a/src/docker-manager.test.ts b/src/docker-manager.test.ts index 04bd4916..13d829c9 100644 --- a/src/docker-manager.test.ts +++ b/src/docker-manager.test.ts @@ -490,7 +490,7 @@ describe('docker-manager', () => { const agent = result.services.agent; const volumes = agent.volumes as string[]; - expect(volumes).toContain('/:/host:rw'); + expect(volumes).toContain('/:/host:ro'); expect(volumes).toContain('/tmp:/tmp:rw'); expect(volumes.some((v: string) => v.includes('agent-logs'))).toBe(true); }); @@ -504,8 +504,8 @@ describe('docker-manager', () => { const agent = result.services.agent; const volumes = agent.volumes as string[]; - // Should NOT include blanket /:/host:rw mount - expect(volumes).not.toContain('/:/host:rw'); + // Should NOT include blanket /:/host:ro mount + expect(volumes).not.toContain('/:/host:ro'); // Should include custom mounts expect(volumes).toContain('/workspace:/workspace:ro'); @@ -521,8 +521,8 @@ describe('docker-manager', () => { const agent = result.services.agent; const volumes = agent.volumes as string[]; - // Should include blanket /:/host:rw mount - expect(volumes).toContain('/:/host:rw'); + // Should include blanket /:/host:ro mount + expect(volumes).toContain('/:/host:ro'); }); it('should use selective mounts when enableChroot is true', () => { @@ -623,6 +623,47 @@ describe('docker-manager', () => { expect(environment.AWF_CHROOT_ENABLED).toBe('true'); }); + it('should pass GOROOT, CARGO_HOME, JAVA_HOME to container when enableChroot is true and env vars are set', () => { + const originalGoroot = process.env.GOROOT; + const originalCargoHome = process.env.CARGO_HOME; + const originalJavaHome = process.env.JAVA_HOME; + + process.env.GOROOT = '/usr/local/go'; + process.env.CARGO_HOME = '/home/user/.cargo'; + process.env.JAVA_HOME = '/usr/lib/jvm/java-17'; + + try { + const configWithChroot = { + ...mockConfig, + enableChroot: true + }; + const result = generateDockerCompose(configWithChroot, mockNetworkConfig); + const agent = result.services.agent; + const environment = agent.environment as Record; + + expect(environment.AWF_GOROOT).toBe('/usr/local/go'); + expect(environment.AWF_CARGO_HOME).toBe('/home/user/.cargo'); + expect(environment.AWF_JAVA_HOME).toBe('/usr/lib/jvm/java-17'); + } finally { + // Restore original values + if (originalGoroot !== undefined) { + process.env.GOROOT = originalGoroot; + } else { + delete process.env.GOROOT; + } + if (originalCargoHome !== undefined) { + process.env.CARGO_HOME = originalCargoHome; + } else { + delete process.env.CARGO_HOME; + } + if (originalJavaHome !== undefined) { + process.env.JAVA_HOME = originalJavaHome; + } else { + delete process.env.JAVA_HOME; + } + } + }); + it('should not set AWF_CHROOT_ENABLED when enableChroot is false', () => { const result = generateDockerCompose(mockConfig, mockNetworkConfig); const agent = result.services.agent; @@ -932,6 +973,24 @@ describe('docker-manager', () => { }); }); + describe('allowHostPorts option', () => { + it('should set AWF_ALLOW_HOST_PORTS when allowHostPorts is specified', () => { + const config = { ...mockConfig, enableHostAccess: true, allowHostPorts: '8080,3000' }; + const result = generateDockerCompose(config, mockNetworkConfig); + const env = result.services.agent.environment as Record; + + expect(env.AWF_ALLOW_HOST_PORTS).toBe('8080,3000'); + }); + + it('should NOT set AWF_ALLOW_HOST_PORTS when allowHostPorts is undefined', () => { + const config = { ...mockConfig, enableHostAccess: true }; + const result = generateDockerCompose(config, mockNetworkConfig); + const env = result.services.agent.environment as Record; + + expect(env.AWF_ALLOW_HOST_PORTS).toBeUndefined(); + }); + }); + it('should override environment variables with additionalEnv', () => { const originalEnv = process.env.GITHUB_TOKEN; process.env.GITHUB_TOKEN = 'original_token'; @@ -1246,6 +1305,22 @@ describe('docker-manager', () => { ); }); + it('should continue when removing existing containers fails', async () => { + // First call (docker rm) throws an error, but we should continue + mockExecaFn.mockRejectedValueOnce(new Error('No such container')); + // Second call (docker compose up) succeeds + mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); + + await startContainers(testDir, ['github.com']); + + // Should still call docker compose up even if rm failed + expect(mockExecaFn).toHaveBeenCalledWith( + 'docker', + ['compose', 'up', '-d'], + { cwd: testDir, stdio: 'inherit' } + ); + }); + it('should run docker compose up', async () => { mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); @@ -1259,6 +1334,32 @@ describe('docker-manager', () => { ); }); + it('should run docker compose up with --pull never when skipPull is true', async () => { + mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); + mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); + + await startContainers(testDir, ['github.com'], undefined, true); + + expect(mockExecaFn).toHaveBeenCalledWith( + 'docker', + ['compose', 'up', '-d', '--pull', 'never'], + { cwd: testDir, stdio: 'inherit' } + ); + }); + + it('should run docker compose up without --pull never when skipPull is false', async () => { + mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); + mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); + + await startContainers(testDir, ['github.com'], undefined, false); + + expect(mockExecaFn).toHaveBeenCalledWith( + 'docker', + ['compose', 'up', '-d'], + { cwd: testDir, stdio: 'inherit' } + ); + }); + it('should handle docker compose failure', async () => { mockExecaFn.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 } as any); mockExecaFn.mockRejectedValueOnce(new Error('Docker compose failed')); diff --git a/src/docker-manager.ts b/src/docker-manager.ts index be53af78..94e8cd1f 100644 --- a/src/docker-manager.ts +++ b/src/docker-manager.ts @@ -497,8 +497,9 @@ export function generateDockerCompose( } else if (!config.enableChroot) { // If no custom mounts specified AND not using chroot mode, // include blanket host filesystem mount for backward compatibility - logger.debug('No custom mounts specified, using blanket /:/host:rw mount'); - agentVolumes.unshift('/:/host:rw'); + // Security: Host filesystem is mounted read-only to prevent accidental or malicious writes + logger.debug('No custom mounts specified, using blanket /:/host:ro mount'); + agentVolumes.unshift('/:/host:ro'); } // Agent service configuration @@ -800,8 +801,12 @@ async function checkSquidLogs(workDir: string, proxyLogsDir?: string): Promise<{ /** * Starts Docker Compose services + * @param workDir - Working directory containing Docker Compose config + * @param allowedDomains - List of allowed domains for error reporting + * @param proxyLogsDir - Optional custom directory for proxy logs + * @param skipPull - If true, use local images without pulling from registry */ -export async function startContainers(workDir: string, allowedDomains: string[], proxyLogsDir?: string): Promise { +export async function startContainers(workDir: string, allowedDomains: string[], proxyLogsDir?: string, skipPull?: boolean): Promise { logger.info('Starting containers...'); // Force remove any existing containers with these names to avoid conflicts @@ -817,7 +822,12 @@ export async function startContainers(workDir: string, allowedDomains: string[], } try { - await execa('docker', ['compose', 'up', '-d'], { + const composeArgs = ['compose', 'up', '-d']; + if (skipPull) { + composeArgs.push('--pull', 'never'); + logger.debug('Using --pull never (skip-pull mode)'); + } + await execa('docker', composeArgs, { cwd: workDir, stdio: 'inherit', }); diff --git a/src/logs/log-aggregator.test.ts b/src/logs/log-aggregator.test.ts index 13c63397..887e4833 100644 --- a/src/logs/log-aggregator.test.ts +++ b/src/logs/log-aggregator.test.ts @@ -105,6 +105,110 @@ describe('log-aggregator', () => { expect(stats.byDomain.has('-')).toBe(true); expect(stats.byDomain.has('github.com')).toBe(true); }); + + it('should filter out transaction-end-before-headers entries', () => { + const entries: ParsedLogEntry[] = [ + createLogEntry({ + domain: 'github.com', + url: 'github.com:443', + isAllowed: true + }), + createLogEntry({ + domain: '-', + url: 'error:transaction-end-before-headers', + decision: 'NONE_NONE:HIER_NONE', + statusCode: 0, + isAllowed: false + }), + createLogEntry({ + domain: 'npmjs.org', + url: 'npmjs.org:443', + isAllowed: true + }), + ]; + + const stats = aggregateLogs(entries); + + // Should only count the two valid entries + expect(stats.totalRequests).toBe(2); // Only actual requests, not benign operational entries + expect(stats.allowedRequests).toBe(2); + expect(stats.deniedRequests).toBe(0); + expect(stats.uniqueDomains).toBe(2); + expect(stats.byDomain.has('github.com')).toBe(true); + expect(stats.byDomain.has('npmjs.org')).toBe(true); + expect(stats.byDomain.has('-')).toBe(false); // Filtered entry not in domain stats + }); + + it('should handle multiple transaction-end-before-headers entries', () => { + const entries: ParsedLogEntry[] = [ + createLogEntry({ + domain: 'github.com', + url: 'github.com:443', + isAllowed: true + }), + createLogEntry({ + domain: '-', + url: 'error:transaction-end-before-headers', + clientIp: '::1', // healthcheck from localhost + decision: 'NONE_NONE:HIER_NONE', + statusCode: 0, + isAllowed: false + }), + createLogEntry({ + domain: '-', + url: 'error:transaction-end-before-headers', + clientIp: '172.30.0.20', // shutdown-time connection closure + decision: 'NONE_NONE:HIER_NONE', + statusCode: 0, + isAllowed: false + }), + createLogEntry({ + domain: 'npmjs.org', + url: 'npmjs.org:443', + isAllowed: true + }), + ]; + + const stats = aggregateLogs(entries); + + expect(stats.totalRequests).toBe(2); // Only actual requests + expect(stats.allowedRequests).toBe(2); + expect(stats.deniedRequests).toBe(0); + expect(stats.uniqueDomains).toBe(2); + }); + + it('should still count time range from all entries including filtered ones', () => { + const entries: ParsedLogEntry[] = [ + createLogEntry({ + timestamp: 1000.0, + domain: 'github.com', + url: 'github.com:443', + isAllowed: true + }), + createLogEntry({ + timestamp: 1500.0, + domain: '-', + url: 'error:transaction-end-before-headers', + decision: 'NONE_NONE:HIER_NONE', + statusCode: 0, + isAllowed: false + }), + createLogEntry({ + timestamp: 2000.0, + domain: 'npmjs.org', + url: 'npmjs.org:443', + isAllowed: true + }), + ]; + + const stats = aggregateLogs(entries); + + // Time range should span all entries, even filtered ones + expect(stats.timeRange).toEqual({ + start: 1000.0, + end: 2000.0, + }); + }); }); describe('loadAllLogs', () => { diff --git a/src/logs/log-aggregator.ts b/src/logs/log-aggregator.ts index ad578d31..69b721d9 100644 --- a/src/logs/log-aggregator.ts +++ b/src/logs/log-aggregator.ts @@ -53,9 +53,10 @@ export function aggregateLogs(entries: ParsedLogEntry[]): AggregatedStats { let deniedRequests = 0; let minTimestamp = Infinity; let maxTimestamp = -Infinity; + let totalRequests = 0; for (const entry of entries) { - // Track time range + // Track time range for all entries if (entry.timestamp < minTimestamp) { minTimestamp = entry.timestamp; } @@ -63,6 +64,15 @@ export function aggregateLogs(entries: ParsedLogEntry[]): AggregatedStats { maxTimestamp = entry.timestamp; } + // Skip benign operational entries (connection closures without HTTP headers) + // These appear during healthchecks and shutdown-time keep-alive connection closures + if (entry.url === 'error:transaction-end-before-headers') { + continue; + } + + // Count this as a real request + totalRequests++; + // Count allowed/denied if (entry.isAllowed) { allowedRequests++; @@ -91,7 +101,6 @@ export function aggregateLogs(entries: ParsedLogEntry[]): AggregatedStats { } } - const totalRequests = entries.length; const uniqueDomains = byDomain.size; const timeRange = entries.length > 0 ? { start: minTimestamp, end: maxTimestamp } : null; diff --git a/src/squid-config.test.ts b/src/squid-config.test.ts index fdd4cde7..f6bd3aab 100644 --- a/src/squid-config.test.ts +++ b/src/squid-config.test.ts @@ -476,6 +476,32 @@ describe('generateSquidConfig', () => { const result = generateSquidConfig(config); expect(result).toContain('access_log /var/log/squid/access.log firewall_detailed'); }); + + it('should filter localhost healthcheck probes from logs', () => { + const config: SquidConfig = { + domains: ['example.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + expect(result).toContain('acl healthcheck_localhost src 127.0.0.1 ::1'); + expect(result).toContain('log_access deny healthcheck_localhost'); + }); + + it('should place healthcheck filter before access_log directive', () => { + const config: SquidConfig = { + domains: ['example.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + // Verify the order: ACL definition, then log_access deny, then access_log + const aclIndex = result.indexOf('acl healthcheck_localhost'); + const logAccessIndex = result.indexOf('log_access deny healthcheck_localhost'); + const accessLogIndex = result.indexOf('access_log /var/log/squid/access.log'); + + expect(aclIndex).toBeGreaterThan(-1); + expect(logAccessIndex).toBeGreaterThan(aclIndex); + expect(accessLogIndex).toBeGreaterThan(logAccessIndex); + }); }); describe('Streaming/Long-lived Connection Support', () => { @@ -1374,3 +1400,21 @@ describe('Dangerous ports blocklist in generateSquidConfig', () => { }).not.toThrow(); }); }); + +describe('Empty Domain List', () => { + it('should generate config that denies all traffic when no domains are specified', () => { + const config = { + domains: [], + port: 3128, + }; + const result = generateSquidConfig(config); + // Should deny all traffic when no domains are allowed + expect(result).toContain('http_access deny all'); + // Should have a comment indicating no domains configured + expect(result).toContain('# No domains configured'); + // Should not have any allowed_domains ACL + expect(result).not.toContain('acl allowed_domains'); + expect(result).not.toContain('acl allowed_http_only'); + expect(result).not.toContain('acl allowed_https_only'); + }); +}); diff --git a/src/squid-config.ts b/src/squid-config.ts index a4c44ac8..8029bbad 100644 --- a/src/squid-config.ts +++ b/src/squid-config.ts @@ -511,6 +511,10 @@ pinger_enable off # Note: For CONNECT requests (HTTPS), the domain is in the URL field logformat firewall_detailed %ts.%03tu %>a:%>p %{Host}>h %Hs %Ss:%Sh %ru "%{User-Agent}>h" +# Don't log healthcheck probes from localhost +acl healthcheck_localhost src 127.0.0.1 ::1 +log_access deny healthcheck_localhost + # Access log and cache configuration access_log /var/log/squid/access.log firewall_detailed cache_log /var/log/squid/cache.log diff --git a/src/types.ts b/src/types.ts index 75fd9828..9873fdd5 100644 --- a/src/types.ts +++ b/src/types.ts @@ -138,15 +138,28 @@ export interface WrapperConfig { /** * Whether to build container images locally instead of pulling from registry - * + * * When true, Docker images are built from local Dockerfiles in containers/squid * and containers/agent directories. When false (default), images are pulled * from the configured registry. - * + * * @default false */ buildLocal?: boolean; + /** + * Whether to skip pulling images from the registry + * + * When true, Docker Compose will use locally available images without + * attempting to pull from the registry. This is useful when images are + * pre-downloaded or in air-gapped environments. + * + * If the required images are not available locally, container startup will fail. + * + * @default false + */ + skipPull?: boolean; + /** * Agent container image preset or custom base image * @@ -198,7 +211,7 @@ export interface WrapperConfig { * - 'host_path:container_path:rw' (read-write) * * These are in addition to essential mounts (Docker socket, HOME, /tmp). - * The blanket /:/host:rw mount is removed when custom mounts are specified. + * The blanket /:/host:ro mount is removed when custom mounts are specified. * * @example ['/workspace:/workspace:ro', '/data:/data:rw'] */ diff --git a/tests/integration/empty-domains.test.ts b/tests/integration/empty-domains.test.ts new file mode 100644 index 00000000..4c1fce5a --- /dev/null +++ b/tests/integration/empty-domains.test.ts @@ -0,0 +1,149 @@ +/** + * Empty Domains Tests + * + * These tests verify the behavior when no domains are allowed: + * - All network access should be blocked + * - Commands that don't require network should still work + * - Debug logs should indicate no domains are configured + */ + +/// + +import { describe, test, expect, beforeAll, afterAll } from '@jest/globals'; +import { createRunner, AwfRunner } from '../fixtures/awf-runner'; +import { cleanup } from '../fixtures/cleanup'; + +describe('Empty Domains (No Network Access)', () => { + let runner: AwfRunner; + + beforeAll(async () => { + await cleanup(false); + runner = createRunner(); + }); + + afterAll(async () => { + await cleanup(false); + }); + + describe('Network Blocking', () => { + test('should block all network access when no domains are specified', async () => { + // Try to access a website without any allowed domains + const result = await runner.runWithSudo( + 'curl -f --max-time 5 https://example.com', + { + allowDomains: [], // Empty domains list + logLevel: 'debug', + timeout: 60000, + } + ); + + // Request should fail because no domains are allowed + expect(result).toFail(); + }, 120000); + + test('should block HTTPS traffic when no domains are specified', async () => { + const result = await runner.runWithSudo( + 'curl -f --max-time 5 https://api.github.com/zen', + { + allowDomains: [], + logLevel: 'debug', + timeout: 60000, + } + ); + + expect(result).toFail(); + }, 120000); + + test('should block HTTP traffic when no domains are specified', async () => { + const result = await runner.runWithSudo( + 'curl -f --max-time 5 http://httpbin.org/get', + { + allowDomains: [], + logLevel: 'debug', + timeout: 60000, + } + ); + + expect(result).toFail(); + }, 120000); + }); + + describe('Offline Commands', () => { + test('should allow commands that do not require network access', async () => { + const result = await runner.runWithSudo( + 'echo "Hello, offline world!"', + { + allowDomains: [], + logLevel: 'debug', + timeout: 60000, + } + ); + + expect(result).toSucceed(); + expect(result.stdout).toContain('Hello, offline world!'); + }, 120000); + + test('should allow file system operations without network', async () => { + const result = await runner.runWithSudo( + 'bash -c "echo test > /tmp/test.txt && cat /tmp/test.txt && rm /tmp/test.txt"', + { + allowDomains: [], + logLevel: 'debug', + timeout: 60000, + } + ); + + expect(result).toSucceed(); + expect(result.stdout).toContain('test'); + }, 120000); + + test('should allow local computations without network', async () => { + const result = await runner.runWithSudo( + 'bash -c "expr 2 + 2"', + { + allowDomains: [], + logLevel: 'debug', + timeout: 60000, + } + ); + + expect(result).toSucceed(); + expect(result.stdout).toContain('4'); + }, 120000); + }); + + describe('Debug Output', () => { + test('should indicate no domains are configured in debug output', async () => { + const result = await runner.runWithSudo( + 'echo "test"', + { + allowDomains: [], + logLevel: 'debug', + timeout: 60000, + } + ); + + expect(result).toSucceed(); + // Should show debug message about no domains + expect(result.stderr).toMatch(/No allowed domains specified|all network access will be blocked/i); + }, 120000); + }); + + describe('DNS Behavior', () => { + test('should block network access even when DNS resolution succeeds', async () => { + // DNS lookups should work (we allow DNS traffic), but connecting should fail + // because the domain isn't in the allowlist + const result = await runner.runWithSudo( + 'bash -c "host example.com > /dev/null 2>&1 && curl -f --max-time 5 https://example.com || echo network_blocked"', + { + allowDomains: [], + logLevel: 'debug', + timeout: 60000, + } + ); + + // The network request should be blocked + expect(result.stdout).toContain('network_blocked'); + }, 120000); + }); +});