diff --git a/.gitignore b/.gitignore index 1fd9cd3..96c2472 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ coverage/ # Temporary files tmp/ temp/ +.npm-install-hash # Browser profiles profiles/ diff --git a/skills/dev-browser/SKILL.md b/skills/dev-browser/SKILL.md index 21e4bd4..cbad2f7 100644 --- a/skills/dev-browser/SKILL.md +++ b/skills/dev-browser/SKILL.md @@ -15,17 +15,44 @@ Browser automation that maintains page state across script executions. Write sma ## Setup -Two modes available. Ask the user if unclear which to use. +```bash +./skills/dev-browser/server.sh & +``` -### Standalone Mode (Default) +**Wait for the `Ready` message before running scripts.** -Launches a new Chromium browser for fresh automation sessions. +The server auto-detects the best browser mode based on user configuration at `~/.dev-browser/config.json`: -```bash -./skills/dev-browser/server.sh & +- **External Browser** (default when Chrome for Testing is installed): Uses Chrome for Testing via CDP. Browser stays open after automation. +- **Standalone**: Uses Playwright's built-in Chromium. Use `--standalone` flag to force this mode. + +**Flags:** +- `--standalone` - Force standalone Playwright mode +- `--headless` - Run headless (standalone mode only) + +### Configuration + +Browser settings are configured in `~/.dev-browser/config.json`: + +```json +{ + "browser": { + "mode": "auto", + "path": "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" + } +} ``` -Add `--headless` flag if user requests it. 
**Wait for the `Ready` message before running scripts.** +| Setting | Values | Description | +|---------|--------|-------------| +| `browser.mode` | `"auto"` (default), `"external"`, `"standalone"` | `auto` uses Chrome for Testing if found, otherwise Playwright | +| `browser.path` | Path string | Custom browser executable path (auto-detected if not set) | +| `browser.userDataDir` | Path string | Browser profile directory for external mode (uses browser's default if not set) | + +**Auto-detection paths:** +- **macOS**: `/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing` +- **Linux**: `/opt/google/chrome-for-testing/chrome`, `/usr/bin/google-chrome-for-testing` +- **Windows**: `C:\Program Files\Google\Chrome for Testing\Application\chrome.exe` ### Extension Mode diff --git a/skills/dev-browser/docs/CONCURRENCY.md b/skills/dev-browser/docs/CONCURRENCY.md new file mode 100644 index 0000000..37f8267 --- /dev/null +++ b/skills/dev-browser/docs/CONCURRENCY.md @@ -0,0 +1,177 @@ +# Multi-Agent Concurrency Support + +This document explains how dev-browser supports multiple concurrent agents and the design decisions behind the implementation. + +## The Problem + +When multiple AI agents (e.g., Claude Code sub-agents) run browser automation tasks in parallel, they need to avoid conflicts. The original dev-browser design assumed a single server on a fixed port, which creates a bottleneck: + +> "dev-browser is in fact a single point of congestion now, nullifying the advantages of dev browser" +> — [PR #15 discussion](https://github.com/SawyerHood/dev-browser/pull/15#issuecomment-3698722432) + +## Solution: Dynamic Port Allocation + +Each agent automatically gets its own HTTP API server on a unique port: + +``` +Agent 1 ──► server (port 9222) ──┐ +Agent 2 ──► server (port 9224) ──┼──► Shared Browser (CDP 9223) +Agent 3 ──► server (port 9226) ──┘ +``` + +### How It Works + +1. 
**Port Auto-Assignment**: When `port` is not specified, the server finds an available port in the configured range (default: 9222-9300, step 2) + +2. **Port Discovery**: Server outputs `PORT=XXXX` to stdout, which agents parse to know which port to connect to + +3. **Server Tracking**: Active servers are tracked in `~/.dev-browser/active-servers.json` for coordination + +4. **Shared Browser**: In external browser mode, all servers connect to the same browser via CDP, minimizing resource usage + +## Design Decisions + +### Options Considered + +#### Option 1: Manual Port Assignment (Rejected) + +From [PR #15](https://github.com/SawyerHood/dev-browser/pull/15), the initial proposal was to add `--port` and `--cdp-port` CLI flags for manual assignment. + +**Why rejected**: Requires agents to coordinate port selection, adds complexity to agent implementation, and creates potential for conflicts. + +#### Option 2: Singleton Server with Named Pages (Rejected) + +Have one persistent server handling all agents, using page names for isolation. + +**Why rejected**: Incompatible with the plugin architecture where each agent spawns its own server process. Also creates a true single point of failure. + +#### Option 3: Dynamic Port Allocation (Chosen) + +Servers automatically discover and claim available ports. + +**Why chosen**: +- Zero configuration required +- Agents don't need to coordinate +- Works with existing plugin architecture +- Each agent is isolated (failure doesn't affect others) +- Memory overhead is acceptable (~140MB per server) + +### Memory Considerations + +Each dev-browser server uses approximately: +- **Node.js + Playwright + Express**: ~140MB +- **Browser (if standalone mode)**: ~300MB additional + +In external browser mode, multiple servers share one browser, making the per-agent overhead just ~140MB. 
+ +## Configuration + +Create `~/.dev-browser/config.json` to customize behavior: + +```json +{ + "portRange": { + "start": 9222, + "end": 9300, + "step": 2 + }, + "cdpPort": 9223 +} +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `portRange.start` | 9222 | First port to try for HTTP API | +| `portRange.end` | 9300 | Last port to try | +| `portRange.step` | 2 | Port increment (avoids CDP port collision) | +| `cdpPort` | 9223 | Chrome DevTools Protocol port | + +## Usage Examples + +### Multiple Agents (External Browser Mode) + +```bash +# Terminal 1: Start Chrome for Testing, then: +BROWSER_PATH="/path/to/chrome" npx tsx scripts/start-external-browser.ts +# Output: PORT=9222 + +# Terminal 2: Second agent +npx tsx scripts/start-external-browser.ts +# Output: PORT=9224 + +# Terminal 3: Third agent +npx tsx scripts/start-external-browser.ts +# Output: PORT=9226 + +# All agents share the same browser on CDP port 9223 +``` + +### Multiple Agents (Standalone Mode) + +```bash +# Terminal 1: First agent launches its own browser +npx tsx scripts/start-server.ts +# Output: PORT=9222 + +# Terminal 2: Second agent launches separate browser +npx tsx scripts/start-server.ts +# Output: PORT=9224 +``` + +### Programmatic Usage + +```typescript +import { serve, serveWithExternalBrowser } from "dev-browser"; + +// Port is automatically assigned +const server1 = await serve(); // Gets port 9222 +const server2 = await serve(); // Gets port 9224 + +console.log(`Server 1 on port ${server1.port}`); +console.log(`Server 2 on port ${server2.port}`); + +// Or with external browser +const external1 = await serveWithExternalBrowser(); +const external2 = await serveWithExternalBrowser(); +// Both connect to same browser on CDP 9223 +``` + +## Troubleshooting + +### "No available ports in range" + +Too many servers running. 
Check active servers: + +```bash +cat ~/.dev-browser/active-servers.json +``` + +Clean up stale entries (servers that crashed): + +```bash +rm ~/.dev-browser/active-servers.json +``` + +### Port Conflicts + +If a specific port is required, set `PORT` environment variable: + +```bash +PORT=9250 npx tsx scripts/start-external-browser.ts +``` + +### Checking Server Status + +```bash +# List all active servers +cat ~/.dev-browser/active-servers.json + +# Test a specific server +curl http://localhost:9222/ +# Returns: {"wsEndpoint":"ws://...","mode":"external-browser","port":9222} +``` + +## References + +- [PR #15: Multi-port support discussion](https://github.com/SawyerHood/dev-browser/pull/15) +- [PR #20: External browser mode](https://github.com/SawyerHood/dev-browser/pull/20) diff --git a/skills/dev-browser/scripts/get-browser-config.ts b/skills/dev-browser/scripts/get-browser-config.ts new file mode 100644 index 0000000..127d2eb --- /dev/null +++ b/skills/dev-browser/scripts/get-browser-config.ts @@ -0,0 +1,37 @@ +/** + * Output resolved browser configuration for shell scripts. + * + * Usage: npx tsx scripts/get-browser-config.ts + * + * Output format (shell-eval compatible): + * BROWSER_MODE="external" + * BROWSER_PATH="/path/to/chrome" + * BROWSER_USER_DATA_DIR="/path/to/profile" + */ + +import { getResolvedBrowserConfig } from "@/config.js"; + +/** + * Shell-escape a string value for safe eval. 
+ */ +function shellEscape(value: string): string { + // Backslash-escape every character that stays special inside double quotes (\ " $ and backtick) so eval cannot expand or execute any part of the value + return `"${value.replace(/[\\"$`]/g, "\\$&")}"`; +} + +try { + const config = getResolvedBrowserConfig(); + + // Output in shell-eval format with proper quoting + console.log(`BROWSER_MODE=${shellEscape(config.mode)}`); + console.log(`BROWSER_PATH=${shellEscape(config.path || "")}`); + // Only output userDataDir if explicitly configured + console.log(`BROWSER_USER_DATA_DIR=${shellEscape(config.userDataDir || "")}`); +} catch (err) { + // On error, output standalone mode as fallback + console.error(`Warning: ${err instanceof Error ? err.message : err}`); + console.log(`BROWSER_MODE="standalone"`); + console.log(`BROWSER_PATH=""`); + console.log(`BROWSER_USER_DATA_DIR=""`); + process.exit(0); // Don't fail - standalone is a valid fallback +} diff --git a/skills/dev-browser/scripts/start-external-browser.ts b/skills/dev-browser/scripts/start-external-browser.ts new file mode 100644 index 0000000..1d41e7c --- /dev/null +++ b/skills/dev-browser/scripts/start-external-browser.ts @@ -0,0 +1,89 @@ +/** + * Start dev-browser server connecting to an external browser via CDP. 
+ * + * This mode is ideal for: + * - Chrome for Testing or other specific browser builds + * - Development workflows where you want the browser visible + * - Keeping the browser open after automation for manual inspection + * - Running multiple agents concurrently (each gets its own port automatically) + * + * Environment variables: + * PORT - HTTP API port (default: auto-assigned from 9222-9300) + * CDP_PORT - Browser's CDP port (default: 9223) + * BROWSER_PATH - Path to browser executable (for auto-launch) + * USER_DATA_DIR - Browser profile directory (default: unset - the browser uses its own default profile) + * AUTO_LAUNCH - Whether to auto-launch browser if not running (default: true) + * + * Configuration file: ~/.dev-browser/config.json + * { + * "portRange": { "start": 9222, "end": 9300, "step": 2 }, + * "cdpPort": 9223 + * } + * + * Example with Chrome for Testing: + * BROWSER_PATH="/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" \ + * npx tsx scripts/start-external-browser.ts + * + * Multi-agent usage: + * # Terminal 1: First agent gets port 9222 + * npx tsx scripts/start-external-browser.ts + * # Output: PORT=9222 + * + * # Terminal 2: Second agent gets port 9224 + * npx tsx scripts/start-external-browser.ts + * # Output: PORT=9224 + * + * # Both agents share the same browser on CDP port 9223 + */ + +import { serveWithExternalBrowser } from "@/external-browser.js"; +import { mkdirSync } from "fs"; +import { join, dirname } from "path"; +import { fileURLToPath } from "url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const tmpDir = join(__dirname, "..", "tmp"); + +// Create tmp directory if it doesn't exist +mkdirSync(tmpDir, { recursive: true }); + +// Configuration from environment (PORT is optional - will be auto-assigned) +const port = process.env.PORT ? parseInt(process.env.PORT, 10) : undefined; +const cdpPort = process.env.CDP_PORT ? 
parseInt(process.env.CDP_PORT, 10) : undefined; +const browserPath = process.env.BROWSER_PATH; +// Only pass userDataDir if explicitly set - let browser use default profile otherwise +const userDataDir = process.env.USER_DATA_DIR || undefined; +const autoLaunch = process.env.AUTO_LAUNCH !== "false"; + +console.log("Starting dev-browser with external browser mode..."); +console.log(` HTTP API port: ${port ?? "auto (dynamic)"}`); +console.log(` CDP port: ${cdpPort ?? "from config (default: 9223)"}`); +if (browserPath) { + console.log(` Browser path: ${browserPath}`); +} +console.log(` User data dir: ${userDataDir ?? "(default profile)"}`); +console.log(` Auto-launch: ${autoLaunch}`); +console.log(` Config: ~/.dev-browser/config.json`); +console.log(""); + +const server = await serveWithExternalBrowser({ + port, + cdpPort, + browserPath, + userDataDir, + autoLaunch, +}); + +console.log(""); +console.log(`Dev browser server started`); +console.log(` WebSocket: ${server.wsEndpoint}`); +console.log(` HTTP API: http://localhost:${server.port}`); +console.log(` Mode: ${server.mode}`); +console.log(` Tmp directory: ${tmpDir}`); +console.log(""); +console.log("Ready"); +console.log(""); +console.log("Press Ctrl+C to stop (browser will remain open)"); + +// Keep the process running +await new Promise(() => {}); diff --git a/skills/dev-browser/scripts/start-server.ts b/skills/dev-browser/scripts/start-server.ts index e130a27..ccc2135 100644 --- a/skills/dev-browser/scripts/start-server.ts +++ b/skills/dev-browser/scripts/start-server.ts @@ -1,3 +1,31 @@ +/** + * Start dev-browser server in standalone mode (launches Playwright Chromium). 
+ * + * This mode: + * - Launches a dedicated Playwright Chromium browser + * - Owns the browser lifecycle (closes when server stops) + * - Supports multiple concurrent agents via dynamic port allocation + * + * Environment variables: + * PORT - HTTP API port (default: auto-assigned from 9222-9300) + * HEADLESS - Run browser in headless mode (default: false) + * + * Configuration file: ~/.dev-browser/config.json + * { + * "portRange": { "start": 9222, "end": 9300, "step": 2 }, + * "cdpPort": 9223 + * } + * + * Multi-agent usage: + * # Terminal 1: First agent gets port 9222, launches browser + * npx tsx scripts/start-server.ts + * # Output: PORT=9222 + * + * # Terminal 2: Second agent gets port 9224, launches separate browser + * npx tsx scripts/start-server.ts + * # Output: PORT=9224 + */ + import { serve } from "@/index.js"; import { execSync } from "child_process"; import { mkdirSync, existsSync, readdirSync } from "fs"; @@ -9,9 +37,7 @@ const tmpDir = join(__dirname, "..", "tmp"); const profileDir = join(__dirname, "..", "profiles"); // Create tmp and profile directories if they don't exist -console.log("Creating tmp directory..."); mkdirSync(tmpDir, { recursive: true }); -console.log("Creating profiles directory..."); mkdirSync(profileDir, { recursive: true }); // Install Playwright browsers if not already installed @@ -72,46 +98,33 @@ try { console.log("You may need to run: npx playwright install chromium"); } -// Check if server is already running -console.log("Checking for existing servers..."); -try { - const res = await fetch("http://localhost:9222", { - signal: AbortSignal.timeout(1000), - }); - if (res.ok) { - console.log("Server already running on port 9222"); - process.exit(0); - } -} catch { - // Server not running, continue to start -} +// Configuration from environment (PORT is optional - will be auto-assigned) +const port = process.env.PORT ? 
parseInt(process.env.PORT, 10) : undefined; +const headless = process.env.HEADLESS === "true"; -// Clean up stale CDP port if HTTP server isn't running (crash recovery) -// This handles the case where Node crashed but Chrome is still running on 9223 -try { - const pid = execSync("lsof -ti:9223", { encoding: "utf-8" }).trim(); - if (pid) { - console.log(`Cleaning up stale Chrome process on CDP port 9223 (PID: ${pid})`); - execSync(`kill -9 ${pid}`); - } -} catch { - // No process on CDP port, which is expected -} +console.log(""); +console.log("Starting dev browser server (standalone mode)..."); +console.log(` HTTP API port: ${port ?? "auto (dynamic)"}`); +console.log(` Headless: ${headless}`); +console.log(` Config: ~/.dev-browser/config.json`); +console.log(""); -console.log("Starting dev browser server..."); -const headless = process.env.HEADLESS === "true"; const server = await serve({ - port: 9222, + port, headless, profileDir, }); +console.log(""); console.log(`Dev browser server started`); console.log(` WebSocket: ${server.wsEndpoint}`); +console.log(` HTTP API: http://localhost:${server.port}`); console.log(` Tmp directory: ${tmpDir}`); console.log(` Profile directory: ${profileDir}`); -console.log(`\nReady`); -console.log(`\nPress Ctrl+C to stop`); +console.log(""); +console.log("Ready"); +console.log(""); +console.log("Press Ctrl+C to stop"); // Keep the process running await new Promise(() => {}); diff --git a/skills/dev-browser/server.sh b/skills/dev-browser/server.sh index 50369a4..fc1604c 100755 --- a/skills/dev-browser/server.sh +++ b/skills/dev-browser/server.sh @@ -8,17 +8,82 @@ cd "$SCRIPT_DIR" # Parse command line arguments HEADLESS=false +FORCE_STANDALONE=false while [[ "$#" -gt 0 ]]; do case $1 in --headless) HEADLESS=true ;; + --standalone) FORCE_STANDALONE=true ;; *) echo "Unknown parameter: $1"; exit 1 ;; esac shift done -echo "Installing dependencies..." 
-npm install +# Conditional npm install - only if node_modules missing or package-lock changed +NEEDS_INSTALL=false +HASH_FILE="$SCRIPT_DIR/.npm-install-hash" -echo "Starting dev-browser server..." -export HEADLESS=$HEADLESS -npx tsx scripts/start-server.ts +if [ ! -d "$SCRIPT_DIR/node_modules" ]; then + NEEDS_INSTALL=true +elif [ -f "$SCRIPT_DIR/package-lock.json" ]; then + CURRENT_HASH=$(shasum "$SCRIPT_DIR/package-lock.json" 2>/dev/null | cut -d' ' -f1) + SAVED_HASH=$(cat "$HASH_FILE" 2>/dev/null || echo "") + if [ "$CURRENT_HASH" != "$SAVED_HASH" ]; then + NEEDS_INSTALL=true + fi +fi + +if [ "$NEEDS_INSTALL" = true ]; then + echo "Installing dependencies..." + npm install --prefer-offline --no-audit --no-fund + # Save hash for next time + if [ -f "$SCRIPT_DIR/package-lock.json" ]; then + shasum "$SCRIPT_DIR/package-lock.json" | cut -d' ' -f1 > "$HASH_FILE" + fi +else + echo "Dependencies up to date (skipping npm install)" +fi + +# Get browser configuration from config file +# Config is at ~/.dev-browser/config.json +if [ "$FORCE_STANDALONE" = true ]; then + BROWSER_MODE="standalone" + BROWSER_PATH="" +else + # Read config using TypeScript helper + CONFIG_OUTPUT=$(npx tsx scripts/get-browser-config.ts 2>/dev/null) + if [ $? -eq 0 ]; then + eval "$CONFIG_OUTPUT" + else + # Fallback to standalone if config read fails + BROWSER_MODE="standalone" + BROWSER_PATH="" + fi +fi + +# Start the appropriate server mode +if [ "$BROWSER_MODE" = "external" ] && [ -n "$BROWSER_PATH" ]; then + echo "Starting dev-browser server (External Browser mode)..." 
+ echo " Browser: $BROWSER_PATH" + echo " Config: ~/.dev-browser/config.json" + echo " Use --standalone flag to force standalone Playwright mode" + echo "" + + export BROWSER_PATH + # Only export USER_DATA_DIR if explicitly configured (not empty) + if [ -n "$BROWSER_USER_DATA_DIR" ]; then + export USER_DATA_DIR="$BROWSER_USER_DATA_DIR" + fi + npx tsx scripts/start-external-browser.ts +else + echo "Starting dev-browser server (Standalone mode)..." + if [ "$FORCE_STANDALONE" = true ]; then + echo " Standalone mode forced via --standalone flag" + elif [ -z "$BROWSER_PATH" ]; then + echo " Chrome for Testing not found - using Playwright Chromium" + echo " Configure browser.path in ~/.dev-browser/config.json" + fi + echo "" + + export HEADLESS=$HEADLESS + npx tsx scripts/start-server.ts +fi diff --git a/skills/dev-browser/src/config.ts b/skills/dev-browser/src/config.ts new file mode 100644 index 0000000..f9237df --- /dev/null +++ b/skills/dev-browser/src/config.ts @@ -0,0 +1,503 @@ +/** + * Port management for multi-agent concurrency support. + * + * When multiple Claude Code agents (or other automation tools) run dev-browser + * concurrently, each needs its own HTTP API server port while potentially + * sharing the same browser instance. + * + * This module provides: + * - Dynamic port allocation to avoid conflicts + * - Server tracking for coordination + * - Orphaned browser detection and cleanup (crash recovery) + * - Config file support for preferences + * - PORT=XXXX output for agent discovery + * + * @see https://github.com/SawyerHood/dev-browser/pull/15#issuecomment-3698722432 + */ + +import { createServer } from "net"; +import { execSync } from "child_process"; +import { mkdirSync, existsSync, readFileSync, writeFileSync } from "fs"; +import { join } from "path"; + +/** + * Browser mode selection. 
+ * - "auto": Detect Chrome for Testing, fall back to standalone (default) + * - "external": Always use external browser via CDP (fail if not found) + * - "standalone": Always use Playwright's built-in Chromium + */ +export type BrowserMode = "auto" | "external" | "standalone"; + +/** + * Browser configuration for dev-browser. + */ +export interface BrowserConfig { + /** + * Browser mode selection (default: "auto") + * - "auto": Detect Chrome for Testing, fall back to standalone + * - "external": Always use external browser via CDP + * - "standalone": Always use Playwright's built-in Chromium + */ + mode: BrowserMode; + /** + * Path to browser executable for external mode. + * If not set, the first existing platform-specific default is used: + * - macOS: /Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing + * - Linux: /opt/google/chrome-for-testing/chrome, /usr/bin/google-chrome-for-testing or /usr/local/bin/chrome-for-testing + * - Windows: C:\Program Files\Google\Chrome for Testing\Application\chrome.exe + */ + path?: string; + /** + * User data directory for browser profile. + * Default: unset (the browser then uses its own default profile) + */ + userDataDir?: string; +} + +/** + * Configuration for dev-browser multi-agent support. + */ +export interface DevBrowserConfig { + /** + * Port range for HTTP API servers. + * Each concurrent agent gets a port from this range. + */ + portRange: { + /** First port to try (default: 9222) */ + start: number; + /** Last port to try (default: 9300) */ + end: number; + /** Port increment - use 2 to avoid CDP port collision (default: 2) */ + step: number; + }; + /** CDP port for external browser mode (default: 9223) */ + cdpPort: number; + /** Browser configuration */ + browser: BrowserConfig; +} + +/** + * Information about a registered server. 
+ */ +export interface ServerInfo { + /** Process ID of the server */ + pid: number; + /** CDP port the server's browser is using (for orphan detection) */ + cdpPort?: number; + /** Browser process ID (for standalone mode cleanup) */ + browserPid?: number; + /** Server mode: 'standalone' owns browser, 'external' connects to shared browser */ + mode: "standalone" | "external"; + /** Timestamp when server was registered */ + startedAt: string; +} + +const CONFIG_DIR = join(process.env.HOME || "", ".dev-browser"); +const CONFIG_FILE = join(CONFIG_DIR, "config.json"); +const SERVERS_FILE = join(CONFIG_DIR, "active-servers.json"); + +/** + * Get platform-specific default browser path for Chrome for Testing. + */ +function getDefaultBrowserPath(): string | undefined { + const platform = process.platform; + + if (platform === "darwin") { + // macOS: Check standard installation path + const macPath = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"; + if (existsSync(macPath)) { + return macPath; + } + } else if (platform === "linux") { + // Linux: Check common installation paths + const linuxPaths = [ + "/opt/google/chrome-for-testing/chrome", + "/usr/bin/google-chrome-for-testing", + "/usr/local/bin/chrome-for-testing", + ]; + for (const path of linuxPaths) { + if (existsSync(path)) { + return path; + } + } + } else if (platform === "win32") { + // Windows: Check standard installation path + const winPath = "C:\\Program Files\\Google\\Chrome for Testing\\Application\\chrome.exe"; + if (existsSync(winPath)) { + return winPath; + } + } + + return undefined; +} + +/** + * Default configuration values. 
+ */ +const DEFAULT_CONFIG: DevBrowserConfig = { + portRange: { + start: 9222, + end: 9300, + step: 2, // Skip odd ports to avoid CDP port collision + }, + cdpPort: 9223, + browser: { + mode: "auto", + // userDataDir intentionally not set - let browser use its default profile + // unless user explicitly configures it in ~/.dev-browser/config.json + }, +}; + +/** + * Load configuration from ~/.dev-browser/config.json with defaults. + * Merges user config with defaults and resolves platform-specific browser paths; never mutates DEFAULT_CONFIG. + */ +export function loadConfig(): DevBrowserConfig { + let config: DevBrowserConfig = { ...DEFAULT_CONFIG, portRange: { ...DEFAULT_CONFIG.portRange }, browser: { ...DEFAULT_CONFIG.browser } }; // fresh nested copies: the path resolution below must not write into DEFAULT_CONFIG + + try { + if (existsSync(CONFIG_FILE)) { + const content = readFileSync(CONFIG_FILE, "utf-8"); + const userConfig = JSON.parse(content); + config = { + ...DEFAULT_CONFIG, + ...userConfig, + portRange: { + ...DEFAULT_CONFIG.portRange, + ...(userConfig.portRange || {}), + }, + browser: { + ...DEFAULT_CONFIG.browser, + ...(userConfig.browser || {}), + }, + }; + } + } catch (err) { + console.warn(`Warning: Could not load config from ${CONFIG_FILE}:`, err); + } + + // Resolve browser path: user config > auto-detection > undefined + if (!config.browser.path) { + config.browser.path = getDefaultBrowserPath(); + } + + return config; +} + +/** + * Get resolved browser configuration for use by server scripts. + * Returns the effective browser mode and path based on config and detection. + */ +export function getResolvedBrowserConfig(): { + mode: "external" | "standalone"; + path?: string; + userDataDir?: string; +} { + const config = loadConfig(); + const { browser } = config; + + // Determine effective mode + let effectiveMode: "external" | "standalone"; + + if (browser.mode === "standalone") { + effectiveMode = "standalone"; + } else if (browser.mode === "external") { + if (!browser.path) { + throw new Error( + `Browser mode is "external" but no browser path configured or detected. 
` + + `Set browser.path in ~/.dev-browser/config.json or install Chrome for Testing.` + ); + } + effectiveMode = "external"; + } else { + // "auto" mode: use external if browser found, otherwise standalone + effectiveMode = browser.path ? "external" : "standalone"; + } + + return { + mode: effectiveMode, + path: browser.path, + // Only include userDataDir if explicitly configured by user + // For external mode, let the browser use its default profile unless specified + userDataDir: browser.userDataDir, + }; +} + +/** + * Check if a port is available by attempting to bind to it. + * Checks both IPv4 and IPv6 to match Express's default binding behavior. + */ +export async function isPortAvailable(port: number): Promise<boolean> { + // Check default binding (IPv6 on most systems, which Express uses) + const defaultAvailable = await new Promise<boolean>((resolve) => { + const server = createServer(); + server.once("error", () => resolve(false)); + server.once("listening", () => { + server.close(() => resolve(true)); + }); + server.listen(port); + }); + + if (!defaultAvailable) return false; + + // Also check IPv4 for completeness + const ipv4Available = await new Promise<boolean>((resolve) => { + const server = createServer(); + server.once("error", () => resolve(false)); + server.once("listening", () => { + server.close(() => resolve(true)); + }); + server.listen(port, "0.0.0.0"); + }); + + return ipv4Available; +} + +/** + * Find an available port in the configured range (portRange.end is inclusive - it is the last port tried). + * @throws Error if no ports are available + */ +export async function findAvailablePort(config?: DevBrowserConfig): Promise<number> { + const { portRange } = config || loadConfig(); + const { start, end, step } = portRange; + + for (let port = start; port <= end; port += step) { + if (await isPortAvailable(port)) { + return port; + } + } + + throw new Error( + `No available ports in range ${start}-${end} (step ${step}). ` + + `Too many dev-browser servers may be running. 
` + + `Check ~/.dev-browser/active-servers.json for active servers.` + ); +} + +/** + * Check if a process exists (signal 0 only probes; EPERM means it exists but belongs to another user). + */ +function processExists(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (err) { + return (err as NodeJS.ErrnoException).code === "EPERM"; + } +} + +/** + * Load the servers file, handling both old format (pid only) and new format (ServerInfo). + */ +function loadServersFile(): Record<string, ServerInfo> { + if (!existsSync(SERVERS_FILE)) { + return {}; + } + + try { + const content = readFileSync(SERVERS_FILE, "utf-8"); + const data = JSON.parse(content); + + // Handle migration from old format { port: pid } to new format { port: ServerInfo } + const servers: Record<string, ServerInfo> = {}; + for (const [port, value] of Object.entries(data)) { + if (typeof value === "number") { + // Old format: migrate to new format + servers[port] = { + pid: value, + mode: "standalone", // Assume standalone for old entries + startedAt: new Date().toISOString(), + }; + } else { + // New format + servers[port] = value as ServerInfo; + } + } + return servers; + } catch { + return {}; + } +} + +/** + * Save the servers file. + */ +function saveServersFile(servers: Record<string, ServerInfo>): void { + mkdirSync(CONFIG_DIR, { recursive: true }); + writeFileSync(SERVERS_FILE, JSON.stringify(servers, null, 2)); +} + +/** + * Clean up stale entries from servers file (processes that no longer exist). + */ +function cleanupStaleEntries(servers: Record<string, ServerInfo>): Record<string, ServerInfo> { + const cleaned: Record<string, ServerInfo> = {}; + for (const [port, info] of Object.entries(servers)) { + if (processExists(info.pid)) { + cleaned[port] = info; + } + } + return cleaned; +} + +/** + * Register a server for coordination tracking. + * This helps coordinate shutdown behavior and orphan detection. 
+ */ +export function registerServer( + port: number, + pid: number, + options?: { + cdpPort?: number; + browserPid?: number; + mode?: "standalone" | "external"; + } +): void { + mkdirSync(CONFIG_DIR, { recursive: true }); + + let servers = loadServersFile(); + servers = cleanupStaleEntries(servers); + + servers[port.toString()] = { + pid, + cdpPort: options?.cdpPort, + browserPid: options?.browserPid, + mode: options?.mode ?? "standalone", + startedAt: new Date().toISOString(), + }; + + saveServersFile(servers); +} + +/** + * Unregister a server and return the count of remaining servers. + */ +export function unregisterServer(port: number): number { + let servers = loadServersFile(); + delete servers[port.toString()]; + servers = cleanupStaleEntries(servers); + saveServersFile(servers); + return Object.keys(servers).length; +} + +/** + * Get the count of currently active servers. + */ +export function getActiveServerCount(): number { + const servers = loadServersFile(); + const cleaned = cleanupStaleEntries(servers); + return Object.keys(cleaned).length; +} + +/** + * Get process ID listening on a specific port (macOS/Linux). + * Returns null if no process is listening or on error. + */ +function getProcessOnPort(port: number): number | null { + try { + // Works on macOS and Linux + const output = execSync(`lsof -ti:${port}`, { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + }).trim(); + + if (output) { + // May return multiple PIDs, take the first one + const firstLine = output.split("\n")[0] ?? ""; + const pid = parseInt(firstLine, 10); + return isNaN(pid) ? null : pid; + } + } catch { + // No process on port or lsof not available + } + return null; +} + +/** + * Information about an orphaned browser. + */ +export interface OrphanedBrowser { + cdpPort: number; + pid: number; +} + +/** + * Detect orphaned browsers - browsers running on CDP ports with no registered server. 
+ * + * This handles crash recovery: if a server crashed without cleanup, its browser + * may still be running. This function identifies such orphans. + * + * @param cdpPorts - CDP ports to check (default: common ports 9223, 9225, etc.) + * @returns List of orphaned browsers + */ +export function detectOrphanedBrowsers(cdpPorts?: number[]): OrphanedBrowser[] { + const servers = loadServersFile(); + const cleanedServers = cleanupStaleEntries(servers); + + // Get CDP ports that have active servers + const activeCdpPorts = new Set(); + for (const info of Object.values(cleanedServers)) { + if (info.cdpPort) { + activeCdpPorts.add(info.cdpPort); + } + } + + // Default ports to check if not specified + const portsToCheck = cdpPorts ?? [9223, 9225, 9227, 9229, 9231]; + + const orphans: OrphanedBrowser[] = []; + for (const cdpPort of portsToCheck) { + // Skip if an active server claims this CDP port + if (activeCdpPorts.has(cdpPort)) { + continue; + } + + // Check if something is running on this port + const pid = getProcessOnPort(cdpPort); + if (pid !== null) { + orphans.push({ cdpPort, pid }); + } + } + + return orphans; +} + +/** + * Clean up orphaned browsers from previous crashed sessions. + * + * This is useful for standalone mode where the server owns the browser lifecycle. + * Only kills processes that are truly orphaned (no registered server). 
+ * + * @param cdpPorts - CDP ports to check for orphans + * @returns Number of orphaned browsers cleaned up + */ +export function cleanupOrphanedBrowsers(cdpPorts?: number[]): number { + const orphans = detectOrphanedBrowsers(cdpPorts); + let cleaned = 0; + + for (const orphan of orphans) { + try { + console.log( + `Cleaning up orphaned browser on CDP port ${orphan.cdpPort} (PID: ${orphan.pid})` + ); + process.kill(orphan.pid, "SIGTERM"); + cleaned++; + } catch (err) { + console.warn( + `Warning: Could not kill orphaned process ${orphan.pid}: ${err}` + ); + } + } + + return cleaned; +} + +/** + * Output the assigned port for agent discovery. + * Agents parse this output to know which port to connect to. + * + * Format: PORT=XXXX + */ +export function outputPortForDiscovery(port: number): void { + console.log(`PORT=${port}`); +} diff --git a/skills/dev-browser/src/external-browser.ts b/skills/dev-browser/src/external-browser.ts new file mode 100644 index 0000000..9da775f --- /dev/null +++ b/skills/dev-browser/src/external-browser.ts @@ -0,0 +1,382 @@ +import express, { type Express, type Request, type Response } from "express"; +import { chromium, type Browser, type BrowserContext, type Page } from "playwright"; +import { spawn } from "child_process"; +import type { Socket } from "net"; +import type { + GetPageRequest, + GetPageResponse, + ListPagesResponse, + ServerInfoResponse, +} from "./types"; +import { + loadConfig, + findAvailablePort, + registerServer, + unregisterServer, + outputPortForDiscovery, +} from "./config.js"; + +export interface ExternalBrowserOptions { + /** + * HTTP API port. If not specified, a port is automatically assigned + * from the configured range (default: 9222-9300, step 2). + * This enables multiple agents to run concurrently. 
+ */ + port?: number; + /** CDP port where external browser is listening (default: 9223) */ + cdpPort?: number; + /** Path to browser executable (for auto-launch) */ + browserPath?: string; + /** User data directory for browser profile (for auto-launch) */ + userDataDir?: string; + /** Whether to auto-launch browser if not running (default: true) */ + autoLaunch?: boolean; +} + +export interface ExternalBrowserServer { + wsEndpoint: string; + port: number; + mode: "external-browser"; + stop: () => Promise; +} + +/** + * Check if a browser is running on the specified CDP port + */ +async function isBrowserRunning(cdpPort: number): Promise { + try { + const res = await fetch(`http://127.0.0.1:${cdpPort}/json/version`, { + signal: AbortSignal.timeout(2000), + }); + return res.ok; + } catch { + return false; + } +} + +/** + * Get the CDP WebSocket endpoint from a running browser + */ +async function getCdpEndpoint(cdpPort: number, maxRetries = 60): Promise { + for (let i = 0; i < maxRetries; i++) { + try { + const res = await fetch(`http://127.0.0.1:${cdpPort}/json/version`, { + signal: AbortSignal.timeout(2000), + }); + if (res.ok) { + const data = (await res.json()) as { webSocketDebuggerUrl: string }; + return data.webSocketDebuggerUrl; + } + } catch { + // Browser not ready yet + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } + throw new Error(`Browser did not start on port ${cdpPort} within ${maxRetries * 0.5}s`); +} + +/** + * Launch browser as a detached process (survives server shutdown) + */ +function launchBrowserDetached( + browserPath: string, + cdpPort: number, + userDataDir?: string +): void { + const args = [ + `--remote-debugging-port=${cdpPort}`, + "--no-first-run", + "--no-default-browser-check", + ]; + + // Only add user-data-dir if explicitly configured + // This lets the browser use its default profile when not specified + if (userDataDir) { + args.push(`--user-data-dir=${userDataDir}`); + } + + console.log(`Launching browser: 
${browserPath}`);
  console.log(` CDP port: ${cdpPort}`);
  console.log(` User data: ${userDataDir ?? "(default profile)"}`);

  const child = spawn(browserPath, args, {
    detached: true,
    stdio: "ignore",
  });
  // A failed spawn (e.g. wrong browserPath) is reported through the "error"
  // event; without a listener Node throws it as an uncaught exception.
  child.on("error", (err) => {
    console.error(`Failed to launch browser at ${browserPath}: ${err.message}`);
  });
  child.unref();
}

/**
 * Helper to add timeout to promises.
 *
 * @param promise - Operation to wait for
 * @param ms - Time limit in milliseconds
 * @param message - Text appended to "Timeout: " in the rejection error
 * @returns The value of `promise`, or a rejection once `ms` has elapsed
 */
function withTimeout<T>(promise: Promise<T>, ms: number, message: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(`Timeout: ${message}`)), ms);
  });
  // Clear the timer once the race is decided so it does not stay pending.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}

/**
 * Serve dev-browser by connecting to an external browser via CDP.
 *
 * This mode is ideal for:
 * - Using Chrome for Testing or other specific browser builds
 * - Keeping the browser open after automation (for manual inspection)
 * - Development workflows where you want to see automation in a visible browser
 *
 * The browser lifecycle is managed externally - this server only connects/disconnects.
 *
 * @param options - Ports, browser path and auto-launch behaviour; see ExternalBrowserOptions
 * @returns Handle with the CDP WebSocket endpoint, the HTTP port and a `stop` function
 * @throws Error on invalid ports, when no browser is reachable and it cannot be
 *   launched, or when the HTTP server fails to start listening
 */
export async function serveWithExternalBrowser(
  options: ExternalBrowserOptions = {}
): Promise<ExternalBrowserServer> {
  const config = loadConfig();

  // Use dynamic port allocation if port not specified
  const port = options.port ?? (await findAvailablePort(config));
  const cdpPort = options.cdpPort ?? config.cdpPort;
  const autoLaunch = options.autoLaunch ?? true;
  const browserPath = options.browserPath;
  // Only use userDataDir if explicitly provided - let browser use default profile otherwise
  const userDataDir = options.userDataDir;

  // Validate port numbers (Number.isInteger also rejects NaN and fractions)
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
    throw new Error(`Invalid port: ${port}. Must be between 1 and 65535`);
  }
  if (!Number.isInteger(cdpPort) || cdpPort < 1 || cdpPort > 65535) {
    throw new Error(`Invalid cdpPort: ${cdpPort}. Must be between 1 and 65535`);
  }
  if (port === cdpPort) {
    throw new Error("port and cdpPort must be different");
  }

  // Check if browser is running, optionally launch it
  const running = await isBrowserRunning(cdpPort);

  if (!running) {
    if (autoLaunch && browserPath) {
      console.log(`Browser not running on port ${cdpPort}, launching...`);
      launchBrowserDetached(browserPath, cdpPort, userDataDir);
    } else if (autoLaunch && !browserPath) {
      throw new Error(
        `Browser not running on port ${cdpPort} and no browserPath provided for auto-launch. ` +
          `Either start the browser manually with --remote-debugging-port=${cdpPort} or provide browserPath.`
      );
    } else {
      throw new Error(
        `Browser not running on port ${cdpPort}. ` +
          `Start it with --remote-debugging-port=${cdpPort}`
      );
    }
  } else {
    console.log(`Browser already running on port ${cdpPort}`);
  }

  // Wait for CDP endpoint
  console.log("Waiting for CDP endpoint...");
  const wsEndpoint = await getCdpEndpoint(cdpPort);
  console.log(`CDP WebSocket endpoint: ${wsEndpoint}`);

  // Connect to the browser via CDP
  console.log("Connecting to browser via CDP...");
  const browser: Browser = await chromium.connectOverCDP(`http://127.0.0.1:${cdpPort}`);
  console.log("Connected to external browser");

  // Use the first existing context, or create one if the browser exposes none
  const context: BrowserContext = browser.contexts()[0] ?? (await browser.newContext());

  // Registry entry type for page tracking
  interface PageEntry {
    page: Page;
    targetId: string;
  }

  // Registry: name -> PageEntry
  const registry = new Map<string, PageEntry>();

  // Helper to get CDP targetId for a page
  async function getTargetId(page: Page): Promise<string> {
    const cdpSession = await context.newCDPSession(page);
    try {
      const { targetInfo } = await cdpSession.send("Target.getTargetInfo");
      return targetInfo.targetId;
    } finally {
      await cdpSession.detach();
    }
  }

  // Helper to create a page and resolve its targetId, closing the page on failure
  async function createPageEntry(): Promise<PageEntry> {
    const pending = context.newPage();
    let page: Page;
    try {
      page = await withTimeout(pending, 30000, "Page creation timed out after 30s");
    } catch (err) {
      // If the page still appears after the timeout, close it so it does not
      // linger as an untracked tab.
      void pending.then((late) => late.close()).catch(() => undefined);
      throw err;
    }

    try {
      const targetId = await getTargetId(page);
      return { page, targetId };
    } catch (err) {
      await page.close().catch(() => undefined);
      throw err;
    }
  }

  const errorMessage = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Express server for page management
  const app: Express = express();
  app.use(express.json());

  // GET / - server info
  app.get("/", (_req: Request, res: Response) => {
    const response: ServerInfoResponse & { mode: string } = {
      wsEndpoint,
      mode: "external-browser",
    };
    res.json(response);
  });

  // GET /pages - list all pages
  app.get("/pages", (_req: Request, res: Response) => {
    const response: ListPagesResponse = {
      pages: Array.from(registry.keys()),
    };
    res.json(response);
  });

  // POST /pages - get or create page
  app.post("/pages", async (req: Request, res: Response) => {
    // The body may be missing entirely when the client sent no JSON.
    const name = (req.body as Partial<GetPageRequest> | undefined)?.name;

    // Also rejects the empty string, so no separate empty-name check is needed.
    if (!name || typeof name !== "string") {
      res.status(400).json({ error: "name is required and must be a string" });
      return;
    }

    if (name.length > 256) {
      res.status(400).json({ error: "name must be 256 characters or less" });
      return;
    }

    // Failures are answered with a 500 instead of escaping as an unhandled
    // rejection, which the process-level handler below treats as fatal.
    try {
      let entry = registry.get(name);
      if (!entry) {
        const created = await createPageEntry();
        const raced = registry.get(name);
        if (raced) {
          // Another request registered this name while we were waiting.
          await created.page.close().catch(() => undefined);
          entry = raced;
        } else {
          entry = created;
          registry.set(name, created);

          // Clean up registry when page is closed (e.g., user clicks X)
          const { page } = created;
          page.on("close", () => {
            if (registry.get(name)?.page === page) {
              registry.delete(name);
            }
          });
        }
      }

      const response: GetPageResponse = { wsEndpoint, name, targetId: entry.targetId };
      res.json(response);
    } catch (err) {
      res.status(500).json({ error: errorMessage(err) });
    }
  });

  // DELETE /pages/:name - close a page
  app.delete("/pages/:name", async (req: Request<{ name: string }>, res: Response) => {
    // Express has already URL-decoded route params; decoding again would throw
    // URIError for names containing "%" and mis-resolve names that look encoded.
    const name = req.params.name;
    const entry = registry.get(name);

    if (!entry) {
      res.status(404).json({ error: "page not found" });
      return;
    }

    try {
      await entry.page.close();
      registry.delete(name);
      res.json({ success: true });
    } catch (err) {
      res.status(500).json({ error: errorMessage(err) });
    }
  });

  // Track active connections for clean shutdown
  const connections = new Set<Socket>();

  // Start the server and wait until it is listening, so a busy port rejects
  // this call instead of leaving a registry entry for a dead server.
  type HttpServer = ReturnType<typeof app.listen>;
  let server: HttpServer;
  try {
    server = await new Promise<HttpServer>((resolve, reject) => {
      const listening = app.listen(port, () => {
        listening.off("error", reject);
        console.log(`HTTP API server running on port ${port}`);
        resolve(listening);
      });
      listening.once("error", reject);
      listening.on("connection", (socket: Socket) => {
        connections.add(socket);
        socket.on("close", () => connections.delete(socket));
      });
    });
  } catch (err) {
    // Disconnect from the browser; it stays open.
    await browser.close().catch(() => undefined);
    throw err;
  }

  // Register this server for multi-agent coordination (external mode doesn't own the browser)
  registerServer(port, process.pid, { cdpPort, mode: "external" });

  // Output port for agent discovery (agents parse this to know which port to connect to)
  outputPortForDiscovery(port);

  // Track if cleanup has been called to avoid double cleanup
  let cleaningUp = false;

  // Cleanup function - disconnects but does NOT close the browser
  const cleanup = async () => {
    if (cleaningUp) return;
    cleaningUp = true;

    console.log("\nShutting down...");

    // Close all active HTTP connections
    for (const socket of connections) {
      socket.destroy();
    }
    connections.clear();

    // Close managed pages (pages we created, not user's existing tabs).
    // Iterate over a copy: closing a page removes it from the registry.
    for (const entry of Array.from(registry.values())) {
      try {
        await entry.page.close();
      } catch {
        // Page might already be closed
      }
    }
    registry.clear();

    // Disconnect from browser (does NOT close it)
    try {
      await browser.close();
    } catch {
      // Already disconnected
    }

    server.close();

    // Unregister this server
    const remainingServers = unregisterServer(port);
    console.log(
      `Server stopped. Browser remains open. ` +
        `${remainingServers} other server(s) still running.`
    );
  };

  // Signal handlers
  const signals = ["SIGINT", "SIGTERM", "SIGHUP"] as const;

  const signalHandler = async () => {
    await cleanup();
    process.exit(0);
  };

  const errorHandler = async (err: unknown) => {
    console.error("Unhandled error:", err);
    await cleanup();
    process.exit(1);
  };

  // Register handlers
  signals.forEach((sig) => process.on(sig, signalHandler));
  process.on("uncaughtException", errorHandler);
  process.on("unhandledRejection", errorHandler);

  // Helper to remove all handlers
  const removeHandlers = () => {
    signals.forEach((sig) => process.off(sig, signalHandler));
    process.off("uncaughtException", errorHandler);
    process.off("unhandledRejection", errorHandler);
  };

  return {
    wsEndpoint,
    port,
    mode: "external-browser",
    async stop() {
      removeHandlers();
      await cleanup();
    },
  };
}
diff --git a/skills/dev-browser/src/index.ts b/skills/dev-browser/src/index.ts index 24fd619..3d74ba2 100644 --- a/skills/dev-browser/src/index.ts +++ b/skills/dev-browser/src/index.ts @@ -10,9 +10,37 @@ import type { ListPagesResponse, ServerInfoResponse, } from "./types"; +import { + loadConfig, + findAvailablePort, + registerServer, + unregisterServer, + outputPortForDiscovery, + cleanupOrphanedBrowsers, +} from "./config.js"; export type { ServeOptions, GetPageResponse, ListPagesResponse, ServerInfoResponse }; +// Re-export external browser mode +export { + serveWithExternalBrowser, + type ExternalBrowserOptions, + type ExternalBrowserServer, +} from "./external-browser.js"; + +// Re-export configuration utilities +export { + loadConfig, + findAvailablePort, + cleanupOrphanedBrowsers, + detectOrphanedBrowsers, + type DevBrowserConfig, + type BrowserConfig, + type BrowserMode, + type ServerInfo, + type OrphanedBrowser, +} from "./config.js"; + export interface DevBrowserServer { wsEndpoint: string; port: number; @@ -52,9 +80,12 @@ function 
withTimeout(promise: Promise, ms: number, message: string): Promi } export async function serve(options: ServeOptions = {}): Promise { - const port = options.port ?? 9222; + const config = loadConfig(); + + // Use dynamic port allocation if port not specified + const port = options.port ?? await findAvailablePort(config); const headless = options.headless ?? false; - const cdpPort = options.cdpPort ?? 9223; + const cdpPort = options.cdpPort ?? config.cdpPort; const profileDir = options.profileDir; // Validate port numbers @@ -77,6 +108,14 @@ export async function serve(options: ServeOptions = {}): Promise 0) { + // Give the OS a moment to release the port + await new Promise((resolve) => setTimeout(resolve, 500)); + } + console.log("Launching browser with persistent context..."); // Launch persistent context - this persists cookies, localStorage, cache, etc. @@ -189,6 +228,12 @@ export async function serve(options: ServeOptions = {}): Promise(); server.on("connection", (socket: Socket) => { @@ -230,7 +275,10 @@ export async function serve(options: ServeOptions = {}): Promise