Skip to content

Added stagehand_get_html tool to Stagehand MCP Server #81

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions stagehand/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
],
"scripts": {
"build": "tsc && shx chmod +x dist/*.js",
"dev": "npm run build && STAGEHAND_HTTP_PORT=8081 node dist/index.js",
"prepare": "npm run build",
"watch": "tsc --watch"
},
Expand Down
94 changes: 94 additions & 0 deletions stagehand/src/httpStaticServer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import http from "http";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Empties `directory` of everything except dot-prefixed entries, creating the
 * directory (and any missing parents) when it does not exist yet.
 *
 * Entries are removed recursively, so sub-directories are cleaned up as well.
 * A failure on one entry is logged and the remaining entries are still
 * processed. Errors raised while creating a missing directory propagate to the
 * caller.
 *
 * All diagnostics go to stderr: this module runs inside a process that speaks
 * MCP over stdio, where stdout carries protocol messages.
 *
 * @param directory - Path of the directory to empty.
 */
function cleanTmpDirectory(directory: string): void {
  if (!fs.existsSync(directory)) {
    fs.mkdirSync(directory, { recursive: true });
    return;
  }

  let entries: string[];
  try {
    entries = fs.readdirSync(directory);
  } catch (err) {
    console.error(`Error cleaning tmp directory: ${err}`);
    return;
  }

  let failures = 0;
  for (const entry of entries) {
    if (entry.startsWith(".")) continue; // Skip hidden files

    try {
      // rmSync (unlike unlinkSync) also handles directories; `force` ignores
      // entries that disappeared between readdir and removal.
      fs.rmSync(path.join(directory, entry), { recursive: true, force: true });
    } catch (err) {
      failures += 1;
      console.error(`Error cleaning tmp directory: ${err}`);
    }
  }

  if (failures === 0) {
    console.error(`Cleaned tmp directory: ${directory}`);
  }
}

/**
 * Starts an HTTP server that exposes the package's `tmp` directory under `/tmp/`.
 *
 * - `GET /tmp` or `GET /tmp/` returns an HTML listing of the non-hidden entries.
 * - `GET /tmp/<name>` returns the file's bytes with `Content-Type: text/html`.
 * - Any other path returns 404; a path that resolves outside the directory
 *   returns 403; a malformed percent-encoding returns 400.
 *
 * The directory is emptied (or created) on startup. The port is taken from
 * `STAGEHAND_HTTP_PORT`, falling back to 8080 when the variable is unset or is
 * not a valid port number.
 *
 * Diagnostics go to stderr because the surrounding process uses stdout for the
 * MCP stdio transport.
 *
 * @returns The server instance and the configured port. The function returns
 *   before the listen callback runs, so `port` is the requested port, not one
 *   reported by the operating system.
 */
export function startStaticHttpServer() {
  const TMP_DIR = path.resolve(__dirname, "../tmp");
  const DEFAULT_PORT = 8080;
  const URL_PREFIX = "/tmp/";

  const rawPort = process.env.STAGEHAND_HTTP_PORT;
  let HTTP_PORT = DEFAULT_PORT;
  if (rawPort) {
    const parsed = parseInt(rawPort, 10);
    if (Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535) {
      HTTP_PORT = parsed;
    } else {
      // listen() throws on NaN / out-of-range ports; fall back instead.
      console.error(
        `Invalid STAGEHAND_HTTP_PORT "${rawPort}", falling back to ${DEFAULT_PORT}`
      );
    }
  }

  // Sends a bare status code with a short plain body.
  const reply = (res: http.ServerResponse, status: number, body: string): void => {
    res.writeHead(status);
    res.end(body);
  };

  // Escapes text for safe inclusion in HTML element content.
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  // Clean the tmp directory on startup
  cleanTmpDirectory(TMP_DIR);

  const server = http.createServer((req, res) => {
    if (!req.url) {
      reply(res, 400, "Bad Request");
      return;
    }

    // Drop the query string, then undo percent-encoding so that the links
    // produced by the directory listing (which are encoded) resolve to files.
    const queryStart = req.url.indexOf("?");
    const rawPath = queryStart === -1 ? req.url : req.url.slice(0, queryStart);
    let pathname: string;
    try {
      pathname = decodeURIComponent(rawPath);
    } catch {
      reply(res, 400, "Bad Request");
      return;
    }
    // A NUL byte would make fs.readFile throw synchronously.
    if (pathname.includes("\0")) {
      reply(res, 400, "Bad Request");
      return;
    }

    // Directory listing for /tmp/ or /tmp (checked before the prefix guard so
    // that the slash-less form is actually reachable).
    if (pathname === "/tmp/" || pathname === "/tmp") {
      fs.readdir(TMP_DIR, (err, files) => {
        if (err) {
          reply(res, 500, "Failed to read directory");
          return;
        }
        const links = files
          .filter((f) => !f.startsWith("."))
          .map(
            (f) =>
              `<li><a href="/tmp/${encodeURIComponent(f)}">${escapeHtml(f)}</a></li>`
          )
          .join("\n");
        const html = `<!DOCTYPE html><html><head><title>tmp Directory Listing</title></head><body><h1>Files in /tmp/</h1><ul>${links}</ul></body></html>`;
        res.writeHead(200, { "Content-Type": "text/html" });
        res.end(html);
      });
      return;
    }

    // Only allow /tmp/ URLs
    if (!pathname.startsWith(URL_PREFIX)) {
      reply(res, 404, "Not Found");
      return;
    }

    // Serve individual files. path.join normalises any ".." segments; the
    // result must then sit strictly inside TMP_DIR. Comparing against
    // TMP_DIR + separator prevents a sibling such as "<root>/tmp-other" from
    // passing a bare prefix check.
    const filePath = path.join(TMP_DIR, pathname.slice(URL_PREFIX.length));
    if (filePath !== TMP_DIR && !filePath.startsWith(TMP_DIR + path.sep)) {
      reply(res, 403, "Forbidden");
      return;
    }
    fs.readFile(filePath, (err, data) => {
      if (err) {
        reply(res, 404, "Not Found");
        return;
      }
      res.writeHead(200, { "Content-Type": "text/html" });
      res.end(data);
    });
  });

  // Without a listener, a bind failure (e.g. EADDRINUSE) is an unhandled
  // 'error' event and takes the whole process down.
  server.on("error", (err) => {
    console.error(`Static file server error: ${err}`);
  });

  // NOTE(review): listen() without a host accepts connections on all
  // interfaces; consider binding to 127.0.0.1 if only local access is intended.
  let actualPort = HTTP_PORT;
  server.listen(HTTP_PORT, () => {
    const address = server.address();
    if (address && typeof address === "object") {
      actualPort = address.port;
    }
    // eslint-disable-next-line no-console
    console.error(`Static file server running at http://localhost:${actualPort}/tmp/`);
  });
  return { server, port: actualPort };
}
13 changes: 11 additions & 2 deletions stagehand/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@

import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { createServer } from "./server.js";
import { ensureLogDirectory, registerExitHandlers, scheduleLogRotation, setupLogRotation } from "./logging.js";
import {
ensureLogDirectory,
registerExitHandlers,
scheduleLogRotation,
setupLogRotation,
} from "./logging.js";
import { startStaticHttpServer } from "./httpStaticServer.js";

// Run setup for logging
// These helpers are imported from ./logging.js and are invoked at module load,
// before runServer() connects the transport.
ensureLogDirectory();
setupLogRotation();
scheduleLogRotation();
registerExitHandlers();

// Start the static HTTP server for /tmp and capture the port
// NOTE: startStaticHttpServer() returns synchronously, before its listen
// callback has run, so the captured port is the configured one
// (STAGEHAND_HTTP_PORT or 8080) rather than a value reported after binding.
const { port: staticHttpPort } = startStaticHttpServer();

// Run the server
/**
 * Creates the server via createServer(), connects it to a stdio transport and,
 * once connected, sends an info-level logging message that includes the static
 * HTTP server's port.
 */
async function runServer() {
const server = createServer();
const transport = new StdioServerTransport();
await server.connect(transport);
server.sendLoggingMessage({
level: "info",
// NOTE(review): two `data` entries appear below. This looks like the diff view
// showing the removed and the added line together; presumably only the
// template-literal version belongs in the file. Confirm against the branch.
data: "Stagehand MCP server is ready to accept requests",
data: `Stagehand MCP server is ready to accept requests. Static HTTP server running on port ${staticHttpPort}`,
});
}

Expand Down
160 changes: 160 additions & 0 deletions stagehand/src/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ import { Stagehand } from "@browserbasehq/stagehand";
import { CallToolResult, Tool } from "@modelcontextprotocol/sdk/types.js";
import { getServerInstance, operationLogs } from "./logging.js";
import { screenshots } from "./resources.js";
import { fileURLToPath } from "url";
import path from "path";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Define the Stagehand tools
export const TOOLS: Tool[] = [
Expand Down Expand Up @@ -69,6 +74,21 @@ export const TOOLS: Tool[] = [
required: ["instruction"],
},
},
{
name: "stagehand_get_html",
description:
"Captures the raw HTML of the current webpage. Use this tool when you need to analyze the page structure or extract specific HTML elements. This tool returns a URL that you need to download to access the HTML content.",
inputSchema: {
type: "object",
properties: {
selector: {
type: "string",
description:
"Optional selector to get HTML for a specific element. Both CSS and XPath selectors are supported. If omitted, returns the entire page HTML.",
},
},
},
},
{
name: "screenshot",
description:
Expand Down Expand Up @@ -233,6 +253,146 @@ export async function handleToolCall(
};
}

case "stagehand_get_html":
try {
const html = await stagehand.page.evaluate((selector) => {
if (selector) {
try {
// Check if the selector is an XPath selector
if (
selector.startsWith("/") ||
selector.startsWith("./") ||
selector.startsWith("//")
) {
// Handle XPath selector
const result = document.evaluate(
selector,
document,
null,
XPathResult.FIRST_ORDERED_NODE_TYPE,
null
);
const element = result.singleNodeValue;
if (!element || !(element instanceof Element)) {
return `<!DOCTYPE html>
<html>
<head><title>XPath Element Not Found</title></head>
<body>
<h1>XPath Element Not Found</h1>
<p>The XPath selector did not match any elements on the page.</p>
<h2>Details:</h2>
<ul>
<li><strong>XPath Selector:</strong> ${selector}</li>
<li><strong>Document Title:</strong> ${document.title}</li>
<li><strong>Document URL:</strong> ${document.location.href}</li>
<li><strong>Page Content Length:</strong> ${document.documentElement.outerHTML.length} characters</li>
</ul>
<h2>Suggestions:</h2>
<ul>
<li>Check if the XPath selector is correct</li>
<li>Verify that the element exists on the page</li>
<li>Try using browser developer tools to test the XPath selector</li>
<li>Consider using a CSS selector instead if possible</li>
</ul>
</body>
</html>`;
}
return element.outerHTML;
} else {
// Handle CSS selector
const element = document.querySelector(selector);
if (!element || !(element instanceof Element)) {
return `<!DOCTYPE html>
<html>
<head><title>CSS Element Not Found</title></head>
<body>
<h1>CSS Element Not Found</h1>
<p>The CSS selector did not match any elements on the page.</p>
<h2>Details:</h2>
<ul>
<li><strong>CSS Selector:</strong> ${selector}</li>
<li><strong>Document Title:</strong> ${document.title}</li>
<li><strong>Document URL:</strong> ${document.location.href}</li>
<li><strong>Page Content Length:</strong> ${
document.documentElement.outerHTML.length
} characters</li>
<li><strong>Similar Elements:</strong> ${
Array.from(document.querySelectorAll("*"))
.filter(
(el) =>
el.tagName.toLowerCase() ===
selector.split(/[.#\[\s>+~]/)[0].toLowerCase()
)
.slice(0, 5)
.map(
(el) =>
`&lt;${el.tagName.toLowerCase()}${el.id ? ` id="${el.id}"` : ""}${
el.className ? ` class="${el.className}"` : ""
}&gt;`
)
.join(", ") || "None found"
}</li>
</ul>
<h2>Suggestions:</h2>
<ul>
<li>Check if the CSS selector syntax is correct</li>
<li>Verify that the element exists on the page</li>
<li>Try using browser developer tools to test the CSS selector</li>
<li>Consider using a simpler selector (e.g., by ID or a unique class)</li>
<li>Check if the element is dynamically added to the page</li>
</ul>
</body>
</html>`;
}
return element.outerHTML;
}
} catch (err: unknown) {
return `Selector error: ${
err instanceof Error ? err.message : String(err)
}. For XPath, use '//' or '/' prefix. For CSS, use standard selectors.`;
}
}
return document.documentElement.outerHTML;
}, args.selector || null);

// Save HTML to a file in the tmp directory
const fs = await import("fs/promises");
const { randomBytes } = await import("crypto");
const TMP_DIR = path.resolve(__dirname, "../tmp");
await fs.mkdir(TMP_DIR, { recursive: true });
const unique = `${Date.now()}-${randomBytes(6).toString("hex")}`;
const filename = `stagehand-html-${unique}.html`;
const filePath = path.join(TMP_DIR, filename);
await fs.writeFile(filePath, html, "utf8");
const port = process.env.STAGEHAND_HTTP_PORT || 8080;
const url = `http://localhost:${port}/tmp/${filename}`;

return {
content: [
{
type: "text",
text: `HTML saved to: ${url}`,
},
],
isError: false,
};
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
return {
content: [
{
type: "text",
text: `Failed to get HTML: ${errorMsg}`,
},
{
type: "text",
text: `Operation logs:\n${operationLogs.join("\n")}`,
},
],
isError: true,
};
}

case "screenshot":
try {
const screenshotBuffer = await stagehand.page.screenshot({
Expand Down