|
| 1 | +--- |
| 2 | +title: "Puppeteer" |
| 3 | +sidebarTitle: "Puppeteer" |
| 4 | +description: "These examples demonstrate how to use Puppeteer with Trigger.dev." |
| 5 | +--- |
| 6 | + |
| 7 | +import LocalDevelopment from "/snippets/local-development-extensions.mdx"; |
| 8 | +import ScrapingWarning from "/snippets/web-scraping-warning.mdx"; |
| 9 | + |
| 10 | +## Overview |
| 11 | + |
| 12 | +There are 3 example tasks to follow on this page: |
| 13 | + |
| 14 | +1. [Basic example](/examples/puppeteer#basic-example) |
| 15 | +2. [Generate a PDF from a web page](/examples/puppeteer#generate-a-pdf-from-a-web-page) |
| 16 | +3. [Scrape content from a web page](/examples/puppeteer#scrape-content-from-a-web-page) |
| 17 | + |
| 18 | +<ScrapingWarning/> |
| 19 | + |
| 20 | +## Build configurations |
| 21 | + |
| 22 | +To use all examples on this page, you'll first need to add these build settings to your `trigger.config.ts` file: |
| 23 | + |
| 24 | +```ts trigger.config.ts |
| 25 | +import { defineConfig } from "@trigger.dev/sdk/v3"; |
| 26 | +import { puppeteer } from "@trigger.dev/build/extensions/puppeteer"; |
| 27 | + |
| 28 | +export default defineConfig({ |
| 29 | +  project: "<project ref>", |
| 30 | +  // Your other config settings... |
| 31 | +  build: { |
| 32 | +    extensions: [puppeteer()], // This is required to use the Puppeteer library |
| 33 | +  }, |
| 34 | +}); |
| 35 | +``` |
| 36 | + |
| 37 | +Learn more about [build configurations](/config/config-file#build-configuration) including setting default retry settings, customizing the build environment, and more. |
| 38 | + |
| 39 | +## Set an environment variable |
| 40 | + |
| 41 | +Set the following environment variable in your [Trigger.dev dashboard](/deploy-environment-variables) or [using the SDK](/deploy-environment-variables#in-your-code): |
| 42 | + |
| 43 | +```bash |
| 44 | +PUPPETEER_EXECUTABLE_PATH="/usr/bin/google-chrome-stable" |
| 45 | +``` |
| 46 | + |
| 47 | +## Basic example |
| 48 | + |
| 49 | +### Overview |
| 50 | + |
| 51 | +In this example we use [Puppeteer](https://pptr.dev/) to log out the title of a web page, in this case from the [Trigger.dev](https://trigger.dev) landing page. |
| 52 | + |
| 53 | +### Task code |
| 54 | + |
| 55 | +```ts trigger/puppeteer-basic-example.ts |
| 56 | +import { logger, task } from "@trigger.dev/sdk/v3"; |
| 57 | +import puppeteer from "puppeteer"; |
| 58 | + |
| 59 | +// Opens the Trigger.dev landing page in a launched browser and logs the page title. |
| 60 | +export const puppeteerTask = task({ |
| 61 | +  id: "puppeteer-log-title", |
| 62 | +  run: async () => { |
| 63 | +    const browser = await puppeteer.launch(); |
| 64 | + |
| 65 | +    try { |
| 66 | +      const page = await browser.newPage(); |
| 67 | + |
| 68 | +      await page.goto("https://trigger.dev"); |
| 69 | + |
| 70 | +      const content = await page.title(); |
| 71 | +      logger.info("Content", { content }); |
| 72 | +    } finally { |
| 73 | +      // Close the browser even if navigation fails, so the browser process is not left running |
| 74 | +      await browser.close(); |
| 75 | +    } |
| 76 | +  }, |
| 77 | +}); |
| 73 | +``` |
| 74 | + |
| 75 | +### Testing your task |
| 76 | + |
| 77 | +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests). |
| 78 | + |
| 79 | +## Generate a PDF from a web page |
| 80 | + |
| 81 | +### Overview |
| 82 | + |
| 83 | +In this example we use [Puppeteer](https://pptr.dev/) to generate a PDF from the [Trigger.dev](https://trigger.dev) landing page and upload it to [Cloudflare R2](https://developers.cloudflare.com/r2/). |
| 84 | + |
| 85 | +### Task code |
| 86 | + |
| 87 | +```ts trigger/puppeteer-generate-pdf.ts |
| 88 | +import { logger, task } from "@trigger.dev/sdk/v3"; |
| 89 | +import puppeteer from "puppeteer"; |
| 90 | +import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3"; |
| 91 | + |
| 92 | +// Initialize the S3-compatible client used to upload to R2 |
| 93 | +const s3Client = new S3Client({ |
| 94 | +  region: "auto", |
| 95 | +  endpoint: process.env.S3_ENDPOINT, |
| 96 | +  credentials: { |
| 97 | +    accessKeyId: process.env.R2_ACCESS_KEY_ID ?? "", |
| 98 | +    secretAccessKey: process.env.R2_SECRET_ACCESS_KEY ?? "", |
| 99 | +  }, |
| 100 | +}); |
| 101 | + |
| 102 | +// Renders the given page to a PDF and returns the PDF bytes. |
| 103 | +// The browser is always closed, even when navigation or rendering throws. |
| 104 | +async function renderPageToPdf(targetUrl: string) { |
| 105 | +  const browser = await puppeteer.launch(); |
| 106 | + |
| 107 | +  try { |
| 108 | +    const page = await browser.newPage(); |
| 109 | +    const response = await page.goto(targetUrl); |
| 110 | +    const url = response?.url() ?? "No URL found"; |
| 111 | + |
| 112 | +    // Generate PDF from the web page |
| 113 | +    const generatePdf = await page.pdf(); |
| 114 | + |
| 115 | +    logger.info("PDF generated from URL", { url }); |
| 116 | + |
| 117 | +    return generatePdf; |
| 118 | +  } finally { |
| 119 | +    await browser.close(); |
| 120 | +  } |
| 121 | +} |
| 122 | + |
| 123 | +// Generates a PDF of the Trigger.dev landing page, uploads it to R2 and returns its URL. |
| 124 | +export const puppeteerWebpageToPDF = task({ |
| 125 | +  id: "puppeteer-webpage-to-pdf", |
| 126 | +  run: async () => { |
| 127 | +    const generatePdf = await renderPageToPdf("https://trigger.dev"); |
| 128 | + |
| 129 | +    // Upload to R2 |
| 130 | +    const s3Key = `pdfs/test.pdf`; |
| 131 | +    const uploadParams = { |
| 132 | +      Bucket: process.env.S3_BUCKET, |
| 133 | +      Key: s3Key, |
| 134 | +      Body: generatePdf, |
| 135 | +      ContentType: "application/pdf", |
| 136 | +    }; |
| 137 | + |
| 138 | +    // Log only the destination: `Body` holds the entire PDF and should not be written to the logs |
| 139 | +    logger.log("Uploading to R2 with params", { |
| 140 | +      Bucket: uploadParams.Bucket, |
| 141 | +      Key: uploadParams.Key, |
| 142 | +      ContentType: uploadParams.ContentType, |
| 143 | +    }); |
| 144 | + |
| 145 | +    // Upload the PDF to R2 and return the URL. |
| 146 | +    await s3Client.send(new PutObjectCommand(uploadParams)); |
| 147 | + |
| 148 | +    // Path-style object URL on the configured endpoint (<endpoint>/<bucket>/<key>). |
| 149 | +    // NOTE: this is the S3 API location; it is only publicly readable if the bucket is exposed. |
| 150 | +    const s3Url = `${process.env.S3_ENDPOINT}/${process.env.S3_BUCKET}/${s3Key}`; |
| 151 | +    logger.log("PDF uploaded to R2", { url: s3Url }); |
| 152 | +    return { pdfUrl: s3Url }; |
| 153 | +  }, |
| 154 | +}); |
| 135 | + |
| 136 | +``` |
| 137 | + |
| 138 | +### Testing your task |
| 139 | + |
| 140 | +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests). |
| 141 | + |
| 142 | +## Scrape content from a web page |
| 143 | + |
| 144 | +### Overview |
| 145 | + |
| 146 | +In this example we use [Puppeteer](https://pptr.dev/) with a [Browserbase](https://www.browserbase.com/) proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out. See [this list](/examples/puppeteer#proxying) for more proxying services we recommend. |
| 147 | + |
| 148 | +<Note> |
| 149 | + When web scraping, you MUST use the technique below which uses a proxy with Puppeteer. Direct scraping without using `browserWSEndpoint` is prohibited and will result in account suspension. |
| 150 | +</Note> |
| 151 | + |
| 152 | +### Task code |
| 153 | + |
| 154 | +```ts trigger/scrape-website.ts |
| 155 | +import { logger, task } from "@trigger.dev/sdk/v3"; |
| 156 | +import puppeteer from "puppeteer-core"; |
| 157 | + |
| 158 | +// Connects to a remote Browserbase browser, reads the GitHub stars count from the |
| 159 | +// Trigger.dev landing page, logs it and returns it as `{ starCount }`. |
| 160 | +export const puppeteerScrapeWithProxy = task({ |
| 161 | +  id: "puppeteer-scrape-with-proxy", |
| 162 | +  run: async () => { |
| 163 | +    const browser = await puppeteer.connect({ |
| 164 | +      browserWSEndpoint: `wss://connect.browserbase.com?apiKey=${process.env.BROWSERBASE_API_KEY}`, |
| 165 | +    }); |
| 166 | + |
| 167 | +    // Everything after connecting runs inside the try block so the browser is closed on any failure |
| 168 | +    try { |
| 169 | +      const page = await browser.newPage(); |
| 170 | + |
| 171 | +      // Set up Browserbase proxy authentication |
| 172 | +      await page.authenticate({ |
| 173 | +        username: "api", |
| 174 | +        password: process.env.BROWSERBASE_API_KEY ?? "", |
| 175 | +      }); |
| 176 | + |
| 177 | +      // Navigate to the target website |
| 178 | +      await page.goto("https://trigger.dev", { waitUntil: "networkidle0" }); |
| 179 | + |
| 180 | +      // Scrape the GitHub stars count |
| 181 | +      const starCount = await page.evaluate(() => { |
| 182 | +        const starElement = document.querySelector(".github-star-count"); |
| 183 | +        const text = starElement?.textContent ?? "0"; |
| 184 | +        const numberText = text.replace(/[^0-9]/g, ""); |
| 185 | +        // An element without any digits would otherwise parse to NaN; report 0 instead |
| 186 | +        return numberText === "" ? 0 : Number.parseInt(numberText, 10); |
| 187 | +      }); |
| 188 | + |
| 189 | +      logger.info("GitHub star count", { starCount }); |
| 190 | + |
| 191 | +      return { starCount }; |
| 192 | +    } catch (error) { |
| 193 | +      logger.error("Error during scraping", { |
| 194 | +        error: error instanceof Error ? error.message : String(error), |
| 195 | +      }); |
| 196 | +      // Rethrow so the run is marked as failed |
| 197 | +      throw error; |
| 198 | +    } finally { |
| 199 | +      await browser.close(); |
| 200 | +    } |
| 201 | +  }, |
| 202 | +}); |
| 198 | +``` |
| 199 | + |
| 200 | +### Testing your task |
| 201 | + |
| 202 | +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests). |
| 203 | + |
| 204 | +<LocalDevelopment packages={"the Puppeteer library."} /> |
| 205 | + |
| 206 | +## Proxying |
| 207 | + |
| 208 | +If you're using Trigger.dev Cloud and Puppeteer or any other tool to scrape content from websites you don't own, you'll need to proxy your requests. **If you don't, you'll risk getting our IP address blocked and we will ban you from our service.** |
| 209 | + |
| 210 | +Here is a list of proxy services we recommend: |
| 211 | + |
| 212 | +- [Browserbase](https://www.browserbase.com/) |
| 213 | +- [Brightdata](https://brightdata.com/) |
| 214 | +- [Browserless](https://browserless.io/) |
| 215 | +- [Oxylabs](https://oxylabs.io/) |
| 216 | +- [ScrapingBee](https://scrapingbee.com/) |
| 217 | +- [Smartproxy](https://smartproxy.com/) |
0 commit comments