diff --git a/docs/guides/examples/firecrawl-url-crawl.mdx b/docs/guides/examples/firecrawl-url-crawl.mdx new file mode 100644 index 0000000000..b55b362693 --- /dev/null +++ b/docs/guides/examples/firecrawl-url-crawl.mdx @@ -0,0 +1,103 @@ +--- +title: "Crawl a URL using Firecrawl" +sidebarTitle: "Firecrawl URL crawl" +description: "This example demonstrates how to crawl a URL using Firecrawl with Trigger.dev." +--- + +## Overview + +Firecrawl is a tool for crawling websites and extracting clean markdown that's structured in an LLM-ready format. + +Here are two examples of how to use Firecrawl with Trigger.dev: + +## Prerequisites + +- A project with [Trigger.dev initialized](/quick-start) +- A [Firecrawl](https://firecrawl.dev/) account + +## Example 1: crawl an entire website with Firecrawl + +This task crawls a website and returns the `crawlResult` object. You can set the `limit` parameter to control the number of URLs that are crawled. + +```ts trigger/firecrawl-url-crawl.ts +import FirecrawlApp from "@mendable/firecrawl-js"; +import { task } from "@trigger.dev/sdk/v3"; + +// Initialize the Firecrawl client with your API key +const firecrawlClient = new FirecrawlApp({ + apiKey: process.env.FIRECRAWL_API_KEY, // Get this from your Firecrawl dashboard +}); + +export const firecrawlCrawl = task({ + id: "firecrawl-crawl", + run: async (payload: { url: string }) => { + const { url } = payload; + + // Crawl: scrapes all the URLs of a web page and returns content in LLM-ready format + const crawlResult = await firecrawlClient.crawlUrl(url, { + limit: 100, // Limit the number of URLs to crawl + scrapeOptions: { + formats: ["markdown", "html"], + }, + }); + + if (!crawlResult.success) { + throw new Error(`Failed to crawl: ${crawlResult.error}`); + } + + return { + data: crawlResult, + }; + }, +}); +``` + +### Testing your task + +You can test your task by triggering it from the Trigger.dev dashboard. 
+ +```json +"url": "" // Replace with the URL you want to crawl +``` + +## Example 2: scrape a single URL with Firecrawl + +This task scrapes a single URL and returns the `scrapeResult` object. + +```ts trigger/firecrawl-url-scrape.ts +import FirecrawlApp, { ScrapeResponse } from "@mendable/firecrawl-js"; +import { task } from "@trigger.dev/sdk/v3"; + +// Initialize the Firecrawl client with your API key +const firecrawlClient = new FirecrawlApp({ + apiKey: process.env.FIRECRAWL_API_KEY, // Get this from your Firecrawl dashboard +}); + +export const firecrawlScrape = task({ + id: "firecrawl-scrape", + run: async (payload: { url: string }) => { + const { url } = payload; + + // Scrape: scrapes a URL and gets its content in LLM-ready format (markdown, structured data via LLM Extract, screenshot, html) + const scrapeResult = (await firecrawlClient.scrapeUrl(url, { + formats: ["markdown", "html"], + })) as ScrapeResponse; + + if (!scrapeResult.success) { + throw new Error(`Failed to scrape: ${scrapeResult.error}`); + } + + return { + data: scrapeResult, + }; + }, +}); +``` + +### Testing your task + +You can test your task by triggering it from the Trigger.dev dashboard. + +```json +"url": "" // Replace with the URL you want to scrape +``` diff --git a/docs/guides/introduction.mdx b/docs/guides/introduction.mdx index fcc46e2454..4e8afe399c 100644 --- a/docs/guides/introduction.mdx +++ b/docs/guides/introduction.mdx @@ -37,21 +37,23 @@ Get set up fast using our detailed walk-through guides. Tasks you can copy and paste to get started with Trigger.dev. They can all be extended and customized to fit your needs. -| Example task | Description | -| :---------------------------------------------------------------------------- | :----------------------------------------------------------------------------- | -| [DALL·E 3 image generation](/guides/examples/dall-e3-generate-image) | Use OpenAI's GPT-4o and DALL·E 3 to generate an image and text. 
| -| [Deepgram audio transcription](/guides/examples/deepgram-transcribe-audio) | Transcribe audio using Deepgram's speech recognition API. | -| [FFmpeg video processing](/guides/examples/ffmpeg-video-processing) | Use FFmpeg to process a video in various ways and save it to Cloudflare R2. | -| [OpenAI with retrying](/guides/examples/open-ai-with-retrying) | Create a reusable OpenAI task with custom retry options. | -| [PDF to image](/guides/examples/pdf-to-image) | Use `MuPDF` to turn a PDF into images and save them to Cloudflare R2. | -| [React to PDF](/guides/examples/react-pdf) | Use `react-pdf` to generate a PDF and save it to Cloudflare R2. | -| [Puppeteer](/guides/examples/puppeteer) | Use Puppeteer to generate a PDF or scrape a webpage. | -| [Resend email sequence](/guides/examples/resend-email-sequence) | Send a sequence of emails over several days using Resend with Trigger.dev. | -| [Sentry error tracking](/guides/examples/sentry-error-tracking) | Automatically send errors to Sentry from your tasks. | -| [Sharp image processing](/guides/examples/sharp-image-processing) | Use Sharp to process an image and save it to Cloudflare R2. | -| [Supabase database operations](/guides/examples/supabase-database-operations) | Run basic CRUD operations on a table in a Supabase database using Trigger.dev. | -| [Supabase Storage upload](/guides/examples/supabase-storage-upload) | Download a video from a URL and upload it to Supabase Storage using S3. | -| [Vercel AI SDK](/guides/examples/vercel-ai-sdk) | Use Vercel AI SDK to generate text using OpenAI. | +| Example task | Description | +| :---------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- | +| [DALL·E 3 image generation](/guides/examples/dall-e3-generate-image) | Use OpenAI's GPT-4o and DALL·E 3 to generate an image and text. 
| +| [Deepgram audio transcription](/guides/examples/deepgram-transcribe-audio) | Transcribe audio using Deepgram's speech recognition API. | +| [FFmpeg video processing](/guides/examples/ffmpeg-video-processing) | Use FFmpeg to process a video in various ways and save it to Cloudflare R2. | +| [Firecrawl URL crawl](/guides/examples/firecrawl-url-crawl) | Learn how to use Firecrawl to crawl a URL and return LLM-ready markdown. | +| [OpenAI with retrying](/guides/examples/open-ai-with-retrying) | Create a reusable OpenAI task with custom retry options. | +| [PDF to image](/guides/examples/pdf-to-image) | Use `MuPDF` to turn a PDF into images and save them to Cloudflare R2. | +| [React to PDF](/guides/examples/react-pdf) | Use `react-pdf` to generate a PDF and save it to Cloudflare R2. | +| [Puppeteer](/guides/examples/puppeteer) | Use Puppeteer to generate a PDF or scrape a webpage. | +| [Resend email sequence](/guides/examples/resend-email-sequence) | Send a sequence of emails over several days using Resend with Trigger.dev. | +| [Scrape Hacker News](/guides/examples/scrape-hacker-news) | Scrape Hacker News using BrowserBase and Puppeteer, summarize the articles with ChatGPT and send an email of the summary every weekday using Resend. | +| [Sentry error tracking](/guides/examples/sentry-error-tracking) | Automatically send errors to Sentry from your tasks. | +| [Sharp image processing](/guides/examples/sharp-image-processing) | Use Sharp to process an image and save it to Cloudflare R2. | +| [Supabase database operations](/guides/examples/supabase-database-operations) | Run basic CRUD operations on a table in a Supabase database using Trigger.dev. | +| [Supabase Storage upload](/guides/examples/supabase-storage-upload) | Download a video from a URL and upload it to Supabase Storage using S3. | +| [Vercel AI SDK](/guides/examples/vercel-ai-sdk) | Use Vercel AI SDK to generate text using OpenAI. 
| If you would like to see a guide for your framework, or an example task for your use case, please diff --git a/docs/mint.json b/docs/mint.json index 17e305f13c..03278df9da 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -370,6 +370,7 @@ "guides/examples/dall-e3-generate-image", "guides/examples/deepgram-transcribe-audio", "guides/examples/ffmpeg-video-processing", + "guides/examples/firecrawl-url-crawl", "guides/examples/open-ai-with-retrying", "guides/examples/pdf-to-image", "guides/examples/puppeteer",