|
| 1 | +# 🤖 Agentic Scraper |
| 2 | + |
| 3 | +The Agentic Scraper enables AI-powered browser automation for complex interactions like form filling, clicking buttons, and navigating multi-step workflows. |
| 4 | + |
| 5 | +## 🚀 Quick Start |
| 6 | + |
| 7 | +```javascript |
| 8 | +import { agenticScraper, getAgenticScraperRequest } from 'scrapegraph-js'; |
| 9 | + |
| 10 | +const apiKey = 'your-api-key'; |
| 11 | +const url = 'https://dashboard.scrapegraphai.com/'; |
| 12 | +const steps = [ |
| 13 | + 'Type email@gmail.com in email input box', |
| 14 | + 'Type test-password@123 in password input box', |
| 15 | + 'click on login' |
| 16 | +]; |
| 17 | + |
| 18 | +// Submit automation request |
| 19 | +const response = await agenticScraper(apiKey, url, steps, true); |
| 20 | +console.log('Request ID:', response.request_id); |
| 21 | + |
| 22 | +// Check results |
| 23 | +const result = await getAgenticScraperRequest(apiKey, response.request_id); |
| 24 | +console.log('Status:', result.status); |
| 25 | +``` |
| 26 | + |
| 27 | +## 📚 API Reference |
| 28 | + |
| 29 | +### `agenticScraper(apiKey, url, steps, useSession)` |
| 30 | + |
| 31 | +Performs automated browser actions on a webpage. |
| 32 | + |
| 33 | +**Parameters:** |
| 34 | +- `apiKey` (string): Your ScrapeGraph AI API key |
| 35 | +- `url` (string): The URL of the webpage to interact with |
| 36 | +- `steps` (string[]): Array of automation steps to perform |
| 37 | +- `useSession` (boolean, optional): Whether to use session management (default: true) |
| 38 | + |
| 39 | +**Returns:** `Promise<Object>` with `request_id` and initial `status` |
| 40 | + |
| 41 | +**Example Steps:** |
| 42 | +```javascript |
| 43 | +const steps = [ |
| 44 | + 'click on search bar', |
| 45 | + 'type "laptop" in search input', |
| 46 | + 'press Enter key', |
| 47 | + 'wait for 2 seconds', |
| 48 | + 'click on first result', |
| 49 | + 'scroll down to reviews' |
| 50 | +]; |
| 51 | +``` |
| 52 | + |
| 53 | +### `getAgenticScraperRequest(apiKey, requestId)` |
| 54 | + |
| 55 | +Retrieves the status or result of an agentic scraper request. |
| 56 | + |
| 57 | +**Parameters:** |
| 58 | +- `apiKey` (string): Your ScrapeGraph AI API key |
| 59 | +- `requestId` (string): The request ID from a previous agentic scraper call |
| 60 | + |
| 61 | +**Returns:** `Promise<Object>` with: |
| 62 | +- `status`: 'pending', 'completed', or 'failed' |
| 63 | +- `result`: Automation results (when completed) |
| 64 | +- `error`: Error message (when failed) |
| 65 | +- `created_at`: Request creation timestamp |
| 66 | +- `completed_at`: Completion timestamp (when completed) |
| 67 | + |
| 68 | +## 🎯 Use Cases |
| 69 | + |
| 70 | +### 🔐 Login Automation |
| 71 | +```javascript |
| 72 | +const loginSteps = [ |
| 73 | + 'click on email input', |
| 74 | + 'type "user@example.com" in email field', |
| 75 | + 'click on password input', |
| 76 | + 'type "password123" in password field', |
| 77 | + 'click login button', |
| 78 | + 'wait for dashboard to load' |
| 79 | +]; |
| 80 | + |
| 81 | +const response = await agenticScraper(apiKey, 'https://app.example.com/login', loginSteps, true); |
| 82 | +``` |
| 83 | + |
| 84 | +### 🛒 E-commerce Interaction |
| 85 | +```javascript |
| 86 | +const shoppingSteps = [ |
| 87 | + 'click on search bar', |
| 88 | + 'type "wireless headphones" in search', |
| 89 | + 'press Enter', |
| 90 | + 'wait for results to load', |
| 91 | + 'click on first product', |
| 92 | + 'click add to cart button', |
| 93 | + 'click view cart' |
| 94 | +]; |
| 95 | + |
| 96 | +const response = await agenticScraper(apiKey, 'https://shop.example.com', shoppingSteps, true); |
| 97 | +``` |
| 98 | + |
| 99 | +### 📝 Form Submission |
| 100 | +```javascript |
| 101 | +const formSteps = [ |
| 102 | + 'click on name input', |
| 103 | + 'type "John Doe" in name field', |
| 104 | + 'click on email input', |
| 105 | + 'type "john@example.com" in email field', |
| 106 | + 'click on message textarea', |
| 107 | + 'type "Hello, this is a test message" in message area', |
| 108 | + 'click submit button' |
| 109 | +]; |
| 110 | + |
| 111 | +const response = await agenticScraper(apiKey, 'https://example.com/contact', formSteps, false); |
| 112 | +``` |
| 113 | + |
| 114 | +## ⚡ Advanced Usage |
| 115 | + |
| 116 | +### Polling for Results |
| 117 | +```javascript |
| 118 | +async function waitForCompletion(requestId, timeoutSeconds = 120) { |
| 119 | + const startTime = Date.now(); |
| 120 | + const timeout = timeoutSeconds * 1000; |
| 121 | + |
| 122 | + while (Date.now() - startTime < timeout) { |
| 123 | + const status = await getAgenticScraperRequest(apiKey, requestId); |
| 124 | + |
| 125 | + if (status.status === 'completed') { |
| 126 | + return status.result; |
| 127 | + } else if (status.status === 'failed') { |
| 128 | + throw new Error(status.error); |
| 129 | + } |
| 130 | + |
| 131 | + await new Promise(resolve => setTimeout(resolve, 5000)); // Wait 5 seconds |
| 132 | + } |
| 133 | + |
| 134 | + throw new Error('Timeout waiting for completion'); |
| 135 | +} |
| 136 | +``` |
| 137 | + |
| 138 | +### Error Handling |
| 139 | +```javascript |
| 140 | +try { |
| 141 | + const response = await agenticScraper(apiKey, url, steps, true); |
| 142 | + const result = await waitForCompletion(response.request_id); |
| 143 | + console.log('Automation successful:', result); |
| 144 | +} catch (error) { |
| 145 | + if (error.message.includes('validation')) { |
| 146 | + console.log('Input validation failed:', error.message); |
| 147 | + } else if (error.message.toLowerCase().includes('timeout')) { |
| 148 | + console.log('Automation timed out'); |
| 149 | + } else { |
| 150 | + console.log('Automation failed:', error.message); |
| 151 | + } |
| 152 | +} |
| 153 | +``` |
| 154 | + |
| 155 | +## 📝 Step Syntax |
| 156 | + |
| 157 | +Steps should be written in natural language describing the action to perform: |
| 158 | + |
| 159 | +### Clicking Elements |
| 160 | +- `"click on login button"` |
| 161 | +- `"click on search icon"` |
| 162 | +- `"click on first result"` |
| 163 | + |
| 164 | +### Typing Text |
| 165 | +- `"type 'username' in email field"` |
| 166 | +- `"type 'password123' in password input"` |
| 167 | +- `"type 'search query' in search box"` |
| 168 | + |
| 169 | +### Keyboard Actions |
| 170 | +- `"press Enter key"` |
| 171 | +- `"press Tab key"` |
| 172 | +- `"press Escape key"` |
| 173 | + |
| 174 | +### Waiting |
| 175 | +- `"wait for 2 seconds"` |
| 176 | +- `"wait for page to load"` |
| 177 | +- `"wait for results to appear"` |
| 178 | + |
| 179 | +### Scrolling |
| 180 | +- `"scroll down"` |
| 181 | +- `"scroll to bottom"` |
| 182 | +- `"scroll to top"` |
| 183 | + |
| 184 | +## 🔧 Best Practices |
| 185 | + |
| 186 | +1. **Use Session Management**: Pass `true` as the `useSession` argument for multi-step workflows |
| 187 | +2. **Add Wait Steps**: Include wait times between actions for reliability |
| 188 | +3. **Be Specific**: Describe elements precisely, e.g. "login button" rather than just "button" |
| 189 | +4. **Handle Timeouts**: Implement proper timeout handling for long operations |
| 190 | +5. **Validate Inputs**: Check URLs and steps before making requests |
| 191 | + |
| 192 | +## 🚨 Common Errors |
| 193 | + |
| 194 | +### Input Validation Errors |
| 195 | +```javascript |
| 196 | +// ❌ Invalid URL |
| 197 | +await agenticScraper(apiKey, 'not-a-url', steps); |
| 198 | + |
| 199 | +// ❌ Empty steps |
| 200 | +await agenticScraper(apiKey, url, []); |
| 201 | + |
| 202 | +// ❌ Invalid step |
| 203 | +await agenticScraper(apiKey, url, ['click button', '']); // Empty step |
| 204 | +``` |
| 205 | + |
| 206 | +### Runtime Errors |
| 207 | +- **Element not found**: Make steps more specific or add wait times |
| 208 | +- **Timeout**: Increase polling timeout or break down complex steps |
| 209 | +- **Session expired**: Use session management for multi-step flows |
| 210 | + |
| 211 | +## 🌐 cURL Equivalent |
| 212 | + |
| 213 | +```bash |
| 214 | +curl --location 'https://api.scrapegraphai.com/v1/agentic-scrapper' \ |
| 215 | +--header 'SGAI-APIKEY: your-api-key' \ |
| 216 | +--header 'Content-Type: application/json' \ |
| 217 | +--data-raw '{ |
| 218 | + "url": "https://dashboard.scrapegraphai.com/", |
| 219 | + "use_session": true, |
| 220 | + "steps": [ |
| 221 | + "Type email@gmail.com in email input box", |
| 222 | + "Type test-password@123 in password input box", |
| 223 | + "click on login" |
| 224 | + ] |
| 225 | +}' |
| 226 | +``` |
| 227 | + |
| 228 | +## 📖 Examples |
| 229 | + |
| 230 | +Check out the example files in the `/examples` directory: |
| 231 | + |
| 232 | +- `agenticScraper_example.js` - Basic usage |
| 233 | +- `getAgenticScraperRequest_example.js` - Status checking |
| 234 | +- `agenticScraper_complete_example.js` - Complete workflow |
| 235 | +- `agenticScraper_advanced_example.js` - Advanced patterns with error handling |
| 236 | + |
| 237 | +## 💡 Tips |
| 238 | + |
| 239 | +- Start with simple steps and gradually add complexity |
| 240 | +- Test individual steps before combining them |
| 241 | +- Use browser developer tools to identify element selectors |
| 242 | +- Consider mobile vs desktop layouts when writing steps |
| 243 | +- Monitor request status regularly for long-running automations |
0 commit comments