-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrcm.js
44 lines (35 loc) · 1.15 KB
/
rcm.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// lowkey need error handling
import puppeteer from "puppeteer";
import fs from "fs/promises";
// Scrapes every name from the paginated search listing and writes the
// de-duplicated list to people.json in the current working directory.
//
// Flow: launch a browser, open the search page, then repeatedly collect the
// names on the current page and click the "next" chevron until no enabled
// "next" button remains. The browser is always closed, even on failure.
const browser = await puppeteer.launch({
  // Set to false to watch the browser being driven.
  headless: true,
});

// Elements whose text content is collected as one entry each.
const peopleSelector = "p.mui-16nhkcs";
// Matches the "next page" chevron only while its parent button is enabled;
// no match therefore means there is no further page to visit.
const nextSelector =
  'button:not([disabled]) > svg[data-testid="ChevronRightIcon"]';

const people = [];

try {
  const page = await browser.newPage();

  await page.goto("https://www.reversecanarymission.org/search", {
    waitUntil: "networkidle0",
  });

  while (true) {
    // Collect the current page BEFORE checking for a next button, so the
    // final page (which has no enabled "next" button) is not skipped.
    const newPeople = await page.evaluate((selector) => {
      const elements = Array.from(document.querySelectorAll(selector));
      return elements.map((element) => element.textContent);
    }, peopleSelector);
    people.push(...newPeople);
    console.log("scraped:", people.length);

    const nextButton = await page.$(nextSelector);
    if (!nextButton) {
      break;
    }

    // Start waiting for the navigation before the click is issued; waiting
    // only after the click resolves can miss a navigation that has already
    // completed and then hang until the timeout.
    // The 500 ms press delay slows the crawl to reduce the chance of being
    // blocked by the site.
    await Promise.all([
      page.waitForNavigation({ waitUntil: "networkidle0" }),
      nextButton.click({ delay: 500 }),
    ]);
  }
} finally {
  // Always release the browser process, including when scraping throws.
  await browser.close();
}

// The listing can contain repeated names; keep the first occurrence of each.
const unique = [...new Set(people)];
await fs.writeFile("people.json", JSON.stringify(unique), "utf-8");