-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathscrappers.js
111 lines (89 loc) · 3.77 KB
/
scrappers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
const utils = require("./utils");
// IDs of the conversations that must never be scrapped: scrapDiscussions
// drops every conversation URL that contains one of these strings.
const blackListedIDS = [
"62c0a0266dc4b2010018f8df62e8dc4fdd568901001aa014",
"62e8dc4fdd568901001aa01462f7c675dd5689010020be44",
"5ef054d702a182010059961262e8dc4fdd568901001aa014"
];
/**
 * Lists the URLs of the conversations shown in the message list, skipping
 * every conversation whose URL contains a blacklisted ID.
 *
 * @param {object} page - Puppeteer page (only `$$eval` is used).
 * @returns {Promise<string[]>} URLs of the conversations that need to be checked.
 */
async function scrapDiscussions(page) {
  // Read the resolved `href` of every conversation link in a single in-page
  // evaluation. `page.$$` ignores a callback argument, and resolving each
  // element handle separately costs two extra round-trips per link while
  // leaving the handles undisposed.
  const hrefs = await page.$$eval("div.messageList > div > div > a", (links) =>
    links.map((link) => link.href)
  );
  // Keep only the conversations that are not blacklisted
  return hrefs.filter(
    (href) => !blackListedIDS.some((id) => href.includes(id))
  );
}
/**
 * Opens one conversation and rebuilds its complete message history from the
 * messages HTTP response plus the first and last messages shown in the UI.
 *
 * @param {object} page - Puppeteer page used to navigate and query the DOM.
 * @param {string} href - URL of the conversation; its last path segment is
 *   used as the conversation ID.
 * @param {string} OUR_TINDER_USER_ID - Forwarded as-is to
 *   `utils.getTextAndRecipient`.
 * @returns {Promise<{messages: object[], theirName: string, conversationId: string}>}
 *   The messages sorted by ascending `timestamp`, the match's displayed name
 *   and the conversation ID.
 */
async function scrapConversation(page, href, OUR_TINDER_USER_ID) {
  const nameSelector = "nav > a > span";
  // Get the conversation ID: it is the last segment of the URL
  const conversationId = href.split("/").pop();
  const urlToSpy = `https://api.gotinder.com/v2/matches/${conversationId}/messages`;
  // Spy the responses to get the one containing the messages. The spy must be
  // installed before navigating, otherwise the response could be missed.
  const httpResponseWeWaitForPromise = page.waitForResponse(
    (response) =>
      response.url().startsWith(urlToSpy) && response.status() === 200
  );
  // Open the conversation. The spied response is awaited in the same
  // Promise.all so that, if the navigation fails, its later rejection is
  // already handled instead of surfacing as an unhandled rejection.
  // NOTE(review): page.goto already waits for the navigation, so
  // waitForNavigation looks redundant - confirm before removing it.
  const [httpResponseWeWait] = await Promise.all([
    httpResponseWeWaitForPromise,
    page.waitForNavigation(),
    page.goto(href, { waitUntil: "networkidle2" }),
    // Wait for the name to be displayed
    page.waitForSelector(nameSelector)
  ]);
  // Get the match's name in the UI
  const theirName = await page.$eval(nameSelector, (el) => el.innerText);
  console.log("Scrapping message for ", theirName);
  // Read the body of the HTTP response containing the messages
  const conversationResponse = await httpResponseWeWait.json();
  // Messages as returned by the API.
  // NOTE(review): presumably ordered from the most recent to the oldest -
  // confirm. The order is not relied upon: the history is sorted below.
  const messages = conversationResponse.data.messages;
  /**
   * STRUCTURE OF A MESSAGE
   "_id": "62f931baecf99a010083392f",
   "match_id": "61560a0ac6d55901005ef67562e8dc4fdd568901001aa014",
   "sent_date": "2022-08-14T17:32:42.899Z",
   "message": "I have them for like 2 months 🙄",
   "to": "61560a0ac6d55901005ef675",
   "from": "62e8dc4fdd568901001aa014",
   "created_date": "2022-08-14T17:32:42.899Z",
   "timestamp": 1660498362899
   */
  // Unfortunately Tinder sends the last message through websockets,
  // so we need to grab the last message within the UI itself.
  // Also, there is a display bug where the last message is sometimes displayed
  // as the first one, so we need to run some checks to be certain to have the
  // proper conversation history.
  // We get all the message elements (page.$$ only takes a selector)
  const messagesUI = await page.$$("div.msg");
  // We grab the ref of the first and last one.
  // NOTE(review): both are undefined when no message element is found; how
  // utils.getTextAndRecipient copes with that is not visible from here.
  const firstMessageInUIEl = messagesUI[0];
  const lastMessageInUIEl = messagesUI[messagesUI.length - 1];
  // We grab their content and sender
  const [firstMessage, lastMessage] = await Promise.all([
    utils.getTextAndRecipient(firstMessageInUIEl, OUR_TINDER_USER_ID, ""),
    utils.getTextAndRecipient(lastMessageInUIEl, OUR_TINDER_USER_ID, "")
  ]);
  // Run the checks and reconstruct the message history: one of the first or
  // last message will already be in the message list, meaning we need to add
  // the other one
  const messagesHistory = utils.reconstructMessageHistory(
    messages,
    firstMessage,
    lastMessage
  );
  // Sort a copy so the array handed back by utils is left untouched
  const orderedMessages = [...messagesHistory].sort(
    (x, y) => x.timestamp - y.timestamp
  );
  return { messages: orderedMessages, theirName, conversationId };
}
// Public API of this module: the two scrapping entry points defined above.
module.exports = {
scrapConversation,
scrapDiscussions
};