-
Notifications
You must be signed in to change notification settings - Fork 0
/
server.js
189 lines (170 loc) · 5.08 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
const express = require("express");
const app = express();
const { chromium } = require("playwright-chromium");
const mongoose = require("mongoose");
const multer = require("multer");
const upload = multer();
const story = require(__dirname + "/models/story"); //dtabase model
// Body parsers for JSON and URL-encoded form posts.
// `extended: true` is the value Express 4 falls back to when the option is
// omitted; passing it explicitly keeps that behaviour and silences the
// "undefined extended" deprecation warning.
app.use(express.json());
app.use(express.urlencoded({ extended: true }));
// Serve the static pages and scraper snapshots stored under ./public.
app.use(express.static(__dirname + "/public"));

// Connecting to database.
// SECURITY(review): the fallback URI below embeds a user name and password in
// source control. Set MONGODB_URI in the environment to override it, and
// consider rotating this credential and removing the fallback.
mongoose.connect(
  process.env.MONGODB_URI ||
    "mongodb+srv://appy:appypass@cluster0.mzeud.mongodb.net/insta-scrapper?retryWrites=true&w=majority",
  { useNewUrlParser: true, useUnifiedTopology: true }
);
const db = mongoose.connection;
// Log connection-level errors emitted by the default mongoose connection.
db.on("error", console.error.bind(console, "connection error:"));
/**
 * Opens a headless Chromium browser, logs in to Instagram with the given
 * credentials, steps through the story viewer and saves the first media URL
 * of each story to the database.
 *
 * The browser is always closed, whether the run succeeds or throws.
 *
 * @param {string} userName - Instagram user name typed into the login form.
 * @param {string} pswd - Instagram password typed into the login form.
 * @returns {Promise<number>} Number of stories saved to the database.
 * @throws {Error} "Ip blocked by Instagram sentry" when the login form's
 *   submit button never becomes visible.
 * @throws {Error} "Invalid User Name or Password" when the search box never
 *   becomes visible after submitting the form.
 * @throws {Error} Any other Playwright or database error is propagated to the
 *   caller as a rejection.
 */
async function scrape(userName, pswd) {
  //open browser
  const browser = await chromium.launch({
    args: ["--no-sandbox"],
  });
  // Everything after launch runs inside try/finally so a failed step does not
  // leave a Chromium process behind.
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    await page.goto("https://www.instagram.com/accounts/login/");
    //take 1st screen shot
    await page.screenshot({
      path: `./public/snapshots/ig-sign-in.png`,
    });
    //check if the right page loads
    try {
      await page.waitForSelector("[type=submit]", {
        state: "visible",
      });
    } catch (e) {
      // Keep the underlying timeout as `cause` (ignored by runtimes that do
      // not support the option).
      throw new Error("Ip blocked by Instagram sentry", { cause: e });
    }
    // Take second screen shot (same path: overwrites the first one once the
    // form is visible).
    await page.screenshot({
      path: `./public/snapshots/ig-sign-in.png`,
    });
    //logging in
    await page.type("[name=username]", userName);
    await page.type('[type="password"]', pswd);
    await page.click("[type=submit]");
    try {
      await page.waitForSelector("[placeholder=Search]", {
        state: "visible",
      });
    } catch (e) {
      throw new Error("Invalid User Name or Password", { cause: e });
    }
    // `state` is a waitForSelector option, not a navigation option, so only
    // the timeout is passed to goto().
    await page.goto("https://www.instagram.com/", {
      timeout: 60000,
    });
    // NOTE(review): the class names below are obfuscated Instagram selectors;
    // presumably the story tray, its first entry and the story media - confirm
    // against the current markup.
    await page.waitForSelector("div.Fd_fQ", {
      state: "visible",
      timeout: 60000,
    });
    await page.click("div.EcJQs", { position: { x: 60, y: 40 } });
    await page.waitForSelector("div.qbCDp img", {
      state: "visible",
      timeout: 60000,
    });
    //Take snap shot after opening stories
    await page.screenshot({ path: `./public/snapshots/profile.png` });
    let url = page.url();
    console.log(url);
    let stories = 0;
    // Keep going while the page URL is neither the home page nor the user's
    // profile URL; each pass saves the media of the story currently shown.
    while (
      url !== "https://www.instagram.com/" &&
      url !== "https://www.instagram.com/" + userName
    ) {
      // Prefer video sources; fall back to the images when there is no video.
      const data = await page.evaluate(() => {
        const videoSources = document.querySelectorAll("video.y-yJ5 source");
        const nodes =
          videoSources.length !== 0
            ? videoSources
            : document.querySelectorAll("div.qbCDp img");
        return Array.from(nodes).map((node) => node.src);
      });
      //saving to database
      const storyURL = data[0];
      if (storyURL) {
        // Awaited so a database failure rejects scrape() (and reaches the
        // caller's try/catch) instead of throwing from a detached callback.
        await story.create({ userName: userName, storyURL: storyURL });
        stories++;
      }
      console.log(data);
      await page.click("button.FhutL");
      await page.waitForSelector("img", {
        state: "visible",
      });
      url = page.url();
      console.log(url);
    }
    console.log(stories + " stories added");
    return stories;
  } finally {
    await browser.close();
  }
}
// Root: serve the landing page.
// res.sendFile() needs either an absolute path or a `root` option; the
// relative path alone makes Express throw, so pass `root` like the other
// page routes in this file do.
app.get("/", function (request, response) {
  response.sendFile("./public/index.html", { root: __dirname });
});
// Confirmation page shown once stories have been added.
app.get("/added", (req, res) => {
  const sendOptions = { root: __dirname };
  res.sendFile("./public/added.html", sendOptions);
});
// Generic error page; other handlers redirect here on failure.
app.get("/error", (req, res) => {
  const sendOptions = { root: __dirname };
  res.sendFile("./public/error.html", sendOptions);
});
// Static "heroku-error" page.
app.get("/heroku-error", (req, res) => {
  const sendOptions = { root: __dirname };
  res.sendFile("./public/heroku-error.html", sendOptions);
});
// Serves the sign-in page screenshot written by the scraper.
app.get("/pic1", (req, res) => {
  const sendOptions = { root: __dirname };
  res.sendFile("./public/snapshots/ig-sign-in.png", sendOptions);
});
/**
 * Escapes the HTML metacharacters in a value so it can be placed inside
 * element text or a double-quoted attribute. It does not validate URL schemes.
 *
 * @param {*} value - Value to escape; converted with String() first.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

// Link to added stories: renders one link per saved story as a small HTML page.
app.get("/stories", async (request, response) => {
  let data;
  try {
    data = await story.find({});
  } catch (e) {
    // Without this, a rejected query inside an async handler would leave the
    // request hanging; follow the POST /scrape convention instead.
    console.log(e);
    response.redirect("/error");
    return;
  }
  let html = "<h2>Link to the added stories</h2>";
  if (data.length === 0) {
    html += "<h3>No added stories</h3>";
  } else {
    data.forEach(function (item, index) {
      // Stored URLs come from scraped pages, so escape them before putting
      // them into the href attribute.
      html +=
        '<div><a href="' +
        escapeHtml(item.storyURL) +
        '" target="_blank" rel="noopener">link to story {' +
        index +
        "}</a></div>";
    });
  }
  html += '<h3><a href="/">Add Stories</a></h3>';
  response.send(html);
});
// POST request to add stories.
// Reads `userName` and `pswd` from the multipart form body (parsed by
// upload.none()), runs the scraper and redirects to /added on success or
// /error on any failure.
app.post("/scrape", upload.none(), async (request, response) => {
  const { userName, pswd } = request.body || {};
  // Reject missing, empty and non-string fields up front so no browser is
  // launched for a request that cannot succeed.
  const hasCredentials =
    typeof userName === "string" &&
    typeof pswd === "string" &&
    userName !== "" &&
    pswd !== "";
  if (!hasCredentials) {
    console.log("Please enter username and password");
    response.redirect("/error");
    return;
  }
  console.log("Running for " + userName);
  try {
    await scrape(userName, pswd);
  } catch (e) {
    console.log(e);
    response.redirect("/error");
    return;
  }
  response.redirect("/added");
});
// Use the port given in the PORT environment variable, falling back to 3000
// when it is unset or empty.
let port = process.env.PORT;
if (port == null || port === "") {
  port = 3000;
}
// Log from the listen callback so the message is only printed once the server
// is actually accepting connections.
app.listen(port, function () {
  console.log("App is running on port " + port);
});