-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdomains.js
73 lines (51 loc) · 1.49 KB
/
domains.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// `request` is called below with an options object ({ url }) and an
// (error, response, body) callback to fetch search results.
var request = require("request"),
// Project-local data module; this file uses its `domainQueue` array and its
// `ready()` and `queueUser()` functions.
data = require(__dirname + "/data.js"),
// Domains that have been fully scraped by this process, keyed by domain
// (values are always `true`).
scraped = {};
/**
 * Base search URL for submissions. The query is sorted by `create_ts`
 * descending and filtered to items of type `submission`; the domain to look
 * up is appended as the value of the trailing `q=` parameter, followed by
 * `start`/`limit` paging parameters added by the scrape loop.
 */
var url = [
  "http://api.thriftdb.com/api.hnsearch.com/items/_search",
  "?sortby=create_ts+desc",
  "&filter[fields][type]=submission",
  "&q="
].join("");
/**
 * Domain scrape loop.
 *
 * Once a second this polls `data.domainQueue`. When the queue is non-empty and
 * `data.ready()` is truthy, it takes the first domain, pages through the
 * search API 100 items at a time (at most 1000 items: offsets 0..900) and
 * passes each result's `item.username` to `data.queueUser`. A finished domain
 * is removed from the queue and recorded in `scraped`, so it is skipped if it
 * shows up in the queue again.
 *
 * A request error or an unparseable response is logged and the loop restarts
 * after a second; the domain stays in the queue, so it is retried from
 * offset 0.
 *
 * Named function expressions are used for self-scheduling because
 * `arguments.callee` is unavailable in strict mode.
 */
setTimeout(function scrape() {
  var POLL_MS = 1000;   // delay between every step of the loop
  var PAGE_SIZE = 100;  // `limit` sent with each request
  var MAX_ITEMS = 1000; // never page past this many submissions per domain

  var queue = data.domainQueue;
  if (queue.length === 0 || !data.ready()) {
    return setTimeout(scrape, POLL_MS);
  }

  var domain = queue[0];

  // Remove `domain` from the queue only if it is still there. The queue can
  // change while a request is in flight; an unguarded splice(-1, 1) would
  // drop the last, unrelated entry.
  function dequeue() {
    var at = queue.indexOf(domain);
    if (at !== -1) {
      queue.splice(at, 1);
    }
  }

  if (scraped[domain]) {
    dequeue();
    return setTimeout(scrape, POLL_MS);
  }

  console.log("scraping domain", domain, "queue", queue.length);

  var offset = 0;
  (function geturl() {
    var options = {
      // Encode the domain so characters such as & or # cannot break the query.
      url: url + encodeURIComponent(domain) +
        "&start=" + offset + "&limit=" + PAGE_SIZE
    };
    console.log("domain", domain, offset);

    request(options, function (error, response, body) {
      if (error) {
        console.log("scrape.error (domains.js#40) " + error);
        return setTimeout(scrape, POLL_MS);
      }

      // A non-JSON body (error page, empty response) must not crash the
      // process; treat it like a request error and retry later.
      var jbody;
      try {
        jbody = JSON.parse(body);
      } catch (parseError) {
        console.log("scrape.error (domains.js) invalid JSON for " + domain + ": " + parseError);
        return setTimeout(scrape, POLL_MS);
      }
      if (!jbody || typeof jbody !== "object") {
        console.log("scrape.error (domains.js) unexpected response for " + domain);
        return setTimeout(scrape, POLL_MS);
      }

      var results = jbody.results || [];
      for (var i = 0; i < results.length; i++) {
        var hit = results[i];
        if (hit && hit.item) {
          data.queueUser(hit.item.username);
        }
      }

      // Fetch another page only if one exists beyond the page just read.
      // Assumes `hits` is the total number of matches for the query; a
      // missing `hits` ends paging because the comparison is false.
      var next = offset + PAGE_SIZE;
      if (next < jbody.hits && next < MAX_ITEMS) {
        offset = next;
        return setTimeout(geturl, POLL_MS);
      }

      dequeue();
      scraped[domain] = true;
      return setTimeout(scrape, POLL_MS);
    });
  })();
}, 1000);