forked from nfriedly/node-unblocker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
blocklist.js
79 lines (64 loc) · 2.17 KB
/
blocklist.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
var fs = require('fs'),
path = require("path"),
Url = require('url');
/**
 * A list of entries backed by a file on disk.
 *
 * Construction stores the file name, starts with an empty `data` array,
 * triggers an initial `readData()` and then asks `fs.watch` to call
 * `readData()` again every time it reports an event for the file.
 *
 * @param {string} filename - path of the file to load and watch.
 */
function Watchlist(filename){
    var self = this;

    self.filename = filename;
    self.data = [];
    self.readData();

    // The event details are irrelevant: any notification triggers a reload.
    function reload(){
        self.readData();
    }
    fs.watch(filename, reload);
}
/**
 * Reloads the list from `this.filename`, replacing `this.data` with the
 * file's lines after trimming whitespace and dropping empty lines.
 *
 * The read is asynchronous: `this.data` keeps its previous contents until the
 * file has been read and parsed.
 *
 * @throws {Error} from inside the read callback when the file is missing
 *   ("Blocklist file <name> does not exist") or cannot be read (the original
 *   fs error). Nothing in this function catches it.
 */
Watchlist.prototype.readData = function(){
    var self = this;

    // readFile reports a missing file itself (ENOENT), so no separate
    // existence check is needed; `path.exists` is not provided by current
    // Node.js releases and calling it fails before anything is loaded.
    // Read as UTF-8 so multi-byte entries are decoded as text, not latin1.
    fs.readFile(self.filename, "utf8", function(err, contents){
        if (err) {
            if (err.code === "ENOENT") {
                throw new Error("Blocklist file " + self.filename + " does not exist");
            }
            throw err;
        }

        // trim() also removes the "\r" left behind by CRLF line endings.
        self.data = contents.split("\n")
            .map(function(line){ return line.trim(); })
            .filter(function(line){ return line !== ""; });
    });
};
// Module-level watchlists, each backed by a text file in this module's directory.
var domains = new Watchlist(__dirname + "/domain-blocklist.txt");
var keywords = new Watchlist(__dirname + "/keyword-blocklist.txt");
exports.urlAllowed = function(url){
if(typeof url == "string"){
url = Url.parse(url);
}
// short-circut: if the exact domain is in the list, then return early
if(domains.data.indexOf(url.hostname) != -1){
console.log("url blocked due to domain name: ", url.hostname, domains.data);
return false;
}
if(url.hostname) {
// next check each sub-domain, skipping the final one since we just checked it above
var hostname_parts = url.hostname.split("."),
i = (hostname_parts[hostname_parts.length-2] == "co") ? 3 : 2, // ignore domains like co.uk
cur_domain;
for(; i<= hostname_parts.length-1; i++){
cur_domain = hostname_parts.slice(-1*i).join('.'); // first site.com, then www.site.com, etc.
if(domains.data.indexOf(cur_domain) != -1){
console.log("url blocked on subdomain ", cur_domain, domains.data);
return false;
}
}
}
// lastly, go through each keyword in the list and check if it's in the url anywhere
if(keywords.data.some(function(keyword){
if( url.href.indexOf(keyword) != -1 ){
console.log("url blocked on keyword", keyword, keywords.data)}
return url.href.indexOf(keyword) != -1;
})){
return false;
}
// if it's passed the above tests, than the url looks safe
return true;
}