-
Notifications
You must be signed in to change notification settings - Fork 1
/
pf_detect_bots.user.js
204 lines (168 loc) · 6.99 KB
/
pf_detect_bots.user.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
// ==UserScript==
// @name ProgrammersForum Detect Bots
// @namespace programmersforum.ru
// @version 1.9.5
// @description adds detectBots function that loads the list of online users and counts bots, and logUsers/startLogDaemon functions to save users into IndexedDB
// @author Alex P
// @include *programmersforum.ru/*
// @require https://cdn.jsdelivr.net/npm/lodash@4.17.15/lodash.min.js
// @require https://unpkg.com/dexie@2.0.4/dist/dexie.js
// @require https://cdn.jsdelivr.net/npm/file-saver@2.0.2/dist/FileSaver.min.js
// @require https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.24.0/moment.min.js
// @require https://unpkg.com/papaparse@5.1.0/papaparse.min.js
// @require https://cdn.jsdelivr.net/npm/ua-parser-js@0/dist/ua-parser.min.js
// @grant none
// @downloadURL https://github.com/AlexP11223/ProgForumRuUserscripts/raw/master/pf_detect_bots.user.js
// ==/UserScript==
(function () {
'use strict';
// UTC offset handed to moment's utcOffset() whenever a logged date is formatted for output
// (CSV export and per-minute visitor counts). moment interprets values between -16 and 16
// as hours, so 3 means UTC+3.
const OUTPUT_TIMEZONE = 3;
/**
 * Pauses an async flow for the given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Promise fulfilled (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise(function (wake) {
        setTimeout(wake, ms);
    });
}
/**
 * Tells whether a user record has been flagged by at least one detector.
 *
 * @param {{detections?: string[]}} user - Record carrying an optional `detections` list.
 * @returns {boolean} true when `detections` is present and non-empty, false otherwise.
 */
function isBot(user) {
    // Coerce explicitly so the predicate always yields a real boolean
    // rather than a length, 0 or undefined.
    return Boolean(user.detections && user.detections.length);
}
/**
 * Runs every bot heuristic against each user and records which ones fired.
 *
 * Each user object is extended in place with:
 *  - `detections`: labels of the heuristics that matched ('ua', 'ip', 'subnet3', 'subnet2'),
 *    in that order;
 *  - `isBot()`: convenience method delegating to the module-level isBot().
 *
 * @param {Array<{ip: string, useragent: string, subnet: function(number): string}>} users
 *     Users as produced by loadOnlineUsers(); `useragent` is expected to be lower-case
 *     because the substring list below is lower-case.
 * @returns {Array} The same user objects, in the same order, after being extended.
 */
function identify(users) {
    // Substrings whose presence in a user-agent marks it as automated.
    const botUaParts = [
        'bot', 'crawl', 'spider', 'batch', 'bing',
        'share', 'preview', 'facebook', 'vk.com',
        'curl', 'indy', 'http',
        'media', 'metrics', '(compatible)',
        // Chinese crawlers, see https://www.johnlarge.co.uk/blocking-aggressive-chinese-crawlers-scrapers-bots/
        'zh-cn', 'zh_cn', 'mb2345', 'liebao', 'micromessenger', 'kinza',
    ];

    // How many online sessions share an exact IP, a /24-style prefix and a /16-style prefix.
    const ipCounts = _.countBy(users, 'ip');
    const subnet3Counts = _.countBy(users, u => u.subnet(3));
    const subnet2Counts = _.countBy(users, u => u.subnet(2));

    // [label, predicate] pairs; the order defines the order of labels in `detections`.
    const rules = [
        ['ua', ({ useragent }) => useragent.length < 20 || botUaParts.some(part => useragent.includes(part))],
        ['ip', user => ipCounts[user.ip] > 2],
        ['subnet3', user => subnet3Counts[user.subnet(3)] > 5],
        ['subnet2', user => subnet2Counts[user.subnet(2)] > 20],
    ];

    return users.map(user => {
        const detections = [];
        for (const [label, matches] of rules) {
            if (matches(user)) {
                detections.push(label);
            }
        }
        user.detections = detections;
        user.isBot = function () {
            return isBot(this);
        };
        return user;
    });
}
/**
 * Fetches the forum's online-users listing and every following page, and
 * extracts one record per listed session.
 *
 * A record is built from each element whose id starts with "resolveip": the
 * element's text is the IP, and the last line of its parent's text is taken as
 * the user-agent (stored lower-cased and trimmed).
 *
 * @param {string} [url] - Page to load; defaults to the first page of the listing.
 * @returns {Promise<Array<{ip: string, useragent: string, subnet: function(number): string}>>}
 *     Records from this page followed by those of all subsequent pages.
 */
async function loadOnlineUsers(url = '/online.php?s=&sortfield=time&sortorder=desc&who=all&ua=1&pp=200') {
    console.log(`Loading ${url}`);
    const html = await $.get(url);

    // Turns one "resolveip" element into a user record.
    const toUser = (el) => {
        const ipCell = $(el);
        const parentLines = ipCell.parent().text().trim().split('\n');
        return {
            ip: ipCell.text().trim(),
            useragent: parentLines[parentLines.length - 1].toLowerCase().trim(),
            // First `n` dot-separated parts of the IP, e.g. subnet(3) of 1.2.3.4 is "1.2.3".
            subnet: function (n) {
                return this.ip.split('.').slice(0, n).join('.');
            },
        };
    };

    const users = $(html).find('[id^=resolveip]').toArray().map(el => toUser(el));

    // Follow pagination for as long as the page offers a rel=next link.
    const nextPageLinks = $(html).find('a[href^=online][rel=next]');
    if (nextPageLinks.length) {
        const remaining = await loadOnlineUsers(nextPageLinks[0].href);
        return users.concat(remaining);
    }
    return users;
}
/**
 * Console helper: loads the current online users, classifies them and prints
 * the bot / normal-user split. The classified list is also published as
 * `window.onlineUsers` for further inspection.
 *
 * @returns {Promise<void>}
 */
window.detectBots = async function () {
    const users = identify(await loadOnlineUsers());

    // Flat, console-friendly view of a user record.
    const toRow = ({ ip, useragent, detections }) => ({
        ip,
        useragent,
        detections: detections.join(', '),
    });

    const bots = users.filter(u => u.isBot());
    const normalUsers = users.filter(u => !u.isBot());

    console.log(`${bots.length} bots, ${normalUsers.length} normal users`);
    console.log('Bots:');
    console.log(bots.map(toRow));
    console.log('Normal users:');
    console.log(normalUsers.map(toRow));

    window.onlineUsers = users;
};
// Lazily created Dexie handle; stays null until db() is first called.
let _db = null;

/**
 * Returns the shared Dexie database used for user logs, creating and
 * declaring it on first use.
 *
 * The database is named 'UserLogDatabase' and has a single `users` store
 * (schema version 1) declared with the Dexie schema string
 * '++, date, ip, useragent, *detections'.
 *
 * @returns {Dexie} The singleton database instance.
 */
function db() {
    if (_db) {
        return _db;
    }
    _db = new Dexie('UserLogDatabase');
    _db.version(1).stores({ users: '++, date, ip, useragent, *detections' });
    return _db;
}

// Expose the accessor so the log database can be queried from the console.
window.userLogsDb = db;
/**
 * Takes one snapshot of the online users, classifies them and appends one row
 * per user (date, ip, useragent, detections) to the `users` store.
 *
 * @returns {Promise<void>} Settles once the rows have been written.
 */
window.logUsers = async function () {
    const users = identify(await loadOnlineUsers());

    // Stamp the whole snapshot with a single instant. Creating a new Date per
    // record lets rows of one poll drift apart and, at a minute boundary, land
    // in different buckets of countVisitorsByTime().
    const date = new Date();

    await db().users.bulkPut(users.map(({ ip, useragent, detections }) => ({
        date,
        ip,
        useragent,
        detections,
    })));
    console.log(`Saved ${users.length} users to db`);
};
/**
 * Logs the online users every 20 minutes, forever. A failed snapshot is
 * printed to the console and does not stop the loop. The returned promise
 * never settles.
 *
 * @returns {Promise<never>}
 */
window.startLogDaemon = async function () {
    const pauseMs = 20 * 60 * 1000;
    for (;;) {
        try {
            await logUsers();
        } catch (e) {
            console.log(e);
        }
        await sleep(pauseMs);
    }
};
// Ready-made predicates for filterUserLogs(): keep only flagged records ...
window.FILTER_IS_BOT = isBot;
// ... or only records that no detector flagged.
window.FILTER_IS_NORMAL_USER = function (user) {
    return !isBot(user);
};
/**
 * Reads logged users whose `date` falls in the given range and that pass the
 * supplied predicate.
 *
 * @param {function(Object): *} [filter] - Predicate applied to each record; defaults to accepting all.
 * @param {*} [startDate] - Range start, anything moment() can parse.
 * @param {*} [endDate] - Range end, anything moment() can parse; defaults to now.
 * @returns {Promise<Object[]>} Matching records (range bounds follow Dexie's between() defaults).
 */
window.filterUserLogs = async function (filter = () => true, startDate = '2019-10-30 00:00:00+03', endDate = new Date()) {
    const byDate = db().users.where('date');
    const rangeStart = moment(startDate).toDate();
    const rangeEnd = moment(endDate).toDate();
    const matching = byDate.between(rangeStart, rangeEnd).and(filter);
    return matching.toArray();
};
/**
 * Serialises rows to CSV with Papa Parse and hands the result to saveAs().
 *
 * @param {Array<Array>} records - Data rows.
 * @param {Array} [header] - Header row, placed before the records.
 * @param {string} [fileName] - Name passed to saveAs().
 */
window.exportToCsv = function (records, header = [], fileName = 'data.csv') {
    const rows = [header].concat(records);
    const csvText = Papa.unparse(rows, { skipEmptyLines: true });
    const file = new Blob([csvText], { type: 'text/csv;charset=utf-8' });
    saveAs(file, fileName);
};
/**
 * Exports logged user records as 'users.csv' with the columns
 * Date/Time, IP, User-Agent and Detections. Dates are rendered at the
 * OUTPUT_TIMEZONE offset.
 *
 * @param {Array<{date: Date, ip: string, useragent: string, detections: string[]}>} users
 */
window.exportUsersToCsv = function (users) {
    const formatDate = date => moment(date).utcOffset(OUTPUT_TIMEZONE).format('YYYY-MM-DD HH:mm:ss');

    const rows = users.map(({ date, ip, useragent, detections }) => [
        formatDate(date),
        ip,
        useragent,
        detections.join(', '),
    ]);

    exportToCsv(rows, ['Date/Time', 'IP', 'User-Agent', 'Detections'], 'users.csv');
};
/**
 * Drops records that repeat an already-seen IP, keeping the first one of each.
 *
 * @param {Array<{ip: string}>} users
 * @returns {Array} One record per distinct `ip`.
 */
window.uniqueUsers = function (users) {
    const byIp = _.property('ip');
    return _.uniqBy(users, byIp);
};
/**
 * Counts records per minute of their `date`, expressed at the OUTPUT_TIMEZONE offset.
 *
 * @param {Array<{date: Date}>} users
 * @returns {Object<string, number>} Map from 'YYYY-MM-DD HH:mm' to number of records.
 */
window.countVisitorsByTime = function (users) {
    const minuteKey = (user) => {
        const shifted = moment(user.date).utcOffset(OUTPUT_TIMEZONE);
        return shifted.seconds(0).milliseconds(0).format('YYYY-MM-DD HH:mm');
    };
    return _.countBy(users, minuteKey);
};
/**
 * Heuristic for Yandex Browser user-agents that were cut short: the string
 * counts as Yandex when its last space-separated token starts with "ya"
 * (e.g. a truncated "YaBrowser/...").
 *
 * The comparison is case-insensitive because the user-agents collected by this
 * script are stored lower-cased, so a case-sensitive "Ya" check can never
 * match logged data.
 *
 * @param {string} ua - User-agent string, in any letter case.
 * @returns {boolean} true when the last token begins with "ya".
 */
function isYandexBrowser(ua) {
    const lastToken = ua.split(' ').pop();
    return lastToken.toLowerCase().startsWith('ya');
}
/**
 * Builds a short OS label from parsed user-agent data: the OS name followed by
 * its version, where the version is kept only if it is one of a fixed set of
 * labels (XP, Vista, 7, 8, 8.1, 10) and dropped otherwise.
 *
 * @param {{os: {name?: string, version?: string}}} uaData - Parsed user-agent data.
 * @returns {string} e.g. "Windows 10", or just the name when the version is not kept.
 */
function getOsNameVersion(uaData) {
    const { name, version } = uaData.os;
    const keptVersions = ['XP', 'Vista', '7', '8', '8.1', '10'];

    let versionLabel = '';
    if (keptVersions.includes(version)) {
        versionLabel = version;
    }
    // join() renders a missing name as an empty string; trim() removes the leftover space.
    return [name, versionLabel].join(' ').trim();
}
/**
 * Returns the lower-cased browser name from parsed user-agent data.
 *
 * A browser reported as Chrome is relabelled 'yandex' when isYandexBrowser()
 * recognises the raw user-agent string.
 *
 * @param {{browser: {name?: string}, ua: string}} uaData - Parsed user-agent data.
 * @returns {string} Lower-case browser name, 'yandex', or '' when no name is available.
 */
function getBrowserName(uaData) {
    const rawName = uaData.browser.name;
    if (!rawName) {
        return '';
    }
    const name = rawName.toLowerCase();
    const isDisguisedYandex = name === 'chrome' && isYandexBrowser(uaData.ua);
    return isDisguisedYandex ? 'yandex' : name;
}
/**
 * Counts records per operating-system label derived from their user-agent.
 *
 * @param {Array<{useragent: string}>} users
 * @returns {Object<string, number>} Map from OS label (see getOsNameVersion) to number of records.
 */
window.countUsersOS = function (users) {
    const osLabel = ({ useragent }) => getOsNameVersion(UAParser(useragent));
    return _.countBy(users, osLabel);
};
/**
 * Counts records per browser name derived from their user-agent.
 *
 * @param {Array<{useragent: string}>} users
 * @returns {Object<string, number>} Map from browser name (see getBrowserName) to number of records.
 */
window.countUsersBrowsers = function (users) {
    const browserLabel = ({ useragent }) => getBrowserName(UAParser(useragent));
    return _.countBy(users, browserLabel);
};
})();