-
Notifications
You must be signed in to change notification settings - Fork 50
/
Copy path爬虫.js
38 lines (33 loc) · 964 Bytes
/
爬虫.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
var http = require('http');
var https = require('https');
var fs = require('fs');
// Download the blog index page, buffer the whole response body, and pass the
// decoded HTML to getTitle for extraction.
http.get('http://www.cnblogs.com/hustskyking/', function(res) {
    console.log('响应:' + res.statusCode);

    // A non-2xx reply (redirect, error page, ...) is not the page we want to
    // parse. Drain it so the socket is released, then stop.
    if (res.statusCode < 200 || res.statusCode >= 300) {
        console.log('错误:非成功状态码 ' + res.statusCode);
        res.resume();
        return;
    }

    // Collect raw Buffers and decode once at the end, so multi-byte characters
    // split across chunk boundaries are decoded correctly.
    var chunks = [];
    res.on('data', function(chunk){
        chunks.push(chunk);
    });
    res.on('end', function(){
        var bodyStr = Buffer.concat(chunks).toString();
        getTitle(bodyStr);
    });
    // Without this listener, a connection dropped mid-response would surface
    // as an unhandled 'error' event and crash the process.
    res.on('error', function(e){
        console.log('错误:' + e.message);
    });
}).on('error', function(e) {
    // Request-level failures: DNS lookup, connection refused, etc.
    console.log('错误:' + e.message);
});
/**
 * Extract every anchor carrying class="PostTitle" from an HTML string and
 * hand them, joined one per line, to writeIntoFile.
 *
 * The matched anchors are also logged to the console. When the page contains
 * no such anchor, nothing is written and a message is logged instead.
 *
 * @param {string} bodyStr - HTML source of the page to scan.
 * @returns {undefined}
 */
var getTitle = function(bodyStr){
    var matches = bodyStr.match(/<a(.*?)class="PostTitle"(.*?)<\/a>/g);
    console.log(matches);

    // String.prototype.match with a /g regex yields null (not an empty array)
    // when nothing matches; calling .join on it would throw a TypeError.
    if (matches === null) {
        console.log('错误:未找到匹配的标题');
        return;
    }

    writeIntoFile(matches.join('\n'));
};
/**
 * Save the given text to the file `index.txt`, replacing any previous content.
 *
 * The write is asynchronous; a confirmation is logged once it completes, and
 * a write failure is rethrown from the completion callback.
 *
 * @param {string} str - Text to store in the file.
 */
var writeIntoFile = function(str){
    var onWritten = function (err) {
        if (err) {
            throw err;
        }
        console.log('数据已保存~');
    };

    fs.writeFile('index.txt', str, onWritten);
};