-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
90 lines (81 loc) · 1.96 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
var request = require('request');
var htmlparser = require("htmlparser2");
/**
 * Extract HTML microdata items from a markup string.
 *
 * The markup is fed through an htmlparser2 `Parser` and interpreted as follows:
 *  - An element with an `itemscope` attribute opens a new item object. When it
 *    also has `itemprop` and another item is open, the new item is stored under
 *    that property of the enclosing item; otherwise it is a standalone item.
 *  - An `itemtype` attribute is stored as `type` on the current item.
 *  - An element with `itemprop` (and no `itemscope`) sets that property on the
 *    current item: to the `content` attribute when it is non-empty, otherwise
 *    to the text found inside the element.
 *  - An item is added to the result only when it closes while no other item is
 *    open, so a nested `itemscope` without `itemprop` is not reported.
 *
 * @param {string} data - Markup handed to the parser.
 * @param {function(?Error, Array<Object>=)} [cb] - Node-style callback. Invoked
 *   at most once: with the parser error, or with `null` and the list of items.
 */
function parse(data, cb) {
  cb = cb || function() {};
  var hasOwn = Object.prototype.hasOwnProperty;
  var items = [];
  // Stack of currently open item objects (innermost last).
  var scopes = [];
  // One marker per open element: 'SCOPE', 'TEXT', or false for plain tags.
  var tags = [];
  // Where incoming text is appended: { scope, prop }, or null when idle.
  var textTarget = null;
  // Guards against reporting twice if the parser emits both error and end.
  var finished = false;

  var parser = new htmlparser.Parser({
    onopentag: function(name, attribs) {
      var opensScope = hasOwn.call(attribs, 'itemscope');
      var scope = scopes.length ? scopes[scopes.length - 1] : null;

      if (opensScope) {
        if (attribs.itemprop && scope) {
          // Chain the new item onto the enclosing one.
          scope = scope[attribs.itemprop] = {};
        } else {
          scope = {};
        }
        scopes.push(scope);
        tags.push('SCOPE');
      } else {
        tags.push(false);
      }

      if (!scope) {
        return;
      }
      if (attribs.itemtype) {
        scope.type = attribs.itemtype;
      }
      if (attribs.itemprop && !opensScope) {
        if (attribs.content) {
          scope[attribs.itemprop] = attribs.content;
        } else {
          // No usable content attribute: collect the element's text instead.
          tags[tags.length - 1] = 'TEXT';
          scope[attribs.itemprop] = '';
          // Remember the owning item now; a nested itemscope may change the
          // top of `scopes` before the text arrives.
          textTarget = { scope: scope, prop: attribs.itemprop };
        }
      }
    },
    ontext: function(text) {
      if (textTarget) {
        // Each chunk is trimmed on its own and appended without a separator.
        textTarget.scope[textTarget.prop] += text.trim();
      }
    },
    onclosetag: function(tagname) {
      var tag = tags.pop();
      if (tag === 'SCOPE') {
        var item = scopes.pop();
        if (!scopes.length) {
          items.push(item);
        }
      } else if (tag === 'TEXT') {
        textTarget = null;
      }
    },
    onerror: function(err) {
      if (!finished) {
        finished = true;
        cb(err);
      }
    },
    onend: function() {
      if (!finished) {
        finished = true;
        cb(null, items);
      }
    }
  });

  parser.write(data);
  parser.done();
}
/**
 * Fetch a document with `request` and run its body through `parse`.
 *
 * @param {string} url - Address passed straight to `request`.
 * @param {function(?Error, Array<Object>=)} [cb] - Node-style callback. Receives
 *   the request error if the fetch fails; otherwise it is handed to `parse`
 *   together with the response body.
 */
function parseUrl(url, cb) {
  // Fall back to a no-op so callers may omit the callback.
  var done = cb || function() {};

  request(url, function handleResponse(requestError, response, body) {
    if (requestError) {
      return done(requestError);
    }
    parse(body, done);
  });
}
// Public API: the markup parser and its URL-fetching wrapper.
exports.parse = parse;
exports.parseUrl = parseUrl;