-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
66 lines (58 loc) · 1.85 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
const request = require('request');
const htmlParser = require('htmlparser2');
// The id of the element whose contents we are interested in.
const ID_TO_FIND = 'geofilter';
// The url we want to load.
// Note: inside this module the name `URL` shadows Node's global `URL` class.
const URL = 'http://epic.gsfc.nasa.gov/';
// Parser state: switched on when the parser sees the element whose id is
// `ID_TO_FIND`, and switched off again once that element has been closed.
let isLoggingOn = false;
// Keeps track of how many nested divs we've seen since logging was turned on.
// A stack would be another way to do it ... push/pop instead of inc/dec
let divCt = 0;
// Placeholder for a second parser; no handlers are registered yet, so it
// currently does nothing with any input it is given.
const fileNameParser = new htmlParser.Parser({
  // look for the image name
});
// Fetch `URL`; when the response arrives, stream its HTML through an
// htmlparser2 parser and log everything found inside the element whose id is
// `ID_TO_FIND` (the class of each nested div, and every text node).
//
// Callback arguments:
//   error    - set when the request itself failed.
//   response - the HTTP response; only status 200 is parsed.
//   body     - the response body, expected to be an HTML string.
request(URL, function (error, response, body) {
  // Report failures instead of silently doing nothing.
  if (error) {
    console.error(`Request for ${URL} failed:`, error);
    return;
  }
  if (response.statusCode !== 200) {
    console.error(`Request for ${URL} returned status ${response.statusCode}`);
    return;
  }

  const parser = new htmlParser.Parser({
    onopentag: function (name, attributes) {
      if (isLoggingOn) {
        // Already inside the target element: every div opened here is nested,
        // even one that happens to repeat the target id, so it must be counted
        // to keep the open/close bookkeeping balanced.
        if (name === 'div') {
          console.log(attributes.class);
          divCt++;
        }
        return;
      }
      if (name === 'div' && attributes.id === ID_TO_FIND) {
        console.log(`Found the ${ID_TO_FIND} element!`);
        isLoggingOn = true;
        // Start from a clean depth; a previous match leaves the counter at -1.
        divCt = 0;
      }
    },
    ontext: function (text) {
      if (isLoggingOn) {
        console.log('-->', text);
      }
    },
    onclosetag: function (name) {
      if (isLoggingOn && name === 'div') {
        divCt--;
        // Going below zero means the target element itself was just closed.
        if (divCt < 0) {
          isLoggingOn = false;
        }
      }
    }
  }, {decodeEntities: true});

  parser.write(body);
  parser.end();
});
// Keeping this down here so I don't forget about this `trumpet` library.
// It's a much cleaner api for parsing html if you are ready to work with streams.
// var trumpet = require('trumpet');
// var tr = trumpet();
// tr.select('.b span', function (node) {
// node.html(function (html) {
// console.log(node.name + ': ' + html);
// });
// });
// var fs = require('fs');
// fs.createReadStream(__dirname + '/select.html').pipe(tr);