-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
138 lines (122 loc) · 3.75 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
var cheerio = require('cheerio')
exports.load = function(html, options) {
$ = cheerio.load(html)
var elements = $('*')
var defaultFilter = function() {
$('script').remove()
$('textarea').remove()
$('select').remove()
$('input').remove()
$('font').remove()
$('style').remove()
$('center').remove()
$('form').remove()
elements.removeAttr('style')
elements.removeAttr('bgcolor')
}
if (options) {
if (options.filter) {
if (options.filter.elems)
options.filter.elems.forEach(function(elem) { $(elem).remove() })
if (options.filter.attribs)
options.filter.attribs.forEach(function(attrib) { elements.removeAttr(attrib) })
}
else defaultFilter()
}
else defaultFilter()
var formatter = function(selector) {
if (!selector) selector = 'html'
return {
obj : function(cb) {
cb(simplifyKids($(selector)))
},
json : function(cb) {
var simple = simplifyKids($(selector))
cb(JSON.stringify(simple, null, ' '))
},
query : function(selector) {
return formatter(selector)
},
// query : function(selector, cb) {
// var simple = {}
// try {
// simple = simplifyKids($(selector));
// }
// catch(err) {
// console.log(err)
// simple = makeSimple($('html')[0]);
// }
// cb(simple)
// },
clean : function(cb) {
var simple = simplifyKids($(selector))
var props = ''
var getprops = function(obj) {
if (typeof obj === "string") {
props += obj.trim() + '\n'
}
else {
for(var prop in obj) {
var kids = obj[prop].children
if (kids) {
kids.forEach(function(kid) {
getprops(kid)
})
}
}
}
}
getprops(simple)
cb(props)
}
}
}
return formatter()
}
/**
 * Convert one parsed node into its simplified representation.
 *
 * - "tag", "script" and "style" nodes become `{ <name>: { ...attributes } }`,
 *   with a `children` array added when simplifyKids yields at least one child.
 * - "text" nodes become their trimmed string.
 * - "comment" nodes become `{ comment: <trimmed text> }`, or `{}` when the
 *   comment is blank.
 * - Any other node type is logged to the console and yields `{}`.
 *
 * @param {Object} elem - node with `type` and, depending on the type, `name`,
 *   `attribs`, `children` or `data`.
 * @returns {Object|string} the simplified node.
 */
var makeSimple = function(elem) {
  var simple = { }
  switch (elem.type) {
    case "tag":
    case "script":
    case "style":
      // Copy the attributes instead of reusing elem.attribs: the result gets
      // a `children` property below, and writing that onto the shared object
      // would add a bogus "children" attribute to the source node itself.
      var entry = {}
      var attribs = elem.attribs || {}
      Object.keys(attribs).forEach(function(key) {
        entry[key] = attribs[key]
      })
      var kids = simplifyKids(elem.children)
      if (kids && kids.length)
        entry.children = kids
      simple[elem.name] = entry
      break
    case "text":
      return elem.data.trim()
    case "comment":
      var trimmed = elem.data.trim()
      if (trimmed) simple.comment = trimmed
      break
    default:
      // Unrecognised node type: report it and contribute nothing.
      var unknown = {}
      unknown[elem.type] = elem
      console.log(unknown)
      break
  }
  return simple
}
/**
 * Simplify every node of an array-like collection with makeSimple.
 *
 * Falsy entries are skipped, and so are nodes whose simplified form is
 * empty according to empty().
 *
 * @param {ArrayLike<Object>} [kids] - nodes to simplify; a falsy value
 *   yields an empty array.
 * @returns {Array<Object|string>} simplified nodes, in their original order.
 */
var simplifyKids = function(kids) {
  var result = []
  if (!kids) return result

  // Borrow Array's forEach so array-like collections are walked by index.
  Array.prototype.forEach.call(kids, function(node) {
    if (!node) return
    var simplified = makeSimple(node)
    if (empty(simplified)) return
    result.push(simplified)
  })
  return result
}
/**
 * Report whether a value holds nothing.
 *
 * Strings, arrays and array-likes with a numeric `length` are empty when
 * that length is 0; any other value is empty when it has no own enumerable
 * keys. `null` and `undefined` count as empty.
 *
 * @param {*} obj - value to inspect.
 * @returns {boolean} true when the value is empty.
 */
var empty = function(obj) {
  if (obj === null || obj === undefined) return true
  if (typeof obj === 'string' || Array.isArray(obj)) return obj.length === 0
  // Only a numeric length marks an array-like. A plain object that merely
  // has a "length" key (e.g. an attribute map with length="0") must be
  // judged by its keys, not loosely compared against 0.
  if (typeof obj.length === 'number') return obj.length === 0
  return Object.keys(obj).length === 0
}