Node.js HTML readability parser
var readability = require("readability");
var html = "<html>xxxxxxxxxxx</html>";
readability.parse(html, function(err, article){
/* article
article = {
title: "", // page title
text: "", //text content
html: "", //pretty html content
time: {
title: 10, // milliseconds spent parsing the title
article: 100 // milliseconds spent parsing the content
}
}
*/
});
1. Pass an options object:
var options = {
url: "http://example.com/article/some-article.html", // url is optional; if supplied, it is used to convert relative URLs to absolute ones.
content: "<html>some html</html>"
};
readability.parse(options, function(err, article){
//some code
});
2. Pass an HTML string:
var html = "<html>some html</html>";
readability.parse(html, function(err, article){
//some code
});
3. Pass a URL:
var url = "http://example.com/article/some-article.html";
readability.parse(url, function(err, article){
//some code
});