Skip to content

Commit

Permalink
update package.json
Browse files Browse the repository at this point in the history
  • Loading branch information
lealife committed Mar 26, 2021
1 parent 7dd8bab commit 0e00d31
Show file tree
Hide file tree
Showing 7 changed files with 299 additions and 20 deletions.
20 changes: 6 additions & 14 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
"main": "main.js",
"dependencies": {
"@electron/remote": "^1.0.4",
"adm-zip": "0.4.11",
"adm-zip": ">=0.4.11",
"async": "0.9.0",
"iconv-lite": "0.4.15",
"mkdirp": "0.3.5",
"nedb": "1.1.1",
"needle": "0.7.10",
"node-getmac": "1.0.3",
"resanitize": "0.3.0",
"validator": ">=3.22.1",
"xml2js": "0.4.6"
}
}
2 changes: 1 addition & 1 deletion public/plugins/export_evernote/enml.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* 使得能尽量(不能保证100%)导入evernote, 即使能导入成功但有可能不能同步!! 因为没有完全验证html
*/

var resanitize = require('resanitize');
var resanitize = require('src/resanitize');

// Portable override before pull request merge into it
resanitize.stripUnsafeAttrs = function(str, unsafeAttrs) {
Expand Down
2 changes: 1 addition & 1 deletion public/plugins/export_leanote/plugin.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ define(function() {
}

async = require('async');
resanitize = require('resanitize');
resanitize = require('src/resanitize');

me._inited = true;
},
Expand Down
2 changes: 1 addition & 1 deletion public/plugins/import_html/import.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
var async = require('async');
var resanitize = require('resanitize');
var resanitize = require('src/resanitize');
var iconv = require('iconv-lite');

var Import = {
Expand Down
2 changes: 1 addition & 1 deletion public/plugins/import_leanote/import.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
var async = require('async');
var resanitize = require('resanitize');
var resanitize = require('src/resanitize');

var Import = {
// 解析Leanote
Expand Down
287 changes: 287 additions & 0 deletions src/resanitize.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
/*!
* resanitize - Regular expression-based HTML sanitizer and ad remover, geared toward RSS feed descriptions
* Copyright(c) 2012 Dan MacTough <danmactough@gmail.com>
* All rights reserved.
*/

/**
* Dependencies
*/
var validator = require('validator');
/**
* Remove unsafe parts and ads from HTML
*
* Example:
*
* var resanitize = require('resanitize');
* resanitize('<div style="border: 400px solid pink;">Headline</div>');
* // => '<div>Headline</div>'
*
* References:
* - http://en.wikipedia.org/wiki/C0_and_C1_control_codes
* - http://en.wikipedia.org/wiki/Unicode_control_characters
* - http://www.utf8-chartable.de/unicode-utf8-table.pl
*
* @param {String|Buffer} HTML string to sanitize
* @return {String} sanitized HTML
* @api public
*/
function resanitize (str) {
if ('string' !== typeof str) {
if (Buffer.isBuffer(str)) {
str = str.toString();
}
else {
throw new TypeError('Invalid argument: must be String or Buffer');
}
}
str = stripAsciiCtrlChars(str);
str = stripExtendedCtrlChars(str);
str = fixSpace(str);
str = stripComments(str);
str = stripAds(str); // It's important that this comes before the remainder
// because it matches on certain attribute values that
// get stripped below.
str = validator.sanitize(str).xss().replace(/\[removed\]/g, '')
str = fixImages(str);
str = stripUnsafeTags(str);
str = stripUnsafeAttrs(str);
return str;
}
module.exports = resanitize;

/**
* Replace UTF-8 non-breaking space with a regular space and strip null bytes
*/
function fixSpace (str) {
return str.replace(/\u00A0/g, ' ') // Unicode non-breaking space
.replace(/[\u2028\u2029]/g, '') // UCS newline characters
.replace(/\0/g, '');
}
module.exports.fixSpace = fixSpace;

/**
* Strip superfluous whitespace
*/
function stripHtmlExtraSpace (str) {
return str.replace(/<(div|p)[^>]*?>\s*?(?:<br[^>]*?>)*?\s*?<\/\1>/gi, '')
.replace(/<(div|span)[^>]*?>\s*?<\/\1>/gi, '');
}
module.exports.stripHtmlExtraSpace = stripHtmlExtraSpace;

/**
* Strip ASCII control characters
*/
function stripAsciiCtrlChars (str) {
return str.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/g, '');
}
module.exports.stripAsciiCtrlChars = stripAsciiCtrlChars;

/**
* Strip ISO 6429 control characters
*/
function stripExtendedCtrlChars (str) {
return str.replace(/[\u0080-\u009F]+/g, '');
}
module.exports.stripExtendedCtrlChars = stripExtendedCtrlChars;

/**
* Strip HTML comments
*/
function stripComments (str) {
return str.replace(/<!--[^>]*?-->/g, '');
}
module.exports.stripComments = stripComments;

/**
* Permit only the provided attributes to remain in the tag
*/
function filterAttrs () {
var allowed = [];

if (Array.isArray(arguments[0])) {
allowed = arguments[0];
} else {
allowed = Array.prototype.slice.call(arguments);
}
return function (attr, name) {
if ( ~allowed.indexOf(name && name.toLowerCase()) ) {
return attr;
} else {
return '';
}
};
}
module.exports.filterAttrs = filterAttrs;

/**
* Strip the provided attributes from the tag
*/
function stripAttrs () {
var banned = []
, regexes = [];

if (Array.isArray(arguments[0])) {
banned = arguments[0].filter(function (attr) {
if ('string' === typeof attr) {
return true;
}
else if (attr.constructor && 'RegExp' === attr.constructor.name) {
regexes.push(attr);
}
});
} else {
banned = Array.prototype.slice.call(arguments).filter(function (attr) {
if ('string' === typeof attr) {
return true;
}
else if (attr.constructor && 'RegExp' === attr.constructor.name) {
regexes.push(attr);
}
});
}
return function (attr, name) {
if ( ~banned.indexOf(name && name.toLowerCase()) || regexes.some(function (re) { return re.test(name); }) ) {
return '';
} else {
return attr;
}
};
}
module.exports.stripAttrs = stripAttrs;

/**
* Filter an HTML opening or self-closing tag
*/
function filterTag (nextFilter) {
return function (rematch) {
if ('function' === typeof nextFilter) {
rematch = rematch.replace(/([^\s"']+?)=("|')[^>]+?\2/g, nextFilter);
}
// Cleanup extra whitespace
return rematch.replace(/\s+/g, ' ')
.replace(/ (\/)?>/, '$1>');
};
}
module.exports.filterTag = filterTag;

function fixImages (str) {
return str.replace(/(<img[^>]*?>)/g, filterTag(filterAttrs('src', 'alt', 'title', 'height', 'width')) );
}
module.exports.fixImages = fixImages;

function stripUnsafeAttrs (str) {
var unsafe = [ 'id'
, 'class'
, 'style'
, 'accesskey'
, 'action'
, 'autocomplete'
, 'autofocus'
, 'clear'
, 'contextmenu'
, 'contenteditable'
, 'draggable'
, 'dropzone'
, 'method'
, 'tabindex'
, 'target'
, /on\w+/i
, /data-\w+/i
];
return str.replace(/<([^ >]+?) [^>]*?>/g, filterTag(stripAttrs(unsafe)));
}
module.exports.stripUnsafeAttrs = stripUnsafeAttrs;

function stripUnsafeTags (str) {
var el = /<(?:form|input|font|blink|script|style|comment|plaintext|xmp|link|listing|meta|body|frame|frameset)\b/;
var ct = 0, max = 2;

// We'll repeatedly try to strip any maliciously nested elements up to [max] times
while (el.test(str) && ct++ < max) {
str = str.replace(/<form[^>]*?>[\s\S]*?<\/form>/gi, '')
.replace(/<input[^>]*?>[\s\S]*?<\/input>/gi, '')
.replace(/<\/?(?:form|input|font|blink)[^>]*?>/gi, '')
// These are XSS/security risks
.replace(/<script[^>]*?>[\s\S]*?<\/script>/gi, '')
.replace(/<style[^>]*?>[\s\S]*?<\/style>/gi, '') // shouldn't work anyway...
.replace(/<comment[^>]*?>[\s\S]*?<\/comment>/gi, '')
.replace(/<plaintext[^>]*?>[\s\S]*?<\/plaintext>/gi, '')
.replace(/<xmp[^>]*?>[\s\S]*?<\/xmp>/gi, '')
.replace(/<\/?(?:link|listing|meta|body|frame|frameset)[^>]*?>/gi, '')
// Delete iframes, except those inserted by Google in lieu of video embeds
.replace(/<iframe(?![^>]*?src=("|')\S+?reader.googleusercontent.com\/reader\/embediframe.+?\1)[^>]*?>[\s\S]*?<\/iframe>/gi, '')
;
}
if (el.test(str)) {
// We couldn't safely strip the HTML, so we return an empty string
return '';
}
return str;
}
module.exports.stripUnsafeTags = stripUnsafeTags;

function stripAds (str) {
return str.replace(/<div[^>]*?class=("|')snap_preview\1[^>]*?>(?:<br[^>]*?>)?([\s\S]*?)<\/div>/gi, '$2')
.replace(/<div[^>]*?class=("|')(?:feedflare|zemanta-pixie)\1[^>]*?>[\s\S]*?<\/div>/gi, '')
.replace(/<!--AD BEGIN-->[\s\S]*?<!--AD END-->\s*/gi, '')
.replace(/<table[^>]*?>[\s\S]*?<\/table>\s*?<div[^>]*?>[\s\S]*?Ads by Pheedo[\s\S]*?<\/div>/gi, '')
.replace(/<table[^>]*?>.*?<img[^>]*?src=("|')[^>]*?advertisement[^>]*?\1.*?>.*?<\/table>/gi, '')
.replace(/<br[^>]*?>\s*?<br[^>]*?>\s*?<span[^>]*?class=("|')advertisement\1[^>]*?>[\s\S]*?<\/span>[\s\S]*<div[^>]*?>[\s\S]*?Ads by Pheedo[\s\S]*?<\/div>/gi, '')
.replace(/<br[^>]*?>\s*?<br[^>]*?>\s*?(?:<[^>]+>\s*)*?<hr[^>]*?>\s*?<div[^>]*?>(?:Featured Advertiser|Presented By:)<\/div>[\s\S]*<div[^>]*?>[\s\S]*?(?:Ads by Pheedo|ads\.pheedo\.com)[\s\S]*?<\/div>/gi, '')
.replace(/<br[^>]*?>\s*?<br[^>]*?>\s*?<a[^>]*?href=("|')http:\/\/[^>]*?\.?(?:pheedo|pheedcontent)\.com\/[^>]*?\1[\s\S]*?<\/a>[\s\S]*$/gim, '')
.replace(/<div[^>]*?class=("|')cbw snap_nopreview\1[^>]*?>[\s\S]*$/gim, '')
.replace(/<div><a href=(?:"|')http:\/\/d\.techcrunch\.com\/ck\.php[\s\S]*?<\/div> */gi, '')
.replace(/<(p|div)[^>]*?>\s*?<a[^>]*?href=("|')[^>]+?\2[^>]*?><img[^>]*?src=("|')http:\/\/(?:feedads\.googleadservices|feedproxy\.google|feeds2?\.feedburner)\.com\/(?:~|%7e)[^>]*?\/[^>]+?\3[^>]*?>[\s\S]*?<\/\1>/gi, '')
.replace(/<(p|div)[^>]*?>\s*?<a[^>]*?href=("|')[^>]+?\2[^>]*?><img[^>]*?src=("|')http:\/\/feeds\.[^>]+?\.[^>]+?\/(?:~|%7e)[^>]\/[^>]+?\3[^>]*?>[\s\S]*?<\/\1>/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/feeds\.[^>]+?\.[^>]+?\/(?:~|%7e)[a-qs-z]\/[^>]+?\1[\s\S]*?<\/a>/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/[^>]*?\.?addtoany\.com\/[^>]*?\1[\s\S]*?<\/a>/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/feeds\.wordpress\.com\/[\.\d]+?\/(?:comments|go[\s\S]*)\/[^>]+?\1[\s\S]*?<\/a> ?/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/[^>]*?\.?doubleclick\.net\/[^>]*?\1[\s\S]*?<\/a>/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/[^>]*?\.?fmpub\.net\/adserver\/[^>]*?\1[\s\S]*?<\/a>/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/[^>]*?\.?eyewonderlabs\.com\/[^>]*?\1[\s\S]*?<\/a>/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/[^>]*?\.?pheedo\.com\/[^>]*?\1[\s\S]*?<\/a>/gi, '')
.replace(/<a[^>]*?href=("|')http:\/\/api\.tweetmeme\.com\/share\?[^>]*?\1[\s\S]*?<\/a>/gi, '')
.replace(/<p><a[^>]*?href=("|')http:\/\/rss\.cnn\.com\/+?(?:~|%7e)a\/[^>]*?\1[\s\S]*?<\/p>/gi, '')
.replace(/<img[^>]*?src=("|')http:\/\/feeds\.[^>]+?\.[^>]+?\/(?:~|%7e)r\/[^>]+?\1[\s\S]*?>/gi, '')
.replace(/<img[^>]*?src=("|')http:\/\/rss\.nytimes\.com\/c\/[^>]*?\1.*?>.*$/gim, '')
.replace(/<img[^>]*?src=("|')http:\/\/feeds\.washingtonpost\.com\/c\/[^>]*?\1.*?>.*$/gim, '')
.replace(/<img[^>]*?src=("|')http:\/\/[^>]*?\.?feedsportal\.com\/c\/[^>]*?\1.*?>.*$/gim, '')
.replace(/<img[^>]*?src=("|')http:\/\/(?:feedads\.googleadservices|feedproxy\.google|feeds2\.feedburner)\.com\/(?:~|%7e)r\/[^>]+?\1[\s\S]*?>/gi, '')
.replace(/<img[^>]*?src=("|')http:\/\/rss\.cnn\.com\/~r\/[^>]*?\1[\s\S]*?>/gi, '')
.replace(/<img[^>]*?src=("|')http:\/\/[^>]*?\.?fmpub\.net\/adserver\/[^>]*?\1[\s\S]*?>/gi, '')
.replace(/<img[^>]*?src=("|')http:\/\/[^>]*?\.?pheedo\.com\/[^>]*?\1[\s\S]*?>/gi, '')
.replace(/<img[^>]*?src=("|')http:\/\/stats\.wordpress\.com\/[\w]\.gif\?[^>]*?\1[\s\S]*?>/gi, '')
.replace(/<img[^>]*?src=("|')http:\/\/feeds\.wired\.com\/c\/[^>]*?\1.*?>.*$/gim, '')
.replace(/<p><strong><em>Crunch Network[\s\S]*?<\/p>/gi, '')
.replace(/<embed[^>]*?castfire_player[\s\S]*?> *?(<\/embed>)?/gi, '')
.replace(/<embed[^>]*?src=("|')[^>]*?castfire\.com[^>]+?\1[\s\S]*?> *?(<\/embed>)?/gi, '')
.replace(/<p align=("|')right\1><em>Sponsor<\/em>[\s\S]*?<\/p>/gi, '')
.replace(/<div [\s\S]*?<img [^>]*?src=(?:"|')[^>]*?\/share-buttons\/[\s\S]*?<\/div>[\s]*/gi, '')
// This is that annoying footer in every delicious item
.replace(/<span[^>]*?>\s*?<a[^>]*?href=("|')[^\1]+?src=feed_newsgator\1[^>]*?>[\s\S]*<\/span>/gi, '')
// This is the annoying footer from ATL
.replace(/<p[^>]*?><strong><a[^>]*?href=("|')[^>]*?abovethelaw\.com\/[\s\S]+?\1[^>]*?>Continue reading[\s\S]*<\/p>/gi, '')
// This is the annoying link at the end of WaPo articles
.replace(/<a[^>]*?>Read full article[\s\S]*?<\/a>/gi, '')
// These ads go...
.replace(/<div[^>]*?><a[^>]*?href=("|')[^>]*?crunchbase\.com\/company\/[\s\S]+?\1[^>]*?>[\s\S]*?<div[\s\S]*?>Loading information about[\s\S]*?<\/div>/gi, '')
.replace(/<div[^>]*?class=("|')cb_widget_[^>]+?\1[\s\S]*?><\/div>/gi, '')
.replace(/<div[^>]*?class=("|')cb_widget_[^>]+?\1[\s\S]*?>[\s\S]*?<\/div>/gi, '')
// Before these
.replace(/<a[^>]*?href=("|')[^>]*?crunchbase\.com\/\1[\s\S]*?<\/a>\s*/gi, '')
.replace(/<div[^>]*?class=("|')cb_widget\1[^>]*?>[\s\S]*?<\/div>/gi, '')
// Clean up some empty things
//.replace(/<(div|p|span)[^>]*?>(\s|<br *?\/?>|(?R))*?<\/\1>/gi, '')
.replace(/(\s|<br[^>]*?\/?>)*$/gim, '')
;
}
module.exports.stripAds = stripAds;

/**
* Dumbly strip angle brackets
*/
function stripHtml (str) {
return str.replace(/<.*?>/g, '');
}
module.exports.stripHtml = stripHtml;

0 comments on commit 0e00d31

Please sign in to comment.