From 14bc7398d873c2fb6f41ac89dff93aaeefa96e4e Mon Sep 17 00:00:00 2001 From: Innei Date: Thu, 1 Aug 2024 17:32:03 +0800 Subject: [PATCH 1/2] feat: init readability Signed-off-by: Innei --- icons/mgc/sparkles_2_cute_re.svg | 1 + icons/mgc/sparkles_2_filled.svg | 1 + package.json | 6 +- patches/@mozilla__readability@0.5.0.patch | 2109 +++++++++++++++++ pnpm-lock.yaml | 301 ++- src/main/lib/readability.ts | 24 + src/main/tipc/index.ts | 2 + src/main/tipc/reader.ts | 17 + src/renderer/src/atoms/readability.ts | 53 + .../src/hooks/biz/useEntryActions.tsx | 57 +- .../src/modules/entry-column/item.tsx | 2 +- .../entry-column/social-media-item.tsx | 2 +- .../src/modules/entry-content/header.tsx | 3 +- .../src/modules/entry-content/index.tsx | 38 +- 14 files changed, 2596 insertions(+), 20 deletions(-) create mode 100644 icons/mgc/sparkles_2_cute_re.svg create mode 100644 icons/mgc/sparkles_2_filled.svg create mode 100644 patches/@mozilla__readability@0.5.0.patch create mode 100644 src/main/lib/readability.ts create mode 100644 src/main/tipc/reader.ts create mode 100644 src/renderer/src/atoms/readability.ts diff --git a/icons/mgc/sparkles_2_cute_re.svg b/icons/mgc/sparkles_2_cute_re.svg new file mode 100644 index 0000000000..068f112798 --- /dev/null +++ b/icons/mgc/sparkles_2_cute_re.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/icons/mgc/sparkles_2_filled.svg b/icons/mgc/sparkles_2_filled.svg new file mode 100644 index 0000000000..f7e901c408 --- /dev/null +++ b/icons/mgc/sparkles_2_filled.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/package.json b/package.json index 5ffd8794b7..ed5f56671c 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,7 @@ "@hono/auth-js": "1.0.10", "@hookform/resolvers": "3.9.0", "@iconify/tools": "4.0.4", + "@mozilla/readability": "^0.5.0", "@radix-ui/react-alert-dialog": "1.1.1", "@radix-ui/react-avatar": "1.1.0", "@radix-ui/react-checkbox": "1.1.1", @@ -89,7 +90,9 @@ "idb-keyval": "6.2.1", "immer": "10.1.1", "jotai": "2.9.1", + "jsdom": "^24.1.1", "lethargy": "1.0.9", + "linkedom": "^0.18.4", "lodash-es": "4.17.21", "lowdb": "7.0.1", "markdown-it": "14.1.0", @@ -171,7 +174,8 @@ "patchedDependencies": { "sonner@1.5.0": "patches/sonner@1.5.0.patch", "hono@4.4.7": "patches/hono@4.4.7.patch", - "immer@10.1.1": "patches/immer@10.1.1.patch" + "immer@10.1.1": "patches/immer@10.1.1.patch", + "@mozilla/readability@0.5.0": "patches/@mozilla__readability@0.5.0.patch" } }, "simple-git-hooks": { diff --git a/patches/@mozilla__readability@0.5.0.patch b/patches/@mozilla__readability@0.5.0.patch new file mode 100644 index 0000000000..7ea789420c --- /dev/null +++ b/patches/@mozilla__readability@0.5.0.patch @@ -0,0 +1,2109 @@ +diff --git a/Readability.js b/Readability.js +index b745aa01d8ea23e55c5c309c29262f6fa0a74a01..21756f48574d33a7bd35ce7d47be106f922a9908 100644 +--- a/Readability.js ++++ b/Readability.js +@@ -30,7 +30,9 @@ function Readability(doc, options) { + doc = options; + options = arguments[2]; + } else if (!doc || !doc.documentElement) { +- throw new Error("First argument to Readability constructor should be a document object."); ++ throw new Error( ++ "First argument to Readability constructor should be a document object." ++ ); + } + options = options || {}; + +@@ -44,37 +46,43 @@ function Readability(doc, options) { + + // Configurable options + this._debug = !!options.debug; +- this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE; +- this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES; ++ this._maxElemsToParse = ++ options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE; ++ this._nbTopCandidates = ++ options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES; + this._charThreshold = options.charThreshold || this.DEFAULT_CHAR_THRESHOLD; +- this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []); ++ this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat( ++ options.classesToPreserve || [] ++ ); + this._keepClasses = !!options.keepClasses; +- this._serializer = options.serializer || function(el) { +- return el.innerHTML; +- }; ++ this._serializer = ++ options.serializer || ++ function (el) { ++ return el.innerHTML; ++ }; + this._disableJSONLD = !!options.disableJSONLD; + this._allowedVideoRegex = options.allowedVideoRegex || this.REGEXPS.videos; + + // Start with all flags set +- this._flags = this.FLAG_STRIP_UNLIKELYS | +- this.FLAG_WEIGHT_CLASSES | +- this.FLAG_CLEAN_CONDITIONALLY; +- ++ this._flags = ++ this.FLAG_STRIP_UNLIKELYS | ++ this.FLAG_WEIGHT_CLASSES | ++ this.FLAG_CLEAN_CONDITIONALLY; + + // Control whether log messages are sent to the console + if (this._debug) { +- let logNode = function(node) { ++ let logNode = function (node) { + if (node.nodeType == node.TEXT_NODE) { + return `${node.nodeName} ("${node.textContent}")`; + } +- let attrPairs = Array.from(node.attributes || [], function(attr) { ++ let attrPairs = Array.from(node.attributes || [], function (attr) { + return `${attr.name}="${attr.value}"`; + }).join(" "); + return `<${node.localName} ${attrPairs}>`; + }; + this.log = function () { + if (typeof console !== "undefined") { +- let args = Array.from(arguments, arg => { ++ let args = Array.from(arguments, (arg) => { + if (arg && arg.nodeType == this.ELEMENT_NODE) { + return logNode(arg); + } +@@ -84,9 +92,11 @@ function Readability(doc, options) { + console.log.apply(console, args); + } else if (typeof dump !== "undefined") { + /* global dump */ +- var msg = Array.prototype.map.call(arguments, function(x) { +- return (x && x.nodeName) ? logNode(x) : x; +- }).join(" "); ++ var msg = Array.prototype.map ++ .call(arguments, function (x) { ++ return x && x.nodeName ? logNode(x) : x; ++ }) ++ .join(" "); + dump("Reader: (Readability) " + msg + "\n"); + } + }; +@@ -112,7 +122,9 @@ Readability.prototype = { + DEFAULT_N_TOP_CANDIDATES: 5, + + // Element tags to score by default. +- DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","), ++ DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre" ++ .toUpperCase() ++ .split(","), + + // The default number of chars an article must have in order to return a result + DEFAULT_CHAR_THRESHOLD: 500, +@@ -122,16 +134,21 @@ Readability.prototype = { + REGEXPS: { + // NOTE: These two regular expressions are duplicated in + // Readability-readerable.js. Please keep both copies in sync. +- unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, ++ unlikelyCandidates: ++ /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, + okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i, + +- positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i, +- negative: /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, +- extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i, ++ positive: ++ /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i, ++ negative: ++ /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, ++ extraneous: ++ /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i, + byline: /byline|author|dateline|writtenby|p-author/i, + replaceFonts: /<(\/?)font[^>]*>/gi, + normalize: /\s{2,}/g, +- videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i, ++ videos: ++ /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i, + shareElements: /(\b|_)(share|sharedaddy)(\b|_)/i, + nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, + prevLink: /(prev|earl|old|new|<|«)/i, +@@ -145,40 +162,106 @@ Readability.prototype = { + // see: https://en.wikipedia.org/wiki/Comma#Comma_variants + commas: /\u002C|\u060C|\uFE50|\uFE10|\uFE11|\u2E41|\u2E34|\u2E32|\uFF0C/g, + // See: https://schema.org/Article +- jsonLdArticleTypes: /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/ ++ jsonLdArticleTypes: ++ /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/, + }, + +- UNLIKELY_ROLES: [ "menu", "menubar", "complementary", "navigation", "alert", "alertdialog", "dialog" ], ++ UNLIKELY_ROLES: [ ++ "menu", ++ "menubar", ++ "complementary", ++ "navigation", ++ "alert", ++ "alertdialog", ++ "dialog", ++ ], + +- DIV_TO_P_ELEMS: new Set([ "BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL" ]), ++ DIV_TO_P_ELEMS: new Set([ ++ "BLOCKQUOTE", ++ "DL", ++ "DIV", ++ "IMG", ++ "OL", ++ "P", ++ "PRE", ++ "TABLE", ++ "UL", ++ ]), + + ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P"], + +- PRESENTATIONAL_ATTRIBUTES: [ "align", "background", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "hspace", "rules", "style", "valign", "vspace" ], ++ PRESENTATIONAL_ATTRIBUTES: [ ++ "align", ++ "background", ++ "bgcolor", ++ "border", ++ "cellpadding", ++ "cellspacing", ++ "frame", ++ "hspace", ++ "rules", ++ "style", ++ "valign", ++ "vspace", ++ ], + +- DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [ "TABLE", "TH", "TD", "HR", "PRE" ], ++ DEPRECATED_SIZE_ATTRIBUTE_ELEMS: ["TABLE", "TH", "TD", "HR", "PRE"], + + // The commented out elements qualify as phrasing content but tend to be + // removed by readability when put into paragraphs, so we ignore them here. + PHRASING_ELEMS: [ + // "CANVAS", "IFRAME", "SVG", "VIDEO", +- "ABBR", "AUDIO", "B", "BDO", "BR", "BUTTON", "CITE", "CODE", "DATA", +- "DATALIST", "DFN", "EM", "EMBED", "I", "IMG", "INPUT", "KBD", "LABEL", +- "MARK", "MATH", "METER", "NOSCRIPT", "OBJECT", "OUTPUT", "PROGRESS", "Q", +- "RUBY", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "SUB", +- "SUP", "TEXTAREA", "TIME", "VAR", "WBR" ++ "ABBR", ++ "AUDIO", ++ "B", ++ "BDO", ++ "BR", ++ "BUTTON", ++ "CITE", ++ "CODE", ++ "DATA", ++ "DATALIST", ++ "DFN", ++ "EM", ++ "EMBED", ++ "I", ++ "IMG", ++ "INPUT", ++ "KBD", ++ "LABEL", ++ "MARK", ++ "MATH", ++ "METER", ++ "NOSCRIPT", ++ "OBJECT", ++ "OUTPUT", ++ "PROGRESS", ++ "Q", ++ "RUBY", ++ "SAMP", ++ "SCRIPT", ++ "SELECT", ++ "SMALL", ++ "SPAN", ++ "STRONG", ++ "SUB", ++ "SUP", ++ "TEXTAREA", ++ "TIME", ++ "VAR", ++ "WBR", + ], + + // These are the classes that readability sets itself. +- CLASSES_TO_PRESERVE: [ "page" ], ++ CLASSES_TO_PRESERVE: ["page"], + + // These are the list of HTML entities that need to be escaped. + HTML_ESCAPE_MAP: { +- "lt": "<", +- "gt": ">", +- "amp": "&", +- "quot": '"', +- "apos": "'", ++ lt: "<", ++ gt: ">", ++ amp: "&", ++ quot: '"', ++ apos: "'", + }, + + /** +@@ -186,8 +269,8 @@ Readability.prototype = { + * + * @param Element + * @return void +- **/ +- _postProcessContent: function(articleContent) { ++ **/ ++ _postProcessContent: function (articleContent) { + // Readability cannot open relative uris so we convert them to absolute uris. + this._fixRelativeUris(articleContent); + +@@ -209,7 +292,7 @@ Readability.prototype = { + * @param Function filterFn the function to use as a filter + * @return void + */ +- _removeNodes: function(nodeList, filterFn) { ++ _removeNodes: function (nodeList, filterFn) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _removeNodes"); +@@ -232,7 +315,7 @@ Readability.prototype = { + * @param String newTagName the new tag name to use + * @return void + */ +- _replaceNodeTags: function(nodeList, newTagName) { ++ _replaceNodeTags: function (nodeList, newTagName) { + // Avoid ever operating on live node lists. + if (this._docJSDOMParser && nodeList._isLiveNodeList) { + throw new Error("Do not pass live node lists to _replaceNodeTags"); +@@ -253,7 +336,7 @@ Readability.prototype = { + * @param Function fn The iterate function. + * @return void + */ +- _forEachNode: function(nodeList, fn) { ++ _forEachNode: function (nodeList, fn) { + Array.prototype.forEach.call(nodeList, fn, this); + }, + +@@ -268,7 +351,7 @@ Readability.prototype = { + * @param Function fn The test function. + * @return void + */ +- _findNode: function(nodeList, fn) { ++ _findNode: function (nodeList, fn) { + return Array.prototype.find.call(nodeList, fn, this); + }, + +@@ -283,7 +366,7 @@ Readability.prototype = { + * @param Function fn The iterate function. + * @return Boolean + */ +- _someNode: function(nodeList, fn) { ++ _someNode: function (nodeList, fn) { + return Array.prototype.some.call(nodeList, fn, this); + }, + +@@ -298,7 +381,7 @@ Readability.prototype = { + * @param Function fn The iterate function. + * @return Boolean + */ +- _everyNode: function(nodeList, fn) { ++ _everyNode: function (nodeList, fn) { + return Array.prototype.every.call(nodeList, fn, this); + }, + +@@ -308,23 +391,26 @@ Readability.prototype = { + * @return ...NodeList + * @return Array + */ +- _concatNodeLists: function() { ++ _concatNodeLists: function () { + var slice = Array.prototype.slice; + var args = slice.call(arguments); +- var nodeLists = args.map(function(list) { ++ var nodeLists = args.map(function (list) { + return slice.call(list); + }); + return Array.prototype.concat.apply([], nodeLists); + }, + +- _getAllNodesWithTag: function(node, tagNames) { ++ _getAllNodesWithTag: function (node, tagNames) { + if (node.querySelectorAll) { + return node.querySelectorAll(tagNames.join(",")); + } +- return [].concat.apply([], tagNames.map(function(tag) { +- var collection = node.getElementsByTagName(tag); +- return Array.isArray(collection) ? collection : Array.from(collection); +- })); ++ return [].concat.apply( ++ [], ++ tagNames.map(function (tag) { ++ var collection = node.getElementsByTagName(tag); ++ return Array.isArray(collection) ? collection : Array.from(collection); ++ }) ++ ); + }, + + /** +@@ -335,11 +421,11 @@ Readability.prototype = { + * @param Element + * @return void + */ +- _cleanClasses: function(node) { ++ _cleanClasses: function (node) { + var classesToPreserve = this._classesToPreserve; + var className = (node.getAttribute("class") || "") + .split(/\s+/) +- .filter(function(cls) { ++ .filter(function (cls) { + return classesToPreserve.indexOf(cls) != -1; + }) + .join(" "); +@@ -362,7 +448,7 @@ Readability.prototype = { + * @param Element + * @return void + */ +- _fixRelativeUris: function(articleContent) { ++ _fixRelativeUris: function (articleContent) { + var baseURI = this._doc.baseURI; + var documentURI = this._doc.documentURI; + function toAbsoluteURI(uri) { +@@ -381,14 +467,17 @@ Readability.prototype = { + } + + var links = this._getAllNodesWithTag(articleContent, ["a"]); +- this._forEachNode(links, function(link) { ++ this._forEachNode(links, function (link) { + var href = link.getAttribute("href"); + if (href) { + // Remove links with javascript: URIs, since + // they won't work after scripts have been removed from the page. + if (href.indexOf("javascript:") === 0) { + // if the link only contains simple text content, it can be converted to a text node +- if (link.childNodes.length === 1 && link.childNodes[0].nodeType === this.TEXT_NODE) { ++ if ( ++ link.childNodes.length === 1 && ++ link.childNodes[0].nodeType === this.TEXT_NODE ++ ) { + var text = this._doc.createTextNode(link.textContent); + link.parentNode.replaceChild(text, link); + } else { +@@ -406,10 +495,15 @@ Readability.prototype = { + }); + + var medias = this._getAllNodesWithTag(articleContent, [ +- "img", "picture", "figure", "video", "audio", "source" ++ "img", ++ "picture", ++ "figure", ++ "video", ++ "audio", ++ "source", + ]); + +- this._forEachNode(medias, function(media) { ++ this._forEachNode(medias, function (media) { + var src = media.getAttribute("src"); + var poster = media.getAttribute("poster"); + var srcset = media.getAttribute("srcset"); +@@ -423,27 +517,40 @@ Readability.prototype = { + } + + if (srcset) { +- var newSrcset = srcset.replace(this.REGEXPS.srcsetUrl, function(_, p1, p2, p3) { +- return toAbsoluteURI(p1) + (p2 || "") + p3; +- }); ++ var newSrcset = srcset.replace( ++ this.REGEXPS.srcsetUrl, ++ function (_, p1, p2, p3) { ++ return toAbsoluteURI(p1) + (p2 || "") + p3; ++ } ++ ); + + media.setAttribute("srcset", newSrcset); + } + }); + }, + +- _simplifyNestedElements: function(articleContent) { ++ _simplifyNestedElements: function (articleContent) { + var node = articleContent; + + while (node) { +- if (node.parentNode && ["DIV", "SECTION"].includes(node.tagName) && !(node.id && node.id.startsWith("readability"))) { ++ if ( ++ node.parentNode && ++ ["DIV", "SECTION"].includes(node.tagName) && ++ !(node.id && node.id.startsWith("readability")) ++ ) { + if (this._isElementWithoutContent(node)) { + node = this._removeAndGetNext(node); + continue; +- } else if (this._hasSingleTagInsideElement(node, "DIV") || this._hasSingleTagInsideElement(node, "SECTION")) { ++ } else if ( ++ this._hasSingleTagInsideElement(node, "DIV") || ++ this._hasSingleTagInsideElement(node, "SECTION") ++ ) { + var child = node.children[0]; + for (var i = 0; i < node.attributes.length; i++) { +- child.setAttribute(node.attributes[i].name, node.attributes[i].value); ++ child.setAttribute( ++ node.attributes[i].name, ++ node.attributes[i].value ++ ); + } + node.parentNode.replaceChild(child, node); + node = child; +@@ -460,7 +567,7 @@ Readability.prototype = { + * + * @return string + **/ +- _getArticleTitle: function() { ++ _getArticleTitle: function () { + var doc = this._doc; + var curTitle = ""; + var origTitle = ""; +@@ -470,8 +577,12 @@ Readability.prototype = { + + // If they had an element with id "title" in their HTML + if (typeof curTitle !== "string") +- curTitle = origTitle = this._getInnerText(doc.getElementsByTagName("title")[0]); +- } catch (e) {/* ignore exceptions setting the title. */} ++ curTitle = origTitle = this._getInnerText( ++ doc.getElementsByTagName("title")[0] ++ ); ++ } catch (e) { ++ /* ignore exceptions setting the title. */ ++ } + + var titleHadHierarchicalSeparators = false; + function wordCount(str) { +@@ -479,7 +590,7 @@ Readability.prototype = { + } + + // If there's a separator in the title, first remove the final part +- if ((/ [\|\-\\\/>»] /).test(curTitle)) { ++ if (/ [\|\-\\\/>»] /.test(curTitle)) { + titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle); + curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, "$1"); + +@@ -495,7 +606,7 @@ Readability.prototype = { + doc.getElementsByTagName("h2") + ); + var trimmedTitle = curTitle.trim(); +- var match = this._someNode(headings, function(heading) { ++ var match = this._someNode(headings, function (heading) { + return heading.textContent.trim() === trimmedTitle; + }); + +@@ -515,8 +626,7 @@ Readability.prototype = { + } else if (curTitle.length > 150 || curTitle.length < 15) { + var hOnes = doc.getElementsByTagName("h1"); + +- if (hOnes.length === 1) +- curTitle = this._getInnerText(hOnes[0]); ++ if (hOnes.length === 1) curTitle = this._getInnerText(hOnes[0]); + } + + curTitle = curTitle.trim().replace(this.REGEXPS.normalize, " "); +@@ -525,9 +635,12 @@ Readability.prototype = { + // title or we decreased the number of words by more than 1 word, use + // the original title. + var curTitleWordCount = wordCount(curTitle); +- if (curTitleWordCount <= 4 && +- (!titleHadHierarchicalSeparators || +- curTitleWordCount != wordCount(origTitle.replace(/[\|\-\\\/>»]+/g, "")) - 1)) { ++ if ( ++ curTitleWordCount <= 4 && ++ (!titleHadHierarchicalSeparators || ++ curTitleWordCount != ++ wordCount(origTitle.replace(/[\|\-\\\/>»]+/g, "")) - 1) ++ ) { + curTitle = origTitle; + } + +@@ -540,7 +653,7 @@ Readability.prototype = { + * + * @return void + **/ +- _prepDocument: function() { ++ _prepDocument: function () { + var doc = this._doc; + + // Remove all style tags in head +@@ -560,9 +673,11 @@ Readability.prototype = { + */ + _nextNode: function (node) { + var next = node; +- while (next +- && (next.nodeType != this.ELEMENT_NODE) +- && this.REGEXPS.whitespace.test(next.textContent)) { ++ while ( ++ next && ++ next.nodeType != this.ELEMENT_NODE && ++ this.REGEXPS.whitespace.test(next.textContent) ++ ) { + next = next.nextSibling; + } + return next; +@@ -576,7 +691,7 @@ Readability.prototype = { + *
foo
bar

abc

+ */ + _replaceBrs: function (elem) { +- this._forEachNode(this._getAllNodesWithTag(elem, ["br"]), function(br) { ++ this._forEachNode(this._getAllNodesWithTag(elem, ["br"]), function (br) { + var next = br.nextSibling; + + // Whether 2 or more
elements have been found and replaced with a +@@ -586,7 +701,7 @@ Readability.prototype = { + // If we find a
chain, remove the
s until we hit another node + // or non-whitespace. This leaves behind the first
in the chain + // (which will be replaced with a

later). +- while ((next = this._nextNode(next)) && (next.tagName == "BR")) { ++ while ((next = this._nextNode(next)) && next.tagName == "BR") { + replaced = true; + var brSibling = next.nextSibling; + next.parentNode.removeChild(next); +@@ -605,12 +720,10 @@ Readability.prototype = { + // If we've hit another

, we're done adding children to this

. + if (next.tagName == "BR") { + var nextElem = this._nextNode(next.nextSibling); +- if (nextElem && nextElem.tagName == "BR") +- break; ++ if (nextElem && nextElem.tagName == "BR") break; + } + +- if (!this._isPhrasingContent(next)) +- break; ++ if (!this._isPhrasingContent(next)) break; + + // Otherwise, make this node a child of the new

. + var sibling = next.nextSibling; +@@ -622,8 +735,7 @@ Readability.prototype = { + p.removeChild(p.lastChild); + } + +- if (p.parentNode.tagName === "P") +- this._setNodeTag(p.parentNode, "DIV"); ++ if (p.parentNode.tagName === "P") this._setNodeTag(p.parentNode, "DIV"); + } + }); + }, +@@ -641,12 +753,14 @@ Readability.prototype = { + replacement.appendChild(node.firstChild); + } + node.parentNode.replaceChild(replacement, node); +- if (node.readability) +- replacement.readability = node.readability; ++ if (node.readability) replacement.readability = node.readability; + + for (var i = 0; i < node.attributes.length; i++) { + try { +- replacement.setAttribute(node.attributes[i].name, node.attributes[i].value); ++ replacement.setAttribute( ++ node.attributes[i].name, ++ node.attributes[i].value ++ ); + } catch (ex) { + /* it's possible for setAttribute() to throw if the attribute name + * isn't a valid XML Name. Such attributes can however be parsed from +@@ -666,7 +780,7 @@ Readability.prototype = { + * @param Element + * @return void + **/ +- _prepArticle: function(articleContent) { ++ _prepArticle: function (articleContent) { + this._cleanStyles(articleContent); + + // Check for data tables before we continue, to avoid removing items in +@@ -692,7 +806,10 @@ Readability.prototype = { + + this._forEachNode(articleContent.children, function (topCandidate) { + this._cleanMatchedNodes(topCandidate, function (node, matchString) { +- return this.REGEXPS.shareElements.test(matchString) && node.textContent.length < shareElementThreshold; ++ return ( ++ this.REGEXPS.shareElements.test(matchString) && ++ node.textContent.length < shareElementThreshold ++ ); + }); + }); + +@@ -710,38 +827,56 @@ Readability.prototype = { + this._cleanConditionally(articleContent, "div"); + + // replace H1 with H2 as H1 should be only title that is displayed separately +- this._replaceNodeTags(this._getAllNodesWithTag(articleContent, ["h1"]), "h2"); ++ this._replaceNodeTags( ++ this._getAllNodesWithTag(articleContent, ["h1"]), ++ "h2" ++ ); + + // Remove extra paragraphs +- this._removeNodes(this._getAllNodesWithTag(articleContent, ["p"]), function (paragraph) { +- var imgCount = paragraph.getElementsByTagName("img").length; +- var embedCount = paragraph.getElementsByTagName("embed").length; +- var objectCount = paragraph.getElementsByTagName("object").length; +- // At this point, nasty iframes have been removed, only remain embedded video ones. +- var iframeCount = paragraph.getElementsByTagName("iframe").length; +- var totalCount = imgCount + embedCount + objectCount + iframeCount; +- +- return totalCount === 0 && !this._getInnerText(paragraph, false); +- }); ++ this._removeNodes( ++ this._getAllNodesWithTag(articleContent, ["p"]), ++ function (paragraph) { ++ var imgCount = paragraph.getElementsByTagName("img").length; ++ var embedCount = paragraph.getElementsByTagName("embed").length; ++ var objectCount = paragraph.getElementsByTagName("object").length; ++ // At this point, nasty iframes have been removed, only remain embedded video ones. ++ var iframeCount = paragraph.getElementsByTagName("iframe").length; ++ var totalCount = imgCount + embedCount + objectCount + iframeCount; ++ ++ return totalCount === 0 && !this._getInnerText(paragraph, false); ++ } ++ ); + +- this._forEachNode(this._getAllNodesWithTag(articleContent, ["br"]), function(br) { +- var next = this._nextNode(br.nextSibling); +- if (next && next.tagName == "P") +- br.parentNode.removeChild(br); +- }); ++ this._forEachNode( ++ this._getAllNodesWithTag(articleContent, ["br"]), ++ function (br) { ++ var next = this._nextNode(br.nextSibling); ++ if (next && next.tagName == "P") br.parentNode.removeChild(br); ++ } ++ ); + + // Remove single-cell tables +- this._forEachNode(this._getAllNodesWithTag(articleContent, ["table"]), function(table) { +- var tbody = this._hasSingleTagInsideElement(table, "TBODY") ? table.firstElementChild : table; +- if (this._hasSingleTagInsideElement(tbody, "TR")) { +- var row = tbody.firstElementChild; +- if (this._hasSingleTagInsideElement(row, "TD")) { +- var cell = row.firstElementChild; +- cell = this._setNodeTag(cell, this._everyNode(cell.childNodes, this._isPhrasingContent) ? "P" : "DIV"); +- table.parentNode.replaceChild(cell, table); ++ this._forEachNode( ++ this._getAllNodesWithTag(articleContent, ["table"]), ++ function (table) { ++ var tbody = this._hasSingleTagInsideElement(table, "TBODY") ++ ? table.firstElementChild ++ : table; ++ if (this._hasSingleTagInsideElement(tbody, "TR")) { ++ var row = tbody.firstElementChild; ++ if (this._hasSingleTagInsideElement(row, "TD")) { ++ var cell = row.firstElementChild; ++ cell = this._setNodeTag( ++ cell, ++ this._everyNode(cell.childNodes, this._isPhrasingContent) ++ ? "P" ++ : "DIV" ++ ); ++ table.parentNode.replaceChild(cell, table); ++ } + } + } +- }); ++ ); + }, + + /** +@@ -750,9 +885,9 @@ Readability.prototype = { + * + * @param Element + * @return void +- **/ +- _initializeNode: function(node) { +- node.readability = {"contentScore": 0}; ++ **/ ++ _initializeNode: function (node) { ++ node.readability = { contentScore: 0 }; + + switch (node.tagName) { + case "DIV": +@@ -790,7 +925,7 @@ Readability.prototype = { + node.readability.contentScore += this._getClassWeight(node); + }, + +- _removeAndGetNext: function(node) { ++ _removeAndGetNext: function (node) { + var nextNode = this._getNextNode(node, true); + node.parentNode.removeChild(node); + return nextNode; +@@ -803,7 +938,7 @@ Readability.prototype = { + * + * Calling this in a loop will traverse the DOM depth-first. + */ +- _getNextNode: function(node, ignoreSelfAndKids) { ++ _getNextNode: function (node, ignoreSelfAndKids) { + // First check for kids if those aren't being ignored + if (!ignoreSelfAndKids && node.firstElementChild) { + return node.firstElementChild; +@@ -825,18 +960,24 @@ Readability.prototype = { + // 1 = same text, 0 = completely different text + // works the way that it splits both texts into words and then finds words that are unique in second text + // the result is given by the lower length of unique parts +- _textSimilarity: function(textA, textB) { +- var tokensA = textA.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean); +- var tokensB = textB.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean); ++ _textSimilarity: function (textA, textB) { ++ var tokensA = textA ++ .toLowerCase() ++ .split(this.REGEXPS.tokenize) ++ .filter(Boolean); ++ var tokensB = textB ++ .toLowerCase() ++ .split(this.REGEXPS.tokenize) ++ .filter(Boolean); + if (!tokensA.length || !tokensB.length) { + return 0; + } +- var uniqTokensB = tokensB.filter(token => !tokensA.includes(token)); ++ var uniqTokensB = tokensB.filter((token) => !tokensA.includes(token)); + var distanceB = uniqTokensB.join(" ").length / tokensB.join(" ").length; + return 1 - distanceB; + }, + +- _checkByline: function(node, matchString) { ++ _checkByline: function (node, matchString) { + if (this._articleByline) { + return false; + } +@@ -846,7 +987,12 @@ Readability.prototype = { + var itemprop = node.getAttribute("itemprop"); + } + +- if ((rel === "author" || (itemprop && itemprop.indexOf("author") !== -1) || this.REGEXPS.byline.test(matchString)) && this._isValidByline(node.textContent)) { ++ if ( ++ (rel === "author" || ++ (itemprop && itemprop.indexOf("author") !== -1) || ++ this.REGEXPS.byline.test(matchString)) && ++ this._isValidByline(node.textContent) ++ ) { + this._articleByline = node.textContent.trim(); + return true; + } +@@ -854,13 +1000,13 @@ Readability.prototype = { + return false; + }, + +- _getNodeAncestors: function(node, maxDepth) { ++ _getNodeAncestors: function (node, maxDepth) { + maxDepth = maxDepth || 0; +- var i = 0, ancestors = []; ++ var i = 0, ++ ancestors = []; + while (node.parentNode) { + ancestors.push(node.parentNode); +- if (maxDepth && ++i === maxDepth) +- break; ++ if (maxDepth && ++i === maxDepth) break; + node = node.parentNode; + } + return ancestors; +@@ -872,7 +1018,7 @@ Readability.prototype = { + * + * @param page a document to run upon. Needs to be a full document, complete with body. + * @return Element +- **/ ++ **/ + _grabArticle: function (page) { + this.log("**** grabArticle ****"); + var doc = this._doc; +@@ -889,7 +1035,9 @@ Readability.prototype = { + + while (true) { + this.log("Starting grabArticle loop"); +- var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS); ++ var stripUnlikelyCandidates = this._flagIsActive( ++ this.FLAG_STRIP_UNLIKELYS ++ ); + + // First, node prepping. Trash nodes that look cruddy (like ones with the + // class name "comment", etc), and turn divs into P tags where they have been +@@ -900,7 +1048,6 @@ Readability.prototype = { + let shouldRemoveTitleHeader = true; + + while (node) { +- + if (node.tagName === "HTML") { + this._articleLang = node.getAttribute("lang"); + } +@@ -914,7 +1061,10 @@ Readability.prototype = { + } + + // User is not able to see elements applied with both "aria-modal = true" and "role = dialog" +- if (node.getAttribute("aria-modal") == "true" && node.getAttribute("role") == "dialog") { ++ if ( ++ node.getAttribute("aria-modal") == "true" && ++ node.getAttribute("role") == "dialog" ++ ) { + node = this._removeAndGetNext(node); + continue; + } +@@ -926,7 +1076,11 @@ Readability.prototype = { + } + + if (shouldRemoveTitleHeader && this._headerDuplicatesTitle(node)) { +- this.log("Removing header: ", node.textContent.trim(), this._articleTitle.trim()); ++ this.log( ++ "Removing header: ", ++ node.textContent.trim(), ++ this._articleTitle.trim() ++ ); + shouldRemoveTitleHeader = false; + node = this._removeAndGetNext(node); + continue; +@@ -934,29 +1088,44 @@ Readability.prototype = { + + // Remove unlikely candidates + if (stripUnlikelyCandidates) { +- if (this.REGEXPS.unlikelyCandidates.test(matchString) && +- !this.REGEXPS.okMaybeItsACandidate.test(matchString) && +- !this._hasAncestorTag(node, "table") && +- !this._hasAncestorTag(node, "code") && +- node.tagName !== "BODY" && +- node.tagName !== "A") { ++ if ( ++ this.REGEXPS.unlikelyCandidates.test(matchString) && ++ !this.REGEXPS.okMaybeItsACandidate.test(matchString) && ++ !this._hasAncestorTag(node, "table") && ++ !this._hasAncestorTag(node, "code") && ++ node.tagName !== "BODY" && ++ node.tagName !== "A" ++ ) { + this.log("Removing unlikely candidate - " + matchString); + node = this._removeAndGetNext(node); + continue; + } + + if (this.UNLIKELY_ROLES.includes(node.getAttribute("role"))) { +- this.log("Removing content with role " + node.getAttribute("role") + " - " + matchString); ++ this.log( ++ "Removing content with role " + ++ node.getAttribute("role") + ++ " - " + ++ matchString ++ ); + node = this._removeAndGetNext(node); + continue; + } + } + + // Remove DIV, SECTION, and HEADER nodes without any content(e.g. text, image, video, or iframe). +- if ((node.tagName === "DIV" || node.tagName === "SECTION" || node.tagName === "HEADER" || +- node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" || +- node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") && +- this._isElementWithoutContent(node)) { ++ if ( ++ (node.tagName === "DIV" || ++ node.tagName === "SECTION" || ++ node.tagName === "HEADER" || ++ node.tagName === "H1" || ++ node.tagName === "H2" || ++ node.tagName === "H3" || ++ node.tagName === "H4" || ++ node.tagName === "H5" || ++ node.tagName === "H6") && ++ this._isElementWithoutContent(node) ++ ) { + node = this._removeAndGetNext(node); + continue; + } +@@ -993,7 +1162,10 @@ Readability.prototype = { + // element. DIVs with only a P element inside and no text content can be + // safely converted into plain P elements to avoid confusing the scoring + // algorithm with DIVs with are, in practice, paragraphs. +- if (this._hasSingleTagInsideElement(node, "P") && this._getLinkDensity(node) < 0.25) { ++ if ( ++ this._hasSingleTagInsideElement(node, "P") && ++ this._getLinkDensity(node) < 0.25 ++ ) { + var newNode = node.children[0]; + node.parentNode.replaceChild(newNode, node); + node = newNode; +@@ -1011,21 +1183,22 @@ Readability.prototype = { + * Then add their score to their parent node. + * + * A score is determined by things like number of commas, class names, etc. Maybe eventually link density. +- **/ ++ **/ + var candidates = []; +- this._forEachNode(elementsToScore, function(elementToScore) { +- if (!elementToScore.parentNode || typeof(elementToScore.parentNode.tagName) === "undefined") ++ this._forEachNode(elementsToScore, function (elementToScore) { ++ if ( ++ !elementToScore.parentNode || ++ typeof elementToScore.parentNode.tagName === "undefined" ++ ) + return; + + // If this paragraph is less than 25 characters, don't even count it. + var innerText = this._getInnerText(elementToScore); +- if (innerText.length < 25) +- return; ++ if (innerText.length < 25) return; + + // Exclude nodes with no ancestor. + var ancestors = this._getNodeAncestors(elementToScore, 5); +- if (ancestors.length === 0) +- return; ++ if (ancestors.length === 0) return; + + var contentScore = 0; + +@@ -1039,11 +1212,15 @@ Readability.prototype = { + contentScore += Math.min(Math.floor(innerText.length / 100), 3); + + // Initialize and score ancestors. +- this._forEachNode(ancestors, function(ancestor, level) { +- if (!ancestor.tagName || !ancestor.parentNode || typeof(ancestor.parentNode.tagName) === "undefined") ++ this._forEachNode(ancestors, function (ancestor, level) { ++ if ( ++ !ancestor.tagName || ++ !ancestor.parentNode || ++ typeof ancestor.parentNode.tagName === "undefined" ++ ) + return; + +- if (typeof(ancestor.readability) === "undefined") { ++ if (typeof ancestor.readability === "undefined") { + this._initializeNode(ancestor); + candidates.push(ancestor); + } +@@ -1052,12 +1229,9 @@ Readability.prototype = { + // - parent: 1 (no division) + // - grandparent: 2 + // - great grandparent+: ancestor level * 3 +- if (level === 0) +- var scoreDivider = 1; +- else if (level === 1) +- scoreDivider = 2; +- else +- scoreDivider = level * 3; ++ if (level === 0) var scoreDivider = 1; ++ else if (level === 1) scoreDivider = 2; ++ else scoreDivider = level * 3; + ancestor.readability.contentScore += contentScore / scoreDivider; + }); + }); +@@ -1071,7 +1245,9 @@ Readability.prototype = { + // Scale the final candidates score based on link density. Good content + // should have a relatively small link density (5% or less) and be mostly + // unaffected by this operation. +- var candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate)); ++ var candidateScore = ++ candidate.readability.contentScore * ++ (1 - this._getLinkDensity(candidate)); + candidate.readability.contentScore = candidateScore; + + this.log("Candidate:", candidate, "with score " + candidateScore); +@@ -1079,7 +1255,10 @@ Readability.prototype = { + for (var t = 0; t < this._nbTopCandidates; t++) { + var aTopCandidate = topCandidates[t]; + +- if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) { ++ if ( ++ !aTopCandidate || ++ candidateScore > aTopCandidate.readability.contentScore ++ ) { + topCandidates.splice(t, 0, candidate); + if (topCandidates.length > this._nbTopCandidates) + topCandidates.pop(); +@@ -1113,8 +1292,14 @@ Readability.prototype = { + // and whose scores are quite closed with current `topCandidate` node. + var alternativeCandidateAncestors = []; + for (var i = 1; i < topCandidates.length; i++) { +- if (topCandidates[i].readability.contentScore / topCandidate.readability.contentScore >= 0.75) { +- alternativeCandidateAncestors.push(this._getNodeAncestors(topCandidates[i])); ++ if ( ++ topCandidates[i].readability.contentScore / ++ topCandidate.readability.contentScore >= ++ 0.75 ++ ) { ++ alternativeCandidateAncestors.push( ++ this._getNodeAncestors(topCandidates[i]) ++ ); + } + } + var MINIMUM_TOPCANDIDATES = 3; +@@ -1122,8 +1307,17 @@ Readability.prototype = { + parentOfTopCandidate = topCandidate.parentNode; + while (parentOfTopCandidate.tagName !== "BODY") { + var listsContainingThisAncestor = 0; +- for (var ancestorIndex = 0; ancestorIndex < alternativeCandidateAncestors.length && listsContainingThisAncestor < MINIMUM_TOPCANDIDATES; ancestorIndex++) { +- listsContainingThisAncestor += Number(alternativeCandidateAncestors[ancestorIndex].includes(parentOfTopCandidate)); ++ for ( ++ var ancestorIndex = 0; ++ ancestorIndex < alternativeCandidateAncestors.length && ++ listsContainingThisAncestor < MINIMUM_TOPCANDIDATES; ++ ancestorIndex++ ++ ) { ++ listsContainingThisAncestor += Number( ++ alternativeCandidateAncestors[ancestorIndex].includes( ++ parentOfTopCandidate ++ ) ++ ); + } + if (listsContainingThisAncestor >= MINIMUM_TOPCANDIDATES) { + topCandidate = parentOfTopCandidate; +@@ -1153,8 +1347,7 @@ Readability.prototype = { + continue; + } + var parentScore = parentOfTopCandidate.readability.contentScore; +- if (parentScore < scoreThreshold) +- break; ++ if (parentScore < scoreThreshold) break; + if (parentScore > lastScore) { + // Alright! We found a better parent to use. + topCandidate = parentOfTopCandidate; +@@ -1167,7 +1360,10 @@ Readability.prototype = { + // If the top candidate is the only child, use parent instead. This will help sibling + // joining logic when adjacent content is actually located in parent's sibling node. + parentOfTopCandidate = topCandidate.parentNode; +- while (parentOfTopCandidate.tagName != "BODY" && parentOfTopCandidate.children.length == 1) { ++ while ( ++ parentOfTopCandidate.tagName != "BODY" && ++ parentOfTopCandidate.children.length == 1 ++ ) { + topCandidate = parentOfTopCandidate; + parentOfTopCandidate = topCandidate.parentNode; + } +@@ -1180,10 +1376,12 @@ Readability.prototype = { + // that might also be related. Things like preambles, content split by ads + // that we removed, etc. + var articleContent = doc.createElement("DIV"); +- if (isPaging) +- articleContent.id = "readability-content"; ++ if (isPaging) articleContent.id = "readability-content"; + +- var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2); ++ var siblingScoreThreshold = Math.max( ++ 10, ++ topCandidate.readability.contentScore * 0.2 ++ ); + // Keep potential top candidate's parent node to try to get text direction of it later. + parentOfTopCandidate = topCandidate.parentNode; + var siblings = parentOfTopCandidate.children; +@@ -1192,8 +1390,17 @@ Readability.prototype = { + var sibling = siblings[s]; + var append = false; + +- this.log("Looking at sibling node:", sibling, sibling.readability ? ("with score " + sibling.readability.contentScore) : ""); +- this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : "Unknown"); ++ this.log( ++ "Looking at sibling node:", ++ sibling, ++ sibling.readability ++ ? "with score " + sibling.readability.contentScore ++ : "" ++ ); ++ this.log( ++ "Sibling has score", ++ sibling.readability ? sibling.readability.contentScore : "Unknown" ++ ); + + if (sibling === topCandidate) { + append = true; +@@ -1201,11 +1408,17 @@ Readability.prototype = { + var contentBonus = 0; + + // Give a bonus if sibling nodes and top candidates have the example same classname +- if (sibling.className === topCandidate.className && topCandidate.className !== "") ++ if ( ++ sibling.className === topCandidate.className && ++ topCandidate.className !== "" ++ ) + contentBonus += topCandidate.readability.contentScore * 0.2; + +- if (sibling.readability && +- ((sibling.readability.contentScore + contentBonus) >= siblingScoreThreshold)) { ++ if ( ++ sibling.readability && ++ sibling.readability.contentScore + contentBonus >= ++ siblingScoreThreshold ++ ) { + append = true; + } else if (sibling.nodeName === "P") { + var linkDensity = this._getLinkDensity(sibling); +@@ -1214,8 +1427,12 @@ Readability.prototype = { + + if (nodeLength > 80 && linkDensity < 0.25) { + append = true; +- } else if (nodeLength < 80 && nodeLength > 0 && linkDensity === 0 && +- nodeContent.search(/\.( |$)/) !== -1) { ++ } else if ( ++ nodeLength < 80 && ++ nodeLength > 0 && ++ linkDensity === 0 && ++ nodeContent.search(/\.( |$)/) !== -1 ++ ) { + append = true; + } + } +@@ -1286,15 +1503,27 @@ Readability.prototype = { + + if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) { + this._removeFlag(this.FLAG_STRIP_UNLIKELYS); +- this._attempts.push({articleContent: articleContent, textLength: textLength}); ++ this._attempts.push({ ++ articleContent: articleContent, ++ textLength: textLength, ++ }); + } else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) { + this._removeFlag(this.FLAG_WEIGHT_CLASSES); +- this._attempts.push({articleContent: articleContent, textLength: textLength}); ++ this._attempts.push({ ++ articleContent: articleContent, ++ textLength: textLength, ++ }); + } else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) { + this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY); +- this._attempts.push({articleContent: articleContent, textLength: textLength}); ++ this._attempts.push({ ++ articleContent: articleContent, ++ textLength: textLength, ++ }); + } else { +- this._attempts.push({articleContent: articleContent, textLength: textLength}); ++ this._attempts.push({ ++ articleContent: articleContent, ++ textLength: textLength, ++ }); + // No luck after removing flags, just return the longest text we found during the different loops + this._attempts.sort(function (a, b) { + return b.textLength - a.textLength; +@@ -1312,10 +1541,11 @@ Readability.prototype = { + + if (parseSuccessful) { + // Find out text direction from ancestors of final top candidate. +- var ancestors = [parentOfTopCandidate, topCandidate].concat(this._getNodeAncestors(parentOfTopCandidate)); +- this._someNode(ancestors, function(ancestor) { +- if (!ancestor.tagName) +- return false; ++ var ancestors = [parentOfTopCandidate, topCandidate].concat( ++ this._getNodeAncestors(parentOfTopCandidate) ++ ); ++ this._someNode(ancestors, function (ancestor) { ++ if (!ancestor.tagName) return false; + var articleDir = ancestor.getAttribute("dir"); + if (articleDir) { + this._articleDir = articleDir; +@@ -1336,10 +1566,10 @@ Readability.prototype = { + * @param possibleByline {string} - a string to check whether its a byline. + * @return Boolean - whether the input string is a byline. + */ +- _isValidByline: function(byline) { ++ _isValidByline: function (byline) { + if (typeof byline == "string" || byline instanceof String) { + byline = byline.trim(); +- return (byline.length > 0) && (byline.length < 100); ++ return byline.length > 0 && byline.length < 100; + } + return false; + }, +@@ -1350,18 +1580,23 @@ Readability.prototype = { + * @param str {string} - a string to unescape. + * @return string without HTML entity. + */ +- _unescapeHtmlEntities: function(str) { ++ _unescapeHtmlEntities: function (str) { + if (!str) { + return str; + } + + var htmlEscapeMap = this.HTML_ESCAPE_MAP; +- return str.replace(/&(quot|amp|apos|lt|gt);/g, function(_, tag) { +- return htmlEscapeMap[tag]; +- }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(_, hex, numStr) { +- var num = parseInt(hex || numStr, hex ? 16 : 10); +- return String.fromCharCode(num); +- }); ++ return str ++ .replace(/&(quot|amp|apos|lt|gt);/g, function (_, tag) { ++ return htmlEscapeMap[tag]; ++ }) ++ .replace( ++ /&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, ++ function (_, hex, numStr) { ++ var num = parseInt(hex || numStr, hex ? 16 : 10); ++ return String.fromCharCode(num); ++ } ++ ); + }, + + /** +@@ -1374,11 +1609,17 @@ Readability.prototype = { + + var metadata; + +- this._forEachNode(scripts, function(jsonLdElement) { +- if (!metadata && jsonLdElement.getAttribute("type") === "application/ld+json") { ++ this._forEachNode(scripts, function (jsonLdElement) { ++ if ( ++ !metadata && ++ jsonLdElement.getAttribute("type") === "application/ld+json" ++ ) { + try { + // Strip CDATA markers if present +- var content = jsonLdElement.textContent.replace(/^\s*\s*$/g, ""); ++ var content = jsonLdElement.textContent.replace( ++ /^\s*\s*$/g, ++ "" ++ ); + var parsed = JSON.parse(content); + if ( + !parsed["@context"] || +@@ -1388,10 +1629,8 @@ Readability.prototype = { + } + + if (!parsed["@type"] && Array.isArray(parsed["@graph"])) { +- parsed = parsed["@graph"].find(function(it) { +- return (it["@type"] || "").match( +- this.REGEXPS.jsonLdArticleTypes +- ); ++ parsed = parsed["@graph"].find(function (it) { ++ return (it["@type"] || "").match(this.REGEXPS.jsonLdArticleTypes); + }); + } + +@@ -1405,14 +1644,19 @@ Readability.prototype = { + + metadata = {}; + +- if (typeof parsed.name === "string" && typeof parsed.headline === "string" && parsed.name !== parsed.headline) { ++ if ( ++ typeof parsed.name === "string" && ++ typeof parsed.headline === "string" && ++ parsed.name !== parsed.headline ++ ) { + // we have both name and headline element in the JSON-LD. They should both be the same but some websites like aktualne.cz + // put their own name into "name" and the article title to "headline" which confuses Readability. So we try to check if either + // "name" or "headline" closely matches the html title, and if so, use that one. If not, then we use "name" by default. + + var title = this._getArticleTitle(); + var nameMatches = this._textSimilarity(parsed.name, title) > 0.75; +- var headlineMatches = this._textSimilarity(parsed.headline, title) > 0.75; ++ var headlineMatches = ++ this._textSimilarity(parsed.headline, title) > 0.75; + + if (headlineMatches && !nameMatches) { + metadata.title = parsed.headline; +@@ -1427,12 +1671,16 @@ Readability.prototype = { + if (parsed.author) { + if (typeof parsed.author.name === "string") { + metadata.byline = parsed.author.name.trim(); +- } else if (Array.isArray(parsed.author) && parsed.author[0] && typeof parsed.author[0].name === "string") { ++ } else if ( ++ Array.isArray(parsed.author) && ++ parsed.author[0] && ++ typeof parsed.author[0].name === "string" ++ ) { + metadata.byline = parsed.author +- .filter(function(author) { ++ .filter(function (author) { + return author && typeof author.name === "string"; + }) +- .map(function(author) { ++ .map(function (author) { + return author.name.trim(); + }) + .join(", "); +@@ -1441,10 +1689,7 @@ Readability.prototype = { + if (typeof parsed.description === "string") { + metadata.excerpt = parsed.description.trim(); + } +- if ( +- parsed.publisher && +- typeof parsed.publisher.name === "string" +- ) { ++ if (parsed.publisher && typeof parsed.publisher.name === "string") { + metadata.siteName = parsed.publisher.name.trim(); + } + if (typeof parsed.datePublished === "string") { +@@ -1467,19 +1712,21 @@ Readability.prototype = { + * + * @return Object with optional "excerpt" and "byline" properties + */ +- _getArticleMetadata: function(jsonld) { ++ _getArticleMetadata: function (jsonld) { + var metadata = {}; + var values = {}; + var metaElements = this._doc.getElementsByTagName("meta"); + + // property is a space-separated list of values +- var propertyPattern = /\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi; ++ var propertyPattern = ++ /\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi; + + // name is a single value +- var namePattern = /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i; ++ var namePattern = ++ /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i; + + // Find description tags. +- this._forEachNode(metaElements, function(element) { ++ this._forEachNode(metaElements, function (element) { + var elementName = element.getAttribute("name"); + var elementProperty = element.getAttribute("property"); + var content = element.getAttribute("content"); +@@ -1511,42 +1758,44 @@ Readability.prototype = { + }); + + // get title +- metadata.title = jsonld.title || +- values["dc:title"] || +- values["dcterm:title"] || +- values["og:title"] || +- values["weibo:article:title"] || +- values["weibo:webpage:title"] || +- values["title"] || +- values["twitter:title"]; ++ metadata.title = ++ jsonld.title || ++ values["dc:title"] || ++ values["dcterm:title"] || ++ values["og:title"] || ++ values["weibo:article:title"] || ++ values["weibo:webpage:title"] || ++ values["title"] || ++ values["twitter:title"]; + + if (!metadata.title) { + metadata.title = this._getArticleTitle(); + } + + // get author +- metadata.byline = jsonld.byline || +- values["dc:creator"] || +- values["dcterm:creator"] || +- values["author"]; ++ metadata.byline = ++ jsonld.byline || ++ values["dc:creator"] || ++ values["dcterm:creator"] || ++ values["author"]; + + // get description +- metadata.excerpt = jsonld.excerpt || +- values["dc:description"] || +- values["dcterm:description"] || +- values["og:description"] || +- values["weibo:article:description"] || +- values["weibo:webpage:description"] || +- values["description"] || +- values["twitter:description"]; ++ metadata.excerpt = ++ jsonld.excerpt || ++ values["dc:description"] || ++ values["dcterm:description"] || ++ values["og:description"] || ++ values["weibo:article:description"] || ++ values["weibo:webpage:description"] || ++ values["description"] || ++ values["twitter:description"]; + + // get site name +- metadata.siteName = jsonld.siteName || +- values["og:site_name"]; ++ metadata.siteName = jsonld.siteName || values["og:site_name"]; + + // get article published time +- metadata.publishedTime = jsonld.datePublished || +- values["article:published_time"] || null; ++ metadata.publishedTime = ++ jsonld.datePublished || values["article:published_time"] || null; + + // in many sites the meta value is escaped with HTML entities, + // so here we need to unescape it +@@ -1564,8 +1813,8 @@ Readability.prototype = { + * whether as a direct child or as its descendants. + * + * @param Element +- **/ +- _isSingleImage: function(node) { ++ **/ ++ _isSingleImage: function (node) { + if (node.tagName === "IMG") { + return true; + } +@@ -1584,12 +1833,12 @@ Readability.prototype = { + * some sites (e.g. Medium). + * + * @param Element +- **/ +- _unwrapNoscriptImages: function(doc) { ++ **/ ++ _unwrapNoscriptImages: function (doc) { + // Find img without source or attributes that might contains image, and remove it. + // This is done to prevent a placeholder img is replaced by img from noscript in next step. + var imgs = Array.from(doc.getElementsByTagName("img")); +- this._forEachNode(imgs, function(img) { ++ this._forEachNode(imgs, function (img) { + for (var i = 0; i < img.attributes.length; i++) { + var attr = img.attributes[i]; + switch (attr.name) { +@@ -1610,7 +1859,7 @@ Readability.prototype = { + + // Next find noscript and try to extract its image + var noscripts = Array.from(doc.getElementsByTagName("noscript")); +- this._forEachNode(noscripts, function(noscript) { ++ this._forEachNode(noscripts, function (noscript) { + // Parse content of noscript and make sure it only contains image + var tmp = doc.createElement("div"); + tmp.innerHTML = noscript.innerHTML; +@@ -1635,7 +1884,11 @@ Readability.prototype = { + continue; + } + +- if (attr.name === "src" || attr.name === "srcset" || /\.(jpg|jpeg|png|webp)/i.test(attr.value)) { ++ if ( ++ attr.name === "src" || ++ attr.name === "srcset" || ++ /\.(jpg|jpeg|png|webp)/i.test(attr.value) ++ ) { + if (newImg.getAttribute(attr.name) === attr.value) { + continue; + } +@@ -1658,8 +1911,8 @@ Readability.prototype = { + * Removes script tags from the document. + * + * @param Element +- **/ +- _removeScripts: function(doc) { ++ **/ ++ _removeScripts: function (doc) { + this._removeNodes(this._getAllNodesWithTag(doc, ["script", "noscript"])); + }, + +@@ -1670,25 +1923,31 @@ Readability.prototype = { + * + * @param Element + * @param string tag of child element +- **/ +- _hasSingleTagInsideElement: function(element, tag) { ++ **/ ++ _hasSingleTagInsideElement: function (element, tag) { + // There should be exactly 1 element child with given tag + if (element.children.length != 1 || element.children[0].tagName !== tag) { + return false; + } + + // And there should be no text nodes with real content +- return !this._someNode(element.childNodes, function(node) { +- return node.nodeType === this.TEXT_NODE && +- this.REGEXPS.hasContent.test(node.textContent); ++ return !this._someNode(element.childNodes, function (node) { ++ return ( ++ node.nodeType === this.TEXT_NODE && ++ this.REGEXPS.hasContent.test(node.textContent) ++ ); + }); + }, + +- _isElementWithoutContent: function(node) { +- return node.nodeType === this.ELEMENT_NODE && ++ _isElementWithoutContent: function (node) { ++ return ( ++ node.nodeType === this.ELEMENT_NODE && + node.textContent.trim().length == 0 && + (node.children.length == 0 || +- node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length); ++ node.children.length == ++ node.getElementsByTagName("br").length + ++ node.getElementsByTagName("hr").length) ++ ); + }, + + /** +@@ -1697,25 +1956,35 @@ Readability.prototype = { + * @param Element + */ + _hasChildBlockElement: function (element) { +- return this._someNode(element.childNodes, function(node) { +- return this.DIV_TO_P_ELEMS.has(node.tagName) || +- this._hasChildBlockElement(node); ++ return this._someNode(element.childNodes, function (node) { ++ return ( ++ this.DIV_TO_P_ELEMS.has(node.tagName) || ++ this._hasChildBlockElement(node) ++ ); + }); + }, + + /*** + * Determine if a node qualifies as phrasing content. + * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content +- **/ +- _isPhrasingContent: function(node) { +- return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.indexOf(node.tagName) !== -1 || +- ((node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") && +- this._everyNode(node.childNodes, this._isPhrasingContent)); ++ **/ ++ _isPhrasingContent: function (node) { ++ return ( ++ node.nodeType === this.TEXT_NODE || ++ this.PHRASING_ELEMS.indexOf(node.tagName) !== -1 || ++ ((node.tagName === "A" || ++ node.tagName === "DEL" || ++ node.tagName === "INS") && ++ this._everyNode(node.childNodes, this._isPhrasingContent)) ++ ); + }, + +- _isWhitespace: function(node) { +- return (node.nodeType === this.TEXT_NODE && node.textContent.trim().length === 0) || +- (node.nodeType === this.ELEMENT_NODE && node.tagName === "BR"); ++ _isWhitespace: function (node) { ++ return ( ++ (node.nodeType === this.TEXT_NODE && ++ node.textContent.trim().length === 0) || ++ (node.nodeType === this.ELEMENT_NODE && node.tagName === "BR") ++ ); + }, + + /** +@@ -1725,9 +1994,10 @@ Readability.prototype = { + * @param Element + * @param Boolean normalizeSpaces (default: true) + * @return string +- **/ +- _getInnerText: function(e, normalizeSpaces) { +- normalizeSpaces = (typeof normalizeSpaces === "undefined") ? true : normalizeSpaces; ++ **/ ++ _getInnerText: function (e, normalizeSpaces) { ++ normalizeSpaces = ++ typeof normalizeSpaces === "undefined" ? true : normalizeSpaces; + var textContent = e.textContent.trim(); + + if (normalizeSpaces) { +@@ -1742,8 +2012,8 @@ Readability.prototype = { + * @param Element + * @param string - what to split on. Default is "," + * @return number (integer) +- **/ +- _getCharCount: function(e, s) { ++ **/ ++ _getCharCount: function (e, s) { + s = s || ","; + return this._getInnerText(e).split(s).length - 1; + }, +@@ -1754,10 +2024,9 @@ Readability.prototype = { + * + * @param Element + * @return void +- **/ +- _cleanStyles: function(e) { +- if (!e || e.tagName.toLowerCase() === "svg") +- return; ++ **/ ++ _cleanStyles: function (e) { ++ if (!e || e.tagName.toLowerCase() === "svg") return; + + // Remove `style` and deprecated presentational attributes + for (var i = 0; i < this.PRESENTATIONAL_ATTRIBUTES.length; i++) { +@@ -1782,16 +2051,15 @@ Readability.prototype = { + * + * @param Element + * @return number (float) +- **/ +- _getLinkDensity: function(element) { ++ **/ ++ _getLinkDensity: function (element) { + var textLength = this._getInnerText(element).length; +- if (textLength === 0) +- return 0; ++ if (textLength === 0) return 0; + + var linkLength = 0; + + // XXX implement _reduceNodeList? +- this._forEachNode(element.getElementsByTagName("a"), function(linkNode) { ++ this._forEachNode(element.getElementsByTagName("a"), function (linkNode) { + var href = linkNode.getAttribute("href"); + var coefficient = href && this.REGEXPS.hashUrl.test(href) ? 0.3 : 1; + linkLength += this._getInnerText(linkNode).length * coefficient; +@@ -1806,29 +2074,24 @@ Readability.prototype = { + * + * @param Element + * @return number (Integer) +- **/ +- _getClassWeight: function(e) { +- if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) +- return 0; ++ **/ ++ _getClassWeight: function (e) { ++ if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) return 0; + + var weight = 0; + + // Look for a special classname +- if (typeof(e.className) === "string" && e.className !== "") { +- if (this.REGEXPS.negative.test(e.className)) +- weight -= 25; ++ if (typeof e.className === "string" && e.className !== "") { ++ if (this.REGEXPS.negative.test(e.className)) weight -= 25; + +- if (this.REGEXPS.positive.test(e.className)) +- weight += 25; ++ if (this.REGEXPS.positive.test(e.className)) weight += 25; + } + + // Look for a special ID +- if (typeof(e.id) === "string" && e.id !== "") { +- if (this.REGEXPS.negative.test(e.id)) +- weight -= 25; ++ if (typeof e.id === "string" && e.id !== "") { ++ if (this.REGEXPS.negative.test(e.id)) weight -= 25; + +- if (this.REGEXPS.positive.test(e.id)) +- weight += 25; ++ if (this.REGEXPS.positive.test(e.id)) weight += 25; + } + + return weight; +@@ -1842,10 +2105,10 @@ Readability.prototype = { + * @param string tag to clean + * @return void + **/ +- _clean: function(e, tag) { ++ _clean: function (e, tag) { + var isEmbed = ["object", "embed", "iframe"].indexOf(tag) !== -1; + +- this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(element) { ++ this._removeNodes(this._getAllNodesWithTag(e, [tag]), function (element) { + // Allow youtube and vimeo videos through as people usually want to see those. + if (isEmbed) { + // First, check the elements attributes to see if any of them contain youtube or vimeo +@@ -1856,7 +2119,10 @@ Readability.prototype = { + } + + // For embed with tag, check inner HTML as well. +- if (element.tagName === "object" && this._allowedVideoRegex.test(element.innerHTML)) { ++ if ( ++ element.tagName === "object" && ++ this._allowedVideoRegex.test(element.innerHTML) ++ ) { + return false; + } + } +@@ -1874,14 +2140,16 @@ Readability.prototype = { + * @param Function filterFn a filter to invoke to determine whether this node 'counts' + * @return Boolean + */ +- _hasAncestorTag: function(node, tagName, maxDepth, filterFn) { ++ _hasAncestorTag: function (node, tagName, maxDepth, filterFn) { + maxDepth = maxDepth || 3; + tagName = tagName.toUpperCase(); + var depth = 0; + while (node.parentNode) { +- if (maxDepth > 0 && depth > maxDepth) +- return false; +- if (node.parentNode.tagName === tagName && (!filterFn || filterFn(node.parentNode))) ++ if (maxDepth > 0 && depth > maxDepth) return false; ++ if ( ++ node.parentNode.tagName === tagName && ++ (!filterFn || filterFn(node.parentNode)) ++ ) + return true; + node = node.parentNode; + depth++; +@@ -1892,7 +2160,7 @@ Readability.prototype = { + /** + * Return an object indicating how many rows and columns this table has. + */ +- _getRowAndColumnCount: function(table) { ++ _getRowAndColumnCount: function (table) { + var rows = 0; + var columns = 0; + var trs = table.getElementsByTagName("tr"); +@@ -1901,7 +2169,7 @@ Readability.prototype = { + if (rowspan) { + rowspan = parseInt(rowspan, 10); + } +- rows += (rowspan || 1); ++ rows += rowspan || 1; + + // Now look for column-related info + var columnsInThisRow = 0; +@@ -1911,11 +2179,11 @@ Readability.prototype = { + if (colspan) { + colspan = parseInt(colspan, 10); + } +- columnsInThisRow += (colspan || 1); ++ columnsInThisRow += colspan || 1; + } + columns = Math.max(columns, columnsInThisRow); + } +- return {rows: rows, columns: columns}; ++ return { rows: rows, columns: columns }; + }, + + /** +@@ -1923,7 +2191,7 @@ Readability.prototype = { + * similar checks as + * https://searchfox.org/mozilla-central/rev/f82d5c549f046cb64ce5602bfd894b7ae807c8f8/accessible/generic/TableAccessible.cpp#19 + */ +- _markDataTables: function(root) { ++ _markDataTables: function (root) { + var tables = root.getElementsByTagName("table"); + for (var i = 0; i < tables.length; i++) { + var table = tables[i]; +@@ -1951,7 +2219,7 @@ Readability.prototype = { + + // If the table has a descendant with any of these tags, consider a data table: + var dataTableDescendants = ["col", "colgroup", "tfoot", "thead", "th"]; +- var descendantExists = function(tag) { ++ var descendantExists = function (tag) { + return !!table.getElementsByTagName(tag)[0]; + }; + if (dataTableDescendants.some(descendantExists)) { +@@ -1978,82 +2246,98 @@ Readability.prototype = { + + /* convert images and figures that have properties like data-src into images that can be loaded without JS */ + _fixLazyImages: function (root) { +- this._forEachNode(this._getAllNodesWithTag(root, ["img", "picture", "figure"]), function (elem) { +- // In some sites (e.g. Kotaku), they put 1px square image as base64 data uri in the src attribute. +- // So, here we check if the data uri is too short, just might as well remove it. +- if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) { +- // Make sure it's not SVG, because SVG can have a meaningful image in under 133 bytes. +- var parts = this.REGEXPS.b64DataUrl.exec(elem.src); +- if (parts[1] === "image/svg+xml") { +- return; +- } +- +- // Make sure this element has other attributes which contains image. +- // If it doesn't, then this src is important and shouldn't be removed. +- var srcCouldBeRemoved = false; +- for (var i = 0; i < elem.attributes.length; i++) { +- var attr = elem.attributes[i]; +- if (attr.name === "src") { +- continue; ++ this._forEachNode( ++ this._getAllNodesWithTag(root, ["img", "picture", "figure"]), ++ function (elem) { ++ // In some sites (e.g. Kotaku), they put 1px square image as base64 data uri in the src attribute. ++ // So, here we check if the data uri is too short, just might as well remove it. ++ if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) { ++ // Make sure it's not SVG, because SVG can have a meaningful image in under 133 bytes. ++ var parts = this.REGEXPS.b64DataUrl.exec(elem.src); ++ if (parts[1] === "image/svg+xml") { ++ return; + } + +- if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { +- srcCouldBeRemoved = true; +- break; ++ // Make sure this element has other attributes which contains image. ++ // If it doesn't, then this src is important and shouldn't be removed. ++ var srcCouldBeRemoved = false; ++ for (var i = 0; i < elem.attributes.length; i++) { ++ var attr = elem.attributes[i]; ++ if (attr.name === "src") { ++ continue; ++ } ++ ++ if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { ++ srcCouldBeRemoved = true; ++ break; ++ } + } +- } + +- // Here we assume if image is less than 100 bytes (or 133B after encoded to base64) +- // it will be too small, therefore it might be placeholder image. +- if (srcCouldBeRemoved) { +- var b64starts = elem.src.search(/base64\s*/i) + 7; +- var b64length = elem.src.length - b64starts; +- if (b64length < 133) { +- elem.removeAttribute("src"); ++ // Here we assume if image is less than 100 bytes (or 133B after encoded to base64) ++ // it will be too small, therefore it might be placeholder image. ++ if (srcCouldBeRemoved) { ++ var b64starts = elem.src.search(/base64\s*/i) + 7; ++ var b64length = elem.src.length - b64starts; ++ if (b64length < 133) { ++ elem.removeAttribute("src"); ++ } + } + } +- } + +- // also check for "null" to work around https://github.com/jsdom/jsdom/issues/2580 +- if ((elem.src || (elem.srcset && elem.srcset != "null")) && elem.className.toLowerCase().indexOf("lazy") === -1) { +- return; +- } +- +- for (var j = 0; j < elem.attributes.length; j++) { +- attr = elem.attributes[j]; +- if (attr.name === "src" || attr.name === "srcset" || attr.name === "alt") { +- continue; +- } +- var copyTo = null; +- if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) { +- copyTo = "srcset"; +- } else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) { +- copyTo = "src"; ++ // also check for "null" to work around https://github.com/jsdom/jsdom/issues/2580 ++ if ( ++ (elem.src || (elem.srcset && elem.srcset != "null")) && ++ elem.className.toLowerCase().indexOf("lazy") === -1 ++ ) { ++ return; + } +- if (copyTo) { +- //if this is an img or picture, set the attribute directly +- if (elem.tagName === "IMG" || elem.tagName === "PICTURE") { +- elem.setAttribute(copyTo, attr.value); +- } else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) { +- //if the item is a
that does not contain an image or picture, create one and place it inside the figure +- //see the nytimes-3 testcase for an example +- var img = this._doc.createElement("img"); +- img.setAttribute(copyTo, attr.value); +- elem.appendChild(img); ++ ++ for (var j = 0; j < elem.attributes.length; j++) { ++ attr = elem.attributes[j]; ++ if ( ++ attr.name === "src" || ++ attr.name === "srcset" || ++ attr.name === "alt" ++ ) { ++ continue; ++ } ++ var copyTo = null; ++ if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) { ++ copyTo = "srcset"; ++ } else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) { ++ copyTo = "src"; ++ } ++ if (copyTo) { ++ //if this is an img or picture, set the attribute directly ++ if (elem.tagName === "IMG" || elem.tagName === "PICTURE") { ++ elem.setAttribute(copyTo, attr.value); ++ } else if ( ++ elem.tagName === "FIGURE" && ++ !this._getAllNodesWithTag(elem, ["img", "picture"]).length ++ ) { ++ //if the item is a
that does not contain an image or picture, create one and place it inside the figure ++ //see the nytimes-3 testcase for an example ++ var img = this._doc.createElement("img"); ++ img.setAttribute(copyTo, attr.value); ++ elem.appendChild(img); ++ } + } + } + } +- }); ++ ); + }, + +- _getTextDensity: function(e, tags) { ++ _getTextDensity: function (e, tags) { + var textLength = this._getInnerText(e, true).length; + if (textLength === 0) { + return 0; + } + var childrenLength = 0; + var children = this._getAllNodesWithTag(e, tags); +- this._forEachNode(children, (child) => childrenLength += this._getInnerText(child, true).length); ++ this._forEachNode( ++ children, ++ (child) => (childrenLength += this._getInnerText(child, true).length) ++ ); + return childrenLength / textLength; + }, + +@@ -2063,18 +2347,17 @@ Readability.prototype = { + * + * @return void + **/ +- _cleanConditionally: function(e, tag) { +- if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) +- return; ++ _cleanConditionally: function (e, tag) { ++ if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) return; + + // Gather counts for other typical elements embedded within. + // Traverse backwards so we can remove nodes at the same time + // without effecting the traversal. + // + // TODO: Consider taking into account original contentScore here. +- this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(node) { ++ this._removeNodes(this._getAllNodesWithTag(e, [tag]), function (node) { + // First check if this node IS data table, in which case don't remove it. +- var isDataTable = function(t) { ++ var isDataTable = function (t) { + return t._readabilityDataTable; + }; + +@@ -2082,7 +2365,10 @@ Readability.prototype = { + if (!isList) { + var listLength = 0; + var listNodes = this._getAllNodesWithTag(node, ["ul", "ol"]); +- this._forEachNode(listNodes, (list) => listLength += this._getInnerText(list).length); ++ this._forEachNode( ++ listNodes, ++ (list) => (listLength += this._getInnerText(list).length) ++ ); + isList = listLength / this._getInnerText(node).length > 0.9; + } + +@@ -2117,10 +2403,21 @@ Readability.prototype = { + var img = node.getElementsByTagName("img").length; + var li = node.getElementsByTagName("li").length - 100; + var input = node.getElementsByTagName("input").length; +- var headingDensity = this._getTextDensity(node, ["h1", "h2", "h3", "h4", "h5", "h6"]); ++ var headingDensity = this._getTextDensity(node, [ ++ "h1", ++ "h2", ++ "h3", ++ "h4", ++ "h5", ++ "h6", ++ ]); + + var embedCount = 0; +- var embeds = this._getAllNodesWithTag(node, ["object", "embed", "iframe"]); ++ var embeds = this._getAllNodesWithTag(node, [ ++ "object", ++ "embed", ++ "iframe", ++ ]); + + for (var i = 0; i < embeds.length; i++) { + // If this embed has attribute that matches video regex, don't delete it. +@@ -2131,7 +2428,10 @@ Readability.prototype = { + } + + // For embed with tag, check inner HTML as well. +- if (embeds[i].tagName === "object" && this._allowedVideoRegex.test(embeds[i].innerHTML)) { ++ if ( ++ embeds[i].tagName === "object" && ++ this._allowedVideoRegex.test(embeds[i].innerHTML) ++ ) { + return false; + } + +@@ -2144,11 +2444,16 @@ Readability.prototype = { + var haveToRemove = + (img > 1 && p / img < 0.5 && !this._hasAncestorTag(node, "figure")) || + (!isList && li > p) || +- (input > Math.floor(p/3)) || +- (!isList && headingDensity < 0.9 && contentLength < 25 && (img === 0 || img > 2) && !this._hasAncestorTag(node, "figure")) || ++ input > Math.floor(p / 3) || ++ (!isList && ++ headingDensity < 0.9 && ++ contentLength < 25 && ++ (img === 0 || img > 2) && ++ !this._hasAncestorTag(node, "figure")) || + (!isList && weight < 25 && linkDensity > 0.2) || + (weight >= 25 && linkDensity > 0.5) || +- ((embedCount === 1 && contentLength < 75) || embedCount > 1); ++ (embedCount === 1 && contentLength < 75) || ++ embedCount > 1; + // Allow simple lists of images to remain in pages + if (isList && haveToRemove) { + for (var x = 0; x < node.children.length; x++) { +@@ -2177,7 +2482,7 @@ Readability.prototype = { + * @param Function determines whether a node should be removed + * @return void + **/ +- _cleanMatchedNodes: function(e, filter) { ++ _cleanMatchedNodes: function (e, filter) { + var endOfSearchMarkerNode = this._getNextNode(e, true); + var next = this._getNextNode(e); + while (next && next != endOfSearchMarkerNode) { +@@ -2194,10 +2499,10 @@ Readability.prototype = { + * + * @param Element + * @return void +- **/ +- _cleanHeaders: function(e) { ++ **/ ++ _cleanHeaders: function (e) { + let headingNodes = this._getAllNodesWithTag(e, ["h1", "h2"]); +- this._removeNodes(headingNodes, function(node) { ++ this._removeNodes(headingNodes, function (node) { + let shouldRemove = this._getClassWeight(node) < 0; + if (shouldRemove) { + this.log("Removing header with low class weight:", node); +@@ -2213,7 +2518,7 @@ Readability.prototype = { + * @param Element the node to check. + * @return boolean indicating whether this is a title-like header. + */ +- _headerDuplicatesTitle: function(node) { ++ _headerDuplicatesTitle: function (node) { + if (node.tagName != "H1" && node.tagName != "H2") { + return false; + } +@@ -2222,21 +2527,27 @@ Readability.prototype = { + return this._textSimilarity(this._articleTitle, heading) > 0.75; + }, + +- _flagIsActive: function(flag) { ++ _flagIsActive: function (flag) { + return (this._flags & flag) > 0; + }, + +- _removeFlag: function(flag) { ++ _removeFlag: function (flag) { + this._flags = this._flags & ~flag; + }, + +- _isProbablyVisible: function(node) { ++ _isProbablyVisible: function (node) { + // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes. +- return (!node.style || node.style.display != "none") +- && (!node.style || node.style.visibility != "hidden") +- && !node.hasAttribute("hidden") ++ return ( ++ (!node.style || node.style.display != "none") && ++ (!node.style || node.style.visibility != "hidden") && ++ !(node.hasAttribute("hidden") && !node.id.startsWith("S:")) && + //check for "fallback-image" so that wikimedia math images are displayed +- && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1)); ++ (!node.hasAttribute("aria-hidden") || ++ node.getAttribute("aria-hidden") != "true" || ++ (node.className && ++ node.className.indexOf && ++ node.className.indexOf("fallback-image") !== -1)) ++ ); + }, + + /** +@@ -2256,7 +2567,9 @@ Readability.prototype = { + if (this._maxElemsToParse > 0) { + var numTags = this._doc.getElementsByTagName("*").length; + if (numTags > this._maxElemsToParse) { +- throw new Error("Aborting parsing document; " + numTags + " elements found"); ++ throw new Error( ++ "Aborting parsing document; " + numTags + " elements found" ++ ); + } + } + +@@ -2275,8 +2588,7 @@ Readability.prototype = { + this._articleTitle = metadata.title; + + var articleContent = this._grabArticle(); +- if (!articleContent) +- return null; ++ if (!articleContent) return null; + + this.log("Grabbed: " + articleContent.innerHTML); + +@@ -2303,9 +2615,9 @@ Readability.prototype = { + length: textContent.length, + excerpt: metadata.excerpt, + siteName: metadata.siteName || this._articleSiteName, +- publishedTime: metadata.publishedTime ++ publishedTime: metadata.publishedTime, + }; +- } ++ }, + }; + + if (typeof module === "object") { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index acfee375fe..7482291b43 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -5,6 +5,9 @@ settings: excludeLinksFromLockfile: false patchedDependencies: + '@mozilla/readability@0.5.0': + hash: fgkvsbckled47trggkhdkimzbm + path: patches/@mozilla__readability@0.5.0.patch hono@4.4.7: hash: ycbk46disqruhfjducp47b5fl4 path: patches/hono@4.4.7.patch @@ -40,6 +43,9 @@ importers: '@iconify/tools': specifier: 4.0.4 version: 4.0.4 + '@mozilla/readability': + specifier: ^0.5.0 + version: 0.5.0(patch_hash=fgkvsbckled47trggkhdkimzbm) '@radix-ui/react-alert-dialog': specifier: 1.1.1 version: 1.1.1(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -193,9 +199,15 @@ importers: jotai: specifier: 2.9.1 version: 2.9.1(@types/react@18.3.3)(react@18.3.1) + jsdom: + specifier: ^24.1.1 + version: 24.1.1 lethargy: specifier: 1.0.9 version: 1.0.9 + linkedom: + specifier: ^0.18.4 + version: 0.18.4 lodash-es: specifier: 4.17.21 version: 4.17.21 @@ -418,7 +430,7 @@ importers: version: 4.3.2(typescript@5.5.4)(vite@5.3.4(@types/node@20.14.12)) vitest: specifier: 2.0.4 - version: 2.0.4(@types/node@20.14.12) + version: 2.0.4(@types/node@20.14.12)(jsdom@24.1.1) packages: @@ -1063,6 +1075,10 @@ packages: resolution: {integrity: sha512-9QOtNffcOF/c1seMCDnjckb3R9WHcG34tky+FHpNKKCW0wc/scYLwMtO+ptyGUfMW0/b/n4qRiALlaFHc9Oj7Q==} engines: {node: '>= 10.0.0'} + '@mozilla/readability@0.5.0': + resolution: {integrity: sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==} + engines: {node: '>=14.0.0'} + '@nodelib/fs.scandir@2.1.5': resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==} engines: {node: '>= 8'} @@ -2674,6 +2690,10 @@ packages: resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} engines: {node: '>= 6.0.0'} + agent-base@7.1.1: + resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==} + engines: {node: '>= 14'} + agentkeepalive@4.5.0: resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==} engines: {node: '>= 8.0.0'} @@ -3207,9 +3227,20 @@ packages: resolution: {integrity: sha512-0LrrStPOdJj+SPCCrGhzryycLjwcgUSHBtxNA8aIDxf0GLsRh1cKYhB00Gd1lDOS4yGH69+SNn13+TWbVHETFQ==} engines: {node: ^10 || ^12.20.0 || ^14.13.0 || >=15.0.0, npm: '>=7.0.0'} + cssom@0.5.0: + resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==} + + cssstyle@4.0.1: + resolution: {integrity: sha512-8ZYiJ3A/3OkDd093CBT/0UKDWry7ak4BdPTFP2+QEP7cmhouyq/Up709ASSj2cK02BbZiMgk7kYjZNS4QP5qrQ==} + engines: {node: '>=18'} + csstype@3.1.3: resolution: {integrity: sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==} + data-urls@5.0.0: + resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==} + engines: {node: '>=18'} + dayjs@1.11.12: resolution: {integrity: sha512-Rt2g+nTbLlDWZTwwrIXjy9MeiZmSDI375FvZs72ngxx8PDC6YXOeR3q5LAuPzjZQxhiWdRKac7RKV+YyQYfYIg==} @@ -3238,6 +3269,9 @@ packages: supports-color: optional: true + decimal.js@10.4.3: + resolution: {integrity: sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==} + decode-named-character-reference@1.0.2: resolution: {integrity: sha512-O8x12RzrUF8xyVcY0KJowWsmaJxQbmy0/EtnNtHRpsOcT7dFk5W598coHqBVpmWo1oQQfsCqfCmkZN5DJrZVdg==} @@ -4255,12 +4289,22 @@ packages: htm@3.1.1: resolution: {integrity: sha512-983Vyg8NwUE7JkZ6NmOqpCZ+sh1bKv2iYTlUkzlWmA5JD2acKoxd4KVxbMmxX/85mtfdnDmTFoNKcg5DGAvxNQ==} + html-encoding-sniffer@4.0.0: + resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==} + engines: {node: '>=18'} + + html-escaper@3.0.3: + resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==} + html-void-elements@3.0.0: resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==} htmlparser2@8.0.2: resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==} + htmlparser2@9.1.0: + resolution: {integrity: sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==} + http-cache-semantics@4.1.1: resolution: {integrity: sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==} @@ -4268,6 +4312,10 @@ packages: resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==} engines: {node: '>= 6'} + http-proxy-agent@7.0.2: + resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} + engines: {node: '>= 14'} + http2-wrapper@1.0.3: resolution: {integrity: sha512-V+23sDMr12Wnz7iTcDeJr3O6AIxlnvT/bmaAAAP/Xda35C90p9599p0F1eHR/N1KILWSoWVAiOMFjBBXaXSMxg==} engines: {node: '>=10.19.0'} @@ -4276,6 +4324,10 @@ packages: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} + https-proxy-agent@7.0.5: + resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==} + engines: {node: '>= 14'} + human-signals@2.1.0: resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==} engines: {node: '>=10.17.0'} @@ -4464,6 +4516,9 @@ packages: resolution: {integrity: sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==} engines: {node: '>=0.10.0'} + is-potential-custom-element-name@1.0.1: + resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==} + is-property@1.0.2: resolution: {integrity: sha512-Ks/IoX00TtClbGQr4TWXemAnktAQvYB7HzcCxDGqEZU6oCmb2INHuOoKxbtR+HFkmYWBKv/dOZtGRiAjDhj92g==} @@ -4551,6 +4606,15 @@ packages: resolution: {integrity: sha512-YtOli5Cmzy3q4dP26GraSOeAhqecewG04hoO8DY56CH4KJ9Fvv5qKWUCCo3HZob7esJQHCv6/+bnTy72xZZaVQ==} engines: {node: '>=12.0.0'} + jsdom@24.1.1: + resolution: {integrity: sha512-5O1wWV99Jhq4DV7rCLIoZ/UIhyQeDR7wHVyZAHAshbrvZsLs+Xzz7gtwnlJTJDjleiTKh54F4dXrX70vJQTyJQ==} + engines: {node: '>=18'} + peerDependencies: + canvas: ^2.11.2 + peerDependenciesMeta: + canvas: + optional: true + jsesc@0.5.0: resolution: {integrity: sha512-uZz5UnB7u4T9LvwmFqXii7pZSouaRPorGs5who1Ip7VO0wxanFvBL7GkM6dTHlgX+jhBApRetaWpnDabOeTcnA==} hasBin: true @@ -4640,6 +4704,9 @@ packages: lines-and-columns@1.2.4: resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} + linkedom@0.18.4: + resolution: {integrity: sha512-JhLErxMIEOKByMi3fURXgI1fYOzR87L1Cn0+MI9GlMckFrqFZpV1SUGox1jcKtsKN3y6JgclcQf0FzZT//BuGw==} + linkify-it@5.0.0: resolution: {integrity: sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==} @@ -5127,6 +5194,9 @@ packages: nth-check@2.1.1: resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + nwsapi@2.2.12: + resolution: {integrity: sha512-qXDmcVlZV4XRtKFzddidpfVP4oMSGhga+xdMc25mv8kaLUHtgzCDhUxkrN8exkGdTlLNaXj7CV3GtON7zuGZ+w==} + oauth4webapi@2.11.1: resolution: {integrity: sha512-aNzOnL98bL6izG97zgnZs1PFEyO4WDVRhz2Pd066NPak44w5ESLRCYmJIyey8avSBPOMtBjhF3ZDDm7bIb7UOg==} @@ -5507,6 +5577,9 @@ packages: proxy-from-env@1.1.0: resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} + psl@1.9.0: + resolution: {integrity: sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==} + pump@3.0.0: resolution: {integrity: sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==} @@ -5518,6 +5591,9 @@ packages: resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} engines: {node: '>=6'} + querystringify@2.2.0: + resolution: {integrity: sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==} + queue-microtask@1.2.3: resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==} @@ -5747,6 +5823,9 @@ packages: resolution: {integrity: sha512-nQFEv9gRw6SJAwWD2LrL0NmQvAcO7FBwJbwmr2ttPAacfy0xuiOjE5zt+zM4xDyuyvUaxBi/9gb2SoCyNEVJcw==} engines: {node: '>=8.6.0'} + requires-port@1.0.0: + resolution: {integrity: sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==} + resedit@2.0.2: resolution: {integrity: sha512-UKTnq602iVe+W5SyRAQx/WdWMnlDiONfXBLFg/ur4QE4EQQ8eP7Jgm5mNXdK12kKawk1vvXPja2iXKqZiGDW6Q==} engines: {node: '>=14', npm: '>=7'} @@ -5829,6 +5908,12 @@ packages: engines: {node: '>=18.0.0', npm: '>=8.0.0'} hasBin: true + rrweb-cssom@0.6.0: + resolution: {integrity: sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw==} + + rrweb-cssom@0.7.1: + resolution: {integrity: sha512-TrEMa7JGdVm0UThDJSx7ddw5nVm3UJS9o9CCIZ72B1vSyEZoziDqBYP3XIoi/12lKrJR8rE3jeFHMok2F/Mnsg==} + run-parallel@1.2.0: resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==} @@ -5847,6 +5932,10 @@ packages: sax@1.4.1: resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==} + saxes@6.0.0: + resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==} + engines: {node: '>=v12.22.7'} + scheduler@0.23.2: resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==} @@ -6119,6 +6208,9 @@ packages: resolution: {integrity: sha512-sBFp7fA+IfZ/7BMcg8/JSEqDD1qZXBUyliT76yk3zIYVu2fMwFVAghhAJ9vBM5tJUtHW5qcD0pmeEGQs1EK14w==} engines: {node: '>= 4.7.0'} + symbol-tree@3.2.4: + resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==} + synckit@0.6.2: resolution: {integrity: sha512-Vhf+bUa//YSTYKseDiiEuQmhGCoIF3CVBhunm3r/DQnYiGT4JssmnKQc44BIyOZRK2pKjXXAgbhfmbeoC9CJpA==} engines: {node: '>=12.20'} @@ -6208,9 +6300,17 @@ packages: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} + tough-cookie@4.1.4: + resolution: {integrity: sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==} + engines: {node: '>=6'} + tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + tr46@5.0.0: + resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==} + engines: {node: '>=18'} + trigram-utils@2.0.1: resolution: {integrity: sha512-nfWIXHEaB+HdyslAfMxSqWKDdmqY9I32jS7GnqpdWQnLH89r6A5sdk3fDVYqGAZ0CrT8ovAFSAo6HRiWcWNIGQ==} @@ -6301,6 +6401,9 @@ packages: ufo@1.5.3: resolution: {integrity: sha512-Y7HYmWaFwPUmkoQCUIAYpKqkOf+SbVj/2fJJZ4RJMCfZp0rTGwRbzQD+HghfnhKOjL9E01okqz+ncJskGYfBNw==} + uhyphen@0.2.0: + resolution: {integrity: sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==} + undici-types@5.26.5: resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} @@ -6347,6 +6450,10 @@ packages: resolution: {integrity: sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==} engines: {node: '>= 4.0.0'} + universalify@0.2.0: + resolution: {integrity: sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==} + engines: {node: '>= 4.0.0'} + universalify@2.0.1: resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==} engines: {node: '>= 10.0.0'} @@ -6370,6 +6477,9 @@ packages: uri-js@4.4.1: resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} + url-parse@1.5.10: + resolution: {integrity: sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==} + use-callback-ref@1.3.2: resolution: {integrity: sha512-elOQwe6Q8gqZgDA8mrh44qRTQqpIHDcZ3hXTLjBe1i4ph8XpNJnO+aQf3NaG+lriLopI4HMx9VjQLfPQ6vhnoA==} engines: {node: '>=10'} @@ -6506,6 +6616,10 @@ packages: jsdom: optional: true + w3c-xmlserializer@5.0.0: + resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==} + engines: {node: '>=18'} + wcwidth@1.0.1: resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==} @@ -6518,6 +6632,10 @@ packages: webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + webidl-conversions@7.0.0: + resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==} + engines: {node: '>=12'} + webpack-sources@3.2.3: resolution: {integrity: sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w==} engines: {node: '>=10.13.0'} @@ -6525,6 +6643,18 @@ packages: webpack-virtual-modules@0.5.0: resolution: {integrity: sha512-kyDivFZ7ZM0BVOUteVbDFhlRt7Ah/CSPwJdi8hBpkK7QLumUqdLtVfm/PX/hkcnrvr0i77fO5+TjZ94Pe+C9iw==} + whatwg-encoding@3.1.1: + resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} + engines: {node: '>=18'} + + whatwg-mimetype@4.0.0: + resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} + engines: {node: '>=18'} + + whatwg-url@14.0.0: + resolution: {integrity: sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==} + engines: {node: '>=18'} + whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} @@ -6568,10 +6698,29 @@ packages: wrappy@1.0.2: resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + ws@8.18.0: + resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==} + engines: {node: '>=10.0.0'} + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: '>=5.0.2' + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + + xml-name-validator@5.0.0: + resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==} + engines: {node: '>=18'} + xmlbuilder@15.1.1: resolution: {integrity: sha512-yMqGBqtXyeN1e3TGYvgNgDVZ3j84W4cwkOXQswghol6APgZWaff9lnbvN7MHYJOiXsvGPXtjTYJEiC9J2wv9Eg==} engines: {node: '>=8.0'} + xmlchars@2.2.0: + resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==} + xtend@4.0.2: resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==} engines: {node: '>=0.4'} @@ -7617,6 +7766,8 @@ snapshots: transitivePeerDependencies: - supports-color + '@mozilla/readability@0.5.0(patch_hash=fgkvsbckled47trggkhdkimzbm)': {} + '@nodelib/fs.scandir@2.1.5': dependencies: '@nodelib/fs.stat': 2.0.5 @@ -9540,6 +9691,12 @@ snapshots: transitivePeerDependencies: - supports-color + agent-base@7.1.1: + dependencies: + debug: 4.3.5 + transitivePeerDependencies: + - supports-color + agentkeepalive@4.5.0: dependencies: humanize-ms: 1.2.1 @@ -10152,8 +10309,19 @@ snapshots: dependencies: css-tree: 2.2.1 + cssom@0.5.0: {} + + cssstyle@4.0.1: + dependencies: + rrweb-cssom: 0.6.0 + csstype@3.1.3: {} + data-urls@5.0.0: + dependencies: + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + dayjs@1.11.12: {} debug@2.6.9: @@ -10168,6 +10336,8 @@ snapshots: dependencies: ms: 2.1.2 + decimal.js@10.4.3: {} + decode-named-character-reference@1.0.2: dependencies: character-entities: 2.0.2 @@ -11486,6 +11656,12 @@ snapshots: htm@3.1.1: {} + html-encoding-sniffer@4.0.0: + dependencies: + whatwg-encoding: 3.1.1 + + html-escaper@3.0.3: {} + html-void-elements@3.0.0: {} htmlparser2@8.0.2: @@ -11495,6 +11671,13 @@ snapshots: domutils: 3.1.0 entities: 4.5.0 + htmlparser2@9.1.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.1.0 + entities: 4.5.0 + http-cache-semantics@4.1.1: {} http-proxy-agent@5.0.0: @@ -11505,6 +11688,13 @@ snapshots: transitivePeerDependencies: - supports-color + http-proxy-agent@7.0.2: + dependencies: + agent-base: 7.1.1 + debug: 4.3.5 + transitivePeerDependencies: + - supports-color + http2-wrapper@1.0.3: dependencies: quick-lru: 5.1.1 @@ -11517,6 +11707,13 @@ snapshots: transitivePeerDependencies: - supports-color + https-proxy-agent@7.0.5: + dependencies: + agent-base: 7.1.1 + debug: 4.3.5 + transitivePeerDependencies: + - supports-color + human-signals@2.1.0: {} human-signals@5.0.0: {} @@ -11682,6 +11879,8 @@ snapshots: is-plain-object@5.0.0: {} + is-potential-custom-element-name@1.0.1: {} + is-property@1.0.2: optional: true @@ -11745,6 +11944,34 @@ snapshots: jsdoc-type-pratt-parser@4.0.0: {} + jsdom@24.1.1: + dependencies: + cssstyle: 4.0.1 + data-urls: 5.0.0 + decimal.js: 10.4.3 + form-data: 4.0.0 + html-encoding-sniffer: 4.0.0 + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.5 + is-potential-custom-element-name: 1.0.1 + nwsapi: 2.2.12 + parse5: 7.1.2 + rrweb-cssom: 0.7.1 + saxes: 6.0.0 + symbol-tree: 3.2.4 + tough-cookie: 4.1.4 + w3c-xmlserializer: 5.0.0 + webidl-conversions: 7.0.0 + whatwg-encoding: 3.1.1 + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + ws: 8.18.0 + xml-name-validator: 5.0.0 + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + jsesc@0.5.0: {} jsesc@2.5.2: {} @@ -11822,6 +12049,14 @@ snapshots: lines-and-columns@1.2.4: {} + linkedom@0.18.4: + dependencies: + css-select: 5.1.0 + cssom: 0.5.0 + html-escaper: 3.0.3 + htmlparser2: 9.1.0 + uhyphen: 0.2.0 + linkify-it@5.0.0: dependencies: uc.micro: 2.1.0 @@ -12472,6 +12707,8 @@ snapshots: dependencies: boolbase: 1.0.0 + nwsapi@2.2.12: {} + oauth4webapi@2.11.1: {} object-assign@4.1.1: {} @@ -12823,6 +13060,8 @@ snapshots: proxy-from-env@1.1.0: {} + psl@1.9.0: {} + pump@3.0.0: dependencies: end-of-stream: 1.4.4 @@ -12832,6 +13071,8 @@ snapshots: punycode@2.3.1: {} + querystringify@2.2.0: {} + queue-microtask@1.2.3: {} quick-lru@5.1.1: {} @@ -13092,6 +13333,8 @@ snapshots: transitivePeerDependencies: - supports-color + requires-port@1.0.0: {} + resedit@2.0.2: dependencies: pe-library: 1.0.1 @@ -13192,6 +13435,10 @@ snapshots: '@rollup/rollup-win32-x64-msvc': 4.18.0 fsevents: 2.3.3 + rrweb-cssom@0.6.0: {} + + rrweb-cssom@0.7.1: {} + run-parallel@1.2.0: dependencies: queue-microtask: 1.2.3 @@ -13208,6 +13455,10 @@ snapshots: sax@1.4.1: {} + saxes@6.0.0: + dependencies: + xmlchars: 2.2.0 + scheduler@0.23.2: dependencies: loose-envify: 1.4.0 @@ -13464,6 +13715,8 @@ snapshots: swiper@11.1.8: {} + symbol-tree@3.2.4: {} + synckit@0.6.2: dependencies: tslib: 2.6.3 @@ -13575,8 +13828,19 @@ snapshots: dependencies: is-number: 7.0.0 + tough-cookie@4.1.4: + dependencies: + psl: 1.9.0 + punycode: 2.3.1 + universalify: 0.2.0 + url-parse: 1.5.10 + tr46@0.0.3: {} + tr46@5.0.0: + dependencies: + punycode: 2.3.1 + trigram-utils@2.0.1: dependencies: collapse-white-space: 2.1.0 @@ -13645,6 +13909,8 @@ snapshots: ufo@1.5.3: {} + uhyphen@0.2.0: {} + undici-types@5.26.5: {} unicorn-magic@0.1.0: {} @@ -13704,6 +13970,8 @@ snapshots: universalify@0.1.2: {} + universalify@0.2.0: {} + universalify@2.0.1: {} unorm@1.6.0: @@ -13732,6 +14000,11 @@ snapshots: dependencies: punycode: 2.3.1 + url-parse@1.5.10: + dependencies: + querystringify: 2.2.0 + requires-port: 1.0.0 + use-callback-ref@1.3.2(@types/react@18.3.3)(react@18.3.1): dependencies: react: 18.3.1 @@ -13838,7 +14111,7 @@ snapshots: '@types/node': 20.14.12 fsevents: 2.3.3 - vitest@2.0.4(@types/node@20.14.12): + vitest@2.0.4(@types/node@20.14.12)(jsdom@24.1.1): dependencies: '@ampproject/remapping': 2.3.0 '@vitest/expect': 2.0.4 @@ -13861,6 +14134,7 @@ snapshots: why-is-node-running: 2.3.0 optionalDependencies: '@types/node': 20.14.12 + jsdom: 24.1.1 transitivePeerDependencies: - less - lightningcss @@ -13870,6 +14144,10 @@ snapshots: - supports-color - terser + w3c-xmlserializer@5.0.0: + dependencies: + xml-name-validator: 5.0.0 + wcwidth@1.0.1: dependencies: defaults: 1.0.4 @@ -13880,10 +14158,23 @@ snapshots: webidl-conversions@3.0.1: {} + webidl-conversions@7.0.0: {} + webpack-sources@3.2.3: {} webpack-virtual-modules@0.5.0: {} + whatwg-encoding@3.1.1: + dependencies: + iconv-lite: 0.6.3 + + whatwg-mimetype@4.0.0: {} + + whatwg-url@14.0.0: + dependencies: + tr46: 5.0.0 + webidl-conversions: 7.0.0 + whatwg-url@5.0.0: dependencies: tr46: 0.0.3 @@ -13930,8 +14221,14 @@ snapshots: wrappy@1.0.2: {} + ws@8.18.0: {} + + xml-name-validator@5.0.0: {} + xmlbuilder@15.1.1: {} + xmlchars@2.2.0: {} + xtend@4.0.2: {} y18n@5.0.8: {} diff --git a/src/main/lib/readability.ts b/src/main/lib/readability.ts new file mode 100644 index 0000000000..f7a13f3239 --- /dev/null +++ b/src/main/lib/readability.ts @@ -0,0 +1,24 @@ +import { Readability } from "@mozilla/readability" +import { name, version } from "@pkg" +import { parseHTML } from "linkedom" +import { fetch } from "ofetch" + +import { isDev } from "../env" + +const userAgents = `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 ${name}/${version}` + +export async function readability(url: string) { + const documentString = await fetch(url, { + headers: { + "User-Agent": userAgents, + }, + }).then((res) => res.text()) + + // FIXME: linkedom does not handle relative addresses in strings. Refer to + // @see https://github.com/WebReflection/linkedom/issues/153 + // JSDOM handles it correctly, but JSDOM introduces canvas binding. + const reader = new Readability(parseHTML(documentString).document, { + debug: isDev, + }) + return reader.parse() +} diff --git a/src/main/tipc/index.ts b/src/main/tipc/index.ts index f6180a1120..5918f3139a 100644 --- a/src/main/tipc/index.ts +++ b/src/main/tipc/index.ts @@ -2,6 +2,7 @@ import { appRoute } from "./app" import { debugRoute } from "./debug" import { dockRoute } from "./dock" import { menuRoute } from "./menu" +import { readerRoute } from "./reader" import { settingRoute } from "./setting" import { trackerRoute } from "./tracker" @@ -12,6 +13,7 @@ export const router = { ...appRoute, ...trackerRoute, ...dockRoute, + ...readerRoute, } export type Router = typeof router diff --git a/src/main/tipc/reader.ts b/src/main/tipc/reader.ts new file mode 100644 index 0000000000..f894d2ac75 --- /dev/null +++ b/src/main/tipc/reader.ts @@ -0,0 +1,17 @@ +import { readability } from "../lib/readability" +import { t } from "./_instance" + +export const readerRoute = { + readability: t.procedure + .input<{ url: string }>() + .action(async ({ input }) => { + const { url } = input + + if (!url) { + return null + } + const result = await readability(url) + + return result + }), +} diff --git a/src/renderer/src/atoms/readability.ts b/src/renderer/src/atoms/readability.ts new file mode 100644 index 0000000000..ff67f68f09 --- /dev/null +++ b/src/renderer/src/atoms/readability.ts @@ -0,0 +1,53 @@ +import { createAtomHooks } from "@renderer/lib/jotai" +import { atom } from "jotai" + +type Readability = { + title: string + content: string + textContent: string + length: number + excerpt: string + byline: string + dir: string + siteName: string + lang: string + publishedTime: string +} + +const mergeObjectSetter = + (setter: (prev: T) => void, getter: () => T) => + (value: Partial) => + setter({ ...getter(), ...value }) + +export const [ + , + , + useReadabilityContent, + , + getReadabilityContent, + __setReadabilityContent, + useReadabilityContentSelector, +] = createAtomHooks(atom>({})) +export const setReadabilityContent = mergeObjectSetter( + __setReadabilityContent, + getReadabilityContent, +) + +export const [ + , + , + useReadabilityStatus, + , + getReadabilityStatus, + __setReadabilityStatus, + useReadabilityStatusSelector, +] = createAtomHooks(atom>({})) +export const setReadabilityStatus = mergeObjectSetter( + __setReadabilityStatus, + getReadabilityStatus, +) + +export const useEntryIsInReadability = (entryId?: string) => + useReadabilityStatusSelector((map) => (entryId ? map[entryId] : false)) +export const useEntryReadabilityContent = (entryId: string) => + useReadabilityContentSelector((map) => map[entryId]) diff --git a/src/renderer/src/hooks/biz/useEntryActions.tsx b/src/renderer/src/hooks/biz/useEntryActions.tsx index 33b89e827d..08b18806c7 100644 --- a/src/renderer/src/hooks/biz/useEntryActions.tsx +++ b/src/renderer/src/hooks/biz/useEntryActions.tsx @@ -1,5 +1,11 @@ +import { + getReadabilityStatus, + setReadabilityContent, + setReadabilityStatus, + useEntryIsInReadability, +} from "@renderer/atoms/readability" import { SimpleIconsEagle } from "@renderer/components/ui/platform-icon/icons" -import { COPY_MAP } from "@renderer/constants" +import { COPY_MAP, views } from "@renderer/constants" import { shortcuts } from "@renderer/constants/shortcuts" import { tipcClient } from "@renderer/lib/client" import { nextFrame } from "@renderer/lib/dom" @@ -72,6 +78,7 @@ export const useEntryActions = ({ refetchOnMount: false, refetchOnWindowFocus: false, }) + const entryIsInReadability = useEntryIsInReadability(entry?.entries.id) const feed = useFeedById(entry?.feedId) @@ -102,7 +109,8 @@ export const useEntryActions = ({ shortcut?: string name: string icon?: ReactNode - disabled?: boolean + hide?: boolean + active?: boolean onClick: () => void }[] = [ { @@ -119,7 +127,7 @@ export const useEntryActions = ({ shortcut: shortcuts.entry.toggleStarred.key, name: `Star`, className: "i-mgc-star-cute-re", - disabled: !!populatedEntry.collections, + hide: !!populatedEntry.collections, onClick: () => { collect.mutate() }, @@ -129,7 +137,7 @@ export const useEntryActions = ({ name: `Unstar`, shortcut: shortcuts.entry.toggleStarred.key, className: "i-mgc-star-cute-fi text-orange-500", - disabled: !populatedEntry.collections, + hide: !populatedEntry.collections, onClick: () => { uncollect.mutate() }, @@ -138,7 +146,7 @@ export const useEntryActions = ({ key: "copyLink", name: "Copy Link", className: "i-mgc-link-cute-re", - disabled: !populatedEntry.entries.url, + hide: !populatedEntry.entries.url, shortcut: shortcuts.entry.copyLink.key, onClick: () => { if (!populatedEntry.entries.url) return @@ -153,17 +161,45 @@ export const useEntryActions = ({ name: COPY_MAP.OpenInBrowser(), shortcut: shortcuts.entry.openInBrowser.key, className: "i-mgc-world-2-cute-re", - disabled: !populatedEntry.entries.url, + hide: !populatedEntry.entries.url, onClick: () => { if (!populatedEntry.entries.url) return window.open(populatedEntry.entries.url, "_blank") }, }, + { + name: "Readability", + className: entryIsInReadability ? `i-mgc-sparkles-2-filled` : `i-mgc-sparkles-2-cute-re`, + key: "readability", + hide: views[view].wideMode || !populatedEntry.entries.url || !window.electron, + active: entryIsInReadability, + async onClick() { + const isTurnOn = getReadabilityStatus()[populatedEntry.entries.id] + if (!isTurnOn && populatedEntry.entries.url) { + const result = await tipcClient?.readability({ + url: populatedEntry.entries.url, + }) + + if (result) { + setReadabilityStatus({ + [populatedEntry.entries.id]: true, + }) + setReadabilityContent({ + [populatedEntry.entries.id]: result, + }) + } + } else { + setReadabilityStatus({ + [populatedEntry.entries.id]: false, + }) + } + }, + }, { name: "Save Media to Eagle", icon: , key: "saveToEagle", - disabled: + hide: (checkEagle.isLoading ? true : !checkEagle.data) || !populatedEntry.entries.media?.length, onClick: async () => { @@ -196,7 +232,7 @@ export const useEntryActions = ({ `i-mgc-share-3-cute-re` : "i-mgc-share-forward-cute-re", shortcut: shortcuts.entry.share.key, - disabled: !window.electron && !navigator.share, + hide: !window.electron && !navigator.share, onClick: () => { if (!populatedEntry.entries.url) return @@ -216,7 +252,7 @@ export const useEntryActions = ({ name: `Mark as Read`, shortcut: shortcuts.entry.toggleRead.key, className: "i-mgc-round-cute-fi", - disabled: !!(!!populatedEntry.read || populatedEntry.collections), + hide: !!(!!populatedEntry.read || populatedEntry.collections), onClick: () => { read.mutate(populatedEntry) }, @@ -226,7 +262,7 @@ export const useEntryActions = ({ name: `Mark as Unread`, shortcut: shortcuts.entry.toggleRead.key, className: "i-mgc-round-cute-re", - disabled: !!(!populatedEntry.read || populatedEntry.collections), + hide: !!(!populatedEntry.read || populatedEntry.collections), onClick: () => { unread.mutate(populatedEntry) }, @@ -244,6 +280,7 @@ export const useEntryActions = ({ uncollect, unread, view, + entryIsInReadability, ]) return { diff --git a/src/renderer/src/modules/entry-column/item.tsx b/src/renderer/src/modules/entry-column/item.tsx index 1749cda372..15ec516cab 100644 --- a/src/renderer/src/modules/entry-column/item.tsx +++ b/src/renderer/src/modules/entry-column/item.tsx @@ -146,7 +146,7 @@ function EntryItemImpl({ showNativeMenu( [ ...items - .filter((item) => !item.disabled) + .filter((item) => !item.hide) .map((item) => ({ type: "text" as const, label: item.name, diff --git a/src/renderer/src/modules/entry-column/social-media-item.tsx b/src/renderer/src/modules/entry-column/social-media-item.tsx index 0fbbb714b4..e2c3cf79ef 100644 --- a/src/renderer/src/modules/entry-column/social-media-item.tsx +++ b/src/renderer/src/modules/entry-column/social-media-item.tsx @@ -119,7 +119,7 @@ const ActionBar = ({ entryId }: { entryId: string }) => { return (
{items - .filter((item) => !item.disabled && (item.key !== "read" && item.key !== "unread")) + .filter((item) => !item.hide && (item.key !== "read" && item.key !== "unread")) .map((item) => ( {items - .filter((item) => !item.disabled) + .filter((item) => !item.hide) .map((item) => ( ) } + active={item.active} shortcut={item.shortcut} onClick={item.onClick} tooltip={item.name} diff --git a/src/renderer/src/modules/entry-content/index.tsx b/src/renderer/src/modules/entry-content/index.tsx index 5164b34169..a7e17e0c0a 100644 --- a/src/renderer/src/modules/entry-content/index.tsx +++ b/src/renderer/src/modules/entry-content/index.tsx @@ -1,12 +1,14 @@ +import { + useEntryIsInReadability, + useEntryReadabilityContent, +} from "@renderer/atoms/readability" import { useUISettingKey } from "@renderer/atoms/settings/ui" import { useMe } from "@renderer/atoms/user" import { m } from "@renderer/components/common/Motion" import { Logo } from "@renderer/components/icons/logo" import { AutoResizeHeight } from "@renderer/components/ui/auto-resize-height" import { ScrollArea } from "@renderer/components/ui/scroll-area" -import { - useRouteParamsSelector, -} from "@renderer/hooks/biz/useRouteParams" +import { useRouteParamsSelector } from "@renderer/hooks/biz/useRouteParams" import { useAuthQuery, useTitle } from "@renderer/hooks/common" import { stopPropagation } from "@renderer/lib/dom" import { parseHtml } from "@renderer/lib/parse-html" @@ -18,6 +20,7 @@ import { import { Queries } from "@renderer/queries" import { useEntry, useEntryReadHistory } from "@renderer/store/entry" import { useFeedById, useFeedHeaderTitle } from "@renderer/store/feed" +import type { ReactNode } from "react" import { useEffect, useLayoutEffect, useState } from "react" import { LoadingCircle } from "../../components/ui/loading" @@ -120,6 +123,7 @@ function EntryContentRender({ entryId }: { entryId: string }) { const readerFontFamily = useUISettingKey("readerFontFamily") const view = useRouteParamsSelector((route) => route.view) + const isInReadabilityMode = useEntryIsInReadability(entryId) if (!entry) return null return ( @@ -209,7 +213,11 @@ function EntryContentRender({ entryId }: { entryId: string }) {
)} - {content} + {!isInReadabilityMode ? ( + content + ) : ( + + )} {!content && ( @@ -270,3 +278,25 @@ const TitleMetaHandler: Component<{ }, [entryId, entryTitle, feedTitle]) return null } + +const ReadabilityContent = ({ entryId }: { entryId: string }) => { + const result = useEntryReadabilityContent(entryId) + + const [renderer, setRenderer] = useState(null) + useEffect(() => { + if (!result) return + const { content: processContent } = result + + if (processContent) { + parseHtml(processContent, { + renderInlineStyle: true, + }).then((parsed) => { + setRenderer(parsed.content) + }) + } else { + setRenderer(null) + } + }, [result, parseHtml]) + + return renderer +} From d131682f051e8168db1abe0861bccf5d70bbed51 Mon Sep 17 00:00:00 2001 From: Innei Date: Thu, 1 Aug 2024 21:23:53 +0800 Subject: [PATCH 2/2] feat: done Signed-off-by: Innei --- package.json | 2 +- pnpm-lock.yaml | 62 ++++++++---- src/main/lib/readability.ts | 23 ++++- src/renderer/src/atoms/readability.ts | 28 +++++- .../src/hooks/biz/useEntryActions.tsx | 96 +++++++++++-------- .../src/hooks/biz/useNavigateEntry.ts | 6 +- .../src/modules/entry-content/hooks.tsx | 7 +- .../src/modules/entry-content/index.tsx | 59 +++++++++++- .../src/modules/entry-content/provider.tsx | 10 +- 9 files changed, 219 insertions(+), 74 deletions(-) diff --git a/package.json b/package.json index ed5f56671c..d198934f43 100644 --- a/package.json +++ b/package.json @@ -90,7 +90,7 @@ "idb-keyval": "6.2.1", "immer": "10.1.1", "jotai": "2.9.1", - "jsdom": "^24.1.1", + "lethargy": "1.0.9", "linkedom": "^0.18.4", "lodash-es": "4.17.21", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7482291b43..45846f357b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -199,9 +199,6 @@ importers: jotai: specifier: 2.9.1 version: 2.9.1(@types/react@18.3.3)(react@18.3.1) - jsdom: - specifier: ^24.1.1 - version: 24.1.1 lethargy: specifier: 1.0.9 version: 1.0.9 @@ -9696,6 +9693,7 @@ snapshots: debug: 4.3.5 transitivePeerDependencies: - supports-color + optional: true agentkeepalive@4.5.0: dependencies: @@ -10314,6 +10312,7 @@ snapshots: cssstyle@4.0.1: dependencies: rrweb-cssom: 0.6.0 + optional: true csstype@3.1.3: {} @@ -10321,6 +10320,7 @@ snapshots: dependencies: whatwg-mimetype: 4.0.0 whatwg-url: 14.0.0 + optional: true dayjs@1.11.12: {} @@ -10336,7 +10336,8 @@ snapshots: dependencies: ms: 2.1.2 - decimal.js@10.4.3: {} + decimal.js@10.4.3: + optional: true decode-named-character-reference@1.0.2: dependencies: @@ -11659,6 +11660,7 @@ snapshots: html-encoding-sniffer@4.0.0: dependencies: whatwg-encoding: 3.1.1 + optional: true html-escaper@3.0.3: {} @@ -11694,6 +11696,7 @@ snapshots: debug: 4.3.5 transitivePeerDependencies: - supports-color + optional: true http2-wrapper@1.0.3: dependencies: @@ -11713,6 +11716,7 @@ snapshots: debug: 4.3.5 transitivePeerDependencies: - supports-color + optional: true human-signals@2.1.0: {} @@ -11879,7 +11883,8 @@ snapshots: is-plain-object@5.0.0: {} - is-potential-custom-element-name@1.0.1: {} + is-potential-custom-element-name@1.0.1: + optional: true is-property@1.0.2: optional: true @@ -11971,6 +11976,7 @@ snapshots: - bufferutil - supports-color - utf-8-validate + optional: true jsesc@0.5.0: {} @@ -12707,7 +12713,8 @@ snapshots: dependencies: boolbase: 1.0.0 - nwsapi@2.2.12: {} + nwsapi@2.2.12: + optional: true oauth4webapi@2.11.1: {} @@ -13060,7 +13067,8 @@ snapshots: proxy-from-env@1.1.0: {} - psl@1.9.0: {} + psl@1.9.0: + optional: true pump@3.0.0: dependencies: @@ -13071,7 +13079,8 @@ snapshots: punycode@2.3.1: {} - querystringify@2.2.0: {} + querystringify@2.2.0: + optional: true queue-microtask@1.2.3: {} @@ -13333,7 +13342,8 @@ snapshots: transitivePeerDependencies: - supports-color - requires-port@1.0.0: {} + requires-port@1.0.0: + optional: true resedit@2.0.2: dependencies: @@ -13435,9 +13445,11 @@ snapshots: '@rollup/rollup-win32-x64-msvc': 4.18.0 fsevents: 2.3.3 - rrweb-cssom@0.6.0: {} + rrweb-cssom@0.6.0: + optional: true - rrweb-cssom@0.7.1: {} + rrweb-cssom@0.7.1: + optional: true run-parallel@1.2.0: dependencies: @@ -13458,6 +13470,7 @@ snapshots: saxes@6.0.0: dependencies: xmlchars: 2.2.0 + optional: true scheduler@0.23.2: dependencies: @@ -13715,7 +13728,8 @@ snapshots: swiper@11.1.8: {} - symbol-tree@3.2.4: {} + symbol-tree@3.2.4: + optional: true synckit@0.6.2: dependencies: @@ -13834,12 +13848,14 @@ snapshots: punycode: 2.3.1 universalify: 0.2.0 url-parse: 1.5.10 + optional: true tr46@0.0.3: {} tr46@5.0.0: dependencies: punycode: 2.3.1 + optional: true trigram-utils@2.0.1: dependencies: @@ -13970,7 +13986,8 @@ snapshots: universalify@0.1.2: {} - universalify@0.2.0: {} + universalify@0.2.0: + optional: true universalify@2.0.1: {} @@ -14004,6 +14021,7 @@ snapshots: dependencies: querystringify: 2.2.0 requires-port: 1.0.0 + optional: true use-callback-ref@1.3.2(@types/react@18.3.3)(react@18.3.1): dependencies: @@ -14147,6 +14165,7 @@ snapshots: w3c-xmlserializer@5.0.0: dependencies: xml-name-validator: 5.0.0 + optional: true wcwidth@1.0.1: dependencies: @@ -14158,7 +14177,8 @@ snapshots: webidl-conversions@3.0.1: {} - webidl-conversions@7.0.0: {} + webidl-conversions@7.0.0: + optional: true webpack-sources@3.2.3: {} @@ -14167,13 +14187,16 @@ snapshots: whatwg-encoding@3.1.1: dependencies: iconv-lite: 0.6.3 + optional: true - whatwg-mimetype@4.0.0: {} + whatwg-mimetype@4.0.0: + optional: true whatwg-url@14.0.0: dependencies: tr46: 5.0.0 webidl-conversions: 7.0.0 + optional: true whatwg-url@5.0.0: dependencies: @@ -14221,13 +14244,16 @@ snapshots: wrappy@1.0.2: {} - ws@8.18.0: {} + ws@8.18.0: + optional: true - xml-name-validator@5.0.0: {} + xml-name-validator@5.0.0: + optional: true xmlbuilder@15.1.1: {} - xmlchars@2.2.0: {} + xmlchars@2.2.0: + optional: true xtend@4.0.2: {} diff --git a/src/main/lib/readability.ts b/src/main/lib/readability.ts index f7a13f3239..9235aff146 100644 --- a/src/main/lib/readability.ts +++ b/src/main/lib/readability.ts @@ -17,8 +17,29 @@ export async function readability(url: string) { // FIXME: linkedom does not handle relative addresses in strings. Refer to // @see https://github.com/WebReflection/linkedom/issues/153 // JSDOM handles it correctly, but JSDOM introduces canvas binding. - const reader = new Readability(parseHTML(documentString).document, { + + const { document } = parseHTML(documentString) + const baseUrl = new URL(url).origin + + document.querySelectorAll("a").forEach((a) => { + a.href = replaceRelativeAddress(baseUrl, a.href) + }); + + (["img", "audio", "video"] as const).forEach((tag) => { + document.querySelectorAll(tag).forEach((img) => { + img.src = img.src && replaceRelativeAddress(baseUrl, img.src) + }) + }) + + const reader = new Readability(document, { debug: isDev, }) return reader.parse() } + +const replaceRelativeAddress = (baseUrl: string, url: string) => { + if (url.startsWith("http")) { + return url + } + return new URL(url, baseUrl).href +} diff --git a/src/renderer/src/atoms/readability.ts b/src/renderer/src/atoms/readability.ts index ff67f68f09..50a7bea805 100644 --- a/src/renderer/src/atoms/readability.ts +++ b/src/renderer/src/atoms/readability.ts @@ -33,6 +33,12 @@ export const setReadabilityContent = mergeObjectSetter( getReadabilityContent, ) +export enum ReadabilityStatus { + INITIAL = 1, + WAITING = 2, + SUCCESS = 3, + FAILURE = 4, +} export const [ , , @@ -41,13 +47,29 @@ export const [ getReadabilityStatus, __setReadabilityStatus, useReadabilityStatusSelector, -] = createAtomHooks(atom>({})) +] = createAtomHooks(atom>({})) export const setReadabilityStatus = mergeObjectSetter( __setReadabilityStatus, getReadabilityStatus, ) export const useEntryIsInReadability = (entryId?: string) => - useReadabilityStatusSelector((map) => (entryId ? map[entryId] : false)) + useReadabilityStatusSelector( + (map) => + entryId ? (map[entryId] ? isInReadability(map[entryId]) : false) : false, + [entryId], + ) + +export const useEntryInReadabilityStatus = (entryId?: string) => + useReadabilityStatusSelector( + (map) => + entryId ? + map[entryId] || ReadabilityStatus.INITIAL : + ReadabilityStatus.INITIAL, + [entryId], + ) + +export const isInReadability = (status: ReadabilityStatus) => + status !== ReadabilityStatus.INITIAL && !!status export const useEntryReadabilityContent = (entryId: string) => - useReadabilityContentSelector((map) => map[entryId]) + useReadabilityContentSelector((map) => map[entryId], [entryId]) diff --git a/src/renderer/src/hooks/biz/useEntryActions.tsx b/src/renderer/src/hooks/biz/useEntryActions.tsx index 08b18806c7..b732b6c305 100644 --- a/src/renderer/src/hooks/biz/useEntryActions.tsx +++ b/src/renderer/src/hooks/biz/useEntryActions.tsx @@ -1,15 +1,17 @@ import { getReadabilityStatus, + isInReadability, + ReadabilityStatus, setReadabilityContent, setReadabilityStatus, - useEntryIsInReadability, + useEntryInReadabilityStatus, } from "@renderer/atoms/readability" import { SimpleIconsEagle } from "@renderer/components/ui/platform-icon/icons" import { COPY_MAP, views } from "@renderer/constants" import { shortcuts } from "@renderer/constants/shortcuts" import { tipcClient } from "@renderer/lib/client" import { nextFrame } from "@renderer/lib/dom" -import { getOS } from "@renderer/lib/utils" +import { cn, getOS } from "@renderer/lib/utils" import type { CombinedEntryModel } from "@renderer/models" import { useTipModal } from "@renderer/modules/wallet/hooks" import type { FlatEntryModel } from "@renderer/store/entry" @@ -19,9 +21,42 @@ import { useMutation, useQuery } from "@tanstack/react-query" import type { FetchError } from "ofetch" import { ofetch } from "ofetch" import type { ReactNode } from "react" -import { useMemo } from "react" +import { useCallback, useMemo } from "react" import { toast } from "sonner" +export const useEntryReadabilityToggle = ({ + id, + url, +}: { + id: string + url: string +}) => + useCallback(async () => { + const status = getReadabilityStatus()[id] + const isTurnOn = status !== ReadabilityStatus.INITIAL && !!status + + if (!isTurnOn && url) { + setReadabilityStatus({ + [id]: ReadabilityStatus.WAITING, + }) + const result = await tipcClient?.readability({ + url, + }) + + if (result) { + setReadabilityStatus({ + [id]: ReadabilityStatus.SUCCESS, + }) + setReadabilityContent({ + [id]: result, + }) + } + } else { + setReadabilityStatus({ + [id]: ReadabilityStatus.INITIAL, + }) + } + }, [id, url]) export const useCollect = (entry: Nullable) => useMutation({ mutationFn: async () => @@ -78,7 +113,7 @@ export const useEntryActions = ({ refetchOnMount: false, refetchOnWindowFocus: false, }) - const entryIsInReadability = useEntryIsInReadability(entry?.entries.id) + const entryReadabilityStatus = useEntryInReadabilityStatus(entry?.entries.id) const feed = useFeedById(entry?.feedId) @@ -101,6 +136,10 @@ export const useEntryActions = ({ const read = useRead() const unread = useUnread() + const readabilityToggle = useEntryReadabilityToggle({ + id: populatedEntry?.entries.id ?? "", + url: populatedEntry?.entries.url ?? "", + }) const items = useMemo(() => { if (!populatedEntry || view === undefined) return [] const items: { @@ -169,30 +208,22 @@ export const useEntryActions = ({ }, { name: "Readability", - className: entryIsInReadability ? `i-mgc-sparkles-2-filled` : `i-mgc-sparkles-2-cute-re`, + className: cn( + isInReadability(entryReadabilityStatus) ? + `i-mgc-sparkles-2-filled` : + `i-mgc-sparkles-2-cute-re`, + entryReadabilityStatus === ReadabilityStatus.WAITING ? + `animate-pulse` : + "", + ), key: "readability", - hide: views[view].wideMode || !populatedEntry.entries.url || !window.electron, - active: entryIsInReadability, + hide: + views[view].wideMode || + !populatedEntry.entries.url || + !window.electron, + active: isInReadability(entryReadabilityStatus), async onClick() { - const isTurnOn = getReadabilityStatus()[populatedEntry.entries.id] - if (!isTurnOn && populatedEntry.entries.url) { - const result = await tipcClient?.readability({ - url: populatedEntry.entries.url, - }) - - if (result) { - setReadabilityStatus({ - [populatedEntry.entries.id]: true, - }) - setReadabilityContent({ - [populatedEntry.entries.id]: result, - }) - } - } else { - setReadabilityStatus({ - [populatedEntry.entries.id]: false, - }) - } + return readabilityToggle() }, }, { @@ -270,18 +301,7 @@ export const useEntryActions = ({ ] return items - }, [ - checkEagle.data, - checkEagle.isLoading, - collect, - populatedEntry, - read, - openTipModal, - uncollect, - unread, - view, - entryIsInReadability, - ]) + }, [populatedEntry, view, checkEagle.isLoading, checkEagle.data, openTipModal, collect, uncollect, readabilityToggle, read, unread, entryReadabilityStatus]) return { items, diff --git a/src/renderer/src/hooks/biz/useNavigateEntry.ts b/src/renderer/src/hooks/biz/useNavigateEntry.ts index 91b6687a26..6a8cdf747c 100644 --- a/src/renderer/src/hooks/biz/useNavigateEntry.ts +++ b/src/renderer/src/hooks/biz/useNavigateEntry.ts @@ -4,7 +4,7 @@ import { } from "@renderer/atoms/route" import { setSidebarActiveView } from "@renderer/atoms/sidebar" import { ROUTE_ENTRY_PENDING, ROUTE_FEED_PENDING } from "@renderer/constants" -import type { FeedViewType } from "@renderer/lib/enum" +import { FeedViewType } from "@renderer/lib/enum" import { isUndefined } from "lodash-es" type NavigateEntryOptions = Partial<{ @@ -35,11 +35,13 @@ export const navigateEntry = (options: NavigateEntryOptions) => { setSidebarActiveView(view) } + const finalView = nextSearchParams.get("view") + if (window.posthog) { window.posthog.capture("Navigate Entry", { feedId: finalFeedId, entryId, - view, + view: finalView ? Number.parseInt(finalView, 10) : FeedViewType.Articles, }) } diff --git a/src/renderer/src/modules/entry-content/hooks.tsx b/src/renderer/src/modules/entry-content/hooks.tsx index abd671f269..4cfcb8bb4a 100644 --- a/src/renderer/src/modules/entry-content/hooks.tsx +++ b/src/renderer/src/modules/entry-content/hooks.tsx @@ -1,13 +1,12 @@ +import { isDev } from "@renderer/constants" import { useContext } from "react" import { EntryContentContext } from "./provider" export const useEntryContentContext = () => { const ctx = useContext(EntryContentContext) - if (!ctx) { - throw new Error( - "useEntryContentContext must be used within EntryContentProvider", - ) + if (!ctx && isDev) { + console.error("Notice: EntryContentContext is not provided.", new Error("x").stack) } return ctx } diff --git a/src/renderer/src/modules/entry-content/index.tsx b/src/renderer/src/modules/entry-content/index.tsx index a7e17e0c0a..790d7b3906 100644 --- a/src/renderer/src/modules/entry-content/index.tsx +++ b/src/renderer/src/modules/entry-content/index.tsx @@ -1,4 +1,6 @@ import { + ReadabilityStatus, + useEntryInReadabilityStatus, useEntryIsInReadability, useEntryReadabilityContent, } from "@renderer/atoms/readability" @@ -7,7 +9,9 @@ import { useMe } from "@renderer/atoms/user" import { m } from "@renderer/components/common/Motion" import { Logo } from "@renderer/components/icons/logo" import { AutoResizeHeight } from "@renderer/components/ui/auto-resize-height" +import { StyledButton } from "@renderer/components/ui/button" import { ScrollArea } from "@renderer/components/ui/scroll-area" +import { useEntryReadabilityToggle } from "@renderer/hooks/biz/useEntryActions" import { useRouteParamsSelector } from "@renderer/hooks/biz/useRouteParams" import { useAuthQuery, useTitle } from "@renderer/hooks/common" import { stopPropagation } from "@renderer/lib/dom" @@ -20,7 +24,7 @@ import { import { Queries } from "@renderer/queries" import { useEntry, useEntryReadHistory } from "@renderer/store/entry" import { useFeedById, useFeedHeaderTitle } from "@renderer/store/feed" -import type { ReactNode } from "react" +import type { FC, ReactNode } from "react" import { useEffect, useLayoutEffect, useState } from "react" import { LoadingCircle } from "../../components/ui/loading" @@ -194,6 +198,7 @@ function EntryContentRender({ entryId }: { entryId: string }) { +
@@ -232,9 +237,10 @@ function EntryContentRender({ entryId }: { entryId: string }) {
) : ( -
- No content -
+ )} )} @@ -298,5 +304,48 @@ const ReadabilityContent = ({ entryId }: { entryId: string }) => { } }, [result, parseHtml]) - return renderer + return ( +
+

+ + This content is provided by Readability. If you find typographical + anomalies, please go to the source site to view the original content. +

+ {renderer} +
+ ) +} + +const NoContent: FC<{ + id: string + url: string +}> = ({ id, url }) => { + const toggle = useEntryReadabilityToggle({ + id, + url, + }) + + const status = useEntryInReadabilityStatus(id) + if (status === ReadabilityStatus.SUCCESS) { + return null + } + return ( +
+
+ No content + {url && window.electron && ( +
+ But you can try to get the source site's content and parse and + render it by using the button below. + + Readability + +
+ )} +
+
+ ) } diff --git a/src/renderer/src/modules/entry-content/provider.tsx b/src/renderer/src/modules/entry-content/provider.tsx index 520ae69e94..b13a111c3f 100644 --- a/src/renderer/src/modules/entry-content/provider.tsx +++ b/src/renderer/src/modules/entry-content/provider.tsx @@ -1,4 +1,4 @@ -import type { FeedViewType } from "@renderer/lib/enum" +import { FeedViewType } from "@renderer/lib/enum" import { createContext } from "react" export interface EntryContentContext { @@ -9,7 +9,13 @@ export interface EntryContentContext { view: FeedViewType } -export const EntryContentContext = createContext(null!) +const defaultContextValue: EntryContentContext = { + entryId: "", + feedId: "", + view: FeedViewType.Articles, +} +export const EntryContentContext = + createContext(defaultContextValue) export const EntryContentProvider: Component = ({ children,