From 3ca03b5567fe907339192f681e43bdb5029cc97a Mon Sep 17 00:00:00 2001 From: Jovyn Tan <61113575+jovyntls@users.noreply.github.com> Date: Fri, 17 Mar 2023 13:05:33 +0800 Subject: [PATCH] Create unified types for nodes (#2201) Create unified types for nodes There is no standardised type for node variables, resulting in several undesirable issues: * `as unknown as` typecasts throughout the codebase * unnecessary checks on node attributes to satisfy the linter * incompatible types, treated as interchangeable in conversions Let's add common MbNode, TextElement and NodeOrText types that enforce attributes for nodes throughout the codebase. Doing so avoids the abovementioned issues, and formalises our view of the relationship between cheerio.Element, DomElement, and all the other attributes expected from node variables. --- packages/core/src/html/MdAttributeRenderer.ts | 3 +- packages/core/src/html/NodeProcessor.ts | 58 ++++++++-------- packages/core/src/html/elements.ts | 10 +-- .../core/src/html/includePanelProcessor.ts | 69 +++++++++---------- packages/core/src/utils/node.ts | 22 ++++++ 5 files changed, 90 insertions(+), 72 deletions(-) create mode 100644 packages/core/src/utils/node.ts diff --git a/packages/core/src/html/MdAttributeRenderer.ts b/packages/core/src/html/MdAttributeRenderer.ts index d04cddcfcb..0a0da8ef1e 100644 --- a/packages/core/src/html/MdAttributeRenderer.ts +++ b/packages/core/src/html/MdAttributeRenderer.ts @@ -5,6 +5,7 @@ import { getVslotShorthandName } from './vueSlotSyntaxProcessor'; import type { MarkdownProcessor } from './MarkdownProcessor'; import * as logger from '../utils/logger'; import { createSlotTemplateNode } from './elements'; +import { NodeOrText } from '../utils/node'; const _ = { has, @@ -44,7 +45,7 @@ export class MdAttributeRenderer { rendered = this.markdownProcessor.renderMd(node.attribs[attribute]); } - const attributeSlotElement = createSlotTemplateNode(slotName, rendered); + const attributeSlotElement: NodeOrText[] = createSlotTemplateNode(slotName, rendered); node.children = node.children ? attributeSlotElement.concat(node.children) : attributeSlotElement; diff --git a/packages/core/src/html/NodeProcessor.ts b/packages/core/src/html/NodeProcessor.ts index 2b6d4d891a..885294a731 100644 --- a/packages/core/src/html/NodeProcessor.ts +++ b/packages/core/src/html/NodeProcessor.ts @@ -26,6 +26,7 @@ import { PageNavProcessor, renderSiteNav, addSitePageNavPortal } from './siteAnd import { highlightCodeBlock, setCodeLineNumbers } from './codeblockProcessor'; import { setHeadingId, assignPanelId } from './headerProcessor'; import { FootnoteProcessor } from './FootnoteProcessor'; +import { MbNode, NodeOrText, TextElement } from '../utils/node'; const fm = require('fastmatter'); @@ -87,7 +88,9 @@ export class NodeProcessor { * Private utility functions */ - static _trimNodes(node: DomElement) { + static _trimNodes(nodeOrText: NodeOrText) { + if (NodeProcessor._isText(nodeOrText)) return; + const node = nodeOrText as MbNode; if (node.name === 'pre' || node.name === 'code') { return; } @@ -105,14 +108,14 @@ export class NodeProcessor { } } - static _isText(node: DomElement) { + static _isText(node: NodeOrText) { return node.type === 'text' || node.type === 'comment'; } /* * Frontmatter collection */ - _processFrontmatter(node: DomElement, context: Context) { + _processFrontmatter(node: MbNode, context: Context) { let currentFrontmatter = {}; const frontmatter = cheerio(node); if (!context.processingOptions.omitFrontmatter && frontmatter.text().trim()) { @@ -121,8 +124,8 @@ export class NodeProcessor { // The latter case will result in the data being wrapped in a div const frontmatterIncludeDiv = frontmatter.find('div'); const frontmatterData = frontmatterIncludeDiv.length - ? ((frontmatterIncludeDiv[0] as DomElement).children as DomElement[])[0].data - : ((frontmatter[0] as DomElement).children as DomElement[])[0].data; + ? ((frontmatterIncludeDiv[0] as MbNode).children as MbNode[])[0].data + : ((frontmatter[0] as MbNode).children as MbNode[])[0].data; const frontmatterWrapped = `${FRONTMATTER_FENCE}\n${frontmatterData}\n${FRONTMATTER_FENCE}`; currentFrontmatter = fm(frontmatterWrapped).attributes; @@ -139,7 +142,7 @@ export class NodeProcessor { * Layout element collection */ - private static collectLayoutEl(node: DomElement): string | null { + private static collectLayoutEl(node: MbNode): string | null { const $ = cheerio(node); const html = $.html(); $.remove(); @@ -149,7 +152,7 @@ export class NodeProcessor { /** * Removes the node if modal id already exists, processes node otherwise */ - private processModal(node: DomElement) { + private processModal(node: MbNode) { if (node.attribs) { if (this.processedModals[node.attribs.id]) { cheerio(node).remove(); @@ -168,11 +171,10 @@ export class NodeProcessor { /* * API */ - processNode(node: DomElement, context: Context): Context { + processNode(nodeOrText: NodeOrText, context: Context): Context { try { - if (!node.name || !node.attribs) { - return context; - } + if (NodeProcessor._isText(nodeOrText)) return context; + const node = nodeOrText as MbNode; transformOldSlotSyntax(node); shiftSlotNodeDeeper(node); @@ -274,7 +276,10 @@ export class NodeProcessor { return context; } - postProcessNode(node: DomElement) { + postProcessNode(nodeOrText: NodeOrText) { + if (NodeProcessor._isText(nodeOrText)) return; + const node = nodeOrText as MbNode; + try { switch (node.name) { case 'pre': @@ -316,13 +321,12 @@ export class NodeProcessor { } } - private traverse(node: DomElement, context: Context): DomElement { - if (NodeProcessor._isText(node)) { - return node; - } - if (node.name) { - node.name = node.name.toLowerCase(); + private traverse(dom: DomElement, context: Context): NodeOrText { + if (NodeProcessor._isText(dom)) { + return dom as TextElement; } + const node = dom as MbNode; + node.name = node.name.toLowerCase(); if (linkProcessor.hasTagLink(node)) { linkProcessor.convertRelativeLinks(node, context.cwf, this.config.rootPath, this.config.baseUrl); linkProcessor.convertMdExtToHtmlExt(node); @@ -363,16 +367,14 @@ export class NodeProcessor { addSitePageNavPortal(node); - if (node.name) { - const isHeadingTag = (/^h[1-6]$/).test(node.name); - if (isHeadingTag && !node.attribs?.id) { - setHeadingId(node, this.config); - } + const isHeadingTag = (/^h[1-6]$/).test(node.name); + if (isHeadingTag && !node.attribs.id) { + setHeadingId(node, this.config); + } - // Generate dummy spans as anchor points for header[sticky] - if (isHeadingTag && node.attribs?.id) { - cheerio(node).prepend(``); - } + // Generate dummy spans as anchor points for header[sticky] + if (isHeadingTag && node.attribs.id) { + cheerio(node).prepend(``); } this.pluginManager.postProcessNode(node); @@ -407,7 +409,7 @@ export class NodeProcessor { }); mainHtmlNodes.forEach(d => NodeProcessor._trimNodes(d)); - const footnotesHtml = this.footnoteProcessor.combineFootnotes((node: DomElement) => this.processNode( + const footnotesHtml = this.footnoteProcessor.combineFootnotes(node => this.processNode( node, new Context(cwf, [], extraVariables, {}), )); const mainHtml = cheerio(mainHtmlNodes).html(); diff --git a/packages/core/src/html/elements.ts b/packages/core/src/html/elements.ts index 9c53a4b98d..bd8e4fc962 100644 --- a/packages/core/src/html/elements.ts +++ b/packages/core/src/html/elements.ts @@ -1,22 +1,22 @@ import cheerio from 'cheerio'; -import { DomElement } from 'htmlparser2'; import pick from 'lodash/pick'; +import { MbNode, NodeOrText } from '../utils/node'; const _ = { pick }; -export function createErrorNode(element: DomElement, error: any) { +export function createErrorNode(element: NodeOrText, error: any) { const errorElement = cheerio.parseHTML( `
${error.message}
`, undefined, true, )[0]; - return Object.assign(element, _.pick(errorElement, ['name', 'attribs', 'children'])); + return Object.assign(element, _.pick(errorElement, ['name', 'attribs', 'children'])) as MbNode; } export function createEmptyNode() { return cheerio.parseHTML('
', undefined, true)[0]; } -export function createSlotTemplateNode(slotName: string, content: string): DomElement[] { +export function createSlotTemplateNode(slotName: string, content: string): MbNode[] { return cheerio.parseHTML( ``, undefined, true, - ) as unknown as DomElement[]; + ) as unknown as MbNode[]; } diff --git a/packages/core/src/html/includePanelProcessor.ts b/packages/core/src/html/includePanelProcessor.ts index ffc6165cc1..4505130dd2 100644 --- a/packages/core/src/html/includePanelProcessor.ts +++ b/packages/core/src/html/includePanelProcessor.ts @@ -4,7 +4,6 @@ import parse from 'url-parse'; import has from 'lodash/has'; import isEmpty from 'lodash/isEmpty'; -import { DomElement } from 'htmlparser2'; import { createErrorNode, createSlotTemplateNode } from './elements'; import CyclicReferenceError from '../errors/CyclicReferenceError'; @@ -14,6 +13,7 @@ import * as urlUtil from '../utils/urlUtil'; import type { Context } from './Context'; import type { PageSources } from '../Page/PageSources'; import type VariableProcessor from '../variables/VariableProcessor'; +import { MbNode, NodeOrText } from '../utils/node'; require('../patches/htmlparser2'); @@ -26,7 +26,7 @@ const _ = { has, isEmpty }; /** * Returns a boolean representing whether the file specified exists. */ -function _checkAndWarnFileExists(element: DomElement, context: Context, actualFilePath: string, +function _checkAndWarnFileExists(element: MbNode, context: Context, actualFilePath: string, pageSources: PageSources, isOptional = false) { if (!fsUtil.fileExists(actualFilePath)) { if (isOptional) { @@ -48,11 +48,7 @@ function _checkAndWarnFileExists(element: DomElement, context: Context, actualFi return true; } -function _getBoilerplateFilePath(node: DomElement, filePath: string, config: Record) { - const element = node; - - if (!element.attribs) return undefined; - +function _getBoilerplateFilePath(element: MbNode, filePath: string, config: Record) { const isBoilerplate = _.has(element.attribs, 'boilerplate'); if (isBoilerplate) { element.attribs.boilerplate = element.attribs.boilerplate || path.basename(filePath); @@ -66,8 +62,7 @@ function _getBoilerplateFilePath(node: DomElement, filePath: string, config: Rec /** * Retrieves several flags and file paths from the src attribute specified in the element. */ -function _getSrcFlagsAndFilePaths(element: DomElement & { attribs: { src: string } }, - config: Record) { +function _getSrcFlagsAndFilePaths(element: MbNode, config: Record) { const isUrl = urlUtil.isUrl(element.attribs.src); // We do this even if the src is not a url to get the hash, if any @@ -109,10 +104,10 @@ function _getSrcFlagsAndFilePaths(element: DomElement & { attribs: { src: string * Otherwise, sets the fragment attribute of the panel as parsed from the src, * and adds the appropriate include. */ -export function processPanelSrc(node: DomElement, context: Context, pageSources: PageSources, +export function processPanelSrc(node: MbNode, context: Context, pageSources: PageSources, config: Record): Context { const hasSrc = _.has(node.attribs, 'src'); - if (!hasSrc || !node.attribs) { + if (!hasSrc) { return context; } @@ -122,7 +117,7 @@ export function processPanelSrc(node: DomElement, context: Context, pageSources: filePath, actualFilePath, // We can typecast here as we have checked for src above. - } = _getSrcFlagsAndFilePaths(node as DomElement & { attribs: { src: string } }, config); + } = _getSrcFlagsAndFilePaths(node, config); const fileExists = _checkAndWarnFileExists(node, context, actualFilePath, pageSources); if (!fileExists) { @@ -153,23 +148,21 @@ export function processPanelSrc(node: DomElement, context: Context, pageSources: * Includes */ -function _deleteIncludeAttributes(node: DomElement) { - const nodeAttribs = node.attribs; - if (!nodeAttribs) return; +function _deleteIncludeAttributes(node: MbNode) { // Delete variable attributes in include tags as they are no longer needed // e.g. '' - Object.keys(nodeAttribs).forEach((attribute) => { + Object.keys(node.attribs).forEach((attribute) => { if (attribute.startsWith('var-')) { - delete nodeAttribs[attribute]; + delete node.attribs[attribute]; } }); - delete nodeAttribs.boilerplate; - delete nodeAttribs.src; - delete nodeAttribs.inline; - delete nodeAttribs.trim; - delete nodeAttribs.optional; - delete nodeAttribs.omitFrontmatter; + delete node.attribs.boilerplate; + delete node.attribs.src; + delete node.attribs.inline; + delete node.attribs.trim; + delete node.attribs.optional; + delete node.attribs.omitFrontmatter; } /** @@ -177,14 +170,14 @@ function _deleteIncludeAttributes(node: DomElement) { * Replaces it with an error node if the specified src is invalid, * or an empty node if the src is invalid but optional. */ -export function processInclude(node: DomElement, context: Context, pageSources: PageSources, +export function processInclude(node: MbNode, context: Context, pageSources: PageSources, variableProcessor: VariableProcessor, renderMd: (text: string) => string, renderMdInline: (text: string) => string, config: Record): Context { - if (_.isEmpty(node.attribs?.src)) { + if (_.isEmpty(node.attribs.src)) { const error = new Error(`Empty src attribute in include in: ${context.cwf}`); logger.error(error); - cheerio(node).replaceWith(createErrorNode(node, error) as cheerio.Element); + cheerio(node).replaceWith(createErrorNode(node, error)); return context; } @@ -194,7 +187,7 @@ export function processInclude(node: DomElement, context: Context, pageSources: filePath, actualFilePath, // We can typecast here as we have checked for src above. - } = _getSrcFlagsAndFilePaths(node as DomElement & { attribs: { src: string } }, config); + } = _getSrcFlagsAndFilePaths(node, config); // No need to process url contents if (isUrl) { @@ -243,7 +236,7 @@ export function processInclude(node: DomElement, context: Context, pageSources: + `Missing reference in ${context.cwf}`); logger.error(error); - actualContent = cheerio.html(createErrorNode(node, error) as cheerio.Element); + actualContent = cheerio.html(createErrorNode(node, error)); } } @@ -262,7 +255,7 @@ export function processInclude(node: DomElement, context: Context, pageSources: if (childContext.hasExceededMaxCallstackSize()) { const error = new CyclicReferenceError(childContext.callStack); logger.error(error); - cheerio(node).replaceWith(createErrorNode(node, error) as cheerio.Element); + cheerio(node).replaceWith(createErrorNode(node, error)); return context; } } @@ -277,17 +270,17 @@ export function processInclude(node: DomElement, context: Context, pageSources: * Replaces it with an error node if the specified src is invalid. * Else, appends the content to the node. */ -export function processPopoverSrc(node: DomElement, context: Context, pageSources: PageSources, +export function processPopoverSrc(node: MbNode, context: Context, pageSources: PageSources, variableProcessor: VariableProcessor, renderMd: (text: string) => string, config: Record): Context { - if (!node.attribs || !_.has(node.attribs, 'src')) { + if (!_.has(node.attribs, 'src')) { return context; } if (_.isEmpty(node.attribs.src)) { const error = new Error(`Empty src attribute in include in: ${context.cwf}`); logger.error(error); - cheerio(node).replaceWith(createErrorNode(node, error) as cheerio.Element); + cheerio(node).replaceWith(createErrorNode(node, error)); return context; } @@ -297,13 +290,13 @@ export function processPopoverSrc(node: DomElement, context: Context, pageSource filePath, actualFilePath, // We can typecast here as we have checked for src above. - } = _getSrcFlagsAndFilePaths(node as DomElement & { attribs: { src: string } }, config); + } = _getSrcFlagsAndFilePaths(node, config); // No need to process url contents if (isUrl) { const error = new Error('URLs are not allowed in the \'src\' attribute'); logger.error(error); - cheerio(node).replaceWith(createErrorNode(node, error) as cheerio.Element); + cheerio(node).replaceWith(createErrorNode(node, error)); return context; } @@ -338,7 +331,7 @@ export function processPopoverSrc(node: DomElement, context: Context, pageSource + `Missing reference in ${context.cwf}`); logger.error(error); - cheerio(node).replaceWith(createErrorNode(node, error) as cheerio.Element); + cheerio(node).replaceWith(createErrorNode(node, error)); return context; } @@ -346,18 +339,18 @@ export function processPopoverSrc(node: DomElement, context: Context, pageSource actualContent = actualContent.trim(); - if (node.children && node.children.length > 0) { + if (node.children.length > 0) { childContext.addCwfToCallstack(context.cwf); if (childContext.hasExceededMaxCallstackSize()) { const error = new CyclicReferenceError(childContext.callStack); logger.error(error); - cheerio(node).replaceWith(createErrorNode(node, error) as cheerio.Element); + cheerio(node).replaceWith(createErrorNode(node, error)); return context; } } - const attributeSlotElement = createSlotTemplateNode('content', actualContent); + const attributeSlotElement: NodeOrText[] = createSlotTemplateNode('content', actualContent); node.children = node.children ? attributeSlotElement.concat(node.children) : attributeSlotElement; delete node.attribs.src; diff --git a/packages/core/src/utils/node.ts b/packages/core/src/utils/node.ts new file mode 100644 index 0000000000..b0eb872476 --- /dev/null +++ b/packages/core/src/utils/node.ts @@ -0,0 +1,22 @@ +import { DomElement } from 'htmlparser2'; + +/* + * A TextElement is a simple node that does not need complex processing. + */ +export type TextElement = DomElement; + +/* + * MbNode (MarkBindNode) is an element that can be operated on by cheerio and our own node processing + * methods. It must have a name (used to identify what kind of node it is), attributes (possibly empty), + * and children nodes (possibly empty). This type allows us to assert that these attributes exist. + */ +export type MbNode = DomElement & cheerio.Element & { + name: string, + attribs: { [key: string]: any }, + children: NodeOrText[], +}; + +/* + * NodeOrText is used before a node can be casted to either TextElement or MbNode. + */ +export type NodeOrText = TextElement | MbNode;