From 038f086dcffba32d1833078a58351749ec4155a4 Mon Sep 17 00:00:00 2001 From: iseulde Date: Wed, 4 Apr 2018 16:54:04 +0200 Subject: [PATCH 1/2] Add paste schema Comment, test, clean up Do not allow figures without embedded content Adjust getContentSchema signature Destructure where possible Fix typo Add test for 315819db56d711b509e63cc82bc9ecdb7168d74a Move schemas to blocks Simplify Restore iframe filter Add Markdown integration test Address feedback Separate Markdown converter Remove unneeded nodeType checks --- blocks/api/index.js | 2 +- .../api/raw-handling/blockquote-normaliser.js | 9 - blocks/api/raw-handling/comment-remover.js | 12 - blocks/api/raw-handling/create-unwrapper.js | 34 -- .../raw-handling/embedded-content-reducer.js | 47 --- .../raw-handling/figure-content-reducer.js | 88 +++++ .../raw-handling/formatting-transformer.js | 39 --- blocks/api/raw-handling/iframe-remover.js | 17 + blocks/api/raw-handling/image-corrector.js | 5 - blocks/api/raw-handling/index.js | 201 ++++++----- .../raw-handling/inline-content-converter.js | 27 -- blocks/api/raw-handling/is-inline-content.js | 57 +++- blocks/api/raw-handling/list-reducer.js | 29 +- blocks/api/raw-handling/markdown-converter.js | 42 +++ blocks/api/raw-handling/ms-list-converter.js | 13 +- blocks/api/raw-handling/normalise-blocks.js | 10 +- .../raw-handling/phrasing-content-reducer.js | 37 ++ .../slack-markdown-variant-corrector.js | 16 - .../raw-handling/special-comment-converter.js | 17 +- blocks/api/raw-handling/strip-attributes.js | 46 --- blocks/api/raw-handling/table-normaliser.js | 18 - .../api/raw-handling/test/comment-remover.js | 20 -- .../api/raw-handling/test/create-unwrapper.js | 42 --- .../test/embedded-content-reducer.js | 22 -- .../test/figure-content-reducer.js | 41 +++ blocks/api/raw-handling/test/index.js | 100 +----- .../test/inline-content-converter.js | 19 -- .../test/integration/apple-out.html | 9 +- .../test/integration/google-docs-out.html | 9 +- 
.../test/integration/iframe-embed-out.html | 2 +- .../raw-handling/test/integration/index.js | 15 +- .../test/integration/markdown-in.txt | 38 +++ .../test/integration/markdown-out.html | 83 +++++ .../test/integration/wordpress-in.html | 4 + .../test/integration/wordpress-out.html | 15 + .../raw-handling/test/is-inline-content.js | 2 +- blocks/api/raw-handling/test/list-reducer.js | 16 +- .../raw-handling/test/markdown-converter.js | 35 ++ ...sformer.js => phrasing-content-reducer.js} | 14 +- .../test/slack-markdown-variant-corrector.js | 29 -- .../api/raw-handling/test/strip-attributes.js | 36 -- .../api/raw-handling/test/table-normaliser.js | 19 -- blocks/api/raw-handling/test/utils.js | 134 ++++++-- blocks/api/raw-handling/utils.js | 322 ++++++++++-------- core-blocks/code/index.js | 15 +- core-blocks/heading/index.js | 11 +- core-blocks/html/index.js | 17 +- core-blocks/image/index.js | 34 +- core-blocks/list/index.js | 24 +- core-blocks/more/index.js | 3 + core-blocks/nextpage/index.js | 3 + core-blocks/paragraph/index.js | 13 +- core-blocks/preformatted/index.js | 10 +- core-blocks/quote/index.js | 12 +- core-blocks/separator/index.js | 5 +- core-blocks/table/index.js | 33 +- utils/dom.js | 38 +++ 57 files changed, 1122 insertions(+), 888 deletions(-) delete mode 100644 blocks/api/raw-handling/comment-remover.js delete mode 100644 blocks/api/raw-handling/create-unwrapper.js delete mode 100644 blocks/api/raw-handling/embedded-content-reducer.js create mode 100644 blocks/api/raw-handling/figure-content-reducer.js delete mode 100644 blocks/api/raw-handling/formatting-transformer.js create mode 100644 blocks/api/raw-handling/iframe-remover.js delete mode 100644 blocks/api/raw-handling/inline-content-converter.js create mode 100644 blocks/api/raw-handling/markdown-converter.js create mode 100644 blocks/api/raw-handling/phrasing-content-reducer.js delete mode 100644 blocks/api/raw-handling/slack-markdown-variant-corrector.js delete mode 100644 
blocks/api/raw-handling/strip-attributes.js delete mode 100644 blocks/api/raw-handling/table-normaliser.js delete mode 100644 blocks/api/raw-handling/test/comment-remover.js delete mode 100644 blocks/api/raw-handling/test/create-unwrapper.js delete mode 100644 blocks/api/raw-handling/test/embedded-content-reducer.js create mode 100644 blocks/api/raw-handling/test/figure-content-reducer.js delete mode 100644 blocks/api/raw-handling/test/inline-content-converter.js create mode 100644 blocks/api/raw-handling/test/integration/markdown-in.txt create mode 100644 blocks/api/raw-handling/test/integration/markdown-out.html create mode 100644 blocks/api/raw-handling/test/integration/wordpress-in.html create mode 100644 blocks/api/raw-handling/test/integration/wordpress-out.html create mode 100644 blocks/api/raw-handling/test/markdown-converter.js rename blocks/api/raw-handling/test/{formatting-transformer.js => phrasing-content-reducer.js} (50%) delete mode 100644 blocks/api/raw-handling/test/slack-markdown-variant-corrector.js delete mode 100644 blocks/api/raw-handling/test/strip-attributes.js delete mode 100644 blocks/api/raw-handling/test/table-normaliser.js diff --git a/blocks/api/index.js b/blocks/api/index.js index 8770b2609a85f9..897aba32f31761 100644 --- a/blocks/api/index.js +++ b/blocks/api/index.js @@ -11,7 +11,7 @@ export { getBlockAttributes, parseWithAttributeSchema, } from './parser'; -export { default as rawHandler } from './raw-handling'; +export { default as rawHandler, getPhrasingContentSchema } from './raw-handling'; export { default as serialize, getBlockContent, diff --git a/blocks/api/raw-handling/blockquote-normaliser.js b/blocks/api/raw-handling/blockquote-normaliser.js index 58633866f5a847..f08e89791280b6 100644 --- a/blocks/api/raw-handling/blockquote-normaliser.js +++ b/blocks/api/raw-handling/blockquote-normaliser.js @@ -3,16 +3,7 @@ */ import normaliseBlocks from './normalise-blocks'; -/** - * Browser dependencies - */ -const { ELEMENT_NODE } = 
window.Node; - export default function( node ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - if ( node.nodeName !== 'BLOCKQUOTE' ) { return; } diff --git a/blocks/api/raw-handling/comment-remover.js b/blocks/api/raw-handling/comment-remover.js deleted file mode 100644 index 93b580b5f44eae..00000000000000 --- a/blocks/api/raw-handling/comment-remover.js +++ /dev/null @@ -1,12 +0,0 @@ -/** - * Browser dependencies - */ -const { COMMENT_NODE } = window.Node; - -export default function( node ) { - if ( node.nodeType !== COMMENT_NODE ) { - return; - } - - node.parentNode.removeChild( node ); -} diff --git a/blocks/api/raw-handling/create-unwrapper.js b/blocks/api/raw-handling/create-unwrapper.js deleted file mode 100644 index 3266cbe62e3d5f..00000000000000 --- a/blocks/api/raw-handling/create-unwrapper.js +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Browser dependencies - */ -const { ELEMENT_NODE } = window.Node; - -function unwrap( node ) { - const parent = node.parentNode; - - while ( node.firstChild ) { - parent.insertBefore( node.firstChild, node ); - } - - parent.removeChild( node ); -} - -export default function( predicate, after ) { - return ( node ) => { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - - if ( ! predicate( node ) ) { - return; - } - - const afterNode = after && after( node ); - - if ( afterNode ) { - node.appendChild( afterNode ); - } - - unwrap( node ); - }; -} diff --git a/blocks/api/raw-handling/embedded-content-reducer.js b/blocks/api/raw-handling/embedded-content-reducer.js deleted file mode 100644 index b2740e8b201fbe..00000000000000 --- a/blocks/api/raw-handling/embedded-content-reducer.js +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Internal dependencies - */ -import { isEmbedded } from './utils'; - -/** - * Browser dependencies - */ -const { ELEMENT_NODE } = window.Node; - -/** - * This filter takes embedded content out of paragraphs. - * - * @param {Node} node The node to filter. 
- * - * @return {void} - */ -export default function( node ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - - if ( ! isEmbedded( node ) ) { - return; - } - - let nodeToInsert = node; - // if the embedded is an image and its parent is an anchor with just the image - // take the anchor out instead of just the image - if ( - 'IMG' === node.nodeName && - 1 === node.parentNode.childNodes.length && - 'A' === node.parentNode.nodeName - ) { - nodeToInsert = node.parentNode; - } - - let wrapper = nodeToInsert; - - while ( wrapper && wrapper.nodeName !== 'P' ) { - wrapper = wrapper.parentElement; - } - - if ( wrapper ) { - wrapper.parentNode.insertBefore( nodeToInsert, wrapper ); - } -} diff --git a/blocks/api/raw-handling/figure-content-reducer.js b/blocks/api/raw-handling/figure-content-reducer.js new file mode 100644 index 00000000000000..1fa63b080d1c44 --- /dev/null +++ b/blocks/api/raw-handling/figure-content-reducer.js @@ -0,0 +1,88 @@ +/** + * External dependencies + */ +import { has } from 'lodash'; + +/** + * Internal dependencies + */ +import { isPhrasingContent } from './utils'; + +/** + * Whether or not the given node is figure content. + * + * @param {Node} node The node to check. + * @param {Object} schema The schema to use. + * + * @return {boolean} True if figure content, false if not. + */ +function isFigureContent( node, schema ) { + const tag = node.nodeName.toLowerCase(); + + // We are looking for tags that can be a child of the figure tag, excluding + // `figcaption` and any phrasing content. + if ( tag === 'figcaption' || isPhrasingContent( node ) ) { + return false; + } + + return has( schema, [ 'figure', 'children', tag ] ); +} + +/** + * Whether or not the given node can have an anchor. + * + * @param {Node} node The node to check. + * @param {Object} schema The schema to use. + * + * @return {boolean} True if it can, false if not. 
+ */ +function canHaveAnchor( node, schema ) { + const tag = node.nodeName.toLowerCase(); + + return has( schema, [ 'figure', 'children', 'a', 'children', tag ] ); +} + +/** + * This filter takes figure content out of paragraphs, wraps it in a figure + * element, and moves any anchors with it if needed. + * + * @param {Node} node The node to filter. + * @param {Document} doc The document of the node. + * @param {Object} schema The schema to use. + * + * @return {void} + */ +export default function( node, doc, schema ) { + if ( ! isFigureContent( node, schema ) ) { + return; + } + + let nodeToInsert = node; + const parentNode = node.parentNode; + + // If the figure content can have an anchor and its parent is an anchor with + // only the figure content, take the anchor out instead of just the content. + if ( + canHaveAnchor( node, schema ) && + parentNode.nodeName === 'A' && + parentNode.childNodes.length === 1 + ) { + nodeToInsert = node.parentNode; + } + + let wrapper = nodeToInsert; + + while ( wrapper && wrapper.nodeName !== 'P' ) { + wrapper = wrapper.parentElement; + } + + const figure = doc.createElement( 'figure' ); + + if ( wrapper ) { + wrapper.parentNode.insertBefore( figure, wrapper ); + } else { + nodeToInsert.parentNode.insertBefore( figure, nodeToInsert ); + } + + figure.appendChild( nodeToInsert ); +} diff --git a/blocks/api/raw-handling/formatting-transformer.js b/blocks/api/raw-handling/formatting-transformer.js deleted file mode 100644 index 1b12810d9d9136..00000000000000 --- a/blocks/api/raw-handling/formatting-transformer.js +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Browser dependencies - */ -const { ELEMENT_NODE } = window.Node; - -function replace( node, tagName ) { - const newNode = document.createElement( tagName ); - - while ( node.firstChild ) { - newNode.appendChild( node.firstChild ); - } - - node.parentNode.replaceChild( newNode, node ); -} - -export default function( node ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - - if 
( node.nodeName === 'SPAN' ) { - const fontWeight = node.style.fontWeight; - const fontStyle = node.style.fontStyle; - - if ( fontWeight === 'bold' || fontWeight === '700' ) { - replace( node, 'strong' ); - } else if ( fontStyle === 'italic' ) { - replace( node, 'em' ); - } - } - - if ( node.nodeName === 'B' ) { - replace( node, 'strong' ); - } - - if ( node.nodeName === 'I' ) { - replace( node, 'em' ); - } -} diff --git a/blocks/api/raw-handling/iframe-remover.js b/blocks/api/raw-handling/iframe-remover.js new file mode 100644 index 00000000000000..31733fa0428531 --- /dev/null +++ b/blocks/api/raw-handling/iframe-remover.js @@ -0,0 +1,17 @@ +/** + * WordPress dependencies + */ +import { remove } from '@wordpress/utils'; + +/** + * Removes iframes. + * + * @param {Node} node The node to check. + * + * @return {void} + */ +export default function( node ) { + if ( node.nodeName === 'IFRAME' ) { + remove( node ); + } +} diff --git a/blocks/api/raw-handling/image-corrector.js b/blocks/api/raw-handling/image-corrector.js index 142cfdcdd89947..c6266c4d99db8b 100644 --- a/blocks/api/raw-handling/image-corrector.js +++ b/blocks/api/raw-handling/image-corrector.js @@ -7,13 +7,8 @@ import { createBlobURL } from '@wordpress/utils'; * Browser dependencies */ const { atob, Blob } = window; -const { ELEMENT_NODE } = window.Node; export default function( node ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - if ( node.nodeName !== 'IMG' ) { return; } diff --git a/blocks/api/raw-handling/index.js b/blocks/api/raw-handling/index.js index ecf0c5786d63ec..8cf4f2bde25f51 100644 --- a/blocks/api/raw-handling/index.js +++ b/blocks/api/raw-handling/index.js @@ -1,32 +1,67 @@ /** * External dependencies */ -import { compact } from 'lodash'; -import showdown from 'showdown'; +import { find, flatMap, filter, compact } from 'lodash'; /** * Internal dependencies */ -import { createBlock, getBlockTransforms, findTransform } from '../factory'; -import { getBlockType, 
getUnknownTypeHandlerName } from '../registration'; +import { createBlock, getBlockTransforms } from '../factory'; +import { getBlockType } from '../registration'; import { getBlockAttributes, parseWithGrammar } from '../parser'; import normaliseBlocks from './normalise-blocks'; -import stripAttributes from './strip-attributes'; import specialCommentConverter from './special-comment-converter'; -import commentRemover from './comment-remover'; -import createUnwrapper from './create-unwrapper'; import isInlineContent from './is-inline-content'; -import formattingTransformer from './formatting-transformer'; +import phrasingContentReducer from './phrasing-content-reducer'; import msListConverter from './ms-list-converter'; import listReducer from './list-reducer'; import imageCorrector from './image-corrector'; import blockquoteNormaliser from './blockquote-normaliser'; -import tableNormaliser from './table-normaliser'; -import inlineContentConverter from './inline-content-converter'; -import embeddedContentReducer from './embedded-content-reducer'; -import { deepFilterHTML, isInvalidInline, isNotWhitelisted, isPlain, isInline } from './utils'; +import figureContentReducer from './figure-content-reducer'; import shortcodeConverter from './shortcode-converter'; -import slackMarkdownVariantCorrector from './slack-markdown-variant-corrector'; +import markdownConverter from './markdown-converter'; +import iframeRemover from './iframe-remover'; +import { + deepFilterHTML, + isPlain, + removeInvalidHTML, + getPhrasingContentSchema, + getBlockContentSchema, +} from './utils'; + +/** + * Browser dependencies + */ +const { log, warn } = window.console; + +export { getPhrasingContentSchema }; + +/** + * Filters HTML to only contain phrasing content. + * + * @param {string} HTML The HTML to filter. + * + * @return {string} HTML only containing phrasing content. 
+ */ +function filterInlineHTML( HTML ) { + HTML = deepFilterHTML( HTML, [ phrasingContentReducer ] ); + HTML = removeInvalidHTML( HTML, getPhrasingContentSchema(), { inline: true } ); + + // Allows us to ask for this information when we get a report. + log( 'Processed inline HTML:\n\n', HTML ); + + return HTML; +} + +function getRawTransformations() { + return filter( getBlockTransforms( 'from' ), { type: 'raw' } ) + .map( ( transform ) => { + return transform.isMatch ? transform : { + ...transform, + isMatch: ( node ) => transform.selector && node.matches( transform.selector ), + }; + } ); +} /** * Converts an HTML string to known blocks. Strips everything else. @@ -38,7 +73,7 @@ import slackMarkdownVariantCorrector from './slack-markdown-variant-corrector'; * * 'INLINE': Always handle as inline content, and return string. * * 'BLOCKS': Always handle as blocks, and return array of blocks. * @param {Array} [options.tagName] The tag into which content will be inserted. - * @param {boolean} [options.canUserUseUnfilteredHTML] Whether or not to user can use unfiltered HTML. + * @param {boolean} [options.canUserUseUnfilteredHTML] Whether or not the user can use unfiltered HTML. * * @return {Array|string} A list of blocks or a string, depending on `handlerMode`. */ @@ -55,17 +90,7 @@ export default function rawHandler( { HTML = '', plainText = '', mode = 'AUTO', // * There is a plain text version. // * There is no HTML version, or it has no formatting. if ( plainText && ( ! 
HTML || isPlain( HTML ) ) ) { - const converter = new showdown.Converter(); - - converter.setOption( 'noHeaderId', true ); - converter.setOption( 'tables', true ); - converter.setOption( 'literalMidWordUnderscores', true ); - converter.setOption( 'omitExtraWLInCodeBlocks', true ); - converter.setOption( 'simpleLineBreaks', true ); - - plainText = slackMarkdownVariantCorrector( plainText ); - - HTML = converter.makeHtml( plainText ); + HTML = markdownConverter( plainText ); // Switch to inline mode if: // * The current mode is AUTO. @@ -82,101 +107,91 @@ export default function rawHandler( { HTML = '', plainText = '', mode = 'AUTO', } } - // An array of HTML strings and block objects. The blocks replace matched shortcodes. + if ( mode === 'INLINE' ) { + return filterInlineHTML( HTML ); + } + + // An array of HTML strings and block objects. The blocks replace matched + // shortcodes. const pieces = shortcodeConverter( HTML ); - // The call to shortcodeConverter will always return more than one element if shortcodes are matched. - // The reason is when shortcodes are matched empty HTML strings are included. + // The call to shortcodeConverter will always return more than one element + // if shortcodes are matched. The reason is when shortcodes are matched + // empty HTML strings are included. const hasShortcodes = pieces.length > 1; - // True if mode is auto, no shortcode is included and HTML verifies the isInlineContent condition - const isAutoModeInline = mode === 'AUTO' && isInlineContent( HTML, tagName ) && ! hasShortcodes; - - // Return filtered HTML if condition is true - if ( mode === 'INLINE' || isAutoModeInline ) { - HTML = deepFilterHTML( HTML, [ - // Add semantic formatting before attributes are stripped. - formattingTransformer, - stripAttributes, - specialCommentConverter, - commentRemover, - createUnwrapper( ( node ) => ! isInline( node, tagName ) ), - ] ); - - // Allows us to ask for this information when we get a report. 
- window.console.log( 'Processed inline HTML:\n\n', HTML ); - - return HTML; + if ( mode === 'AUTO' && ! hasShortcodes && isInlineContent( HTML, tagName ) ) { + return filterInlineHTML( HTML ); } - // Before we parse any HTML, extract shorcodes so they don't get messed up. - return pieces.reduce( ( accu, piece ) => { + const rawTransformations = getRawTransformations(); + const phrasingContentSchema = getPhrasingContentSchema(); + const blockContentSchema = getBlockContentSchema( rawTransformations ); + + return compact( flatMap( pieces, ( piece ) => { // Already a block from shortcode. if ( typeof piece !== 'string' ) { - return [ ...accu, piece ]; + return piece; } - // Context dependent filters. Needs to run before we remove nodes. - piece = deepFilterHTML( piece, [ + const filters = [ msListConverter, - ] ); - - piece = deepFilterHTML( piece, compact( [ listReducer, imageCorrector, - // Add semantic formatting before attributes are stripped. - formattingTransformer, - stripAttributes, + phrasingContentReducer, specialCommentConverter, - commentRemover, - ! canUserUseUnfilteredHTML && createUnwrapper( ( element ) => element.nodeName === 'IFRAME' ), - embeddedContentReducer, - createUnwrapper( isNotWhitelisted ), + figureContentReducer, blockquoteNormaliser, - tableNormaliser, - inlineContentConverter, - ] ) ); + ]; - piece = deepFilterHTML( piece, [ - createUnwrapper( isInvalidInline ), - ] ); + if ( ! canUserUseUnfilteredHTML ) { + // Should run before `figureContentReducer`. + filters.unshift( iframeRemover ); + } + + const schema = { + ...blockContentSchema, + // Keep top-level phrasing content, normalised by `normaliseBlocks`. + ...phrasingContentSchema, + }; + piece = deepFilterHTML( piece, filters, blockContentSchema ); + piece = removeInvalidHTML( piece, schema ); piece = normaliseBlocks( piece ); // Allows us to ask for this information when we get a report. 
- window.console.log( 'Processed HTML piece:\n\n', piece ); + log( 'Processed HTML piece:\n\n', piece ); const doc = document.implementation.createHTMLDocument( '' ); doc.body.innerHTML = piece; - const transformsFrom = getBlockTransforms( 'from' ); - - const blocks = Array.from( doc.body.children ).map( ( node ) => { - const transformation = findTransform( transformsFrom, ( transform ) => ( - transform.type === 'raw' && - transform.isMatch( node ) - ) ); - - if ( transformation ) { - if ( transformation.transform ) { - return transformation.transform( node ); - } - - return createBlock( - transformation.blockName, - getBlockAttributes( - getBlockType( transformation.blockName ), - node.outerHTML - ) + return Array.from( doc.body.children ).map( ( node ) => { + const rawTransformation = find( rawTransformations, ( { isMatch } ) => isMatch( node ) ); + + if ( ! rawTransformation ) { + warn( + 'A block registered a raw transformation schema for `' + node.nodeName + '` but did not match it. 
' + + 'Make sure there is a `selector` or `isMatch` property that can match the schema.\n' + + 'Sanitized HTML: `' + node.outerHTML + '`' ); + + return; } - return createBlock( getUnknownTypeHandlerName(), { - content: node.outerHTML, - } ); - } ); + const { transform, blockName } = rawTransformation; + + if ( transform ) { + return transform( node ); + } - return [ ...accu, ...blocks ]; - }, [] ); + return createBlock( + blockName, + getBlockAttributes( + getBlockType( blockName ), + node.outerHTML + ) + ); + } ); + } ) ); } diff --git a/blocks/api/raw-handling/inline-content-converter.js b/blocks/api/raw-handling/inline-content-converter.js deleted file mode 100644 index 1d36ebc8fb4650..00000000000000 --- a/blocks/api/raw-handling/inline-content-converter.js +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Browser dependencies - */ -const { ELEMENT_NODE } = window.Node; - -/** - * Internal dependencies - */ -import { isInlineWrapper, isInline, isAllowedBlock, deepFilterNodeList } from './utils'; -import createUnwrapper from './create-unwrapper'; - -export default function( node, doc ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - - if ( ! isInlineWrapper( node ) ) { - return; - } - - deepFilterNodeList( node.childNodes, [ - createUnwrapper( - ( childNode ) => ! isInline( childNode ) && ! 
isAllowedBlock( node, childNode ), - ( childNode ) => childNode.nextElementSibling && doc.createElement( 'BR' ) - ), - ], doc ); -} diff --git a/blocks/api/raw-handling/is-inline-content.js b/blocks/api/raw-handling/is-inline-content.js index e53ab5a1424173..28d9587f0ab861 100644 --- a/blocks/api/raw-handling/is-inline-content.js +++ b/blocks/api/raw-handling/is-inline-content.js @@ -1,21 +1,58 @@ +/** + * External dependencies + */ +import { difference } from 'lodash'; + /** * Internal dependencies */ -import { isInline, isDoubleBR } from './utils'; +import { isPhrasingContent } from './utils'; + +/** + * Checks if the given node should be considered inline content, optionally + * depending on a context tag. + * + * @param {Node} node Node to check. + * @param {string} contextTag Tag name. + * + * @return {boolean} True if the node is inline content, false if not. + */ +function isInline( node, contextTag ) { + if ( isPhrasingContent( node ) ) { + return true; + } + + if ( ! contextTag ) { + return false; + } + + const tag = node.nodeName.toLowerCase(); + const inlineWhitelistTagGroups = [ + [ 'ul', 'li', 'ol' ], + [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ], + ]; -export default function( HTML, tagName ) { + return inlineWhitelistTagGroups.some( ( tagGroup ) => + difference( [ tag, contextTag ], tagGroup ).length === 0 + ); +} + +function deepCheck( nodes, contextTag ) { + return nodes.every( ( node ) => + isInline( node, contextTag ) && deepCheck( Array.from( node.children ), contextTag ) + ); +} + +function isDoubleBR( node ) { + return node.nodeName === 'BR' && node.previousSibling && node.previousSibling.nodeName === 'BR'; +} + +export default function( HTML, contextTag ) { const doc = document.implementation.createHTMLDocument( '' ); doc.body.innerHTML = HTML; const nodes = Array.from( doc.body.children ); - return ! 
nodes.some( isDoubleBR ) && deepCheck( nodes, tagName ); -} - -function deepCheck( nodes, tagName ) { - return nodes.every( ( node ) => { - return ( 'SPAN' === node.nodeName || isInline( node, tagName ) ) && - deepCheck( Array.from( node.children ), tagName ); - } ); + return ! nodes.some( isDoubleBR ) && deepCheck( nodes, contextTag ); } diff --git a/blocks/api/raw-handling/list-reducer.js b/blocks/api/raw-handling/list-reducer.js index 25c33792de606c..3182c703eb2ab1 100644 --- a/blocks/api/raw-handling/list-reducer.js +++ b/blocks/api/raw-handling/list-reducer.js @@ -1,7 +1,11 @@ /** - * Browser dependencies + * WordPress dependencies */ -const { ELEMENT_NODE } = window.Node; +import { unwrap } from '@wordpress/utils'; + +function isList( node ) { + return node.nodeName === 'OL' || node.nodeName === 'UL'; +} function shallowTextContent( element ) { return [ ...element.childNodes ] @@ -10,13 +14,7 @@ function shallowTextContent( element ) { } export default function( node ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - - const type = node.nodeName; - - if ( type !== 'OL' && type !== 'UL' ) { + if ( ! isList( node ) ) { return; } @@ -28,7 +26,7 @@ export default function( node ) { // * There is only one list item. if ( prevElement && - prevElement.nodeName === type && + prevElement.nodeName === node.nodeName && list.children.length === 1 ) { prevElement.appendChild( list.firstChild ); @@ -56,4 +54,15 @@ export default function( node ) { parentList.parentNode.removeChild( parentList ); } } + + // Invalid: OL/UL > OL/UL. 
+ if ( parentElement && isList( parentElement ) ) { + const prevListItem = node.previousElementSibling; + + if ( prevListItem ) { + prevListItem.appendChild( node ); + } else { + unwrap( node ); + } + } } diff --git a/blocks/api/raw-handling/markdown-converter.js b/blocks/api/raw-handling/markdown-converter.js new file mode 100644 index 00000000000000..cd0ee5ea4ebfd4 --- /dev/null +++ b/blocks/api/raw-handling/markdown-converter.js @@ -0,0 +1,42 @@ +/** + * External dependencies + */ +import showdown from 'showdown'; + +// Reuse the same showdown converter. +const converter = new showdown.Converter( { + noHeaderId: true, + tables: true, + literalMidWordUnderscores: true, + omitExtraWLInCodeBlocks: true, + simpleLineBreaks: true, +} ); + +/** + * Corrects the Slack Markdown variant of the code block. + * If uncorrected, it will be converted to inline code. + * + * @see https://get.slack.help/hc/en-us/articles/202288908-how-can-i-add-formatting-to-my-messages-#code-blocks + * + * @param {string} text The potential Markdown text to correct. + * + * @return {string} The corrected Markdown. + */ +function slackMarkdownVariantCorrector( text ) { + return text.replace( + /((?:^|\n)```)([^\n`]+)(```(?:$|\n))/, + ( match, p1, p2, p3 ) => `${ p1 }\n${ p2 }\n${ p3 }` + ); +} + +/** + * Converts a piece of text into HTML based on any Markdown present. + * Also decodes any encoded HTML. + * + * @param {string} text The plain text to convert. + * + * @return {string} HTML. 
+ */ +export default function( text ) { + return converter.makeHtml( slackMarkdownVariantCorrector( text ) ); +} diff --git a/blocks/api/raw-handling/ms-list-converter.js b/blocks/api/raw-handling/ms-list-converter.js index b5408e4ad6e87e..0181517493f797 100644 --- a/blocks/api/raw-handling/ms-list-converter.js +++ b/blocks/api/raw-handling/ms-list-converter.js @@ -2,17 +2,12 @@ * Browser dependencies */ const { parseInt } = window; -const { ELEMENT_NODE } = window.Node; function isList( node ) { return node.nodeName === 'OL' || node.nodeName === 'UL'; } -export default function( node ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - +export default function( node, doc ) { if ( node.nodeName !== 'P' ) { return; } @@ -43,7 +38,7 @@ export default function( node ) { // See https://html.spec.whatwg.org/multipage/grouping-content.html#attr-ol-type. const type = node.textContent.trim().slice( 0, 1 ); const isNumeric = /[1iIaA]/.test( type ); - const newListNode = document.createElement( isNumeric ? 'ol' : 'ul' ); + const newListNode = doc.createElement( isNumeric ? 'ol' : 'ul' ); if ( isNumeric ) { newListNode.setAttribute( 'type', type ); @@ -54,7 +49,7 @@ export default function( node ) { const listNode = node.previousElementSibling; const listType = listNode.nodeName; - const listItem = document.createElement( 'li' ); + const listItem = doc.createElement( 'li' ); let receivingNode = listNode; @@ -78,7 +73,7 @@ export default function( node ) { // Make sure we append to a list. if ( ! isList( receivingNode ) ) { - receivingNode = receivingNode.appendChild( document.createElement( listType ) ); + receivingNode = receivingNode.appendChild( doc.createElement( listType ) ); } // Append the list item to the list. 
diff --git a/blocks/api/raw-handling/normalise-blocks.js b/blocks/api/raw-handling/normalise-blocks.js index 9d7ccac4af03b0..7bb3b44f2d5711 100644 --- a/blocks/api/raw-handling/normalise-blocks.js +++ b/blocks/api/raw-handling/normalise-blocks.js @@ -1,7 +1,7 @@ /** * Internal dependencies */ -import { isInline, isEmpty } from './utils'; +import { isPhrasingContent, isEmpty } from './utils'; /** * Browser dependencies @@ -26,7 +26,7 @@ export default function( HTML ) { decu.removeChild( node ); } else { if ( ! accu.lastChild || accu.lastChild.nodeName !== 'P' ) { - accu.appendChild( document.createElement( 'P' ) ); + accu.appendChild( accuDoc.createElement( 'P' ) ); } accu.lastChild.appendChild( node ); @@ -36,7 +36,7 @@ export default function( HTML ) { // BR nodes: create a new paragraph on double, or append to previous. if ( node.nodeName === 'BR' ) { if ( node.nextSibling && node.nextSibling.nodeName === 'BR' ) { - accu.appendChild( document.createElement( 'P' ) ); + accu.appendChild( accuDoc.createElement( 'P' ) ); decu.removeChild( node.nextSibling ); } @@ -57,9 +57,9 @@ export default function( HTML ) { } else { accu.appendChild( node ); } - } else if ( isInline( node ) ) { + } else if ( isPhrasingContent( node ) ) { if ( ! 
accu.lastChild || accu.lastChild.nodeName !== 'P' ) { - accu.appendChild( document.createElement( 'P' ) ); + accu.appendChild( accuDoc.createElement( 'P' ) ); } accu.lastChild.appendChild( node ); } else { diff --git a/blocks/api/raw-handling/phrasing-content-reducer.js b/blocks/api/raw-handling/phrasing-content-reducer.js new file mode 100644 index 00000000000000..1f60c8d1ee8232 --- /dev/null +++ b/blocks/api/raw-handling/phrasing-content-reducer.js @@ -0,0 +1,37 @@ +/** + * WordPress dependencies + */ +import { unwrap, replaceTag } from '@wordpress/utils'; + +/** + * Internal dependencies + */ +import { isPhrasingContent } from './utils'; + +function isBlockContent( node, schema = {} ) { + return schema.hasOwnProperty( node.nodeName.toLowerCase() ); +} + +export default function( node, doc, schema ) { + if ( node.nodeName === 'SPAN' ) { + const { fontWeight, fontStyle } = node.style; + + if ( fontWeight === 'bold' || fontWeight === '700' ) { + node = replaceTag( node, 'strong', doc ); + } else if ( fontStyle === 'italic' ) { + node = replaceTag( node, 'em', doc ); + } + } else if ( node.nodeName === 'B' ) { + node = replaceTag( node, 'strong', doc ); + } else if ( node.nodeName === 'I' ) { + node = replaceTag( node, 'em', doc ); + } + + if ( + isPhrasingContent( node ) && + node.hasChildNodes() && + Array.from( node.childNodes ).some( ( child ) => isBlockContent( child, schema ) ) + ) { + unwrap( node ); + } +} diff --git a/blocks/api/raw-handling/slack-markdown-variant-corrector.js b/blocks/api/raw-handling/slack-markdown-variant-corrector.js deleted file mode 100644 index d3ae3bea7c1d46..00000000000000 --- a/blocks/api/raw-handling/slack-markdown-variant-corrector.js +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Corrects the Slack Markdown variant of the code block. - * If uncorrected, it will be converted to inline code. 
- * - * @see https://get.slack.help/hc/en-us/articles/202288908-how-can-i-add-formatting-to-my-messages-#code-blocks - * - * @param {string} text The potential Markdown text to correct. - * - * @return {string} The corrected Markdown. - */ -export default function( text ) { - return text.replace( - /((?:^|\n)```)([^\n`]+)(```(?:$|\n))/, - ( match, p1, p2, p3 ) => `${ p1 }\n${ p2 }\n${ p3 }` - ); -} diff --git a/blocks/api/raw-handling/special-comment-converter.js b/blocks/api/raw-handling/special-comment-converter.js index 8146d671ce6395..cbf4dd320821eb 100644 --- a/blocks/api/raw-handling/special-comment-converter.js +++ b/blocks/api/raw-handling/special-comment-converter.js @@ -20,16 +20,17 @@ const { COMMENT_NODE } = window.Node; * The custom element is then expected to be recognized by any registered * block's `raw` transform. * - * @param {Node} node The node to be processed. + * @param {Node} node The node to be processed. + * @param {Document} doc The document of the node. * @return {void} */ -export default function( node ) { +export default function( node, doc ) { if ( node.nodeType !== COMMENT_NODE ) { return; } if ( node.nodeValue === 'nextpage' ) { - replace( node, createNextpage() ); + replace( node, createNextpage( doc ) ); return; } @@ -55,12 +56,12 @@ export default function( node ) { } } - replace( node, createMore( customText, noTeaser ) ); + replace( node, createMore( customText, noTeaser, doc ) ); } } -function createMore( customText, noTeaser ) { - const node = document.createElement( 'wp-block' ); +function createMore( customText, noTeaser, doc ) { + const node = doc.createElement( 'wp-block' ); node.dataset.block = 'core/more'; if ( customText ) { node.dataset.customText = customText; @@ -72,8 +73,8 @@ function createMore( customText, noTeaser ) { return node; } -function createNextpage() { - const node = document.createElement( 'wp-block' ); +function createNextpage( doc ) { + const node = doc.createElement( 'wp-block' ); node.dataset.block 
= 'core/nextpage'; return node; diff --git a/blocks/api/raw-handling/strip-attributes.js b/blocks/api/raw-handling/strip-attributes.js deleted file mode 100644 index 832f71c9e36d5f..00000000000000 --- a/blocks/api/raw-handling/strip-attributes.js +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Browser dependencies - */ -const { ELEMENT_NODE } = window.Node; - -/** - * Internal dependencies - */ -import { isAttributeWhitelisted, isClassWhitelisted } from './utils'; - -export default function( node ) { - if ( node.nodeType !== ELEMENT_NODE ) { - return; - } - - if ( ! node.hasAttributes() ) { - return; - } - - const tag = node.nodeName.toLowerCase(); - - Array.from( node.attributes ).forEach( ( { name } ) => { - if ( name === 'class' || isAttributeWhitelisted( tag, name ) ) { - return; - } - - node.removeAttribute( name ); - } ); - - const oldClasses = node.getAttribute( 'class' ); - - if ( ! oldClasses ) { - return; - } - - const newClasses = oldClasses - .split( ' ' ) - .filter( ( name ) => name && isClassWhitelisted( tag, name ) ) - .join( ' ' ); - - if ( newClasses.length ) { - node.setAttribute( 'class', newClasses ); - } else { - node.removeAttribute( 'class' ); - } -} diff --git a/blocks/api/raw-handling/table-normaliser.js b/blocks/api/raw-handling/table-normaliser.js deleted file mode 100644 index 34ee6ed666faf2..00000000000000 --- a/blocks/api/raw-handling/table-normaliser.js +++ /dev/null @@ -1,18 +0,0 @@ -/** - * Browser dependencies - */ -const { TEXT_NODE } = window.Node; - -export default function( node ) { - if ( node.nodeType !== TEXT_NODE ) { - return; - } - - const parentNode = node.parentNode; - - if ( [ 'TR', 'TBODY', 'THEAD', 'TFOOT', 'TABLE' ].indexOf( parentNode.nodeName ) === -1 ) { - return; - } - - parentNode.removeChild( node ); -} diff --git a/blocks/api/raw-handling/test/comment-remover.js b/blocks/api/raw-handling/test/comment-remover.js deleted file mode 100644 index 91222acfdd8a79..00000000000000 --- 
a/blocks/api/raw-handling/test/comment-remover.js +++ /dev/null @@ -1,20 +0,0 @@ -/** - * External dependencies - */ -import { equal } from 'assert'; - -/** - * Internal dependencies - */ -import commentRemover from '../comment-remover'; -import { deepFilterHTML } from '../utils'; - -describe( 'commentRemover', () => { - it( 'should remove comments', () => { - equal( deepFilterHTML( '', [ commentRemover ] ), '' ); - } ); - - it( 'should deep remove comments', () => { - equal( deepFilterHTML( '

test

', [ commentRemover ] ), '

test

' ); - } ); -} ); diff --git a/blocks/api/raw-handling/test/create-unwrapper.js b/blocks/api/raw-handling/test/create-unwrapper.js deleted file mode 100644 index 772ccff0d2c9e1..00000000000000 --- a/blocks/api/raw-handling/test/create-unwrapper.js +++ /dev/null @@ -1,42 +0,0 @@ -/** - * External dependencies - */ -import { equal } from 'assert'; - -/** - * Internal dependencies - */ -import createUnwrapper from '../create-unwrapper'; -import { deepFilterHTML } from '../utils'; - -const unwrapper = createUnwrapper( ( node ) => node.nodeName === 'SPAN' ); -const unwrapperWithAfter = createUnwrapper( - ( node ) => node.nodeName === 'P', - () => document.createElement( 'BR' ) -); - -describe( 'createUnwrapper', () => { - it( 'should remove spans', () => { - equal( deepFilterHTML( 'test', [ unwrapper ] ), 'test' ); - } ); - - it( 'should remove wrapped spans', () => { - equal( deepFilterHTML( '

test

', [ unwrapper ] ), '

test

' ); - } ); - - it( 'should remove spans with attributes', () => { - equal( deepFilterHTML( '

test

', [ unwrapper ] ), '

test

' ); - } ); - - it( 'should remove nested spans', () => { - equal( deepFilterHTML( '

test

', [ unwrapper ] ), '

test

' ); - } ); - - it( 'should remove spans, but preserve nested structure', () => { - equal( deepFilterHTML( '

test test

', [ unwrapper ] ), '

test test

' ); - } ); - - it( 'should remove paragraphs and insert line break', () => { - equal( deepFilterHTML( '

test

', [ unwrapperWithAfter ] ), 'test
' ); - } ); -} ); diff --git a/blocks/api/raw-handling/test/embedded-content-reducer.js b/blocks/api/raw-handling/test/embedded-content-reducer.js deleted file mode 100644 index a05e2f5eb8ab99..00000000000000 --- a/blocks/api/raw-handling/test/embedded-content-reducer.js +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Internal dependencies - */ -import embeddedContentReducer from '../embedded-content-reducer'; -import { deepFilterHTML } from '../utils'; - -describe( 'embeddedContentReducer', () => { - it( 'should move embedded content from paragraph', () => { - expect( deepFilterHTML( '

test

', [ embeddedContentReducer ] ) ) - .toEqual( '

test

' ); - } ); - - it( 'should move an anchor with just an image from paragraph', () => { - expect( deepFilterHTML( '

test

', [ embeddedContentReducer ] ) ) - .toEqual( '

test

' ); - } ); - - it( 'should move multiple images', () => { - expect( deepFilterHTML( '

test

', [ embeddedContentReducer ] ) ) - .toEqual( '

test

' ); - } ); -} ); diff --git a/blocks/api/raw-handling/test/figure-content-reducer.js b/blocks/api/raw-handling/test/figure-content-reducer.js new file mode 100644 index 00000000000000..d8346b81fb9a26 --- /dev/null +++ b/blocks/api/raw-handling/test/figure-content-reducer.js @@ -0,0 +1,41 @@ +/** + * Internal dependencies + */ +import figureContentReducer from '../figure-content-reducer'; +import { deepFilterHTML } from '../utils'; + +describe( 'figureContentReducer', () => { + const schema = { + figure: { + children: { + img: {}, + a: { + children: { + img: {}, + }, + }, + }, + }, + }; + + it( 'should move embedded content from paragraph', () => { + const input = '

test

'; + const output = '

test

'; + + expect( deepFilterHTML( input, [ figureContentReducer ], schema ) ).toEqual( output ); + } ); + + it( 'should move an anchor with just an image from paragraph', () => { + const input = '

test

'; + const output = '

test

'; + + expect( deepFilterHTML( input, [ figureContentReducer ], schema ) ).toEqual( output ); + } ); + + it( 'should move multiple images', () => { + const input = '

test

'; + const output = '

test

'; + + expect( deepFilterHTML( input, [ figureContentReducer ], schema ) ).toEqual( output ); + } ); +} ); diff --git a/blocks/api/raw-handling/test/index.js b/blocks/api/raw-handling/test/index.js index 8e41a4e8381705..5c68c2f31493c7 100644 --- a/blocks/api/raw-handling/test/index.js +++ b/blocks/api/raw-handling/test/index.js @@ -1,99 +1,20 @@ /** * External dependencies */ -import { equal, deepEqual } from 'assert'; +import { equal } from 'assert'; /** * Internal dependencies */ import rawHandler from '../index'; -import { registerBlockType, unregisterBlockType, setUnknownTypeHandlerName } from '../../registration'; -import { createBlock } from '../../factory'; import { getBlockContent } from '../../serializer'; +import { registerCoreBlocks } from '../../../../core-blocks'; describe( 'rawHandler', () => { - it( 'should convert recognised raw content', () => { - registerBlockType( 'test/figure', { - category: 'common', - title: 'test figure', - attributes: { - content: { - type: 'array', - source: 'children', - selector: 'figure', - }, - }, - transforms: { - from: [ - { - type: 'raw', - isMatch: ( node ) => node.nodeName === 'FIGURE', - }, - ], - }, - save: () => {}, - } ); - - const block = rawHandler( { HTML: '
test
' } )[ 0 ]; - const { name, attributes } = createBlock( 'test/figure', { content: [ 'test' ] } ); - - equal( block.name, name ); - deepEqual( block.attributes, attributes ); - - unregisterBlockType( 'test/figure' ); - } ); - - it( 'should handle unknown raw content', () => { - registerBlockType( 'test/unknown', { - category: 'common', - title: 'test unknown', - attributes: { - content: { - type: 'string', - source: 'property', - property: 'innerHTML', - }, - }, - save: () => {}, - } ); - setUnknownTypeHandlerName( 'test/unknown' ); - - const block = rawHandler( { HTML: '
test
' } )[ 0 ]; - - equal( block.name, 'test/unknown' ); - equal( block.attributes.content, '
test
' ); - - unregisterBlockType( 'test/unknown' ); - setUnknownTypeHandlerName( undefined ); - } ); - - it( 'should handle raw content with transform', () => { - registerBlockType( 'test/transform', { - category: 'common', - title: 'test figure', - attributes: { - content: { - type: 'array', - }, - }, - transforms: { - from: [ - { - type: 'raw', - isMatch: ( node ) => node.nodeName === 'FIGURE', - transform: ( node ) => createBlock( 'test/transform', { content: node.nodeName } ), - }, - ], - }, - save: () => {}, - } ); - - const block = rawHandler( { HTML: '
test
' } )[ 0 ]; - - equal( block.name, 'test/transform' ); - equal( block.attributes.content, 'FIGURE' ); - - unregisterBlockType( 'test/transform' ); + beforeAll( () => { + // Load all hooks that modify blocks + require( 'blocks/hooks' ); + registerCoreBlocks(); } ); it( 'should filter inline content', () => { @@ -144,6 +65,15 @@ describe( 'rawHandler', () => { equal( filtered, '

Some heading

' ); } ); + + it( 'should break up forced inline content', () => { + const filtered = rawHandler( { + HTML: '

test

test

', + mode: 'INLINE', + } ); + + equal( filtered, 'test
test' ); + } ); } ); import './integration'; diff --git a/blocks/api/raw-handling/test/inline-content-converter.js b/blocks/api/raw-handling/test/inline-content-converter.js deleted file mode 100644 index ee5decf487aad9..00000000000000 --- a/blocks/api/raw-handling/test/inline-content-converter.js +++ /dev/null @@ -1,19 +0,0 @@ -/** - * External dependencies - */ -import { equal } from 'assert'; - -/** - * Internal dependencies - */ -import inlineContentConverter from '../inline-content-converter'; -import { deepFilterHTML } from '../utils'; - -describe( 'inlineContentConverter', () => { - it( 'should remove non-inline content from inline wrapper', () => { - equal( - deepFilterHTML( '

test

test

', [ inlineContentConverter ] ), - '
test
test
' - ); - } ); -} ); diff --git a/blocks/api/raw-handling/test/integration/apple-out.html b/blocks/api/raw-handling/test/integration/apple-out.html index ece5eb1f6190a2..84bdfad50c7e1e 100644 --- a/blocks/api/raw-handling/test/integration/apple-out.html +++ b/blocks/api/raw-handling/test/integration/apple-out.html @@ -13,10 +13,11 @@ diff --git a/blocks/api/raw-handling/test/integration/google-docs-out.html b/blocks/api/raw-handling/test/integration/google-docs-out.html index 84e1fdf8f5300d..1c54f0c4b23ecb 100644 --- a/blocks/api/raw-handling/test/integration/google-docs-out.html +++ b/blocks/api/raw-handling/test/integration/google-docs-out.html @@ -13,10 +13,11 @@

This is a heading

diff --git a/blocks/api/raw-handling/test/integration/iframe-embed-out.html b/blocks/api/raw-handling/test/integration/iframe-embed-out.html index 08cf7f9625a46b..6eeb487da82b54 100644 --- a/blocks/api/raw-handling/test/integration/iframe-embed-out.html +++ b/blocks/api/raw-handling/test/integration/iframe-embed-out.html @@ -1,3 +1,3 @@ - +
diff --git a/blocks/api/raw-handling/test/integration/index.js b/blocks/api/raw-handling/test/integration/index.js index 9fc1cc57266f06..2dc567ad108951 100644 --- a/blocks/api/raw-handling/test/integration/index.js +++ b/blocks/api/raw-handling/test/integration/index.js @@ -23,8 +23,14 @@ const types = [ 'iframe-embed', 'one-image', 'two-images', + 'markdown', + 'wordpress', ]; +function readFile( filePath ) { + return fs.existsSync( filePath ) ? fs.readFileSync( filePath, 'utf8' ).trim() : ''; +} + describe( 'raw handling: integration', () => { beforeAll( () => { // Load all hooks that modify blocks @@ -34,12 +40,13 @@ describe( 'raw handling: integration', () => { types.forEach( ( type ) => { it( type, () => { - const input = fs.readFileSync( path.join( __dirname, `${ type }-in.html` ), 'utf8' ).trim(); - const output = fs.readFileSync( path.join( __dirname, `${ type }-out.html` ), 'utf8' ).trim(); - const converted = rawHandler( { HTML: input, canUserUseUnfilteredHTML: true } ); + const HTML = readFile( path.join( __dirname, `${ type }-in.html` ) ); + const plainText = readFile( path.join( __dirname, `${ type }-in.txt` ) ); + const output = readFile( path.join( __dirname, `${ type }-out.html` ) ); + const converted = rawHandler( { HTML, plainText, canUserUseUnfilteredHTML: true } ); const serialized = typeof converted === 'string' ? converted : serialize( converted ); - equal( output, serialized ); + equal( serialized, output ); } ); } ); } ); diff --git a/blocks/api/raw-handling/test/integration/markdown-in.txt b/blocks/api/raw-handling/test/integration/markdown-in.txt new file mode 100644 index 00000000000000..ef2b6187d1e275 --- /dev/null +++ b/blocks/api/raw-handling/test/integration/markdown-in.txt @@ -0,0 +1,38 @@ +# This is a heading with *italic* + +This is a paragraph with a [link](https://w.org/) and **bold**. + +Preserve +line breaks please. + +## Lists + +* A +* Bulleted + * Indented +* List + +1. One +2. Two +3. 
Three + +## Table + +First Header | Second Header +------------ | ------------- +Content from cell 1 | Content from cell 2 +Content in the first column | Content in the second column + +## Quote + +> First +> +> Second + +## Code + +Inline `code` tags should work. + +```html +This is a code block. +``` diff --git a/blocks/api/raw-handling/test/integration/markdown-out.html b/blocks/api/raw-handling/test/integration/markdown-out.html new file mode 100644 index 00000000000000..867fa64ebea503 --- /dev/null +++ b/blocks/api/raw-handling/test/integration/markdown-out.html @@ -0,0 +1,83 @@ + +

This is a heading with italic

+ + + +

This is a paragraph with a link and bold.

+ + + +

Preserve
line breaks please.

+ + + +

Lists

+ + + + + + + +
    +
  1. One
  2. +
  3. Two
  4. +
  5. Three
  6. +
+ + + +

Table

+ + + + + + + + + + + + + + + + + + + + +
First HeaderSecond Header
Content from cell 1Content from cell 2
Content in the first columnContent in the second column
+ + + +

Quote

+ + + +
+

First

+

Second

+
+ + + +

Code

+ + + +

Inline code tags should work.

+ + + +
This is a code block.
+ diff --git a/blocks/api/raw-handling/test/integration/wordpress-in.html b/blocks/api/raw-handling/test/integration/wordpress-in.html new file mode 100644 index 00000000000000..3336bcf1940421 --- /dev/null +++ b/blocks/api/raw-handling/test/integration/wordpress-in.html @@ -0,0 +1,4 @@ +

Howdy

+

This is a paragraph.

+

More tag

+

diff --git a/blocks/api/raw-handling/test/integration/wordpress-out.html b/blocks/api/raw-handling/test/integration/wordpress-out.html new file mode 100644 index 00000000000000..5bc8b8b8d4d7be --- /dev/null +++ b/blocks/api/raw-handling/test/integration/wordpress-out.html @@ -0,0 +1,15 @@ + +

Howdy

+ + + +

This is a paragraph.

+ + + +

More tag

+ + + + + diff --git a/blocks/api/raw-handling/test/is-inline-content.js b/blocks/api/raw-handling/test/is-inline-content.js index 56aa123eb72fcc..3b9816fa42e6a5 100644 --- a/blocks/api/raw-handling/test/is-inline-content.js +++ b/blocks/api/raw-handling/test/is-inline-content.js @@ -8,7 +8,7 @@ import { equal } from 'assert'; */ import isInlineContent from '../is-inline-content'; -describe( 'stripWrappers', () => { +describe( 'isInlineContent', () => { it( 'should be inline content', () => { equal( isInlineContent( 'test' ), true ); equal( isInlineContent( 'test' ), true ); diff --git a/blocks/api/raw-handling/test/list-reducer.js b/blocks/api/raw-handling/test/list-reducer.js index 77d8f9012b3e84..7947aca487530d 100644 --- a/blocks/api/raw-handling/test/list-reducer.js +++ b/blocks/api/raw-handling/test/list-reducer.js @@ -37,14 +37,26 @@ describe( 'listReducer', () => { equal( deepFilterHTML( input, [ listReducer ] ), output ); } ); - it( 'Should remove empty list wrappers', () => { + it( 'should remove empty list wrappers', () => { const input = ''; const output = ''; equal( deepFilterHTML( input, [ listReducer ] ), output ); } ); - it( 'Should not remove filled list wrappers', () => { + it( 'should not remove filled list wrappers', () => { const input = ''; equal( deepFilterHTML( input, [ listReducer ] ), input ); } ); + + it( 'should adjust wrong indentation (1)', () => { + const input = ''; + const output = ''; + equal( deepFilterHTML( input, [ listReducer ] ), output ); + } ); + + it( 'should adjust wrong indentation (2)', () => { + const input = ''; + const output = ''; + equal( deepFilterHTML( input, [ listReducer ] ), output ); + } ); } ); diff --git a/blocks/api/raw-handling/test/markdown-converter.js b/blocks/api/raw-handling/test/markdown-converter.js new file mode 100644 index 00000000000000..58eb0c90a4ddee --- /dev/null +++ b/blocks/api/raw-handling/test/markdown-converter.js @@ -0,0 +1,35 @@ +/** + * External dependencies + */ +import { equal } 
from 'assert'; + +/** + * Internal dependencies + */ +import markdownConverter from '../markdown-converter'; + +describe( 'markdownConverter', () => { + it( 'should correct Slack variant', () => { + const input = '```test```'; + const output = '
test
'; + equal( markdownConverter( input ), output ); + } ); + + it( 'should correct Slack variant on own line', () => { + const input = 'test\n```test```\ntest'; + const output = '

test

\n
test
\n

test

'; + equal( markdownConverter( input ), output ); + } ); + + it( 'should not correct inline code', () => { + const input = 'test ```test``` test'; + const output = '

test test test

'; + equal( markdownConverter( input ), output ); + } ); + + it( 'should not correct code with line breaks', () => { + const input = '```js\ntest\n```'; + const output = '
test
'; + equal( markdownConverter( input ), output ); + } ); +} ); diff --git a/blocks/api/raw-handling/test/formatting-transformer.js b/blocks/api/raw-handling/test/phrasing-content-reducer.js similarity index 50% rename from blocks/api/raw-handling/test/formatting-transformer.js rename to blocks/api/raw-handling/test/phrasing-content-reducer.js index 4389a3ba26176a..c9ea0c8c90280c 100644 --- a/blocks/api/raw-handling/test/formatting-transformer.js +++ b/blocks/api/raw-handling/test/phrasing-content-reducer.js @@ -6,19 +6,23 @@ import { equal } from 'assert'; /** * Internal dependencies */ -import formattingTransformer from '../formatting-transformer'; +import phrasingContentReducer from '../phrasing-content-reducer'; import { deepFilterHTML } from '../utils'; -describe( 'formattingTransformer', () => { +describe( 'phrasingContentReducer', () => { it( 'should transform font weight', () => { - equal( deepFilterHTML( 'test', [ formattingTransformer ] ), 'test' ); + equal( deepFilterHTML( 'test', [ phrasingContentReducer ], {} ), 'test' ); } ); it( 'should transform numeric font weight', () => { - equal( deepFilterHTML( 'test', [ formattingTransformer ] ), 'test' ); + equal( deepFilterHTML( 'test', [ phrasingContentReducer ], {} ), 'test' ); } ); it( 'should transform font style', () => { - equal( deepFilterHTML( 'test', [ formattingTransformer ] ), 'test' ); + equal( deepFilterHTML( 'test', [ phrasingContentReducer ], {} ), 'test' ); + } ); + + it( 'should remove invalid phrasing content', () => { + equal( deepFilterHTML( '

test

', [ phrasingContentReducer ], { p: {} } ), '

test

' ); } ); } ); diff --git a/blocks/api/raw-handling/test/slack-markdown-variant-corrector.js b/blocks/api/raw-handling/test/slack-markdown-variant-corrector.js deleted file mode 100644 index 610595bb634dfb..00000000000000 --- a/blocks/api/raw-handling/test/slack-markdown-variant-corrector.js +++ /dev/null @@ -1,29 +0,0 @@ -/** - * External dependencies - */ -import { equal } from 'assert'; - -/** - * Internal dependencies - */ -import slackMarkdownVariantCorrector from '../slack-markdown-variant-corrector'; - -describe( 'slackMarkdownVariantCorrector', () => { - it( 'should correct Slack variant', () => { - equal( slackMarkdownVariantCorrector( '```test```' ), '```\ntest\n```' ); - } ); - - it( 'should correct Slack variant on own line', () => { - equal( slackMarkdownVariantCorrector( 'test\n```test```\ntest' ), 'test\n```\ntest\n```\ntest' ); - } ); - - it( 'should not correct inline code', () => { - const text = 'test ```test``` test'; - equal( slackMarkdownVariantCorrector( text ), text ); - } ); - - it( 'should not correct code with line breaks', () => { - const text = '```js\ntest\n```'; - equal( slackMarkdownVariantCorrector( text ), text ); - } ); -} ); diff --git a/blocks/api/raw-handling/test/strip-attributes.js b/blocks/api/raw-handling/test/strip-attributes.js deleted file mode 100644 index a68469cb202f4f..00000000000000 --- a/blocks/api/raw-handling/test/strip-attributes.js +++ /dev/null @@ -1,36 +0,0 @@ -/** - * External dependencies - */ -import { equal } from 'assert'; - -/** - * Internal dependencies - */ -import stripAttributes from '../strip-attributes'; -import { deepFilterHTML } from '../utils'; - -describe( 'stripAttributes', () => { - it( 'should remove attributes', () => { - equal( deepFilterHTML( '

test

', [ stripAttributes ] ), '

test

' ); - } ); - - it( 'should remove multiple attributes', () => { - equal( deepFilterHTML( '

test

', [ stripAttributes ] ), '

test

' ); - } ); - - it( 'should deep remove attributes', () => { - equal( deepFilterHTML( '

test test

', [ stripAttributes ] ), '

test test

' ); - } ); - - it( 'should remove data-* attributes', () => { - equal( deepFilterHTML( '

test

', [ stripAttributes ] ), '

test

' ); - } ); - - it( 'should keep some attributes', () => { - equal( deepFilterHTML( 'test', [ stripAttributes ] ), 'test' ); - } ); - - it( 'should keep some classes', () => { - equal( deepFilterHTML( '', [ stripAttributes ] ), '' ); - } ); -} ); diff --git a/blocks/api/raw-handling/test/table-normaliser.js b/blocks/api/raw-handling/test/table-normaliser.js deleted file mode 100644 index 27295ba22bcc40..00000000000000 --- a/blocks/api/raw-handling/test/table-normaliser.js +++ /dev/null @@ -1,19 +0,0 @@ -/** - * External dependencies - */ -import { equal } from 'assert'; - -/** - * Internal dependencies - */ -import tableNormaliser from '../table-normaliser'; -import { deepFilterHTML } from '../utils'; - -describe( 'tableNormaliser', () => { - it( 'should remove invalid text nodes in table', () => { - equal( - deepFilterHTML( '\n\n\n\n\n\n\n
\n
\n', [ tableNormaliser ] ), - '\n
\n
\n' - ); - } ); -} ); diff --git a/blocks/api/raw-handling/test/utils.js b/blocks/api/raw-handling/test/utils.js index 411ff1a44b8a56..c5a2bd68ff2192 100644 --- a/blocks/api/raw-handling/test/utils.js +++ b/blocks/api/raw-handling/test/utils.js @@ -6,18 +6,7 @@ import { equal } from 'assert'; /** * Internal dependencies */ -import createUnwrapper from '../create-unwrapper'; -import { deepFilterHTML, isEmpty, isInvalidInline, isPlain } from '../utils'; - -const spanUnwrapper = createUnwrapper( ( node ) => node.nodeName === 'SPAN' ); -const inlineUnwrapper = createUnwrapper( ( node ) => node.nodeName === 'EM' ); - -describe( 'deepFilterHTML', () => { - it( 'should not error', () => { - equal( deepFilterHTML( 'test', [ spanUnwrapper, inlineUnwrapper ] ), 'test' ); - equal( deepFilterHTML( 'test', [ spanUnwrapper, inlineUnwrapper ] ), 'test' ); - } ); -} ); +import { isEmpty, isPlain, removeInvalidHTML, getPhrasingContentSchema } from '../utils'; describe( 'isEmpty', () => { function isEmptyHTML( HTML ) { @@ -57,38 +46,119 @@ describe( 'isEmpty', () => { } ); } ); -describe( 'isInvalidInline', () => { - function isInvalidInlineHTML( HTML ) { - const doc = document.implementation.createHTMLDocument( '' ); +describe( 'isPlain', () => { + it( 'should return true for plain text', () => { + equal( isPlain( 'test' ), true ); + } ); - doc.body.innerHTML = HTML; + it( 'should return true for only line breaks', () => { + equal( isPlain( 'test
test' ), true ); + equal( isPlain( 'test
test' ), true ); + equal( isPlain( 'test
test' ), true ); + equal( isPlain( 'test
test' ), true ); + } ); - return isInvalidInline( doc.body.firstChild ); - } + it( 'should return false for formatted text', () => { + equal( isPlain( 'test' ), false ); + equal( isPlain( 'test
' ), false ); + equal( isPlain( 'testtest' ), false ); + } ); +} ); - it( 'should return true for div element', () => { - equal( isInvalidInlineHTML( '
test
' ), true ); +describe( 'removeInvalidHTML', () => { + const phrasingContentSchema = getPhrasingContentSchema(); + const schema = { + p: { + children: phrasingContentSchema, + }, + figure: { + require: [ 'img' ], + children: { + img: { + attributes: [ 'src', 'alt' ], + classes: [ 'alignleft' ], + }, + figcaption: { + children: phrasingContentSchema, + }, + }, + }, + ...phrasingContentSchema, + }; + + it( 'should leave plain text alone', () => { + const input = 'test'; + equal( removeInvalidHTML( input, schema ), input ); } ); - it( 'should return true for deep div element', () => { - equal( isInvalidInlineHTML( '
test
' ), true ); + it( 'should leave valid phrasing content alone', () => { + const input = 'test'; + equal( removeInvalidHTML( input, schema ), input ); } ); - it( 'should return false for valid structure', () => { - equal( isInvalidInlineHTML( 'test' ), false ); + it( 'should remove unrecognised tags from phrasing content', () => { + const input = '
test
'; + const output = 'test'; + equal( removeInvalidHTML( input, schema ), output ); } ); -} ); -describe( 'isPlain', () => { - it( 'should return true for plain text', () => { - equal( isPlain( 'test' ), true ); + it( 'should remove unwanted whitespace outside phrasing content', () => { + const input = '
'; + const output = '
'; + equal( removeInvalidHTML( input, schema ), output ); } ); - it( 'should return true for only line breaks', () => { - equal( isPlain( 'test
test' ), true ); + it( 'should remove attributes', () => { + const input = '

test

'; + const output = '

test

'; + equal( removeInvalidHTML( input, schema ), output ); } ); - it( 'should return false for formatted text', () => { - equal( isPlain( 'test' ), false ); + it( 'should remove multiple attributes', () => { + const input = '

test

'; + const output = '

test

'; + equal( removeInvalidHTML( input, schema ), output ); + } ); + + it( 'should deep remove attributes', () => { + const input = '

test test

'; + const output = '

test test

'; + equal( removeInvalidHTML( input, schema ), output ); + } ); + + it( 'should remove data-* attributes', () => { + const input = '

test

'; + const output = '

test

'; + equal( removeInvalidHTML( input, schema ), output ); + } ); + + it( 'should keep some attributes', () => { + const input = 'test'; + const output = 'test'; + equal( removeInvalidHTML( input, schema ), output ); + } ); + + it( 'should keep some classes', () => { + const input = '
'; + const output = '
'; + equal( removeInvalidHTML( input, schema ), output ); + } ); + + it( 'should remove empty nodes that should have children', () => { + const input = '
'; + const output = ''; + equal( removeInvalidHTML( input, schema ), output ); + } ); + + it( 'should break up block content with phrasing schema', () => { + const input = '

test

test

'; + const output = 'test
test'; + equal( removeInvalidHTML( input, phrasingContentSchema, true ), output ); + } ); + + it( 'should unwrap node that does not satisfy require', () => { + const input = '

test

test
'; + const output = '

test

test'; + equal( removeInvalidHTML( input, schema ), output ); } ); } ); diff --git a/blocks/api/raw-handling/utils.js b/blocks/api/raw-handling/utils.js index 3d9961c2b6dd0b..046b77ddd4268a 100644 --- a/blocks/api/raw-handling/utils.js +++ b/blocks/api/raw-handling/utils.js @@ -1,24 +1,19 @@ /** * External dependencies */ -import { includes } from 'lodash'; +import { omit, mergeWith, includes } from 'lodash'; /** - * Browser dependencies + * WordPress dependencies */ -const { ELEMENT_NODE, TEXT_NODE } = window.Node; +import { unwrap, insertAfter, remove } from '@wordpress/utils'; /** - * An array of tag groups used by isInlineForTag function. - * If tagName and nodeName are present in the same group, the node should be treated as inline. - * @type {Array} + * Browser dependencies */ -const inlineWhitelistTagGroups = [ - [ 'ul', 'li', 'ol' ], - [ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ], -]; +const { ELEMENT_NODE, TEXT_NODE } = window.Node; -const inlineWhitelist = { +const phrasingContentSchema = { strong: {}, em: {}, del: {}, @@ -29,146 +24,72 @@ const inlineWhitelist = { sub: {}, sup: {}, br: {}, + '#text': {}, }; -const embeddedWhiteList = { - img: { attributes: [ 'src', 'alt' ], classes: [ 'alignleft', 'aligncenter', 'alignright', 'alignnone' ] }, - iframe: { attributes: [ 'src', 'allowfullscreen', 'height', 'width' ] }, -}; - -const inlineWrapperWhiteList = { - figcaption: {}, - h1: {}, - h2: {}, - h3: {}, - h4: {}, - h5: {}, - h6: {}, - p: {}, - li: { children: [ 'ul', 'ol', 'li' ] }, - pre: {}, - td: {}, - th: {}, -}; - -const whitelist = { - ...inlineWhitelist, - ...inlineWrapperWhiteList, - ...embeddedWhiteList, - figure: {}, - blockquote: {}, - hr: {}, - ul: {}, - ol: { attributes: [ 'type' ] }, - table: {}, - thead: {}, - tfoot: {}, - tbody: {}, - tr: {}, -}; - -export function isWhitelisted( element ) { - return whitelist.hasOwnProperty( element.nodeName.toLowerCase() ); -} - -export function isNotWhitelisted( element ) { - return ! 
isWhitelisted( element ); -} - -export function isAttributeWhitelisted( tag, attribute ) { - return ( - whitelist[ tag ] && - whitelist[ tag ].attributes && - whitelist[ tag ].attributes.indexOf( attribute ) !== -1 - ); -} +// Recursion is needed. +// Possible: strong > em > strong. +// Impossible: strong > strong. +[ 'strong', 'em', 'del', 'ins', 'a', 'code', 'abbr', 'sub', 'sup' ].forEach( ( tag ) => { + phrasingContentSchema[ tag ].children = omit( phrasingContentSchema, tag ); +} ); /** - * Checks if nodeName should be treated as inline when being added to tagName. - * This happens if nodeName and tagName are in the same group defined in inlineWhitelistTagGroups. + * Get schema of possible paths for phrasing content. * - * @param {string} nodeName Node name. - * @param {string} tagName Tag name. + * @see https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content * - * @return {boolean} True if nodeName is inline in the context of tagName and - * false otherwise. + * @return {Object} Schema. */ -function isInlineForTag( nodeName, tagName ) { - if ( ! tagName || ! nodeName ) { - return false; - } - return inlineWhitelistTagGroups.some( ( tagGroup ) => - includes( tagGroup, nodeName ) && includes( tagGroup, tagName ) - ); -} - -export function isInline( node, tagName ) { - const nodeName = node.nodeName.toLowerCase(); - return inlineWhitelist.hasOwnProperty( nodeName ) || isInlineForTag( nodeName, tagName ); -} - -export function isClassWhitelisted( tag, name ) { - return ( - whitelist[ tag ] && - whitelist[ tag ].classes && - whitelist[ tag ].classes.indexOf( name ) !== -1 - ); +export function getPhrasingContentSchema() { + return phrasingContentSchema; } /** - * Whether or not the given node is embedded content. + * Find out whether or not the given node is phrasing content. 
* - * @see https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Embedded_content + * @see https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content * - * @param {Node} node The node to check. + * @param {Element} node The node to test. * - * @return {boolean} True if embedded content, false if not. + * @return {boolean} True if phrasing content, false if not. */ -export function isEmbedded( node ) { - return embeddedWhiteList.hasOwnProperty( node.nodeName.toLowerCase() ); +export function isPhrasingContent( node ) { + const tag = node.nodeName.toLowerCase(); + return getPhrasingContentSchema().hasOwnProperty( tag ) || tag === 'span'; } -export function isInlineWrapper( node ) { - return inlineWrapperWhiteList.hasOwnProperty( node.nodeName.toLowerCase() ); -} - -export function isAllowedBlock( parentNode, node ) { - const parentNodeTag = parentNode.nodeName.toLowerCase(); - const nodeTag = node.nodeName.toLowerCase(); - - return ( - whitelist[ parentNodeTag ] && - whitelist[ parentNodeTag ].children && - whitelist[ parentNodeTag ].children.indexOf( nodeTag ) !== -1 - ); -} - -export function isInvalidInline( element ) { - if ( ! isInline( element ) ) { - return false; - } - - if ( ! element.hasChildNodes() ) { - return false; - } +/** + * Given raw transforms from blocks, merges all schemas into one. + * + * @param {Array} transforms Block transforms, of the `raw` type. + * + * @return {Object} A complete block content schema. + */ +export function getBlockContentSchema( transforms ) { + const schemas = transforms.map( ( { schema } ) => schema ); - return Array.from( element.childNodes ).some( ( node ) => { - if ( node.nodeType === ELEMENT_NODE ) { - if ( ! 
isInline( node ) ) { - return true; + return mergeWith( {}, ...schemas, ( objValue, srcValue, key ) => { + if ( key === 'children' ) { + if ( objValue === '*' || srcValue === '*' ) { + return '*'; } - return isInvalidInline( node ); + return { ...objValue, ...srcValue }; + } else if ( key === 'attributes' || key === 'require' ) { + return [ ...( objValue || [] ), ...( srcValue || [] ) ]; } - - return false; } ); } -export function isDoubleBR( node ) { - return node.nodeName === 'BR' && node.previousSibling && node.previousSibling.nodeName === 'BR'; -} - +/** + * Recursively checks if an element is empty. An element is not empty if it + * contains text or contains elements with attributes such as images. + * + * @param {Element} element The element to check. + * + * @return {boolean} Wether or not the element is empty. + */ export function isEmpty( element ) { if ( ! element.hasChildNodes() ) { return true; @@ -193,23 +114,16 @@ export function isEmpty( element ) { } ); } +/** + * Checks wether HTML can be considered plain text. That is, it does not contain + * any elements that are not line breaks. + * + * @param {string} HTML The HTML to check. + * + * @return {boolean} Wether the HTML can be considered plain text. + */ export function isPlain( HTML ) { - const doc = document.implementation.createHTMLDocument( '' ); - - doc.body.innerHTML = HTML; - - const brs = doc.querySelectorAll( 'br' ); - - // Remove all BR nodes. - Array.from( brs ).forEach( ( node ) => { - node.parentNode.replaceChild( document.createTextNode( '\n' ), node ); - } ); - - // Merge all text nodes. - doc.body.normalize(); - - // If it's plain text, there should only be one node left. - return doc.body.childNodes.length === 1 && doc.body.firstChild.nodeType === TEXT_NODE; + return ! /<(?!br[ />])/i.test( HTML ); } /** @@ -218,36 +132,144 @@ export function isPlain( HTML ) { * @param {NodeList} nodeList The nodeList to filter. 
* @param {Array} filters An array of functions that can mutate with the provided node. * @param {Document} doc The document of the nodeList. + * @param {Object} schema The schema to use. */ -export function deepFilterNodeList( nodeList, filters, doc ) { +export function deepFilterNodeList( nodeList, filters, doc, schema ) { Array.from( nodeList ).forEach( ( node ) => { - deepFilterNodeList( node.childNodes, filters, doc ); + deepFilterNodeList( node.childNodes, filters, doc, schema ); - filters.forEach( ( filter ) => { + filters.forEach( ( item ) => { // Make sure the node is still attached to the document. if ( ! doc.contains( node ) ) { return; } - filter( node, doc ); + item( node, doc, schema ); } ); } ); } /** * Given node filters, deeply filters HTML tags. + * Filters from the deepest nodes to the top. * * @param {string} HTML The HTML to filter. * @param {Array} filters An array of functions that can mutate with the provided node. + * @param {Object} schema The schema to use. * * @return {string} The filtered HTML. */ -export function deepFilterHTML( HTML, filters = [] ) { +export function deepFilterHTML( HTML, filters = [], schema ) { + const doc = document.implementation.createHTMLDocument( '' ); + + doc.body.innerHTML = HTML; + + deepFilterNodeList( doc.body.childNodes, filters, doc, schema ); + + return doc.body.innerHTML; +} + +/** + * Given a schema, unwraps or removes nodes, attributes and classes on a node + * list. + * + * @param {NodeList} nodeList The nodeList to filter. + * @param {Document} doc The document of the nodeList. + * @param {Object} schema An array of functions that can mutate with the provided node. + * @param {Object} inline Whether to clean for inline mode. + */ +function cleanNodeList( nodeList, doc, schema, inline ) { + Array.from( nodeList ).forEach( ( node ) => { + const tag = node.nodeName.toLowerCase(); + + // It's a valid child. 
+ if ( schema.hasOwnProperty( tag ) ) { + if ( node.nodeType === ELEMENT_NODE ) { + const { attributes = [], classes = [], children, require = [] } = schema[ tag ]; + + // If the node is empty and it's supposed to have children, + // remove the node. + if ( isEmpty( node ) && children ) { + remove( node ); + return; + } + + if ( node.hasAttributes() ) { + // Strip invalid attributes. + Array.from( node.attributes ).forEach( ( { name } ) => { + if ( name !== 'class' && ! includes( attributes, name ) ) { + node.removeAttribute( name ); + } + } ); + + // Strip invalid classes. + if ( node.classList.length ) { + const newClasses = classes.filter( ( name ) => + node.classList.contains( name ) + ); + + if ( newClasses.length ) { + node.setAttribute( 'class', newClasses.join( ' ' ) ); + } else { + node.removeAttribute( 'class' ); + } + } + } + + if ( node.hasChildNodes() ) { + // Do not filter any content. + if ( children === '*' ) { + return; + } + + // Continue if the node is supposed to have children. + if ( children ) { + // If a parent requires certain children, but it does + // not have them, drop the parent and continue. + if ( require.length && ! node.querySelector( require.join( ',' ) ) ) { + cleanNodeList( node.childNodes, doc, schema, inline ); + unwrap( node ); + } + + cleanNodeList( node.childNodes, doc, children, inline ); + // Remove children if the node is not supposed to have any. + } else { + while ( node.firstChild ) { + remove( node.firstChild ); + } + } + } + } + // Invalid child. Continue with schema at the same place and unwrap. + } else { + cleanNodeList( node.childNodes, doc, schema, inline ); + + // For inline mode, insert a line break when unwrapping nodes that + // are not phrasing content. + if ( inline && ! isPhrasingContent( node ) && node.nextElementSibling ) { + insertAfter( doc.createElement( 'br' ), node ); + } + + unwrap( node ); + } + } ); +} + +/** + * Given a schema, unwraps or removes nodes, attributes and classes on HTML. 
+ * + * @param {string} HTML The HTML to clean up. + * @param {Object} schema Schema for the HTML. + * @param {Object} inline Whether to clean for inline mode. + * + * @return {string} The cleaned up HTML. + */ +export function removeInvalidHTML( HTML, schema, inline ) { const doc = document.implementation.createHTMLDocument( '' ); doc.body.innerHTML = HTML; - deepFilterNodeList( doc.body.childNodes, filters, doc ); + cleanNodeList( doc.body.childNodes, doc, schema, inline ); return doc.body.innerHTML; } diff --git a/core-blocks/code/index.js b/core-blocks/code/index.js index 7587d697e3fb67..22946375e0207c 100644 --- a/core-blocks/code/index.js +++ b/core-blocks/code/index.js @@ -47,10 +47,21 @@ export const settings = { { type: 'raw', isMatch: ( node ) => ( - node.nodeName === 'PRE' && + node.matches( 'pre' ) && node.children.length === 1 && - node.firstChild.nodeName === 'CODE' + node.firstChild.matches( 'code' ) ), + schema: { + pre: { + children: { + code: { + children: { + '#text': {}, + }, + }, + }, + }, + }, }, ], }, diff --git a/core-blocks/heading/index.js b/core-blocks/heading/index.js index a0ff87c0d194db..6684ef62071a3b 100644 --- a/core-blocks/heading/index.js +++ b/core-blocks/heading/index.js @@ -10,6 +10,7 @@ import { BlockControls, InspectorControls, AlignmentToolbar, + getPhrasingContentSchema, } from '@wordpress/blocks'; /** @@ -69,7 +70,15 @@ export const settings = { }, { type: 'raw', - isMatch: ( node ) => /H\d/.test( node.nodeName ), + selector: 'h1,h2,h3,h4,h5,h6', + schema: { + h1: { children: getPhrasingContentSchema() }, + h2: { children: getPhrasingContentSchema() }, + h3: { children: getPhrasingContentSchema() }, + h4: { children: getPhrasingContentSchema() }, + h5: { children: getPhrasingContentSchema() }, + h6: { children: getPhrasingContentSchema() }, + }, }, { type: 'pattern', diff --git a/core-blocks/html/index.js b/core-blocks/html/index.js index 3764afd736b9b7..93df4c86747fc2 100644 --- a/core-blocks/html/index.js +++ 
b/core-blocks/html/index.js @@ -4,7 +4,7 @@ import { RawHTML } from '@wordpress/element'; import { __ } from '@wordpress/i18n'; import { withState, SandBox, CodeEditor } from '@wordpress/components'; -import { BlockControls } from '@wordpress/blocks'; +import { BlockControls, getPhrasingContentSchema } from '@wordpress/blocks'; /** * Internal dependencies @@ -41,7 +41,20 @@ export const settings = { from: [ { type: 'raw', - isMatch: ( node ) => node.nodeName === 'IFRAME', + isMatch: ( node ) => node.matches( 'figure' ) && !! node.querySelector( 'iframe' ), + schema: { + figure: { + require: [ 'iframe' ], + children: { + iframe: { + attributes: [ 'src', 'allowfullscreen', 'height', 'width' ], + }, + figcaption: { + children: getPhrasingContentSchema(), + }, + }, + }, + }, }, ], }, diff --git a/core-blocks/image/index.js b/core-blocks/image/index.js index e689b31d1139a5..980bafcc01e8e5 100644 --- a/core-blocks/image/index.js +++ b/core-blocks/image/index.js @@ -7,6 +7,7 @@ import { getBlockAttributes, getBlockType, RichText, + getPhrasingContentSchema, } from '@wordpress/blocks'; /** @@ -57,6 +58,29 @@ const blockAttributes = { }, }; +const imageSchema = { + img: { + attributes: [ 'src', 'alt' ], + classes: [ 'alignleft', 'aligncenter', 'alignright', 'alignnone' ], + }, +}; + +const schema = { + figure: { + require: [ 'img' ], + children: { + ...imageSchema, + a: { + attributes: [ 'href' ], + children: imageSchema, + }, + figcaption: { + children: getPhrasingContentSchema(), + }, + }, + }, +}; + export const settings = { title: __( 'Image' ), @@ -74,13 +98,9 @@ export const settings = { from: [ { type: 'raw', - isMatch( node ) { - const tag = node.nodeName.toLowerCase(); - const hasImage = node.querySelector( 'img' ); - - return tag === 'img' || ( hasImage && tag === 'figure' ); - }, - transform( node ) { + isMatch: ( node ) => node.matches( 'figure' ) && !! 
node.querySelector( 'img' ), + schema, + transform: ( node ) => { const matches = /align(left|center|right)/.exec( node.className ); const align = matches ? matches[ 1 ] : undefined; const blockType = getBlockType( 'core/image' ); diff --git a/core-blocks/list/index.js b/core-blocks/list/index.js index 21a5e9a17c7926..5adaee2e7cc782 100644 --- a/core-blocks/list/index.js +++ b/core-blocks/list/index.js @@ -12,6 +12,7 @@ import { createBlock, BlockControls, RichText, + getPhrasingContentSchema, } from '@wordpress/blocks'; /** @@ -19,6 +20,23 @@ import { */ import './editor.scss'; +const listContentSchema = { + ...getPhrasingContentSchema(), + ul: {}, + ol: { attributes: [ 'type' ] }, +}; + +// Recursion is needed. +// Possible: ul > li > ul. +// Impossible: ul > ul. +[ 'ul', 'ol' ].forEach( ( tag ) => { + listContentSchema[ tag ].children = { + li: { + children: listContentSchema, + }, + }; +} ); + export const name = 'core/list'; export const settings = { @@ -80,7 +98,11 @@ export const settings = { }, { type: 'raw', - isMatch: ( node ) => node.nodeName === 'OL' || node.nodeName === 'UL', + selector: 'ol,ul', + schema: { + ol: listContentSchema.ol, + ul: listContentSchema.ul, + }, }, { type: 'pattern', diff --git a/core-blocks/more/index.js b/core-blocks/more/index.js index 20a8ee5175cd43..9b31308e2eac32 100644 --- a/core-blocks/more/index.js +++ b/core-blocks/more/index.js @@ -52,6 +52,9 @@ export const settings = { from: [ { type: 'raw', + schema: { + 'wp-block': { attributes: [ 'data-block' ] }, + }, isMatch: ( node ) => node.dataset && node.dataset.block === 'core/more', transform( node ) { const { customText, noTeaser } = node.dataset; diff --git a/core-blocks/nextpage/index.js b/core-blocks/nextpage/index.js index 86420233d3014d..e3668e5fb7b04b 100644 --- a/core-blocks/nextpage/index.js +++ b/core-blocks/nextpage/index.js @@ -35,6 +35,9 @@ export const settings = { from: [ { type: 'raw', + schema: { + 'wp-block': { attributes: [ 'data-block' ] }, + }, 
isMatch: ( node ) => node.dataset && node.dataset.block === 'core/nextpage', transform() { return createBlock( 'core/nextpage', {} ); diff --git a/core-blocks/paragraph/index.js b/core-blocks/paragraph/index.js index c1160a11bd6952..8c9267b73eb97b 100644 --- a/core-blocks/paragraph/index.js +++ b/core-blocks/paragraph/index.js @@ -34,6 +34,7 @@ import { InspectorControls, PanelColor, RichText, + getPhrasingContentSchema, } from '@wordpress/blocks'; /** @@ -343,12 +344,12 @@ export const settings = { from: [ { type: 'raw', - priority: 20, - isMatch: ( node ) => ( - node.nodeName === 'P' && - // Do not allow embedded content. - ! node.querySelector( 'audio, canvas, embed, iframe, img, math, object, svg, video' ) - ), + selector: 'p', + schema: { + p: { + children: getPhrasingContentSchema(), + }, + }, }, ], }, diff --git a/core-blocks/preformatted/index.js b/core-blocks/preformatted/index.js index 82a594da4ab5a8..751a9d5dc62447 100644 --- a/core-blocks/preformatted/index.js +++ b/core-blocks/preformatted/index.js @@ -5,6 +5,7 @@ import { __ } from '@wordpress/i18n'; import { createBlock, RichText, + getPhrasingContentSchema, } from '@wordpress/blocks'; /** @@ -42,12 +43,17 @@ export const settings = { { type: 'raw', isMatch: ( node ) => ( - node.nodeName === 'PRE' && + node.matches( 'pre' ) && ! 
( node.children.length === 1 && - node.firstChild.nodeName === 'CODE' + node.firstChild.matches( 'code' ) ) ), + schema: { + pre: { + children: getPhrasingContentSchema(), + }, + }, }, ], to: [ diff --git a/core-blocks/quote/index.js b/core-blocks/quote/index.js index bd3a0ab2211e85..2733fcc5bd406c 100644 --- a/core-blocks/quote/index.js +++ b/core-blocks/quote/index.js @@ -15,6 +15,7 @@ import { BlockControls, AlignmentToolbar, RichText, + getPhrasingContentSchema, } from '@wordpress/blocks'; /** @@ -101,7 +102,16 @@ export const settings = { }, { type: 'raw', - isMatch: ( node ) => node.nodeName === 'BLOCKQUOTE', + selector: 'blockquote', + schema: { + blockquote: { + children: { + p: { + children: getPhrasingContentSchema(), + }, + }, + }, + }, }, ], to: [ diff --git a/core-blocks/separator/index.js b/core-blocks/separator/index.js index c0d4607d7f3859..1ed566613958c9 100644 --- a/core-blocks/separator/index.js +++ b/core-blocks/separator/index.js @@ -32,7 +32,10 @@ export const settings = { }, { type: 'raw', - isMatch: ( node ) => node.nodeName === 'HR', + selector: 'hr', + schema: { + hr: {}, + }, }, ], }, diff --git a/core-blocks/table/index.js b/core-blocks/table/index.js index 42c24adc941c98..b8aa6429824f3d 100644 --- a/core-blocks/table/index.js +++ b/core-blocks/table/index.js @@ -11,6 +11,7 @@ import { BlockControls, BlockAlignmentToolbar, RichText, + getPhrasingContentSchema, } from '@wordpress/blocks'; /** @@ -20,6 +21,35 @@ import './editor.scss'; import './style.scss'; import TableBlock from './table-block'; +const tableContentSchema = { + tr: { + children: { + th: { + children: getPhrasingContentSchema(), + }, + td: { + children: getPhrasingContentSchema(), + }, + }, + }, +}; + +const tableSchema = { + table: { + children: { + thead: { + children: tableContentSchema, + }, + tfoot: { + children: tableContentSchema, + }, + tbody: { + children: tableContentSchema, + }, + }, + }, +}; + export const name = 'core/table'; export const settings = { @@ -49,7 
+79,8 @@ export const settings = { from: [ { type: 'raw', - isMatch: ( node ) => node.nodeName === 'TABLE', + selector: 'table', + schema: tableSchema, }, ], }, diff --git a/utils/dom.js b/utils/dom.js index 36b407bfe4106f..bedf98ba6c5feb 100644 --- a/utils/dom.js +++ b/utils/dom.js @@ -446,3 +446,41 @@ export function remove( node ) { export function insertAfter( newNode, referenceNode ) { referenceNode.parentNode.insertBefore( newNode, referenceNode.nextSibling ); } + +/** + * Unwrap the given node. This means any child nodes are moved to the parent. + * + * @param {Node} node The node to unwrap. + * + * @return {void} + */ +export function unwrap( node ) { + const parent = node.parentNode; + + while ( node.firstChild ) { + parent.insertBefore( node.firstChild, node ); + } + + parent.removeChild( node ); +} + +/** + * Replaces the given node with a new node with the given tag name. + * + * @param {Element} node The node to replace + * @param {string} tagName The new tag name. + * @param {Document} doc The document of the node. + * + * @return {Element} The new node. 
+ */ +export function replaceTag( node, tagName, doc ) { + const newNode = doc.createElement( tagName ); + + while ( node.firstChild ) { + newNode.appendChild( node.firstChild ); + } + + node.parentNode.replaceChild( newNode, node ); + + return newNode; +} From 5c1c5dd62a0266b754f9b98f73f6795e87418054 Mon Sep 17 00:00:00 2001 From: iseulde Date: Wed, 2 May 2018 23:38:25 +0200 Subject: [PATCH 2/2] Polyfill Element#matches --- blocks/api/raw-handling/index.js | 2 ++ core-blocks/code/index.js | 4 ++-- core-blocks/html/index.js | 2 +- core-blocks/image/index.js | 2 +- core-blocks/preformatted/index.js | 4 ++-- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/blocks/api/raw-handling/index.js b/blocks/api/raw-handling/index.js index 8cf4f2bde25f51..aaaccb0338c56d 100644 --- a/blocks/api/raw-handling/index.js +++ b/blocks/api/raw-handling/index.js @@ -2,6 +2,8 @@ * External dependencies */ import { find, flatMap, filter, compact } from 'lodash'; +// Also polyfills Element#matches. +import 'element-closest'; /** * Internal dependencies diff --git a/core-blocks/code/index.js b/core-blocks/code/index.js index 22946375e0207c..4fcf09bd332481 100644 --- a/core-blocks/code/index.js +++ b/core-blocks/code/index.js @@ -47,9 +47,9 @@ export const settings = { { type: 'raw', isMatch: ( node ) => ( - node.matches( 'pre' ) && + node.nodeName === 'PRE' && node.children.length === 1 && - node.firstChild.matches( 'code' ) + node.firstChild.nodeName === 'CODE' ), schema: { pre: { diff --git a/core-blocks/html/index.js b/core-blocks/html/index.js index 93df4c86747fc2..7a7eb822fe7154 100644 --- a/core-blocks/html/index.js +++ b/core-blocks/html/index.js @@ -41,7 +41,7 @@ export const settings = { from: [ { type: 'raw', - isMatch: ( node ) => node.matches( 'figure' ) && !! node.querySelector( 'iframe' ), + isMatch: ( node ) => node.nodeName === 'FIGURE' && !! 
node.querySelector( 'iframe' ), schema: { figure: { require: [ 'iframe' ], diff --git a/core-blocks/image/index.js b/core-blocks/image/index.js index 980bafcc01e8e5..bad314b5cb4eb2 100644 --- a/core-blocks/image/index.js +++ b/core-blocks/image/index.js @@ -98,7 +98,7 @@ export const settings = { from: [ { type: 'raw', - isMatch: ( node ) => node.matches( 'figure' ) && !! node.querySelector( 'img' ), + isMatch: ( node ) => node.nodeName === 'FIGURE' && !! node.querySelector( 'img' ), schema, transform: ( node ) => { const matches = /align(left|center|right)/.exec( node.className ); diff --git a/core-blocks/preformatted/index.js b/core-blocks/preformatted/index.js index 751a9d5dc62447..6e094ced7b8a07 100644 --- a/core-blocks/preformatted/index.js +++ b/core-blocks/preformatted/index.js @@ -43,10 +43,10 @@ export const settings = { { type: 'raw', isMatch: ( node ) => ( - node.matches( 'pre' ) && + node.nodeName === 'PRE' && ! ( node.children.length === 1 && - node.firstChild.matches( 'code' ) + node.firstChild.nodeName === 'CODE' ) ), schema: {