diff --git a/packages/blocks/src/api/raw-handling/html-formatting-remover.js b/packages/blocks/src/api/raw-handling/html-formatting-remover.js new file mode 100644 index 0000000000000..615feb80508a9 --- /dev/null +++ b/packages/blocks/src/api/raw-handling/html-formatting-remover.js @@ -0,0 +1,92 @@ +/** + * Internal dependencies + */ +import { isPhrasingContent } from './phrasing-content'; + +function getSibling( node, which ) { + const sibling = node[ `${ which }Sibling` ]; + + if ( sibling && isPhrasingContent( sibling ) ) { + return sibling; + } + + const { parentNode } = node; + + if ( ! parentNode || ! isPhrasingContent( parentNode ) ) { + return; + } + + return getSibling( parentNode, which ); +} + +function isFormattingSpace( character ) { + return ( + character === ' ' || + character === '\r' || + character === '\n' || + character === '\t' + ); +} + +/** + * Removes spacing that formats HTML. + * + * @see https://www.w3.org/TR/css-text-3/#white-space-processing + * + * @param {Node} node The node to be processed. + * @return {void} + */ +export default function( node ) { + if ( node.nodeType !== node.TEXT_NODE ) { + return; + } + + // Ignore pre content. + if ( node.parentElement.closest( 'pre' ) ) { + return; + } + + // First, replace any sequence of HTML formatting space with a single space. + let newData = node.data.replace( /[ \r\n\t]+/g, ' ' ); + + // Remove the leading space if the text element is at the start of a block, + // is preceded by a line break element, or has a space in the previous + // node. + if ( newData[ 0 ] === ' ' ) { + const previousSibling = getSibling( node, 'previous' ); + + if ( + ! previousSibling || + previousSibling.nodeName === 'BR' || + previousSibling.textContent.slice( -1 ) === ' ' + ) { + newData = newData.slice( 1 ); + } + } + + // Remove the trailing space if the text element is at the end of a block, + // is succeded by a line break element, or has a space in the next text + // node. + if ( newData[ newData.length - 1 ] === ' ' ) { + const nextSibling = getSibling( node, 'next' ); + + if ( + ! nextSibling || + nextSibling.nodeName === 'BR' || + ( + nextSibling.nodeType === nextSibling.TEXT_NODE && + isFormattingSpace( nextSibling.textContent[ 0 ] ) + ) + ) { + newData = newData.slice( 0, -1 ); + } + } + + // If there's no data left, remove the node, so `previousSibling` stays + // accurate. Otherwise, update the node data. + if ( ! newData ) { + node.parentNode.removeChild( node ); + } else { + node.data = newData; + } +} diff --git a/packages/blocks/src/api/raw-handling/paste-handler.js b/packages/blocks/src/api/raw-handling/paste-handler.js index fdea17bd0eba2..4468eb87c51db 100644 --- a/packages/blocks/src/api/raw-handling/paste-handler.js +++ b/packages/blocks/src/api/raw-handling/paste-handler.js @@ -24,6 +24,7 @@ import shortcodeConverter from './shortcode-converter'; import markdownConverter from './markdown-converter'; import iframeRemover from './iframe-remover'; import googleDocsUIDRemover from './google-docs-uid-remover'; +import htmlFormattingRemover from './html-formatting-remover'; import { getPhrasingContentSchema } from './phrasing-content'; import { deepFilterHTML, @@ -224,6 +225,7 @@ export function pasteHandler( { HTML = '', plainText = '', mode = 'AUTO', tagNam piece = deepFilterHTML( piece, filters, blockContentSchema ); piece = removeInvalidHTML( piece, schema ); + piece = deepFilterHTML( piece, [ htmlFormattingRemover ], blockContentSchema ); piece = normaliseBlocks( piece ); // Allows us to ask for this information when we get a report. diff --git a/packages/blocks/src/api/raw-handling/test/html-formatting-remover.js b/packages/blocks/src/api/raw-handling/test/html-formatting-remover.js new file mode 100644 index 0000000000000..d5efd50f4db85 --- /dev/null +++ b/packages/blocks/src/api/raw-handling/test/html-formatting-remover.js @@ -0,0 +1,110 @@ +/** + * Internal dependencies + */ +import filter from '../html-formatting-remover'; +import { deepFilterHTML } from '../utils'; + +describe( 'HTMLFormattingRemover', () => { + it( 'should trim text node without parent', () => { + const input = 'a'; + expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input ); + } ); + + it( 'should remove formatting space', () => { + const input = ` +
a\n b\n`; + expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input ); + } ); + + it( 'should not remove white space if next elemnt has none', () => { + const input = `
-One - | -Two - | -Three - |
-1 - | -2 - | -3 - |
-I - | -II - | -III - |
One | Two | Three |
1 | 2 | 3 |
I | II | III |
An image:
+An image:
diff --git a/test/integration/fixtures/classic-out.html b/test/integration/fixtures/classic-out.html index dbc94528db7b2..1298053376dfa 100644 --- a/test/integration/fixtures/classic-out.html +++ b/test/integration/fixtures/classic-out.html @@ -15,7 +15,7 @@ -Fourth paragraph
+Fourth paragraph
diff --git a/test/integration/fixtures/evernote-out.html b/test/integration/fixtures/evernote-out.html index c984df312f0be..32d2deb764897 100644 --- a/test/integration/fixtures/evernote-out.html +++ b/test/integration/fixtures/evernote-out.html @@ -1,7 +1,5 @@ -This is a paragraph.
-
This is a link.
-
This is a paragraph.
This is a link.
One - | Two - | Three - |
Four - | Five - | Six - |
One | Two | Three |
Four | Five | Six |
Preserve
-line breaks please.
Preserve
line breaks please.
This is a -title
+This is a title
-This is a -subtitle
+This is a subtitle
@@ -29,25 +27,7 @@- One - | - Two - | - Three - |
- 1 - | - 2 - | - 3 - |
- I - | - II - | - III - |
One | Two | Three |
1 | 2 | 3 |
I | II | III |
-Lorem -ipsum dolor sit amet, consectetur adipiscing elit -
+Lorem ipsum dolor sit amet, consectetur adipiscing elit
--Lorem -ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque -aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, -purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam -congue laoreet massa, quis varius est tincidunt ut.
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam congue laoreet massa, quis varius est tincidunt ut.