Skip to content

Commit

Permalink
Paste: Remove HTML Formatting Space (#17470)
Browse files Browse the repository at this point in the history
* Paste: Remove HTML Formatting Space

* Simplify

* Add unit tests

* Ignore pre

* Address feedback

* Add extra test

* Add extra plain text test
  • Loading branch information
ellatrix authored Nov 25, 2019
1 parent ada00b6 commit 0413162
Show file tree
Hide file tree
Showing 9 changed files with 215 additions and 66 deletions.
92 changes: 92 additions & 0 deletions packages/blocks/src/api/raw-handling/html-formatting-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/**
* Internal dependencies
*/
import { isPhrasingContent } from './phrasing-content';

/**
 * Finds the nearest phrasing-content neighbour of a node in one direction.
 *
 * The node's own sibling is tried first. When it is missing or is not
 * phrasing content, the search continues with the sibling of the parent, for
 * as long as each parent is itself phrasing content.
 *
 * @param {Node}   node  The node to start from.
 * @param {string} which Direction prefix, `'previous'` or `'next'`; it is
 *                       combined into the `previousSibling` / `nextSibling`
 *                       property name.
 * @return {Node|undefined} The phrasing-content sibling that was found, or
 *                          `undefined` when the walk reaches a missing or
 *                          non-phrasing parent first.
 */
function getSibling( node, which ) {
	const siblingKey = `${ which }Sibling`;
	let current = node;

	for ( ;; ) {
		const candidate = current[ siblingKey ];

		if ( candidate && isPhrasingContent( candidate ) ) {
			return candidate;
		}

		const parent = current.parentNode;

		// Stop climbing once we would leave the run of phrasing content.
		if ( ! parent || ! isPhrasingContent( parent ) ) {
			return undefined;
		}

		current = parent;
	}
}

/**
 * Tells whether a value is one of the four characters treated as HTML
 * formatting space here: space, carriage return, line feed or tab.
 *
 * @param {*} character The value to test; compared with strict equality.
 * @return {boolean} `true` only for `' '`, `'\r'`, `'\n'` or `'\t'`.
 */
function isFormattingSpace( character ) {
	switch ( character ) {
		case ' ':
		case '\r':
		case '\n':
		case '\t':
			return true;
		default:
			return false;
	}
}

/**
 * Removes spacing that formats HTML.
 *
 * Only text nodes are touched. Runs of space, carriage return, line feed and
 * tab are collapsed to a single space; a leading or trailing space is then
 * dropped when it sits at the edge of a block, next to a `<br>`, or next to
 * another space. A text node left empty is removed from its parent.
 * Text inside a `<pre>` ancestor is left untouched.
 *
 * @see https://www.w3.org/TR/css-text-3/#white-space-processing
 *
 * @param {Node} node The node to be processed.
 * @return {void}
 */
export default function htmlFormattingRemover( node ) {
	if ( node.nodeType !== node.TEXT_NODE ) {
		return;
	}

	// Ignore pre content. Walk `parentNode` rather than calling
	// `node.parentElement.closest( 'pre' )`: `parentElement` is not guaranteed
	// to be set for a text node (it is null when the parent is not an element,
	// and it is not exposed on text nodes by every browser), in which case
	// the `.closest` call would throw.
	let ancestor = node;
	while ( ( ancestor = ancestor.parentNode ) ) {
		if (
			ancestor.nodeType === node.ELEMENT_NODE &&
			ancestor.nodeName === 'PRE'
		) {
			return;
		}
	}

	// First, replace any sequence of HTML formatting space with a single space.
	let newData = node.data.replace( /[ \r\n\t]+/g, ' ' );

	// Remove the leading space if the text element is at the start of a block,
	// is preceded by a line break element, or has a space in the previous
	// node.
	if ( newData[ 0 ] === ' ' ) {
		const previousSibling = getSibling( node, 'previous' );

		if (
			! previousSibling ||
			previousSibling.nodeName === 'BR' ||
			previousSibling.textContent.slice( -1 ) === ' '
		) {
			newData = newData.slice( 1 );
		}
	}

	// Remove the trailing space if the text element is at the end of a block,
	// is succeeded by a line break element, or has a space in the next text
	// node. The next node's first character is tested against every
	// formatting space character rather than just ' '.
	if ( newData[ newData.length - 1 ] === ' ' ) {
		const nextSibling = getSibling( node, 'next' );

		if (
			! nextSibling ||
			nextSibling.nodeName === 'BR' ||
			(
				nextSibling.nodeType === nextSibling.TEXT_NODE &&
				isFormattingSpace( nextSibling.textContent[ 0 ] )
			)
		) {
			newData = newData.slice( 0, -1 );
		}
	}

	// If there's no data left, remove the node, so `previousSibling` stays
	// accurate. Otherwise, update the node data. A node without a parent
	// cannot be removed, so it is simply emptied.
	if ( ! newData && node.parentNode ) {
		node.parentNode.removeChild( node );
	} else {
		node.data = newData;
	}
}
2 changes: 2 additions & 0 deletions packages/blocks/src/api/raw-handling/paste-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import shortcodeConverter from './shortcode-converter';
import markdownConverter from './markdown-converter';
import iframeRemover from './iframe-remover';
import googleDocsUIDRemover from './google-docs-uid-remover';
import htmlFormattingRemover from './html-formatting-remover';
import { getPhrasingContentSchema } from './phrasing-content';
import {
deepFilterHTML,
Expand Down Expand Up @@ -224,6 +225,7 @@ export function pasteHandler( { HTML = '', plainText = '', mode = 'AUTO', tagNam

piece = deepFilterHTML( piece, filters, blockContentSchema );
piece = removeInvalidHTML( piece, schema );
piece = deepFilterHTML( piece, [ htmlFormattingRemover ], blockContentSchema );
piece = normaliseBlocks( piece );

// Allows us to ask for this information when we get a report.
Expand Down
110 changes: 110 additions & 0 deletions packages/blocks/src/api/raw-handling/test/html-formatting-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**
* Internal dependencies
*/
import filter from '../html-formatting-remover';
import { deepFilterHTML } from '../utils';

// Unit tests for the HTML formatting remover filter. Each case passes an HTML
// string and the filter to `deepFilterHTML` and compares the returned string.
// NOTE(review): `deepFilterHTML` is imported from '../utils' and not visible
// here; presumably it parses the HTML, runs the filter over every node and
// serialises the result — confirm against its implementation.
// The whitespace inside the template literals below is the data under test.
describe( 'HTMLFormattingRemover', () => {
// NOTE(review): despite the title, the input has no whitespace to trim; this
// case only asserts that bare text passes through unchanged.
it( 'should trim text node without parent', () => {
const input = 'a';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
} );

// Line breaks between words collapse to one space; block edges are trimmed.
it( 'should remove formatting space', () => {
const input = `
<div>
a
b
</div>
`;
const output = '<div>a b</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Same as above, with the text wrapped in an inline (phrasing) element.
it( 'should remove nested formatting space', () => {
const input = `
<div>
<strong>
a
b
</strong>
</div>
`;
const output = '<div><strong>a b</strong></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// A single space must survive between text and an adjacent inline element
// whose own text does not start or end with a space.
it( 'should not remove leading or trailing space if previous or next element has no space', () => {
const input = `
<div>
a
<strong>b</strong>
c
</div>
`;
const output = '<div>a <strong>b</strong> c</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// A whitespace-only text node is removed entirely.
it( 'should remove formatting space (empty)', () => {
const input = `
<div>
</div>
`;
const output = '<div></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Whitespace between and inside nested block elements is dropped.
it( 'should remove block level formatting space', () => {
const input = `
<div>
<div>
a
</div>
<div>
b
</div>
</div>
`;
const output = '<div><div>a</div><div>b</div></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Spaces directly before and after a <br> are dropped.
it( 'should remove formatting space around br', () => {
const input = `
<div>
a
<br>
b
</div>
`;
const output = '<div>a<br>b</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Whitespace inside each <strong> is trimmed, but exactly one space is kept
// between the two inline elements.
it( 'should remove formatting space around phasing content elements', () => {
const input = `
<div>
<strong>
a
</strong>
<strong>
b
</strong>
</div>
`;
const output = '<div><strong>a</strong> <strong>b</strong></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Content of <pre> must be returned byte-for-byte.
it( 'should ignore pre', () => {
const input = `<pre> a\n b\n</pre>`;
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
} );

// The trailing space inside <strong> is the only separator before "b", so it
// must be kept.
it( 'should not remove white space if next elemnt has none', () => {
const input = `<div><strong>a </strong>b</div>`;
const output = '<div><strong>a </strong>b</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );
} );
22 changes: 2 additions & 20 deletions test/integration/fixtures/apple-out.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,9 @@
<!-- /wp:list -->

<!-- wp:table -->
<figure class="wp-block-table"><table class=""><tbody><tr><td>
One
</td><td>
Two
</td><td>
Three
</td></tr><tr><td>
1
</td><td>
2
</td><td>
3
</td></tr><tr><td>
I
</td><td>
II
</td><td>
III
</td></tr></tbody></table></figure>
<figure class="wp-block-table"><table class=""><tbody><tr><td>One</td><td>Two</td><td>Three</td></tr><tr><td>1</td><td>2</td><td>3</td></tr><tr><td>I</td><td>II</td><td>III</td></tr></tbody></table></figure>
<!-- /wp:table -->

<!-- wp:paragraph -->
<p>An image: </p>
<p>An image:</p>
<!-- /wp:paragraph -->
2 changes: 1 addition & 1 deletion test/integration/fixtures/classic-out.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>Fourth paragraph</p>
<p>Fourth paragraph</p>
<!-- /wp:paragraph -->

<!-- wp:more -->
Expand Down
12 changes: 2 additions & 10 deletions test/integration/fixtures/evernote-out.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<!-- wp:paragraph -->
<p>This is a <em>paragraph</em>.
<br>This is a <a href="https://w.org">link</a>.
<br></p>
<p>This is a <em>paragraph</em>.<br>This is a <a href="https://w.org">link</a>.<br></p>
<!-- /wp:paragraph -->

<!-- wp:list -->
Expand All @@ -17,13 +15,7 @@
<!-- /wp:separator -->

<!-- wp:table -->
<figure class="wp-block-table"><table class=""><tbody><tr><td>One
</td><td>Two
</td><td>Three
</td></tr><tr><td>Four
</td><td>Five
</td><td>Six
</td></tr></tbody></table></figure>
<figure class="wp-block-table"><table class=""><tbody><tr><td>One</td><td>Two</td><td>Three</td></tr><tr><td>Four</td><td>Five</td><td>Six</td></tr></tbody></table></figure>
<!-- /wp:table -->

<!-- wp:image -->
Expand Down
3 changes: 1 addition & 2 deletions test/integration/fixtures/markdown-out.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ <h1>This is a heading with <em>italic</em></h1>
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>Preserve<br>
line breaks please.</p>
<p>Preserve<br>line breaks please.</p>
<!-- /wp:paragraph -->

<!-- wp:heading -->
Expand Down
26 changes: 3 additions & 23 deletions test/integration/fixtures/ms-word-out.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
<!-- wp:paragraph -->
<p>This is a
title</p>
<p>This is a title</p>
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>This is a
subtitle</p>
<p>This is a subtitle</p>
<!-- /wp:paragraph -->

<!-- wp:heading {"level":1} -->
Expand All @@ -29,25 +27,7 @@ <h2>This is a heading level 2</h2>
<!-- /wp:list -->

<!-- wp:table -->
<figure class="wp-block-table"><table class=""><tbody><tr><td>
One
</td><td>
Two
</td><td>
Three
</td></tr><tr><td>
1
</td><td>
2
</td><td>
3
</td></tr><tr><td>
I
</td><td>
II
</td><td>
III
</td></tr></tbody></table></figure>
<figure class="wp-block-table"><table class=""><tbody><tr><td>One</td><td>Two</td><td>Three</td></tr><tr><td>1</td><td>2</td><td>3</td></tr><tr><td>I</td><td>II</td><td>III</td></tr></tbody></table></figure>
<!-- /wp:table -->

<!-- wp:paragraph -->
Expand Down
12 changes: 2 additions & 10 deletions test/integration/fixtures/ms-word-styled-out.html
Original file line number Diff line number Diff line change
@@ -1,15 +1,7 @@
<!-- wp:paragraph -->
<p>
<strong>Lorem
ipsum dolor sit amet, consectetur adipiscing elit&nbsp; </strong>
</p>
<p><strong>Lorem ipsum dolor sit amet, consectetur adipiscing elit&nbsp;</strong></p>
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>
Lorem
ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque
aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt,
purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam
congue laoreet massa, quis varius est tincidunt ut.</p>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam congue laoreet massa, quis varius est tincidunt ut.</p>
<!-- /wp:paragraph -->

0 comments on commit 0413162

Please sign in to comment.