Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Paste: Remove HTML Formatting Space #17470

Merged
merged 7 commits into from
Nov 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions packages/blocks/src/api/raw-handling/html-formatting-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/**
* Internal dependencies
*/
import { isPhrasingContent } from './phrasing-content';

/**
 * Finds the closest phrasing-content neighbour of a node in the given
 * direction. When the node has no phrasing-content sibling on that side, the
 * search continues from its parent for as long as the parent is itself
 * phrasing content.
 *
 * @param {Node}   node  The node to start from.
 * @param {string} which Direction to look in: 'next' or 'previous'.
 * @return {Node|undefined} The neighbouring phrasing-content node, or
 *                          undefined when none is found.
 */
function getSibling( node, which ) {
	const siblingKey = `${ which }Sibling`;
	let current = node;

	for ( ;; ) {
		const candidate = current[ siblingKey ];

		if ( candidate && isPhrasingContent( candidate ) ) {
			return candidate;
		}

		const parent = current.parentNode;

		// Stop climbing once the parent is missing or is not phrasing content.
		if ( ! parent || ! isPhrasingContent( parent ) ) {
			return undefined;
		}

		current = parent;
	}
}

/**
 * Checks whether a character is one of the white space characters treated as
 * HTML formatting: space, carriage return, line feed or tab.
 *
 * NOTE(review): a reviewer pointed out that \f (form feed) is also allowed as
 * white space here, though rare in practice. It is not matched, which keeps
 * this check in line with the `/[ \r\n\t]+/g` pattern used to collapse
 * formatting space in the remover.
 *
 * @param {string} character The single character to test.
 * @return {boolean} Whether the character is formatting space.
 */
function isFormattingSpace( character ) {
	return (
		character === ' ' ||
		character === '\r' ||
		character === '\n' ||
		character === '\t'
	);
}

/**
 * Removes spacing that formats HTML.
 *
 * Only text nodes are processed. Every run of spaces, carriage returns, line
 * feeds and tabs is collapsed to a single space. A leading or trailing space
 * is then dropped when it sits at a block boundary, next to a line break
 * element, or next to other space. Text inside a `pre` element is left
 * untouched. A node whose text becomes empty is removed from its parent.
 *
 * @see https://www.w3.org/TR/css-text-3/#white-space-processing
 *
 * @param {Node} node The node to be processed.
 * @return {void}
 */
export default function htmlFormattingRemover( node ) {
	if ( node.nodeType !== node.TEXT_NODE ) {
		return;
	}

	// Ignore pre content. The ancestors are walked through `parentNode`
	// instead of calling `node.parentElement.closest( 'pre' )`, because
	// `parentElement` is null when the parent is not an element (for example
	// a document fragment) or when the node is detached, which would throw.
	let ancestor = node.parentNode;
	while ( ancestor ) {
		if (
			ancestor.nodeType === ancestor.ELEMENT_NODE &&
			ancestor.nodeName === 'PRE'
		) {
			return;
		}
		ancestor = ancestor.parentNode;
	}

	// First, replace any sequence of HTML formatting space with a single space.
	let newData = node.data.replace( /[ \r\n\t]+/g, ' ' );

	// Remove the leading space if the text element is at the start of a block,
	// is preceded by a line break element, or has a space in the previous
	// node.
	if ( newData[ 0 ] === ' ' ) {
		const previousSibling = getSibling( node, 'previous' );

		if (
			! previousSibling ||
			previousSibling.nodeName === 'BR' ||
			previousSibling.textContent.slice( -1 ) === ' '
		) {
			newData = newData.slice( 1 );
		}
	}

	// Remove the trailing space if the text element is at the end of a block,
	// is succeeded by a line break element, or has a space in the next text
	// node.
	if ( newData[ newData.length - 1 ] === ' ' ) {
		const nextSibling = getSibling( node, 'next' );

		if (
			! nextSibling ||
			nextSibling.nodeName === 'BR' ||
			(
				nextSibling.nodeType === nextSibling.TEXT_NODE &&
				isFormattingSpace( nextSibling.textContent[ 0 ] )
			)
		) {
			newData = newData.slice( 0, -1 );
		}
	}

	// If there's no data left, remove the node, so `previousSibling` stays
	// accurate. Otherwise, update the node data. A node without a parent
	// cannot be removed, so its data is updated instead.
	if ( ! newData && node.parentNode ) {
		node.parentNode.removeChild( node );
	} else {
		node.data = newData;
	}
}
2 changes: 2 additions & 0 deletions packages/blocks/src/api/raw-handling/paste-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import shortcodeConverter from './shortcode-converter';
import markdownConverter from './markdown-converter';
import iframeRemover from './iframe-remover';
import googleDocsUIDRemover from './google-docs-uid-remover';
import htmlFormattingRemover from './html-formatting-remover';
import { getPhrasingContentSchema } from './phrasing-content';
import {
deepFilterHTML,
Expand Down Expand Up @@ -224,6 +225,7 @@ export function pasteHandler( { HTML = '', plainText = '', mode = 'AUTO', tagNam

piece = deepFilterHTML( piece, filters, blockContentSchema );
piece = removeInvalidHTML( piece, schema );
piece = deepFilterHTML( piece, [ htmlFormattingRemover ], blockContentSchema );
piece = normaliseBlocks( piece );

// Allows us to ask for this information when we get a report.
Expand Down
110 changes: 110 additions & 0 deletions packages/blocks/src/api/raw-handling/test/html-formatting-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**
* Internal dependencies
*/
import filter from '../html-formatting-remover';
import { deepFilterHTML } from '../utils';

// Test suite for the HTML formatting remover filter. Each case runs the
// filter through `deepFilterHTML` and compares the resulting HTML string.
// The white space inside the template literals is the test data itself.
describe( 'HTMLFormattingRemover', () => {
// A bare text string with no surrounding white space passes through unchanged.
it( 'should trim text node without parent', () => {
const input = 'a';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
} );

// Line breaks around and between words: the outer ones are dropped and the
// inner one becomes a single space.
it( 'should remove formatting space', () => {
const input = `
<div>
a
b
</div>
`;
const output = '<div>a b</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Same as above, with the text nested one level deeper in a <strong>.
it( 'should remove nested formatting space', () => {
const input = `
<div>
<strong>
a
b
</strong>
</div>
`;
const output = '<div><strong>a b</strong></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// The space separating text from an adjacent inline element is kept (as a
// single space) when the element's own text has no space at that edge.
it( 'should not remove leading or trailing space if previous or next element has no space', () => {
const input = `
<div>
a
<strong>b</strong>
c
</div>
`;
const output = '<div>a <strong>b</strong> c</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// An element containing only formatting space ends up empty.
it( 'should remove formatting space (empty)', () => {
const input = `
<div>
</div>
`;
const output = '<div></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Formatting space between and inside nested <div> elements is removed
// entirely.
it( 'should remove block level formatting space', () => {
const input = `
<div>
<div>
a
</div>
<div>
b
</div>
</div>
`;
const output = '<div><div>a</div><div>b</div></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// No space is left on either side of a <br>.
it( 'should remove formatting space around br', () => {
const input = `
<div>
a
<br>
b
</div>
`;
const output = '<div>a<br>b</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Space inside each <strong> is removed, while exactly one space is kept
// between the two <strong> elements.
it( 'should remove formatting space around phasing content elements', () => {
const input = `
<div>
<strong>
a
</strong>
<strong>
b
</strong>
</div>
`;
const output = '<div><strong>a</strong> <strong>b</strong></div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );

// Content of <pre> is expected to come back exactly as given.
it( 'should ignore pre', () => {
const input = `<pre> a\n b\n</pre>`;
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
} );

// A trailing space inside <strong> is kept when the following text does not
// start with white space.
it( 'should not remove white space if next elemnt has none', () => {
const input = `<div><strong>a </strong>b</div>`;
const output = '<div><strong>a </strong>b</div>';
expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
} );
} );
22 changes: 2 additions & 20 deletions test/integration/fixtures/apple-out.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,9 @@
<!-- /wp:list -->

<!-- wp:table -->
<figure class="wp-block-table"><table class=""><tbody><tr><td>
One
</td><td>
Two
</td><td>
Three
</td></tr><tr><td>
1
</td><td>
2
</td><td>
3
</td></tr><tr><td>
I
</td><td>
II
</td><td>
III
</td></tr></tbody></table></figure>
<figure class="wp-block-table"><table class=""><tbody><tr><td>One</td><td>Two</td><td>Three</td></tr><tr><td>1</td><td>2</td><td>3</td></tr><tr><td>I</td><td>II</td><td>III</td></tr></tbody></table></figure>
<!-- /wp:table -->

<!-- wp:paragraph -->
<p>An image: </p>
<p>An image:</p>
<!-- /wp:paragraph -->
2 changes: 1 addition & 1 deletion test/integration/fixtures/classic-out.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>Fourth paragraph</p>
<p>Fourth paragraph</p>
<!-- /wp:paragraph -->

<!-- wp:more -->
Expand Down
12 changes: 2 additions & 10 deletions test/integration/fixtures/evernote-out.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<!-- wp:paragraph -->
<p>This is a <em>paragraph</em>.
<br>This is a <a href="https://w.org">link</a>.
<br></p>
<p>This is a <em>paragraph</em>.<br>This is a <a href="https://w.org">link</a>.<br></p>
<!-- /wp:paragraph -->

<!-- wp:list -->
Expand All @@ -17,13 +15,7 @@
<!-- /wp:separator -->

<!-- wp:table -->
<figure class="wp-block-table"><table class=""><tbody><tr><td>One
</td><td>Two
</td><td>Three
</td></tr><tr><td>Four
</td><td>Five
</td><td>Six
</td></tr></tbody></table></figure>
<figure class="wp-block-table"><table class=""><tbody><tr><td>One</td><td>Two</td><td>Three</td></tr><tr><td>Four</td><td>Five</td><td>Six</td></tr></tbody></table></figure>
<!-- /wp:table -->

<!-- wp:image -->
Expand Down
3 changes: 1 addition & 2 deletions test/integration/fixtures/markdown-out.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ <h1>This is a heading with <em>italic</em></h1>
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>Preserve<br>
line breaks please.</p>
<p>Preserve<br>line breaks please.</p>
ellatrix marked this conversation as resolved.
Show resolved Hide resolved
<!-- /wp:paragraph -->

<!-- wp:heading -->
Expand Down
26 changes: 3 additions & 23 deletions test/integration/fixtures/ms-word-out.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
<!-- wp:paragraph -->
<p>This is a
title</p>
<p>This is a title</p>
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>This is a
subtitle</p>
<p>This is a subtitle</p>
<!-- /wp:paragraph -->

<!-- wp:heading {"level":1} -->
Expand All @@ -29,25 +27,7 @@ <h2>This is a heading level 2</h2>
<!-- /wp:list -->

<!-- wp:table -->
<figure class="wp-block-table"><table class=""><tbody><tr><td>
One
</td><td>
Two
</td><td>
Three
</td></tr><tr><td>
1
</td><td>
2
</td><td>
3
</td></tr><tr><td>
I
</td><td>
II
</td><td>
III
</td></tr></tbody></table></figure>
<figure class="wp-block-table"><table class=""><tbody><tr><td>One</td><td>Two</td><td>Three</td></tr><tr><td>1</td><td>2</td><td>3</td></tr><tr><td>I</td><td>II</td><td>III</td></tr></tbody></table></figure>
<!-- /wp:table -->

<!-- wp:paragraph -->
Expand Down
12 changes: 2 additions & 10 deletions test/integration/fixtures/ms-word-styled-out.html
Original file line number Diff line number Diff line change
@@ -1,15 +1,7 @@
<!-- wp:paragraph -->
<p>
<strong>Lorem
ipsum dolor sit amet, consectetur adipiscing elit&nbsp; </strong>
</p>
<p><strong>Lorem ipsum dolor sit amet, consectetur adipiscing elit&nbsp;</strong></p>
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>
Lorem
ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque
aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt,
purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam
congue laoreet massa, quis varius est tincidunt ut.</p>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam congue laoreet massa, quis varius est tincidunt ut.</p>
<!-- /wp:paragraph -->