-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Paste: Remove HTML Formatting Space (#17470)
* Paste: Remove HTML Formatting Space * Simplify * Add unit tests * Ignore pre * Address feedback * Add extra test * Add extra plain text test
- Loading branch information
Showing
9 changed files
with
215 additions
and
66 deletions.
There are no files selected for viewing
92 changes: 92 additions & 0 deletions
92
packages/blocks/src/api/raw-handling/html-formatting-remover.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
/** | ||
* Internal dependencies | ||
*/ | ||
import { isPhrasingContent } from './phrasing-content'; | ||
|
||
/**
 * Looks for a neighbouring node in the given direction that
 * `isPhrasingContent` accepts.
 *
 * The direct sibling is checked first. If there is none, or it is rejected,
 * the search moves up to the parent and repeats from there, but only for as
 * long as the parent itself is accepted by `isPhrasingContent`. Reaching a
 * missing or rejected parent ends the search with no result.
 *
 * @param {Node}   node  The node to start searching from.
 * @param {string} which Direction to look in, used to build the property
 *                       name `${ which }Sibling` (`'previous'` or `'next'`).
 * @return {Node|undefined} The sibling found, or undefined if there is none.
 */
function getSibling( node, which ) {
	const property = `${ which }Sibling`;
	let current = node;

	while ( current ) {
		const sibling = current[ property ];

		if ( sibling && isPhrasingContent( sibling ) ) {
			return sibling;
		}

		const { parentNode } = current;

		// Stop climbing once the ancestor is missing or rejected by
		// `isPhrasingContent`.
		if ( ! parentNode || ! isPhrasingContent( parentNode ) ) {
			return;
		}

		current = parentNode;
	}
}
|
||
/**
 * Tells whether a single character is one of the white space characters
 * treated here as HTML formatting: space, carriage return, line feed or tab.
 *
 * @param {string|undefined} character The character to test. Any other
 *                                     value, including undefined, is not
 *                                     formatting space.
 * @return {boolean} True if the character is formatting space.
 */
function isFormattingSpace( character ) {
	return (
		character === ' ' ||
		character === '\r' ||
		character === '\n' ||
		character === '\t'
	);
}
|
||
/**
 * Removes spacing that formats HTML.
 *
 * Only text nodes are touched, and text anywhere inside a `pre` element is
 * left alone. Runs of space, carriage return, line feed and tab are collapsed
 * into a single space; a leading or trailing space is then dropped when it
 * sits at the edge of its surrounding content, next to a `br`, or next to
 * other space. A node that ends up empty is removed from its parent.
 *
 * @see https://www.w3.org/TR/css-text-3/#white-space-processing
 *
 * @param {Node} node The node to be processed.
 * @return {void}
 */
export default function htmlFormattingRemover( node ) {
	if ( node.nodeType !== node.TEXT_NODE ) {
		return;
	}

	// Ignore pre content. The ancestors are walked through `parentNode`
	// instead of calling `node.parentElement.closest( 'pre' )`, because
	// `parentElement` is null when the parent is not an element (or when the
	// node is detached), which would make the call throw.
	let ancestor = node.parentNode;

	while ( ancestor ) {
		if (
			ancestor.nodeType === ancestor.ELEMENT_NODE &&
			ancestor.nodeName === 'PRE'
		) {
			return;
		}

		ancestor = ancestor.parentNode;
	}

	// First, replace any sequence of HTML formatting space with a single space.
	let newData = node.data.replace( /[ \r\n\t]+/g, ' ' );

	// Remove the leading space if the text element is at the start of a block,
	// is preceded by a line break element, or has a space in the previous
	// node.
	if ( newData[ 0 ] === ' ' ) {
		const previousSibling = getSibling( node, 'previous' );

		if (
			! previousSibling ||
			previousSibling.nodeName === 'BR' ||
			previousSibling.textContent.slice( -1 ) === ' '
		) {
			newData = newData.slice( 1 );
		}
	}

	// Remove the trailing space if the text element is at the end of a block,
	// is succeeded by a line break element, or has a space in the next text
	// node.
	if ( newData[ newData.length - 1 ] === ' ' ) {
		const nextSibling = getSibling( node, 'next' );

		if (
			! nextSibling ||
			nextSibling.nodeName === 'BR' ||
			(
				nextSibling.nodeType === nextSibling.TEXT_NODE &&
				isFormattingSpace( nextSibling.textContent[ 0 ] )
			)
		) {
			newData = newData.slice( 0, -1 );
		}
	}

	// If there's no data left, remove the node, so `previousSibling` stays
	// accurate. Otherwise, update the node data. A detached node has no parent
	// to be removed from, so it is simply emptied.
	if ( ! newData && node.parentNode ) {
		node.parentNode.removeChild( node );
	} else {
		node.data = newData;
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
110 changes: 110 additions & 0 deletions
110
packages/blocks/src/api/raw-handling/test/html-formatting-remover.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/** | ||
* Internal dependencies | ||
*/ | ||
import filter from '../html-formatting-remover'; | ||
import { deepFilterHTML } from '../utils'; | ||
|
||
// Test suite for the HTML formatting remover filter. Each case passes an HTML
// string and the filter to `deepFilterHTML` and compares the returned markup
// with the expected string.
// NOTE(review): `deepFilterHTML` is defined in `../utils`; presumably it
// applies the filter to every node of the parsed input. Confirm there.
describe( 'HTMLFormattingRemover', () => {
	it( 'should trim text node without parent', () => {
		const input = 'a';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
	} );

	it( 'should remove formatting space', () => {
		const input = `
			<div>
				a
				b
			</div>
		`;
		const output = '<div>a b</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove nested formatting space', () => {
		const input = `
			<div>
				<strong>
					a
					b
				</strong>
			</div>
		`;
		const output = '<div><strong>a b</strong></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should not remove leading or trailing space if previous or next element has no space', () => {
		const input = `
			<div>
				a
				<strong>b</strong>
				c
			</div>
		`;
		const output = '<div>a <strong>b</strong> c</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove formatting space (empty)', () => {
		const input = `
			<div>
			</div>
		`;
		const output = '<div></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove block level formatting space', () => {
		const input = `
			<div>
				<div>
					a
				</div>
				<div>
					b
				</div>
			</div>
		`;
		const output = '<div><div>a</div><div>b</div></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove formatting space around br', () => {
		const input = `
			<div>
				a
				<br>
				b
			</div>
		`;
		const output = '<div>a<br>b</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove formatting space around phrasing content elements', () => {
		const input = `
			<div>
				<strong>
					a
				</strong>
				<strong>
					b
				</strong>
			</div>
		`;
		const output = '<div><strong>a</strong> <strong>b</strong></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should ignore pre', () => {
		const input = `<pre> a\n b\n</pre>`;
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
	} );

	it( 'should not remove white space if next element has none', () => {
		const input = `<div><strong>a </strong>b</div>`;
		const output = '<div><strong>a </strong>b</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );
} );
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,7 @@ | ||
<!-- wp:paragraph --> | ||
<p> | ||
<strong>Lorem | ||
ipsum dolor sit amet, consectetur adipiscing elit </strong> | ||
</p> | ||
<p><strong>Lorem ipsum dolor sit amet, consectetur adipiscing elit </strong></p> | ||
<!-- /wp:paragraph --> | ||
|
||
<!-- wp:paragraph --> | ||
<p> | ||
Lorem | ||
ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque | ||
aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, | ||
purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam | ||
congue laoreet massa, quis varius est tincidunt ut.</p> | ||
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam congue laoreet massa, quis varius est tincidunt ut.</p> | ||
<!-- /wp:paragraph --> |