-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Paste: Remove HTML Formatting Space #17470
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
c823d18
Paste: Remove HTML Formatting Space
ellatrix 893937f
Simplify
ellatrix 18559d6
Add unit tests
ellatrix d9c1382
Ignore pre
ellatrix 29d2f30
Address feedback
ellatrix 35795d2
Add extra test
ellatrix 760a492
Add extra plain text test
ellatrix File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
92 changes: 92 additions & 0 deletions
92
packages/blocks/src/api/raw-handling/html-formatting-remover.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
/** | ||
* Internal dependencies | ||
*/ | ||
import { isPhrasingContent } from './phrasing-content'; | ||
|
||
/**
 * Looks for the nearest neighbour of a node in the given direction that
 * `isPhrasingContent` accepts.
 *
 * The direct sibling is tried first. If it is missing or rejected, the search
 * moves up to the parent and tries the parent's sibling, and so on, for as
 * long as each parent is itself accepted by `isPhrasingContent`. The walk
 * stops as soon as a parent is missing or rejected, so the search never
 * leaves the run of phrasing content the node belongs to.
 *
 * @param {Node}   node  The node to start from.
 * @param {string} which Direction to look in; combined with `Sibling` to form
 *                       the property name, so `'previous'` or `'next'`.
 * @return {Node|undefined} The neighbouring phrasing node, or undefined when
 *                          there is none.
 */
function getSibling( node, which ) {
	const property = `${ which }Sibling`;
	let current = node;

	while ( current ) {
		const sibling = current[ property ];

		if ( sibling && isPhrasingContent( sibling ) ) {
			return sibling;
		}

		const { parentNode } = current;

		// Stop climbing once the container is not phrasing content.
		if ( ! parentNode || ! isPhrasingContent( parentNode ) ) {
			return undefined;
		}

		current = parentNode;
	}

	return undefined;
}
|
||
/**
 * Tells whether a single character is one of the white space characters
 * treated here as HTML formatting: space, carriage return, line feed or tab.
 *
 * @param {string} character The character to test. Any other value,
 *                           including undefined, yields false.
 * @return {boolean} Whether the character is formatting space.
 */
function isFormattingSpace( character ) {
	return (
		character === ' ' ||
		character === '\r' ||
		character === '\n' ||
		character === '\t'
	);
}
|
||
/**
 * Removes spacing that formats HTML.
 *
 * Only text nodes are touched, and text inside a `pre` element is left alone.
 * Runs of spaces, carriage returns, line feeds and tabs are collapsed to a
 * single space; a leading or trailing space is then dropped when it sits at
 * the edge of a block, next to a line break element, or next to more space.
 * A node that ends up empty is removed from its parent.
 *
 * @see https://www.w3.org/TR/css-text-3/#white-space-processing
 *
 * @param {Node} node The node to be processed.
 * @return {void}
 */
export default function htmlFormattingRemover( node ) {
	if ( node.nodeType !== node.TEXT_NODE ) {
		return;
	}

	// Ignore pre content. The parent element can be missing (for example when
	// the text node is detached), in which case there is no `pre` ancestor.
	const { parentElement } = node;

	if ( parentElement && parentElement.closest( 'pre' ) ) {
		return;
	}

	// First, replace any sequence of HTML formatting space with a single space.
	let newData = node.data.replace( /[ \r\n\t]+/g, ' ' );

	// Remove the leading space if the text element is at the start of a block,
	// is preceded by a line break element, or has a space in the previous
	// node.
	if ( newData[ 0 ] === ' ' ) {
		const previousSibling = getSibling( node, 'previous' );

		if (
			! previousSibling ||
			previousSibling.nodeName === 'BR' ||
			previousSibling.textContent.slice( -1 ) === ' '
		) {
			newData = newData.slice( 1 );
		}
	}

	// Remove the trailing space if the text element is at the end of a block,
	// is succeeded by a line break element, or has a space in the next text
	// node.
	if ( newData[ newData.length - 1 ] === ' ' ) {
		const nextSibling = getSibling( node, 'next' );

		if (
			! nextSibling ||
			nextSibling.nodeName === 'BR' ||
			(
				nextSibling.nodeType === nextSibling.TEXT_NODE &&
				isFormattingSpace( nextSibling.textContent[ 0 ] )
			)
		) {
			newData = newData.slice( 0, -1 );
		}
	}

	if ( newData ) {
		node.data = newData;
		return;
	}

	// If there's no data left, remove the node, so `previousSibling` stays
	// accurate. A node without a parent cannot be removed; empty it instead.
	if ( node.parentNode ) {
		node.parentNode.removeChild( node );
	} else {
		node.data = newData;
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
110 changes: 110 additions & 0 deletions
110
packages/blocks/src/api/raw-handling/test/html-formatting-remover.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/** | ||
* Internal dependencies | ||
*/ | ||
import filter from '../html-formatting-remover'; | ||
import { deepFilterHTML } from '../utils'; | ||
|
||
// Unit tests for the HTML formatting remover. Each case runs the filter over
// an HTML string through `deepFilterHTML` and compares the serialised result.
describe( 'HTMLFormattingRemover', () => {
	it( 'should trim text node without parent', () => {
		const input = 'a';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
	} );

	it( 'should remove formatting space', () => {
		const input = `
			<div>
				a
				b
			</div>
		`;
		const output = '<div>a b</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove nested formatting space', () => {
		const input = `
			<div>
				<strong>
					a
					b
				</strong>
			</div>
		`;
		const output = '<div><strong>a b</strong></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should not remove leading or trailing space if previous or next element has no space', () => {
		const input = `
			<div>
				a
				<strong>b</strong>
				c
			</div>
		`;
		const output = '<div>a <strong>b</strong> c</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove formatting space (empty)', () => {
		const input = `
			<div>
			</div>
		`;
		const output = '<div></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove block level formatting space', () => {
		const input = `
			<div>
				<div>
					a
				</div>
				<div>
					b
				</div>
			</div>
		`;
		const output = '<div><div>a</div><div>b</div></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove formatting space around br', () => {
		const input = `
			<div>
				a
				<br>
				b
			</div>
		`;
		const output = '<div>a<br>b</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	it( 'should remove formatting space around phasing content elements', () => {
		const input = `
			<div>
				<strong>
					a
				</strong>
				<strong>
					b
				</strong>
			</div>
		`;
		const output = '<div><strong>a</strong> <strong>b</strong></div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );

	// Whitespace inside pre is significant and must survive untouched.
	it( 'should ignore pre', () => {
		const input = `<pre> a\n b\n</pre>`;
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( input );
	} );

	it( 'should not remove white space if next elemnt has none', () => {
		const input = `<div><strong>a </strong>b</div>`;
		const output = '<div><strong>a </strong>b</div>';
		expect( deepFilterHTML( input, [ filter ] ) ).toEqual( output );
	} );
} );
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,7 @@ | ||
<!-- wp:paragraph --> | ||
<p> | ||
<strong>Lorem | ||
ipsum dolor sit amet, consectetur adipiscing elit </strong> | ||
</p> | ||
<p><strong>Lorem ipsum dolor sit amet, consectetur adipiscing elit </strong></p> | ||
<!-- /wp:paragraph --> | ||
|
||
<!-- wp:paragraph --> | ||
<p> | ||
Lorem | ||
ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque | ||
aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, | ||
purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam | ||
congue laoreet massa, quis varius est tincidunt ut.</p> | ||
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque aliquet hendrerit auctor. Nam lobortis, est vel lacinia tincidunt, purus tellus vehicula ex, nec pharetra justo dui sed lorem. Nam congue laoreet massa, quis varius est tincidunt ut.</p> | ||
<!-- /wp:paragraph --> |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
\f
(form feed) is also allowed here, though rare in practice.