Skip to content

Commit 1237243

Browse files
aduth authored and youknowriad committed
Block API: Consider encoding-normalized text as equivalent (#11771)
1 parent 87d940f commit 1237243

File tree

7 files changed

+100
-21
lines changed

7 files changed

+100
-21
lines changed

lib/client-assets.php

+1
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ function gutenberg_register_scripts_and_styles() {
470470
'wp-dom',
471471
'wp-element',
472472
'wp-hooks',
473+
'wp-html-entities',
473474
'wp-i18n',
474475
'wp-is-shallow-equal',
475476
'wp-polyfill',

package-lock.json

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/blocks/CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## 5.3.2 (Unreleased)
2+
3+
### Bug Fix
4+
5+
- The block validator is more lenient toward equivalent encoding forms.
6+
17
## 5.3.1 (2018-11-12)
28

39
## 5.3.0 (2018-11-09)

packages/blocks/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
"@wordpress/dom": "file:../dom",
3030
"@wordpress/element": "file:../element",
3131
"@wordpress/hooks": "file:../hooks",
32+
"@wordpress/html-entities": "file:../html-entities",
3233
"@wordpress/i18n": "file:../i18n",
3334
"@wordpress/is-shallow-equal": "file:../is-shallow-equal",
3435
"@wordpress/shortcode": "file:../shortcode",

packages/blocks/src/api/test/validation.js

+17-6
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
* Internal dependencies
33
*/
44
import {
5+
IdentityEntityParser,
56
getTextPiecesSplitOnWhitespace,
67
getTextWithCollapsedWhitespace,
78
getMeaningfulAttributePairs,
8-
isEqualTextTokensWithCollapsedWhitespace,
9+
isEquivalentTextTokens,
910
getNormalizedStyleValue,
1011
getStyleProperties,
1112
isEqualAttributesOfName,
@@ -40,6 +41,16 @@ describe( 'validation', () => {
4041
} );
4142
} );
4243

44+
describe( 'IdentityEntityParser', () => {
45+
it( 'can be constructed', () => {
46+
expect( new IdentityEntityParser() instanceof IdentityEntityParser ).toBe( true );
47+
} );
48+
49+
it( 'returns parse as decoded value', () => {
50+
expect( new IdentityEntityParser().parse( 'quot' ) ).toBe( '"' );
51+
} );
52+
} );
53+
4354
describe( 'getTextPiecesSplitOnWhitespace()', () => {
4455
it( 'returns text pieces spilt on whitespace', () => {
4556
const pieces = getTextPiecesSplitOnWhitespace( ' a \t b \n c' );
@@ -98,9 +109,9 @@ describe( 'validation', () => {
98109
} );
99110
} );
100111

101-
describe( 'isEqualTextTokensWithCollapsedWhitespace()', () => {
112+
describe( 'isEquivalentTextTokens()', () => {
102113
it( 'should return false if not equal with collapsed whitespace', () => {
103-
const isEqual = isEqualTextTokensWithCollapsedWhitespace(
114+
const isEqual = isEquivalentTextTokens(
104115
{ chars: ' a \t b \n c' },
105116
{ chars: 'a \n c \t b ' },
106117
);
@@ -110,7 +121,7 @@ describe( 'validation', () => {
110121
} );
111122

112123
it( 'should return true if equal with collapsed whitespace', () => {
113-
const isEqual = isEqualTextTokensWithCollapsedWhitespace(
124+
const isEqual = isEquivalentTextTokens(
114125
{ chars: ' a \t b \n c' },
115126
{ chars: 'a \n b \t c ' },
116127
);
@@ -379,8 +390,8 @@ describe( 'validation', () => {
379390

380391
it( 'should return true for effectively equivalent html', () => {
381392
const isEquivalent = isEquivalentHTML(
382-
'<div>&quot; Hello<span class="b a" id="foo"> World!</ span> "</div>',
383-
'<div >" Hello\n<span id="foo" class="a b">World!</span>"</div>'
393+
'<div>&quot; Hello<span class="b a" id="foo" data-foo="here &mdash; there"> World! &#128517;</ span> "</div>',
394+
'<div >" Hello\n<span id="foo" class="a b" data-foo="here — there">World! 😅</span>"</div>'
384395
);
385396

386397
expect( isEquivalent ).toBe( true );

packages/blocks/src/api/validation.js

+71-15
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
11
/**
22
* External dependencies
33
*/
4-
import { tokenize } from 'simple-html-tokenizer';
5-
import { xor, fromPairs, isEqual, includes, stubTrue } from 'lodash';
4+
import Tokenizer from 'simple-html-tokenizer/dist/es6/tokenizer';
5+
import {
6+
identity,
7+
xor,
8+
fromPairs,
9+
isEqual,
10+
includes,
11+
stubTrue,
12+
} from 'lodash';
613

714
/**
815
* WordPress dependencies
916
*/
1017
import deprecated from '@wordpress/deprecated';
18+
import { decodeEntities } from '@wordpress/html-entities';
1119

1220
/**
1321
* Internal dependencies
@@ -134,6 +142,40 @@ const MEANINGFUL_ATTRIBUTES = [
134142
...ENUMERATED_ATTRIBUTES,
135143
];
136144

145+
/**
146+
* Array of functions which receive a text string on which to apply normalizing
147+
* behavior for consideration in text token equivalence, carefully ordered from
148+
* least-to-most expensive operations.
149+
*
150+
* @type {Array}
151+
*/
152+
const TEXT_NORMALIZATIONS = [
153+
identity,
154+
getTextWithCollapsedWhitespace,
155+
];
156+
157+
/**
158+
* Substitute EntityParser class for `simple-html-tokenizer` which delegates
159+
* entity decoding to `decodeEntities` from `@wordpress/html-entities`.
160+
*
161+
* @see https://github.com/tildeio/simple-html-tokenizer/tree/master/src/entity-parser.ts
162+
*/
163+
export class IdentityEntityParser {
164+
/**
165+
* Returns a substitute string for an entity string sequence between `&`
166+
* and `;`, or undefined if no substitution should occur.
167+
*
168+
* In this implementation, the entity is decoded using `decodeEntities`.
169+
*
170+
* @param {string} entity Entity fragment discovered in HTML.
171+
*
172+
* @return {?string} Entity substitute value.
173+
*/
174+
parse( entity ) {
175+
return decodeEntities( '&' + entity + ';' );
176+
}
177+
}
178+
137179
/**
138180
* Object of logger functions.
139181
*/
@@ -186,6 +228,10 @@ export function getTextPiecesSplitOnWhitespace( text ) {
186228
* @return {string} Trimmed text with consecutive whitespace collapsed.
187229
*/
188230
export function getTextWithCollapsedWhitespace( text ) {
231+
// This is an overly simplified whitespace comparison. The specification is
232+
// more prescriptive of whitespace behavior in inline and block contexts.
233+
//
234+
// See: https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
189235
return getTextPiecesSplitOnWhitespace( text ).join( ' ' );
190236
}
191237

@@ -220,18 +266,28 @@ export function getMeaningfulAttributePairs( token ) {
220266
*
221267
* @return {boolean} Whether two text tokens are equivalent.
222268
*/
223-
export function isEqualTextTokensWithCollapsedWhitespace( actual, expected ) {
224-
// This is an overly simplified whitespace comparison. The specification is
225-
// more prescriptive of whitespace behavior in inline and block contexts.
226-
//
227-
// See: https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
228-
const isEquivalentText = isEqual( ...[ actual.chars, expected.chars ].map( getTextWithCollapsedWhitespace ) );
229-
230-
if ( ! isEquivalentText ) {
231-
log.warning( 'Expected text `%s`, saw `%s`.', expected.chars, actual.chars );
269+
export function isEquivalentTextTokens( actual, expected ) {
270+
// This function is intentionally written as syntactically "ugly" as a hot
271+
// path optimization. Text is progressively normalized in order from least-
272+
// to-most operationally expensive, until the earliest point at which text
273+
// can be confidently inferred as being equal.
274+
let actualChars = actual.chars;
275+
let expectedChars = expected.chars;
276+
277+
for ( let i = 0; i < TEXT_NORMALIZATIONS.length; i++ ) {
278+
const normalize = TEXT_NORMALIZATIONS[ i ];
279+
280+
actualChars = normalize( actualChars );
281+
expectedChars = normalize( expectedChars );
282+
283+
if ( actualChars === expectedChars ) {
284+
return true;
285+
}
232286
}
233287

234-
return isEquivalentText;
288+
log.warning( 'Expected text `%s`, saw `%s`.', expected.chars, actual.chars );
289+
290+
return false;
235291
}
236292

237293
/**
@@ -359,8 +415,8 @@ export const isEqualTokensOfType = {
359415
...[ actual, expected ].map( getMeaningfulAttributePairs )
360416
);
361417
},
362-
Chars: isEqualTextTokensWithCollapsedWhitespace,
363-
Comment: isEqualTextTokensWithCollapsedWhitespace,
418+
Chars: isEquivalentTextTokens,
419+
Comment: isEquivalentTextTokens,
364420
};
365421

366422
/**
@@ -396,7 +452,7 @@ export function getNextNonWhitespaceToken( tokens ) {
396452
*/
397453
function getHTMLTokens( html ) {
398454
try {
399-
return tokenize( html );
455+
return new Tokenizer( new IdentityEntityParser() ).tokenize( html );
400456
} catch ( e ) {
401457
log.warning( 'Malformed HTML detected: %s', html );
402458
}

test/unit/jest.config.json

+3
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,8 @@
1414
"/test/e2e",
1515
"<rootDir>/.*/build/",
1616
"<rootDir>/.*/build-module/"
17+
],
18+
"transformIgnorePatterns": [
19+
"node_modules/(?!(simple-html-tokenizer)/)"
1720
]
1821
}

0 commit comments

Comments
 (0)