Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Block API: Consider encoding-normalized text as equivalent #11771

Merged
merged 2 commits into from
Nov 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/client-assets.php
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ function gutenberg_register_scripts_and_styles() {
'wp-dom',
'wp-element',
'wp-hooks',
'wp-html-entities',
'wp-i18n',
'wp-is-shallow-equal',
'wp-polyfill',
Expand Down
1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions packages/blocks/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 5.3.2 (Unreleased)

### Bug Fix

- The block validator is more lenient toward equivalent encoding forms.

## 5.3.1 (2018-11-12)

## 5.3.0 (2018-11-09)
Expand Down
1 change: 1 addition & 0 deletions packages/blocks/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"@wordpress/dom": "file:../dom",
"@wordpress/element": "file:../element",
"@wordpress/hooks": "file:../hooks",
"@wordpress/html-entities": "file:../html-entities",
"@wordpress/i18n": "file:../i18n",
"@wordpress/is-shallow-equal": "file:../is-shallow-equal",
"@wordpress/shortcode": "file:../shortcode",
Expand Down
23 changes: 17 additions & 6 deletions packages/blocks/src/api/test/validation.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
* Internal dependencies
*/
import {
IdentityEntityParser,
getTextPiecesSplitOnWhitespace,
getTextWithCollapsedWhitespace,
getMeaningfulAttributePairs,
isEqualTextTokensWithCollapsedWhitespace,
isEquivalentTextTokens,
getNormalizedStyleValue,
getStyleProperties,
isEqualAttributesOfName,
Expand Down Expand Up @@ -40,6 +41,16 @@ describe( 'validation', () => {
} );
} );

describe( 'IdentityEntityParser', () => {
it( 'can be constructed', () => {
expect( new IdentityEntityParser() instanceof IdentityEntityParser ).toBe( true );
} );

it( 'returns parse as decoded value', () => {
expect( new IdentityEntityParser().parse( 'quot' ) ).toBe( '"' );
} );
} );

describe( 'getTextPiecesSplitOnWhitespace()', () => {
it( 'returns text pieces spilt on whitespace', () => {
const pieces = getTextPiecesSplitOnWhitespace( ' a \t b \n c' );
Expand Down Expand Up @@ -98,9 +109,9 @@ describe( 'validation', () => {
} );
} );

describe( 'isEqualTextTokensWithCollapsedWhitespace()', () => {
describe( 'isEquivalentTextTokens()', () => {
it( 'should return false if not equal with collapsed whitespace', () => {
const isEqual = isEqualTextTokensWithCollapsedWhitespace(
const isEqual = isEquivalentTextTokens(
{ chars: ' a \t b \n c' },
{ chars: 'a \n c \t b ' },
);
Expand All @@ -110,7 +121,7 @@ describe( 'validation', () => {
} );

it( 'should return true if equal with collapsed whitespace', () => {
const isEqual = isEqualTextTokensWithCollapsedWhitespace(
const isEqual = isEquivalentTextTokens(
{ chars: ' a \t b \n c' },
{ chars: 'a \n b \t c ' },
);
Expand Down Expand Up @@ -379,8 +390,8 @@ describe( 'validation', () => {

it( 'should return true for effectively equivalent html', () => {
const isEquivalent = isEquivalentHTML(
'<div>&quot; Hello<span class="b a" id="foo"> World!</ span> "</div>',
'<div >" Hello\n<span id="foo" class="a b">World!</span>"</div>'
'<div>&quot; Hello<span class="b a" id="foo" data-foo="here &mdash; there"> World! &#128517;</ span> "</div>',
'<div >" Hello\n<span id="foo" class="a b" data-foo="here — there">World! 😅</span>"</div>'
);

expect( isEquivalent ).toBe( true );
Expand Down
86 changes: 71 additions & 15 deletions packages/blocks/src/api/validation.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
/**
* External dependencies
*/
import { tokenize } from 'simple-html-tokenizer';
import { xor, fromPairs, isEqual, includes, stubTrue } from 'lodash';
import Tokenizer from 'simple-html-tokenizer/dist/es6/tokenizer';
import {
identity,
xor,
fromPairs,
isEqual,
includes,
stubTrue,
} from 'lodash';

/**
* WordPress dependencies
*/
import deprecated from '@wordpress/deprecated';
import { decodeEntities } from '@wordpress/html-entities';

/**
* Internal dependencies
Expand Down Expand Up @@ -134,6 +142,40 @@ const MEANINGFUL_ATTRIBUTES = [
...ENUMERATED_ATTRIBUTES,
];

/**
* Array of functions which receive a text string on which to apply normalizing
* behavior for consideration in text token equivalence, carefully ordered from
* least-to-most expensive operations.
*
* @type {Array}
*/
const TEXT_NORMALIZATIONS = [
identity,
getTextWithCollapsedWhitespace,
];

/**
* Subsitute EntityParser class for `simple-html-tokenizer` which bypasses
* entity substitution in favor of validator's internal normalization.
*
* @see https://github.com/tildeio/simple-html-tokenizer/tree/master/src/entity-parser.ts
*/
export class IdentityEntityParser {
/**
* Returns a substitute string for an entity string sequence between `&`
* and `;`, or undefined if no substitution should occur.
*
* In this implementation, undefined is always returned.
*
* @param {string} entity Entity fragment discovered in HTML.
*
* @return {?string} Entity substitute value.
*/
parse( entity ) {
return decodeEntities( '&' + entity + ';' );
}
}

/**
* Object of logger functions.
*/
Expand Down Expand Up @@ -186,6 +228,10 @@ export function getTextPiecesSplitOnWhitespace( text ) {
* @return {string} Trimmed text with consecutive whitespace collapsed.
*/
export function getTextWithCollapsedWhitespace( text ) {
// This is an overly simplified whitespace comparison. The specification is
// more prescriptive of whitespace behavior in inline and block contexts.
//
// See: https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
return getTextPiecesSplitOnWhitespace( text ).join( ' ' );
}

Expand Down Expand Up @@ -220,18 +266,28 @@ export function getMeaningfulAttributePairs( token ) {
*
* @return {boolean} Whether two text tokens are equivalent.
*/
export function isEqualTextTokensWithCollapsedWhitespace( actual, expected ) {
// This is an overly simplified whitespace comparison. The specification is
// more prescriptive of whitespace behavior in inline and block contexts.
//
// See: https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
const isEquivalentText = isEqual( ...[ actual.chars, expected.chars ].map( getTextWithCollapsedWhitespace ) );

if ( ! isEquivalentText ) {
log.warning( 'Expected text `%s`, saw `%s`.', expected.chars, actual.chars );
export function isEquivalentTextTokens( actual, expected ) {
// This function is intentionally written as syntactically "ugly" as a hot
// path optimization. Text is progressively normalized in order from least-
// to-most operationally expensive, until the earliest point at which text
// can be confidently inferred as being equal.
let actualChars = actual.chars;
let expectedChars = expected.chars;

for ( let i = 0; i < TEXT_NORMALIZATIONS.length; i++ ) {
const normalize = TEXT_NORMALIZATIONS[ i ];

actualChars = normalize( actualChars );
expectedChars = normalize( expectedChars );

if ( actualChars === expectedChars ) {
return true;
}
}

return isEquivalentText;
log.warning( 'Expected text `%s`, saw `%s`.', expected.chars, actual.chars );

return false;
}

/**
Expand Down Expand Up @@ -359,8 +415,8 @@ export const isEqualTokensOfType = {
...[ actual, expected ].map( getMeaningfulAttributePairs )
);
},
Chars: isEqualTextTokensWithCollapsedWhitespace,
Comment: isEqualTextTokensWithCollapsedWhitespace,
Chars: isEquivalentTextTokens,
Comment: isEquivalentTextTokens,
};

/**
Expand Down Expand Up @@ -396,7 +452,7 @@ export function getNextNonWhitespaceToken( tokens ) {
*/
function getHTMLTokens( html ) {
try {
return tokenize( html );
return new Tokenizer( new IdentityEntityParser() ).tokenize( html );
} catch ( e ) {
log.warning( 'Malformed HTML detected: %s', html );
}
Expand Down
3 changes: 3 additions & 0 deletions test/unit/jest.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,8 @@
"/test/e2e",
"<rootDir>/.*/build/",
"<rootDir>/.*/build-module/"
],
"transformIgnorePatterns": [
"node_modules/(?!(simple-html-tokenizer)/)"
]
}