From 53ea90bf9bc2f55948c0211ea7c8b2bea05c5c84 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 11 Aug 2021 23:12:56 -0700 Subject: [PATCH] Update amphtml spec to dc6cd22a52 (#6530) Co-authored-by: Pierre Gordon --- bin/amphtml-update.py | 147 +++++++++++++++--- .../class-amp-allowed-tags-generated.php | 141 +++++++++++++++-- .../class-amp-tag-and-attribute-sanitizer.php | 12 ++ .../php/test-tag-and-attribute-sanitizer.php | 52 +++++-- 4 files changed, 309 insertions(+), 43 deletions(-) diff --git a/bin/amphtml-update.py b/bin/amphtml-update.py index 6473c3ed37e..98d6483cbde 100755 --- a/bin/amphtml-update.py +++ b/bin/amphtml-update.py @@ -387,40 +387,99 @@ def ParseRules(repo_directory, out_dir): descendant_lists[_list.name].append( val.lower() ) - # Separate extension scripts from non-extension scripts + # Separate extension scripts from non-extension scripts and gather the versions extension_scripts = defaultdict(list) + extension_specs_by_satisfies = dict() script_tags = [] for script_tag in allowed_tags['script']: if 'extension_spec' in script_tag['tag_spec']: - extension_scripts[script_tag['tag_spec']['extension_spec']['name']].append(script_tag) + extension = script_tag['tag_spec']['extension_spec']['name'] + extension_scripts[extension].append(script_tag) + if 'satisfies' in script_tag['tag_spec']: + satisfies = script_tag['tag_spec']['satisfies'] + else: + satisfies = extension + if satisfies in extension_specs_by_satisfies: + raise Exception( 'Duplicate extension script that satisfies %s.' % satisfies ) + + extension_specs_by_satisfies[satisfies] = script_tag['tag_spec']['extension_spec'] + + # These component lack an explicit requirement on a specific extension script: + # - amp-selector + # - amp-accordion + # - amp-soundcloud + # - amp-brightcove + # - amp-video + # - amp-video-iframe + # - amp-vimeo + # - amp-twitter + # - amp-instagram + # - amp-lightbox + # - amp-facebook + # - amp-youtube + # - amp-social-share + # - amp-fit-text + # So use the one with the latest version as a fallback. + if 'latest' in script_tag['tag_spec']['extension_spec']['version']: + extension_specs_by_satisfies[extension] = script_tag['tag_spec']['extension_spec'] else: script_tags.append(script_tag) + # Amend the allowed_tags to supply the required versions for each component. + for tag_name, tags in allowed_tags.items(): + for tag in tags: + tag['tag_spec'].pop('satisfies', None) # We don't need it anymore. + requires = tag['tag_spec'].pop('requires', []) + + if 'requires_extension' not in tag['tag_spec']: + continue + + requires_extension_versions = {} + for required_extension in tag['tag_spec']['requires_extension']: + required_versions = [] + for require in requires: + if require in extension_specs_by_satisfies: + if required_extension != extension_specs_by_satisfies[require]['name']: + raise Exception('Expected satisfied to be for the %s extension' % required_extension) + required_versions = extension_specs_by_satisfies[require]['version'] + break + if len( required_versions ) == 0: + if required_extension in extension_specs_by_satisfies: + if required_extension != extension_specs_by_satisfies[required_extension]['name']: + raise Exception('Expected satisfied to be for the %s extension' % required_extension) + required_versions = extension_specs_by_satisfies[required_extension]['version'] + + if len( required_versions ) == 0: + raise Exception('Unable to obtain any versions for %s' % required_extension) + + requires_extension_versions[required_extension] = filter( lambda ver: ver != 'latest', required_versions ) + tag['tag_spec']['requires_extension'] = requires_extension_versions + extensions = json.load( open( os.path.join( repo_directory, 'build-system/compile/bundles.config.extensions.json' ) ) ) - extension_versions = dict() + extensions_versions = dict() for extension in extensions: - if extension['name'] not in extension_versions: - extension_versions[ extension['name'] ] = { + if extension['name'] not in extensions_versions: + extensions_versions[ extension['name'] ] = { 'versions': [], 'latest': None, } if type(extension['version']) == list: - extension_versions[ extension['name'] ]['versions'].extend( extension['version'] ) + extensions_versions[ extension['name'] ]['versions'].extend( extension['version'] ) else: - extension_versions[ extension['name'] ]['versions'].append( extension['version'] ) - if extension_versions[ extension['name'] ]['latest'] is not None and extension_versions[ extension['name'] ]['latest'] != extension['latestVersion']: + extensions_versions[ extension['name'] ]['versions'].append( extension['version'] ) + if extensions_versions[ extension['name'] ]['latest'] is not None and extensions_versions[ extension['name'] ]['latest'] != extension['latestVersion']: logging.info('Warning: latestVersion mismatch for ' + extension['name']) - extension_versions[ extension['name'] ]['latest'] = extension['latestVersion'] + extensions_versions[ extension['name'] ]['latest'] = extension['latestVersion'] if 'options' in extension and 'wrapper' in extension['options'] and extension['options']['wrapper'] == 'bento': - extension_versions[ extension['name'] ]['bento'] = { + extensions_versions[ extension['name'] ]['bento'] = { 'version': extension['version'], 'has_css': extension['options'].get( 'hasCss', False ), } # Merge extension scripts (e.g. Bento and non-Bento) into one script per extension. for extension_name in sorted(extension_scripts): - if extension_name not in extension_versions: + if extension_name not in extensions_versions: raise Exception( 'There is a script for an unknown extension: ' + extension_name ); extension_script_list = extension_scripts[extension_name] @@ -430,28 +489,68 @@ def ParseRules(repo_directory, out_dir): if 'latest' in validator_versions: validator_versions.remove('latest') - bundle_versions = set( extension_versions[extension_name]['versions'] ) + bundle_versions = set( extensions_versions[extension_name]['versions'] ) if not validator_versions.issubset( bundle_versions ): logging.info( 'Validator versions are not a subset of bundle versions: ' + extension_name ) - if 'bento' in extension_versions[extension_name] and extension_versions[extension_name]['bento']['version'] not in validator_versions: - logging.info( 'Skipping bento for ' + extension_name + ' since version ' + extension_versions[extension_name]['bento']['version'] + ' is not yet valid' ) - del extension_versions[extension_name]['bento'] + if 'bento' in extensions_versions[extension_name] and extensions_versions[extension_name]['bento']['version'] not in validator_versions: + logging.info( 'Skipping bento for ' + extension_name + ' since version ' + extensions_versions[extension_name]['bento']['version'] + ' is not yet valid' ) + del extensions_versions[extension_name]['bento'] validator_versions = sorted( validator_versions, key=lambda version: map(int, version.split('.') ) ) extension_script_list[0]['tag_spec']['extension_spec']['version'] = validator_versions - if 'bento' in extension_versions[extension_name] and extension_versions[extension_name]['bento']['version'] in validator_versions: - extension_script_list[0]['tag_spec']['extension_spec']['bento'] = extension_versions[extension_name]['bento'] + if 'bento' in extensions_versions[extension_name] and extensions_versions[extension_name]['bento']['version'] in validator_versions: + extension_script_list[0]['tag_spec']['extension_spec']['bento'] = extensions_versions[extension_name]['bento'] - extension_script_list[0]['tag_spec']['extension_spec']['latest'] = extension_versions[extension_name]['latest'] + extension_script_list[0]['tag_spec']['extension_spec']['latest'] = extensions_versions[extension_name]['latest'] + + extension_script_list[0]['tag_spec']['extension_spec'].pop('version_name', None) + + # Remove the spec name since we've potentially merged multiple scripts, thus it does not refer to one. + extension_script_list[0]['tag_spec'].pop('spec_name', None) - if 'version_name' in extension_script_list[0]['tag_spec']['extension_spec']: - del extension_script_list[0]['tag_spec']['extension_spec']['version_name'] script_tags.append(extension_script_list[0]) allowed_tags['script'] = script_tags + # Now that Bento information is in hand, re-decorate specs with require_extension to indicate which + for tag_name, tags in allowed_tags.items(): + tags_bento_status = [] + for tag in tags: + if 'requires_extension' not in tag['tag_spec']: + continue + + # Determine the Bento availability of all the required extensions. + tag_extensions_with_bento = {} + for extension, extension_versions in tag['tag_spec']['requires_extension'].items(): + if extension in extensions_versions and 'bento' in extensions_versions[extension] and extensions_versions[extension]['bento']['version'] in extension_versions: + tag_extensions_with_bento[ extension ] = True + else: + tag_extensions_with_bento[ extension ] = False + + # Mark that this tag is for Bento since all its required extensions have Bento available. + if len( tag_extensions_with_bento ) > 0 and False not in tag_extensions_with_bento.values(): + tags_bento_status.append( True ) + tag['tag_spec']['bento'] = True + else: + tags_bento_status.append( False ) + + # Now that the ones with Bento have been identified, add flags to tag specs when there are different versions specifically for Bento: + for tag in tags: + if 'requires_extension' not in tag['tag_spec']: + continue + + if False not in tags_bento_status: + # Clear the Bento flag if _all_ of the components are for Bento. + tag['tag_spec'].pop( 'bento', None ) + elif True in tags_bento_status and 'bento' not in tag['tag_spec']: + # Otherwise, if _some_ of the components were exclusively for Bento, flag the others as being _not_ for Bento specifically. + tag['tag_spec']['bento'] = False + + # Now convert requires_versions back into a list of extensions rather than an extension/versions mapping. + tag['tag_spec']['requires_extension'] = sorted( tag['tag_spec']['requires_extension'].keys() ) + return allowed_tags, attr_lists, descendant_lists, reference_points, versions @@ -553,6 +652,9 @@ def GetTagRules(tag_spec): for requires_extension in tag_spec.requires_extension: requires_extension_list.add(requires_extension) + if hasattr(tag_spec, 'requires') and len( tag_spec.requires ) != 0: + tag_rules['requires'] = [ requires for requires in tag_spec.requires ] + if hasattr(tag_spec, 'also_requires_tag_warning') and len( tag_spec.also_requires_tag_warning ) != 0: for also_requires_tag_warning in tag_spec.also_requires_tag_warning: matches = re.search( r'(amp-\S+) extension( \.js)? script', also_requires_tag_warning ) @@ -650,6 +752,11 @@ def GetTagRules(tag_spec): if tag_spec.HasField('spec_name'): tag_rules['spec_name'] = UnicodeEscape(tag_spec.spec_name) + if hasattr(tag_spec, 'satisfies') and len( tag_spec.satisfies ) > 0: + if len( tag_spec.satisfies ) > 1: + raise Exception('More than expected was satisfied') + tag_rules['satisfies'] = tag_spec.satisfies[0] + if tag_spec.HasField('spec_url'): tag_rules['spec_url'] = UnicodeEscape(tag_spec.spec_url) diff --git a/includes/sanitizers/class-amp-allowed-tags-generated.php b/includes/sanitizers/class-amp-allowed-tags-generated.php index a81459c69eb..831a1043987 100644 --- a/includes/sanitizers/class-amp-allowed-tags-generated.php +++ b/includes/sanitizers/class-amp-allowed-tags-generated.php @@ -260,6 +260,7 @@ class AMP_Allowed_Tags_Generated { 'amp-list', 'amp-live-list', 'amp-pixel', + 'amp-render', 'amp-state', 'amp-story-360', 'amp-story-auto-analytics', @@ -441,6 +442,7 @@ class AMP_Allowed_Tags_Generated { 'amp-powr-player', 'amp-reach-player', 'amp-reddit', + 'amp-render', 'amp-riddle-quiz', 'amp-soundcloud', 'amp-springboard-player', @@ -550,6 +552,7 @@ class AMP_Allowed_Tags_Generated { 'table', 'tbody', 'td', + 'template', 'text', 'textpath', 'tfoot', @@ -1584,7 +1587,7 @@ class AMP_Allowed_Tags_Generated { 'value_regex' => '([^,]+\\s+(true|false),\\s*)*(true|false)', ), 'loop' => array( - 'value_regex' => '([^,]+\\s+(true|false),\\s*)*(true|false)', + 'value_regex' => '([^,]+\\s+(true|false),\\s*)*(true|false|^$)', ), 'media' => array(), 'mixed-length' => array( @@ -1673,7 +1676,7 @@ class AMP_Allowed_Tags_Generated { 'mandatory' => true, ), 'loop' => array( - 'value_regex' => '([^,]+\\s+(true|false),\\s*)*(true|false)', + 'value_regex' => '([^,]+\\s+(true|false),\\s*)*(true|false|^$)', ), 'media' => array(), 'mixed-length' => array( @@ -1726,8 +1729,8 @@ class AMP_Allowed_Tags_Generated { ), ), 'requires_extension' => array( - 'amp-lightbox-gallery', 'amp-base-carousel', + 'amp-lightbox-gallery', ), 'spec_name' => 'AMP-BASE-CAROUSEL [lightbox]', 'spec_url' => 'https://amp.dev/documentation/components/amp-base-carousel/', @@ -3131,11 +3134,42 @@ class AMP_Allowed_Tags_Generated { 4, ), ), + 'bento' => false, 'requires_extension' => array( 'amp-facebook-comments', ), ), ), + array( + 'attr_spec_list' => array( + 'data-href' => array( + 'mandatory' => true, + ), + 'media' => array(), + 'noloading' => array( + 'value' => array( + '', + ), + ), + ), + 'tag_spec' => array( + 'amp_layout' => array( + 'supported_layouts' => array( + 6, + 2, + 3, + 7, + 1, + 4, + ), + ), + 'bento' => true, + 'requires_extension' => array( + 'amp-facebook', + ), + 'spec_name' => 'amp-facebook-comments 1.0', + ), + ), ), 'amp-facebook-like' => array( array( @@ -3168,11 +3202,49 @@ class AMP_Allowed_Tags_Generated { 4, ), ), + 'bento' => false, 'requires_extension' => array( 'amp-facebook-like', ), ), ), + array( + 'attr_spec_list' => array( + 'data-href' => array( + 'mandatory' => true, + 'value_url' => array( + 'allow_relative' => false, + 'protocol' => array( + 'http', + 'https', + ), + ), + ), + 'media' => array(), + 'noloading' => array( + 'value' => array( + '', + ), + ), + ), + 'tag_spec' => array( + 'amp_layout' => array( + 'supported_layouts' => array( + 6, + 2, + 3, + 7, + 1, + 4, + ), + ), + 'bento' => true, + 'requires_extension' => array( + 'amp-facebook', + ), + 'spec_name' => 'amp-facebook-like 1.0', + ), + ), ), 'amp-facebook-page' => array( array( @@ -3205,11 +3277,49 @@ class AMP_Allowed_Tags_Generated { 4, ), ), + 'bento' => false, 'requires_extension' => array( 'amp-facebook-page', ), ), ), + array( + 'attr_spec_list' => array( + 'data-href' => array( + 'mandatory' => true, + 'value_url' => array( + 'allow_relative' => false, + 'protocol' => array( + 'http', + 'https', + ), + ), + ), + 'media' => array(), + 'noloading' => array( + 'value' => array( + '', + ), + ), + ), + 'tag_spec' => array( + 'amp_layout' => array( + 'supported_layouts' => array( + 6, + 2, + 3, + 7, + 1, + 4, + ), + ), + 'bento' => true, + 'requires_extension' => array( + 'amp-facebook', + ), + 'spec_name' => 'amp-facebook-page 1.0', + ), + ), ), 'amp-fit-text' => array( array( @@ -3897,6 +4007,7 @@ class AMP_Allowed_Tags_Generated { 'value' => array( 'true', 'false', + '', ), ), 'media' => array(), @@ -6911,7 +7022,7 @@ class AMP_Allowed_Tags_Generated { ), ), 'loop' => array( - 'value_regex' => '([^,]+\\s+(true|false),\\s*)*(true|false)', + 'value_regex' => '([^,]+\\s+(true|false),\\s*)*(true|false|^$)', ), 'max-item-width' => array( 'value_regex' => '([^,]+\\s+(\\d+),\\s*)*(\\d+)', @@ -11082,14 +11193,7 @@ class AMP_Allowed_Tags_Generated { ), 'html' => array( array( - 'attr_spec_list' => array( - 'data-amp-autocomplete-opt-in' => array( - 'disallowed_value_regex' => 'false', - 'value' => array( - 'false', - ), - ), - ), + 'attr_spec_list' => array(), 'tag_spec' => array( 'mandatory' => true, 'mandatory_parent' => '!doctype', @@ -11591,8 +11695,8 @@ class AMP_Allowed_Tags_Generated { 'tag_spec' => array( 'mandatory_parent' => 'amp-autocomplete', 'requires_extension' => array( - 'amp-form', 'amp-autocomplete', + 'amp-form', ), 'spec_name' => 'amp-autocomplete > input', ), @@ -15215,7 +15319,6 @@ class AMP_Allowed_Tags_Generated { '0.1', ), ), - 'spec_name' => 'amp-ad extension script', ), ), array( @@ -15710,11 +15813,16 @@ class AMP_Allowed_Tags_Generated { ), 'tag_spec' => array( 'extension_spec' => array( + 'bento' => array( + 'has_css' => true, + 'version' => '1.0', + ), 'latest' => '0.1', 'name' => 'amp-brightcove', 'requires_usage' => true, 'version' => array( '0.1', + '1.0', ), ), ), @@ -16188,11 +16296,16 @@ class AMP_Allowed_Tags_Generated { ), 'tag_spec' => array( 'extension_spec' => array( + 'bento' => array( + 'has_css' => true, + 'version' => '1.0', + ), 'latest' => '0.1', 'name' => 'amp-facebook', 'requires_usage' => true, 'version' => array( '0.1', + '1.0', ), ), ), diff --git a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php index 3105ce9ee76..6f000cb29ec 100644 --- a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php +++ b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php @@ -179,6 +179,7 @@ public function __construct( $dom, $args = [] ) { 'amp_allowed_tags' => AMP_Allowed_Tags_Generated::get_allowed_tags(), 'amp_globally_allowed_attributes' => AMP_Allowed_Tags_Generated::get_allowed_attributes(), 'amp_layout_allowed_attributes' => AMP_Allowed_Tags_Generated::get_layout_attributes(), + 'prefer_bento' => false, ]; parent::__construct( $dom, $args ); @@ -430,6 +431,17 @@ private function process_node( DOMElement $node ) { $validation_errors = []; $rule_spec_list = $this->allowed_tags[ $node->nodeName ]; foreach ( $rule_spec_list as $id => $rule_spec ) { + // When there are multiple versions of a rule spec, with one specifically for Bento and another for + // non-Bento make sure that only the preferred version is considered. Otherwise, the wrong requires_extension + // constraint may be applied. + if ( + isset( $rule_spec['tag_spec']['bento'] ) + && + $this->args['prefer_bento'] !== $rule_spec['tag_spec']['bento'] + ) { + continue; + } + $validity = $this->validate_tag_spec_for_node( $node, $rule_spec[ AMP_Rule_Spec::TAG_SPEC ] ); if ( true === $validity ) { $rule_spec_list_to_validate[ $id ] = $this->get_rule_spec_list_to_validate( $node, $rule_spec ); diff --git a/tests/php/test-tag-and-attribute-sanitizer.php b/tests/php/test-tag-and-attribute-sanitizer.php index 316c0adc3ed..9cc943357b9 100644 --- a/tests/php/test-tag-and-attribute-sanitizer.php +++ b/tests/php/test-tag-and-attribute-sanitizer.php @@ -191,6 +191,14 @@ public function get_body_data() { [ 'amp-facebook-comments' ], ], + 'amp-facebook-comments_bento' => [ + '', + null, // No change. + [ 'amp-facebook' ], + [], + [ 'prefer_bento' => true ], + ], + 'amp-facebook-comments_missing_required_attribute' => [ '', '', @@ -209,6 +217,28 @@ public function get_body_data() { [ 'amp-facebook-like' ], ], + 'amp-facebook-like_bento' => [ + '', + null, // No change. + [ 'amp-facebook' ], + [], + [ 'prefer_bento' => true ], + ], + + 'amp-facebook-page' => [ + '', + null, // No change. + [ 'amp-facebook-page' ], + ], + + 'amp-facebook-page_bento' => [ + '', + null, // No change. + [ 'amp-facebook' ], + [], + [ 'prefer_bento' => true ], + ], + 'amp-facebook-like_missing_required_attribute' => [ '', '', @@ -772,7 +802,7 @@ static function () { 'base_carousel' => [ ' - +
first slide
second slide
@@ -3695,20 +3725,24 @@ public function get_html_data() { * @param string $expected The markup to expect. * @param array $expected_scripts The AMP component script names that are obtained through sanitization. * @param array|null $expected_errors Expected validation errors, either codes or validation error subsets. + * @param array $sanitizer_args Sanitizer args. */ - public function test_sanitize( $source, $expected = null, $expected_scripts = [], $expected_errors = [] ) { + public function test_sanitize( $source, $expected = null, $expected_scripts = [], $expected_errors = [], $sanitizer_args = [] ) { $expected = isset( $expected ) ? $expected : $source; $dom = Document::fromHtml( $source, Options::DEFAULTS ); $actual_errors = []; $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom, - [ - 'use_document_element' => true, - 'validation_error_callback' => static function( $error ) use ( &$actual_errors ) { - $actual_errors[] = $error; - return true; - }, - ] + array_merge( + [ + 'use_document_element' => true, + 'validation_error_callback' => static function( $error ) use ( &$actual_errors ) { + $actual_errors[] = $error; + return true; + }, + ], + $sanitizer_args + ) ); $sanitizer->sanitize(); $content = $dom->saveHTML( $dom->documentElement );