From 666ff4079b742a63de1ff17697d9ad32ffcc6b64 Mon Sep 17 00:00:00 2001 From: Pierre Gordon Date: Wed, 1 Apr 2020 19:42:49 -0400 Subject: [PATCH 1/7] Do not process generic meta tags --- .../sanitizers/class-amp-meta-sanitizer.php | 29 ++++++++++++------- tests/php/test-class-amp-meta-sanitizer.php | 5 ++++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/includes/sanitizers/class-amp-meta-sanitizer.php b/includes/sanitizers/class-amp-meta-sanitizer.php index 57868edeec6..b193cfb94b5 100644 --- a/includes/sanitizers/class-amp-meta-sanitizer.php +++ b/includes/sanitizers/class-amp-meta-sanitizer.php @@ -70,15 +70,7 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer { * Sanitize. */ public function sanitize() { - $meta_elements = $this->dom->getElementsByTagName( static::$tag ); - - // Remove all nodes for easy reordering later on. - $meta_elements = array_map( - static function ( $element ) { - return $element->parentNode->removeChild( $element ); - }, - iterator_to_array( $meta_elements, false ) - ); + $meta_elements = iterator_to_array( $this->dom->getElementsByTagName( static::$tag ), false ); foreach ( $meta_elements as $meta_element ) { @@ -97,12 +89,29 @@ static function ( $element ) { */ if ( $meta_element->hasAttribute( Attribute::CHARSET ) ) { $this->meta_tags[ self::TAG_CHARSET ][] = $meta_element; + $meta_element->parentNode->removeChild( $meta_element ); } elseif ( Attribute::VIEWPORT === $meta_element->getAttribute( Attribute::NAME ) ) { $this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element; + $meta_element->parentNode->removeChild( $meta_element ); } elseif ( Attribute::AMP_SCRIPT_SRC === $meta_element->getAttribute( Attribute::NAME ) ) { $this->meta_tags[ self::TAG_AMP_SCRIPT_SRC ][] = $meta_element; + $meta_element->parentNode->removeChild( $meta_element ); } else { - $this->meta_tags[ self::TAG_OTHER ][] = $meta_element; + /** + * Prevent processing nodes that should not be reordered. Conditions below are adapted from the generic + * meta tag spec. + * + * @see https://github.com/ampproject/amphtml/blob/286a6302fcd007eab303b105c161e76d9e880322/validator/validator-main.protoascii#L701-L727 + */ + if ( + $meta_element->hasAttribute( 'name' ) && + preg_match( '/(^|\\s)(amp-.*|amp4ads-.*|apple-itunes-app|content-disposition|revisit-after|viewport)(\\s|$)/', $meta_element->getAttribute( 'name' ) ) + ) { + $this->meta_tags[ self::TAG_OTHER ][] = $meta_element; + $meta_element->parentNode->removeChild( $meta_element ); + } + + continue; } } diff --git a/tests/php/test-class-amp-meta-sanitizer.php b/tests/php/test-class-amp-meta-sanitizer.php index 8cba767ce2c..2eeb7de6f30 100644 --- a/tests/php/test-class-amp-meta-sanitizer.php +++ b/tests/php/test-class-amp-meta-sanitizer.php @@ -65,6 +65,11 @@ public function get_data_for_sanitize() { '' . $amp_boilerplate . '', '' . $amp_boilerplate . '', ], + + 'Ignore generic meta tags' => [ + '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '', + ], ]; } From 2d24aa7dbd20fe1f8292acc483e7c7c2ba883667 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 1 Apr 2020 17:57:16 -0700 Subject: [PATCH 2/7] Obtain non-body meta name attribute pattern from spec --- .../sanitizers/class-amp-meta-sanitizer.php | 61 ++++++++++------- tests/php/test-class-amp-meta-sanitizer.php | 65 ++++++++++++++++++- 2 files changed, 102 insertions(+), 24 deletions(-) diff --git a/includes/sanitizers/class-amp-meta-sanitizer.php b/includes/sanitizers/class-amp-meta-sanitizer.php index b193cfb94b5..cb09468260b 100644 --- a/includes/sanitizers/class-amp-meta-sanitizer.php +++ b/includes/sanitizers/class-amp-meta-sanitizer.php @@ -66,6 +66,36 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer { */ const AMP_VIEWPORT = 'width=device-width'; + /** + * Spec name for the tag spec for meta elements that are allowed in the body. + * + * @since 1.5.2 + * @var string + */ + const BODY_ANCESTOR_META_TAG_SPEC_NAME = 'meta name= and content='; + + /** + * Get tag spec for meta tags which are allowed in the body. + * + * @since 1.5.2 + * @return string Deny pattern. + */ + private function get_body_meta_tag_name_attribute_deny_pattern() { + static $pattern = null; + if ( null === $pattern ) { + $tag_spec = current( + array_filter( + AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), + static function ( $spec ) { + return isset( $spec['tag_spec']['spec_name'] ) && 'meta name= and content=' === $spec['tag_spec']['spec_name']; + } + ) + ); + $pattern = '/' . $tag_spec['attr_spec_list']['name']['blacklisted_value_regex'] . '/'; + } + return $pattern; + } + /** * Sanitize. */ @@ -88,30 +118,17 @@ public function sanitize() { * @var DOMElement $meta_element */ if ( $meta_element->hasAttribute( Attribute::CHARSET ) ) { - $this->meta_tags[ self::TAG_CHARSET ][] = $meta_element; - $meta_element->parentNode->removeChild( $meta_element ); + $this->meta_tags[ self::TAG_CHARSET ][] = $meta_element->parentNode->removeChild( $meta_element ); } elseif ( Attribute::VIEWPORT === $meta_element->getAttribute( Attribute::NAME ) ) { - $this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element; - $meta_element->parentNode->removeChild( $meta_element ); + $this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element->parentNode->removeChild( $meta_element ); } elseif ( Attribute::AMP_SCRIPT_SRC === $meta_element->getAttribute( Attribute::NAME ) ) { - $this->meta_tags[ self::TAG_AMP_SCRIPT_SRC ][] = $meta_element; - $meta_element->parentNode->removeChild( $meta_element ); - } else { - /** - * Prevent processing nodes that should not be reordered. Conditions below are adapted from the generic - * meta tag spec. - * - * @see https://github.com/ampproject/amphtml/blob/286a6302fcd007eab303b105c161e76d9e880322/validator/validator-main.protoascii#L701-L727 - */ - if ( - $meta_element->hasAttribute( 'name' ) && - preg_match( '/(^|\\s)(amp-.*|amp4ads-.*|apple-itunes-app|content-disposition|revisit-after|viewport)(\\s|$)/', $meta_element->getAttribute( 'name' ) ) - ) { - $this->meta_tags[ self::TAG_OTHER ][] = $meta_element; - $meta_element->parentNode->removeChild( $meta_element ); - } - - continue; + $this->meta_tags[ self::TAG_AMP_SCRIPT_SRC ][] = $meta_element->parentNode->removeChild( $meta_element ); + } elseif ( + $meta_element->hasAttribute( 'name' ) + && + preg_match( $this->get_body_meta_tag_name_attribute_deny_pattern(), $meta_element->getAttribute( 'name' ) ) + ) { + $this->meta_tags[ self::TAG_OTHER ][] = $meta_element->parentNode->removeChild( $meta_element ); } } diff --git a/tests/php/test-class-amp-meta-sanitizer.php b/tests/php/test-class-amp-meta-sanitizer.php index 2eeb7de6f30..32f560695b0 100644 --- a/tests/php/test-class-amp-meta-sanitizer.php +++ b/tests/php/test-class-amp-meta-sanitizer.php @@ -30,6 +30,36 @@ public function get_data_for_sanitize() { $amp_boilerplate = amp_get_boilerplate_code(); + $html5_microdata = ' + + + + + + + + + + + + + + + + +
+

Name: Amanda

+
+
+

Band: Jazz Band

+

Size: 12 players

+
+ + + '; + return [ 'Do not break the correct charset tag' => [ '' . $amp_boilerplate . '', @@ -67,12 +97,43 @@ public function get_data_for_sanitize() { ], 'Ignore generic meta tags' => [ - '' . $amp_boilerplate . '', - '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '' . $html5_microdata . '', + '' . $amp_boilerplate . '' . $html5_microdata . '', ], ]; } + /** + * Test that the expected tag specs exist for the body. + */ + public function test_expected_meta_tags() { + $named_specs = array_filter( + AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), + static function ( $spec ) { + return isset( $spec['tag_spec']['spec_name'] ) && AMP_Meta_Sanitizer::BODY_ANCESTOR_META_TAG_SPEC_NAME === $spec['tag_spec']['spec_name']; + } + ); + $this->assertCount( 1, $named_specs ); + + $body_ok_specs = array_filter( + AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), + static function ( $spec ) { + $head_required = ( + ( isset( $spec['tag_spec']['mandatory_parent'] ) && 'head' === $spec['tag_spec']['mandatory_parent'] ) + || + ( isset( $spec['tag_spec']['mandatory_ancestor'] ) && 'head' === $spec['tag_spec']['mandatory_ancestor'] ) + ); + return ! $head_required; + } + ); + + $this->assertEquals( $named_specs, $body_ok_specs ); + + $spec = current( $named_specs ); + $this->assertArrayHasKey( 'name', $spec['attr_spec_list'] ); + $this->assertEquals( [ 'blacklisted_value_regex' ], array_keys( $spec['attr_spec_list']['name'] ) ); + } + /** * Tests the sanitize method. * From 9a3bf2bc6b5f04f263323eaeaddd8c25af703a5c Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 1 Apr 2020 18:53:35 -0700 Subject: [PATCH 3/7] Make use of BODY_ANCESTOR_META_TAG_SPEC_NAME constant --- includes/sanitizers/class-amp-meta-sanitizer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/sanitizers/class-amp-meta-sanitizer.php b/includes/sanitizers/class-amp-meta-sanitizer.php index cb09468260b..7cb7e11b462 100644 --- a/includes/sanitizers/class-amp-meta-sanitizer.php +++ b/includes/sanitizers/class-amp-meta-sanitizer.php @@ -87,7 +87,7 @@ private function get_body_meta_tag_name_attribute_deny_pattern() { array_filter( AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), static function ( $spec ) { - return isset( $spec['tag_spec']['spec_name'] ) && 'meta name= and content=' === $spec['tag_spec']['spec_name']; + return isset( $spec['tag_spec']['spec_name'] ) && self::BODY_ANCESTOR_META_TAG_SPEC_NAME === $spec['tag_spec']['spec_name']; } ) ); From 583c018e443d00e52bf8dad552fb5feb298516ac Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 1 Apr 2020 19:17:11 -0700 Subject: [PATCH 4/7] Test meta[schema] and meta[property] --- tests/php/test-class-amp-meta-sanitizer.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/php/test-class-amp-meta-sanitizer.php b/tests/php/test-class-amp-meta-sanitizer.php index 32f560695b0..c5dc7a12bce 100644 --- a/tests/php/test-class-amp-meta-sanitizer.php +++ b/tests/php/test-class-amp-meta-sanitizer.php @@ -58,6 +58,8 @@ public function get_data_for_sanitize() { + + '; return [ From 1fabd53e9bca26b7abf61c27e1e96fc94a784b45 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 1 Apr 2020 19:17:50 -0700 Subject: [PATCH 5/7] Add recognition and repositioning of meta[http-equiv] elements --- includes/sanitizers/class-amp-meta-sanitizer.php | 4 ++++ tests/php/test-class-amp-meta-sanitizer.php | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/includes/sanitizers/class-amp-meta-sanitizer.php b/includes/sanitizers/class-amp-meta-sanitizer.php index 7cb7e11b462..55207f0a7e9 100644 --- a/includes/sanitizers/class-amp-meta-sanitizer.php +++ b/includes/sanitizers/class-amp-meta-sanitizer.php @@ -34,6 +34,7 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer { * Tags array keys. */ const TAG_CHARSET = 'charset'; + const TAG_HTTP_EQUIV = 'http-equiv'; const TAG_VIEWPORT = 'viewport'; const TAG_AMP_SCRIPT_SRC = 'amp_script_src'; const TAG_OTHER = 'other'; @@ -54,6 +55,7 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer { */ protected $meta_tags = [ self::TAG_CHARSET => [], + self::TAG_HTTP_EQUIV => [], self::TAG_VIEWPORT => [], self::TAG_AMP_SCRIPT_SRC => [], self::TAG_OTHER => [], @@ -119,6 +121,8 @@ public function sanitize() { */ if ( $meta_element->hasAttribute( Attribute::CHARSET ) ) { $this->meta_tags[ self::TAG_CHARSET ][] = $meta_element->parentNode->removeChild( $meta_element ); + } elseif ( $meta_element->hasAttribute( Attribute::HTTP_EQUIV ) ) { + $this->meta_tags[ self::TAG_HTTP_EQUIV ][] = $meta_element->parentNode->removeChild( $meta_element ); } elseif ( Attribute::VIEWPORT === $meta_element->getAttribute( Attribute::NAME ) ) { $this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element->parentNode->removeChild( $meta_element ); } elseif ( Attribute::AMP_SCRIPT_SRC === $meta_element->getAttribute( Attribute::NAME ) ) { diff --git a/tests/php/test-class-amp-meta-sanitizer.php b/tests/php/test-class-amp-meta-sanitizer.php index c5dc7a12bce..fee7206fd17 100644 --- a/tests/php/test-class-amp-meta-sanitizer.php +++ b/tests/php/test-class-amp-meta-sanitizer.php @@ -98,6 +98,11 @@ public function get_data_for_sanitize() { '' . $amp_boilerplate . '', ], + 'Make sure http-equiv meta tags are moved' => [ + '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '', + ], + 'Ignore generic meta tags' => [ '' . $amp_boilerplate . '' . $html5_microdata . '', '' . $amp_boilerplate . '' . $html5_microdata . '', From 7fa67897b464242bceae9df1933d13ea2ef33a5e Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 1 Apr 2020 20:26:37 -0700 Subject: [PATCH 6/7] Add critical use_document_element=true arg for AMP_Tag_And_Attribute_Sanitizer --- tests/php/test-class-amp-meta-sanitizer.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/php/test-class-amp-meta-sanitizer.php b/tests/php/test-class-amp-meta-sanitizer.php index fee7206fd17..52e91b8d752 100644 --- a/tests/php/test-class-amp-meta-sanitizer.php +++ b/tests/php/test-class-amp-meta-sanitizer.php @@ -155,7 +155,10 @@ public function test_sanitize( $source_content, $expected_content ) { $sanitizer = new AMP_Meta_Sanitizer( $dom ); $sanitizer->sanitize(); - $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom ); + $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( + $dom, + [ 'use_document_element' => true ] + ); $sanitizer->sanitize(); $this->assertEqualMarkup( $expected_content, $dom->saveHTML() ); From 442465e3b5d8aaa07443aaf5072d2bf5dd5364a1 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 1 Apr 2020 20:43:52 -0700 Subject: [PATCH 7/7] Add tests for the discrete spec'ed meta tags --- tests/php/test-class-amp-meta-sanitizer.php | 137 +++++++++++++----- .../php/test-tag-and-attribute-sanitizer.php | 6 + 2 files changed, 107 insertions(+), 36 deletions(-) diff --git a/tests/php/test-class-amp-meta-sanitizer.php b/tests/php/test-class-amp-meta-sanitizer.php index 52e91b8d752..fbbc0f86e78 100644 --- a/tests/php/test-class-amp-meta-sanitizer.php +++ b/tests/php/test-class-amp-meta-sanitizer.php @@ -12,6 +12,37 @@ */ class Test_AMP_Meta_Sanitizer extends WP_UnitTestCase { + /** + * Test that the expected tag specs exist for the body. + */ + public function test_expected_meta_tags() { + $named_specs = array_filter( + AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), + static function ( $spec ) { + return isset( $spec['tag_spec']['spec_name'] ) && AMP_Meta_Sanitizer::BODY_ANCESTOR_META_TAG_SPEC_NAME === $spec['tag_spec']['spec_name']; + } + ); + $this->assertCount( 1, $named_specs ); + + $body_ok_specs = array_filter( + AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), + static function ( $spec ) { + $head_required = ( + ( isset( $spec['tag_spec']['mandatory_parent'] ) && 'head' === $spec['tag_spec']['mandatory_parent'] ) + || + ( isset( $spec['tag_spec']['mandatory_ancestor'] ) && 'head' === $spec['tag_spec']['mandatory_ancestor'] ) + ); + return ! $head_required; + } + ); + + $this->assertEquals( $named_specs, $body_ok_specs ); + + $spec = current( $named_specs ); + $this->assertArrayHasKey( 'name', $spec['attr_spec_list'] ); + $this->assertEquals( [ 'blacklisted_value_regex' ], array_keys( $spec['attr_spec_list']['name'] ) ); + } + /** * Provide data to the test_sanitize method. * @@ -30,7 +61,10 @@ public function get_data_for_sanitize() { $amp_boilerplate = amp_get_boilerplate_code(); - $html5_microdata = ' + $meta_charset = ''; + $meta_viewport = ''; + + $meta_tags_allowed_in_body = ' @@ -62,7 +96,7 @@ public function get_data_for_sanitize() { '; - return [ + $data = [ 'Do not break the correct charset tag' => [ '' . $amp_boilerplate . '', '' . $amp_boilerplate . '', @@ -98,47 +132,78 @@ public function get_data_for_sanitize() { '' . $amp_boilerplate . '', ], - 'Make sure http-equiv meta tags are moved' => [ - '' . $amp_boilerplate . '', - '' . $amp_boilerplate . '', + 'Remove legacy meta http-equiv=Content-Type' => [ + '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '', ], - 'Ignore generic meta tags' => [ - '' . $amp_boilerplate . '' . $html5_microdata . '', - '' . $amp_boilerplate . '' . $html5_microdata . '', + 'Process invalid meta http-equiv value' => [ + // Note the AMP_Tag_And_Attribute_Sanitizer removes the http-equiv attribute because the content is invalid. + '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '', ], - ]; - } - /** - * Test that the expected tag specs exist for the body. - */ - public function test_expected_meta_tags() { - $named_specs = array_filter( - AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), - static function ( $spec ) { - return isset( $spec['tag_spec']['spec_name'] ) && AMP_Meta_Sanitizer::BODY_ANCESTOR_META_TAG_SPEC_NAME === $spec['tag_spec']['spec_name']; - } - ); - $this->assertCount( 1, $named_specs ); + 'Disallowed meta=content-deposition' => [ + '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '', + ], - $body_ok_specs = array_filter( - AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), - static function ( $spec ) { - $head_required = ( - ( isset( $spec['tag_spec']['mandatory_parent'] ) && 'head' === $spec['tag_spec']['mandatory_parent'] ) - || - ( isset( $spec['tag_spec']['mandatory_ancestor'] ) && 'head' === $spec['tag_spec']['mandatory_ancestor'] ) - ); - return ! $head_required; - } - ); + 'Disallowed meta=revisit-after' => [ + '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '', + ], - $this->assertEquals( $named_specs, $body_ok_specs ); + 'Disallowed meta=amp-bogus' => [ + '' . $amp_boilerplate . '', + '' . $amp_boilerplate . '', + ], - $spec = current( $named_specs ); - $this->assertArrayHasKey( 'name', $spec['attr_spec_list'] ); - $this->assertEquals( [ 'blacklisted_value_regex' ], array_keys( $spec['attr_spec_list']['name'] ) ); + 'Ignore generic meta tags' => [ + '' . $amp_boilerplate . '' . $meta_tags_allowed_in_body . '', + '' . $amp_boilerplate . '' . $meta_tags_allowed_in_body . '', + ], + ]; + + $http_equiv_specs = [ + 'meta http-equiv=X-UA-Compatible' => '', + 'meta http-equiv=content-language' => '', + 'meta http-equiv=pics-label' => '', + 'meta http-equiv=imagetoolbar' => '', + 'meta http-equiv=Content-Style-Type' => '', + 'meta http-equiv=Content-Script-Type' => '', + 'meta http-equiv=origin-trial' => '', + 'meta http-equiv=resource-type' => '', + 'meta http-equiv=x-dns-prefetch-control' => '', + ]; + foreach ( $http_equiv_specs as $equiv_spec => $tag ) { + $data[ "Verify http-equiv moved: $equiv_spec" ] = [ + "{$meta_charset}{$meta_viewport}{$amp_boilerplate}{$tag}", + "{$meta_charset}{$tag}{$meta_viewport}{$amp_boilerplate}", + ]; + } + + $named_specs = [ + 'meta name=apple-itunes-app' => '', + 'meta name=amp-experiments-opt-in' => '', + 'meta name=amp-3p-iframe-src' => '', + 'meta name=amp-consent-blocking' => '', + 'meta name=amp-experiment-token' => '', + 'meta name=amp-link-variable-allowed-origin' => '', + 'meta name=amp-google-clientid-id-api' => '', + 'meta name=amp-ad-doubleclick-sra' => '', + 'meta name=amp-list-load-more' => '', + 'meta name=amp-recaptcha-input' => '', + 'meta name=amp-ad-enable-refresh' => '', + 'meta name=amp-to-amp-navigation' => '', + ]; + foreach ( $named_specs as $named_spec => $tag ) { + $data[ "Verify meta[name] moved: $named_spec" ] = [ + "{$meta_charset}{$meta_viewport}{$amp_boilerplate}{$tag}", + "{$meta_charset}{$meta_viewport}{$tag}{$amp_boilerplate}", + ]; + } + + return $data; } /** diff --git a/tests/php/test-tag-and-attribute-sanitizer.php b/tests/php/test-tag-and-attribute-sanitizer.php index 9acc2491ead..48dd2c2d1bc 100644 --- a/tests/php/test-tag-and-attribute-sanitizer.php +++ b/tests/php/test-tag-and-attribute-sanitizer.php @@ -2975,6 +2975,12 @@ public function get_html_data() { null, [ 'amp-subscriptions' ], ], + 'bad http-equiv meta tag' => [ + '', + '', + [], + [ AMP_Tag_And_Attribute_Sanitizer::DISALLOWED_ATTR ], + ], ]; $bad_dev_mode_document = sprintf(