diff --git a/includes/amp-helper-functions.php b/includes/amp-helper-functions.php index 67cc7878c09..e75beaca5b9 100644 --- a/includes/amp-helper-functions.php +++ b/includes/amp-helper-functions.php @@ -915,6 +915,7 @@ function amp_get_content_sanitizers( $post = null ) { 'AMP_Style_Sanitizer' => [ 'include_manifest_comment' => ( defined( 'WP_DEBUG' ) && WP_DEBUG ) ? 'always' : 'when_excessive', ], + 'AMP_Meta_Sanitizer' => [], 'AMP_Tag_And_Attribute_Sanitizer' => [], // Note: This whitelist sanitizer must come at the end to clean up any remaining issues the other sanitizers didn't catch. ]; @@ -971,7 +972,7 @@ function amp_get_content_sanitizers( $post = null ) { } // Force style sanitizer and whitelist sanitizer to be at end. - foreach ( [ 'AMP_Style_Sanitizer', 'AMP_Tag_And_Attribute_Sanitizer' ] as $class_name ) { + foreach ( [ 'AMP_Style_Sanitizer', 'AMP_Meta_Sanitizer', 'AMP_Tag_And_Attribute_Sanitizer' ] as $class_name ) { if ( isset( $sanitizers[ $class_name ] ) ) { $sanitizer = $sanitizers[ $class_name ]; unset( $sanitizers[ $class_name ] ); diff --git a/includes/class-amp-autoloader.php b/includes/class-amp-autoloader.php index 80b0fe4e765..c6241e87671 100644 --- a/includes/class-amp-autoloader.php +++ b/includes/class-amp-autoloader.php @@ -78,6 +78,7 @@ class AMP_Autoloader { 'AMP_Iframe_Sanitizer' => 'includes/sanitizers/class-amp-iframe-sanitizer', 'AMP_Img_Sanitizer' => 'includes/sanitizers/class-amp-img-sanitizer', 'AMP_Link_Sanitizer' => 'includes/sanitizers/class-amp-link-sanitizer', + 'AMP_Meta_Sanitizer' => 'includes/sanitizers/class-amp-meta-sanitizer', 'AMP_Nav_Menu_Toggle_Sanitizer' => 'includes/sanitizers/class-amp-nav-menu-toggle-sanitizer', 'AMP_Nav_Menu_Dropdown_Sanitizer' => 'includes/sanitizers/class-amp-nav-menu-dropdown-sanitizer', 'AMP_Comments_Sanitizer' => 'includes/sanitizers/class-amp-comments-sanitizer', diff --git a/includes/class-amp-theme-support.php b/includes/class-amp-theme-support.php index 0df3b892bd4..b9fe763e303 100644 --- a/includes/class-amp-theme-support.php +++ b/includes/class-amp-theme-support.php @@ -5,6 +5,8 @@ * @package AMP */ +use Amp\AmpWP\Dom\Document; + /** * Class AMP_Theme_Support * @@ -1600,10 +1602,10 @@ public static function filter_admin_bar_script_loader_tag( $tag, $handle ) { * @link https://amp.dev/documentation/guides-and-tutorials/optimize-and-measure/optimize_amp/ * @todo All of this might be better placed inside of a sanitizer. * - * @param DOMDocument $dom Document. - * @param string[] $script_handles AMP script handles for components identified during output buffering. + * @param Document $dom Document. + * @param string[] $script_handles AMP script handles for components identified during output buffering. */ - public static function ensure_required_markup( DOMDocument $dom, $script_handles = [] ) { + public static function ensure_required_markup( Document $dom, $script_handles = [] ) { /** * Elements. * @@ -1614,23 +1616,14 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles * @var DOMElement $noscript */ - $xpath = new DOMXPath( $dom ); - - // Make sure the HEAD element is in the doc. - $head = $dom->getElementsByTagName( 'head' )->item( 0 ); - if ( ! $head ) { - $head = $dom->createElement( 'head' ); - $dom->documentElement->insertBefore( $head, $dom->documentElement->firstChild ); - } - // Ensure there is a schema.org script in the document. // @todo Consider applying the amp_schemaorg_metadata filter on the contents when a script is already present. - $schema_org_meta_script = $xpath->query( '//script[ @type = "application/ld+json" ][ contains( ./text(), "schema.org" ) ]' )->item( 0 ); + $schema_org_meta_script = $dom->xpath->query( '//script[ @type = "application/ld+json" ][ contains( ./text(), "schema.org" ) ]' )->item( 0 ); if ( ! $schema_org_meta_script ) { $script = $dom->createElement( 'script' ); $script->setAttribute( 'type', 'application/ld+json' ); $script->appendChild( $dom->createTextNode( wp_json_encode( amp_get_schemaorg_metadata(), JSON_UNESCAPED_UNICODE ) ) ); - $head->appendChild( $script ); + $dom->head->appendChild( $script ); } // Gather all links. @@ -1647,7 +1640,7 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles ), ], ]; - $link_elements = $head->getElementsByTagName( 'link' ); + $link_elements = $dom->head->getElementsByTagName( 'link' ); foreach ( $link_elements as $link ) { if ( $link->hasAttribute( 'rel' ) ) { $links[ $link->getAttribute( 'rel' ) ][] = $link; @@ -1665,97 +1658,18 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles 'href' => self::get_current_canonical_url(), ] ); - $head->appendChild( $rel_canonical ); - } - - /* - * Ensure meta charset and meta viewport are present. - * - * "AMP is already quite restrictive about which markup is allowed in the
section. However, - * there are a few basic optimizations that you can apply. The key is to structure the section - * in a way so that all render-blocking scripts and custom fonts load as fast as possible." - * - * "1. The first tag should be the meta charset tag, followed by any remaining meta tags." - * - * {@link https://amp.dev/documentation/guides-and-tutorials/optimize-and-measure/optimize_amp/ Optimize the AMP Runtime loading} - */ - $meta_charset = null; - $meta_viewport = null; - $meta_amp_script_srcs = []; - $meta_elements = []; - foreach ( $head->getElementsByTagName( 'meta' ) as $meta ) { - if ( $meta->hasAttribute( 'charset' ) ) { // There will not be a meta[http-equiv] because the sanitizer removed it. - $meta_charset = $meta; - } elseif ( 'viewport' === $meta->getAttribute( 'name' ) ) { - $meta_viewport = $meta; - } elseif ( 'amp-script-src' === $meta->getAttribute( 'name' ) ) { - $meta_amp_script_srcs[] = $meta; - } else { - $meta_elements[] = $meta; - } - } - - // Handle meta charset. - if ( ! $meta_charset ) { - // Warning: This probably means the character encoding needs to be converted. - $meta_charset = AMP_DOM_Utils::create_node( - $dom, - 'meta', - [ - 'charset' => 'utf-8', - ] - ); - } else { - $head->removeChild( $meta_charset ); // So we can move it. - } - $head->insertBefore( $meta_charset, $head->firstChild ); - - // Handle meta viewport. - if ( ! $meta_viewport ) { - $meta_viewport = AMP_DOM_Utils::create_node( - $dom, - 'meta', - [ - 'name' => 'viewport', - 'content' => 'width=device-width', - ] - ); - } else { - $head->removeChild( $meta_viewport ); // So we can move it. + $dom->head->appendChild( $rel_canonical ); } - $head->insertBefore( $meta_viewport, $meta_charset->nextSibling ); - // Handle meta amp-script-src elements. - $first_meta_amp_script_src = array_shift( $meta_amp_script_srcs ); - if ( $first_meta_amp_script_src ) { - $meta_elements[] = $first_meta_amp_script_src; - - // Merge (and remove) any subsequent meta amp-script-src elements. - if ( ! empty( $meta_amp_script_srcs ) ) { - $content_values = [ $first_meta_amp_script_src->getAttribute( 'content' ) ]; - foreach ( $meta_amp_script_srcs as $meta_amp_script_src ) { - $meta_amp_script_src->parentNode->removeChild( $meta_amp_script_src ); - $content_values[] = $meta_amp_script_src->getAttribute( 'content' ); - } - $first_meta_amp_script_src->setAttribute( 'content', implode( ' ', $content_values ) ); - unset( $meta_amp_script_src, $content_values ); - } - } - unset( $meta_amp_script_srcs, $first_meta_amp_script_src ); - - // Insert all the the meta elements next in the head. - $previous_node = $meta_viewport; - foreach ( $meta_elements as $meta_element ) { - $meta_element->parentNode->removeChild( $meta_element ); - $head->insertBefore( $meta_element, $previous_node->nextSibling ); - $previous_node = $meta_element; - } + // Store the last meta tag as the previous node to append to. + $meta_tags = $dom->head->getElementsByTagName( 'meta' ); + $previous_node = $meta_tags->length > 0 ? $meta_tags->item( $meta_tags->length - 1 ) : $dom->head->firstChild; // Handle the title. - $title = $head->getElementsByTagName( 'title' )->item( 0 ); + $title = $dom->head->getElementsByTagName( 'title' )->item( 0 ); if ( $title ) { $title->parentNode->removeChild( $title ); // So we can move it. - $head->insertBefore( $title, $previous_node->nextSibling ); + $dom->head->insertBefore( $title, $previous_node->nextSibling ); $previous_node = $title; } @@ -1771,7 +1685,7 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles $ordered_scripts = []; $head_scripts = []; $runtime_src = wp_scripts()->registered['amp-runtime']->src; - foreach ( $head->getElementsByTagName( 'script' ) as $script ) { // Note that prepare_response() already moved body scripts to head. + foreach ( $dom->head->getElementsByTagName( 'script' ) as $script ) { // Note that prepare_response() already moved body scripts to head. $head_scripts[] = $script; } foreach ( $head_scripts as $script ) { @@ -1879,7 +1793,7 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles if ( $link->parentNode ) { $link->parentNode->removeChild( $link ); // So we can move it. } - $head->insertBefore( $link, $previous_node->nextSibling ); + $dom->head->insertBefore( $link, $previous_node->nextSibling ); $previous_node = $link; } } @@ -1913,25 +1827,25 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles */ $ordered_scripts = array_merge( $ordered_scripts, $amp_scripts ); foreach ( $ordered_scripts as $ordered_script ) { - $head->insertBefore( $ordered_script, $previous_node->nextSibling ); + $dom->head->insertBefore( $ordered_script, $previous_node->nextSibling ); $previous_node = $ordered_script; } /* * "8. Specify any custom styles by using the ', - '', + '', ], 'head_noscript_span' => [ '', - 'No script', + 'No script', ], 'test_with_dev_mode' => [ - '', + '', null, ], ]; @@ -54,13 +56,13 @@ public function test_noscript_promotion( $source, $expected = null ) { if ( null === $expected ) { $expected = $source; } - $dom = AMP_DOM_Utils::get_dom( $source ); + $dom = Document::from_html( $source ); $this->assertSame( 1, $dom->getElementsByTagName( 'noscript' )->length ); $sanitizer = new AMP_Script_Sanitizer( $dom ); $sanitizer->sanitize(); $whitelist_sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom ); $whitelist_sanitizer->sanitize(); - $content = AMP_DOM_Utils::get_content_from_dom_node( $dom, $dom->documentElement ); + $content = $dom->saveHTML( $dom->documentElement ); $this->assertEquals( $expected, $content ); } @@ -103,10 +105,10 @@ public function test_boilerplate_preservation() { 'use_document_element' => true, ]; - $dom = AMP_DOM_Utils::get_dom( $html ); + $dom = Document::from_html( $html ); AMP_Content_Sanitizer::sanitize_document( $dom, amp_get_content_sanitizers(), $args ); - $content = AMP_DOM_Utils::get_content_from_dom_node( $dom, $dom->documentElement ); + $content = $dom->saveHTML( $dom->documentElement ); $this->assertRegExp( '/\s*/', $content ); $this->assertContains( '', $content ); diff --git a/tests/php/test-amp-style-sanitizer.php b/tests/php/test-amp-style-sanitizer.php index b8441e8ee62..b22274e443d 100644 --- a/tests/php/test-amp-style-sanitizer.php +++ b/tests/php/test-amp-style-sanitizer.php @@ -7,6 +7,7 @@ // phpcs:disable WordPress.Arrays.MultipleStatementAlignment.DoubleArrowNotAligned +use Amp\AmpWP\Dom\Document; use Amp\AmpWP\Tests\PrivateAccess; /** @@ -573,7 +574,7 @@ static function( $preempt, $request, $url ) { 10, 3 ); - $dom = AMP_DOM_Utils::get_dom( $source ); + $dom = Document::from_html( $source ); $error_codes = []; $args = [ @@ -589,7 +590,7 @@ static function( $preempt, $request, $url ) { $whitelist_sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom, $args ); $whitelist_sanitizer->sanitize(); - $sanitized_html = AMP_DOM_Utils::get_content_from_dom_node( $dom, $dom->documentElement ); + $sanitized_html = $dom->saveHTML( $dom->documentElement ); $actual_stylesheets = array_values( $sanitizer->get_stylesheets() ); $this->assertEquals( $expected_errors, $error_codes ); $this->assertCount( count( $expected_stylesheets ), $actual_stylesheets ); @@ -724,7 +725,7 @@ public function get_amp_selector_data() { */ public function test_amp_selector_conversion( $markup, $input, $output ) { $html = "$markup"; - $dom = AMP_DOM_Utils::get_dom( $html ); + $dom = Document::from_html( $html ); $sanitizer_classes = amp_get_content_sanitizers(); $sanitized = AMP_Content_Sanitizer::sanitize_document( @@ -870,7 +871,7 @@ static function ( $selector ) { ); $html = "$markup"; - $dom = AMP_DOM_Utils::get_dom( $html ); + $dom = Document::from_html( $html ); $sanitizer_classes = amp_get_content_sanitizers(); @@ -1063,7 +1064,7 @@ public function get_amp_css_hacks_data() { */ public function test_browser_css_hacks( $input ) { $html = ""; - $dom = AMP_DOM_Utils::get_dom( $html ); + $dom = Document::from_html( $html ); $error_codes = []; $sanitizer = new AMP_Style_Sanitizer( @@ -1093,7 +1094,7 @@ public function test_font_data_url_handling() { $html .= '