Skip to content

Commit

Permalink
Only move meta tags to the head when required and add processing for …
Browse files Browse the repository at this point in the history
…meta[http-equiv] (#4505)

* Do not process generic meta tags

* Obtain non-body meta name attribute pattern from spec

* Make use of BODY_ANCESTOR_META_TAG_SPEC_NAME constant

* Test meta[schema] and meta[property]

* Add recognition and repositioning of meta[http-equiv] elements

* Add critical use_document_element=true arg for AMP_Tag_And_Attribute_Sanitizer

* Add tests for the discrete spec'ed meta tags

Co-authored-by: Weston Ruter <westonruter@google.com>
  • Loading branch information
pierlon and westonruter committed Apr 2, 2020
1 parent e382be8 commit 03af649
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 16 deletions.
58 changes: 44 additions & 14 deletions includes/sanitizers/class-amp-meta-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer {
* Tags array keys.
*/
const TAG_CHARSET = 'charset';
const TAG_HTTP_EQUIV = 'http-equiv';
const TAG_VIEWPORT = 'viewport';
const TAG_AMP_SCRIPT_SRC = 'amp_script_src';
const TAG_OTHER = 'other';
Expand All @@ -54,6 +55,7 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer {
*/
protected $meta_tags = [
self::TAG_CHARSET => [],
self::TAG_HTTP_EQUIV => [],
self::TAG_VIEWPORT => [],
self::TAG_AMP_SCRIPT_SRC => [],
self::TAG_OTHER => [],
Expand All @@ -66,19 +68,41 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer {
*/
const AMP_VIEWPORT = 'width=device-width';

/**
* Spec name for the tag spec for meta elements that are allowed in the body.
*
* @since 1.5.2
* @var string
*/
const BODY_ANCESTOR_META_TAG_SPEC_NAME = 'meta name= and content=';

/**
 * Get the regular expression for meta `name` attribute values denied by the body meta tag spec.
 *
 * The pattern is built from the `blacklisted_value_regex` entry of the `name` attribute
 * in the meta tag spec whose spec name equals self::BODY_ANCESTOR_META_TAG_SPEC_NAME,
 * wrapped in `/` delimiters. The result is kept in a static local so the spec lookup
 * only happens on the first call.
 *
 * @since 1.5.2
 * @return string Deny pattern.
 */
private function get_body_meta_tag_name_attribute_deny_pattern() {
	static $pattern = null;

	// Reuse the pattern computed by an earlier call.
	if ( null !== $pattern ) {
		return $pattern;
	}

	$matching_specs = array_filter(
		AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ),
		static function ( $spec ) {
			if ( ! isset( $spec['tag_spec']['spec_name'] ) ) {
				return false;
			}
			return self::BODY_ANCESTOR_META_TAG_SPEC_NAME === $spec['tag_spec']['spec_name'];
		}
	);

	// array_filter() preserves keys, so take the first remaining element rather than index 0.
	$body_meta_spec = reset( $matching_specs );

	$pattern = sprintf( '/%s/', $body_meta_spec['attr_spec_list']['name']['blacklisted_value_regex'] );

	return $pattern;
}

/**
* Sanitize.
*/
public function sanitize() {
$meta_elements = $this->dom->getElementsByTagName( static::$tag );

// Remove all nodes for easy reordering later on.
$meta_elements = array_map(
static function ( $element ) {
return $element->parentNode->removeChild( $element );
},
iterator_to_array( $meta_elements, false )
);
$meta_elements = iterator_to_array( $this->dom->getElementsByTagName( static::$tag ), false );

foreach ( $meta_elements as $meta_element ) {

Expand All @@ -96,13 +120,19 @@ static function ( $element ) {
* @var DOMElement $meta_element
*/
if ( $meta_element->hasAttribute( Attribute::CHARSET ) ) {
$this->meta_tags[ self::TAG_CHARSET ][] = $meta_element;
$this->meta_tags[ self::TAG_CHARSET ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif ( $meta_element->hasAttribute( Attribute::HTTP_EQUIV ) ) {
$this->meta_tags[ self::TAG_HTTP_EQUIV ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif ( Attribute::VIEWPORT === $meta_element->getAttribute( Attribute::NAME ) ) {
$this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element;
$this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif ( Attribute::AMP_SCRIPT_SRC === $meta_element->getAttribute( Attribute::NAME ) ) {
$this->meta_tags[ self::TAG_AMP_SCRIPT_SRC ][] = $meta_element;
} else {
$this->meta_tags[ self::TAG_OTHER ][] = $meta_element;
$this->meta_tags[ self::TAG_AMP_SCRIPT_SRC ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif (
$meta_element->hasAttribute( 'name' )
&&
preg_match( $this->get_body_meta_tag_name_attribute_deny_pattern(), $meta_element->getAttribute( 'name' ) )
) {
$this->meta_tags[ self::TAG_OTHER ][] = $meta_element->parentNode->removeChild( $meta_element );
}
}

Expand Down
145 changes: 143 additions & 2 deletions tests/php/test-class-amp-meta-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,37 @@
*/
class Test_AMP_Meta_Sanitizer extends WP_UnitTestCase {

/**
 * Verify the assumptions the meta sanitizer makes about the generated meta tag specs.
 *
 * Checks that exactly one spec carries the name AMP_Meta_Sanitizer::BODY_ANCESTOR_META_TAG_SPEC_NAME,
 * that it is the only meta spec without a `head` mandatory parent or ancestor, and that its
 * `name` attribute spec consists solely of a `blacklisted_value_regex`.
 */
public function test_expected_meta_tags() {
	$meta_specs = AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' );

	$has_body_spec_name = static function ( $spec ) {
		if ( ! isset( $spec['tag_spec']['spec_name'] ) ) {
			return false;
		}
		return AMP_Meta_Sanitizer::BODY_ANCESTOR_META_TAG_SPEC_NAME === $spec['tag_spec']['spec_name'];
	};

	$is_allowed_outside_head = static function ( $spec ) {
		if ( isset( $spec['tag_spec']['mandatory_parent'] ) && 'head' === $spec['tag_spec']['mandatory_parent'] ) {
			return false;
		}
		if ( isset( $spec['tag_spec']['mandatory_ancestor'] ) && 'head' === $spec['tag_spec']['mandatory_ancestor'] ) {
			return false;
		}
		return true;
	};

	// Exactly one spec must carry the expected spec name.
	$named_specs = array_filter( $meta_specs, $has_body_spec_name );
	$this->assertCount( 1, $named_specs );

	// That same spec must be the only one not tied to the head.
	$body_ok_specs = array_filter( $meta_specs, $is_allowed_outside_head );
	$this->assertEquals( $named_specs, $body_ok_specs );

	// The name attribute spec is expected to hold nothing but the deny regex.
	$body_spec = current( $named_specs );
	$this->assertArrayHasKey( 'name', $body_spec['attr_spec_list'] );
	$this->assertEquals( [ 'blacklisted_value_regex' ], array_keys( $body_spec['attr_spec_list']['name'] ) );
}

/**
* Provide data to the test_sanitize method.
*
Expand All @@ -30,7 +61,42 @@ public function get_data_for_sanitize() {

$amp_boilerplate = amp_get_boilerplate_code();

return [
$meta_charset = '<meta charset="utf-8">';
$meta_viewport = '<meta name="viewport" content="width=device-width">';

$meta_tags_allowed_in_body = '
<span itemprop="author" itemscope itemtype="https://schema.org/Person">
<meta itemprop="name" content="Siva">
</span>
<meta itemprop="datePublished" content="2020-03-24T18:05:15+05:30">
<meta itemprop="dateModified" content="2020-03-24T18:05:15+05:30">
<meta itemscope itemprop="mainEntityOfPage" itemtype="https://schema.org/WebPage" itemid="https://example.com/">
<span itemprop="publisher" itemscope itemtype="https://schema.org/Organization">
<span itemprop="logo" itemscope itemtype="https://schema.org/ImageObject">
<meta itemprop="url" content="https://example/logo.png">
</span>
<meta itemprop="name" content="Example">
<meta itemprop="url" content="https://example.com">
</span>
<meta itemprop="headline " content="This is a test">
<span itemprop="image" itemscope itemtype="https://schema.org/ImageObject">
<meta itemprop="url" content="https://example.com/foo.jpg">
<meta itemprop="width" content="1280"><meta itemprop="height" content="720">
</span>
<div itemscope id="amanda" itemref="a b"></div>
<p id="a">Name: <span itemprop="name">Amanda</span> </p>
<div id="b" itemprop="band" itemscope itemref="c"></div>
<div id="c">
<p>Band: <span itemprop="name">Jazz Band</span> </p>
<p>Size: <span itemprop="size">12</span> players</p>
</div>
<meta id="foo">
<meta name="greeting" content="Hello!">
<meta name="keywords" content="Meta Tags, Metadata" scheme="ISBN">
<meta content="This is a basic text" property="og:title">
';

$data = [
'Do not break the correct charset tag' => [
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
Expand Down Expand Up @@ -65,7 +131,79 @@ public function get_data_for_sanitize() {
'<!DOCTYPE html><html><head><meta name="amp-script-src" content="' . esc_attr( $script1_hash ) . '"><meta charset="utf-8"><meta name="amp-script-src" content="' . esc_attr( $script2_hash ) . '"><meta name="viewport" content="width=device-width"><meta name="amp-script-src" content="' . esc_attr( $script3_hash ) . '">' . $amp_boilerplate . '</head><body><meta name="amp-script-src" content="' . esc_attr( $script4_hash ) . '"></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta name="amp-script-src" content="' . esc_attr( $script1_hash ) . ' ' . esc_attr( $script2_hash ) . ' ' . esc_attr( $script3_hash ) . ' ' . esc_attr( $script4_hash ) . '">' . $amp_boilerplate . '</head><body></body></html>',
],

'Remove legacy meta http-equiv=Content-Type' => [
'<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $amp_boilerplate . '</head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
],

'Process invalid meta http-equiv value' => [
// Note the AMP_Tag_And_Attribute_Sanitizer removes the http-equiv attribute because the content is invalid.
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '</head><body><meta http-equiv="Content-Type" content="text/vbscript"></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta content="text/vbscript"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
],

'Disallowed meta=content-deposition' => [
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '<meta name="content-disposition" content="inline; filename=data.csv"></head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta content="inline; filename=data.csv">' . $amp_boilerplate . '</head><body></body></html>',
],

'Disallowed meta=revisit-after' => [
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '<meta name="revisit-after" content="7 days"></head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta content="7 days">' . $amp_boilerplate . '</head><body></body></html>',
],

'Disallowed meta=amp-bogus' => [
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '<meta name="amp-bogus" content="bad"></head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta content="bad">' . $amp_boilerplate . '</head><body></body></html>',
],

'Ignore generic meta tags' => [
'<!DOCTYPE html><html><head><meta charset="utf-8">' . $amp_boilerplate . '</head><body>' . $meta_tags_allowed_in_body . '</body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body>' . $meta_tags_allowed_in_body . '</body></html>',
],
];

$http_equiv_specs = [
'meta http-equiv=X-UA-Compatible' => '<meta http-equiv="X-UA-Compatible" content="IE=edge">',
'meta http-equiv=content-language' => '<meta http-equiv="content-language" content="labellist">',
'meta http-equiv=pics-label' => '<meta http-equiv="PICS-Label" content="en-US">',
'meta http-equiv=imagetoolbar' => '<meta http-equiv="imagetoolbar" content="false">',
'meta http-equiv=Content-Style-Type' => '<meta http-equiv="Content-Style-Type" content="text/css">',
'meta http-equiv=Content-Script-Type' => '<meta http-equiv="Content-Script-Type" content="text/javascript">',
'meta http-equiv=origin-trial' => '<meta http-equiv="origin-trial" content="...">',
'meta http-equiv=resource-type' => '<meta http-equiv="resource-type" content="document">',
'meta http-equiv=x-dns-prefetch-control' => '<meta http-equiv="x-dns-prefetch-control" content="on">',
];
foreach ( $http_equiv_specs as $equiv_spec => $tag ) {
$data[ "Verify http-equiv moved: $equiv_spec" ] = [
"<!DOCTYPE html><html><head>{$meta_charset}{$meta_viewport}{$amp_boilerplate}</head><body>{$tag}</body></html>",
"<!DOCTYPE html><html><head>{$meta_charset}{$tag}{$meta_viewport}{$amp_boilerplate}</head><body></body></html>",
];
}

$named_specs = [
'meta name=apple-itunes-app' => '<meta name="apple-itunes-app" content="app-id=myAppStoreID, affiliate-data=myAffiliateData, app-argument=myURL">',
'meta name=amp-experiments-opt-in' => '<meta name="amp-experiments-opt-in" content="experiment-a,experiment-b">',
'meta name=amp-3p-iframe-src' => '<meta name="amp-3p-iframe-src" content="https://storage.googleapis.com/amp-testing.appspot.com/public/remote.html">',
'meta name=amp-consent-blocking' => '<meta name="amp-consent-blocking" content="">',
'meta name=amp-experiment-token' => '<meta name="amp-experiment-token" content="{copy your token here}">',
'meta name=amp-link-variable-allowed-origin' => '<meta name="amp-link-variable-allowed-origin" content="https://example.com https://example.org">',
'meta name=amp-google-clientid-id-api' => '<meta name="amp-google-client-id-api" content="googleanalytics">',
'meta name=amp-ad-doubleclick-sra' => '<meta name="amp-ad-doubleclick-sra">',
'meta name=amp-list-load-more' => '<meta name="amp-list-load-more" content="">',
'meta name=amp-recaptcha-input' => '<meta name="amp-recaptcha-input" content="">',
'meta name=amp-ad-enable-refresh' => '<meta name="amp-ad-enable-refresh" content="network1=refresh_interval1,network2=refresh_interval2,...">',
'meta name=amp-to-amp-navigation' => '<meta name="amp-to-amp-navigation" content="AMP-Redirect-To; AMP.navigateTo">',
];
foreach ( $named_specs as $named_spec => $tag ) {
$data[ "Verify meta[name] moved: $named_spec" ] = [
"<!DOCTYPE html><html><head>{$meta_charset}{$meta_viewport}{$amp_boilerplate}</head><body>{$tag}</body></html>",
"<!DOCTYPE html><html><head>{$meta_charset}{$meta_viewport}{$tag}{$amp_boilerplate}</head><body></body></html>",
];
}

return $data;
}

/**
Expand All @@ -82,7 +220,10 @@ public function test_sanitize( $source_content, $expected_content ) {
$sanitizer = new AMP_Meta_Sanitizer( $dom );
$sanitizer->sanitize();

$sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom );
$sanitizer = new AMP_Tag_And_Attribute_Sanitizer(
$dom,
[ 'use_document_element' => true ]
);
$sanitizer->sanitize();

$this->assertEqualMarkup( $expected_content, $dom->saveHTML() );
Expand Down
6 changes: 6 additions & 0 deletions tests/php/test-tag-and-attribute-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -2975,6 +2975,12 @@ public function get_html_data() {
null,
[ 'amp-subscriptions' ],
],
'bad http-equiv meta tag' => [
'<html><head><meta charset="utf-8"><meta http-equiv="Content-Script-Type" content="text/vbscript"></head><body></body></html>',
'<html><head><meta charset="utf-8"><meta content="text/vbscript"></head><body></body></html>',
[],
[ AMP_Tag_And_Attribute_Sanitizer::DISALLOWED_ATTR ],
],
];

$bad_dev_mode_document = sprintf(
Expand Down

0 comments on commit 03af649

Please sign in to comment.