Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only move meta tags to the head when required and add processing for meta[http-equiv] #4505

Merged
merged 8 commits into from
Apr 2, 2020
58 changes: 44 additions & 14 deletions includes/sanitizers/class-amp-meta-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer {
* Tags array keys.
*/
const TAG_CHARSET = 'charset';
const TAG_HTTP_EQUIV = 'http-equiv';
const TAG_VIEWPORT = 'viewport';
const TAG_AMP_SCRIPT_SRC = 'amp_script_src';
const TAG_OTHER = 'other';
Expand All @@ -54,6 +55,7 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer {
*/
protected $meta_tags = [
self::TAG_CHARSET => [],
self::TAG_HTTP_EQUIV => [],
self::TAG_VIEWPORT => [],
self::TAG_AMP_SCRIPT_SRC => [],
self::TAG_OTHER => [],
Expand All @@ -66,19 +68,41 @@ class AMP_Meta_Sanitizer extends AMP_Base_Sanitizer {
*/
const AMP_VIEWPORT = 'width=device-width';

/**
* Spec name for the tag spec for meta elements that are allowed in the body.
*
* @since 1.5.2
* @var string
*/
const BODY_ANCESTOR_META_TAG_SPEC_NAME = 'meta name= and content=';

/**
* Get the regular expression pattern of denied name attribute values for meta tags which are allowed in the body.
*
* @since 1.5.2
* @return string Deny pattern.
*/
private function get_body_meta_tag_name_attribute_deny_pattern() {
static $pattern = null;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial method of directly supplying the pattern was indeed brittle in retrospect. Getting the pattern from the spec and making it static is much more robust.

if ( null === $pattern ) {
$tag_spec = current(
array_filter(
AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ),
static function ( $spec ) {
return isset( $spec['tag_spec']['spec_name'] ) && self::BODY_ANCESTOR_META_TAG_SPEC_NAME === $spec['tag_spec']['spec_name'];
}
)
);
$pattern = '/' . $tag_spec['attr_spec_list']['name']['blacklisted_value_regex'] . '/';
}
return $pattern;
}

/**
* Sanitize.
*/
public function sanitize() {
$meta_elements = $this->dom->getElementsByTagName( static::$tag );

// Remove all nodes for easy reordering later on.
$meta_elements = array_map(
static function ( $element ) {
return $element->parentNode->removeChild( $element );
},
iterator_to_array( $meta_elements, false )
);
$meta_elements = iterator_to_array( $this->dom->getElementsByTagName( static::$tag ), false );

foreach ( $meta_elements as $meta_element ) {

Expand All @@ -96,13 +120,19 @@ static function ( $element ) {
* @var DOMElement $meta_element
*/
if ( $meta_element->hasAttribute( Attribute::CHARSET ) ) {
$this->meta_tags[ self::TAG_CHARSET ][] = $meta_element;
$this->meta_tags[ self::TAG_CHARSET ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif ( $meta_element->hasAttribute( Attribute::HTTP_EQUIV ) ) {
$this->meta_tags[ self::TAG_HTTP_EQUIV ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif ( Attribute::VIEWPORT === $meta_element->getAttribute( Attribute::NAME ) ) {
$this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element;
$this->meta_tags[ self::TAG_VIEWPORT ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif ( Attribute::AMP_SCRIPT_SRC === $meta_element->getAttribute( Attribute::NAME ) ) {
$this->meta_tags[ self::TAG_AMP_SCRIPT_SRC ][] = $meta_element;
} else {
$this->meta_tags[ self::TAG_OTHER ][] = $meta_element;
$this->meta_tags[ self::TAG_AMP_SCRIPT_SRC ][] = $meta_element->parentNode->removeChild( $meta_element );
} elseif (
$meta_element->hasAttribute( 'name' )
&&
preg_match( $this->get_body_meta_tag_name_attribute_deny_pattern(), $meta_element->getAttribute( 'name' ) )
) {
$this->meta_tags[ self::TAG_OTHER ][] = $meta_element->parentNode->removeChild( $meta_element );
}
}

Expand Down
145 changes: 143 additions & 2 deletions tests/php/test-class-amp-meta-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,37 @@
*/
class Test_AMP_Meta_Sanitizer extends WP_UnitTestCase {

/**
 * Verify the generated meta tag specs match what the meta sanitizer relies on.
 *
 * Exactly one spec must carry the body-ancestor spec name, it must be the only
 * spec that does not require `head` as parent or ancestor, and its `name`
 * attribute spec must consist solely of a `blacklisted_value_regex` entry.
 */
public function test_expected_meta_tags() {
	// Matches the spec whose name equals the one the sanitizer looks up.
	$has_body_spec_name = static function ( $candidate ) {
		if ( ! isset( $candidate['tag_spec']['spec_name'] ) ) {
			return false;
		}
		return AMP_Meta_Sanitizer::BODY_ANCESTOR_META_TAG_SPEC_NAME === $candidate['tag_spec']['spec_name'];
	};

	// Matches specs that are not pinned to the head via parent or ancestor.
	$is_allowed_outside_head = static function ( $candidate ) {
		$parent_is_head   = isset( $candidate['tag_spec']['mandatory_parent'] ) && 'head' === $candidate['tag_spec']['mandatory_parent'];
		$ancestor_is_head = isset( $candidate['tag_spec']['mandatory_ancestor'] ) && 'head' === $candidate['tag_spec']['mandatory_ancestor'];

		return ! $parent_is_head && ! $ancestor_is_head;
	};

	$named_specs = array_filter( AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), $has_body_spec_name );
	$this->assertCount( 1, $named_specs );

	$body_ok_specs = array_filter( AMP_Allowed_Tags_Generated::get_allowed_tag( 'meta' ), $is_allowed_outside_head );
	$this->assertEquals( $named_specs, $body_ok_specs );

	// The single remaining spec must expose only the deny regex on its name attribute.
	$body_spec = reset( $named_specs );
	$this->assertArrayHasKey( 'name', $body_spec['attr_spec_list'] );

	$name_attr_keys = array_keys( $body_spec['attr_spec_list']['name'] );
	$this->assertEquals( [ 'blacklisted_value_regex' ], $name_attr_keys );
}

/**
* Provide data to the test_sanitize method.
*
Expand All @@ -30,7 +61,42 @@ public function get_data_for_sanitize() {

$amp_boilerplate = amp_get_boilerplate_code();

return [
$meta_charset = '<meta charset="utf-8">';
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests are much more extensive than what I had before and cover the happy path of execution, along with potential mishaps that may occur 👍.

$meta_viewport = '<meta name="viewport" content="width=device-width">';

$meta_tags_allowed_in_body = '
<span itemprop="author" itemscope itemtype="https://schema.org/Person">
<meta itemprop="name" content="Siva">
</span>
<meta itemprop="datePublished" content="2020-03-24T18:05:15+05:30">
<meta itemprop="dateModified" content="2020-03-24T18:05:15+05:30">
<meta itemscope itemprop="mainEntityOfPage" itemtype="https://schema.org/WebPage" itemid="https://example.com/">
<span itemprop="publisher" itemscope itemtype="https://schema.org/Organization">
<span itemprop="logo" itemscope itemtype="https://schema.org/ImageObject">
<meta itemprop="url" content="https://example/logo.png">
</span>
<meta itemprop="name" content="Example">
<meta itemprop="url" content="https://example.com">
</span>
<meta itemprop="headline " content="This is a test">
<span itemprop="image" itemscope itemtype="https://schema.org/ImageObject">
<meta itemprop="url" content="https://example.com/foo.jpg">
<meta itemprop="width" content="1280"><meta itemprop="height" content="720">
</span>
<div itemscope id="amanda" itemref="a b"></div>
<p id="a">Name: <span itemprop="name">Amanda</span> </p>
<div id="b" itemprop="band" itemscope itemref="c"></div>
<div id="c">
<p>Band: <span itemprop="name">Jazz Band</span> </p>
<p>Size: <span itemprop="size">12</span> players</p>
</div>
<meta id="foo">
<meta name="greeting" content="Hello!">
<meta name="keywords" content="Meta Tags, Metadata" scheme="ISBN">
<meta content="This is a basic text" property="og:title">
';

$data = [
'Do not break the correct charset tag' => [
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
Expand Down Expand Up @@ -65,7 +131,79 @@ public function get_data_for_sanitize() {
'<!DOCTYPE html><html><head><meta name="amp-script-src" content="' . esc_attr( $script1_hash ) . '"><meta charset="utf-8"><meta name="amp-script-src" content="' . esc_attr( $script2_hash ) . '"><meta name="viewport" content="width=device-width"><meta name="amp-script-src" content="' . esc_attr( $script3_hash ) . '">' . $amp_boilerplate . '</head><body><meta name="amp-script-src" content="' . esc_attr( $script4_hash ) . '"></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta name="amp-script-src" content="' . esc_attr( $script1_hash ) . ' ' . esc_attr( $script2_hash ) . ' ' . esc_attr( $script3_hash ) . ' ' . esc_attr( $script4_hash ) . '">' . $amp_boilerplate . '</head><body></body></html>',
],

'Remove legacy meta http-equiv=Content-Type' => [
'<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $amp_boilerplate . '</head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
],

'Process invalid meta http-equiv value' => [
// Note the AMP_Tag_And_Attribute_Sanitizer removes the http-equiv attribute because the content is invalid.
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '</head><body><meta http-equiv="Content-Type" content="text/vbscript"></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta content="text/vbscript"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body></body></html>',
],

'Disallowed meta=content-deposition' => [
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '<meta name="content-disposition" content="inline; filename=data.csv"></head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta content="inline; filename=data.csv">' . $amp_boilerplate . '</head><body></body></html>',
],

'Disallowed meta=revisit-after' => [
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '<meta name="revisit-after" content="7 days"></head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta content="7 days">' . $amp_boilerplate . '</head><body></body></html>',
],

'Disallowed meta=amp-bogus' => [
'<!DOCTYPE html><html><head>' . $amp_boilerplate . '<meta name="amp-bogus" content="bad"></head><body></body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width"><meta content="bad">' . $amp_boilerplate . '</head><body></body></html>',
],

'Ignore generic meta tags' => [
'<!DOCTYPE html><html><head><meta charset="utf-8">' . $amp_boilerplate . '</head><body>' . $meta_tags_allowed_in_body . '</body></html>',
'<!DOCTYPE html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width">' . $amp_boilerplate . '</head><body>' . $meta_tags_allowed_in_body . '</body></html>',
],
];

$http_equiv_specs = [
'meta http-equiv=X-UA-Compatible' => '<meta http-equiv="X-UA-Compatible" content="IE=edge">',
'meta http-equiv=content-language' => '<meta http-equiv="content-language" content="labellist">',
'meta http-equiv=pics-label' => '<meta http-equiv="PICS-Label" content="en-US">',
'meta http-equiv=imagetoolbar' => '<meta http-equiv="imagetoolbar" content="false">',
'meta http-equiv=Content-Style-Type' => '<meta http-equiv="Content-Style-Type" content="text/css">',
'meta http-equiv=Content-Script-Type' => '<meta http-equiv="Content-Script-Type" content="text/javascript">',
'meta http-equiv=origin-trial' => '<meta http-equiv="origin-trial" content="...">',
'meta http-equiv=resource-type' => '<meta http-equiv="resource-type" content="document">',
'meta http-equiv=x-dns-prefetch-control' => '<meta http-equiv="x-dns-prefetch-control" content="on">',
];
foreach ( $http_equiv_specs as $equiv_spec => $tag ) {
$data[ "Verify http-equiv moved: $equiv_spec" ] = [
"<!DOCTYPE html><html><head>{$meta_charset}{$meta_viewport}{$amp_boilerplate}</head><body>{$tag}</body></html>",
"<!DOCTYPE html><html><head>{$meta_charset}{$tag}{$meta_viewport}{$amp_boilerplate}</head><body></body></html>",
];
}

$named_specs = [
'meta name=apple-itunes-app' => '<meta name="apple-itunes-app" content="app-id=myAppStoreID, affiliate-data=myAffiliateData, app-argument=myURL">',
'meta name=amp-experiments-opt-in' => '<meta name="amp-experiments-opt-in" content="experiment-a,experiment-b">',
'meta name=amp-3p-iframe-src' => '<meta name="amp-3p-iframe-src" content="https://storage.googleapis.com/amp-testing.appspot.com/public/remote.html">',
'meta name=amp-consent-blocking' => '<meta name="amp-consent-blocking" content="">',
'meta name=amp-experiment-token' => '<meta name="amp-experiment-token" content="{copy your token here}">',
'meta name=amp-link-variable-allowed-origin' => '<meta name="amp-link-variable-allowed-origin" content="https://example.com https://example.org">',
'meta name=amp-google-clientid-id-api' => '<meta name="amp-google-client-id-api" content="googleanalytics">',
'meta name=amp-ad-doubleclick-sra' => '<meta name="amp-ad-doubleclick-sra">',
'meta name=amp-list-load-more' => '<meta name="amp-list-load-more" content="">',
'meta name=amp-recaptcha-input' => '<meta name="amp-recaptcha-input" content="">',
'meta name=amp-ad-enable-refresh' => '<meta name="amp-ad-enable-refresh" content="network1=refresh_interval1,network2=refresh_interval2,...">',
'meta name=amp-to-amp-navigation' => '<meta name="amp-to-amp-navigation" content="AMP-Redirect-To; AMP.navigateTo">',
];
foreach ( $named_specs as $named_spec => $tag ) {
$data[ "Verify meta[name] moved: $named_spec" ] = [
"<!DOCTYPE html><html><head>{$meta_charset}{$meta_viewport}{$amp_boilerplate}</head><body>{$tag}</body></html>",
"<!DOCTYPE html><html><head>{$meta_charset}{$meta_viewport}{$tag}{$amp_boilerplate}</head><body></body></html>",
];
}

return $data;
}

/**
Expand All @@ -82,7 +220,10 @@ public function test_sanitize( $source_content, $expected_content ) {
$sanitizer = new AMP_Meta_Sanitizer( $dom );
$sanitizer->sanitize();

$sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom );
$sanitizer = new AMP_Tag_And_Attribute_Sanitizer(
$dom,
[ 'use_document_element' => true ]
);
$sanitizer->sanitize();

$this->assertEqualMarkup( $expected_content, $dom->saveHTML() );
Expand Down
6 changes: 6 additions & 0 deletions tests/php/test-tag-and-attribute-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -2975,6 +2975,12 @@ public function get_html_data() {
null,
[ 'amp-subscriptions' ],
],
'bad http-equiv meta tag' => [
'<html><head><meta charset="utf-8"><meta http-equiv="Content-Script-Type" content="text/vbscript"></head><body></body></html>',
'<html><head><meta charset="utf-8"><meta content="text/vbscript"></head><body></body></html>',
[],
[ AMP_Tag_And_Attribute_Sanitizer::DISALLOWED_ATTR ],
],
];

$bad_dev_mode_document = sprintf(
Expand Down