Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1103/add Amp_Embed_Sanitizer #1128

Merged
merged 12 commits into from Jun 20, 2018
Merged
1 change: 1 addition & 0 deletions includes/amp-helper-functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ function amp_get_content_sanitizers( $post = null ) {
'AMP_Video_Sanitizer' => array(),
'AMP_Audio_Sanitizer' => array(),
'AMP_Playbuzz_Sanitizer' => array(),
'AMP_Embed_Sanitizer' => array(),
'AMP_Iframe_Sanitizer' => array(
'add_placeholder' => true,
),
Expand Down
1 change: 1 addition & 0 deletions includes/class-amp-autoloader.php
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class AMP_Autoloader {
'AMP_Form_Sanitizer' => 'includes/sanitizers/class-amp-form-sanitizer',
'AMP_Playbuzz_Sanitizer' => 'includes/sanitizers/class-amp-playbuzz-sanitizer',
'AMP_Style_Sanitizer' => 'includes/sanitizers/class-amp-style-sanitizer',
'AMP_Embed_Sanitizer' => 'includes/sanitizers/class-amp-embed-sanitizer',
'AMP_Tag_And_Attribute_Sanitizer' => 'includes/sanitizers/class-amp-tag-and-attribute-sanitizer',
'AMP_Video_Sanitizer' => 'includes/sanitizers/class-amp-video-sanitizer',
'AMP_Core_Theme_Sanitizer' => 'includes/sanitizers/class-amp-core-theme-sanitizer',
Expand Down
1 change: 1 addition & 0 deletions includes/class-amp-theme-support.php
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ public static function finish_init() {
self::$sanitizer_classes = amp_get_content_sanitizers();
self::$sanitizer_classes = AMP_Validation_Manager::filter_sanitizer_args( self::$sanitizer_classes );
self::$embed_handlers = self::register_content_embed_handlers();
self::$sanitizer_classes['AMP_Embed_Sanitizer']['embed_handlers'] = self::$embed_handlers;

foreach ( self::$sanitizer_classes as $sanitizer_class => $args ) {
if ( method_exists( $sanitizer_class, 'add_buffering_hooks' ) ) {
Expand Down
99 changes: 93 additions & 6 deletions includes/embeds/class-amp-facebook-embed.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,26 @@ class AMP_Facebook_Embed_Handler extends AMP_Base_Embed_Handler {
protected $DEFAULT_WIDTH = 600;
protected $DEFAULT_HEIGHT = 400;

/**
* Tag.
*
* @var string embed HTML blockquote tag to identify and replace with AMP version.
*/
protected $sanitize_tag = 'div';
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious to find out what the performance impacts are between $this->dom->getElementsByTagName( 'div' ) and then filtering out the ones that have the required attributes (is_facebook_raw_embed) .vs using XPath to obtain just the exact matching set up front, for example via:

//div[ @data-href and ( contains( @class, 'fb-post' ) or contains( @class, 'fb-video' ) ) ]

Given that div is a very generic element, I worry about the performance impacts of locating elements in large documents. But I'm not sure if XPath would be faster.

Copy link
Author

@ghost ghost Jun 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created a quick benchmark checking both implementations:
seems like xpath isn't faster.


/**
* Tag.
*
* @var string AMP amp-facebook tag
*/
private $amp_tag = 'amp-facebook';

public function register_embed() {
wp_embed_register_handler( 'amp-facebook', self::URL_PATTERN, array( $this, 'oembed' ), -1 );
wp_embed_register_handler( $this->amp_tag, self::URL_PATTERN, array( $this, 'oembed' ), -1 );
}

public function unregister_embed() {
wp_embed_unregister_handler( 'amp-facebook', -1 );
wp_embed_unregister_handler( $this->amp_tag, -1 );
}

public function oembed( $matches, $attr, $url, $rawattr ) {
Expand All @@ -38,13 +52,86 @@ public function render( $args ) {
$this->did_convert_elements = true;

return AMP_HTML_Utils::build_tag(
'amp-facebook',
$this->amp_tag,
array(
'data-href' => $args['url'],
'layout' => 'responsive',
'width' => $this->args['width'],
'height' => $this->args['height'],
'layout' => 'responsive',
'width' => $this->args['width'],
'height' => $this->args['height'],
)
);
}

/**
* Sanitized <div class="fb-video" data-href=> tags to <amp-facebook>.
*
* @param DOMDocument $dom DOM.
*/
public function sanitize_raw_embeds( $dom ) {
/**
* Node list.
*
* @var DOMNodeList $node
*/
$nodes = $dom->getElementsByTagName( $this->sanitize_tag );
$num_nodes = $nodes->length;

if ( 0 === $num_nodes ) {
return;
}

for ( $i = $num_nodes - 1; $i >= 0; $i-- ) {
$node = $nodes->item( $i );
if ( ! $node instanceof DOMElement ) {
continue;
}

$embed_type = $this->get_embed_type( $node );

if ( null !== $embed_type ) {
$this->create_amp_facebook_and_replace_node( $dom, $node, $embed_type );
}
}
}

/**
* Get embed type.
*
* @param DOMElement $node The DOMNode to adjust and replace.
* @return string|null Embed type or null if not detected.
*/
private function get_embed_type( $node ) {
$class_attr = $node->getAttribute( 'class' );
if ( null !== $class_attr && $node->hasAttribute( 'data-href' ) ) {
if ( false !== strpos( $class_attr, 'fb-post' ) ) {
return 'post';
} elseif ( false !== strpos( $class_attr, 'fb-video' ) ) {
return 'video';
}
return false !== strpos( $class_attr, 'fb-video' ) ? 'video' : 'post';
}

return null;
}

/**
* Create amp-facebook and replace node.
*
* @param DOMDocument $dom The HTML Document.
* @param DOMElement $node The DOMNode to adjust and replace.
* @param string $embed_type Embed type.
*/
private function create_amp_facebook_and_replace_node( $dom, $node, $embed_type ) {
$amp_facebook_node = AMP_DOM_Utils::create_node( $dom, $this->amp_tag, array(
'data-href' => $node->getAttribute( 'data-href' ),
'data-embed-as' => $embed_type,
'layout' => 'responsive',
'width' => $this->DEFAULT_WIDTH,
'height' => $this->DEFAULT_HEIGHT,
) );

$node->parentNode->replaceChild( $amp_facebook_node, $node );

$this->did_convert_elements = true;
}
}
122 changes: 115 additions & 7 deletions includes/embeds/class-amp-instagram-embed.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,27 @@ class AMP_Instagram_Embed_Handler extends AMP_Base_Embed_Handler {
protected $DEFAULT_WIDTH = 600;
protected $DEFAULT_HEIGHT = 600;

/**
* Tag.
*
* @var string embed HTML blockquote tag to identify and replace with AMP version.
*/
protected $sanitize_tag = 'blockquote';

/**
* Tag.
*
* @var string AMP amp-facebook tag
*/
private $amp_tag = 'amp-instagram';

public function register_embed() {
wp_embed_register_handler( 'amp-instagram', self::URL_PATTERN, array( $this, 'oembed' ), -1 );
wp_embed_register_handler( $this->amp_tag, self::URL_PATTERN, array( $this, 'oembed' ), -1 );
add_shortcode( 'instagram', array( $this, 'shortcode' ) );
}

public function unregister_embed() {
wp_embed_unregister_handler( 'amp-instagram', -1 );
wp_embed_unregister_handler( $this->amp_tag, -1 );
remove_shortcode( 'instagram' );
}

Expand Down Expand Up @@ -53,7 +67,7 @@ public function oembed( $matches, $attr, $url, $rawattr ) {

public function render( $args ) {
$args = wp_parse_args( $args, array(
'url' => false,
'url' => false,
'instagram_id' => false,
) );

Expand All @@ -64,16 +78,22 @@ public function render( $args ) {
$this->did_convert_elements = true;

return AMP_HTML_Utils::build_tag(
'amp-instagram',
$this->amp_tag,
array(
'data-shortcode' => $args['instagram_id'],
'layout' => 'responsive',
'width' => $this->args['width'],
'height' => $this->args['height'],
'layout' => 'responsive',
'width' => $this->args['width'],
'height' => $this->args['height'],
)
);
}

/**
* Get Instagram ID from URL.
*
* @param string $url URL.
* @return string|false The ID parsed from the URL or false if not found.
*/
private function get_instagram_id_from_url( $url ) {
$found = preg_match( self::URL_PATTERN, $url, $matches );

Expand All @@ -83,4 +103,92 @@ private function get_instagram_id_from_url( $url ) {

return end( $matches );
}

/**
* Sanitized <blockquote class="instagram-media"> tags to <amp-instagram>
*
* @param DOMDocument $dom DOM.
*/
public function sanitize_raw_embeds( $dom ) {
/**
* Node list.
*
* @var DOMNodeList $node
*/
$nodes = $dom->getElementsByTagName( $this->sanitize_tag );
$num_nodes = $nodes->length;

if ( 0 === $num_nodes ) {
return;
}

for ( $i = $num_nodes - 1; $i >= 0; $i-- ) {
$node = $nodes->item( $i );
if ( ! $node instanceof DOMElement ) {
continue;
}

if ( $node->hasAttribute( 'data-instgrm-permalink' ) ) {
$this->create_amp_instagram_and_replace_node( $dom, $node );
}
}
}

/**
* Make final modifications to DOMNode
*
* @param DOMDocument $dom The HTML Document.
* @param DOMElement $node The DOMNode to adjust and replace.
*/
private function create_amp_instagram_and_replace_node( $dom, $node ) {
$instagram_id = $this->get_instagram_id_from_url( $node->getAttribute( 'data-instgrm-permalink' ) );

$new_node = AMP_DOM_Utils::create_node( $dom, $this->amp_tag, array(
'data-shortcode' => $instagram_id,
'layout' => 'responsive',
'width' => $this->DEFAULT_WIDTH,
'height' => $this->DEFAULT_HEIGHT,
) );

$this->sanitize_embed_script( $node );

$node->parentNode->replaceChild( $new_node, $node );

$this->did_convert_elements = true;
}

/**
* Removes Instagram's embed <script> tag.
*
* @param DOMElement $node The DOMNode to whose sibling is the instagram script.
*/
private function sanitize_embed_script( $node ) {
$next_element_sibling = $node->nextSibling;
while ( $next_element_sibling && ! ( $next_element_sibling instanceof DOMElement ) ) {
$next_element_sibling = $next_element_sibling->nextSibling;
}

$script_src = 'instagram.com/embed.js';

// Handle case where script is wrapped in paragraph by wpautop.
if ( $next_element_sibling instanceof DOMElement && 'p' === $next_element_sibling->nodeName ) {
$children = $next_element_sibling->getElementsByTagName( '*' );
if ( 1 === $children->length && 'script' === $children->item( 0 )->nodeName && false !== strpos( $children->item( 0 )->getAttribute( 'src' ), $script_src ) ) {
$next_element_sibling->parentNode->removeChild( $next_element_sibling );
return;
}
}

// Handle case where script is immediately following.
$is_embed_script = (
$next_element_sibling
&&
'script' === strtolower( $next_element_sibling->nodeName )
&&
false !== strpos( $next_element_sibling->getAttribute( 'src' ), $script_src )
);
if ( $is_embed_script ) {
$next_element_sibling->parentNode->removeChild( $next_element_sibling );
}
}
}
Loading