Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of Tumblr embeds #5926

Merged
merged 28 commits into from
Mar 4, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9de2cfc
Sanitize Tumblr embeds
pierlon Feb 25, 2021
a414ddf
Fix lint errors
pierlon Feb 25, 2021
ebd31df
Add test for `https://teded.tumblr.com/post/184736320764/how-do-vacci…
pierlon Feb 25, 2021
9125cd6
Use `tagName` instead of `nodeName`
pierlon Feb 25, 2021
ca74ffa
Remove escaping of text
pierlon Feb 25, 2021
da37307
Make use of tagName, use constants where possible, add phpdoc
westonruter Feb 25, 2021
1475347
Check if `tumblr-post` is apart of the list of classes
pierlon Feb 25, 2021
c330597
Use callback to determine if script should be removed
pierlon Feb 26, 2021
7df8bbe
Use button as overflow element
pierlon Feb 26, 2021
3323625
Add bottom:0 to Tumblr overflow button
westonruter Feb 26, 2021
8ae2574
Make xpath predicate more specific and improve condition formatting
westonruter Mar 3, 2021
005ea45
Remove extraneous parentheses
westonruter Mar 3, 2021
5c4b667
Apply style rules for AMP iframe resize button via core sanitizer
pierlon Mar 3, 2021
6d4bef6
Fix indentation
pierlon Mar 3, 2021
f56f10c
Set button type
pierlon Mar 3, 2021
2740d7e
Add style rule to place overflow button in bottom left corner of AMP …
pierlon Mar 4, 2021
7da8a35
Make sure TT1 stylesheet is enqueued before adding inline style
westonruter Mar 4, 2021
f642be8
Opt to combine add_twentytwentyone_overflow_button_fix into amend_twe…
westonruter Mar 4, 2021
0c6666e
Make sure that Tumblr embed has a link to use as a placeholder
westonruter Mar 4, 2021
685bdc9
Make use of MarkupComparison
westonruter Mar 4, 2021
435b6ad
Test removal of Tumblr script when wpautop was not involved
westonruter Mar 4, 2021
bcbdf00
Add test for tumblr in embed block
westonruter Mar 4, 2021
8fa44dd
Remove extraneous sprintf()
westonruter Mar 4, 2021
c482fcb
Combine conditions
westonruter Mar 4, 2021
ce2b8b4
Use xpath query to obtain the placeholder
westonruter Mar 4, 2021
77c7367
Set placeholder attribute when we are sure we will match
westonruter Mar 4, 2021
8839cc1
Remove extraneous sprintf()
westonruter Mar 4, 2021
ad2e9be
Fix tests in WP 4.9
westonruter Mar 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions includes/embeds/class-amp-base-embed-handler.php
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,58 @@ protected function unwrap_p_element( DOMElement $node ) {
$parent_node->parentNode->replaceChild( $node, $parent_node );
}
}

/**
* Removes the node's nearest `<script>` sibling with a `src` attribute containing the base `src` URL provided.
*
* @since 2.1
*
* @param DOMElement $node The DOMNode to whose sibling is the script to be removed.
* @param string $base_src_url Script URL to match against.
* @param string $content Text content of node to match against.
*/
protected function maybe_remove_script_sibling( DOMElement $node, $base_src_url, $content = '' ) {
pierlon marked this conversation as resolved.
Show resolved Hide resolved
$next_element_sibling = $node->nextSibling;

while ( $next_element_sibling && ! ( $next_element_sibling instanceof DOMElement ) ) {
$next_element_sibling = $next_element_sibling->nextSibling;
}

// Handle case where script is wrapped in paragraph by wpautop.
if ( $next_element_sibling instanceof DOMElement && 'p' === $next_element_sibling->nodeName ) {
pierlon marked this conversation as resolved.
Show resolved Hide resolved
$children_elements = array_values(
array_filter(
iterator_to_array( $next_element_sibling->childNodes ),
static function ( DOMNode $child ) {
return $child instanceof DOMElement;
}
)
);

if (
1 === count( $children_elements ) &&
'script' === $children_elements[0]->nodeName &&
(
( $base_src_url && false !== strpos( $children_elements[0]->getAttribute( 'src' ), $base_src_url ) ) ||
( $content && false !== strpos( $children_elements[0]->textContent, $content ) )
)
) {
$next_element_sibling->parentNode->removeChild( $next_element_sibling );
return;
}
}

// Handle case where script is immediately following.
pierlon marked this conversation as resolved.
Show resolved Hide resolved
$is_embed_script = (
$next_element_sibling instanceof DOMElement &&
'script' === strtolower( $next_element_sibling->nodeName ) &&
(
( $base_src_url && false !== strpos( $next_element_sibling->getAttribute( 'src' ), $base_src_url ) ) ||
( $content && false !== strpos( $next_element_sibling->textContent, $content ) )
)
);
if ( $is_embed_script ) {
$next_element_sibling->parentNode->removeChild( $next_element_sibling );
}
}
}
89 changes: 67 additions & 22 deletions includes/embeds/class-amp-tumblr-embed-handler.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,56 +6,101 @@
* @since 0.7
*/

use AmpProject\Dom\Document;

/**
* Class AMP_Tumblr_Embed_Handler
*
* @internal
*/
class AMP_Tumblr_Embed_Handler extends AMP_Base_Embed_Handler {

/**
* Default width.
*
* Tumblr embeds for web have a fixed width of 540px.
* See <https://tumblr.zendesk.com/hc/en-us/articles/226261028-Embed-pro-tips>.
*
* @var int
*/
protected $DEFAULT_WIDTH = 540;

/**
* Base URL used for identifying embeds.
*
* @var string
*/
protected $base_embed_url = 'https://embed.tumblr.com/embed/post/';

/**
* Register embed.
*/
public function register_embed() {
add_filter( 'embed_oembed_html', [ $this, 'filter_embed_oembed_html' ], 10, 2 );
// Not implemented.
}

/**
* Unregister embed.
*/
public function unregister_embed() {
remove_filter( 'embed_oembed_html', [ $this, 'filter_embed_oembed_html' ], 10 );
// Not implemented.
}

/**
* Filter oEmbed HTML for Tumblr to prepare it for AMP.
* Sanitizes Tumblr raw embeds to make them AMP compatible.
*
* @param string $cache Cache for oEmbed.
* @param string $url Embed URL.
* @return string Embed.
* @param Document $dom DOM.
*/
public function filter_embed_oembed_html( $cache, $url ) {
$parsed_url = wp_parse_url( $url );
if ( false === strpos( $parsed_url['host'], 'tumblr.com' ) ) {
return $cache;
public function sanitize_raw_embeds( Document $dom ) {
$nodes = $dom->xpath->query( sprintf( '//div[ @class = "tumblr-post" and starts-with( @data-href, "%s" ) ]', $this->base_embed_url ) );
pierlon marked this conversation as resolved.
Show resolved Hide resolved

if ( 0 === $nodes->length ) {
return;
}

// @todo The iframe will not get sized properly.
if ( preg_match( '#data-href="(?P<href>https://embed.tumblr.com/embed/post/\w+/\w+)"#', $cache, $matches ) ) {
$cache = AMP_HTML_Utils::build_tag(
foreach ( $nodes as $node ) {
$iframe_src = $node->getAttribute( 'data-href' );

$attributes = [
'src' => $iframe_src,
'layout' => 'responsive',
'width' => $this->args['width'],
'height' => $this->args['height'],
'resizable' => '',
'sandbox' => 'allow-scripts allow-popups allow-same-origin',
];

$amp_node = AMP_DOM_Utils::create_node(
$dom,
'amp-iframe',
$attributes
);

// Add an overflow node to allow the amp-iframe to resize.
$overflow_node = AMP_DOM_Utils::create_node(
$dom,
'div',
[
'width' => $this->args['width'],
'height' => $this->args['height'],
'layout' => 'responsive',
'sandbox' => 'allow-scripts allow-popups', // The allow-scripts is needed to allow the iframe to render; allow-popups needed to allow clicking.
'src' => $matches['href'],
],
sprintf( '<a placeholder href="%s">Tumblr</a>', $url )
'overflow' => '',
'tabindex' => 0,
'role' => 'button',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can rather use a button here. Then we won't need the role nor tabindex attributes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 7df8bbe. Here's how it looks in Twenty Twenty with the button:

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very good.

'aria-label' => esc_attr__( 'See more', 'amp' ),
pierlon marked this conversation as resolved.
Show resolved Hide resolved
]
);
}
$overflow_node->textContent = esc_html__( 'See more', 'amp' );
pierlon marked this conversation as resolved.
Show resolved Hide resolved
$amp_node->appendChild( $overflow_node );

// Append the original link as a placeholder node.
if ( $node->firstChild instanceof DOMElement && 'a' === $node->firstChild->nodeName ) {
$placeholder_node = $node->firstChild;
$placeholder_node->setAttribute( 'placeholder', '' );
$amp_node->appendChild( $placeholder_node );
}

return $cache;
$this->maybe_remove_script_sibling( $node, 'assets.tumblr.com/post.js' );
pierlon marked this conversation as resolved.
Show resolved Hide resolved

$node->parentNode->replaceChild( $amp_node, $node );
}
}
}

155 changes: 155 additions & 0 deletions tests/php/test-amp-tumblr-embed-handler.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
<?php
/**
* Class AMP_Tumblr_Embed_Handler_Test
*
* @package AMP
*/

/**
* Tests for Tumblr embeds.
*
* @coversDefaultClass \AMP_Tumblr_Embed_Handler
*/
class AMP_Tumblr_Embed_Handler_Test extends WP_UnitTestCase {

/**
* Set up.
*/
public function setUp() {
parent::setUp();

// Mock the HTTP request.
add_filter( 'pre_http_request', [ $this, 'mock_http_request' ], 10, 3 );
}

/**
* Tear down.
*/
public function tearDown() {
remove_filter( 'pre_http_request', [ $this, 'mock_http_request' ] );
parent::tearDown();
}

/**
* Mock HTTP request.
*
* @param mixed $pre Whether to preempt an HTTP request's return value. Default false.
* @param mixed $r HTTP request arguments.
* @param string $url The request URL.
* @return array Response data.
*/
public function mock_http_request( $pre, $r, $url ) {
if ( in_array( 'external-http', $_SERVER['argv'], true ) ) {
return $pre;
}

if ( false === strpos( $url, 'tumblr.com' ) ) {
return $pre;
}

if ( false !== strpos( $url, 'grant-wood-american-gothic' ) ) {
$body = '{"cache_age":3600,"url":"https:\/\/ifpaintingscouldtext.tumblr.com\/post\/92003045635\/grant-wood-american-gothic-1930","provider_url":"https:\/\/www.tumblr.com","provider_name":"Tumblr","author_name":"If Paintings Could Text","version":"1.0","author_url":"https:\/\/ifpaintingscouldtext.tumblr.com\/","type":"rich","html":"\u003Cdiv class=\u0022tumblr-post\u0022 data-href=\u0022https:\/\/embed.tumblr.com\/embed\/post\/2JT2XTaiTxO08wh21dqQrw\/92003045635\u0022 data-did=\u00227ce4825965cbd8bfd208f6aae43de7a528859aee\u0022 \u003E\u003Ca href=\u0022https:\/\/ifpaintingscouldtext.tumblr.com\/post\/92003045635\/grant-wood-american-gothic-1930\u0022\u003Ehttps:\/\/ifpaintingscouldtext.tumblr.com\/post\/92003045635\/grant-wood-american-gothic-1930\u003C\/a\u003E\u003C\/div\u003E\u003Cscript async src=\u0022https:\/\/assets.tumblr.com\/post.js\u0022\u003E\u003C\/script\u003E","height":null,"width":540}';
} elseif ( false !== strpos( $url, 'how-do-vaccines-work' ) ) {
$body = '{"cache_age":3600,"url":"https:\/\/teded.tumblr.com\/post\/184736320764\/how-do-vaccines-work","provider_url":"https:\/\/www.tumblr.com","provider_name":"Tumblr","author_name":"TED-Ed - Gifs worth sharing","version":"1.0","author_url":"https:\/\/teded.tumblr.com\/","type":"rich","html":"\u003Cdiv class=\u0022tumblr-post\u0022 data-href=\u0022https:\/\/embed.tumblr.com\/embed\/post\/O6_eRR6K-z9QGTzdU5HrhQ\/184736320764\u0022 data-did=\u0022523d09cda8bc0da2f871ffea606ff71c80405725\u0022 \u003E\u003Ca href=\u0022https:\/\/teded.tumblr.com\/post\/184736320764\/how-do-vaccines-work\u0022\u003Ehttps:\/\/teded.tumblr.com\/post\/184736320764\/how-do-vaccines-work\u003C\/a\u003E\u003C\/div\u003E\u003Cscript async src=\u0022https:\/\/assets.tumblr.com\/post.js\u0022\u003E\u003C\/script\u003E","height":null,"width":540}';
} else {
$body = '';
}

return [
'body' => $body,
'response' => [
'code' => 200,
'message' => 'OK',
],
];
}

/**
* Get conversion data.
*
* @return array
*/
public function get_conversion_data() {
return [
'no_embed' => [
'<p>Hello world.</p>',
'<p>Hello world.</p>' . PHP_EOL,
],

'url_simple' => [
'https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930' . PHP_EOL,
westonruter marked this conversation as resolved.
Show resolved Hide resolved
'<amp-iframe src="https://embed.tumblr.com/embed/post/2JT2XTaiTxO08wh21dqQrw/92003045635" layout="responsive" width="540" height="480" resizable="" sandbox="allow-scripts allow-popups allow-same-origin"><div overflow="" tabindex="0" role="button" aria-label="See more">See more</div><a href="https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930" placeholder="">https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930</a></amp-iframe>' . PHP_EOL . PHP_EOL,
],

'url_simple_2' => [
'https://teded.tumblr.com/post/184736320764/how-do-vaccines-work' . PHP_EOL,
'<amp-iframe src="https://embed.tumblr.com/embed/post/O6_eRR6K-z9QGTzdU5HrhQ/184736320764" layout="responsive" width="540" height="480" resizable="" sandbox="allow-scripts allow-popups allow-same-origin"><div overflow="" tabindex="0" role="button" aria-label="See more">See more</div><a href="https://teded.tumblr.com/post/184736320764/how-do-vaccines-work" placeholder="">https://teded.tumblr.com/post/184736320764/how-do-vaccines-work</a></amp-iframe>' . PHP_EOL . PHP_EOL,
],
];
}

/**
* Test conversion.
*
* @covers ::sanitize_raw_embeds()
* @dataProvider get_conversion_data
*
* @param string $source Source.
* @param string $expected Expected content.
*/
public function test__conversion( $source, $expected ) {
$embed = new AMP_Tumblr_Embed_Handler();

$filtered_content = apply_filters( 'the_content', $source );
$dom = AMP_DOM_Utils::get_dom_from_content( $filtered_content );
$embed->sanitize_raw_embeds( $dom );

$content = AMP_DOM_Utils::get_content_from_dom( $dom );

$this->assertEquals( $expected, $content );
}

/**
* Data for test_get_scripts().
*
* @return array Data.
*/
public function get_scripts_data() {
return [
'not_converted' => [
'<p>Hello World.</p>',
[],
],
'converted' => [
'https://ifpaintingscouldtext.tumblr.com/post/92003045635/grant-wood-american-gothic-1930' . PHP_EOL,
[ 'amp-iframe' => true ],
],
];
}

/**
* Test AMP_Tag_And_Attribute_Sanitizer::get_scripts().
*
* @dataProvider get_scripts_data
*
* @param string $source Source content.
* @param array $expected Expected scripts.
*/
public function test_get_scripts( $source, $expected ) {
$embed = new AMP_Tumblr_Embed_Handler();

$filtered_content = apply_filters( 'the_content', $source );
$dom = AMP_DOM_Utils::get_dom_from_content( $filtered_content );
$embed->sanitize_raw_embeds( $dom );

$whitelist_sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom );
$whitelist_sanitizer->sanitize();

$scripts = array_merge(
$embed->get_scripts(),
$whitelist_sanitizer->get_scripts()
);

$this->assertEquals( $expected, $scripts );
}
}