-
Notifications
You must be signed in to change notification settings - Fork 384
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for comments #871
Changes from 3 commits
7ec60e4
eed214f
4067b59
a8d2ad3
98874ba
4f6dcd6
317f90f
451db91
0541686
2c8575e
ac0cc53
2741fdf
ebffaae
9790be3
b425b19
b7c5963
94aee02
d0fc6c5
6b660b3
69d1342
4200d9e
366bef3
7b63836
8b289c1
15d327d
561189b
a1bf8d3
b40e932
dc1f9c3
187e136
82e8021
d4fd8bc
bbabbd7
0fd1ab5
9f99abe
9216e4e
fafa679
9536794
74ae3b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
<?php | ||
/** | ||
* Class AMP_Form_Sanitizer. | ||
* | ||
* @package AMP | ||
*/ | ||
|
||
/** | ||
* Class AMP_Form_Sanitizer | ||
* | ||
* Strips and corrects attributes in forms. | ||
*/ | ||
class AMP_Form_Sanitizer extends AMP_Base_Sanitizer { | ||
|
||
/** | ||
* Tag. | ||
* | ||
* @var string HTML <form> tag to identify and process. | ||
* | ||
* @since 0.2 | ||
*/ | ||
public static $tag = 'form'; | ||
|
||
/** | ||
* Sanitize the <form> elements from the HTML contained in this instance's DOMDocument. | ||
* | ||
* @since 0.2 | ||
*/ | ||
public function sanitize() { | ||
|
||
/** | ||
* Node list. | ||
* | ||
* @var DOMNodeList $nodes | ||
*/ | ||
$nodes = $this->dom->getElementsByTagName( self::$tag ); | ||
$num_nodes = $nodes->length; | ||
|
||
if ( 0 === $num_nodes ) { | ||
return; | ||
} | ||
|
||
for ( $i = $num_nodes - 1; $i >= 0; $i-- ) { | ||
$node = $nodes->item( $i ); | ||
if ( ! $node instanceof DOMElement ) { | ||
continue; | ||
} | ||
|
||
if ( ! $node->hasAttribute( 'action' ) || '' === $node->getAttribute( 'action' ) ) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Per the spec: https://www.ampproject.org/docs/reference/components/amp-form#target
Is the second part of this conditional intended to check if $method = 'get';
if ( $node->hasAttribute( 'method' ) ) {
$method = strtolower( $node->getAttribute( 'method' ) );
} And then use the |
||
$node->parentNode->removeChild( $node ); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this is right. If |
||
continue; | ||
} | ||
|
||
// Correct action. | ||
if ( $node->hasAttribute( 'action' ) && ! $node->hasAttribute( 'action-xhr' ) ) { | ||
$action_url = $node->getAttribute( 'action' ); | ||
$action_url = str_replace( 'http:', '', $action_url ); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's possible that |
||
$node->setAttribute( 'action', $action_url ); | ||
|
||
if ( 'post' === $node->getAttribute( 'method' ) ) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Per above, this could compare with |
||
$node->setAttribute( 'action-xhr', $action_url ); | ||
$node->removeAttribute( 'action' ); | ||
} | ||
} | ||
|
||
// Set a target if needed. | ||
if ( ! $node->hasAttribute( 'target' ) ) { | ||
$node->setAttribute( 'target', '_blank' ); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Per the spec: https://www.ampproject.org/docs/reference/components/amp-form#target
The default I think should be if ( ! in_array( $node->hasAttribute( 'target' ), array( '_top', '_blank' ) ) ) {
$node->setAttribute( 'target', '_top' ); |
||
} | ||
} | ||
|
||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,17 +13,15 @@ | |
class AMP_DOM_Utils { | ||
|
||
/** | ||
* Return a valid DOMDocument representing arbitrary HTML content passed as a parameter. | ||
* | ||
* @see Reciprocal function get_content_from_dom() | ||
* Return a valid DOMDocument representing HTML document passed as a parameter. | ||
* | ||
* @since 0.2 | ||
* @since 0.7 | ||
* | ||
* @param string $content Valid HTML content to be represented by a DOMDocument. | ||
* @param string $document Valid HTML document to be represented by a DOMDocument. | ||
* | ||
* @return DOMDocument|false Returns DOMDocument, or false if conversion failed. | ||
*/ | ||
public static function get_dom_from_content( $content ) { | ||
public static function get_dom( $document ) { | ||
$libxml_previous_state = libxml_use_internal_errors( true ); | ||
|
||
$dom = new DOMDocument(); | ||
|
@@ -35,7 +33,7 @@ public static function get_dom_from_content( $content ) { | |
* Add utf-8 charset so loadHTML does not have problems parsing it. | ||
* See: http://php.net/manual/en/domdocument.loadhtml.php#78243 | ||
*/ | ||
$result = $dom->loadHTML( '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>' . $content . '</body></html>' ); | ||
$result = $dom->loadHTML( $document ); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changes here are conflicting in |
||
|
||
libxml_clear_errors(); | ||
libxml_use_internal_errors( $libxml_previous_state ); | ||
|
@@ -47,6 +45,31 @@ public static function get_dom_from_content( $content ) { | |
return $dom; | ||
} | ||
|
||
/** | ||
* Return a valid DOMDocument representing arbitrary HTML content passed as a parameter. | ||
* | ||
* @see Reciprocal function get_content_from_dom() | ||
* | ||
* @since 0.2 | ||
* | ||
* @param string $content Valid HTML content to be represented by a DOMDocument. | ||
* | ||
* @return DOMDocument|false Returns DOMDocument, or false if conversion failed. | ||
*/ | ||
public static function get_dom_from_content( $content ) { | ||
/* | ||
* Wrap the content in dummy <html>/<head>/<body> tags, since XML needs one parent node. | ||
* It also makes it easier to loop through nodes. | ||
* We can later use this to extract our nodes. | ||
* The <meta> tag adds a utf-8 charset so loadHTML does not have problems parsing it. | ||
* See: http://php.net/manual/en/domdocument.loadhtml.php#78243 | ||
*/ | ||
$document = '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>' . $content . '</body></html>'; | ||
|
||
// Parsing is delegated to get_dom(); its result is returned unchanged. | ||
return self::get_dom( $document ); | ||
|
||
} | ||
|
||
/** | ||
* Return valid HTML content extracted from the DOMDocument passed as a parameter. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@DavidCramer I chatted about this with @ThierryA, but I think we should avoid having to load the entire response into the DOM for sanitization. I think that we should limit sanitization to just the post content, 3rd-party widgets, and other “leaf nodes” which we know are liable to be invalid AMP.
Is this right? Or am I off track? My worry is that loading the HTML into a DOMDocument for every request will be too heavy. But at the same time, if we're already spinning up a DOMDocument for every instance of
the_content()
maybe it is actually better to just do one for the entire HTML. This would certainly simplify some things for sanitizing widgets (cc @kienstra). It could also mean that we wouldn't have to have this ad hoc logic to search for elements requiring components: https://github.com/Automattic/amp-wp/blob/e60cb152a50e2ffbf5504d41aee859ad1d0e0baa/includes/class-amp-theme-support.php#L448-L455
Maybe we should decide to not prematurely optimize and instead keep things simple at first and just go ahead and sanitize the entire response. Thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See findings at #875 (comment)
It seems we should definitely not sanitize the entire output buffer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@westonruter what's the consensus now on this? It looks like your findings indicated it may be a good idea to sanitize the whole buffer output. I personally think this is the most efficient. I spoke with @ThierryA about this yesterday and he was going to do some tests, like you did.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm leaning toward sanitizing the entire output buffer now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@DavidCramer do you want to build off of #875 (comment) in a new PR to serve as a basis for what you're introducing here with form sanitization?