diff --git a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php index 460c15f695d..7d8df775321 100644 --- a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php +++ b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php @@ -67,15 +67,6 @@ class AMP_Tag_And_Attribute_Sanitizer extends AMP_Base_Sanitizer { */ protected $visited_unique_tag_specs = []; - /** - * Stack. - * - * @since 0.5 - * - * @var DOMElement[] - */ - private $stack = []; - /** * Default args. * @@ -99,6 +90,18 @@ class AMP_Tag_And_Attribute_Sanitizer extends AMP_Base_Sanitizer { */ protected $should_not_replace_nodes = []; + /** + * Keep track of the elements that are currently open. + * + * This is used to determine whether a node exists inside of a given element tree, speeding up has_ancestor checks + * as well as disabling attribute validation inside of templates. + * + * @see \AMP_Tag_And_Attribute_Sanitizer::has_ancestor() + * @since 1.3 + * @var array + */ + protected $open_elements = []; + /** * AMP_Tag_And_Attribute_Sanitizer constructor. * @@ -252,35 +255,65 @@ private function process_alternate_names( $attr_spec_list ) { * @since 0.5 */ public function sanitize() { + $result = $this->sanitize_element( $this->root_element ); + if ( is_array( $result ) ) { + $this->script_components = $result; + } + } - // Add root of content to the stack. - $this->stack[] = $this->root_element; - - /** - * This loop traverses through the DOM tree iteratively. - */ - while ( ! empty( $this->stack ) ) { + /** + * Sanitize element. + * + * Walk the DOM tree with depth first search (DFS) with post order traversal (LRN). + * + * @param DOMElement $element Element. + * @return string[]|null Required component scripts from sanitizing an element tree, or null if the element was removed. + */ + private function sanitize_element( DOMElement $element ) { + if ( ! isset( $this->open_elements[ $element->nodeName ] ) ) { + $this->open_elements[ $element->nodeName ] = 0; + } + $this->open_elements[ $element->nodeName ]++; + + $script_components = []; + + // First recurse into children to sanitize descendants. + // The check for $element->parentNode at each iteration is to make sure an invalid child didn't bubble up removed + // ancestor nodes in AMP_Tag_And_Attribute_Sanitizer::remove_node(). + $this_child = $element->firstChild; + while ( $this_child && $element->parentNode ) { + $next_child = $this_child->nextSibling; + if ( $this_child instanceof DOMElement ) { + $result = $this->sanitize_element( $this_child ); + if ( is_array( $result ) ) { + $script_components = array_merge( + $script_components, + $result + ); + } + } + $this_child = $next_child; + } - // Get the next node to process. - $node = array_shift( $this->stack ); + // If the element is still in the tree, process it. + // The element can currently be removed from the tree when processing children via AMP_Tag_And_Attribute_Sanitizer::remove_node(). + $was_removed = false; + if ( $element->parentNode ) { + $result = $this->process_node( $element ); + if ( is_array( $result ) ) { + $script_components = array_merge( $script_components, $result ); + } else { + $was_removed = true; + } + } - /** - * Process this node. - */ - $this->process_node( $node ); + $this->open_elements[ $element->nodeName ]--; - /* - * Push child nodes onto the stack, if any exist. - * if node was removed, then it's parentNode value is null. - */ - if ( $node->parentNode ) { - $child = $node->firstChild; - while ( $child ) { - $this->stack[] = $child; - $child = $child->nextSibling; - } - } + if ( $was_removed ) { + return null; } + + return $script_components; } /** @@ -292,7 +325,7 @@ public function sanitize() { * @param array $rule_spec Rule spec. * @return array Augmented rule spec. */ - private function get_rule_spec_list_to_validate( $node, $rule_spec ) { + private function get_rule_spec_list_to_validate( DOMElement $node, $rule_spec ) { // Expand extension_spec into a set of attr_spec_list. if ( isset( $rule_spec[ AMP_Rule_Spec::TAG_SPEC ]['extension_spec'] ) ) { @@ -367,14 +400,10 @@ private function get_rule_spec_list_to_validate( $node, $rule_spec ) { * Attributes which are not valid are removed. Elements which are not allowed are also removed, * including elements which miss mandatory attributes. * - * @param DOMNode $node Node. + * @param DOMElement $node Node. + * @return string[]|null Required scripts, or null if the element was removed. */ - private function process_node( $node ) { - - // Don't process text or comment nodes. - if ( XML_TEXT_NODE === $node->nodeType || XML_COMMENT_NODE === $node->nodeType || XML_CDATA_SECTION_NODE === $node->nodeType ) { - return; - } + private function process_node( DOMElement $node ) { // Remove nodes with tags that have not been whitelisted. if ( ! $this->is_amp_allowed_tag( $node ) ) { @@ -383,15 +412,9 @@ private function process_node( $node ) { $this->replace_node_with_children( $node ); // Return early since this node no longer exists. - return; + return null; } - /** - * Node is now an element. - * - * @var DOMElement $node - */ - /* * Compile a list of rule_specs to validate for this node * based on tag name of the node. @@ -410,7 +433,7 @@ private function process_node( $node ) { // If no valid rule_specs exist, then remove this node and return. if ( empty( $rule_spec_list_to_validate ) ) { $this->remove_node( $node ); - return; + return null; } // The remaining validations all have to do with attributes. @@ -451,7 +474,7 @@ private function process_node( $node ) { // If no attribute spec lists match, then the element must be removed. if ( empty( $attr_spec_scores ) ) { $this->remove_node( $node ); - return; + return null; } // Get the key(s) to the highest score(s). @@ -490,7 +513,7 @@ private function process_node( $node ) { if ( ! empty( $attr_spec_list ) && $this->is_missing_mandatory_attribute( $attr_spec_list, $node ) ) { $this->remove_node( $node ); - return; + return null; } // Remove element if it has illegal CDATA. @@ -498,7 +521,7 @@ private function process_node( $node ) { $validity = $this->validate_cdata_for_node( $node, $cdata ); if ( is_wp_error( $validity ) ) { $this->remove_node( $node ); - return; + return null; } } @@ -530,7 +553,7 @@ private function process_node( $node ) { } $this->visited_unique_tag_specs[ $node->nodeName ][ $tag_spec_key ] = true; if ( $removed ) { - return; + return null; } } @@ -543,7 +566,7 @@ private function process_node( $node ) { // If $disallowed_attributes is false then the entire element should be removed. if ( false === $disallowed_attributes ) { $this->remove_node( $node ); - return; + return null; } // Remove all invalid attributes. @@ -563,37 +586,41 @@ private function process_node( $node ) { } } - // Add required AMP component scripts if the element is still in the document. - if ( $node->parentNode ) { - if ( ! empty( $tag_spec['also_requires_tag_warning'] ) ) { - $this->script_components[] = strtok( $tag_spec['also_requires_tag_warning'][0], ' ' ); - } - if ( ! empty( $tag_spec['requires_extension'] ) ) { - $this->script_components = array_merge( $this->script_components, $tag_spec['requires_extension'] ); - } + // Add required AMP component scripts. + $script_components = []; + if ( ! empty( $tag_spec['also_requires_tag_warning'] ) ) { + $script_components[] = strtok( $tag_spec['also_requires_tag_warning'][0], ' ' ); + } + if ( ! empty( $tag_spec['requires_extension'] ) ) { + $script_components = array_merge( $script_components, $tag_spec['requires_extension'] ); + } - // Add required AMP components for attributes. - foreach ( $node->attributes as $attribute ) { - if ( isset( $merged_attr_spec_list[ $attribute->nodeName ]['requires_extension'] ) ) { - $this->script_components = array_merge( $this->script_components, $merged_attr_spec_list[ $attribute->nodeName ]['requires_extension'] ); - } + // Add required AMP components for attributes. + foreach ( $node->attributes as $attribute ) { + if ( isset( $merged_attr_spec_list[ $attribute->nodeName ]['requires_extension'] ) ) { + $script_components = array_merge( + $script_components, + $merged_attr_spec_list[ $attribute->nodeName ]['requires_extension'] + ); } + } - // Manually add components for attributes; this is hard-coded because attributes do not have requires_extension like tags do. See . - if ( $node->hasAttribute( 'lightbox' ) ) { - $this->script_components[] = 'amp-lightbox-gallery'; - } + // Manually add components for attributes; this is hard-coded because attributes do not have requires_extension like tags do. See . + if ( $node->hasAttribute( 'lightbox' ) ) { + $script_components[] = 'amp-lightbox-gallery'; + } - // Check if element needs amp-bind component. - if ( $node instanceof DOMElement && ! in_array( 'amp-bind', $this->script_components, true ) ) { - foreach ( $node->attributes as $name => $value ) { - if ( AMP_DOM_Utils::AMP_BIND_DATA_ATTR_PREFIX === substr( $name, 0, 14 ) ) { - $this->script_components[] = 'amp-bind'; - break; - } + // Check if element needs amp-bind component. + if ( $node instanceof DOMElement && ! in_array( 'amp-bind', $this->script_components, true ) ) { + foreach ( $node->attributes as $name => $value ) { + if ( AMP_DOM_Utils::AMP_BIND_DATA_ATTR_PREFIX === substr( $name, 0, 14 ) ) { + $script_components[] = 'amp-bind'; + break; } } } + + return $script_components; } /** @@ -603,7 +630,7 @@ private function process_node( $node ) { * @param DOMElement $node The DOMElement of the node to check. * @return boolean $is_missing boolean Whether a required attribute is missing. */ - public function is_missing_mandatory_attribute( $attr_spec, $node ) { + public function is_missing_mandatory_attribute( $attr_spec, DOMElement $node ) { if ( ! is_array( $attr_spec ) ) { return false; } @@ -640,7 +667,7 @@ public function is_missing_mandatory_attribute( $attr_spec, $node ) { * @param array $cdata_spec CDATA. * @return true|WP_Error True when valid or error when invalid. */ - private function validate_cdata_for_node( $element, $cdata_spec ) { + private function validate_cdata_for_node( DOMElement $element, $cdata_spec ) { if ( isset( $cdata_spec['max_bytes'] ) && strlen( $element->textContent ) > $cdata_spec['max_bytes'] ) { return new WP_Error( 'excessive_bytes' ); } @@ -667,11 +694,11 @@ private function validate_cdata_for_node( $element, $cdata_spec ) { * * @since 0.5 * - * @param DOMNode $node The node to validate. - * @param array $tag_spec The specification. + * @param DOMElement $node The node to validate. + * @param array $tag_spec The specification. * @return boolean $valid Whether the node's placement is valid. */ - private function validate_tag_spec_for_node( $node, $tag_spec ) { + private function validate_tag_spec_for_node( DOMElement $node, $tag_spec ) { if ( ! empty( $tag_spec[ AMP_Rule_Spec::MANDATORY_PARENT ] ) && ! $this->has_parent( $node, $tag_spec[ AMP_Rule_Spec::MANDATORY_PARENT ] ) ) { return false; @@ -711,12 +738,12 @@ private function validate_tag_spec_for_node( $node, $tag_spec ) { * * @note This can be a very expensive function. Use it sparingly. * - * @param DOMNode $node Node. - * @param array[] $attr_spec_list Attribute Spec list. + * @param DOMElement $node Node. + * @param array[] $attr_spec_list Attribute Spec list. * * @return float Number of times the attribute spec list matched. If there was a mismatch, then 0 is returned. 0.5 is returned if there is an implicit match. */ - private function validate_attr_spec_list_for_node( $node, $attr_spec_list ) { + private function validate_attr_spec_list_for_node( DOMElement $node, $attr_spec_list ) { /* * If node has no attributes there is no point in continuing, but if none of attributes * in the spec list are mandatory, then we give this a score. @@ -730,14 +757,6 @@ private function validate_attr_spec_list_for_node( $node, $attr_spec_list ) { return 0.5; } - if ( ! $node instanceof DOMElement ) { - /* - * A DOMNode is not valid for checks so might - * as well bail here is not an DOMElement. - */ - return 0; - } - foreach ( $node->attributes as $attr_name => $attr_node ) { if ( ! isset( $attr_spec_list[ $attr_name ][ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) { continue; @@ -936,20 +955,11 @@ private function validate_attr_spec_list_for_node( $node, $attr_spec_list ) { /** * Remove attributes from $node that are not listed in $allowed_attrs. * - * @param DOMNode $node Node. - * @param array[] $attr_spec_list Attribute spec list. + * @param DOMElement $node Node. + * @param array[] $attr_spec_list Attribute spec list. * @return DOMAttr[] Attributes to remove. */ - private function get_disallowed_attributes_in_node( $node, $attr_spec_list ) { - - if ( ! $node instanceof DOMElement ) { - /** - * If $node is only a DOMNode and not a DOMElement we can't - * remove an attribute from it anyway. So bail out now. - */ - return []; - } - + private function get_disallowed_attributes_in_node( DOMElement $node, $attr_spec_list ) { /* * We can't remove attributes inside the 'foreach' loop without * breaking the iteration. So we keep track of the attributes to @@ -970,42 +980,12 @@ private function get_disallowed_attributes_in_node( $node, $attr_spec_list ) { * * Allowed values are found $this->globally_allowed_attributes and in parameter $attr_spec_list * - * @param DOMNode $node Node. - * @param array[][] $attr_spec_list Attribute spec list. - * @param DOMAttr[] $attributes_pending_removal Attributes pending removal. - * @return DOMAttr[]|false Attributes to remove, or false if the element itself should be removed. - */ - private function sanitize_disallowed_attribute_values_in_node( $node, $attr_spec_list, $attributes_pending_removal ) { - - if ( ! $node instanceof DOMElement ) { - /* - * If $node is only a DOMNode and not a DOMElement we can't - * remove an attribute from it anyway. So bail out now. - */ - return $attributes_pending_removal; - } - - return $this->delegated_sanitize_disallowed_attribute_values_in_node( - $node, - array_merge( - $this->globally_allowed_attributes, - $attr_spec_list - ), - $attributes_pending_removal - ); - } - - /** - * Remove attributes values from $node that have been disallowed by AMP. - * - * @see $this->sanitize_disallowed_attribute_values_in_node() which delegates to this method - * * @param DOMElement $node Node. * @param array[][] $attr_spec_list Attribute spec list. * @param DOMAttr[] $attributes_pending_removal Attributes pending removal. * @return DOMAttr[]|false Attributes to remove, or false if the element itself should be removed. */ - private function delegated_sanitize_disallowed_attribute_values_in_node( $node, $attr_spec_list, $attributes_pending_removal ) { + private function sanitize_disallowed_attribute_values_in_node( DOMElement $node, $attr_spec_list, $attributes_pending_removal ) { $attrs_to_remove = []; foreach ( $attr_spec_list as $attr_name => $attr_val ) { @@ -1022,6 +1002,12 @@ private function delegated_sanitize_disallowed_attribute_values_in_node( $node, continue; } + // Check the context to see if we are currently within a template tag. + // If this is the case and the attribute value contains a template placeholder, we skip sanitization. + if ( ! empty( $this->open_elements['template'] ) && preg_match( '/{{.*?}}/', $attr_node->nodeValue ) ) { + continue; + } + $should_remove_node = false; $attr_spec_rule = $attr_spec_list[ $attr_name ]; @@ -1102,7 +1088,7 @@ private function delegated_sanitize_disallowed_attribute_values_in_node( $node, * - AMP_Rule_Spec::FAIL - $attr_name is mandatory, but doesn't exist * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name is not mandatory */ - private function check_attr_spec_rule_mandatory( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_mandatory( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::MANDATORY ] ) && $attr_spec_rule[ AMP_Rule_Spec::MANDATORY ] ) { if ( $node->hasAttribute( $attr_name ) ) { return AMP_Rule_Spec::PASS; @@ -1136,7 +1122,7 @@ private function check_attr_spec_rule_mandatory( $node, $attr_name, $attr_spec_r * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_value( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_value( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE ] ) ) { if ( $node->hasAttribute( $attr_name ) ) { if ( $this->check_matching_attribute_value( $attr_name, $node->getAttribute( $attr_name ), $attr_spec_rule[ AMP_Rule_Spec::VALUE ] ) ) { @@ -1211,7 +1197,7 @@ private function check_matching_attribute_value( $attr_name, $attr_value, $spec_ * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_value_casei( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_value_casei( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( ! isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_CASEI ] ) ) { return AMP_Rule_Spec::NOT_APPLICABLE; } @@ -1256,7 +1242,7 @@ private function check_attr_spec_rule_value_casei( $node, $attr_name, $attr_spec * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_value_regex( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_value_regex( DOMElement $node, $attr_name, $attr_spec_rule ) { // Check 'value_regex' - case sensitive regex match. if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX ] ) && $node->hasAttribute( $attr_name ) ) { $rule_value = $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX ]; @@ -1290,10 +1276,8 @@ private function check_attr_spec_rule_value_regex( $node, $attr_name, $attr_spec * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_value_regex_casei( $node, $attr_name, $attr_spec_rule ) { - /** - * Check 'value_regex_casei' - case insensitive regex match - */ + private function check_attr_spec_rule_value_regex_casei( DOMElement $node, $attr_name, $attr_spec_rule ) { + // Check 'value_regex_casei' - case insensitive regex match. if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX_CASEI ] ) && $node->hasAttribute( $attr_name ) ) { $rule_value = $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX_CASEI ]; $rule_value = str_replace( '/', '\\/', $rule_value ); @@ -1323,7 +1307,7 @@ private function check_attr_spec_rule_value_regex_casei( $node, $attr_name, $att * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_valid_url( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_valid_url( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) && $node->hasAttribute( $attr_name ) ) { foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $attr_name ) ) as $url ) { $url = urldecode( $url ); @@ -1385,7 +1369,7 @@ private function parse_protocol( $url ) { * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_allowed_protocol( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ] ) ) { if ( $node->hasAttribute( $attr_name ) ) { foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $attr_name ) ) as $url ) { @@ -1421,7 +1405,7 @@ private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr * @param string|null $spec_attr_name Non-alternative attribute name for the spec. * @return string[] List of URLs. */ - private function extract_attribute_urls( $attribute_node, $spec_attr_name = null ) { + private function extract_attribute_urls( DOMAttr $attribute_node, $spec_attr_name = null ) { /* * Handle the srcset special case where the attribute value can contain multiple parts, each in the format `URL [WIDTH] [PIXEL_DENSITY]`. * So we split the srcset attribute value by commas and then return the first token of each item, omitting width descriptor and pixel density descriptor. @@ -1456,7 +1440,7 @@ static function ( $srcset_part ) { * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_disallowed_relative( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_disallowed_relative( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) && ! $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) { if ( $node->hasAttribute( $attr_name ) ) { foreach ( $this->extract_attribute_urls( $node->getAttributeNode( $attr_name ) ) as $url ) { @@ -1506,7 +1490,7 @@ private function check_attr_spec_rule_disallowed_relative( $node, $attr_name, $a * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_disallowed_empty( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_disallowed_empty( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_EMPTY ] ) && ! $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_EMPTY ] && $node->hasAttribute( $attr_name ) ) { $attr_value = $node->getAttribute( $attr_name ); if ( empty( $attr_value ) ) { @@ -1530,7 +1514,7 @@ private function check_attr_spec_rule_disallowed_empty( $node, $attr_name, $attr * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_disallowed_domain( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_disallowed_domain( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::DISALLOWED_DOMAIN ] ) && $node->hasAttribute( $attr_name ) ) { $attr_value = $node->getAttribute( $attr_name ); $url_domain = wp_parse_url( $attr_value, PHP_URL_HOST ); @@ -1563,7 +1547,7 @@ private function check_attr_spec_rule_disallowed_domain( $node, $attr_name, $att * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_blacklisted_value_regex( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_blacklisted_value_regex( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::BLACKLISTED_VALUE_REGEX ] ) ) { $pattern = '/' . $attr_spec_rule[ AMP_Rule_Spec::BLACKLISTED_VALUE_REGEX ] . '/u'; if ( $node->hasAttribute( $attr_name ) ) { @@ -1605,7 +1589,7 @@ private function check_attr_spec_rule_blacklisted_value_regex( $node, $attr_name * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there * is no rule for this attribute. */ - private function check_attr_spec_rule_value_properties( $node, $attr_name, $attr_spec_rule ) { + private function check_attr_spec_rule_value_properties( DOMElement $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_PROPERTIES ] ) && $node->hasAttribute( $attr_name ) ) { $properties = []; foreach ( explode( ',', $node->getAttribute( $attr_name ) ) as $pair ) { @@ -1660,7 +1644,7 @@ private function check_attr_spec_rule_value_properties( $node, $attr_name, $attr * @param array[]|string[] $attr_spec_list Attribute spec list. * @return bool Return true if attribute name is valid for this attr_spec_list, false otherwise. */ - private function is_amp_allowed_attribute( $attr_node, $attr_spec_list ) { + private function is_amp_allowed_attribute( DOMAttr $attr_node, $attr_spec_list ) { $attr_name = $attr_node->nodeName; if ( isset( $attr_spec_list[ $attr_name ] ) @@ -1710,22 +1694,11 @@ private function is_amp_allowed_attribute( $attr_node, $attr_spec_list ) { * * @since 0.5 * - * @param DOMNode $node Node. + * @param DOMElement $node Node. * @return bool Return true if the specified node's name is an AMP allowed tag, false otherwise. */ - private function is_amp_allowed_tag( $node ) { - if ( ! $node instanceof DOMElement ) { - return false; - } - /** - * Return true if node is an allowed tags or is a text or comment node. - */ - return ( - ( XML_TEXT_NODE === $node->nodeType ) || - isset( $this->allowed_tags[ $node->nodeName ] ) || - ( XML_COMMENT_NODE === $node->nodeType ) || - ( XML_CDATA_SECTION_NODE === $node->nodeType ) - ); + private function is_amp_allowed_tag( DOMElement $node ) { + return isset( $this->allowed_tags[ $node->nodeName ] ); } /** @@ -1735,14 +1708,11 @@ private function is_amp_allowed_tag( $node ) { * * @todo It would be more robust if the the actual tag spec were looked up and then matched against the parent, but this is currently overkill. * - * @param DOMNode $node Node. - * @param string $parent_spec_name Parent spec name, for example 'body' or 'form [method=post]'. + * @param DOMElement $node Node. + * @param string $parent_spec_name Parent spec name, for example 'body' or 'form [method=post]'. * @return bool Return true if given node has direct parent with the given name, false otherwise. */ - private function has_parent( $node, $parent_spec_name ) { - if ( ! $node ) { - return false; - } + private function has_parent( DOMElement $node, $parent_spec_name ) { $parsed_spec_name = $this->parse_tag_and_attributes_from_spec_name( $parent_spec_name ); if ( ! $node->parentNode || $node->parentNode->nodeName !== $parsed_spec_name['tag_name'] ) { @@ -1764,12 +1734,12 @@ private function has_parent( $node, $parent_spec_name ) { * * @since 0.5 * - * @param DOMNode $node Node. - * @param string $ancestor_tag_name Ancestor tag name. + * @param DOMElement $node Node. + * @param string $ancestor_tag_spec_name Ancestor tag spec name. This looks somewhat like a CSS selector, e.g. 'form div [submitting][template]'. * @return bool Return true if given node has any ancestor with the give name, false otherwise. */ - private function has_ancestor( $node, $ancestor_tag_name ) { - if ( $this->get_ancestor_with_matching_spec_name( $node, $ancestor_tag_name ) ) { + private function has_ancestor( DOMElement $node, $ancestor_tag_spec_name ) { + if ( $this->get_ancestor_with_matching_spec_name( $node, $ancestor_tag_spec_name ) ) { return true; } return false; @@ -1824,10 +1794,10 @@ private function parse_tag_and_attributes_from_spec_name( $spec_name ) { /** * Loop through node's descendants and remove the ones that are not whitelisted. * - * @param DOMNode $node Node. - * @param array $allowed_descendants List of allowed descendant tags. + * @param DOMElement $node Node. + * @param array $allowed_descendants List of allowed descendant tags. */ - private function remove_disallowed_descendants( $node, $allowed_descendants ) { + private function remove_disallowed_descendants( DOMElement $node, $allowed_descendants ) { if ( ! $node->hasChildNodes() ) { return; } @@ -1852,9 +1822,9 @@ private function remove_disallowed_descendants( $node, $allowed_descendants ) { /** * Check whether the node validates the constraints for children. * - * @param DOMNode $node Node. - * @param array $child_tags { - * List of allowed child tags. + * @param DOMElement $node Node. + * @param array $child_tags { + * List of allowed child tag. * * @type array $first_child_tag_name_oneof List of tag names that are allowed as the first element child. * @type array $child_tag_name_oneof List of tag names that are allowed as children. @@ -1863,7 +1833,7 @@ private function remove_disallowed_descendants( $node, $allowed_descendants ) { * } * @return bool Whether the element satisfies the requirements, or else it should be removed. */ - private function check_valid_children( $node, $child_tags ) { + private function check_valid_children( DOMElement $node, $child_tags ) { $child_elements = []; for ( $i = 0; $i < $node->childNodes->length; $i++ ) { $child = $node->childNodes->item( $i ); @@ -1905,13 +1875,19 @@ private function check_valid_children( $node, $child_tags ) { * @since 0.5 * @todo It would be more robust if the the actual tag spec were looked up and then matched for each ancestor, but this is currently overkill. * - * @param DOMNode $node Node. - * @param string $ancestor_spec_name Ancestor spec name, e.g. 'body' or 'form [method=post]'. + * @param DOMElement $node Node. + * @param string $ancestor_spec_name Ancestor spec name, e.g. 'body' or 'form [method=post]'. * @return DOMNode|null Returns an ancestor node for the name specified, or null if not found. */ - private function get_ancestor_with_matching_spec_name( $node, $ancestor_spec_name ) { + private function get_ancestor_with_matching_spec_name( DOMElement $node, $ancestor_spec_name ) { $parsed_spec_name = $this->parse_tag_and_attributes_from_spec_name( $ancestor_spec_name ); + // Do quick check to see if the the ancestor element is even open. + // Note first isset check is for the sake of \AMP_Tag_And_Attribute_Sanitizer_Attr_Spec_Rules_Test::test_get_ancestor_with_matching_spec_name(). + if ( isset( $this->open_element['html'] ) && empty( $this->open_elements[ $parsed_spec_name['tag_name'] ] ) ) { + return null; + } + while ( $node && $node->parentNode ) { $node = $node->parentNode; if ( $node->nodeName === $parsed_spec_name['tag_name'] ) { @@ -1943,9 +1919,9 @@ private function get_ancestor_with_matching_spec_name( $node, $ancestor_spec_nam * @since 1.0 Fix silently removing unrecognized elements. * @see https://github.com/ampproject/amp-wp/issues/1100 * - * @param DOMNode $node Node. + * @param DOMElement $node Node. */ - private function replace_node_with_children( $node ) { + private function replace_node_with_children( DOMElement $node ) { // If node has no children or no parent, just remove the node. if ( ! $node->hasChildNodes() || ! $node->parentNode ) { @@ -1965,8 +1941,7 @@ private function replace_node_with_children( $node ) { $child = $node->firstChild; while ( $child ) { $fragment->appendChild( $child ); - $this->stack[] = $child; - $child = $node->firstChild; + $child = $node->firstChild; } // Prevent double-reporting nodes that are rejected for sanitization. @@ -1991,9 +1966,9 @@ private function replace_node_with_children( $node ) { * * @since 0.5 * - * @param DOMNode $node Node. + * @param DOMElement $node Node. */ - private function remove_node( $node ) { + private function remove_node( DOMElement $node ) { /** * Parent. * diff --git a/tests/php/test-amp-form-sanitizer.php b/tests/php/test-amp-form-sanitizer.php index 904ff3b1f97..9966149078e 100644 --- a/tests/php/test-amp-form-sanitizer.php +++ b/tests/php/test-amp-form-sanitizer.php @@ -20,9 +20,18 @@ class AMP_Form_Sanitizer_Test extends WP_UnitTestCase { */ public function setUp() { parent::setUp(); + add_theme_support( 'amp' ); $this->go_to( '/current-page/' ); } + /** + * Tear down. + */ + public function tearDown() { + remove_theme_support( 'amp' ); + parent::tearDown(); + } + /** * Data strings for testing converter. * diff --git a/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php b/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php index f47c7b6e14d..b4ceb056e4c 100644 --- a/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php +++ b/tests/php/test-amp-tag-and-attribute-sanitizer-private-methods.php @@ -702,6 +702,13 @@ public function get_replace_node_with_children_data() { ], '', ], + 'nested_invalid_elements' => [ + [ + 'source' => '

Example Summary

Example expanded text

', + 'tag_name' => 'bad-details', + ], + '

Example Summary

Example expanded text

', + ], 'children_multiple_empty_parents' => [ [ 'source' => '

Good Data

', @@ -792,14 +799,6 @@ public function test_replace_node_with_children( $data, $expected ) { */ public function get_ancestor_with_matching_spec_name_data() { return [ - 'empty' => [ - [ - 'source' => '', - 'node_tag_name' => 'p', - 'ancestor_tag_name' => 'article', - ], - null, - ], 'ancestor_is_immediate_parent' => [ [ 'source' => '

Good Data

', @@ -860,6 +859,11 @@ public function test_get_ancestor_with_matching_spec_name( $data, $expected ) { $this->assertEquals( $ancestor_node, $got, sprintf( "using source: %s\n%s", $data['source'], wp_json_encode( $data ) ) ); } + /** + * Get test data for test_get_ancestor_with_matching_spec_name. + * + * @return array Test data. + */ public function get_validate_attr_spec_list_for_node_data() { return [ 'no_attributes' => [ diff --git a/tests/php/test-tag-and-attribute-sanitizer.php b/tests/php/test-tag-and-attribute-sanitizer.php index 4a43f05a679..c75dc75fda5 100644 --- a/tests/php/test-tag-and-attribute-sanitizer.php +++ b/tests/php/test-tag-and-attribute-sanitizer.php @@ -239,6 +239,18 @@ public function get_body_data() { [ 'amp-playbuzz' ], ], + 'invalid_element_stripped' => [ + '

Foo text

', + '

Foo text

', + [], + ], + + 'nested_invalid_elements_stripped' => [ + '

Example Summary

Example expanded text

', + '

Example Summary

Example expanded text

', + [], + ], + // AMP-NEXT-PAGE > [separator]. 'reference-point-amp-next-page-separator' => [ '

Keep reading

', @@ -571,6 +583,35 @@ static function () { [], // No scripts because removed. ], + 'attribute_requirements_overriden_by_placeholders_within_template' => [ + '', + null, + [ 'amp-mustache', 'amp-timeago' ], + ], + + 'attribute_requirements_overriden_by_placeholders_within_template_newlines' => [ + "", + null, + [ 'amp-mustache', 'amp-timeago' ], + ], + + 'attribute_requirements_not_overriden_by_placeholders_outside_of_template' => [ + '', + '', + ], + + 'attribute_requirements_overriden_in_indirect_template_parents' => [ + '', + null, + [ 'amp-mustache', 'amp-timeago' ], + ], + + 'attribute_requirements_not_overriden_in_sibling_template_tags' => [ + '', + '', + [ 'amp-mustache' ], + ], + 'attribute_amp_accordion_value' => call_user_func( static function() { $html = str_replace( @@ -1619,6 +1660,14 @@ static function() { [ 'amp-user-location' ], ], + 'deeply_nested_elements_250' => [ + // If a DOM tree is too deep, libxml itself will issue an error: Excessive depth in document: 256 use XML_PARSE_HUGE option. + // So this tests just up before that limit to also ensure that the recursive \AMP_Tag_And_Attribute_Sanitizer::sanitize_element() + // method does not cause a different error at the PHP level when a recursion call stack reaches that same level. + str_repeat( '
', 250 ) . 'hello world!' . str_repeat( '
', 250 ), + null, + [], + ], ]; } @@ -1642,9 +1691,6 @@ public function test_is_missing_mandatory_attribute() { $node->setAttribute( 'data-gistid', 'foo-value' ); $this->assertFalse( $sanitizer->is_missing_mandatory_attribute( $spec, $node ) ); - - $spec_non_array = new stdClass(); - $this->assertFalse( $sanitizer->is_missing_mandatory_attribute( $spec_non_array, $node ) ); } /** @@ -1875,121 +1921,191 @@ public function test_html_sanitizer( $source, $expected = null, $expected_script } /** - * Tests replace_node_with_children validation errors. + * Get data for replace_node_with_children_validation_errors test. + * + * @return array[] Test data. */ - public function test_replace_node_with_children_validation_errors() { - $that = $this; - $error_index = 0; - $content = []; - $expected_errors = []; - - $content[] = ''; - $expected_errors[] = [ - 'node_name' => 'amp-image', - 'parent_name' => 'body', - 'code' => 'invalid_element', - 'node_attributes' => [ - 'src' => '/none.jpg', - 'width' => '100', - 'height' => '100', - 'alt' => 'None', + public function get_data_for_replace_node_with_children_validation_errors() { + return [ + 'amp_image' => [ + '', + '', + [ + [ + 'node_name' => 'amp-image', + 'parent_name' => 'body', + 'code' => 'invalid_element', + 'node_attributes' => [ + 'src' => '/none.jpg', + 'width' => '100', + 'height' => '100', + 'alt' => 'None', + ], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], + ], + + 'invalid_parent_element' => [ + '

Invalid baz parent element.

', + '

Invalid baz parent element.

', + [ + [ + 'node_name' => 'baz', + 'parent_name' => 'body', + 'code' => 'invalid_element', + 'node_attributes' => [ 'class' => 'baz-invalid' ], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], ], - ]; - $content[] = '

Invalid baz parent element.

'; - $expected_errors[] = [ - 'node_name' => 'baz', - 'parent_name' => 'body', - 'code' => 'invalid_element', - 'node_attributes' => [ 'class' => 'baz-invalid' ], - ]; - $content[] = 'Invalid a tag.'; - $expected_errors[] = [ - 'node_name' => 'amp-story-grid-layer', - 'parent_name' => 'body', - 'code' => 'invalid_element', - 'node_attributes' => [ 'class' => 'a-invalid' ], - ]; - $content[] = 'Invalid foo tag.'; - $expected_errors[] = [ - 'node_name' => 'foo', - 'parent_name' => 'body', - 'code' => 'invalid_element', - 'node_attributes' => [ 'class' => 'foo-invalid' ], - ]; - $content[] = 'Invalid nested elements'; - $expected_errors[] = [ - 'node_name' => 'divs', - 'parent_name' => 'body', - 'code' => 'invalid_element', - 'node_attributes' => [], - ]; - $expected_errors[] = [ - 'node_name' => 'foo', - 'parent_name' => 'body', - 'code' => 'invalid_element', - 'node_attributes' => [], - ]; - $content[] = 'Is an invalid "bar" tag.'; - $expected_errors[] = [ - 'node_name' => 'bazbar', - 'parent_name' => 'body', - 'code' => 'invalid_element', - 'node_attributes' => [], - ]; - $content[] = << -

Nesting valid and invalid elements.

- Is an invalid "invalid" tag - Is an invalid "foo" tag

This should pass.

- -EOB; - $expected_errors[] = [ - 'node_name' => 'invalid_p', - 'parent_name' => 'div', - 'code' => 'invalid_element', - 'node_attributes' => [ 'id' => 'invalid' ], - ]; - $expected_errors[] = [ - 'node_name' => 'bazfoo', - 'parent_name' => 'div', - 'code' => 'invalid_element', - 'node_attributes' => [], - ]; - $content[] = << -
  • hello
  • - world - -EOB; - $expected_errors[] = [ - 'node_name' => 'lili', - 'parent_name' => 'ul', - 'code' => 'invalid_element', - 'node_attributes' => [], - ]; - // Test validation error for nested invalid tags. - foreach ( $content as $dom_content ) { - $dom = AMP_DOM_Utils::get_dom_from_content( $dom_content ); - $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( - $dom, + 'invalid_a_tag' => [ + 'Invalid a tag.', + '', [ - 'validation_error_callback' => static function( $error, $context ) use ( $that, $expected_errors, &$error_index ) { - $expected = $expected_errors[ $error_index ]; - $expected['type'] = AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE; - $tag = $expected['node_name']; - $that->assertEquals( $expected, $error ); - $that->assertInstanceOf( 'DOMElement', $context['node'] ); - $that->assertEquals( $tag, $context['node']->tagName ); - $that->assertEquals( $tag, $context['node']->nodeName ); - $error_index++; - - return true; - }, - ] - ); - $sanitizer->sanitize(); - } + [ + 'node_name' => 'amp-story-grid-layer', + 'parent_name' => 'body', + 'code' => 'invalid_element', + 'node_attributes' => [ 'class' => 'a-invalid' ], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], + ], + + 'invalid_foo_tag' => [ + 'Invalid foo tag.', + 'Invalid foo tag.', + [ + [ + 'node_name' => 'foo', + 'parent_name' => 'body', + 'code' => 'invalid_element', + 'node_attributes' => [ 'class' => 'foo-invalid' ], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], + ], + + 'invalid_barbaz_tag' => [ + 'Is an invalid "bazbar" tag.', + 'Is an invalid "bazbar" tag.', + [ + [ + 'node_name' => 'bazbar', + 'parent_name' => 'body', + 'code' => 'invalid_element', + 'node_attributes' => [], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], + ], + + 'nested_valid_and_invalud' => [ + ' +
    +

    Nesting valid and invalid elements.

    + Is an invalid "invalid" tag + Is an invalid "foo" tag

    This should pass.

    +
    + ', + ' +
    +

    Nesting valid and invalid elements.

    + Is an invalid "invalid" tagIs an invalid "foo" tag

    This should pass.

    +
    + ', + [ + [ + 'node_name' => 'invalid_p', + 'parent_name' => 'div', + 'code' => 'invalid_element', + 'node_attributes' => [ 'id' => 'invalid' ], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + [ + 'node_name' => 'bazfoo', + 'parent_name' => 'div', + 'code' => 'invalid_element', + 'node_attributes' => [], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], + ], + + 'bad_lili' => [ + ' +
      +
    • hello
    • + world +
    + ', + '
    • hello
    • world
    ', + [ + [ + 'node_name' => 'lili', + 'parent_name' => 'ul', + 'code' => 'invalid_element', + 'node_attributes' => [], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], + ], + + 'invalid_nested_elements' => [ + 'Invalid nested elements', + 'Invalid nested elements', + [ + [ + 'node_name' => 'foo', + 'parent_name' => 'divs', + 'code' => 'invalid_element', + 'node_attributes' => [], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + [ + 'node_name' => 'divs', + 'parent_name' => 'body', + 'code' => 'invalid_element', + 'node_attributes' => [], + 'type' => AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE, + ], + ], + ], + ]; + } + + /** + * Tests replace_node_with_children validation errors. + * + * @dataProvider get_data_for_replace_node_with_children_validation_errors + * @covers \AMP_Tag_And_Attribute_Sanitizer::replace_node_with_children() + * + * @param string $source_content Source DOM content. + * @param string $expected_content Expected content after sanitization. + * @param array[] $expected_errors Expected errors. + */ + public function test_replace_node_with_children_validation_errors( $source_content, $expected_content, $expected_errors ) { + $dom = AMP_DOM_Utils::get_dom_from_content( $source_content ); + $sanitizer = new AMP_Tag_And_Attribute_Sanitizer( + $dom, + [ + 'validation_error_callback' => function( $error, $context ) use ( &$expected_errors ) { + $expected = array_shift( $expected_errors ); + $tag = $expected['node_name']; + $this->assertEquals( $expected, $error ); + $this->assertInstanceOf( 'DOMElement', $context['node'] ); + $this->assertEquals( $tag, $context['node']->tagName ); + $this->assertEquals( $tag, $context['node']->nodeName ); + return true; + }, + ] + ); + $sanitizer->sanitize(); + + $this->assertEmpty( $expected_errors, 'There should be no expected validation errors remaining.' ); + $this->assertEqualMarkup( $expected_content, AMP_DOM_Utils::get_content_from_dom( $dom ) ); } /**