/**
 * Builds an attribute list from a string containing attributes.
 *
 * The attribute string is wrapped in a placeholder tag and handed to
 * WP_HTML_Tag_Processor; every attribute the processor reports is then
 * rebuilt in a normalized form:
 *
 *  - Values of URI attributes (as listed by wp_kses_uri_attributes()) are
 *    passed through wp_kses_bad_protocol().
 *  - The characters `&`, `<`, `>`, `'` and `"` in values are written back as
 *    character references, and the value is always wrapped in double quotes.
 *  - Attributes without a value are reported with an empty value and
 *    `'vless' => 'y'`.
 *
 * Duplicate attributes are reduced to the one defined first
 * (`foo='bar' foo='baz'` will result in `foo='bar'`); this relies on the
 * HTML API reporting each attribute name once.
 *
 * @since 1.0.0
 * @since 6.9.0 Rebuilt on HTML API
 *
 * @param string   $attr              Attribute list from HTML element to closing HTML element tag.
 * @param string[] $allowed_protocols Array of allowed URL protocols.
 * @return array[] Array of attribute information after parsing, keyed by attribute
 *                 name. Each entry holds the keys 'name', 'value', 'whole' and 'vless'.
 */
function wp_kses_hair( $attr, $allowed_protocols ) {
	$attributes = array();
	$uris       = wp_kses_uri_attributes();

	/*
	 * Characters with syntactic meaning in HTML are written back as character
	 * references so a rebuilt value cannot terminate its own quoting.
	 */
	$syntax_characters = array(
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		"'" => '&apos;',
		'"' => '&quot;',
	);

	// Wrap the attribute list in a placeholder tag so the HTML API can parse it.
	$processor = new WP_HTML_Tag_Processor( "<wp {$attr}>" );
	$processor->next_token();

	$names = $processor->get_attribute_names_with_prefix( '' );
	if ( ! is_array( $names ) ) {
		// The processor is not stopped on a tag, so there is nothing to report.
		return $attributes;
	}

	foreach ( $names as $name ) {
		$value = $processor->get_attribute( $name );
		if ( null === $value ) {
			continue;
		}

		$is_bool = true === $value;

		// NOTE(review): assumes attribute names come back lower-cased from the HTML API, matching the entries in $uris - confirm.
		if ( is_string( $value ) && in_array( $name, $uris, true ) ) {
			$value = wp_kses_bad_protocol( $value, $allowed_protocols );
		}

		// Reconstruct and normalize the attribute value.
		$recoded = $is_bool ? '' : strtr( $value, $syntax_characters );
		$whole   = $is_bool ? $name : "{$name}=\"{$recoded}\"";

		// @todo Which security issues need review on the names?
		$attributes[ $name ] = array(
			'name'  => $name,
			'value' => $recoded,
			'whole' => $whole,
			'vless' => $is_bool ? 'y' : 'n',
		);
	}

	return $attributes;
}
/**
 * Test that wp_kses_hair() parses attributes correctly.
 *
 * @ticket 63724
 *
 * @dataProvider data_test_wp_kses_hair
 *
 * @param string $attribute Attribute list to parse.
 * @param array  $expected  Expected parsed attribute information.
 */
public function test_wp_kses_hair( $attribute, $expected ) {
	$this->assertSame( $expected, wp_kses_hair( $attribute, wp_allowed_protocols() ) );
}

/**
 * Data provider for test_wp_kses_hair().
 *
 * Expected values carry `&`, `<`, `>`, `'` and `"` as character references
 * because wp_kses_hair() re-encodes those characters when rebuilding a value.
 *
 * @return array[] Datasets of attribute list and expected parse result.
 */
public static function data_test_wp_kses_hair() {
	return array(
		'empty string'                           => array(
			'',
			array(),
		),
		'whitespace only'                        => array(
			' ',
			array(),
		),
		'double-quoted value'                    => array(
			'title="foo"',
			array(
				'title' => array(
					'name'  => 'title',
					'value' => 'foo',
					'whole' => 'title="foo"',
					'vless' => 'n',
				),
			),
		),
		'angle brackets in value'                => array(
			'title="Numbers: 10 < 20 and 30 > 20"',
			array(
				'title' => array(
					'name'  => 'title',
					'value' => 'Numbers: 10 &lt; 20 and 30 &gt; 20',
					'whole' => 'title="Numbers: 10 &lt; 20 and 30 &gt; 20"',
					'vless' => 'n',
				),
			),
		),
		'unquoted value'                         => array(
			'min=123',
			array(
				'min' => array(
					'name'  => 'min',
					'value' => '123',
					'whole' => 'min="123"',
					'vless' => 'n',
				),
			),
		),
		'valueless attribute'                    => array(
			'disabled',
			array(
				'disabled' => array(
					'name'  => 'disabled',
					'value' => '',
					'whole' => 'disabled',
					'vless' => 'y',
				),
			),
		),
		'unquoted value with parentheses'        => array(
			'onclick=alert(1)',
			array(
				'onclick' => array(
					'name'  => 'onclick',
					'value' => 'alert(1)',
					'whole' => 'onclick="alert(1)"',
					'vless' => 'n',
				),
			),
		),
		'several attributes, trailing space'     => array(
			'title="hello & hi" href="#" id="my_id" ',
			array(
				'title' => array(
					'name'  => 'title',
					'value' => 'hello &amp; hi',
					'whole' => 'title="hello &amp; hi"',
					'vless' => 'n',
				),
				'href'  => array(
					'name'  => 'href',
					'value' => '#',
					'whole' => 'href="#"',
					'vless' => 'n',
				),
				'id'    => array(
					'name'  => 'id',
					'value' => 'my_id',
					'whole' => 'id="my_id"',
					'vless' => 'n',
				),
			),
		),
		'no space after quoted value'            => array(
			'title="hello"disabled href="mailto:info@test.com" id=\'my_id\'',
			array(
				'title'    => array(
					'name'  => 'title',
					'value' => 'hello',
					'whole' => 'title="hello"',
					'vless' => 'n',
				),
				'disabled' => array(
					'name'  => 'disabled',
					'value' => '',
					'whole' => 'disabled',
					'vless' => 'y',
				),
				'href'     => array(
					'name'  => 'href',
					'value' => 'mailto:info@test.com',
					'whole' => 'href="mailto:info@test.com"',
					'vless' => 'n',
				),
				'id'       => array(
					'name'  => 'id',
					'value' => 'my_id',
					'whole' => 'id="my_id"',
					'vless' => 'n',
				),
			),
		),
		'double quotes inside unquoted value'    => array(
			'abcd=abcd"abcd"',
			array(
				'abcd' => array(
					'name'  => 'abcd',
					'value' => 'abcd&quot;abcd&quot;',
					'whole' => 'abcd="abcd&quot;abcd&quot;"',
					'vless' => 'n',
				),
			),
		),
		'apostrophes after single-quoted value'  => array(
			"array[1]='z'z'z'z",
			array(
				'array[1]' => array(
					'name'  => 'array[1]',
					'value' => 'z',
					'whole' => 'array[1]="z"',
					'vless' => 'n',
				),
				"z'z'z"    => array(
					'name'  => "z'z'z",
					'value' => '',
					'whole' => "z'z'z",
					'vless' => 'y',
				),
			),
		),
		// Using a digit in attribute name should work.
		'digit in attribute name'                => array(
			'href="https://example.com/[shortcode attr=\'value\']" data-op3-timer-seconds="0"',
			array(
				'href'                   => array(
					'name'  => 'href',
					'value' => 'https://example.com/[shortcode attr=&apos;value&apos;]',
					'whole' => 'href="https://example.com/[shortcode attr=&apos;value&apos;]"',
					'vless' => 'n',
				),
				'data-op3-timer-seconds' => array(
					'name'  => 'data-op3-timer-seconds',
					'value' => '0',
					'whole' => 'data-op3-timer-seconds="0"',
					'vless' => 'n',
				),
			),
		),
		// Using an underscore in attribute name should work.
		'underscore in attribute name'           => array(
			'href="https://example.com/[shortcode attr=\'value\']" data-op_timer-seconds="0"',
			array(
				'href'                  => array(
					'name'  => 'href',
					'value' => 'https://example.com/[shortcode attr=&apos;value&apos;]',
					'whole' => 'href="https://example.com/[shortcode attr=&apos;value&apos;]"',
					'vless' => 'n',
				),
				'data-op_timer-seconds' => array(
					'name'  => 'data-op_timer-seconds',
					'value' => '0',
					'whole' => 'data-op_timer-seconds="0"',
					'vless' => 'n',
				),
			),
		),
		// Using a period in attribute name should work.
		'period in attribute name'               => array(
			'href="https://example.com/[shortcode attr=\'value\']" data-op.timer-seconds="0"',
			array(
				'href'                  => array(
					'name'  => 'href',
					'value' => 'https://example.com/[shortcode attr=&apos;value&apos;]',
					'whole' => 'href="https://example.com/[shortcode attr=&apos;value&apos;]"',
					'vless' => 'n',
				),
				'data-op.timer-seconds' => array(
					'name'  => 'data-op.timer-seconds',
					'value' => '0',
					'whole' => 'data-op.timer-seconds="0"',
					'vless' => 'n',
				),
			),
		),
		// Using a digit at the beginning of attribute name should work.
		'digit at start of attribute name'       => array(
			'href="http://example.org/" 3data-op-timer-seconds="0"',
			array(
				'href'                   => array(
					'name'  => 'href',
					'value' => 'http://example.org/',
					'whole' => 'href="http://example.org/"',
					'vless' => 'n',
				),
				'3data-op-timer-seconds' => array(
					'name'  => '3data-op-timer-seconds',
					'value' => '0',
					'whole' => '3data-op-timer-seconds="0"',
					'vless' => 'n',
				),
			),
		),
		// Use invalid protocol in href attribute.
		'disallowed protocol in href'            => array(
			'href="javascript:alert(1)"',
			array(
				'href' => array(
					'name'  => 'href',
					'value' => 'alert(1)',
					'whole' => 'href="alert(1)"',
					'vless' => 'n',
				),
			),
		),
	);
}