Skip to content
Closed
23 changes: 20 additions & 3 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -5290,13 +5290,30 @@ public function get_attribute( $name ) {
/**
* Updates or creates a new attribute on the currently matched tag with the passed value.
*
* For boolean attributes special handling is provided:
* This function handles all necessary HTML encoding. Provide normal, unescaped string values.
* The HTML API will encode the strings appropriately so that the browser will interpret them
* as the intended value.
*
* Example:
*
* // Renders “Eggs & Milk” in a browser, encoded as `<abbr title="Eggs &amp; Milk">`.
* $processor->set_attribute( 'title', 'Eggs & Milk' );
*
* // Renders “Eggs &amp; Milk” in a browser, encoded as `<abbr title="Eggs &amp;amp; Milk">`.
* $processor->set_attribute( 'title', 'Eggs &amp; Milk' );
*
* // Renders `true` as `<abbr title>`.
* $processor->set_attribute( 'title', true );
*
* // Renders without the attribute for `false` as `<abbr>`.
* $processor->set_attribute( 'title', false );
*
* Special handling is provided for boolean attribute values:
* - When `true` is passed as the value, then only the attribute name is added to the tag.
* - When `false` is passed, the attribute gets removed if it existed before.
*
* For string attributes, the value is escaped using the `esc_attr` function.
*
* @since 6.6.0 Subclassed for the HTML Processor.
* @since 6.9.0 Escapes all character references instead of trying to avoid double-escaping.
*
* @param string $name The attribute name to target.
* @param string|bool $value The new attribute value.
Expand Down
65 changes: 57 additions & 8 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -3746,18 +3746,39 @@ public function get_modifiable_text(): string {
* $processor->set_modifiable_text( str_replace( ':)', '🙂', $chunk ) );
* }
*
* This function handles all necessary HTML encoding. Provide normal, unescaped string values.
* The HTML API will encode the strings appropriately so that the browser will interpret them
* as the intended value.
*
* Example:
*
* // Renders as “Eggs & Milk” in a browser, encoded as `<p>Eggs &amp; Milk</p>`.
* $processor->set_modifiable_text( 'Eggs & Milk' );
*
* // Renders as “Eggs &amp; Milk” in a browser, encoded as `<p>Eggs &amp;amp; Milk</p>`.
* $processor->set_modifiable_text( 'Eggs &amp; Milk' );
*
* @since 6.7.0
* @since 6.9.0 Escapes all character references instead of trying to avoid double-escaping.
*
* @param string $plaintext_content New text content to represent in the matched token.
*
* @return bool Whether the text was able to update.
*/
public function set_modifiable_text( string $plaintext_content ): bool {
if ( self::STATE_TEXT_NODE === $this->parser_state ) {
$this->lexical_updates['modifiable text'] = new WP_HTML_Text_Replacement(
$this->text_starts_at,
$this->text_length,
htmlspecialchars( $plaintext_content, ENT_QUOTES | ENT_HTML5 )
strtr(
$plaintext_content,
array(
'<' => '&lt;',
'>' => '&gt;',
'&' => '&amp;',
'"' => '&quot;',
"'" => '&apos;',
)
)
);

return true;
Expand Down Expand Up @@ -3871,14 +3892,31 @@ static function ( $tag_match ) {
/**
* Updates or creates a new attribute on the currently matched tag with the passed value.
*
* For boolean attributes special handling is provided:
* This function handles all necessary HTML encoding. Provide normal, unescaped string values.
* The HTML API will encode the strings appropriately so that the browser will interpret them
* as the intended value.
*
* Example:
*
* // Renders “Eggs & Milk” in a browser, encoded as `<abbr title="Eggs &amp; Milk">`.
* $processor->set_attribute( 'title', 'Eggs & Milk' );
*
* // Renders “Eggs &amp; Milk” in a browser, encoded as `<abbr title="Eggs &amp;amp; Milk">`.
* $processor->set_attribute( 'title', 'Eggs &amp; Milk' );
*
* // Renders `true` as `<abbr title>`.
* $processor->set_attribute( 'title', true );
*
* // Renders without the attribute for `false` as `<abbr>`.
* $processor->set_attribute( 'title', false );
*
* Special handling is provided for boolean attribute values:
* - When `true` is passed as the value, then only the attribute name is added to the tag.
* - When `false` is passed, the attribute gets removed if it existed before.
*
* For string attributes, the value is escaped using the `esc_attr` function.
*
* @since 6.2.0
* @since 6.2.1 Fix: Only create a single update for multiple calls with case-variant attribute names.
* @since 6.9.0 Escapes all character references instead of trying to avoid double-escaping.
*
* @param string $name The attribute name to target.
* @param string|bool $value The new attribute value.
Expand Down Expand Up @@ -3950,12 +3988,23 @@ public function set_attribute( $name, $value ): bool {
} else {
$comparable_name = strtolower( $name );

/*
* Escape URL attributes.
/**
* Escape attribute values appropriately.
*
* @see https://html.spec.whatwg.org/#attributes-3
*/
$escaped_new_value = in_array( $comparable_name, wp_kses_uri_attributes(), true ) ? esc_url( $value ) : esc_attr( $value );
$escaped_new_value = in_array( $comparable_name, wp_kses_uri_attributes(), true )
? esc_url( $value )
Copy link
Member

@westonruter westonruter Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The esc_url() function also does some stuff with entities, namely:

// Replace ampersands and single quotes only when displaying.
if ( 'display' === $_context ) {
$url = wp_kses_normalize_entities( $url );
$url = str_replace( '&amp;', '&#038;', $url );
$url = str_replace( "'", '&#039;', $url );
}

To avoid that, I think using esc_url_raw() is preferable:

Suggested change
? esc_url( $value )
? htmlspecialchars( esc_url_raw( $value ) )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is only a formatting change: I broke the ternary onto three lines.

Can we consider that suggestion in a separate ticket and PR?

Copy link
Member

@westonruter westonruter Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sure, but it is related to the double-escaping issue, because esc_url() does wp_kses_normalize_entities() which includes converting all & to &amp; among other things. For example, these:

var_dump( esc_url( 'http://example.com/?foo&amp;bar' ) );
var_dump( esc_url( 'http://example.com/?foo&#038;bar' ) );
var_dump( esc_url( 'http://example.com/?foo&bar' ) );

All result in:

string(32) "http://example.com/?foo&#038;bar"
string(32) "http://example.com/?foo&#038;bar"
string(32) "http://example.com/?foo&#038;bar"

I don't really see why a URI attribute should be treated differently at the HTML API layer. So this could instead just be:

$escaped_new_value = strtr(
	$value,
	array(
		'<' => '&lt;',
		'>' => '&gt;',
		'&' => '&amp;',
		'"' => '&quot;',
		"'" => '&apos;',
	)
);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're absolutely right that the double-escaping issue also exists in esc_url(); that's unfortunate. esc_url() does a lot of things, and it doesn't seem as easily replaced as esc_attr() or esc_html(). I suspect more discussion will be necessary regarding URL-related changes, which is why I'm reluctant to include those changes here.

See r58472 and r58473.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, sounds good.

: strtr(
$value,
array(
'<' => '&lt;',
'>' => '&gt;',
'&' => '&amp;',
'"' => '&quot;',
"'" => '&apos;',
)
);

// If the escaping functions wiped out the update, reject it and indicate it was rejected.
if ( '' === $escaped_new_value && '' !== $value ) {
Expand Down
10 changes: 5 additions & 5 deletions tests/phpunit/tests/block-supports/wpRenderBackgroundSupport.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ public function data_background_block_support() {
'url' => 'https://example.com/image.jpg',
),
),
'expected_wrapper' => '<div class="has-background" style="background-image:url(&#039;https://example.com/image.jpg&#039;);background-size:cover;">Content</div>',
'expected_wrapper' => '<div class="has-background" style="background-image:url(&apos;https://example.com/image.jpg&apos;);background-size:cover;">Content</div>',
'wrapper' => '<div>Content</div>',
),
'background image style with contain, position, attachment, and repeat is applied' => array(
Expand All @@ -155,7 +155,7 @@ public function data_background_block_support() {
'backgroundSize' => 'contain',
'backgroundAttachment' => 'fixed',
),
'expected_wrapper' => '<div class="has-background" style="background-image:url(&#039;https://example.com/image.jpg&#039;);background-position:50% 50%;background-repeat:no-repeat;background-size:contain;background-attachment:fixed;">Content</div>',
'expected_wrapper' => '<div class="has-background" style="background-image:url(&apos;https://example.com/image.jpg&apos;);background-position:50% 50%;background-repeat:no-repeat;background-size:contain;background-attachment:fixed;">Content</div>',
'wrapper' => '<div>Content</div>',
),
'background image style is appended if a style attribute already exists' => array(
Expand All @@ -169,7 +169,7 @@ public function data_background_block_support() {
'url' => 'https://example.com/image.jpg',
),
),
'expected_wrapper' => '<div class="wp-block-test has-background" style="color: red;background-image:url(&#039;https://example.com/image.jpg&#039;);background-size:cover;">Content</div>',
'expected_wrapper' => '<div class="wp-block-test has-background" style="color: red;background-image:url(&apos;https://example.com/image.jpg&apos;);background-size:cover;">Content</div>',
'wrapper' => '<div class="wp-block-test" style="color: red">Content</div>',
),
'background image style is appended if a style attribute containing multiple styles already exists' => array(
Expand All @@ -183,7 +183,7 @@ public function data_background_block_support() {
'url' => 'https://example.com/image.jpg',
),
),
'expected_wrapper' => '<div class="wp-block-test has-background" style="color: red;font-size: 15px;background-image:url(&#039;https://example.com/image.jpg&#039;);background-size:cover;">Content</div>',
'expected_wrapper' => '<div class="wp-block-test has-background" style="color: red;font-size: 15px;background-image:url(&apos;https://example.com/image.jpg&apos;);background-size:cover;">Content</div>',
'wrapper' => '<div class="wp-block-test" style="color: red;font-size: 15px;">Content</div>',
),
'background image style is appended if a boolean style attribute already exists' => array(
Expand All @@ -198,7 +198,7 @@ public function data_background_block_support() {
'source' => 'file',
),
),
'expected_wrapper' => '<div class="has-background" classname="wp-block-test" style="background-image:url(&#039;https://example.com/image.jpg&#039;);background-size:cover;">Content</div>',
'expected_wrapper' => '<div class="has-background" classname="wp-block-test" style="background-image:url(&apos;https://example.com/image.jpg&apos;);background-size:cover;">Content</div>',
'wrapper' => '<div classname="wp-block-test" style>Content</div>',
),
'background image style is not applied if the block does not support background image' => array(
Expand Down
47 changes: 33 additions & 14 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
*
* @param string $attribute_value A value with potential XSS exploit.
*/
public function test_set_attribute_prevents_xss( $attribute_value ) {
public function test_set_attribute_prevents_xss( $attribute_value, $escaped_attribute_value = null ) {
$processor = new WP_HTML_Tag_Processor( '<div></div>' );
$processor->next_tag();
$processor->set_attribute( 'test', $attribute_value );
Expand All @@ -861,7 +861,7 @@ public function test_set_attribute_prevents_xss( $attribute_value ) {
preg_match( '~^<div test=(.*)></div>$~', $processor->get_updated_html(), $match );
list( , $actual_value ) = $match;

$this->assertSame( '"' . esc_attr( $attribute_value ) . '"', $actual_value, 'Entities were not properly escaped in the attribute value' );
$this->assertSame( '"' . $escaped_attribute_value . '"', $actual_value, 'Entities were not properly escaped in the attribute value' );
}

/**
Expand All @@ -871,15 +871,18 @@ public function test_set_attribute_prevents_xss( $attribute_value ) {
*/
public static function data_set_attribute_prevents_xss() {
return array(
array( '"' ),
array( '&quot;' ),
array( '&' ),
array( '&amp;' ),
array( '&euro;' ),
array( "'" ),
array( '<>' ),
array( '&quot";' ),
array( '" onclick="alert(\'1\');"><span onclick=""></span><script>alert("1")</script>' ),
array( '"', '&quot;' ),
array( '&quot;', '&amp;quot;' ),
array( '&', '&amp;' ),
array( '&amp;', '&amp;amp;' ),
array( '&euro;', '&amp;euro;' ),
array( "'", '&apos;' ),
array( '<>', '&lt;&gt;' ),
array( '&quot";', '&amp;quot&quot;;' ),
array(
'" onclick="alert(\'1\');"><span onclick=""></span><script>alert("1")</script>',
'&quot; onclick=&quot;alert(&apos;1&apos;);&quot;&gt;&lt;span onclick=&quot;&quot;&gt;&lt;/span&gt;&lt;script&gt;alert(&quot;1&quot;)&lt;/script&gt;',
),
);
}

Expand All @@ -905,6 +908,21 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr
);
}

/**
* Ensure that attribute values that appear to contain HTML character references are correctly
* encoded and preserve the original value.
*
* The value is handed to `set_attribute()` as plain text. Reading it back with
* `get_attribute()` must return the identical string, while the serialized HTML must
* escape each ampersand of the input (e.g. `&lt;` is written as `&amp;lt;`).
*
* @ticket 64054
*/
public function test_set_attribute_encodes_html_character_references() {
// Plain-text input that happens to look like already-encoded HTML.
$original = 'HTML character references: &lt; &gt; &amp;';
$processor = new WP_HTML_Tag_Processor( '<span>' );
$processor->next_tag();
$processor->set_attribute( 'data-attr', $original );
// Round trip: the value read back is exactly the value that was provided.
$this->assertSame( $original, $processor->get_attribute( 'data-attr' ) );
// Serialization: every `&` in the input appears as `&amp;` in the updated HTML.
$this->assertEqualHTML( '<span data-attr="HTML character references: &amp;lt; &amp;gt; &amp;amp;">', $processor->get_updated_html() );
}

/**
* @ticket 56299
*
Expand Down Expand Up @@ -2786,9 +2804,10 @@ public function test_updating_attributes_in_malformed_html( $html, $expected ) {
$processor->next_tag();
$processor->add_class( 'secondTag' );

$this->assertSame(
$this->assertEqualHTML(
$expected,
$processor->get_updated_html(),
'<body>',
'Did not properly update attributes and classnames given malformed input'
);
}
Expand All @@ -2806,11 +2825,11 @@ public static function data_updating_attributes_in_malformed_html() {
),
'HTML tag opening inside attribute value' => array(
'input' => '<pre id="<code" class="wp-block-code <code is poetry&gt;"><code>This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'expected' => '<pre foo="bar" id="<code" class="wp-block-code &lt;code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'expected' => '<pre foo="bar" id="<code" class="wp-block-code &lt;code is poetry&amp;gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
),
'HTML tag brackets in attribute values and data markup' => array(
'input' => '<pre id="<code-&gt;-block-&gt;" class="wp-block-code <code is poetry&gt;"><code>This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'expected' => '<pre foo="bar" id="<code-&gt;-block-&gt;" class="wp-block-code &lt;code is poetry&gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
'expected' => '<pre foo="bar" id="<code-&gt;-block-&gt;" class="wp-block-code &lt;code is poetry&amp;gt; firstTag"><code class="secondTag">This &lt;is> a &lt;strong is="true">thing.</code></pre><span>test</span>',
),
'Single and double quotes in attribute value' => array(
'input' => '<p title="Demonstrating how to use single quote (\') and double quote (&quot;)"><span>test</span>',
Expand Down
Loading