From d366f163c791b80b5113fa4ffec129a6fe8f01c3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 29 Aug 2024 13:52:19 +0200 Subject: [PATCH 1/6] Ensure we get bookmark_name on tokens --- src/wp-includes/html-api/class-wp-html-token.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-token.php b/src/wp-includes/html-api/class-wp-html-token.php index d5e51ac29007f..cd0a9efa12e71 100644 --- a/src/wp-includes/html-api/class-wp-html-token.php +++ b/src/wp-includes/html-api/class-wp-html-token.php @@ -31,7 +31,7 @@ class WP_HTML_Token { * * @var string */ - public $bookmark_name = null; + public $bookmark_name; /** * Name of node; lowercase names such as "marker" are not HTML elements. @@ -90,13 +90,13 @@ class WP_HTML_Token { * * @since 6.4.0 * - * @param string|null $bookmark_name Name of bookmark corresponding to location in HTML where token is found, + * @param string $bookmark_name Name of bookmark corresponding to location in HTML where token is found, * or `null` for markers and nodes without a bookmark. * @param string $node_name Name of node token represents; if uppercase, an HTML element; if lowercase, a special value like "marker". * @param bool $has_self_closing_flag Whether the source token contains the self-closing flag, regardless of whether it's valid. * @param callable|null $on_destroy Optional. Function to call when destroying token, useful for releasing the bookmark. 
*/ - public function __construct( ?string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) { + public function __construct( string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) { $this->bookmark_name = $bookmark_name; $this->namespace = 'html'; $this->node_name = $node_name; From d68b404a05d64c102e15b7c5b9e11df199534469 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 29 Aug 2024 14:02:59 +0200 Subject: [PATCH 2/6] Iterate on active formatting --- ...ass-wp-html-active-formatting-elements.php | 48 ++++++++++----- .../html-api/class-wp-html-processor.php | 60 ++++++++++++------- 2 files changed, 73 insertions(+), 35 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 2428f9a88e8f1..5e2eaf948a367 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -39,7 +39,7 @@ class WP_HTML_Active_Formatting_Elements { * * @since 6.4.0 * - * @var WP_HTML_Token[] + * @var array<AFE_Element|AFE_Marker> */ private $stack = array(); @@ -53,9 +53,9 @@ class WP_HTML_Active_Formatting_Elements { * @access private * * @param int $index Number of nodes from the top node to return. - * @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null. + * @return AFE_Element|AFE_Marker|null Node at the given index in the stack, if one exists, otherwise null. */ - public function at( $nth ) { + public function at( $nth ): AFE_Element|AFE_Marker|null { return $this->stack[ $nth - 1 ]; } @@ -67,9 +67,9 @@ public function at( $nth ) { * @param WP_HTML_Token $token Look for this node in the stack. * @return bool Whether the referenced node is in the stack of active formatting elements. 
*/ - public function contains_node( WP_HTML_Token $token ) { + public function contains_node( WP_HTML_Token $token ): bool { foreach ( $this->walk_up() as $item ) { - if ( $token->bookmark_name === $item->bookmark_name ) { + if ( $item instanceof AFE_Element && $token->bookmark_name === $item->token->bookmark_name ) { return true; } } @@ -103,7 +103,7 @@ public function current_node() { } /** - * Inserts a "marker" at the end of the list of active formatting elements. + * Inserts a marker at the end of the list of active formatting elements. * * > The markers are inserted when entering applet, object, marquee, * > template, td, th, and caption elements, and are used to prevent @@ -115,7 +115,7 @@ public function current_node() { * @since 6.7.0 */ public function insert_marker(): void { - $this->push( new WP_HTML_Token( null, 'marker', false ) ); + $this->push( new AFE_Marker() ); } /** @@ -125,9 +125,9 @@ public function insert_marker(): void { * * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements * - * @param WP_HTML_Token $token Push this node onto the stack. + * @param AFE_Element|AFE_Marker $token Push this node onto the stack. */ - public function push( WP_HTML_Token $token ) { + public function push( AFE_Element|AFE_Marker $afe ): void { /* * > If there are already three elements in the list of active formatting elements after the last marker, * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and @@ -139,8 +139,7 @@ public function push( WP_HTML_Token $token ) { * * @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack. */ - // > Add element to the list of active formatting elements. - $this->stack[] = $token; + $this->stack[] = $afe; } /** @@ -148,12 +147,12 @@ public function push( WP_HTML_Token $token ) { * * @since 6.4.0 * - * @param WP_HTML_Token $token Remove this node from the stack, if it's there already. 
+ * @param AFE_Element|AFE_Marker $node Remove this node from the stack, if it's there already. * @return bool Whether the node was found and removed from the stack of active formatting elements. */ - public function remove_node( WP_HTML_Token $token ) { + public function remove_node( AFE_Element|AFE_Marker $node ) { foreach ( $this->walk_up() as $position_from_end => $item ) { - if ( $token->bookmark_name !== $item->bookmark_name ) { + if ( $node !== $item && ! ( $node instanceof AFE_Element && $item instanceof AFE_Element && $node->token->bookmark_name === $item->token->bookmark_name ) ) { continue; } @@ -237,9 +236,28 @@ public function walk_up() { public function clear_up_to_last_marker(): void { foreach ( $this->walk_up() as $item ) { array_pop( $this->stack ); - if ( 'marker' === $item->node_name ) { + if ( $item instanceof AFE_Marker ) { break; } } } } + +class AFE_Marker {} +class AFE_Element { + /** @var string */ + public $namespace; + /** @var string */ + public $tag_name; + /** @var array */ + public $attributes; + /** @var WP_HTML_Token */ + public $token; + + public function __construct( string $tag_namespace, string $tag_name, array $attributes, WP_HTML_Token $token ) { + $this->namespace = $tag_namespace; + $this->tag_name = $tag_name; + $this->attributes = $attributes; + $this->token = $token; + } +} diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index cd3fa57fd7860..5789b77a0e1d3 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -442,7 +442,7 @@ private function bail( string $message ) { $active_formats = array(); foreach ( $this->state->active_formatting_elements->walk_down() as $item ) { - $active_formats[] = $item->node_name; + $active_formats[] = $item instanceof AFE_Marker ? 
'(marker)' : $item->tag_name; } $this->last_error = self::ERROR_UNSUPPORTED; @@ -2350,10 +2350,10 @@ private function step_in_body(): bool { */ case '+A': foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { - switch ( $item->node_name ) { - case 'marker': - break; - + if ( $item instanceof AFE_Marker ) { + break; + } + switch ( $item->tag_name ) { case 'A': $this->run_adoption_agency_algorithm(); $this->state->active_formatting_elements->remove_node( $item ); @@ -2364,7 +2364,7 @@ private function step_in_body(): bool { $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); - $this->state->active_formatting_elements->push( $this->state->current_token ); + $this->push_active_formatting_element( $this->state->current_token ); return true; /* @@ -2385,7 +2385,7 @@ private function step_in_body(): bool { case '+U': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); - $this->state->active_formatting_elements->push( $this->state->current_token ); + $this->push_active_formatting_element( $this->state->current_token ); return true; /* @@ -2401,7 +2401,7 @@ private function step_in_body(): bool { } $this->insert_html_element( $this->state->current_token ); - $this->state->active_formatting_elements->push( $this->state->current_token ); + $this->push_active_formatting_element( $this->state->current_token ); return true; /* @@ -5121,7 +5121,7 @@ public function seek( $bookmark_name ): bool { } foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { + if ( $item instanceof AFE_Element && 'context-node' === $item->token->bookmark_name ) { break; } @@ -5401,8 +5401,8 @@ private function reconstruct_active_formatting_elements(): bool { * > elements, then there is nothing to reconstruct; stop this algorithm. 
*/ if ( - 'marker' === $last_entry->node_name || - $this->state->stack_of_open_elements->contains_node( $last_entry ) + $last_entry instanceof AFE_Marker || + $this->state->stack_of_open_elements->contains_node( $last_entry->token ) ) { return false; } @@ -5433,8 +5433,8 @@ private function reconstruct_active_formatting_elements(): bool { * > the stack of open elements, go to the step labeled rewind. */ if ( - 'marker' !== $entry->node_name && - ! $this->state->stack_of_open_elements->contains_node( $entry ) + ! $entry instanceof AFE_Marker && + ! $this->state->stack_of_open_elements->contains_node( $entry->token ) ) { goto rewind; } @@ -5451,7 +5451,7 @@ private function reconstruct_active_formatting_elements(): bool { * > element entry was created, to obtain new element. */ create: - $this->insert_html_element( $entry ); + $this->insert_html_element( $entry->token ); /* * > 9. Replace the entry for entry in the list with an entry for new element. @@ -5690,11 +5690,11 @@ private function run_adoption_agency_algorithm(): void { */ $formatting_element = null; foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { - if ( 'marker' === $item->node_name ) { + if ( $item instanceof AFE_Marker ) { break; } - if ( $subject === $item->node_name ) { + if ( $subject === $item->tag_name ) { $formatting_element = $item; break; } @@ -5706,13 +5706,13 @@ private function run_adoption_agency_algorithm(): void { } // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. - if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) { + if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element->token ) ) { $this->state->active_formatting_elements->remove_node( $formatting_element ); return; } // > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return. - if ( ! 
$this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) { + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->tag_name ) ) { return; } @@ -5723,7 +5723,7 @@ private function run_adoption_agency_algorithm(): void { $is_above_formatting_element = true; $furthest_block = null; foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) { - if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) { + if ( $is_above_formatting_element && $formatting_element->token->bookmark_name !== $item->bookmark_name ) { continue; } @@ -5747,7 +5747,7 @@ private function run_adoption_agency_algorithm(): void { foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { $this->state->stack_of_open_elements->pop(); - if ( $formatting_element->bookmark_name === $item->bookmark_name ) { + if ( $formatting_element->token->bookmark_name === $item->bookmark_name ) { $this->state->active_formatting_elements->remove_node( $formatting_element ); return; } @@ -6211,4 +6211,24 @@ protected static function get_encoding( string $label ): ?string { * @access private */ const CONSTRUCTOR_UNLOCK_CODE = 'Use WP_HTML_Processor::create_fragment() instead of calling the class constructor directly.'; + + private function push_active_formatting_element( WP_HTML_Token $token ) { + $bookmark = $this->bookmarks[ $token->bookmark_name ]; + $proc = new WP_HTML_Tag_Processor( + substr( $this->html, $bookmark->start, $bookmark->length ) + ); + $proc->change_parsing_namespace( $token->namespace ); + $proc->next_tag(); + $attributes = array(); + foreach ( $proc->get_attribute_names_with_prefix( '' ) as $name ) { + $attributes[ $name ] = $proc->get_attribute( $name ); + } + $afe = new AFE_Element( + $token->namespace, + $token->node_name, + $attributes, + $token + ); + $this->state->active_formatting_elements->push( $afe ); + } } From 24a837c9c5fba34c1002e2ee9bd0841996388b2b 
Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 29 Aug 2024 14:03:22 +0200 Subject: [PATCH 3/6] PICKME: Fix off-by-one active formatting removal --- src/wp-includes/html-api/class-wp-html-processor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 5789b77a0e1d3..e26759ade4bf5 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2357,8 +2357,8 @@ private function step_in_body(): bool { case 'A': $this->run_adoption_agency_algorithm(); $this->state->active_formatting_elements->remove_node( $item ); - $this->state->stack_of_open_elements->remove_node( $item ); - break; + $this->state->stack_of_open_elements->remove_node( $item->token ); + break 2; } } From b83d5b4ed58cb2bb53b2c2c03df788164a1a7aa3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 29 Aug 2024 14:30:40 +0200 Subject: [PATCH 4/6] Implement noah's ark active format reconstruction --- ...ass-wp-html-active-formatting-elements.php | 44 ++++++++++++++++++- .../html-api/class-wp-html-processor.php | 4 ++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 5e2eaf948a367..a2c5d4f72452d 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -128,6 +128,11 @@ public function insert_marker(): void { * @param AFE_Element|AFE_Marker $token Push this node onto the stack. 
*/ public function push( AFE_Element|AFE_Marker $afe ): void { + if ( $afe instanceof AFE_Marker ) { + $this->stack[] = $afe; + return; + } + /* * > If there are already three elements in the list of active formatting elements after the last marker, * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and @@ -136,9 +141,19 @@ public function push( AFE_Element|AFE_Marker $afe ): void { * > created by the parser; two elements have the same attributes if all their parsed attributes can be * > paired such that the two attributes in each pair have identical names, namespaces, and values * > (the order of the attributes does not matter). - * - * @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack. */ + $count = 0; + foreach ( $this->walk_up_until_marker() as $item ) { + if ( + $item->namespace === $afe->namespace && + $item->tag_name === $afe->tag_name && + $item->attributes === $afe->attributes + ) { + if ( ++$count >= 3 ) { + return; + } + } + } $this->stack[] = $afe; } @@ -216,6 +231,31 @@ public function walk_up() { } } + /** + * Steps through the stack starting from the last added and stopping at the first marker, if present. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $html = '
We are here'; + * foreach ( $stack->walk_up_until_marker() as $node ) { + * echo "{$node->tag_name} -> "; + * } + * > I + * + * @since 6.7.0 + */ + public function walk_up_until_marker() { + foreach ( $this->walk_up() as $item ) { + if ( $item instanceof AFE_Marker ) { + break; + } + + yield $item; + } + } + /** * Clears the list of active formatting elements up to the last marker. * diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e26759ade4bf5..788a533f88f2a 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5451,6 +5451,9 @@ private function reconstruct_active_formatting_elements(): bool { * > element entry was created, to obtain new element. */ create: + if ( array() !== $entry->attributes ) { + $this->bail( 'Cannot create active formatting elements with attributes.' ); + } $this->insert_html_element( $entry->token ); /* @@ -6223,6 +6226,7 @@ private function push_active_formatting_element( WP_HTML_Token $token ) { foreach ( $proc->get_attribute_names_with_prefix( '' ) as $name ) { $attributes[ $name ] = $proc->get_attribute( $name ); } + sort( $attributes ); $afe = new AFE_Element( $token->namespace, $token->node_name, From 096e5d0ae43c4b09d931e1f83a55bd55cf0295a1 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 29 Aug 2024 14:34:20 +0200 Subject: [PATCH 5/6] PICKME: Add missing nobr end tag handling --- src/wp-includes/html-api/class-wp-html-processor.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 788a533f88f2a..f9e801572a16c 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2415,6 +2415,7 @@ private function step_in_body(): bool { case '-EM': case '-FONT': case '-I': + case '-NOBR': case '-S': case 
'-SMALL': case '-STRIKE': From 7942a0c85d45c6b763dc8ad9c550628ea54d75e4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 29 Aug 2024 17:21:30 +0200 Subject: [PATCH 6/6] Add is_equivalent method to AFE, remove eager sort --- ...ass-wp-html-active-formatting-elements.php | 23 +++++++++++++++---- .../html-api/class-wp-html-processor.php | 1 - 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index a2c5d4f72452d..fb05a6e937499 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -144,11 +144,7 @@ public function push( AFE_Element|AFE_Marker $afe ): void { */ $count = 0; foreach ( $this->walk_up_until_marker() as $item ) { - if ( - $item->namespace === $afe->namespace && - $item->tag_name === $afe->tag_name && - $item->attributes === $afe->attributes - ) { + if ( $item->is_equivalent( $afe ) ) { if ( ++$count >= 3 ) { return; } @@ -294,6 +290,23 @@ class AFE_Element { /** @var WP_HTML_Token */ public $token; + public function is_equivalent( self $afe ): bool { + if ( + $this->namespace !== $afe->namespace || + $this->tag_name !== $afe->tag_name || + count( $this->attributes ) !== count( $afe->attributes ) + ) { + return false; + } + + foreach ( $this->attributes as $name => $value ) { + if ( ! 
array_key_exists( $name, $afe->attributes ) || $value !== $afe->attributes[ $name ] ) { + return false; + } + } + return true; + } + public function __construct( string $tag_namespace, string $tag_name, array $attributes, WP_HTML_Token $token ) { $this->namespace = $tag_namespace; $this->tag_name = $tag_name; diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index f9e801572a16c..cfcb809abdb93 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -6227,7 +6227,6 @@ private function push_active_formatting_element( WP_HTML_Token $token ) { foreach ( $proc->get_attribute_names_with_prefix( '' ) as $name ) { $attributes[ $name ] = $proc->get_attribute( $name ); } - sort( $attributes ); $afe = new AFE_Element( $token->namespace, $token->node_name,