WordPress · dmsnell · Feb 15, 2024 · Mar 9, 2024 · Mar 9, 2024 · Mar 9, 2024
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -837,8 +837,27 @@ public function next_tag( $query = null ) {
 	 * @return bool Whether a token was parsed.
 	 */
 	public function next_token() {
+		return $this->base_class_next_token();
+	}
+
+	/**
+	 * Internal method which finds the next token in the HTML document.
+	 *
+	 * This method is a private internal function which implements the logic for
+	 * finding the next token in a document. It exists so that the parser can update
+	 * its state without affecting the location of the cursor in the document and
+	 * without triggering subclass methods for things like `next_token()`, e.g. when
+	 * applying patches before searching for the next token.
+	 *
+	 * @since 6.5.0
+	 *
+	 * @access private
+	 *
+	 * @return bool Whether a token was parsed.
+	 */
+	private function base_class_next_token() {
 		$was_at = $this->bytes_already_parsed;
-		$this->get_updated_html();
+		$this->after_tag();
 
 		// Don't proceed if there's nothing more to scan.
 		if (
@@ -2041,6 +2060,45 @@ private function skip_whitespace() {
 	 * @since 6.2.0
 	 */
 	private function after_tag() {
+		/*
+		 * There could be lexical updates enqueued for an attribute that
+		 * also exists on the next tag. In order to avoid conflating the
+		 * attributes across the two tags, lexical updates with names
+		 * need to be flushed to raw lexical updates.
+		 */
+		$this->class_name_updates_to_attributes_updates();
+
+		/*
+		 * Purge updates if there are too many. The actual count isn't
+		 * scientific, but a few values from 100 to a few thousand were
+		 * tested to find a practically-useful limit.
+		 *
+		 * If the update queue grows too big, then the Tag Processor
+		 * will spend more time iterating through them and lose the
+		 * efficiency gains of deferring applying them.
+		 */
+		if ( 1000 < count( $this->lexical_updates ) ) {
+			$this->get_updated_html();
+		}
+
+		foreach ( $this->lexical_updates as $name => $update ) {
+			/*
+			 * Any updates starting at or after the cursor should be applied
+			 * before proceeding, otherwise they may be overlooked.
+			 */
+			if ( $update->start >= $this->bytes_already_parsed ) {
+				$this->get_updated_html();
+				break;
+			}
+
+			if ( is_int( $name ) ) {
+				continue;
+			}
+
+			$this->lexical_updates[] = $update;
+			unset( $this->lexical_updates[ $name ] );
+		}
+
 		$this->token_starts_at      = null;
 		$this->token_length         = null;
 		$this->tag_name_starts_at   = null;
@@ -2230,7 +2288,7 @@ private function apply_attributes_updates( $shift_this_point = 0 ) {
 			$shift = strlen( $diff->text ) - $diff->length;
 
 			// Adjust the cursor position by however much an update affects it.
-			if ( $diff->start <= $this->bytes_already_parsed ) {
+			if ( $diff->start < $this->bytes_already_parsed ) {
 				$this->bytes_already_parsed += $shift;
 			}
 
@@ -3164,15 +3222,7 @@ public function get_updated_html() {
 		 *                 └←─┘ back up by strlen("em") + 1 ==> 3
 		 */
 		$this->bytes_already_parsed = $before_current_tag;
-		$this->parse_next_tag();
-		// Reparse the attributes.
-		while ( $this->parse_next_attribute() ) {
-			continue;
-		}
-
-		$tag_ends_at                = strpos( $this->html, '>', $this->bytes_already_parsed );
-		$this->token_length         = $tag_ends_at - $this->token_starts_at;
-		$this->bytes_already_parsed = $tag_ends_at;
+		$this->base_class_next_token();
 
 		return $this->html;
 	}

diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php
@@ -293,21 +293,30 @@ public function test_bookmarks_complex_use_case() {
 
 	/**
 	 * @ticket 56299
+	 * @ticket 60697
 	 *
 	 * @covers WP_HTML_Tag_Processor::seek
 	 */
 	public function test_updates_bookmark_for_additions_after_both_sides() {
 		$processor = new WP_HTML_Tag_Processor( '<div>First</div><div>Second</div>' );
 		$processor->next_tag();
+		$processor->set_attribute( 'id', 'one' );
 		$processor->set_bookmark( 'first' );
 		$processor->next_tag();
+		$processor->set_attribute( 'id', 'two' );
 		$processor->add_class( 'second' );
 
 		$processor->seek( 'first' );
 		$processor->add_class( 'first' );
 
 		$this->assertSame(
-			'<div class="first">First</div><div class="second">Second</div>',
+			'one',
+			$processor->get_attribute( 'id' ),
+			'Should have remembered attribute change from before the seek.'
+		);
+
+		$this->assertSame(
+			'<div class="first" id="one">First</div><div class="second" id="two">Second</div>',
 			$processor->get_updated_html(),
 			'The bookmark was updated incorrectly in response to HTML markup updates'
 		);

diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -2727,4 +2727,49 @@ public function test_single_text_node_with_taglike_text() {
 		$this->assertSame( '#text', $processor->get_token_type(), 'Did not find text node.' );
 		$this->assertSame( 'test< /A>', $processor->get_modifiable_text(), 'Did not find complete text node.' );
 	}
+
+	/**
+	 * Ensures that an update enqueued ahead of the cursor (raw HTML after an IMG)
+	 * is applied before moving forward, so that the next tag found is the new P.
+	 *
+	 * @ticket 60697
+	 */
+	public function test_applies_updates_before_proceeding() {
+		$html = '<div><img></div><div><img></div>';
+
+		$subclass = new class( $html ) extends WP_HTML_Tag_Processor {
+			/**
+			 * Enqueues a raw text insertion after the current token without applying it.
+			 *
+			 * @param string $new_html Raw text to insert, stored unmodified in the update.
+			 */
+			public function insert_after( $new_html ) {
+				$this->set_bookmark( 'here' );
+				$this->lexical_updates[] = new WP_HTML_Text_Replacement(
+					$this->bookmarks['here']->start + $this->bookmarks['here']->length + 1,
+					0,
+					$new_html
+				);
+			}
+		};
+
+		$subclass->next_tag( 'img' );
+		$subclass->insert_after( '<p>snow-capped</p>' );
+
+		$subclass->next_tag();
+		$this->assertSame(
+			'P',
+			$subclass->get_tag(),
+			'Should have matched inserted HTML as next tag.'
+		);
+
+		$subclass->next_tag( 'img' );
+		$subclass->set_attribute( 'alt', 'mountain' );
+
+		$this->assertSame(
+			'<div><img><p>snow-capped</p></div><div><img alt="mountain"></div>',
+			$subclass->get_updated_html(),
+			'Should have properly applied the update from in front of the cursor.'
+		);
+	}
 }