Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Defer applying updates until necessary. #6120

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 61 additions & 11 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -837,8 +837,27 @@ public function next_tag( $query = null ) {
* @return bool Whether a token was parsed.
*/
public function next_token() {
return $this->base_class_next_token();
}

/**
* Internal method which finds the next token in the HTML document.
*
* This method is a protected internal function which implements the logic for
* finding the next token in a document. It exists so that the parser can update
* its state without affecting the location of the cursor in the document and
* without triggering subclass methods for things like `next_token()`, e.g. when
* applying patches before searching for the next token.
*
* @since 6.5.0
*
* @access private
*
* @return bool Whether a token was parsed.
*/
private function base_class_next_token() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗒️ A note for myself and other reviewers. Private methods cannot be extended so this should be safe. In PHP 8 it would be a warning to add final here.

$was_at = $this->bytes_already_parsed;
$this->get_updated_html();
$this->after_tag();

// Don't proceed if there's nothing more to scan.
if (
Expand Down Expand Up @@ -2041,6 +2060,45 @@ private function skip_whitespace() {
* @since 6.2.0
*/
private function after_tag() {
/*
* There could be lexical updates enqueued for an attribute that
* also exists on the next tag. In order to avoid conflating the
* attributes across the two tags, lexical updates with names
* need to be flushed to raw lexical updates.
*/
$this->class_name_updates_to_attributes_updates();

/*
* Purge updates if there are too many. The actual count isn't
* scientific, but a few values from 100 to a few thousand were
* tests to find a practially-useful limit.
*
* If the update queue grows too big, then the Tag Processor
* will spend more time iterating through them and lose the
* efficiency gains of deferring applying them.
*/
if ( 1000 < count( $this->lexical_updates ) ) {
westonruter marked this conversation as resolved.
Show resolved Hide resolved
$this->get_updated_html();
}

foreach ( $this->lexical_updates as $name => $update ) {
/*
* Any updates appearing after the cursor should be applied
* before proceeding, otherwise they may be overlooked.
*/
if ( $update->start >= $this->bytes_already_parsed ) {
$this->get_updated_html();
break;
}

if ( is_int( $name ) ) {
continue;
}

$this->lexical_updates[] = $update;
unset( $this->lexical_updates[ $name ] );
}

$this->token_starts_at = null;
$this->token_length = null;
$this->tag_name_starts_at = null;
Expand Down Expand Up @@ -2230,7 +2288,7 @@ private function apply_attributes_updates( $shift_this_point = 0 ) {
$shift = strlen( $diff->text ) - $diff->length;

// Adjust the cursor position by however much an update affects it.
if ( $diff->start <= $this->bytes_already_parsed ) {
if ( $diff->start < $this->bytes_already_parsed ) {
$this->bytes_already_parsed += $shift;
}

Expand Down Expand Up @@ -3164,15 +3222,7 @@ public function get_updated_html() {
* └←─┘ back up by strlen("em") + 1 ==> 3
*/
$this->bytes_already_parsed = $before_current_tag;
$this->parse_next_tag();
// Reparse the attributes.
while ( $this->parse_next_attribute() ) {
continue;
}

$tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed );
$this->token_length = $tag_ends_at - $this->token_starts_at;
$this->bytes_already_parsed = $tag_ends_at;
$this->base_class_next_token();

return $this->html;
}
Expand Down
11 changes: 10 additions & 1 deletion tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php
Original file line number Diff line number Diff line change
Expand Up @@ -293,21 +293,30 @@ public function test_bookmarks_complex_use_case() {

/**
* @ticket 56299
* @ticket 60697
*
* @covers WP_HTML_Tag_Processor::seek
*/
public function test_updates_bookmark_for_additions_after_both_sides() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might wanna add the @ticket XXXXXX reference for this change to this test's docblock as well 🙂

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Is it fine to add the explanation after the ticket id or will that throw off some parser somewhere? I can remove the explanation if it's a problem.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks as if no other test cases are using an explanation after the ticket id, so maybe better remove it for consistency's sake.

$processor = new WP_HTML_Tag_Processor( '<div>First</div><div>Second</div>' );
$processor->next_tag();
$processor->set_attribute( 'id', 'one' );
$processor->set_bookmark( 'first' );
$processor->next_tag();
$processor->set_attribute( 'id', 'two' );
$processor->add_class( 'second' );

$processor->seek( 'first' );
$processor->add_class( 'first' );

$this->assertSame(
'<div class="first">First</div><div class="second">Second</div>',
'one',
$processor->get_attribute( 'id' ),
'Should have remembered attribute change from before the seek.'
);

$this->assertSame(
'<div class="first" id="one">First</div><div class="second" id="two">Second</div>',
$processor->get_updated_html(),
'The bookmark was updated incorrectly in response to HTML markup updates'
);
Expand Down
45 changes: 45 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2727,4 +2727,49 @@ public function test_single_text_node_with_taglike_text() {
$this->assertSame( '#text', $processor->get_token_type(), 'Did not find text node.' );
$this->assertSame( 'test< /A>', $processor->get_modifiable_text(), 'Did not find complete text node.' );
}

/**
* Ensures that updates which are enqueued in front of the cursor
* are applied before moving forward in the document.
*
* @ticket 60697
*/
public function test_applies_updates_before_proceeding() {
$html = '<div><img></div><div><img></div>';

$subclass = new class( $html ) extends WP_HTML_Tag_Processor {
/**
* Inserts raw text after the current token.
*
* @param string $new_html Raw text to insert.
*/
public function insert_after( $new_html ) {
$this->set_bookmark( 'here' );
$this->lexical_updates[] = new WP_HTML_Text_Replacement(
$this->bookmarks['here']->start + $this->bookmarks['here']->length + 1,
0,
$new_html
);
}
};

$subclass->next_tag( 'img' );
$subclass->insert_after( '<p>snow-capped</p>' );

$subclass->next_tag();
$this->assertSame(
'P',
$subclass->get_tag(),
'Should have matched inserted HTML as next tag.'
);

$subclass->next_tag( 'img' );
$subclass->set_attribute( 'alt', 'mountain' );

$this->assertSame(
'<div><img><p>snow-capped</p></div><div><img alt="mountain"></div>',
$subclass->get_updated_html(),
'Should have properly applied the update from in front of the cursor.'
);
}
}
Loading