Skip to content

Commit

Permalink
HTML API: Simplify breadcrumb accounting.
Browse files Browse the repository at this point in the history
Since the HTML Processor started visiting all nodes in a document, both
real and virtual, the breadcrumb accounting has become complicated,
and it is not clear that it is fully reliable.

In this patch the breadcrumbs are rebuilt separately from the stack of
open elements in order to eliminate the problem of the stateful stack
interactions and the post-hoc event queue.

As a result, breadcrumbs are greatly simplified, and more verifiably
correct, in this construction.

Developed in WordPress/wordpress-develop#6981
Discussed in https://core.trac.wordpress.org/ticket/61576

Follow-up to [58590].

Props bernhard-reiter, dmsnell.
See #61576.

Built from https://develop.svn.wordpress.org/trunk@58713


git-svn-id: http://core.svn.wordpress.org/trunk@58115 1a063a9b-81f0-0310-95a4-ce76da25c4cd
  • Loading branch information
dmsnell committed Jul 12, 2024
1 parent befa5f9 commit 519a8f6
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 76 deletions.
126 changes: 51 additions & 75 deletions wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private $element_queue = array();

/**
* Stores the current breadcrumbs.
*
* @since 6.7.0
*
* @var string[]
*/
private $breadcrumbs = array();

/**
* Current stack event, if set, representing a matched token.
*
Expand Down Expand Up @@ -310,8 +319,8 @@ public static function create_fragment( $html, $context = '<body>', $encoding =
false
);

$processor->state->stack_of_open_elements->push( $context_node );
$processor->context_node = $context_node;
$processor->breadcrumbs = array( 'HTML', $context_node->node_name );

return $processor;
}
Expand Down Expand Up @@ -523,44 +532,46 @@ public function next_token() {
return false;
}

if ( 'done' !== $this->has_seen_context_node && 0 === count( $this->element_queue ) && ! $this->step() ) {
while ( 'context-node' !== $this->state->stack_of_open_elements->current_node()->bookmark_name && $this->state->stack_of_open_elements->pop() ) {
continue;
}
$this->has_seen_context_node = 'done';
return $this->next_token();
/*
* Prime the events if there are none.
*
* @todo In some cases, probably related to the adoption agency
* algorithm, this call to step() doesn't create any new
* events. Calling it again creates them. Figure out why
* this is and if it's inherent or if it's a bug. Looping
* until there are events or until there are no more
* tokens works in the meantime and isn't obviously wrong.
*/
while ( empty( $this->element_queue ) && $this->step() ) {
continue;
}

// Process the next event on the queue.
$this->current_element = array_shift( $this->element_queue );
while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) {
if ( isset( $this->current_element ) ) {
if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
$this->has_seen_context_node = true;
return $this->next_token();
}
}
$this->current_element = array_shift( $this->element_queue );
if ( ! isset( $this->current_element ) ) {
return false;
}

if ( ! isset( $this->current_element ) ) {
if ( 'done' === $this->has_seen_context_node ) {
return false;
} else {
return $this->next_token();
}
$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;

/*
* The root node only exists in the fragment parser, and closing it
* indicates that the parse is complete. Stop before popping it from
* the breadcrumbs.
*/
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
return ! $is_pop && $this->next_token();
}

if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) {
$this->element_queue = array();
$this->current_element = null;
return false;
// Adjust the breadcrumbs for this event.
if ( $is_pop ) {
array_pop( $this->breadcrumbs );
} else {
$this->breadcrumbs[] = $this->current_element->token->node_name;
}

// Avoid sending close events for elements which don't expect a closing.
if (
WP_HTML_Stack_Event::POP === $this->current_element->operation &&
! static::expects_closer( $this->current_element->token )
) {
if ( $is_pop && ! static::expects_closer( $this->current_element->token ) ) {
return $this->next_token();
}

Expand Down Expand Up @@ -643,10 +654,11 @@ public function matches_breadcrumbs( $breadcrumbs ) {
return false;
}

foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
for ( $i = count( $this->breadcrumbs ) - 1; $i >= 0; $i-- ) {
$node = $this->breadcrumbs[ $i ];
$crumb = strtoupper( current( $breadcrumbs ) );

if ( '*' !== $crumb && $node->node_name !== $crumb ) {
if ( '*' !== $crumb && $node !== $crumb ) {
return false;
}

Expand Down Expand Up @@ -862,46 +874,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
* @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL.
*/
public function get_breadcrumbs() {
$breadcrumbs = array();

foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) {
$breadcrumbs[] = $stack_item->node_name;
}

if ( ! $this->is_virtual() ) {
return $breadcrumbs;
}

foreach ( $this->element_queue as $queue_item ) {
if ( $this->current_element->token->bookmark_name === $queue_item->token->bookmark_name ) {
break;
}

if ( 'context-node' === $queue_item->token->bookmark_name ) {
break;
}

if ( 'real' === $queue_item->provenance ) {
break;
}

if ( WP_HTML_Stack_Event::PUSH === $queue_item->operation ) {
$breadcrumbs[] = $queue_item->token->node_name;
} else {
array_pop( $breadcrumbs );
}
}

if ( null !== parent::get_token_name() && ! parent::is_tag_closer() ) {
array_pop( $breadcrumbs );
}

// Add the virtual node we're at.
if ( WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
$breadcrumbs[] = $this->current_element->token->node_name;
}

return $breadcrumbs;
return $this->breadcrumbs;
}

/**
Expand Down Expand Up @@ -930,9 +903,7 @@ public function get_breadcrumbs() {
* @return int Nesting-depth of current location in the document.
*/
public function get_current_depth() {
return $this->is_virtual()
? count( $this->get_breadcrumbs() )
: $this->state->stack_of_open_elements->count();
return count( $this->breadcrumbs );
}

/**
Expand Down Expand Up @@ -2552,7 +2523,6 @@ public function seek( $bookmark_name ) {
? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start
: 0;
$bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start;
$bookmark_length = $this->bookmarks[ $actual_bookmark_name ]->length;
$direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';

/*
Expand Down Expand Up @@ -2610,6 +2580,12 @@ public function seek( $bookmark_name ) {
$this->state->frameset_ok = true;
$this->element_queue = array();
$this->current_element = null;

if ( isset( $this->context_node ) ) {
$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
} else {
$this->breadcrumbs = array();
}
}

// When moving forwards, reparse the document until reaching the same location as the original bookmark.
Expand Down
2 changes: 1 addition & 1 deletion wp-includes/version.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.7-alpha-58712';
$wp_version = '6.7-alpha-58713';

/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
Expand Down

0 comments on commit 519a8f6

Please sign in to comment.