Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Active format reconstruction with noah's ark #19

Draft
wants to merge 7 commits into
base: html-api/improve-active-element-reconstruction
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 88 additions & 17 deletions src/wp-includes/html-api/class-wp-html-active-formatting-elements.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class WP_HTML_Active_Formatting_Elements {
*
* @since 6.4.0
*
* @var WP_HTML_Token[]
* @var Array<AFE_Element|AFE_Marker>
*/
private $stack = array();

Expand All @@ -53,9 +53,9 @@ class WP_HTML_Active_Formatting_Elements {
* @access private
*
* @param int $nth Number of nodes from the top node to return.
* @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null.
* @return AFE_Element|AFE_Marker|null Node at the given index in the stack, if one exists, otherwise null.
*/
public function at( $nth ): AFE_Element|AFE_Marker|null {
	/*
	 * Positions are 1-based: position 1 maps to index 0 of the backing array.
	 * A position with no entry yields null, as the declared return type allows,
	 * instead of triggering an undefined-key warning.
	 */
	return $this->stack[ $nth - 1 ] ?? null;
}

Expand All @@ -67,9 +67,9 @@ public function at( $nth ) {
* @param WP_HTML_Token $token Look for this node in the stack.
* @return bool Whether the referenced node is in the stack of active formatting elements.
*/
public function contains_node( WP_HTML_Token $token ) {
public function contains_node( WP_HTML_Token $token ): bool {
foreach ( $this->walk_up() as $item ) {
if ( $token->bookmark_name === $item->bookmark_name ) {
if ( $item instanceof AFE_Element && $token->bookmark_name === $item->token->bookmark_name ) {
return true;
}
}
Expand Down Expand Up @@ -103,7 +103,7 @@ public function current_node() {
}

/**
* Inserts a "marker" at the end of the list of active formatting elements.
* Inserts a marker at the end of the list of active formatting elements.
*
* > The markers are inserted when entering applet, object, marquee,
* > template, td, th, and caption elements, and are used to prevent
Expand All @@ -115,7 +115,7 @@ public function current_node() {
* @since 6.7.0
*/
public function insert_marker(): void {
	// Markers are a dedicated entry type; they are no longer WP_HTML_Token objects named "marker".
	$this->push( new AFE_Marker() );
}

/**
Expand All @@ -125,9 +125,14 @@ public function insert_marker(): void {
*
* @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements
*
* @param WP_HTML_Token $token Push this node onto the stack.
* @param AFE_Element|AFE_Marker $afe Push this entry onto the stack.
*/
public function push( AFE_Element|AFE_Marker $afe ): void {
	// Markers only partition the list; the Noah's Ark clause does not apply to them.
	if ( $afe instanceof AFE_Marker ) {
		$this->stack[] = $afe;
		return;
	}

	/*
	 * > If there are already three elements in the list of active formatting elements after the last marker,
	 * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
	 * > attributes as element, then remove the earliest such element from the list of active formatting
	 * > elements. For these purposes, the attributes must be compared as they were when the elements were
	 * > created by the parser; two elements have the same attributes if all their parsed attributes can be
	 * > paired such that the two attributes in each pair have identical names, namespaces, and values
	 * > (the order of the attributes does not matter).
	 *
	 * The new element is always added; what the clause discards is the oldest equivalent entry.
	 */
	$equivalent_count = 0;
	for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
		$item = $this->stack[ $i ];

		// Only entries after the last marker take part in the comparison.
		if ( $item instanceof AFE_Marker ) {
			break;
		}

		if ( $item->is_equivalent( $afe ) && 3 === ++$equivalent_count ) {
			// Walking from newest to oldest, the third match is the earliest one.
			array_splice( $this->stack, $i, 1 );
			break;
		}
	}

	// > Add element to the list of active formatting elements.
	$this->stack[] = $afe;
}

/**
* Removes a node from the stack of active formatting elements.
*
* @since 6.4.0
*
* @param WP_HTML_Token $token Remove this node from the stack, if it's there already.
* @param AFE_Element $node Remove this node from the stack, if it's there already.
* @return bool Whether the node was found and removed from the stack of active formatting elements.
*/
public function remove_node( WP_HTML_Token $token ) {
public function remove_node( AFE_Element $node ) {
foreach ( $this->walk_up() as $position_from_end => $item ) {
if ( $token->bookmark_name !== $item->bookmark_name ) {
if ( $item instanceof AFE_Element && $node->token->bookmark_name !== $item->token->bookmark_name ) {
continue;
}

Expand Down Expand Up @@ -217,6 +227,31 @@ public function walk_up() {
}
}

/**
* Steps through the stack starting from the last added and stopping at the first marker, if present.
*
* This generator function is designed to be used inside a "foreach" loop.
*
* Example:
*
* $html = '<em><table><td><i>We are here';
* foreach ( $stack->walk_up_until_marker() as $node ) {
* echo "{$node->tag_name} -> ";
* }
* > I ->
*
* @since 6.7.0
*/
public function walk_up_until_marker() {
	foreach ( $this->walk_up() as $entry ) {
		if ( ! ( $entry instanceof AFE_Marker ) ) {
			yield $entry;
			continue;
		}

		// A marker ends the walk; the marker itself is never yielded.
		return;
	}
}

/**
* Clears the list of active formatting elements up to the last marker.
*
Expand All @@ -237,9 +272,45 @@ public function walk_up() {
public function clear_up_to_last_marker(): void {
	foreach ( $this->walk_up() as $item ) {
		// Every visited entry is removed, including the marker that ends the loop.
		array_pop( $this->stack );

		// Markers are identified by type; list entries no longer expose a `node_name`.
		if ( $item instanceof AFE_Marker ) {
			break;
		}
	}
}
}

/**
 * Marker entry in the list of active formatting elements.
 *
 * Carries no data: code reading the list recognizes a marker with
 * `instanceof AFE_Marker`.
 */
class AFE_Marker {}
/**
 * Element entry in the list of active formatting elements.
 *
 * Pairs a token with the namespace, tag name, and attributes it is
 * compared by when looking for equivalent entries.
 */
class AFE_Element {
	/**
	 * Namespace supplied when the entry was created.
	 *
	 * @var string
	 */
	public $namespace;

	/**
	 * Tag name supplied when the entry was created.
	 *
	 * @var string
	 */
	public $tag_name;

	/**
	 * Attribute values keyed by attribute name.
	 *
	 * @var array<string, string|bool|null>
	 */
	public $attributes;

	/**
	 * Token this entry was created for.
	 *
	 * @var WP_HTML_Token
	 */
	public $token;

	/**
	 * Stores the given values on the new entry without modification.
	 *
	 * @param string        $tag_namespace Namespace of the element.
	 * @param string        $tag_name      Tag name of the element.
	 * @param array         $attributes    Attribute values keyed by attribute name.
	 * @param WP_HTML_Token $token         Token the entry refers to.
	 */
	public function __construct( string $tag_namespace, string $tag_name, array $attributes, WP_HTML_Token $token ) {
		$this->token      = $token;
		$this->attributes = $attributes;
		$this->tag_name   = $tag_name;
		$this->namespace  = $tag_namespace;
	}

	/**
	 * Reports whether another entry has the same namespace, tag name, and attributes.
	 *
	 * Attribute order is irrelevant; names and values are compared strictly.
	 * The tokens themselves are not compared.
	 *
	 * @param self $afe Entry to compare against.
	 * @return bool Whether the two entries are equivalent.
	 */
	public function is_equivalent( self $afe ): bool {
		$same_element_kind = $this->namespace === $afe->namespace && $this->tag_name === $afe->tag_name;
		if ( ! $same_element_kind ) {
			return false;
		}

		// With equal counts, finding every attribute of this entry in the other proves the sets match.
		if ( count( $afe->attributes ) !== count( $this->attributes ) ) {
			return false;
		}

		foreach ( $this->attributes as $attribute_name => $attribute_value ) {
			$is_paired = array_key_exists( $attribute_name, $afe->attributes )
				&& $afe->attributes[ $attribute_name ] === $attribute_value;

			if ( ! $is_paired ) {
				return false;
			}
		}

		return true;
	}
}
68 changes: 46 additions & 22 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ private function bail( string $message ) {

$active_formats = array();
foreach ( $this->state->active_formatting_elements->walk_down() as $item ) {
$active_formats[] = $item->node_name;
$active_formats[] = $item instanceof AFE_Marker ? '(marker)' : $item->tag_name;
}

$this->last_error = self::ERROR_UNSUPPORTED;
Expand Down Expand Up @@ -2350,21 +2350,21 @@ private function step_in_body(): bool {
*/
case '+A':
foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
switch ( $item->node_name ) {
case 'marker':
break;

if ( $item instanceof AFE_Marker ) {
break;
}
switch ( $item->tag_name ) {
case 'A':
$this->run_adoption_agency_algorithm();
$this->state->active_formatting_elements->remove_node( $item );
$this->state->stack_of_open_elements->remove_node( $item );
break;
$this->state->stack_of_open_elements->remove_node( $item->token );
break 2;
}
}

$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->active_formatting_elements->push( $this->state->current_token );
$this->push_active_formatting_element( $this->state->current_token );
return true;

/*
Expand All @@ -2385,7 +2385,7 @@ private function step_in_body(): bool {
case '+U':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->active_formatting_elements->push( $this->state->current_token );
$this->push_active_formatting_element( $this->state->current_token );
return true;

/*
Expand All @@ -2401,7 +2401,7 @@ private function step_in_body(): bool {
}

$this->insert_html_element( $this->state->current_token );
$this->state->active_formatting_elements->push( $this->state->current_token );
$this->push_active_formatting_element( $this->state->current_token );
return true;

/*
Expand All @@ -2415,6 +2415,7 @@ private function step_in_body(): bool {
case '-EM':
case '-FONT':
case '-I':
case '-NOBR':
case '-S':
case '-SMALL':
case '-STRIKE':
Expand Down Expand Up @@ -5121,7 +5122,7 @@ public function seek( $bookmark_name ): bool {
}

foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
if ( 'context-node' === $item->bookmark_name ) {
if ( 'context-node' === $item->token->bookmark_name ) {
break;
}

Expand Down Expand Up @@ -5401,8 +5402,8 @@ private function reconstruct_active_formatting_elements(): bool {
* > elements, then there is nothing to reconstruct; stop this algorithm.
*/
if (
'marker' === $last_entry->node_name ||
$this->state->stack_of_open_elements->contains_node( $last_entry )
$last_entry instanceof AFE_Marker ||
$this->state->stack_of_open_elements->contains_node( $last_entry->token )
) {
return false;
}
Expand Down Expand Up @@ -5433,8 +5434,8 @@ private function reconstruct_active_formatting_elements(): bool {
* > the stack of open elements, go to the step labeled rewind.
*/
if (
'marker' !== $entry->node_name &&
! $this->state->stack_of_open_elements->contains_node( $entry )
! $entry instanceof AFE_Marker &&
! $this->state->stack_of_open_elements->contains_node( $entry->token )
) {
goto rewind;
}
Expand All @@ -5451,7 +5452,10 @@ private function reconstruct_active_formatting_elements(): bool {
* > element entry was created, to obtain new element.
*/
create:
$this->insert_html_element( $entry );
if ( array() !== $entry->attributes ) {
$this->bail( 'Cannot create active formatting elements with attributes.' );
}
$this->insert_html_element( $entry->token );

/*
* > 9. Replace the entry for entry in the list with an entry for new element.
Expand Down Expand Up @@ -5690,11 +5694,11 @@ private function run_adoption_agency_algorithm(): void {
*/
$formatting_element = null;
foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
if ( 'marker' === $item->node_name ) {
if ( $item instanceof AFE_Marker ) {
break;
}

if ( $subject === $item->node_name ) {
if ( $subject === $item->tag_name ) {
$formatting_element = $item;
break;
}
Expand All @@ -5706,13 +5710,13 @@ private function run_adoption_agency_algorithm(): void {
}

// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element->token ) ) {
$this->state->active_formatting_elements->remove_node( $formatting_element );
return;
}

// > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return.
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) {
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->tag_name ) ) {
return;
}

Expand All @@ -5723,7 +5727,7 @@ private function run_adoption_agency_algorithm(): void {
$is_above_formatting_element = true;
$furthest_block = null;
foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) {
if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) {
if ( $is_above_formatting_element && $formatting_element->token->bookmark_name !== $item->bookmark_name ) {
continue;
}

Expand All @@ -5747,7 +5751,7 @@ private function run_adoption_agency_algorithm(): void {
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
$this->state->stack_of_open_elements->pop();

if ( $formatting_element->bookmark_name === $item->bookmark_name ) {
if ( $formatting_element->token->bookmark_name === $item->bookmark_name ) {
$this->state->active_formatting_elements->remove_node( $formatting_element );
return;
}
Expand Down Expand Up @@ -6211,4 +6215,24 @@ protected static function get_encoding( string $label ): ?string {
* @access private
*/
const CONSTRUCTOR_UNLOCK_CODE = 'Use WP_HTML_Processor::create_fragment() instead of calling the class constructor directly.';

/**
 * Records a formatting element in the list of active formatting elements.
 *
 * Re-parses the source span referenced by the token's bookmark to collect the
 * tag's attributes, then pushes an entry holding the token's namespace, node
 * name, and those attributes, so equivalent entries can be recognized.
 *
 * @param WP_HTML_Token $token Token of the formatting element to record.
 */
private function push_active_formatting_element( WP_HTML_Token $token ): void {
	$bookmark = $this->bookmarks[ $token->bookmark_name ];
	$proc     = new WP_HTML_Tag_Processor(
		substr( $this->html, $bookmark->start, $bookmark->length )
	);
	$proc->change_parsing_namespace( $token->namespace );

	/*
	 * If the span cannot be matched as a tag there are no attributes to read,
	 * and the attribute-name lookup may return null instead of an array.
	 */
	$attributes = array();
	if ( $proc->next_tag() ) {
		foreach ( $proc->get_attribute_names_with_prefix( '' ) ?? array() as $name ) {
			$attributes[ $name ] = $proc->get_attribute( $name );
		}
	}

	$this->state->active_formatting_elements->push(
		new AFE_Element(
			$token->namespace,
			$token->node_name,
			$attributes,
			$token
		)
	);
}
}
6 changes: 3 additions & 3 deletions src/wp-includes/html-api/class-wp-html-token.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class WP_HTML_Token {
*
* @var string
*/
public $bookmark_name = null;
public $bookmark_name;

/**
* Name of node; lowercase names such as "marker" are not HTML elements.
Expand Down Expand Up @@ -90,13 +90,13 @@ class WP_HTML_Token {
*
* @since 6.4.0
*
* @param string|null $bookmark_name Name of bookmark corresponding to location in HTML where token is found,
* @param string $bookmark_name Name of bookmark corresponding to location in HTML where token is found.
* Markers are no longer represented as tokens, so `null` is not accepted.
* @param string $node_name Name of node token represents; if uppercase, an HTML element; if lowercase, a special value like "marker".
* @param bool $has_self_closing_flag Whether the source token contains the self-closing flag, regardless of whether it's valid.
* @param callable|null $on_destroy Optional. Function to call when destroying token, useful for releasing the bookmark.
*/
public function __construct( ?string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) {
public function __construct( string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) {
$this->bookmark_name = $bookmark_name;
$this->namespace = 'html';
$this->node_name = $node_name;
Expand Down
Loading