diff --git a/lib/compat/wordpress-6.5/html-api/class-gutenberg-html-open-elements-6-5.php b/lib/compat/wordpress-6.5/html-api/class-gutenberg-html-open-elements-6-5.php new file mode 100644 index 0000000000000..0d8901225c792 --- /dev/null +++ b/lib/compat/wordpress-6.5/html-api/class-gutenberg-html-open-elements-6-5.php @@ -0,0 +1,462 @@ + Initially, the stack of open elements is empty. The stack grows + * > downwards; the topmost node on the stack is the first one added + * > to the stack, and the bottommost node of the stack is the most + * > recently added node in the stack (notwithstanding when the stack + * > is manipulated in a random access fashion as part of the handling + * > for misnested tags). + * + * @since 6.4.0 + * + * @access private + * + * @see https://html.spec.whatwg.org/#stack-of-open-elements + * @see WP_HTML_Processor + */ +class Gutenberg_HTML_Open_Elements_6_5 { + /** + * Holds the stack of open element references. + * + * @since 6.4.0 + * + * @var WP_HTML_Token[] + */ + public $stack = array(); + + /** + * Whether a P element is in button scope currently. + * + * This class optimizes scope lookup by pre-calculating + * this value when elements are added and removed to the + * stack of open elements which might change its value. + * This avoids frequent iteration over the stack. + * + * @since 6.4.0 + * + * @var bool + */ + private $has_p_in_button_scope = false; + + /** + * Reports if a specific node is in the stack of open elements. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $token Look for this node in the stack. + * @return bool Whether the referenced node is in the stack of open elements. + */ + public function contains_node( $token ) { + foreach ( $this->walk_up() as $item ) { + if ( $token->bookmark_name === $item->bookmark_name ) { + return true; + } + } + + return false; + } + + /** + * Returns how many nodes are currently in the stack of open elements. + * + * @since 6.4.0 + * + * @return int How many node are in the stack of open elements. + */ + public function count() { + return count( $this->stack ); + } + + /** + * Returns the node at the end of the stack of open elements, + * if one exists. If the stack is empty, returns null. + * + * @since 6.4.0 + * + * @return WP_HTML_Token|null Last node in the stack of open elements, if one exists, otherwise null. + */ + public function current_node() { + $current_node = end( $this->stack ); + + return $current_node ? $current_node : null; + } + + /** + * Returns whether an element is in a specific scope. + * + * ## HTML Support + * + * This function skips checking for the termination list because there + * are no supported elements which appear in the termination list. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-the-specific-scope + * + * @param string $tag_name Name of tag check. + * @param string[] $termination_list List of elements that terminate the search. + * @return bool Whether the element was found in a specific scope. + */ + public function has_element_in_specific_scope( $tag_name, $termination_list ) { + foreach ( $this->walk_up() as $node ) { + if ( $node->node_name === $tag_name ) { + return true; + } + + if ( + '(internal: H1 through H6 - do not use)' === $tag_name && + in_array( $node->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + ) { + return true; + } + + switch ( $node->node_name ) { + case 'HTML': + return false; + } + + if ( in_array( $node->node_name, $termination_list, true ) ) { + return false; + } + } + + return false; + } + + /** + * Returns whether a particular element is in scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-scope + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_scope( $tag_name ) { + return $this->has_element_in_specific_scope( + $tag_name, + array( + + /* + * Because it's not currently possible to encounter + * one of the termination elements, they don't need + * to be listed here. If they were, they would be + * unreachable and only waste CPU cycles while + * scanning through HTML. + */ + ) + ); + } + + /** + * Returns whether a particular element is in list item scope. + * + * @since 6.4.0 + * @since 6.5.0 Implemented: no longer throws on every invocation. + * + * @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_list_item_scope( $tag_name ) { + return $this->has_element_in_specific_scope( + $tag_name, + array( + // There are more elements that belong here which aren't currently supported. + 'OL', + 'UL', + ) + ); + } + + /** + * Returns whether a particular element is in button scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_button_scope( $tag_name ) { + return $this->has_element_in_specific_scope( $tag_name, array( 'BUTTON' ) ); + } + + /** + * Returns whether a particular element is in table scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope + * + * @throws WP_HTML_Unsupported_Exception Always until this function is implemented. + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_table_scope( $tag_name ) { + throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on table scope.' ); + + return false; // The linter requires this unreachable code until the function is implemented and can return. + } + + /** + * Returns whether a particular element is in select scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-select-scope + * + * @throws WP_HTML_Unsupported_Exception Always until this function is implemented. + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_select_scope( $tag_name ) { + throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on select scope.' ); + + return false; // The linter requires this unreachable code until the function is implemented and can return. + } + + /** + * Returns whether a P is in BUTTON scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope + * + * @return bool Whether a P is in BUTTON scope. + */ + public function has_p_in_button_scope() { + return $this->has_p_in_button_scope; + } + + /** + * Pops a node off of the stack of open elements. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#stack-of-open-elements + * + * @return bool Whether a node was popped off of the stack. + */ + public function pop() { + $item = array_pop( $this->stack ); + + if ( null === $item ) { + return false; + } + + $this->after_element_pop( $item ); + return true; + } + + /** + * Pops nodes off of the stack of open elements until one with the given tag name has been popped. + * + * @since 6.4.0 + * + * @see WP_HTML_Open_Elements::pop + * + * @param string $tag_name Name of tag that needs to be popped off of the stack of open elements. + * @return bool Whether a tag of the given name was found and popped off of the stack of open elements. + */ + public function pop_until( $tag_name ) { + foreach ( $this->walk_up() as $item ) { + $this->pop(); + + if ( + '(internal: H1 through H6 - do not use)' === $tag_name && + in_array( $item->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + ) { + return true; + } + + if ( $tag_name === $item->node_name ) { + return true; + } + } + + return false; + } + + /** + * Pushes a node onto the stack of open elements. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#stack-of-open-elements + * + * @param WP_HTML_Token $stack_item Item to add onto stack. + */ + public function push( $stack_item ) { + $this->stack[] = $stack_item; + $this->after_element_push( $stack_item ); + } + + /** + * Removes a specific node from the stack of open elements. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $token The node to remove from the stack of open elements. + * @return bool Whether the node was found and removed from the stack of open elements. + */ + public function remove_node( $token ) { + foreach ( $this->walk_up() as $position_from_end => $item ) { + if ( $token->bookmark_name !== $item->bookmark_name ) { + continue; + } + + $position_from_start = $this->count() - $position_from_end - 1; + array_splice( $this->stack, $position_from_start, 1 ); + $this->after_element_pop( $item ); + return true; + } + + return false; + } + + + /** + * Steps through the stack of open elements, starting with the top element + * (added first) and walking downwards to the one added last. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $html = 'We are here'; + * foreach ( $stack->walk_down() as $node ) { + * echo "{$node->node_name} -> "; + * } + * > EM -> STRONG -> A -> + * + * To start with the most-recently added element and walk towards the top, + * see WP_HTML_Open_Elements::walk_up(). + * + * @since 6.4.0 + */ + public function walk_down() { + $count = count( $this->stack ); + + for ( $i = 0; $i < $count; $i++ ) { + yield $this->stack[ $i ]; + } + } + + /** + * Steps through the stack of open elements, starting with the bottom element + * (added last) and walking upwards to the one added first. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $html = 'We are here'; + * foreach ( $stack->walk_up() as $node ) { + * echo "{$node->node_name} -> "; + * } + * > A -> STRONG -> EM -> + * + * To start with the first added element and walk towards the bottom, + * see WP_HTML_Open_Elements::walk_down(). + * + * @since 6.4.0 + * @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists. + * + * @param ?WP_HTML_Token $above_this_node Start traversing above this node, if provided and if the node exists. + */ + public function walk_up( $above_this_node = null ) { + $has_found_node = null === $above_this_node; + + for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) { + $node = $this->stack[ $i ]; + + if ( ! $has_found_node ) { + $has_found_node = $node === $above_this_node; + continue; + } + + yield $node; + } + } + + /* + * Internal helpers. + */ + + /** + * Updates internal flags after adding an element. + * + * Certain conditions (such as "has_p_in_button_scope") are maintained here as + * flags that are only modified when adding and removing elements. This allows + * the HTML Processor to quickly check for these conditions instead of iterating + * over the open stack elements upon each new tag it encounters. These flags, + * however, need to be maintained as items are added and removed from the stack. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $item Element that was added to the stack of open elements. + */ + public function after_element_push( $item ) { + /* + * When adding support for new elements, expand this switch to trap + * cases where the precalculated value needs to change. + */ + switch ( $item->node_name ) { + case 'BUTTON': + $this->has_p_in_button_scope = false; + break; + + case 'P': + $this->has_p_in_button_scope = true; + break; + } + } + + /** + * Updates internal flags after removing an element. + * + * Certain conditions (such as "has_p_in_button_scope") are maintained here as + * flags that are only modified when adding and removing elements. This allows + * the HTML Processor to quickly check for these conditions instead of iterating + * over the open stack elements upon each new tag it encounters. These flags, + * however, need to be maintained as items are added and removed from the stack. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $item Element that was removed from the stack of open elements. + */ + public function after_element_pop( $item ) { + /* + * When adding support for new elements, expand this switch to trap + * cases where the precalculated value needs to change. + */ + switch ( $item->node_name ) { + case 'BUTTON': + $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); + break; + + case 'P': + $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); + break; + } + } +} diff --git a/lib/compat/wordpress-6.5/html-api/class-gutenberg-html-processor-6-5.php b/lib/compat/wordpress-6.5/html-api/class-gutenberg-html-processor-6-5.php new file mode 100644 index 0000000000000..eae2b1eda815c --- /dev/null +++ b/lib/compat/wordpress-6.5/html-api/class-gutenberg-html-processor-6-5.php @@ -0,0 +1,1745 @@ +next_tag( array( 'breadcrumbs' => array( 'DIV', 'FIGURE', 'IMG' ) ) ) ) { + * $processor->add_class( 'responsive-image' ); + * } + * + * #### Breadcrumbs + * + * Breadcrumbs represent the stack of open elements from the root + * of the document or fragment down to the currently-matched node, + * if one is currently selected. Call WP_HTML_Processor::get_breadcrumbs() + * to inspect the breadcrumbs for a matched tag. + * + * Breadcrumbs can specify nested HTML structure and are equivalent + * to a CSS selector comprising tag names separated by the child + * combinator, such as "DIV > FIGURE > IMG". + * + * Since all elements find themselves inside a full HTML document + * when parsed, the return value from `get_breadcrumbs()` will always + * contain any implicit outermost elements. For example, when parsing + * with `create_fragment()` in the `BODY` context (the default), any + * tag in the given HTML document will contain `array( 'HTML', 'BODY', … )` + * in its breadcrumbs. + * + * Despite containing the implied outermost elements in their breadcrumbs, + * tags may be found with the shortest-matching breadcrumb query. That is, + * `array( 'IMG' )` matches all IMG elements and `array( 'P', 'IMG' )` + * matches all IMG elements directly inside a P element. To ensure that no + * partial matches erroneously match it's possible to specify in a query + * the full breadcrumb match all the way down from the root HTML element. + * + * Example: + * + * $html = '
A lovely day outside
'; + * // ----- Matches here. + * $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'IMG' ) ) ); + * + * $html = '
A lovely day outside
'; + * // ---- Matches here. + * $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'FIGCAPTION', 'EM' ) ) ); + * + * $html = '
'; + * // ----- Matches here, because IMG must be a direct child of the implicit BODY. + * $processor->next_tag( array( 'breadcrumbs' => array( 'BODY', 'IMG' ) ) ); + * + * ## HTML Support + * + * This class implements a small part of the HTML5 specification. + * It's designed to operate within its support and abort early whenever + * encountering circumstances it can't properly handle. This is + * the principle way in which this class remains as simple as possible + * without cutting corners and breaking compliance. + * + * ### Supported elements + * + * If any unsupported element appears in the HTML input the HTML Processor + * will abort early and stop all processing. This draconian measure ensures + * that the HTML Processor won't break any HTML it doesn't fully understand. + * + * The following list specifies the HTML tags that _are_ supported: + * + * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. + * - Custom elements: All custom elements are supported. :) + * - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH. + * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. + * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. + * - Links: A. + * - Lists: DD, DL, DT, LI, OL, LI. + * - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO. + * - Paragraph: P. + * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. + * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION. + * - Templating elements: SLOT. + * - Text decoration: RUBY. + * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER. + * + * ### Supported markup + * + * Some kinds of non-normative HTML involve reconstruction of formatting elements and + * re-parenting of mis-nested elements. For example, a DIV tag found inside a TABLE + * may in fact belong _before_ the table in the DOM. If the HTML Processor encounters + * such a case it will stop processing. + * + * The following list specifies HTML markup that _is_ supported: + * + * - Markup involving only those tags listed above. + * - Fully-balanced and non-overlapping tags. + * - HTML with unexpected tag closers. + * - Some unbalanced or overlapping tags. + * - P tags after unclosed P tags. + * - BUTTON tags after unclosed BUTTON tags. + * - A tags after unclosed A tags that don't involve any active formatting elements. + * + * @since 6.4.0 + * + * @see WP_HTML_Tag_Processor + * @see https://html.spec.whatwg.org/ + */ +class Gutenberg_HTML_Processor_6_5 extends Gutenberg_HTML_Tag_Processor_6_5 { + /** + * The maximum number of bookmarks allowed to exist at any given time. + * + * HTML processing requires more bookmarks than basic tag processing, + * so this class constant from the Tag Processor is overwritten. + * + * @since 6.4.0 + * + * @var int + */ + const MAX_BOOKMARKS = 100; + + /** + * Static query for instructing the Tag Processor to visit every token. + * + * @access private + * + * @since 6.4.0 + * + * @var array + */ + const VISIT_EVERYTHING = array( 'tag_closers' => 'visit' ); + + /** + * Holds the working state of the parser, including the stack of + * open elements and the stack of active formatting elements. + * + * Initialized in the constructor. + * + * @since 6.4.0 + * + * @var WP_HTML_Processor_State + */ + private $state = null; + + /** + * Used to create unique bookmark names. + * + * This class sets a bookmark for every tag in the HTML document that it encounters. + * The bookmark name is auto-generated and increments, starting with `1`. These are + * internal bookmarks and are automatically released when the referring WP_HTML_Token + * goes out of scope and is garbage-collected. + * + * @since 6.4.0 + * + * @see WP_HTML_Processor::$release_internal_bookmark_on_destruct + * + * @var int + */ + private $bookmark_counter = 0; + + /** + * Stores an explanation for why something failed, if it did. + * + * @see self::get_last_error + * + * @since 6.4.0 + * + * @var string|null + */ + private $last_error = null; + + /** + * Releases a bookmark when PHP garbage-collects its wrapping WP_HTML_Token instance. + * + * This function is created inside the class constructor so that it can be passed to + * the stack of open elements and the stack of active formatting elements without + * exposing it as a public method on the class. + * + * @since 6.4.0 + * + * @var closure + */ + private $release_internal_bookmark_on_destruct = null; + + /* + * Public Interface Functions + */ + + /** + * Creates an HTML processor in the fragment parsing mode. + * + * Use this for cases where you are processing chunks of HTML that + * will be found within a bigger HTML document, such as rendered + * block output that exists within a post, `the_content` inside a + * rendered site layout. + * + * Fragment parsing occurs within a context, which is an HTML element + * that the document will eventually be placed in. It becomes important + * when special elements have different rules than others, such as inside + * a TEXTAREA or a TITLE tag where things that look like tags are text, + * or inside a SCRIPT tag where things that look like HTML syntax are JS. + * + * The context value should be a representation of the tag into which the + * HTML is found. For most cases this will be the body element. The HTML + * form is provided because a context element may have attributes that + * impact the parse, such as with a SCRIPT tag and its `type` attribute. + * + * ## Current HTML Support + * + * - The only supported context is ``, which is the default value. + * - The only supported document encoding is `UTF-8`, which is the default value. + * + * @since 6.4.0 + * + * @param string $html Input HTML fragment to process. + * @param string $context Context element for the fragment, must be default of ``. + * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. + * @return WP_HTML_Processor|null The created processor if successful, otherwise null. + */ + public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { + if ( '' !== $context || 'UTF-8' !== $encoding ) { + return null; + } + + $p = new self( $html, self::CONSTRUCTOR_UNLOCK_CODE ); + $p->state->context_node = array( 'BODY', array() ); + $p->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + + // @todo Create "fake" bookmarks for non-existent but implied nodes. + $p->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); + $p->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); + + $p->state->stack_of_open_elements->push( + new WP_HTML_Token( + 'root-node', + 'HTML', + false + ) + ); + + $p->state->stack_of_open_elements->push( + new WP_HTML_Token( + 'context-node', + $p->state->context_node[0], + false + ) + ); + + return $p; + } + + /** + * Constructor. + * + * Do not use this method. Use the static creator methods instead. + * + * @access private + * + * @since 6.4.0 + * + * @see WP_HTML_Processor::create_fragment() + * + * @param string $html HTML to process. + * @param string|null $use_the_static_create_methods_instead This constructor should not be called manually. + */ + public function __construct( $html, $use_the_static_create_methods_instead = null ) { + parent::__construct( $html ); + + if ( self::CONSTRUCTOR_UNLOCK_CODE !== $use_the_static_create_methods_instead ) { + _doing_it_wrong( + __METHOD__, + sprintf( + /* translators: %s: WP_HTML_Processor::create_fragment(). */ + __( 'Call %s to create an HTML Processor instead of calling the constructor directly.' ), + 'WP_HTML_Processor::create_fragment()' + ), + '6.4.0' + ); + } + + $this->state = new Gutenberg_HTML_Processor_State_6_5(); + + /* + * Create this wrapper so that it's possible to pass + * a private method into WP_HTML_Token classes without + * exposing it to any public API. + */ + $this->release_internal_bookmark_on_destruct = function ( $name ) { + parent::release_bookmark( $name ); + }; + } + + /** + * Returns the last error, if any. + * + * Various situations lead to parsing failure but this class will + * return `false` in all those cases. To determine why something + * failed it's possible to request the last error. This can be + * helpful to know to distinguish whether a given tag couldn't + * be found or if content in the document caused the processor + * to give up and abort processing. + * + * Example + * + * $processor = WP_HTML_Processor::create_fragment( '