Skip to content

Commit 0346ceb

Browse files
committed
HTML API: Track spans of text with (offset, length) instead of (start, end)
This patch follows up on earlier design questions around how to represent spans of strings inside the class. It's relevant now as preparation for #5683. The mixture of (offset, length) and (start, end) coordinates becomes confusing at times, and all final string operations are performed with the (offset, length) pair, since these feed into `substr()`.

In preparation for exposing all tokens within an HTML document, this change:
- Unifies the representation throughout the class.
- Creates `token_starts_at` to track the start of the current token.
- Replaces `tag_ends_at` with `token_length` for re-use with other token types.

There should be no functional or behavioral changes in this patch. For the internal helper classes this patch introduces breaking changes, but those classes are marked private and should not be used outside of the HTML API itself.
1 parent 1850589 commit 0346ceb

File tree

4 files changed

+133
-67
lines changed

4 files changed

+133
-67
lines changed

src/wp-includes/html-api/class-wp-html-attribute-token.php

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*
1616
* @access private
1717
* @since 6.2.0
18+
* @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`.
1819
*
1920
* @see WP_HTML_Tag_Processor
2021
*/
@@ -23,6 +24,7 @@ class WP_HTML_Attribute_Token {
2324
* Attribute name.
2425
*
2526
* @since 6.2.0
27+
*
2628
* @var string
2729
*/
2830
public $name;
@@ -31,6 +33,7 @@ class WP_HTML_Attribute_Token {
3133
* Byte offset in the input HTML where the attribute value starts.
3234
*
3335
* @since 6.2.0
36+
*
3437
* @var int
3538
*/
3639
public $value_starts_at;
@@ -39,6 +42,7 @@ class WP_HTML_Attribute_Token {
3942
* How many bytes the value occupies in the input HTML.
4043
*
4144
* @since 6.2.0
45+
*
4246
* @var int
4347
*/
4448
public $value_length;
@@ -47,22 +51,43 @@ class WP_HTML_Attribute_Token {
4751
* The string offset where the attribute name starts.
4852
*
4953
* @since 6.2.0
54+
*
5055
* @var int
5156
*/
5257
public $start;
5358

5459
/**
55-
* The string offset after the attribute value or its name.
60+
* Byte length of text spanning the attribute inside a tag.
61+
*
62+
* This span starts at the first character of the attribute name
63+
* and it ends after one of three cases:
64+
*
65+
* - at the end of the attribute name for boolean attributes.
66+
* - at the end of the value for unquoted attributes.
67+
* - at the final single or double quote for quoted attributes.
68+
*
69+
* Example:
70+
*
71+
* <div class="post">
72+
* ------------ length is 12, including quotes
73+
*
74+
* <input type="checked" checked id="selector">
75+
* ------- length is 7
76+
*
77+
* <a rel=noopener>
78+
* ------------ length is 12
79+
*
80+
* @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`.
5681
*
57-
* @since 6.2.0
5882
* @var int
5983
*/
60-
public $end;
84+
public $length;
6185

6286
/**
6387
* Whether the attribute is a boolean attribute with value `true`.
6488
*
6589
* @since 6.2.0
90+
*
6691
* @var bool
6792
*/
6893
public $is_true;
@@ -71,20 +96,21 @@ class WP_HTML_Attribute_Token {
7196
* Constructor.
7297
*
7398
* @since 6.2.0
99+
* @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`.
74100
*
75101
* @param string $name Attribute name.
76102
* @param int $value_start Byte offset where the attribute value starts.
77103
* @param int $value_length Number of bytes attribute value spans.
78104
* @param int $start The string offset where the attribute name starts.
79-
* @param int $end The string offset after the attribute value or its name.
105+
* @param int $length Byte length of the entire attribute: the name alone, or the name and value pair.
80106
* @param bool $is_true Whether the attribute is a boolean attribute with true value.
81107
*/
82-
public function __construct( $name, $value_start, $value_length, $start, $end, $is_true ) {
108+
public function __construct( $name, $value_start, $value_length, $start, $length, $is_true ) {
83109
$this->name = $name;
84110
$this->value_starts_at = $value_start;
85111
$this->value_length = $value_length;
86112
$this->start = $start;
87-
$this->end = $end;
113+
$this->length = $length;
88114
$this->is_true = $is_true;
89115
}
90116
}

src/wp-includes/html-api/class-wp-html-span.php

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*
1919
* @access private
2020
* @since 6.2.0
21+
* @since 6.5.0 Replaced `end` with `length` to more closely align with `substr()`.
2122
*
2223
* @see WP_HTML_Tag_Processor
2324
*/
@@ -26,28 +27,30 @@ class WP_HTML_Span {
2627
* Byte offset into document where span begins.
2728
*
2829
* @since 6.2.0
30+
*
2931
* @var int
3032
*/
3133
public $start;
3234

3335
/**
34-
* Byte offset into document where span ends.
36+
* Byte length of this span.
37+
*
38+
* @since 6.5.0
3539
*
36-
* @since 6.2.0
3740
* @var int
3841
*/
39-
public $end;
42+
public $length;
4043

4144
/**
4245
* Constructor.
4346
*
4447
* @since 6.2.0
4548
*
46-
* @param int $start Byte offset into document where replacement span begins.
47-
* @param int $end Byte offset into document where replacement span ends.
49+
* @param int $start Byte offset into document where replacement span begins.
50+
* @param int $length Byte length of span.
4851
*/
49-
public function __construct( $start, $end ) {
50-
$this->start = $start;
51-
$this->end = $end;
52+
public function __construct( $start, $length ) {
53+
$this->start = $start;
54+
$this->length = $length;
5255
}
5356
}

0 commit comments

Comments
 (0)