Skip to content

Commit

Permalink
Short-circuit tag scans for custom tags
Browse files Browse the repository at this point in the history
  • Loading branch information
jhy committed Sep 27, 2021
1 parent d3f4e31 commit 4b46397
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 7 deletions.
7 changes: 4 additions & 3 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,9 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) {
break;
default:
// todo - bring scan groups in if desired
if (inSorted(name, Constants.InBodyStartEmptyFormatters)) {
if (!Tag.isKnownTag(name)) { // no special rules for custom tags
tb.insert(startTag);
} else if (inSorted(name, Constants.InBodyStartEmptyFormatters)) {
tb.reconstructFormattingElements();
tb.insertEmpty(startTag);
tb.framesetOk(false);
Expand All @@ -658,8 +660,7 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) {
tb.error(this);
return false;
} else {
if (Tag.isKnownTag(name)) // don't reconstruct for custom elements
tb.reconstructFormattingElements();
tb.reconstructFormattingElements();
tb.insert(startTag);
}
}
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jsoup/parser/Tag.java
Original file line number Diff line number Diff line change
Expand Up @@ -237,15 +237,16 @@ protected Tag clone() {
"ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
"del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
"td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
"svg", "math", "center", "template"
"svg", "math", "center", "template",
"dir", "applet", "marquee", "listing" // deprecated but still known / special handling
};
private static final String[] inlineTags = {
"object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
"var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
"sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
"option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
"summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
"data", "bdi", "s"
"data", "bdi", "s", "strike", "nobr"
};
private static final String[] emptyTags = {
"meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
Expand Down
17 changes: 15 additions & 2 deletions src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jsoup.parser;

import org.jsoup.Jsoup;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.HtmlTreeBuilderState.Constants;
import org.junit.jupiter.api.Test;

Expand All @@ -10,8 +11,8 @@
import java.util.Arrays;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InBodyStartInputAttribs;
import static org.junit.jupiter.api.Assertions.*;

public class HtmlTreeBuilderStateTest {
static List<Object[]> findConstantArrays(Class aClass) {
Expand Down Expand Up @@ -47,6 +48,18 @@ public void ensureArraysAreSorted() {
assertEquals(40, constants.size());
}

/**
 * Checks that the names held in the constant arrays of {@code Constants} are tags that
 * {@link Tag#isKnownTag(String)} recognises.
 * <p>
 * Any name that is also present in {@code InBodyStartInputAttribs} is skipped, wherever it occurs;
 * the original author flags that array as the "odd one out" (presumably it lists attribute names
 * rather than tag names -- TODO confirm against {@code Constants}).
 */
@Test public void ensureTagSearchesAreKnownTags() {
    for (Object[] searchArray : findConstantArrays(Constants.class)) {
        // Cast the whole array up front, so a non-String[] constant fails before any name is checked.
        String[] candidates = (String[]) searchArray;
        for (String candidate : candidates) {
            boolean oddOneOut = StringUtil.inSorted(candidate, InBodyStartInputAttribs);
            if (!oddOneOut) {
                assertTrue(Tag.isKnownTag(candidate), String.format("Unknown tag name: %s", candidate));
            }
        }
    }
}


@Test
public void nestedAnchorElements01() {
Expand Down

0 comments on commit 4b46397

Please sign in to comment.