Skip to content

Commit

Permalink
Make the HTML collector parsing more robust
Browse files Browse the repository at this point in the history
Most notably, better handling of self-closing elements.

Closes gohugoio#10698
  • Loading branch information
bep committed Feb 6, 2023
1 parent 2a364cc commit a810ced
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
17 changes: 13 additions & 4 deletions publisher/htmlElementsCollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,10 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc
}

tagName := w.buff.Bytes()[1:]
isSelfClosing := tagName[len(tagName)-1] == '/'

switch {
case skipInnerElementRe.Match(tagName):
case !isSelfClosing && skipInnerElementRe.Match(tagName):
// pre, script etc. We collect classes etc. on the surrounding
// element, but skip the inner content.
w.backup()
Expand Down Expand Up @@ -432,10 +433,18 @@ func parseStartTag(s string) string {
})

if spaceIndex == -1 {
return s[1 : len(s)-1]
s = s[1 : len(s)-1]
} else {
s = s[1:spaceIndex]
}

return s[1:spaceIndex]
if s[len(s)-1] == '/' {
// Self closing.
s = s[:len(s)-1]
}

return s

}

// isClosedByTag reports whether b ends with a closing tag for tagName.
Expand Down Expand Up @@ -487,7 +496,7 @@ LOOP:
}
}

if state != 2 {
if state != 2 || lo >= hi {
return false
}

Expand Down
4 changes: 4 additions & 0 deletions publisher/htmlElementsCollector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ func TestClassCollector(t *testing.T) {
{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
{"Comments with elements before and after", `<div></div><!-- example comment --><span><span>`, f("div span", "", "")},
{"Self closing tag", `<div><hr/></div>`, f("div hr", "", "")},
// svg with self closing style tag.
{"SVG with self closing style tag", `<svg><style/><g><path class="foo"/></g></svg>`, f("g path style svg", "foo", "")},
// Issue #8530
{"Comment with single quote", `<!-- Hero Area Image d'accueil --><i class="foo">`, f("i", "foo", "")},
{"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
Expand Down Expand Up @@ -174,6 +177,7 @@ func TestEndsWithTag(t *testing.T) {
{"match space", "foo< / div>", "div", true},
{"match space 2", "foo< / div \n>", "div", true},
{"match case", "foo</DIV>", "div", true},
{"self closing", `</defs><g><g><path fill="#010101" d=asdf"/>`, "div", false},
} {
c.Run(test.name, func(c *qt.C) {
got := isClosedByTag([]byte(test.s), []byte(test.tagName))
Expand Down

0 comments on commit a810ced

Please sign in to comment.