Skip to content

Commit

Permalink
More robust prevention of XML Declaration recursion
Browse files Browse the repository at this point in the history
  • Loading branch information
jhy committed Jul 9, 2021
1 parent 81d80c3 commit f49f92c
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
16 changes: 8 additions & 8 deletions src/main/java/org/jsoup/nodes/Comment.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
package org.jsoup.nodes;

- import org.jsoup.Jsoup;
+ import org.jsoup.parser.ParseSettings;
  import org.jsoup.parser.Parser;

  import javax.annotation.Nullable;
  import java.io.IOException;
- import java.util.regex.Pattern;

/**
A comment node.
Expand Down Expand Up @@ -67,9 +66,8 @@ public boolean isXmlDeclaration() {
return isXmlDeclarationData(data);
}

- private static final Pattern xmlDeclPattern = Pattern.compile("^[!?]xml.*", Pattern.CASE_INSENSITIVE);
  private static boolean isXmlDeclarationData(String data) {
- return data.length() > 4 && xmlDeclPattern.matcher(data).matches();
+ return (data.length() > 1 && (data.startsWith("!") || data.startsWith("?")));
  }

/**
Expand All @@ -81,13 +79,15 @@ private static boolean isXmlDeclarationData(String data) {

  XmlDeclaration decl = null;
  String declContent = data.substring(1, data.length() - 1);
- // make sure this bogus comment is not packed with recursive xml decls; null out if so
+ // make sure this bogus comment is not immediately followed by another, treat as comment if so
  if (isXmlDeclarationData(declContent))
  return null;

- Document doc = Jsoup.parse("<" + declContent + ">", baseUri(), Parser.xmlParser());
- if (doc.children().size() > 0) {
- Element el = doc.child(0);
+ String fragment = "<" + declContent + ">";
+ // use the HTML parser not XML, so we don't get into a recursive XML Declaration on contrived data
+ Document doc = Parser.htmlParser().settings(ParseSettings.preserveCase).parseInput(fragment, baseUri());
+ if (doc.body().children().size() > 0) {
+ Element el = doc.body().child(0);
  decl = new XmlDeclaration(NodeUtils.parser(doc).settings().normalizeTag(el.tagName()), data.startsWith("!"));
  decl.attributes().addAll(el.attributes());
  }
Expand Down
11 changes: 11 additions & 0 deletions src/test/java/org/jsoup/integration/FuzzFixesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ public void xmlDeclOverflow() throws IOException {
assertNotNull(docXml);
}

/**
 Regression test for the fuzz input attached to jsoup issue 1569. Parses the same file twice -- once without an
 explicit parser, once with the XML parser -- and checks that each call returns a non-null document.
 @throws IOException declared by the test; presumably raised if the fixture file cannot be read -- confirm
 */
@Test
public void xmlDeclOverflowOOM() throws IOException {
// https://github.com/jhy/jsoup/issues/1569
File in = ParseTest.getFile("/fuzztests/1569.html");
// first pass: parse straight from the File, no parser supplied
Document doc = Jsoup.parse(in, "UTF-8");
assertNotNull(doc);

// second pass: same fixture read as a stream, with the XML parser passed explicitly
Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser());
assertNotNull(docXml);
}

@Test
public void stackOverflowState14() throws IOException {
// https://github.com/jhy/jsoup/issues/1543
Expand Down
Binary file added src/test/resources/fuzztests/1569.html
Binary file not shown.

0 comments on commit f49f92c

Please sign in to comment.