From 7e93907e9c98270e76e20d55c4d35bd600edbb20 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Mon, 24 Oct 2022 03:01:56 +0100 Subject: [PATCH] add recursion limit of 500 for DTD parsing (#159) --- .../java/com/ctc/wstx/dtd/FullDTDReader.java | 35 +++++++-- src/test/java/wstxtest/BaseWstxTest.java | 23 ++++++ .../java/wstxtest/fuzz/Fuzz_DTDReadTest.java | 76 +++++++++++++++++++ ...se-modified-XmlFuzzer-5219006592450560.txt | 1 + 4 files changed, 128 insertions(+), 7 deletions(-) create mode 100644 src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java create mode 100644 src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt diff --git a/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java b/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java index fd8a4d03..19f7ab8e 100644 --- a/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java +++ b/src/main/java/com/ctc/wstx/dtd/FullDTDReader.java @@ -74,6 +74,9 @@ public class FullDTDReader final static Boolean ENTITY_EXP_PE = Boolean.TRUE; + final static int DEFAULT_DTD_RECURSION_DEPTH_LIMIT = 500; + static int DTD_RECURSION_DEPTH_LIMIT = DEFAULT_DTD_RECURSION_DEPTH_LIMIT; + /* /////////////////////////////////////////////////////////// // Configuration @@ -327,6 +330,24 @@ public class FullDTDReader transient TextBuffer mTextBuffer = null; + /** + * Sets the limit on how many times the code will recurse through DTD data. + * The default is 500. + * @param limit new limit on how many times the code will recurse through DTD data + */ + public static void setDtdRecursionDepthLimit(final int limit) { + DTD_RECURSION_DEPTH_LIMIT = limit; + } + + /** + * Gets the limit on how many times the code will recurse through DTD data. + * The default is 500. + * @return limit on how many times the code will recurse through DTD data + */ + public static int getDtdRecursionDepthLimit() { + return DTD_RECURSION_DEPTH_LIMIT; + } + /* /////////////////////////////////////////////////////////// // Life-cycle @@ -2271,7 +2292,7 @@ private void handleElementDecl() vldContent = XMLValidator.CONTENT_ALLOW_ANY_TEXT; // checked against DTD } else { --mInputPtr; // let's push it back... - ContentSpec spec = readContentSpec(elemName, true, mCfgFullyValidating); + ContentSpec spec = readContentSpec(elemName, mCfgFullyValidating, 0); val = spec.getSimpleValidator(); if (val == null) { val = new DFAValidator(DFAState.constructDFA(spec)); @@ -3049,13 +3070,13 @@ private StructValidator readMixedSpec(PrefixedName elemName, boolean construct) return val; } - /** - * @param mainLevel Whether this is the main-level content specification or nested - */ - private ContentSpec readContentSpec(PrefixedName elemName, boolean mainLevel, - boolean construct) + private ContentSpec readContentSpec(final PrefixedName elemName, final boolean construct, final int recursionDepth) throws XMLStreamException { + if (recursionDepth > DTD_RECURSION_DEPTH_LIMIT) { + throw new XMLStreamException("FullDTDReader has reached recursion depth limit of " + DTD_RECURSION_DEPTH_LIMIT); + } + ArrayList subSpecs = new ArrayList(); boolean isChoice = false; // default to sequence boolean choiceSet = false; @@ -3087,7 +3108,7 @@ private ContentSpec readContentSpec(PrefixedName elemName, boolean mainLevel, } } if (c == '(') { - ContentSpec cs = readContentSpec(elemName, false, construct); + ContentSpec cs = readContentSpec(elemName, construct, recursionDepth + 1); subSpecs.add(cs); continue; } diff --git a/src/test/java/wstxtest/BaseWstxTest.java b/src/test/java/wstxtest/BaseWstxTest.java index 8e8bc6f5..64cec6fd 100644 --- a/src/test/java/wstxtest/BaseWstxTest.java +++ b/src/test/java/wstxtest/BaseWstxTest.java @@ -494,6 +494,29 @@ protected static String getAndVerifyText(XMLStreamReader sr) return text; } + protected byte[] readResource(String ref) + { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + final byte[] buf = new byte[4000]; + + InputStream in = getClass().getResourceAsStream(ref); + if (in != null) { + try { + int len; + while ((len = in.read(buf)) > 0) { + bytes.write(buf, 0, len); + } + in.close(); + } catch (IOException e) { + throw new RuntimeException("Failed to read resource '"+ref+"': "+e); + } + } + if (bytes.size() == 0) { + throw new IllegalArgumentException("Failed to read resource '"+ref+"': empty resource?"); + } + return bytes.toByteArray(); + } + /* ////////////////////////////////////////////////// // Debug/output helpers diff --git a/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java b/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java new file mode 100644 index 00000000..d784125e --- /dev/null +++ b/src/test/java/wstxtest/fuzz/Fuzz_DTDReadTest.java @@ -0,0 +1,76 @@ +package wstxtest.fuzz; + +import com.ctc.wstx.dtd.FullDTDReader; +import com.ctc.wstx.exc.WstxLazyException; +import com.ctc.wstx.stax.WstxInputFactory; +import org.codehaus.stax2.io.Stax2ByteArraySource; +import wstxtest.stream.BaseStreamTest; + +import javax.xml.stream.XMLStreamReader; +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.io.Reader; + +public class Fuzz_DTDReadTest extends BaseStreamTest +{ + private final byte[] DOC = readResource("/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt"); + + private final WstxInputFactory STAX_F = getWstxInputFactory(); + + public void testIssueInputStream() throws Exception + { + XMLStreamReader sr = STAX_F.createXMLStreamReader(new ByteArrayInputStream(DOC)); + try { + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + } + sr.close(); + } + + public void testIssueInputStreamHigherRecursionLimit() throws Exception + { + final int defaultLimit = FullDTDReader.getDtdRecursionDepthLimit(); + XMLStreamReader sr = STAX_F.createXMLStreamReader(new ByteArrayInputStream(DOC)); + try { + FullDTDReader.setDtdRecursionDepthLimit(1000); + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 1000"); + } finally { + FullDTDReader.setDtdRecursionDepthLimit(defaultLimit); + } + sr.close(); + } + + public void testIssueReader() throws Exception + { + Reader r = new InputStreamReader(new ByteArrayInputStream(DOC), + "UTF-8"); + XMLStreamReader sr = STAX_F.createXMLStreamReader(r); + try { + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + } + sr.close(); + } + + public void testIssueStax2ByteArray() throws Exception + { + // Then "native" Byte array + Stax2ByteArraySource src = new Stax2ByteArraySource(DOC, 0, DOC.length); + XMLStreamReader sr = STAX_F.createXMLStreamReader(src); + try { + streamThrough(sr); + fail("Should not pass"); + } catch (WstxLazyException e) { + verifyException(e, "FullDTDReader has reached recursion depth limit of 500"); + } + sr.close(); + } +} + diff --git a/src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt b/src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt new file mode 100644 index 00000000..1fb049bd --- /dev/null +++ b/src/test/resources/fuzz/clusterfuzz-testcase-modified-XmlFuzzer-5219006592450560.txt @@ -0,0 +1 @@ +