diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
index 0af4f51f..4cb3022f 100644
--- a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
+++ b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
@@ -94,8 +94,167 @@ public void closeDocument() {
 
   public void text(String textChunk) {
     if (!skipText) {
-      out.text(textChunk);
+      // Check if we're inside a CDATA element (style/script) with allowTextIn
+      // where tags are reclassified as UNESCAPED text and need to be validated
+      // Note: Only style and script are CDATA elements; noscript/noembed/noframes are PCDATA
+      boolean insideCdataElement = false;
+      for (int i = openElementStack.size() - 1; i >= 0; i -= 2) {
+        String adjustedName = openElementStack.get(i);
+        if (adjustedName != null
+            && allowedTextContainers.contains(adjustedName)
+            && ("style".equals(adjustedName) || "script".equals(adjustedName))) {
+          insideCdataElement = true;
+          break;
+        }
+      }
+
+      // If inside a CDATA element (style/script) with allowTextIn, we need to filter out
+      // HTML tags that aren't allowed because tags inside these blocks are reclassified
+      // as UNESCAPED text by the lexer
+      if (insideCdataElement && textChunk != null && textChunk.indexOf('<') >= 0) {
+        // Strip out HTML tags that aren't in the allowed elements list
+        String filtered = stripDisallowedTags(textChunk);
+        out.text(filtered);
+      } else {
+        out.text(textChunk);
+      }
+    }
+  }
+
+  /**
+   * Strips out HTML tags that aren't in the allowed elements list from text content.
+   * This is used when tags appear inside text containers (like style blocks) where
+   * they're treated as text but should still be validated.
+   *
+   * @param text the text chunk to filter; may be {@code null}
+   * @return {@code text} with disallowed tags (and the content they enclose) removed,
+   *     or {@code null} if {@code text} is {@code null}
+   */
+  private String stripDisallowedTags(String text) {
+    if (text == null) {
+      return text;
+    }
+
+    StringBuilder result = new StringBuilder();
+    int len = text.length();
+    int i = 0;
+
+    while (i < len) {
+      int tagStart = text.indexOf('<', i);
+      if (tagStart < 0) {
+        // No more tags, append the rest
+        result.append(text.substring(i));
+        break;
+      }
+
+      // Append text before the tag
+      if (tagStart > i) {
+        result.append(text.substring(i, tagStart));
+      }
+
+      // Find the end of the tag (either '>' or end of string)
+      int tagEnd = text.indexOf('>', tagStart + 1);
+      if (tagEnd < 0) {
+        // Unclosed tag, skip it
+        i = tagStart + 1;
+        continue;
+      }
+
+      // Extract the tag content (between < and >)
+      String tagContent = text.substring(tagStart + 1, tagEnd);
+
+      // Only process if this looks like a valid HTML element tag
+      // Valid tags start with a letter or / followed by a letter
+      // Things like "<>", "</>" or "<3 ... >" are not tags and are passed through unchanged
+      boolean isValidTag = false;
+      String tagName = null;
+
+      if (tagContent.startsWith("/")) {
+        // Closing tag - must have / followed by a letter
+        if (tagContent.length() > 1) {
+          char firstChar = tagContent.charAt(1);
+          if (Character.isLetter(firstChar)) {
+            isValidTag = true;
+            tagName = tagContent.substring(1).trim().split("\\s")[0];
+            tagName = HtmlLexer.canonicalElementName(tagName);
+          }
+        }
+      } else {
+        // Opening tag - must start with a letter. "<>" yields empty tag content,
+        // so check for that before charAt(0) to avoid StringIndexOutOfBoundsException.
+        if (!tagContent.isEmpty() && Character.isLetter(tagContent.charAt(0))) {
+          isValidTag = true;
+          tagName = tagContent.trim().split("\\s")[0];
+          tagName = HtmlLexer.canonicalElementName(tagName);
+        }
+      }
+
+      if (!isValidTag) {
+        // Not a valid HTML tag, just append it as-is
+        result.append('<').append(tagContent).append('>');
+        i = tagEnd + 1;
+        continue;
+      }
+
+      // Check if it's a closing tag
+      if (tagContent.startsWith("/")) {
+        // Only allow closing tags if the element is allowed
+        if (elAndAttrPolicies.containsKey(tagName)) {
+          result.append('<').append(tagContent).append('>');
+        }
+        // Otherwise skip the closing tag
+        i = tagEnd + 1;
+      } else {
+        // Opening tag - only allow tags if the element is in the allowed list
+        if (elAndAttrPolicies.containsKey(tagName)) {
+          result.append('<').append(tagContent).append('>');
+          i = tagEnd + 1;
+        } else {
+          // Skip disallowed tag and its content until matching closing tag
+          i = tagEnd + 1;
+          // Track nesting level to find the matching closing tag
+          int nestingLevel = 1;
+          while (i < len && nestingLevel > 0) {
+            int nextTagStart = text.indexOf('<', i);
+            if (nextTagStart < 0) {
+              // No more tags, skip to end
+              i = len;
+              break;
+            }
+            int nextTagEnd = text.indexOf('>', nextTagStart + 1);
+            if (nextTagEnd < 0) {
+              // Unclosed tag, skip to end
+              i = len;
+              break;
+            }
+            String nextTagContent = text.substring(nextTagStart + 1, nextTagEnd);
+            String nextTagName = nextTagContent.trim().split("\\s")[0];
+            if (nextTagContent.startsWith("/")) {
+              // Closing tag
+              nextTagName = nextTagName.substring(1);
+              nextTagName = HtmlLexer.canonicalElementName(nextTagName);
+              if (nextTagName.equals(tagName)) {
+                nestingLevel--;
+                if (nestingLevel == 0) {
+                  // Found matching closing tag, skip it and continue
+                  i = nextTagEnd + 1;
+                  break;
+                }
+              }
+            } else {
+              // Opening tag
+              nextTagName = HtmlLexer.canonicalElementName(nextTagName);
+              if (nextTagName.equals(tagName)) {
+                nestingLevel++;
+              }
+            }
+            i = nextTagEnd + 1;
+          }
+        }
+      }
     }
+
+    return result.toString();
   }
 
   public void openTag(String elementName, List<String> attrs) {
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
index 2ebf55ea..4065eb93 100644
--- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
+++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
@@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase {
   public final void testHtmlLexer() throws Exception {
     // Do the lexing.
     String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
+    // Normalize line endings in input to handle Windows/Unix differences
+    input = input.replace("\r\n", "\n").replace("\r", "\n");
     StringBuilder actual = new StringBuilder();
     lex(input, actual);
 
     // Get the golden.
     String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
+    // Normalize line endings to handle Windows/Unix differences
+    golden = golden.replace("\r\n", "\n").replace("\r", "\n");
+    String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n");
 
     // Compare.
-    assertEquals(golden, actual.toString());
+    assertEquals(golden, actualStr);
   }
 
   @Test
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
index c302dd8e..cfeba7d3 100644
--- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
+++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
@@ -28,16 +28,13 @@
 
 package org.owasp.html;
 
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.List;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
-
-import org.apache.commons.codec.Resources;
 
 /**
  * Throws malformed inputs at the HTML sanitizer to try and crash it.
@@ -62,9 +59,9 @@ public void text(String textChunk) { /* do nothing */ }
   };
 
   public final void testFuzzHtmlParser() throws Exception {
-    String html = new BufferedReader(new InputStreamReader(
-        Resources.getInputStream("benchmark-data/Yahoo!.html"),
-        StandardCharsets.UTF_8)).lines().collect(Collectors.joining());
+    String html = new String(Files.readAllBytes(
+        Paths.get(getClass().getResource("/benchmark-data/Yahoo!.html").toURI())),
+        StandardCharsets.UTF_8);
     int length = html.length();
 
     char[] fuzzyHtml0 = new char[length];
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
index 1ff169df..d2559e86 100644
--- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
+++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
@@ -454,6 +454,101 @@ public static final void testStylingCornerCase() {
     assertEquals(want, sanitize(input));
   }
 
+  @Test
+  public static final void testCVE202566021_1() {
+    // Arrange: policy allows the style and noscript elements and calls allowTextIn("style"). NOTE(review): both payload literals are empty in this copy of the patch - presumably markup was stripped in extraction; confirm against the original commit.
+    String actualPayload = "";
+    String expectedPayload = "";
+
+    HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+    PolicyFactory policy = htmlPolicyBuilder
+        .allowElements("style", "noscript")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Act: run the payload through the policy built above.
+    String sanitized = policy.sanitize(actualPayload);
+
+    // Assert: the sanitized output must equal the expected payload exactly.
+    assertEquals(expectedPayload, sanitized);
+  }
+
+  @Test
+  public static final void testCVE202566021_2() {
+    // Arrange: same policy as the first CVE test (style and noscript allowed, allowTextIn("style")). NOTE(review): both payload literals are empty in this copy of the patch - presumably lost in extraction; confirm against the original commit.
+    String actualPayload = "";
+    String expectedPayload = "";
+
+    HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+    PolicyFactory policy = htmlPolicyBuilder
+        .allowElements("style", "noscript")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Act: run the payload through the policy built above.
+    String sanitized = policy.sanitize(actualPayload);
+
+    // Assert: the sanitized output must equal the expected payload exactly.
+    assertEquals(expectedPayload, sanitized);
+  }
+
+  @Test
+  public static final void testCVE202566021_3() {
+    // Arrange: policy allows style, noscript and div, and calls allowTextIn("style"). NOTE(review): both payload literals are empty in this copy of the patch - presumably lost in extraction; confirm against the original commit.
+    String actualPayload = "";
+    String expectedPayload = "";
+
+    HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+    PolicyFactory policy = htmlPolicyBuilder
+        .allowElements("style", "noscript", "div")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Act: run the payload through the policy built above.
+    String sanitized = policy.sanitize(actualPayload);
+
+    // Assert: the sanitized output must equal the expected payload exactly.
+    assertEquals(expectedPayload, sanitized);
+  }
+
+  @Test
+  public static final void testCVE202566021_4() {
+    // Arrange: policy allows style, noscript and p, and calls allowTextIn("style"). NOTE(review): the expectedPayload declaration used by the assertion below is not visible in this copy of the patch and the payload literal is empty - presumably lost in extraction; restore from the original commit.
+    String actualPayload = "";
+
+    HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+    PolicyFactory policy = htmlPolicyBuilder
+        .allowElements("style", "noscript", "p")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Act: run the payload through the policy built above.
+    String sanitized = policy.sanitize(actualPayload);
+
+    // Assert: the sanitized output must equal the expected payload exactly.
+    assertEquals(expectedPayload, sanitized);
+  }
+
+  @Test
+  public static final void testCVE202566021_5() {
+    // Arrange: policy allows style, noscript and p, and calls allowTextIn("style"). NOTE(review): the payload literal below contains only raw line breaks and no expectedPayload declaration is visible in this copy of the patch - presumably lost in extraction; restore from the original commit.
+    String actualPayload = "

"; 
+
+    HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
+    PolicyFactory policy = htmlPolicyBuilder
+        .allowElements("style", "noscript", "p")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Act: run the payload through the policy built above.
+    String sanitized = policy.sanitize(actualPayload);
+
+    // Assert: the sanitized output must equal the expected payload exactly.
+    assertEquals(expectedPayload, sanitized);
+  }
+
   private static String sanitize(@Nullable String html) {
     StringBuilder sb = new StringBuilder();
     HtmlStreamRenderer renderer = HtmlStreamRenderer.create(