Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not lcase element or attribute names that match SVG or MathML name… #206

Merged
merged 2 commits into from
Jul 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

/**
* Encapsulates all the information needed by the
* {@link ElementAndAttributePolicySanitizerPolicy} to sanitize one kind
* {@link ElementAndAttributePolicyBasedSanitizerPolicy} to sanitize one kind
* of element.
*/
@Immutable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public void openTag(String elementName, List<String> attrs) {

adjustedElementName = policies.elPolicy.apply(elementName, attrs);
if (adjustedElementName != null) {
adjustedElementName = HtmlLexer.canonicalName(adjustedElementName);
adjustedElementName = HtmlLexer.canonicalElementName(adjustedElementName);
}
} else {
adjustedElementName = null;
Expand Down
155 changes: 144 additions & 11 deletions src/main/java/org/owasp/html/HtmlLexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,27 @@ public HtmlLexer(String input) {

/**
* Normalize case of names that are not name-spaced. This lower-cases HTML
* element and attribute names, but not ones for embedded SVG or MATHML.
* element names, but not ones for embedded SVG or MathML.
*/
static String canonicalName(String elementOrAttribName) {
return elementOrAttribName.indexOf(':') >= 0
? elementOrAttribName : Strings.toLowerCase(elementOrAttribName);
static String canonicalElementName(String elementName) {
return elementName.indexOf(':') >= 0 || mixedCaseForeignElementNames.contains(elementName)
? elementName : Strings.toLowerCase(elementName);
}

/**
* Normalize case of names that are not name-spaced. This lower-cases HTML
* attribute names, but not ones for embedded SVG or MathML.
*/
static String canonicalAttributeName(String attribName) {
return attribName.indexOf(':') >= 0 || mixedCaseForeignAttributeNames.contains(attribName)
? attribName : Strings.toLowerCase(attribName);
}

/**
* Normalize case of keywords in attribute values.
*/
public static String canonicalKeywordAttributeValue(String keywordValue) {
return Strings.toLowerCase(keywordValue);
}

/**
Expand Down Expand Up @@ -243,16 +259,133 @@ private void pushbackToken(HtmlToken token) {

/** Can the attribute appear in HTML without a value. */
private static boolean isValuelessAttribute(String attribName) {
boolean valueless = VALUELESS_ATTRIB_NAMES.contains(
Strings.toLowerCase(attribName));
return valueless;
return VALUELESS_ATTRIB_NAMES.contains(canonicalAttributeName(attribName));
}

// From http://issues.apache.org/jira/browse/XALANC-519
private static final Set<String> VALUELESS_ATTRIB_NAMES = ImmutableSet.of(
"checked", "compact", "declare", "defer", "disabled",
"ismap", "multiple", "nohref", "noresize", "noshade",
"nowrap", "readonly", "selected");

private static final ImmutableSet<String> mixedCaseForeignAttributeNames = ImmutableSet.of(
"attributeName",
"attributeType",
"baseFrequency",
"baseProfile",
"calcMode",
"clipPathUnits",
"contentScriptType",
"defaultAction",
"definitionURL",
"diffuseConstant",
"edgeMode",
"externalResourcesRequired",
"filterUnits",
"focusHighlight",
"gradientTransform",
"gradientUnits",
"initialVisibility",
"kernelMatrix",
"kernelUnitLength",
"keyPoints",
"keySplines",
"keyTimes",
"lengthAdjust",
"limitingConeAngle",
"markerHeight",
"markerUnits",
"markerWidth",
"maskContentUnits",
"maskUnits",
"mediaCharacterEncoding",
"mediaContentEncodings",
"mediaSize",
"mediaTime",
"numOctaves",
"pathLength",
"patternContentUnits",
"patternTransform",
"patternUnits",
"playbackOrder",
"pointsAtX",
"pointsAtY",
"pointsAtZ",
"preserveAlpha",
"preserveAspectRatio",
"primitiveUnits",
"refX",
"refY",
"repeatCount",
"repeatDur",
"requiredExtensions",
"requiredFeatures",
"requiredFonts",
"requiredFormats",
"schemaLocation",
"snapshotTime",
"specularConstant",
"specularExponent",
"spreadMethod",
"startOffset",
"stdDeviation",
"stitchTiles",
"surfaceScale",
"syncBehavior",
"syncBehaviorDefault",
"syncMaster",
"syncTolerance",
"syncToleranceDefault",
"systemLanguage",
"tableValues",
"targetX",
"targetY",
"textLength",
"timelineBegin",
"transformBehavior",
"viewBox",
"xChannelSelector",
"yChannelSelector",
"zoomAndPan"
);

private static final ImmutableSet<String> mixedCaseForeignElementNames = ImmutableSet.of(
"animateColor",
"animateMotion",
"animateTransform",
"clipPath",
"feBlend",
"feColorMatrix",
"feComponentTransfer",
"feComposite",
"feConvolveMatrix",
"feDiffuseLighting",
"feDisplacementMap",
"feDistantLight",
"feDropShadow",
"feFlood",
"feFuncA",
"feFuncB",
"feFuncG",
"feFuncR",
"feGaussianBlur",
"feImage",
"feMerge",
"feMergeNode",
"feMorphology",
"feOffset",
"fePointLight",
"feSpecularLighting",
"feSpotLight",
"feTile",
"feTurbulence",
"foreignObject",
"linearGradient",
"radialGradient",
"solidColor",
"textArea",
"textPath"
);
}

/**
Expand Down Expand Up @@ -311,7 +444,7 @@ protected HtmlToken produce() {
switch (token.type) {
case TAGBEGIN:
{
String canonTagName = canonicalName(
String canonTagName = canonicalElementName(
token.start + 1, token.end);
if (HtmlTextEscapingMode.isTagFollowedByLiteralContent(
canonTagName)) {
Expand Down Expand Up @@ -478,7 +611,7 @@ private HtmlToken parseToken() {
if (this.inEscapeExemptBlock
&& '/' == input.charAt(start + 1)
&& textEscapingMode != HtmlTextEscapingMode.PLAIN_TEXT
&& canonicalName(start + 2, end)
&& canonicalElementName(start + 2, end)
.equals(escapeExemptTagName)) {
this.inEscapeExemptBlock = false;
this.escapeExemptTagName = null;
Expand Down Expand Up @@ -612,8 +745,8 @@ && canonicalName(start + 2, end)
return result;
}

private String canonicalName(int start, int end) {
return HtmlLexer.canonicalName(input.substring(start, end));
private String canonicalElementName(int start, int end) {
return HtmlLexer.canonicalElementName(input.substring(start, end));
}

private static boolean isIdentStart(char ch) {
Expand Down
18 changes: 9 additions & 9 deletions src/main/java/org/owasp/html/HtmlPolicyBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ public HtmlPolicyBuilder allowElements(
ElementPolicy policy, String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
ElementPolicy newPolicy = ElementPolicy.Util.join(
elPolicies.get(elementName), policy);
// Don't remove if newPolicy is the always reject policy since we want
Expand Down Expand Up @@ -286,7 +286,7 @@ public HtmlPolicyBuilder allowCommonBlockElements() {
public HtmlPolicyBuilder allowTextIn(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
textContainers.put(elementName, true);
}
return this;
Expand All @@ -305,7 +305,7 @@ public HtmlPolicyBuilder allowTextIn(String... elementNames) {
public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
textContainers.put(elementName, false);
}
return this;
Expand All @@ -321,7 +321,7 @@ public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
skipIssueTagMap.put(elementName, HtmlTagSkipType.DO_NOT_SKIP);
}
return this;
Expand All @@ -336,7 +336,7 @@ public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
invalidateCompiledState();
for (String elementName : elementNames) {
elementName = HtmlLexer.canonicalName(elementName);
elementName = HtmlLexer.canonicalElementName(elementName);
skipIssueTagMap.put(elementName, HtmlTagSkipType.SKIP);
}
return this;
Expand All @@ -349,7 +349,7 @@ public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
public AttributeBuilder allowAttributes(String... attributeNames) {
ImmutableList.Builder<String> b = ImmutableList.builder();
for (String attributeName : attributeNames) {
b.add(HtmlLexer.canonicalName(attributeName));
b.add(HtmlLexer.canonicalAttributeName(attributeName));
}
return new AttributeBuilder(b.build());
}
Expand Down Expand Up @@ -432,7 +432,7 @@ public HtmlPolicyBuilder requireRelsOnLinks(String... linkValues) {
this.extraRelsForLinks = Sets.newLinkedHashSet();
}
for (String linkValue : linkValues) {
linkValue = HtmlLexer.canonicalName(linkValue);
linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue);
Preconditions.checkArgument(
!Strings.containsHtmlSpace(linkValue),
"spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
Expand All @@ -456,7 +456,7 @@ public HtmlPolicyBuilder skipRelsOnLinks(String... linkValues) {
this.skipRelsForLinks = Sets.newLinkedHashSet();
}
for (String linkValue : linkValues) {
linkValue = HtmlLexer.canonicalName(linkValue);
linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue);
Preconditions.checkArgument(
!Strings.containsHtmlSpace(linkValue),
"spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
Expand Down Expand Up @@ -980,7 +980,7 @@ public HtmlPolicyBuilder globally() {
public HtmlPolicyBuilder onElements(String... elementNames) {
ImmutableList.Builder<String> b = ImmutableList.builder();
for (String elementName : elementNames) {
b.add(HtmlLexer.canonicalName(elementName));
b.add(HtmlLexer.canonicalElementName(elementName));
}
return HtmlPolicyBuilder.this.allowAttributesOnElements(
policy, attributeNames, b.build());
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/owasp/html/HtmlSanitizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ public static void sanitize(
break;
case TAGBEGIN:
if (htmlContent.charAt(token.start + 1) == '/') { // A close tag.
receiver.closeTag(HtmlLexer.canonicalName(
receiver.closeTag(HtmlLexer.canonicalElementName(
htmlContent.substring(token.start + 2, token.end)));
while (lexer.hasNext()
&& lexer.next().type != HtmlTokenType.TAGEND) {
Expand All @@ -173,7 +173,7 @@ public static void sanitize(
} else {
attrsReadyForName = false;
}
attrs.add(HtmlLexer.canonicalName(
attrs.add(HtmlLexer.canonicalAttributeName(
htmlContent.substring(tagBodyToken.start, tagBodyToken.end)));
break;
case ATTRVALUE:
Expand All @@ -191,7 +191,7 @@ public static void sanitize(
attrs.add(attrs.getLast());
}
receiver.openTag(
HtmlLexer.canonicalName(
HtmlLexer.canonicalElementName(
htmlContent.substring(token.start + 1, token.end)),
attrs);
}
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/owasp/html/HtmlStreamRenderer.java
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ private void writeOpenTag(
attrIt.hasNext();) {
String name = attrIt.next();
String value = attrIt.next();
name = HtmlLexer.canonicalName(name);
name = HtmlLexer.canonicalAttributeName(name);
if (!isValidHtmlName(name)) {
error("Invalid attr name", name);
continue;
Expand Down Expand Up @@ -234,7 +234,7 @@ public final void closeTag(String elementName) {
private final void writeCloseTag(String uncanonElementName)
throws IOException {
if (!open) { throw new IllegalStateException(); }
String elementName = HtmlLexer.canonicalName(uncanonElementName);
String elementName = HtmlLexer.canonicalElementName(uncanonElementName);
if (!isValidHtmlName(elementName)) {
error("Invalid element name", elementName);
return;
Expand Down Expand Up @@ -386,7 +386,7 @@ static boolean isValidHtmlName(String name) {
* that has more consistent semantics.
*/
static String safeName(String unsafeElementName) {
String elementName = HtmlLexer.canonicalName(unsafeElementName);
String elementName = HtmlLexer.canonicalElementName(unsafeElementName);

// Substitute a reliably non-raw-text element for raw-text and
// plain-text elements.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public void openTag(String elementName, List<String> attrs) {
if (DEBUG) {
dumpState("open " + elementName);
}
String canonElementName = HtmlLexer.canonicalName(elementName);
String canonElementName = HtmlLexer.canonicalElementName(elementName);

int elIndex = METADATA.indexForName(canonElementName);
// Treat unrecognized tags as void, but emit closing tags in closeTag().
Expand Down Expand Up @@ -238,7 +238,7 @@ public void closeTag(String elementName) {
if (DEBUG) {
dumpState("close " + elementName);
}
String canonElementName = HtmlLexer.canonicalName(elementName);
String canonElementName = HtmlLexer.canonicalElementName(elementName);

int elIndex = METADATA.indexForName(canonElementName);
if (elIndex == UNRECOGNIZED_TAG) { // Allow unrecognized end tags through.
Expand Down
19 changes: 19 additions & 0 deletions src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,25 @@ public static final void testTableStructure() {
sanitized);
}

@Test
public static final void testSvgNames() {
PolicyFactory policyFactory = new HtmlPolicyBuilder()
.allowElements("svg", "animateColor")
.allowAttributes("viewBox").onElements("svg")
.toFactory();
String svg = "<svg viewBox=\"0 0 0 0\"><animateColor></animateColor></svg>";
assertEquals(svg, policyFactory.sanitize(svg));
}

@Test
public static final void testTextareaIsNotTextArea() {
String input = "<textarea>x</textarea><textArea>y</textArea>";
PolicyFactory textareaPolicy = new HtmlPolicyBuilder().allowElements("textarea").toFactory();
PolicyFactory textAreaPolicy = new HtmlPolicyBuilder().allowElements("textArea").toFactory();
assertEquals("<textarea>x</textarea>y", textareaPolicy.sanitize(input));
assertEquals("x<textArea>y</textArea>", textAreaPolicy.sanitize(input));
}

private static String apply(HtmlPolicyBuilder b) {
return apply(b, EXAMPLE);
}
Expand Down