Skip to content

Commit

Permalink
[CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder
Browse files Browse the repository at this point in the history
Co-authored-by: David Handermann <exceptionfactory@apache.org>
  • Loading branch information
rubenada and exceptionfactory committed Sep 7, 2022
1 parent df8ee28 commit ba80b91
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 9 deletions.
67 changes: 58 additions & 9 deletions core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
Expand All @@ -33,6 +35,10 @@
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.ErrorListener;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
Expand All @@ -48,6 +54,7 @@
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFactoryConfigurationException;

import static org.apache.calcite.linq4j.Nullness.castNonNull;
import static org.apache.calcite.util.Static.RESOURCE;
Expand All @@ -60,13 +67,41 @@
public class XmlFunctions {

private static final ThreadLocal<@Nullable XPathFactory> XPATH_FACTORY =
ThreadLocal.withInitial(XPathFactory::newInstance);
ThreadLocal.withInitial(() -> {
final XPathFactory xPathFactory = XPathFactory.newInstance();
try {
xPathFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
} catch (XPathFactoryConfigurationException e) {
throw new IllegalStateException("XPath Factory configuration failed", e);
}
return xPathFactory;
});
private static final ThreadLocal<@Nullable TransformerFactory> TRANSFORMER_FACTORY =
ThreadLocal.withInitial(() -> {
TransformerFactory transformerFactory = TransformerFactory.newInstance();
final TransformerFactory transformerFactory = TransformerFactory.newInstance();
transformerFactory.setErrorListener(new InternalErrorListener());
try {
transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
} catch (TransformerConfigurationException e) {
throw new IllegalStateException("Transformer Factory configuration failed", e);
}
return transformerFactory;
});
private static final ThreadLocal<@Nullable DocumentBuilderFactory> DOCUMENT_BUILDER_FACTORY =
ThreadLocal.withInitial(() -> {
final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
documentBuilderFactory.setXIncludeAware(false);
documentBuilderFactory.setExpandEntityReferences(false);
documentBuilderFactory.setNamespaceAware(true);
try {
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
documentBuilderFactory
.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
} catch (final ParserConfigurationException e) {
throw new IllegalStateException("Document Builder configuration failed", e);
}
return documentBuilderFactory;
});

private static final Pattern VALID_NAMESPACE_PATTERN = Pattern
.compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")( [0-9a-zA-Z:_-]+=\"[^\"]*\")*)$");
Expand All @@ -81,10 +116,11 @@ private XmlFunctions() {
return null;
}
try {
final Node documentNode = getDocumentNode(input);
XPathExpression xpathExpression = castNonNull(XPATH_FACTORY.get()).newXPath().compile(xpath);
try {
NodeList nodes = (NodeList) xpathExpression
.evaluate(new InputSource(new StringReader(input)), XPathConstants.NODESET);
.evaluate(documentNode, XPathConstants.NODESET);
List<@Nullable String> result = new ArrayList<>();
for (int i = 0; i < nodes.getLength(); i++) {
Node item = castNonNull(nodes.item(i));
Expand All @@ -94,9 +130,9 @@ private XmlFunctions() {
}
return StringUtils.join(result, " ");
} catch (XPathExpressionException e) {
return xpathExpression.evaluate(new InputSource(new StringReader(input)));
return xpathExpression.evaluate(documentNode);
}
} catch (XPathExpressionException ex) {
} catch (IllegalArgumentException | XPathExpressionException ex) {
throw RESOURCE.invalidInputForExtractValue(input, xpath).ex();
}
}
Expand Down Expand Up @@ -140,17 +176,18 @@ private XmlFunctions() {

XPathExpression xpathExpression = xPath.compile(xpath);

final Node documentNode = getDocumentNode(xml);
try {
List<String> result = new ArrayList<>();
NodeList nodes = (NodeList) xpathExpression
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET);
.evaluate(documentNode, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) {
result.add(convertNodeToString(castNonNull(nodes.item(i))));
}
return StringUtils.join(result, "");
} catch (XPathExpressionException e) {
Node node = (Node) xpathExpression
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE);
.evaluate(documentNode, XPathConstants.NODE);
return convertNodeToString(node);
}
} catch (IllegalArgumentException | XPathExpressionException | TransformerException ex) {
Expand All @@ -174,16 +211,17 @@ private XmlFunctions() {
}

XPathExpression xpathExpression = xPath.compile(xpath);
final Node documentNode = getDocumentNode(xml);
try {
NodeList nodes = (NodeList) xpathExpression
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET);
.evaluate(documentNode, XPathConstants.NODESET);
if (nodes != null && nodes.getLength() > 0) {
return 1;
}
return 0;
} catch (XPathExpressionException e) {
Node node = (Node) xpathExpression
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE);
.evaluate(documentNode, XPathConstants.NODE);
if (node != null) {
return 1;
}
Expand Down Expand Up @@ -215,6 +253,17 @@ private static String convertNodeToString(Node node) throws TransformerException
return writer.toString();
}

private static Node getDocumentNode(final String xml) {
try {
final DocumentBuilder documentBuilder =
castNonNull(DOCUMENT_BUILDER_FACTORY.get()).newDocumentBuilder();
final InputSource inputSource = new InputSource(new StringReader(xml));
return documentBuilder.parse(inputSource);
} catch (final ParserConfigurationException | SAXException | IOException e) {
throw new IllegalArgumentException("XML parsing failed", e);
}
}

/** The internal default ErrorListener for Transformer. Just rethrows errors to
* discontinue the XML transformation. */
private static class InternalErrorListener implements ErrorListener {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@
import org.apache.calcite.runtime.XmlFunctions;
import org.apache.calcite.util.BuiltInMethod;

import org.checkerframework.checker.nullness.qual.Nullable;
import org.hamcrest.Matcher;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.function.Supplier;

import static org.hamcrest.CoreMatchers.is;
Expand All @@ -36,6 +40,23 @@
*/
class SqlXmlFunctionsTest {

private static final String XML = "<document>string</document>";
private static final String XSLT =
"<xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"></xsl:stylesheet>";
private static final String DOCUMENT_PATH = "/document";
private static @Nullable String xmlExternalEntity = null;
private static @Nullable String xsltExternalEntity = null;

@BeforeAll public static void setup() throws Exception {
final Path testFile = Files.createTempFile("foo", "temp");
testFile.toFile().deleteOnExit();
final String filePath = "file:///" + testFile.toAbsolutePath();
xmlExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath
+ "\"> ]><document>&entity;</document>";
xsltExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath
+ "\"> ]><xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">&entity;</xsl:stylesheet>";
}

@Test void testExtractValue() {
assertExtractValue("<a>ccc<b>ddd</b></a>", "/a", is("ccc"));

Expand All @@ -45,6 +66,33 @@ class SqlXmlFunctionsTest {
assertExtractValueFailed(input, "#", Matchers.expectThrowable(expected));
}

@Test void testExtractValueExternalEntity() {
String message = "Invalid input for EXTRACTVALUE: xml: '"
+ xmlExternalEntity + "', xpath expression: '" + DOCUMENT_PATH + "'";
CalciteException expected = new CalciteException(message, null);
assertExtractValueFailed(xmlExternalEntity, DOCUMENT_PATH,
Matchers.expectThrowable(expected));
}

@Test void testExistsNodeExternalEntity() {
String message = "Invalid input for EXISTSNODE xpath: '"
+ DOCUMENT_PATH + "', namespace: '" + null + "'";
CalciteException expected = new CalciteException(message, null);
assertExistsNodeFailed(xmlExternalEntity, DOCUMENT_PATH, null,
Matchers.expectThrowable(expected));
}

@Test void testXmlTransformExternalEntity() {
String message = "Invalid input for XMLTRANSFORM xml: '" + xmlExternalEntity + "'";
CalciteException expected = new CalciteException(message, null);
assertXmlTransformFailed(xmlExternalEntity, XSLT, Matchers.expectThrowable(expected));
}

@Test void testXmlTransformExternalEntityXslt() {
String message = "Illegal xslt specified : '" + xsltExternalEntity + "'";
CalciteException expected = new CalciteException(message, null);
assertXmlTransformFailed(XML, xsltExternalEntity, Matchers.expectThrowable(expected));
}

@Test void testXmlTransform() {
assertXmlTransform(null, "", nullValue());
Expand Down

0 comments on commit ba80b91

Please sign in to comment.