diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index ac5f1fc923e7..5ddfb02f0de3 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -189,6 +189,19 @@
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>build-helper-maven-plugin</artifactId>
         <executions>
+          <execution>
+            <id>add-sources</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>v${hive.version.short}/src/main/scala</source>
+                <source>v${hive.version.short}/src/main/java</source>
+              </sources>
+            </configuration>
+          </execution>
           <execution>
             <id>add-scala-test-sources</id>
             <phase>generate-test-sources</phase>
@@ -197,6 +210,7 @@
             </goals>
             <configuration>
               <sources>
+                <source>v${hive.version.short}/src/test/scala</source>
                 <source>src/test/gen-java</source>
               </sources>
             </configuration>
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/v1.2.1/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java
similarity index 100%
rename from sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java
rename to sql/core/v1.2.1/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
similarity index 100%
rename from sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
rename to sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala
similarity index 100%
rename from sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala
rename to sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/v1.2.1/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
similarity index 100%
rename from sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
rename to sql/core/v1.2.1/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
diff --git a/sql/core/v2.3.4/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/v2.3.4/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java
new file mode 100644
index 000000000000..9bfad1e83ee7
--- /dev/null
+++ b/sql/core/v2.3.4/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc;
+
+import java.math.BigDecimal;
+
+import org.apache.orc.storage.ql.exec.vector.*;
+
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.sql.types.TimestampType;
+import org.apache.spark.sql.vectorized.ColumnarArray;
+import org.apache.spark.sql.vectorized.ColumnarMap;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/**
+ * A column vector class wrapping Hive's ColumnVector. Because Spark ColumnarBatch only accepts
+ * Spark's vectorized.ColumnVector, this class adapts a Hive ColumnVector to Spark's
+ * ColumnVector interface.
+ */
+public class OrcColumnVector extends org.apache.spark.sql.vectorized.ColumnVector {
+ private ColumnVector baseData;
+ private LongColumnVector longData;
+ private DoubleColumnVector doubleData;
+ private BytesColumnVector bytesData;
+ private DecimalColumnVector decimalData;
+ private TimestampColumnVector timestampData;
+ private final boolean isTimestamp;
+
+ private int batchSize;
+
+ OrcColumnVector(DataType type, ColumnVector vector) {
+ super(type);
+
+ if (type instanceof TimestampType) {
+ isTimestamp = true;
+ } else {
+ isTimestamp = false;
+ }
+
+ baseData = vector;
+ if (vector instanceof LongColumnVector) {
+ longData = (LongColumnVector) vector;
+ } else if (vector instanceof DoubleColumnVector) {
+ doubleData = (DoubleColumnVector) vector;
+ } else if (vector instanceof BytesColumnVector) {
+ bytesData = (BytesColumnVector) vector;
+ } else if (vector instanceof DecimalColumnVector) {
+ decimalData = (DecimalColumnVector) vector;
+ } else if (vector instanceof TimestampColumnVector) {
+ timestampData = (TimestampColumnVector) vector;
+ } else {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ public void setBatchSize(int batchSize) {
+ this.batchSize = batchSize;
+ }
+
+ @Override
+ public void close() {
+
+ }
+
+ @Override
+ public boolean hasNull() {
+ return !baseData.noNulls;
+ }
+
+ @Override
+ public int numNulls() {
+ if (baseData.isRepeating) {
+ if (baseData.isNull[0]) {
+ return batchSize;
+ } else {
+ return 0;
+ }
+ } else if (baseData.noNulls) {
+ return 0;
+ } else {
+ int count = 0;
+ for (int i = 0; i < batchSize; i++) {
+ if (baseData.isNull[i]) count++;
+ }
+ return count;
+ }
+ }
+
+ /* A helper method to get the row index in a column. */
+ private int getRowIndex(int rowId) {
+ return baseData.isRepeating ? 0 : rowId;
+ }
+
+ @Override
+ public boolean isNullAt(int rowId) {
+ return baseData.isNull[getRowIndex(rowId)];
+ }
+
+ @Override
+ public boolean getBoolean(int rowId) {
+ return longData.vector[getRowIndex(rowId)] == 1;
+ }
+
+ @Override
+ public byte getByte(int rowId) {
+ return (byte) longData.vector[getRowIndex(rowId)];
+ }
+
+ @Override
+ public short getShort(int rowId) {
+ return (short) longData.vector[getRowIndex(rowId)];
+ }
+
+ @Override
+ public int getInt(int rowId) {
+ return (int) longData.vector[getRowIndex(rowId)];
+ }
+
+ @Override
+ public long getLong(int rowId) {
+ int index = getRowIndex(rowId);
+ if (isTimestamp) {
+ return timestampData.time[index] * 1000 + timestampData.nanos[index] / 1000 % 1000;
+ } else {
+ return longData.vector[index];
+ }
+ }
+
+ @Override
+ public float getFloat(int rowId) {
+ return (float) doubleData.vector[getRowIndex(rowId)];
+ }
+
+ @Override
+ public double getDouble(int rowId) {
+ return doubleData.vector[getRowIndex(rowId)];
+ }
+
+ @Override
+ public Decimal getDecimal(int rowId, int precision, int scale) {
+ if (isNullAt(rowId)) return null;
+ BigDecimal data = decimalData.vector[getRowIndex(rowId)].getHiveDecimal().bigDecimalValue();
+ return Decimal.apply(data, precision, scale);
+ }
+
+ @Override
+ public UTF8String getUTF8String(int rowId) {
+ if (isNullAt(rowId)) return null;
+ int index = getRowIndex(rowId);
+ BytesColumnVector col = bytesData;
+ return UTF8String.fromBytes(col.vector[index], col.start[index], col.length[index]);
+ }
+
+ @Override
+ public byte[] getBinary(int rowId) {
+ if (isNullAt(rowId)) return null;
+ int index = getRowIndex(rowId);
+ byte[] binary = new byte[bytesData.length[index]];
+ System.arraycopy(bytesData.vector[index], bytesData.start[index], binary, 0, binary.length);
+ return binary;
+ }
+
+ @Override
+ public ColumnarArray getArray(int rowId) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ColumnarMap getMap(int rowId) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public org.apache.spark.sql.vectorized.ColumnVector getChild(int ordinal) {
+ throw new UnsupportedOperationException();
+ }
+}
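
The class comment above describes the adapter role, but the patch itself never shows how the wrapper is driven. Below is a minimal sketch, not part of this change, assuming it is compiled in the same `org.apache.spark.sql.execution.datasources.orc` package (the constructor is package-private) and that the Hive vector is filled by hand rather than by the ORC record reader:

```scala
import org.apache.orc.storage.ql.exec.vector.LongColumnVector

import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector => SparkColumnVector}

// A Hive-side vector, normally populated by the ORC record reader.
val hiveVector = new LongColumnVector(1024)
hiveVector.vector(0) = 42L
hiveVector.noNulls = true

// Wrap it so Spark code can read it through the common ColumnVector API.
val wrapped = new OrcColumnVector(IntegerType, hiveVector)
wrapped.setBatchSize(1)
assert(wrapped.getInt(0) == 42)

// ColumnarBatch accepts the wrapper because it subclasses Spark's ColumnVector.
val batch = new ColumnarBatch(Array[SparkColumnVector](wrapped))
batch.setNumRows(1)
```
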
diff --git a/sql/core/v2.3.4/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3.4/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
new file mode 100644
index 000000000000..112dcb2cb238
--- /dev/null
+++ b/sql/core/v2.3.4/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import org.apache.orc.storage.common.`type`.HiveDecimal
+import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument}
+import org.apache.orc.storage.ql.io.sarg.SearchArgument.Builder
+import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder
+import org.apache.orc.storage.serde2.io.HiveDecimalWritable
+
+import org.apache.spark.sql.sources.Filter
+import org.apache.spark.sql.types._
+
+/**
+ * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down.
+ *
+ * Due to a limitation of the ORC `SearchArgument` builder, we end up with a rather awkward
+ * double-checking pattern when converting `And`/`Or`/`Not` filters.
+ *
+ * An ORC `SearchArgument` must be built in one pass using a single builder. For example, you can't
+ * build `a = 1` and `b = 2` first, and then combine them into `a = 1 AND b = 2`. This is quite
+ * different from the cases in Spark SQL or Parquet, where complex filters can be easily built using
+ * existing simpler ones.
+ *
+ * The annoying part is that, `SearchArgument` builder methods like `startAnd()`, `startOr()`, and
+ * `startNot()` mutate internal state of the builder instance. This forces us to translate all
+ * convertible filters with a single builder instance. However, before actually converting a filter,
+ * we've no idea whether it can be recognized by ORC or not. Thus, when an inconvertible filter is
+ * found, we may already end up with a builder whose internal state is inconsistent.
+ *
+ * For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and then
+ * try to convert its children. Say we convert `left` child successfully, but find that `right`
+ * child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is inconsistent
+ * now.
+ *
+ * The workaround employed here is that, for `And`/`Or`/`Not`, we first try to convert their
+ * children with brand new builders, and only do the actual conversion with the right builder
+ * instance when the children are proven to be convertible.
+ *
+ * P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. Usage of
+ * builder methods mentioned above can only be found in test code, where all tested filters are
+ * known to be convertible.
+ */
+private[sql] object OrcFilters extends OrcFiltersBase {
+
+ /**
+ * Create ORC filter as a SearchArgument instance.
+ */
+ def createFilter(schema: StructType, filters: Seq[Filter]): Option[SearchArgument] = {
+ val dataTypeMap = schema.map(f => f.name -> f.dataType).toMap
+ for {
+ // Combines all convertible filters using `And` to produce a single conjunction
+ conjunction <- buildTree(convertibleFilters(schema, dataTypeMap, filters))
+ // Then tries to build a single ORC `SearchArgument` for the conjunction predicate
+ builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder)
+ } yield builder.build()
+ }
+
+ def convertibleFilters(
+ schema: StructType,
+ dataTypeMap: Map[String, DataType],
+ filters: Seq[Filter]): Seq[Filter] = {
+ for {
+ filter <- filters
+ _ <- buildSearchArgument(dataTypeMap, filter, newBuilder())
+ } yield filter
+ }
+
+ /**
+ * Get PredicateLeafType which is corresponding to the given DataType.
+ */
+ private def getPredicateLeafType(dataType: DataType) = dataType match {
+ case BooleanType => PredicateLeaf.Type.BOOLEAN
+ case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG
+ case FloatType | DoubleType => PredicateLeaf.Type.FLOAT
+ case StringType => PredicateLeaf.Type.STRING
+ case DateType => PredicateLeaf.Type.DATE
+ case TimestampType => PredicateLeaf.Type.TIMESTAMP
+ case _: DecimalType => PredicateLeaf.Type.DECIMAL
+ case _ => throw new UnsupportedOperationException(s"DataType: ${dataType.catalogString}")
+ }
+
+ /**
+ * Cast literal values for filters.
+ *
+ * We need to cast to long because ORC raises exceptions
+ * at 'checkLiteralType' of SearchArgumentImpl.java.
+ */
+ private def castLiteralValue(value: Any, dataType: DataType): Any = dataType match {
+ case ByteType | ShortType | IntegerType | LongType =>
+ value.asInstanceOf[Number].longValue
+ case FloatType | DoubleType =>
+ value.asInstanceOf[Number].doubleValue()
+ case _: DecimalType =>
+ new HiveDecimalWritable(HiveDecimal.create(value.asInstanceOf[java.math.BigDecimal]))
+ case _ => value
+ }
+
+ /**
+ * Build a SearchArgument and return the builder so far.
+ */
+ private def buildSearchArgument(
+ dataTypeMap: Map[String, DataType],
+ expression: Filter,
+ builder: Builder): Option[Builder] = {
+ createBuilder(dataTypeMap, expression, builder, canPartialPushDownConjuncts = true)
+ }
+
+ /**
+ * @param dataTypeMap a map from the attribute name to its data type.
+ * @param expression the input filter predicates.
+ * @param builder the input SearchArgument.Builder.
+ * @param canPartialPushDownConjuncts whether a subset of conjuncts of predicates can be pushed
+   *                                     down safely. Pushing only one side of an AND down is safe
+   *                                     when it is at the top level or when none of its ancestors
+   *                                     is a NOT or an OR.
+ * @return the builder so far.
+ */
+ private def createBuilder(
+ dataTypeMap: Map[String, DataType],
+ expression: Filter,
+ builder: Builder,
+ canPartialPushDownConjuncts: Boolean): Option[Builder] = {
+ def getType(attribute: String): PredicateLeaf.Type =
+ getPredicateLeafType(dataTypeMap(attribute))
+
+ import org.apache.spark.sql.sources._
+
+ expression match {
+ case And(left, right) =>
+ // At here, it is not safe to just convert one side and remove the other side
+ // if we do not understand what the parent filters are.
+ //
+ // Here is an example used to explain the reason.
+ // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to
+ // convert b in ('1'). If we only convert a = 2, we will end up with a filter
+ // NOT(a = 2), which will generate wrong results.
+ //
+ // Pushing one side of AND down is only safe to do at the top level or in the child
+ // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate
+ // can be safely removed.
+ val leftBuilderOption =
+ createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts)
+ val rightBuilderOption =
+ createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts)
+ (leftBuilderOption, rightBuilderOption) match {
+ case (Some(_), Some(_)) =>
+ for {
+ lhs <- createBuilder(dataTypeMap, left,
+ builder.startAnd(), canPartialPushDownConjuncts)
+ rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts)
+ } yield rhs.end()
+
+ case (Some(_), None) if canPartialPushDownConjuncts =>
+ createBuilder(dataTypeMap, left, builder, canPartialPushDownConjuncts)
+
+ case (None, Some(_)) if canPartialPushDownConjuncts =>
+ createBuilder(dataTypeMap, right, builder, canPartialPushDownConjuncts)
+
+ case _ => None
+ }
+
+ case Or(left, right) =>
+ for {
+ _ <- createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts = false)
+ _ <- createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts = false)
+ lhs <- createBuilder(dataTypeMap, left,
+ builder.startOr(), canPartialPushDownConjuncts = false)
+ rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts = false)
+ } yield rhs.end()
+
+ case Not(child) =>
+ for {
+ _ <- createBuilder(dataTypeMap, child, newBuilder, canPartialPushDownConjuncts = false)
+ negate <- createBuilder(dataTypeMap,
+ child, builder.startNot(), canPartialPushDownConjuncts = false)
+ } yield negate.end()
+
+ // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
+ // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
+ // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+
+ case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val castedValue = castLiteralValue(value, dataTypeMap(attribute))
+ Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
+
+ case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val castedValue = castLiteralValue(value, dataTypeMap(attribute))
+ Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
+
+ case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val castedValue = castLiteralValue(value, dataTypeMap(attribute))
+ Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
+
+ case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val castedValue = castLiteralValue(value, dataTypeMap(attribute))
+ Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
+
+ case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val castedValue = castLiteralValue(value, dataTypeMap(attribute))
+ Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
+
+ case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val castedValue = castLiteralValue(value, dataTypeMap(attribute))
+ Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
+
+ case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
+
+ case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
+
+ case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
+ val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
+ Some(builder.startAnd().in(quotedName, getType(attribute),
+ castedValues.map(_.asInstanceOf[AnyRef]): _*).end())
+
+ case _ => None
+ }
+ }
+}
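
The scaladoc above explains the double-checking pattern in prose. A small sketch of the problem it works around, assuming direct use of the sarg builder API from `org.apache.orc.storage.ql.io.sarg` and a hypothetical inconvertible second conjunct:

```scala
import org.apache.orc.storage.ql.io.sarg.PredicateLeaf
import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder

// Start converting `a < 10 AND <something inconvertible>` with a single builder.
val builder = newBuilder().startAnd()
builder.lessThan("a", PredicateLeaf.Type.LONG, java.lang.Long.valueOf(10))

// If the second conjunct now turns out to be inconvertible (say, a StringContains
// filter), there is no way to roll back startAnd()/lessThan(): the builder is left
// in an inconsistent state. Hence createBuilder() above first dry-runs each child
// against a throwaway newBuilder(), and only replays the conversion on the real
// builder once every child is known to be convertible.
```
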
diff --git a/sql/core/v2.3.4/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/v2.3.4/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala
new file mode 100644
index 000000000000..68503aba22b4
--- /dev/null
+++ b/sql/core/v2.3.4/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import java.sql.Date
+
+import org.apache.orc.storage.common.`type`.HiveDecimal
+import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch
+import org.apache.orc.storage.ql.io.sarg.{SearchArgument => OrcSearchArgument}
+import org.apache.orc.storage.ql.io.sarg.PredicateLeaf.{Operator => OrcOperator}
+import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable}
+
+import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
+import org.apache.spark.sql.types.Decimal
+
+/**
+ * Various utilities for ORC used to upgrade the built-in Hive.
+ */
+private[sql] object OrcShimUtils {
+
+ class VectorizedRowBatchWrap(val batch: VectorizedRowBatch) {}
+
+ private[sql] type Operator = OrcOperator
+ private[sql] type SearchArgument = OrcSearchArgument
+
+ def getSqlDate(value: Any): Date = value.asInstanceOf[DateWritable].get
+
+ def getDecimal(value: Any): Decimal = {
+ val decimal = value.asInstanceOf[HiveDecimalWritable].getHiveDecimal()
+ Decimal(decimal.bigDecimalValue, decimal.precision(), decimal.scale())
+ }
+
+ def getDateWritable(reuseObj: Boolean): (SpecializedGetters, Int) => DateWritable = {
+ if (reuseObj) {
+ val result = new DateWritable()
+ (getter, ordinal) =>
+ result.set(getter.getInt(ordinal))
+ result
+ } else {
+ (getter: SpecializedGetters, ordinal: Int) =>
+ new DateWritable(getter.getInt(ordinal))
+ }
+ }
+
+ def getHiveDecimalWritable(precision: Int, scale: Int):
+ (SpecializedGetters, Int) => HiveDecimalWritable = {
+ (getter, ordinal) =>
+ val d = getter.getDecimal(ordinal, precision, scale)
+ new HiveDecimalWritable(HiveDecimal.create(d.toJavaBigDecimal))
+ }
+}
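
A brief sketch of the `reuseObj` pattern in `getDateWritable`, with a hypothetical caller standing in for the ORC serializer that is expected to use these factories:

```scala
import org.apache.spark.sql.catalyst.InternalRow

// Build the converter once per column; with reuseObj = true a single DateWritable
// instance is reused across rows, avoiding one allocation per value.
val toDateWritable = OrcShimUtils.getDateWritable(reuseObj = true)

// Spark stores DateType values as days since the epoch in an int column.
val row = InternalRow(18262) // assuming 18262 days since 1970-01-01, i.e. 2020-01-01
val writable = toDateWritable(row, 0)
```
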
diff --git a/sql/core/v2.3.4/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/v2.3.4/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
new file mode 100644
index 000000000000..e96c6fb7716c
--- /dev/null
+++ b/sql/core/v2.3.4/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
@@ -0,0 +1,413 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import java.math.MathContext
+import java.nio.charset.StandardCharsets
+import java.sql.{Date, Timestamp}
+
+import scala.collection.JavaConverters._
+
+import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument}
+
+import org.apache.spark.sql.{AnalysisException, Column, DataFrame}
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.planning.PhysicalOperation
+import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation}
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
+import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable
+import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types._
+
+/**
+ * A test suite that tests Apache ORC filter API based filter pushdown optimization.
+ * OrcFilterSuite and HiveOrcFilterSuite are logically duplicated to provide the same test coverage.
+ * The difference is the package containing the 'Predicate' and 'SearchArgument' classes.
+ * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package.
+ * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package.
+ */
+class OrcFilterSuite extends OrcTest with SharedSQLContext {
+
+ protected def checkFilterPredicate(
+ df: DataFrame,
+ predicate: Predicate,
+ checker: (SearchArgument) => Unit): Unit = {
+ val output = predicate.collect { case a: Attribute => a }.distinct
+ val query = df
+ .select(output.map(e => Column(e)): _*)
+ .where(Column(predicate))
+
+ query.queryExecution.optimizedPlan match {
+ case PhysicalOperation(_, filters,
+ DataSourceV2Relation(orcTable: OrcTable, _, options)) =>
+ assert(filters.nonEmpty, "No filter is analyzed from the given query")
+ val scanBuilder = orcTable.newScanBuilder(options)
+ scanBuilder.pushFilters(filters.flatMap(DataSourceStrategy.translateFilter).toArray)
+ val pushedFilters = scanBuilder.pushedFilters()
+ assert(pushedFilters.nonEmpty, "No filter is pushed down")
+ val maybeFilter = OrcFilters.createFilter(query.schema, pushedFilters)
+ assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $pushedFilters")
+ checker(maybeFilter.get)
+
+ case _ =>
+ throw new AnalysisException("Can not match OrcTable in the query.")
+ }
+ }
+
+ protected def checkFilterPredicate
+ (predicate: Predicate, filterOperator: PredicateLeaf.Operator)
+ (implicit df: DataFrame): Unit = {
+ def checkComparisonOperator(filter: SearchArgument) = {
+ val operator = filter.getLeaves.asScala
+ assert(operator.map(_.getOperator).contains(filterOperator))
+ }
+ checkFilterPredicate(df, predicate, checkComparisonOperator)
+ }
+
+ protected def checkFilterPredicate
+ (predicate: Predicate, stringExpr: String)
+ (implicit df: DataFrame): Unit = {
+ def checkLogicalOperator(filter: SearchArgument) = {
+ assert(filter.toString == stringExpr)
+ }
+ checkFilterPredicate(df, predicate, checkLogicalOperator)
+ }
+
+ test("filter pushdown - integer") {
+ withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - long") {
+ withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - float") {
+ withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - double") {
+ withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - string") {
+ withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < "2", PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= "4", PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - boolean") {
+ withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === true, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < true, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= false, PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(false) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(false) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(false) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(true) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(true) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(true) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - decimal") {
+ withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(
+ Literal(BigDecimal.valueOf(1)) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(
+ Literal(BigDecimal.valueOf(1)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(
+ Literal(BigDecimal.valueOf(2)) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(
+ Literal(BigDecimal.valueOf(3)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(
+ Literal(BigDecimal.valueOf(1)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(
+ Literal(BigDecimal.valueOf(4)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - timestamp") {
+ val timeString = "2015-08-20 14:57:00"
+ val timestamps = (1 to 4).map { i =>
+ val milliseconds = Timestamp.valueOf(timeString).getTime + i * 3600
+ new Timestamp(milliseconds)
+ }
+ withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === timestamps(0), PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < timestamps(1), PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= timestamps(3), PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(timestamps(0)) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(timestamps(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(timestamps(1)) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(timestamps(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(timestamps(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(timestamps(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("filter pushdown - combinations with logical operators") {
+ withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
+ checkFilterPredicate(
+ '_1.isNotNull,
+ "leaf-0 = (IS_NULL _1), expr = (not leaf-0)"
+ )
+ checkFilterPredicate(
+ '_1 =!= 1,
+ "leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))"
+ )
+ checkFilterPredicate(
+ !('_1 < 4),
+ "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))"
+ )
+ checkFilterPredicate(
+ '_1 < 2 || '_1 > 3,
+ "leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " +
+ "expr = (or leaf-0 (not leaf-1))"
+ )
+ checkFilterPredicate(
+ '_1 < 2 && '_1 > 3,
+ "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " +
+ "expr = (and (not leaf-0) leaf-1 (not leaf-2))"
+ )
+ }
+ }
+
+ test("filter pushdown - date") {
+ val dates = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21").map { day =>
+ Date.valueOf(day)
+ }
+ withOrcDataFrame(dates.map(Tuple1(_))) { implicit df =>
+ checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+ checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+ checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN)
+
+ checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS)
+ checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+ checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN)
+ checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+ checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+ }
+ }
+
+ test("no filter pushdown - non-supported types") {
+ implicit class IntToBinary(int: Int) {
+ def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8)
+ }
+ // ArrayType
+ withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df =>
+ checkNoFilterPredicate('_1.isNull, noneSupported = true)
+ }
+ // BinaryType
+ withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
+ checkNoFilterPredicate('_1 <=> 1.b, noneSupported = true)
+ }
+ // MapType
+ withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
+ checkNoFilterPredicate('_1.isNotNull, noneSupported = true)
+ }
+ }
+
+ test("SPARK-12218 and SPARK-25699 Converting conjunctions into ORC SearchArguments") {
+ import org.apache.spark.sql.sources._
+ // The `LessThan` should be converted while the `StringContains` shouldn't
+ val schema = new StructType(
+ Array(
+ StructField("a", IntegerType, nullable = true),
+ StructField("b", StringType, nullable = true)))
+ assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") {
+ OrcFilters.createFilter(schema, Array(
+ LessThan("a", 10),
+ StringContains("b", "prefix")
+ )).get.toString
+ }
+
+ // The `LessThan` should be converted while the whole inner `And` shouldn't
+ assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") {
+ OrcFilters.createFilter(schema, Array(
+ LessThan("a", 10),
+ Not(And(
+ GreaterThan("a", 1),
+ StringContains("b", "prefix")
+ ))
+ )).get.toString
+ }
+
+    // Cannot remove the unsupported `StringContains` predicate since it is under an `Or` operator.
+ assert(OrcFilters.createFilter(schema, Array(
+ Or(
+ LessThan("a", 10),
+ And(
+ StringContains("b", "prefix"),
+ GreaterThan("a", 1)
+ )
+ )
+ )).isEmpty)
+
+ // Safely remove unsupported `StringContains` predicate and push down `LessThan`
+ assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") {
+ OrcFilters.createFilter(schema, Array(
+ And(
+ LessThan("a", 10),
+ StringContains("b", "prefix")
+ )
+ )).get.toString
+ }
+
+ // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`.
+ assertResult("leaf-0 = (LESS_THAN a 10), leaf-1 = (LESS_THAN_EQUALS a 1)," +
+ " expr = (and leaf-0 (not leaf-1))") {
+ OrcFilters.createFilter(schema, Array(
+ And(
+ And(
+ LessThan("a", 10),
+ StringContains("b", "prefix")
+ ),
+ GreaterThan("a", 1)
+ )
+ )).get.toString
+ }
+ }
+
+ test("SPARK-27160: Fix casting of the DecimalType literal") {
+ import org.apache.spark.sql.sources._
+ val schema = StructType(Array(StructField("a", DecimalType(3, 2))))
+ assertResult("leaf-0 = (LESS_THAN a 3.14), expr = leaf-0") {
+ OrcFilters.createFilter(schema, Array(
+ LessThan(
+ "a",
+ new java.math.BigDecimal(3.14, MathContext.DECIMAL64).setScale(2)))
+ ).get.toString
+ }
+ }
+}
+
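
The comparison tests above check predicates like `'_1 > 3` against `LESS_THAN_EQUALS` rather than a greater-than operator because the sarg `PredicateLeaf` has no greater-than leaf: `OrcFilters` rewrites `GreaterThan` as `NOT(lessThanEquals(...))`. A standalone sketch, assuming it runs in the same package as `OrcFilters` and using a hypothetical one-column schema, that makes the rewrite visible:

```scala
import org.apache.spark.sql.sources.GreaterThan
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

val schema = StructType(Array(StructField("a", IntegerType)))
val sarg = OrcFilters.createFilter(schema, Array(GreaterThan("a", 3))).get

// Expected to print something like:
//   leaf-0 = (LESS_THAN_EQUALS a 3), expr = (not leaf-0)
println(sarg.toString)
```
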