diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java index a56007b2a5ab8..30596d92faaa2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java @@ -55,7 +55,7 @@ public String name() { @Override public String toString() { return Stream.concat(Stream.of(namespace), Stream.of(name)) - .map(CatalogV2Implicits::quote) + .map(CatalogV2Implicits::quoteIfNeeded) .collect(Collectors.joining(".")); } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index 3478af8783af6..71bab624f06a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -84,15 +84,15 @@ private[sql] object CatalogV2Implicits { } implicit class NamespaceHelper(namespace: Array[String]) { - def quoted: String = namespace.map(quote).mkString(".") + def quoted: String = namespace.map(quoteIfNeeded).mkString(".") } implicit class IdentifierHelper(ident: Identifier) { def quoted: String = { if (ident.namespace.nonEmpty) { - ident.namespace.map(quote).mkString(".") + "." + quote(ident.name) + ident.namespace.map(quoteIfNeeded).mkString(".") + "." + quoteIfNeeded(ident.name) } else { - quote(ident.name) + quoteIfNeeded(ident.name) } } @@ -122,10 +122,10 @@ private[sql] object CatalogV2Implicits { s"$quoted is not a valid TableIdentifier as it has more than 2 name parts.") } - def quoted: String = parts.map(quote).mkString(".") + def quoted: String = parts.map(quoteIfNeeded).mkString(".") } - def quote(part: String): String = { + def quoteIfNeeded(part: String): String = { if (part.contains(".") || part.contains("`")) { s"`${part.replace("`", "``")}`" } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 91e0c58a1c6d0..70fc9689e6087 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -24,6 +24,7 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} import org.apache.spark.sql.types.StructType @@ -35,20 +36,12 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { def quoted: String = { identifier.database match { case Some(db) => - Seq(db, identifier.table).map(quote).mkString(".") + Seq(db, identifier.table).map(quoteIfNeeded).mkString(".") case _ => - quote(identifier.table) + quoteIfNeeded(identifier.table) } } - - private def quote(part: String): String = { - if (part.contains(".") || part.contains("`")) { - s"`${part.replace("`", "``")}`" - } else { - part - } - } } def catalogTable: CatalogTable = v1Table diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 6d22c71303e8d..53def3f956fb4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -118,7 +118,7 @@ class ResolveSessionCatalog( .map(_._2.dataType) .getOrElse { throw new AnalysisException( - s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " + + s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " + s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala index 0b5658715377a..e673309188756 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala @@ -36,16 +36,6 @@ trait OrcFiltersBase { } } - // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters - // in order to distinguish predicate pushdown for nested columns. - protected[sql] def quoteAttributeNameIfNeeded(name: String) : String = { - if (!name.contains("`") && name.contains(".")) { - s"`$name`" - } else { - name - } - } - /** * Return true if this is a searchable type in ORC. * Both CharType and VarcharType are cleaned at AstBuilder. diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 995c5ed317de1..b9cbc484e1fc1 100644 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -24,6 +24,7 @@ import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder import org.apache.orc.storage.serde2.io.HiveDecimalWritable import org.apache.spark.SparkException +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types._ @@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase { // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). + // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters + // in order to distinguish predicate pushdown for nested columns. expression match { case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end()) case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end()) case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end()) case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end()) case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startAnd().isNull(quotedName, getType(attribute)).end()) case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startNot().isNull(quotedName, getType(attribute)).end()) case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute))) Some(builder.startAnd().in(quotedName, getType(attribute), castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 948ab44a8c19c..6e9e592be13be 100644 --- a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable import org.apache.spark.SparkException +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types._ @@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase { // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). + // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters + // in order to distinguish predicate pushdown for nested columns. expression match { case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end()) case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end()) case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end()) case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end()) case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValue = castLiteralValue(value, dataTypeMap(attribute)) Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end()) case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startAnd().isNull(quotedName, getType(attribute)).end()) case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) Some(builder.startNot().isNull(quotedName, getType(attribute)).end()) case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => - val quotedName = quoteAttributeNameIfNeeded(attribute) + val quotedName = quoteIfNeeded(attribute) val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute))) Some(builder.startAnd().in(quotedName, getType(attribute), castedValues.map(_.asInstanceOf[AnyRef]): _*).end())