Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ object TypeCoercion {
* with primitive types, because in that case the precision and scale of the result depends on
* the operation. Those rules are implemented in [[DecimalPrecision]].
*/
val findTightestCommonTypeOfTwo: (DataType, DataType) => Option[DataType] = {
val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
case (t1, t2) if t1 == t2 => Some(t1)
case (NullType, t1) => Some(t1)
case (t1, NullType) => Some(t1)
Expand All @@ -103,29 +103,18 @@ object TypeCoercion {

/** Similar to [[findTightestCommonType]], but can promote all the way to StringType. */
def findTightestCommonTypeToString(left: DataType, right: DataType): Option[DataType] = {
findTightestCommonTypeOfTwo(left, right).orElse((left, right) match {
findTightestCommonType(left, right).orElse((left, right) match {
case (StringType, t2: AtomicType) if t2 != BinaryType && t2 != BooleanType => Some(StringType)
case (t1: AtomicType, StringType) if t1 != BinaryType && t1 != BooleanType => Some(StringType)
case _ => None
})
}

/**
* Find the tightest common type of a set of types by continuously applying
* `findTightestCommonTypeOfTwo` on these types.
*/
private def findTightestCommonType(types: Seq[DataType]): Option[DataType] = {
types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match {
case None => None
case Some(d) => findTightestCommonTypeOfTwo(d, c)
})
}

/**
* Case 2 type widening (see the classdoc comment above for TypeCoercion).
*
* i.e. the main difference with [[findTightestCommonTypeOfTwo]] is that here we allow some
* loss of precision when widening decimal and double.
* i.e. the main difference with [[findTightestCommonType]] is that here we allow some
* loss of precision when widening decimal and double, and promotion to string.
*/
private def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match {
case (t1: DecimalType, t2: DecimalType) =>
Expand All @@ -148,13 +137,13 @@ object TypeCoercion {
}

/**
* Similar to [[findWiderCommonType]], but can't promote to string. This is also similar to
* [[findTightestCommonType]], but can handle decimal types. If the wider decimal type exceeds
* system limitation, this rule will truncate the decimal type before return it.
* Similar to [[findWiderCommonType]] that can handle decimal types, but can't promote to
* string. If the wider decimal type exceeds system limitation, this rule will truncate
* the decimal type before return it.
*/
def findWiderTypeWithoutStringPromotion(types: Seq[DataType]): Option[DataType] = {
types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match {
case Some(d) => findTightestCommonTypeOfTwo(d, c).orElse((d, c) match {
case Some(d) => findTightestCommonType(d, c).orElse((d, c) match {
case (t1: DecimalType, t2: DecimalType) =>
Some(DecimalPrecision.widerDecimalType(t1, t2))
case (t: IntegralType, d: DecimalType) =>
Expand Down Expand Up @@ -621,7 +610,7 @@ object TypeCoercion {
case e if !e.childrenResolved => e

case b @ BinaryOperator(left, right) if left.dataType != right.dataType =>
findTightestCommonTypeOfTwo(left.dataType, right.dataType).map { commonType =>
findTightestCommonType(left.dataType, right.dataType).map { commonType =>
if (b.inputType.acceptsType(commonType)) {
// If the expression accepts the tightest common type, cast to that.
val newLeft = if (left.dataType == commonType) left else Cast(left, commonType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,11 @@ class TypeCoercionSuite extends PlanTest {

test("tightest common bound for types") {
def widenTest(t1: DataType, t2: DataType, tightestCommon: Option[DataType]) {
var found = TypeCoercion.findTightestCommonTypeOfTwo(t1, t2)
var found = TypeCoercion.findTightestCommonType(t1, t2)
assert(found == tightestCommon,
s"Expected $tightestCommon as tightest common type for $t1 and $t2, found $found")
// Test both directions to make sure the widening is symmetric.
found = TypeCoercion.findTightestCommonTypeOfTwo(t2, t1)
found = TypeCoercion.findTightestCommonType(t2, t1)
assert(found == tightestCommon,
s"Expected $tightestCommon as tightest common type for $t2 and $t1, found $found")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ private[sql] object InferSchema {
* Returns the most general data type for two given data types.
*/
def compatibleType(t1: DataType, t2: DataType): DataType = {
TypeCoercion.findTightestCommonTypeOfTwo(t1, t2).getOrElse {
TypeCoercion.findTightestCommonType(t1, t2).getOrElse {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In CSVInferSchema.scala, it sounds like we are also calling the same function. Could you check whether we can directly call it, instead of duplicating the code? (I did not read it carefully)

https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala#L172-L217

Copy link
Member Author

@HyukjinKwon HyukjinKwon Feb 4, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, yea, it appearntly possible for now. But, can I maybe do this in another PR if you think it is fine? It seems possible but I would like to avoid to fix it here simply because of a worry to resolve a conflict in the PR refacorting this code path #16680 and it seems not directly related to the JIRA.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, no problem. Please submit a separate PR for it. Thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for bearing with me.

// t1 or t2 is a StructType, ArrayType, or an unexpected type.
(t1, t2) match {
// Double support larger range than fixed decimal, DecimalType.Maximum should be enough
Expand Down