Commit dae01f1

Better error message for ListQuery.
1 parent 498bd3b commit dae01f1

4 files changed, +64 -5 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 6 additions & 0 deletions
@@ -1291,6 +1291,12 @@ class Analyzer(
           ListQuery(plan, exprs, exprId, plan.output)
         })
         In(value, Seq(expr))
+      // If IN subquery doesn't need to resolve (e.g., SELECT 1 FROM ...), we still need to fill
+      // its `childOutputs`.
+      case In(value, Seq(l @ ListQuery(sub, _, _, Nil)))
+          if value.resolved && sub.resolved =>
+        val expr = l.copy(childOutputs = sub.output)
+        In(value, Seq(expr))
     }
   }
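
As a rough sketch of what the new case does (standalone Scala with made-up stand-ins, not catalyst's real Expression classes): when the left-hand value and the subquery plan are both resolved but the ListQuery's `childOutputs` is still empty, the rule simply copies the subquery's output into it.

  // Illustrative stand-ins for catalyst's classes; names and shapes are simplified.
  case class Attr(name: String)
  case class Plan(output: Seq[Attr], resolved: Boolean = true)
  case class MiniListQuery(plan: Plan, childOutputs: Seq[Attr] = Nil)
  case class MiniIn(value: String, query: MiniListQuery)

  // Analyzer-style rewrite: fill childOutputs from the (already resolved) subquery plan.
  def fillChildOutputs(expr: MiniIn): MiniIn = expr match {
    case MiniIn(v, l @ MiniListQuery(sub, Nil)) if sub.resolved =>
      MiniIn(v, l.copy(childOutputs = sub.output))
    case other => other // already filled, or not yet resolvable
  }

  // fillChildOutputs(MiniIn("t1a", MiniListQuery(Plan(Seq(Attr("t2a"))))))
  //   => the returned query now carries childOutputs = Seq(Attr("t2a"))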

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 39 additions & 3 deletions
@@ -140,10 +140,46 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate {
   require(list != null, "list should not be null")
 
   override def checkInputDataTypes(): TypeCheckResult = {
-    val mismatchOpt = list.find(l => l.dataType != value.dataType)
+    val mismatchOpt = list.find(l => !DataType.equalsStructurally(l.dataType, value.dataType))
     if (mismatchOpt.isDefined) {
-      TypeCheckResult.TypeCheckFailure(s"Arguments must be same type but were: " +
-        s"${value.dataType} != ${mismatchOpt.get.dataType}")
+      list match {
+        case ListQuery(_, _, _, childOutputs) :: Nil =>
+          val valExprs = value match {
+            case cns: CreateNamedStruct => cns.valExprs
+            case expr => Seq(expr)
+          }
+          if (valExprs.length != childOutputs.length) {
+            TypeCheckResult.TypeCheckFailure(
+              s"""
+                 |The number of columns in the left hand side of an IN subquery does not match the
+                 |number of columns in the output of subquery.
+                 |#columns in left hand side: ${valExprs.length}.
+                 |#columns in right hand side: ${childOutputs.length}.
+                 |Left side columns:
+                 |[${valExprs.map(_.sql).mkString(", ")}].
+                 |Right side columns:
+                 |[${childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
+          } else {
+            val mismatchedColumns = valExprs.zip(childOutputs).flatMap {
+              case (l, r) if l.dataType != r.dataType =>
+                s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})"
+              case _ => None
+            }
+            TypeCheckResult.TypeCheckFailure(
+              s"""
+                 |The data type of one or more elements in the left hand side of an IN subquery
+                 |is not compatible with the data type of the output of the subquery
+                 |Mismatched columns:
+                 |[${mismatchedColumns.mkString(", ")}]
+                 |Left side:
+                 |[${valExprs.map(_.dataType.catalogString).mkString(", ")}].
+                 |Right side:
+                 |[${childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin)
+          }
+        case _ =>
+          TypeCheckResult.TypeCheckFailure(s"Arguments must be same type but were: " +
+            s"${value.dataType} != ${mismatchOpt.get.dataType}")
+      }
     } else {
       TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName")
     }
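
To make the shape of the new check easier to see, here is a hedged standalone sketch (plain Scala, a made-up `Col` type instead of catalyst's Expression/Attribute, and a plain Option instead of TypeCheckResult): count the columns first, then pair them up and report only the mismatching pairs.

  case class Col(sql: String, dataType: String) // illustrative stand-in

  def checkInSubquery(valExprs: Seq[Col], childOutputs: Seq[Col]): Option[String] = {
    if (valExprs.length != childOutputs.length) {
      // Different arity: report the counts and the column lists, as in the new message.
      Some(
        s"""
           |#columns in left hand side: ${valExprs.length}.
           |#columns in right hand side: ${childOutputs.length}.
           |Left side columns: [${valExprs.map(_.sql).mkString(", ")}].
           |Right side columns: [${childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
    } else {
      // Same arity: keep only the pairs whose types differ.
      val mismatched = valExprs.zip(childOutputs).collect {
        case (l, r) if l.dataType != r.dataType => s"(${l.sql}:${l.dataType}, ${r.sql}:${r.dataType})"
      }
      if (mismatched.isEmpty) None
      else Some(s"Mismatched columns: [${mismatched.mkString(", ")}]")
    }
  }

  // checkInSubquery(Seq(Col("t1.`t1a`", "int")),
  //                 Seq(Col("t2.`t2a`", "int"), Col("t2.`t2b`", "int")))
  // returns the column-count error (1 column on the left vs 2 on the right).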

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala

Lines changed: 1 addition & 0 deletions
@@ -282,6 +282,7 @@ case class ListQuery(
   } else {
     childOutputs.head.dataType
   }
+  override lazy val resolved: Boolean = childrenResolved && plan.resolved && childOutputs.nonEmpty
   override def nullable: Boolean = false
   override def withNewPlan(plan: LogicalPlan): ListQuery = copy(plan = plan)
   override def toString: String = s"list#${exprId.id} $conditionString"
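
The one-line addition interacts with `dataType` just above it, which dereferences `childOutputs.head`. A hedged standalone sketch (illustrative shapes, not the real class) of why the expression must stay unresolved until `childOutputs` is filled:

  case class MiniAttr(name: String, dataType: String)

  case class ListQueryShape(planResolved: Boolean, childOutputs: Seq[MiniAttr] = Nil) {
    // Unsafe on an empty childOutputs: head would throw NoSuchElementException.
    def dataType: String =
      if (childOutputs.length > 1) childOutputs.map(_.dataType).mkString("struct<", ", ", ">")
      else childOutputs.head.dataType

    // The added guard: never report the expression as resolved while childOutputs is empty,
    // so callers that only read dataType from resolved expressions stay safe.
    lazy val resolved: Boolean = planResolved && childOutputs.nonEmpty
  }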

sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out

Lines changed: 18 additions & 2 deletions
@@ -72,7 +72,15 @@ t1a IN (SELECT t2a, t2b
 struct<>
 -- !query 5 output
 org.apache.spark.sql.AnalysisException
-cannot resolve '(t1.`t1a` IN (listquery(t1.`t1a`)))' due to data type mismatch: Arguments must be same type but were: IntegerType != StructType(StructField(t2a,IntegerType,false), StructField(t2b,IntegerType,false));
+cannot resolve '(t1.`t1a` IN (listquery(t1.`t1a`)))' due to data type mismatch:
+The number of columns in the left hand side of an IN subquery does not match the
+number of columns in the output of subquery.
+#columns in left hand side: 1.
+#columns in right hand side: 2.
+Left side columns:
+[t1.`t1a`].
+Right side columns:
+[t2.`t2a`, t2.`t2b`].;
 
 
 -- !query 6
@@ -85,4 +93,12 @@ WHERE
 struct<>
 -- !query 6 output
 org.apache.spark.sql.AnalysisException
-cannot resolve '(named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`)))' due to data type mismatch: Arguments must be same type but were: StructType(StructField(t1a,IntegerType,false), StructField(t1b,IntegerType,false)) != IntegerType;
+cannot resolve '(named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`)))' due to data type mismatch:
+The number of columns in the left hand side of an IN subquery does not match the
+number of columns in the output of subquery.
+#columns in left hand side: 2.
+#columns in right hand side: 1.
+Left side columns:
+[t1.`t1a`, t1.`t1b`].
+Right side columns:
+[t2.`t2a`].;
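
For a quick way to see the new messages end to end, a hedged spark-shell sketch (assumes a running `spark` session; the t1/t2 definitions below are a guessed stand-in for the setup in subq-input-typecheck.sql, not its actual DDL):

  import org.apache.spark.sql.AnalysisException

  // Minimal stand-in tables with integer columns named like the ones in the golden file.
  spark.range(0).selectExpr("0 AS t1a", "0 AS t1b").createOrReplaceTempView("t1")
  spark.range(0).selectExpr("0 AS t2a", "0 AS t2b").createOrReplaceTempView("t2")

  try {
    // One column on the left, two in the subquery output -> the new column-count error.
    spark.sql("SELECT * FROM t1 WHERE t1a IN (SELECT t2a, t2b FROM t2)").collect()
  } catch {
    case e: AnalysisException => println(e.getMessage)
  }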
