diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index ac1cde10bdf5..baac54760e3e 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3909,6 +3909,11 @@ "CREATE TEMPORARY TABLE ... USING ... is a deprecated syntax. To overcome the issue, please use CREATE TEMPORARY VIEW instead." ] }, + "EMPTY_IN_PREDICATE" : { + "message" : [ + "IN predicate requires at least one value. Empty IN clauses like 'IN ()' are not allowed. <alternative>" + ] + }, "EMPTY_PARTITION_VALUE" : { + "message" : [ "Partition key must set value." diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 132ced820e9a..38c7b0f5d03d 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1206,7 +1206,7 @@ booleanExpression predicate : errorCapturingNot? kind=BETWEEN lower=valueExpression AND upper=valueExpression - | errorCapturingNot? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | errorCapturingNot? kind=IN (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) | errorCapturingNot? kind=IN LEFT_PAREN query RIGHT_PAREN | errorCapturingNot? kind=RLIKE pattern=valueExpression | errorCapturingNot? 
kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 553161ea2db0..c569f45575e6 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -821,6 +821,15 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { ctx) } + def emptyInPredicateError(ctx: ParserRuleContext): Throwable = { + new ParseException( + errorClass = "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + messageParameters = Map( + "alternative" -> ("Consider using 'WHERE FALSE' if you need an always-false condition, " + + "or provide at least one value in the IN list.")), + ctx) + } + /** * Throws an internal error for unexpected parameter markers found during AST building. This * should be unreachable in normal operation due to grammar-level blocking. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f918232c42ac..2f5e3c03c55a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2837,6 +2837,10 @@ class AstBuilder extends DataTypeAstBuilder case SqlBaseParser.IN if ctx.query != null => invertIfNotDefined(InSubquery(getValueExpressions(e), ListQuery(plan(ctx.query)))) case SqlBaseParser.IN => + // Validate that IN clause is not empty + if (ctx.expression.isEmpty) { + throw QueryParsingErrors.emptyInPredicateError(ctx) + } invertIfNotDefined(In(e, ctx.expression.asScala.map(expression).toSeq)) case SqlBaseParser.LIKE | SqlBaseParser.ILIKE => Option(ctx.quantifier).map(_.getType) match { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 59205f308471..0128a3f090c5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -1395,7 +1395,7 @@ class PlanParserSuite extends AnalysisTest { checkError( exception = parseException(sql2), condition = "PARSE_SYNTAX_ERROR", - parameters = Map("error" -> "'IN'", "hint" -> "")) + parameters = Map("error" -> "'INTO'", "hint" -> "")) } test("relation in v2 catalog") { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out index 55822a10041f..a774d0b1d6d7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out @@ -517,6 +517,46 @@ Project [NOT cast(null as int) IN (cast(1 as int),cast(2 as int),cast(null as in +- OneRowRelation +-- !query +select 1 in () +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + "sqlState" : "42000", + "messageParameters" : { + "alternative" : "Consider using 'WHERE FALSE' if you need an always-false condition, or provide at least one value in the IN list." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 10, + "stopIndex" : 14, + "fragment" : "in ()" + } ] +} + + +-- !query +select 1 not in () +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + "sqlState" : "42000", + "messageParameters" : { + "alternative" : "Consider using 'WHERE FALSE' if you need an always-false condition, or provide at least one value in the IN list." 
+ }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 10, + "stopIndex" : 18, + "fragment" : "not in ()" + } ] +} + + -- !query select 1 between 0 and 2 -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql index 195db17a3a1f..314ac205b7dd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql @@ -94,6 +94,10 @@ select 1 not in ('2', '3', '4', null); select null not in (1, 2, 3); select null not in (1, 2, null); +-- Empty IN clause (negative case - should error) +select 1 in (); +select 1 not in (); + -- Between select 1 between 0 and 2; select 0.5 between 0 and 1; diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index 10ff268a5606..66135ab41a68 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -743,6 +743,50 @@ struct<(NOT (NULL IN (1, 2, NULL))):boolean> -- !query output NULL +-- !query +select 1 in () +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + "sqlState" : "42000", + "messageParameters" : { + "alternative" : "Consider using 'WHERE FALSE' if you need an always-false condition, or provide at least one value in the IN list." 
+ }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 10, + "stopIndex" : 14, + "fragment" : "in ()" + } ] +} + + +-- !query +select 1 not in () +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + "sqlState" : "42000", + "messageParameters" : { + "alternative" : "Consider using 'WHERE FALSE' if you need an always-false condition, or provide at least one value in the IN list." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 10, + "stopIndex" : 18, + "fragment" : "not in ()" + } ] +} + + -- !query select 1 between 0 and 2 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 629d85f19b0a..4309d783680f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -726,4 +726,45 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL start = 32, stop = 58)) } + + test("INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE: Empty IN clause") { + val alternativeMsg = "Consider using 'WHERE FALSE' if you need an always-false condition, " + + "or provide at least one value in the IN list." 
+ + // Test with single column IN () + // PredicateContext captures "IN ()" starting at position 33 + checkError( + exception = parseException("SELECT * FROM range(10) WHERE id IN ()"), + condition = "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + sqlState = "42000", + parameters = Map("alternative" -> alternativeMsg), + context = ExpectedContext( + fragment = "IN ()", + start = 33, + stop = 37)) + + // Test with expression IN () + // PredicateContext captures "IN ()" starting at position 39 + checkError( + exception = parseException("SELECT * FROM range(10) WHERE (id + 1) IN ()"), + condition = "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + sqlState = "42000", + parameters = Map("alternative" -> alternativeMsg), + context = ExpectedContext( + fragment = "IN ()", + start = 39, + stop = 43)) + + // Test with NOT IN () + // PredicateContext captures "NOT IN ()" starting at position 33 + checkError( + exception = parseException("SELECT * FROM range(10) WHERE id NOT IN ()"), + condition = "INVALID_SQL_SYNTAX.EMPTY_IN_PREDICATE", + sqlState = "42000", + parameters = Map("alternative" -> alternativeMsg), + context = ExpectedContext( + fragment = "NOT IN ()", + start = 33, + stop = 41)) + } }