Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
c018e6a
initial prototype
dbatomic Dec 11, 2023
8c17973
Adding test to plan generation test suite
dbatomic Dec 11, 2023
8c929c2
Adding explain check
dbatomic Dec 11, 2023
e5da5c5
adding new comment
dbatomic Dec 11, 2023
38f3ea6
Merge remote-tracking branch 'upstream/master' into between_expressio…
dbatomic Dec 11, 2023
57d7994
Merge branch 'apache:master' into between_expression_v2
dbatomic Dec 12, 2023
feceae3
fixing test in expressionparsersuite
dbatomic Dec 13, 2023
765e499
Fixing streaming logic
dbatomic Dec 13, 2023
1b25d4f
Updating golden files
dbatomic Dec 13, 2023
a69dd57
Removing unresolved between expression.
dbatomic Dec 18, 2023
a038faa
Renaming BetweenExpr -> Between
dbatomic Dec 18, 2023
d0462ff
Fixing golden files
dbatomic Dec 18, 2023
596befe
Adding comments.
dbatomic Dec 18, 2023
1a2ec50
Adding more rigid coercion rules for between, there are more alike if…
dbatomic Dec 19, 2023
0faa792
Using built in function for resolution.
dbatomic Dec 19, 2023
7f6b0c9
Updating changed plans
dbatomic Dec 19, 2023
a1dc501
Updating golden files
dbatomic Dec 19, 2023
6569653
Updating golden files for sql-expression-schema. Removing between fro…
dbatomic Dec 19, 2023
e64fc19
Doc update for between expression
dbatomic Dec 19, 2023
57c88ee
Updating expression documentation
dbatomic Dec 20, 2023
1776f8a
Merge branch 'apache:master' into between_expression_v2
dbatomic Dec 20, 2023
26653f0
fixing failing doc test
dbatomic Dec 20, 2023
dc36be3
Documentation update
dbatomic Dec 21, 2023
ad61074
Merge branch 'apache:master' into between_expression_v2
dbatomic Dec 21, 2023
3cf76b1
Resolving PR comments
dbatomic Dec 26, 2023
550fa3d
Merge branch 'between_expression_v2' of github.com:dbatomic/spark int…
dbatomic Dec 26, 2023
7dfd72d
PR comments
dbatomic Dec 27, 2023
4fb6631
PR comment on better documentation
dbatomic Dec 27, 2023
a960c91
doc update
dbatomic Dec 27, 2023
1ce26eb
adding rand example to predicate function golden files.
dbatomic Dec 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,10 @@ class PlanGenerationTestSuite
simple.where("a + id < 1000")
}

// Plan-generation case for a BETWEEN predicate written as a SQL expression string.
// rand(123) is the operand being range-checked against the two literal bounds.
test("between expr") {
  val betweenPredicate = "rand(123) BETWEEN 0.1 AND 0.2"
  simple.selectExpr(betweenPredicate)
}

test("unpivot values") {
simple.unpivot(
ids = Array(fn.col("id"), fn.col("a")),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Project [((_common_expr_0#0 >= cast(0.1 as double)) AND (_common_expr_0#0 <= cast(0.2 as double))) AS between(rand(123), 0.1, 0.2)#0]
+- Project [id#0L, a#0, b#0, rand(123) AS _common_expr_0#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cid:bigint,a:int,b:double\u003e"
}
},
"expressions": [{
"expressionString": {
"expression": "rand(123) BETWEEN 0.1 AND 0.2"
}
}]
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,7 @@ object FunctionRegistry {
expression[PercentRank]("percent_rank"),

// predicates
expression[Between]("between"),
expression[And]("and"),
expression[In]("in"),
expression[Not]("not"),
Expand Down Expand Up @@ -876,9 +877,6 @@ object FunctionRegistry {
"expr1 <> expr2 - Returns true if `expr1` is not equal to `expr2`."),
"!=" -> makeExprInfoForVirtualOperator("!=",
"expr1 != expr2 - Returns true if `expr1` is not equal to `expr2`."),
"between" -> makeExprInfoForVirtualOperator("between",
"expr1 [NOT] BETWEEN expr2 AND expr3 - " +
"evaluate if `expr1` is [not] in between `expr2` and `expr3`."),
"case" -> makeExprInfoForVirtualOperator("case",
"CASE expr1 WHEN expr2 THEN expr3 [WHEN expr4 THEN expr5]* [ELSE expr6] END " +
"- When `expr1` = `expr2`, returns `expr3`; when `expr1` = `expr4`, return `expr5`; " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ object StreamingJoinHelper extends PredicateHelper with Logging {
case LessThanOrEqual(l, r) => getStateWatermarkSafely(l, r).map(_ - 1)
case GreaterThan(l, r) => getStateWatermarkSafely(r, l)
case GreaterThanOrEqual(l, r) => getStateWatermarkSafely(r, l).map(_ - 1)
case Between(input, lower, upper, _) =>
getStateWatermarkSafely(lower, input).map(_ - 1)
.orElse(getStateWatermarkSafely(input, upper).map(_ - 1))
case _ => None
}
if (stateWatermark.nonEmpty) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.expressions

// scalastyle:off line.size.limit
/**
 * SQL `BETWEEN` predicate, registered as the function `between`.
 *
 * The expression is a [[RuntimeReplaceable]]: the three-argument constructor derives
 * `replacement` as `input >= lower AND input <= upper`. `input` is wrapped in a
 * [[CommonExpressionDef]] and both comparisons go through a [[CommonExpressionRef]] to it,
 * so the two comparisons share one definition of `input` instead of duplicating the
 * expression tree.
 *
 * @param input       expression that is compared against both bounds
 * @param lower       lower bound (inclusive, compared with `>=`)
 * @param upper       upper bound (inclusive, compared with `<=`)
 * @param replacement expression this node is replaced with; built by the auxiliary
 *                    constructor and swapped by `withNewChildInternal`
 */
@ExpressionDescription(
  usage = "input [NOT] _FUNC_ lower AND upper - evaluate if `input` is [not] in between `lower` and `upper`",
  examples = """
    Examples:
      > SELECT 0.5 _FUNC_ 0.1 AND 1.0;
       true
  """,
  arguments = """
    Arguments:
      * input - An expression that is being compared with lower and upper bound.
      * lower - Lower bound of the between check.
      * upper - Upper bound of the between check.
  """,
  since = "4.0.0",
  group = "conditional_funcs")
// scalastyle:on line.size.limit
case class Between private(
    input: Expression,
    lower: Expression,
    upper: Expression,
    replacement: Expression)
  extends RuntimeReplaceable with InheritAnalysisRules {

  /**
   * Public entry point: builds the replacement expression from the three operands.
   */
  def this(input: Expression, lower: Expression, upper: Expression) = {
    this(input, lower, upper, {
      // Define `input` once and reference it from both comparisons.
      val commonExpr = CommonExpressionDef(input)
      val ref = new CommonExpressionRef(commonExpr)
      val replacement = And(GreaterThanOrEqual(ref, lower), LessThanOrEqual(ref, upper))
      With(replacement, Seq(commonExpr))
    })
  }

  // The user-supplied operands; the derived `replacement` is intentionally not listed.
  override def parameters: Seq[Expression] = Seq(input, lower, upper)

  // Only `replacement` is swapped; `input`, `lower` and `upper` are carried over as-is.
  override protected def withNewChildInternal(newChild: Expression): Between = {
    copy(replacement = newChild)
  }
}

object Between {
  /**
   * Three-operand factory; delegates to the auxiliary constructor of [[Between]],
   * which derives the replacement expression.
   */
  def apply(input: Expression, lower: Expression, upper: Expression): Between =
    new Between(input, lower, upper)
}
Original file line number Diff line number Diff line change
Expand Up @@ -2034,10 +2034,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
// Create the predicate.
ctx.kind.getType match {
case SqlBaseParser.BETWEEN =>
// BETWEEN is translated to lower <= e && e <= upper
invertIfNotDefined(And(
GreaterThanOrEqual(e, expression(ctx.lower)),
LessThanOrEqual(e, expression(ctx.upper))))
invertIfNotDefined(UnresolvedFunction(
"between", Seq(e, expression(ctx.lower), expression(ctx.upper)), isDistinct = false))
case SqlBaseParser.IN if ctx.query != null =>
invertIfNotDefined(InSubquery(getValueExpressions(e), ListQuery(plan(ctx.query))))
case SqlBaseParser.IN =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,10 @@ class ExpressionParserSuite extends AnalysisTest {
}

test("between expressions") {
assertEqual("a between b and c", $"a" >= $"b" && $"a" <= $"c")
assertEqual("a not between b and c", !($"a" >= $"b" && $"a" <= $"c"))
assertEqual("a between b and c",
UnresolvedFunction("between", Seq($"a", $"b", $"c"), isDistinct = false))
assertEqual("a not between b and c",
!UnresolvedFunction("between", Seq($"a", $"b", $"c"), isDistinct = false))
}

test("in expressions") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
| org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct<ATANH(0):double> |
| org.apache.spark.sql.catalyst.expressions.BRound | bround | SELECT bround(2.5, 0) | struct<bround(2.5, 0):decimal(2,0)> |
| org.apache.spark.sql.catalyst.expressions.Base64 | base64 | SELECT base64('Spark SQL') | struct<base64(Spark SQL):string> |
| org.apache.spark.sql.catalyst.expressions.Between | between | SELECT 0.5 between 0.1 AND 1.0 | struct<between(0.5, 0.1, 1.0):boolean> |
| org.apache.spark.sql.catalyst.expressions.Bin | bin | SELECT bin(13) | struct<bin(13):string> |
| org.apache.spark.sql.catalyst.expressions.BitLength | bit_length | SELECT bit_length('Spark SQL') | struct<bit_length(Spark SQL):int> |
| org.apache.spark.sql.catalyst.expressions.BitmapBitPosition | bitmap_bit_position | SELECT bitmap_bit_position(1) | struct<bitmap_bit_position(1):bigint> |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`pubview`, SELECT * FROM tbl1
BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2)
AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f), false, false, PersistedView, true
+- Project [a#x, b#x]
+- Filter (((a#x >= scalar-subquery#x []) AND (a#x <= scalar-subquery#x [])) AND exists#x [])
+- Filter (between(a#x, scalar-subquery#x [], scalar-subquery#x []) AND exists#x [])
: :- Project [d#x]
: : +- Filter (c#x = 1)
: : +- SubqueryAlias spark_catalog.testviewschm2.tbl2
Expand Down Expand Up @@ -882,7 +882,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2)
AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f)
AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j), false, false, PersistedView, true
+- Project [a#x, b#x]
+- Filter ((((a#x >= scalar-subquery#x []) AND (a#x <= scalar-subquery#x [])) AND exists#x []) AND NOT exists#x [])
+- Filter ((between(a#x, scalar-subquery#x [], scalar-subquery#x []) AND exists#x []) AND NOT exists#x [])
: :- Project [d#x]
: : +- Filter (c#x = 1)
: : +- SubqueryAlias spark_catalog.testviewschm2.tbl2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ SELECT f1 AS `Three` FROM DATE_TBL
WHERE f1 BETWEEN '2000-01-01' AND '2001-01-01'
-- !query analysis
Project [f1#x AS Three#x]
+- Filter ((f1#x >= cast(2000-01-01 as date)) AND (f1#x <= cast(2001-01-01 as date)))
+- Filter between(f1#x, 2000-01-01, 2001-01-01)
+- SubqueryAlias spark_catalog.default.date_tbl
+- Relation spark_catalog.default.date_tbl[f1#x] parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ SELECT '' AS `54`, d1 as `timestamp`,
FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'
-- !query analysis
Project [ AS 54#x, d1#x AS timestamp#x, date_part(year, d1#x) AS year#x, date_part(month, d1#x) AS month#x, date_part(day, d1#x) AS day#x, date_part(hour, d1#x) AS hour#x, date_part(minute, d1#x) AS minute#x, date_part(second, d1#x) AS second#x]
+- Filter ((d1#x >= cast(1902-01-01 as timestamp)) AND (d1#x <= cast(2038-01-01 as timestamp)))
+- Filter between(d1#x, 1902-01-01, 2038-01-01)
+- SubqueryAlias spark_catalog.default.timestamp_tbl
+- Relation spark_catalog.default.timestamp_tbl[d1#x] parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ Sort [five#x ASC NULLS FIRST], true
+- Distinct
+- Union false, false
:- Project [f1#x AS five#x]
: +- Filter ((f1#x >= -1000000.0) AND (f1#x <= 1000000.0))
: +- Filter between(f1#x, -1000000.0, 1000000.0)
: +- SubqueryAlias float8_tbl
: +- View (`FLOAT8_TBL`, [f1#x])
: +- Project [cast(f1#x as double) AS f1#x]
Expand All @@ -419,7 +419,7 @@ Sort [five#x ASC NULLS FIRST], true
: +- LocalRelation [col1#x]
+- Project [cast(f1#x as double) AS f1#x]
+- Project [f1#x]
+- Filter ((f1#x >= 0) AND (f1#x <= 1000000))
+- Filter between(f1#x, 0, 1000000)
+- SubqueryAlias int4_tbl
+- View (`INT4_TBL`, [f1#x])
+- Project [cast(f1#x as int) AS f1#x]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,3 +368,85 @@ select null not in (1, 2, null)
-- !query analysis
Project [NOT cast(null as int) IN (cast(1 as int),cast(2 as int),cast(null as int)) AS (NOT (NULL IN (1, 2, NULL)))#x]
+- OneRowRelation


-- !query
select 1 between 0 and 2
-- !query analysis
Project [between(1, 0, 2) AS between(1, 0, 2)#x]
+- OneRowRelation


-- !query
select 0.5 between 0 and 1
-- !query analysis
Project [between(0.5, 0, 1) AS between(0.5, 0, 1)#x]
+- OneRowRelation


-- !query
select 2.0 between '1.0' and '3.0'
-- !query analysis
Project [between(2.0, 1.0, 3.0) AS between(2.0, 1.0, 3.0)#x]
+- OneRowRelation


-- !query
select 'b' between 'a' and 'c'
-- !query analysis
Project [between(b, a, c) AS between(b, a, c)#x]
+- OneRowRelation


-- !query
select to_timestamp('2022-12-26 00:00:01') between to_date('2022-03-01') and to_date('2022-12-31')
-- !query analysis
Project [between(to_timestamp(2022-12-26 00:00:01, None, TimestampType, Some(America/Los_Angeles), false), to_date(2022-03-01, None, Some(America/Los_Angeles), false), to_date(2022-12-31, None, Some(America/Los_Angeles), false)) AS between(to_timestamp(2022-12-26 00:00:01), to_date(2022-03-01), to_date(2022-12-31))#x]
+- OneRowRelation


-- !query
select rand(123) between 0.1 AND 0.2
-- !query analysis
[Analyzer test output redacted due to nondeterminism]


-- !query
select 1 not between 0 and 2
-- !query analysis
Project [NOT between(1, 0, 2) AS (NOT between(1, 0, 2))#x]
+- OneRowRelation


-- !query
select 0.5 not between 0 and 1
-- !query analysis
Project [NOT between(0.5, 0, 1) AS (NOT between(0.5, 0, 1))#x]
+- OneRowRelation


-- !query
select 2.0 not between '1.0' and '3.0'
-- !query analysis
Project [NOT between(2.0, 1.0, 3.0) AS (NOT between(2.0, 1.0, 3.0))#x]
+- OneRowRelation


-- !query
select 'b' not between 'a' and 'c'
-- !query analysis
Project [NOT between(b, a, c) AS (NOT between(b, a, c))#x]
+- OneRowRelation


-- !query
select to_timestamp('2022-12-26 00:00:01') not between to_date('2022-03-01') and to_date('2022-12-31')
-- !query analysis
Project [NOT between(to_timestamp(2022-12-26 00:00:01, None, TimestampType, Some(America/Los_Angeles), false), to_date(2022-03-01, None, Some(America/Los_Angeles), false), to_date(2022-12-31, None, Some(America/Los_Angeles), false)) AS (NOT between(to_timestamp(2022-12-26 00:00:01), to_date(2022-03-01), to_date(2022-12-31)))#x]
+- OneRowRelation


-- !query
select rand(123) not between 0.1 AND 0.2
-- !query analysis
[Analyzer test output redacted due to nondeterminism]
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,19 @@ select 1 not in ('2', '3', '4');
select 1 not in ('2', '3', '4', null);
select null not in (1, 2, 3);
select null not in (1, 2, null);

-- Between
-- Operand combinations exercised: integer operands, a decimal input with integer bounds,
-- a decimal input with string bounds, string operands, a timestamp input with date bounds,
-- and a rand() input.
select 1 between 0 and 2;
select 0.5 between 0 and 1;
select 2.0 between '1.0' and '3.0';
select 'b' between 'a' and 'c';
select to_timestamp('2022-12-26 00:00:01') between to_date('2022-03-01') and to_date('2022-12-31');
select rand(123) between 0.1 AND 0.2;

-- Not(Between)
-- The same operand combinations as the BETWEEN statements, written with NOT BETWEEN.
select 1 not between 0 and 2;
select 0.5 not between 0 and 1;
select 2.0 not between '1.0' and '3.0';
select 'b' not between 'a' and 'c';
select to_timestamp('2022-12-26 00:00:01') not between to_date('2022-03-01') and to_date('2022-12-31');
select rand(123) not between 0.1 AND 0.2;
Loading