From 33988d3179edf0f179da11f1b8dfc4d28d9c5d08 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Fri, 14 Oct 2016 23:46:07 +0800 Subject: [PATCH 1/3] [SPARK-17940][SQL] Fixed a typo in LAST function and improved its usage string. --- .../spark/sql/catalyst/expressions/aggregate/Last.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala index 8579f7292d3a..c4a645af1e15 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala @@ -29,7 +29,10 @@ import org.apache.spark.sql.types._ * a single partition, and we use a single reducer to do the aggregation.). */ @ExpressionDescription( - usage = "_FUNC_(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows.") + usage = """_FUNC_(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows. + _FUNC_(expr,isIgnoreNull=false) - Returns the last value of `child` for a group of rows. + If isIgnoreNull is true, returns only non-null values. + """) case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { def this(child: Expression) = this(child, Literal.create(false, BooleanType)) @@ -37,7 +40,7 @@ case class Last(child: Expression, ignoreNullsExpr: Expression) extends Declarat private val ignoreNulls: Boolean = ignoreNullsExpr match { case Literal(b: Boolean, BooleanType) => b case _ => - throw new AnalysisException("The second argument of First should be a boolean literal.") + throw new AnalysisException("The second argument of Last should be a boolean literal.") } override def children: Seq[Expression] = child :: ignoreNullsExpr :: Nil From fcf8bb5ba7a7043abfe2025a4d17ec4be7f4b4f1 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Sun, 16 Oct 2016 21:53:11 +0800 Subject: [PATCH 2/3] Added the note about nondeterministic. --- .../spark/sql/catalyst/expressions/aggregate/First.scala | 3 ++- .../spark/sql/catalyst/expressions/aggregate/Last.scala | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala index d702c08cfd34..093a242d8600 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.types._ @ExpressionDescription( usage = """_FUNC_(expr) - Returns the first value of `child` for a group of rows. _FUNC_(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows. - If isIgnoreNull is true, returns only non-null values. + If isIgnoreNull is true, returns only non-null values. Note that in most cases the + result is nondeterministic. """) case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala index c4a645af1e15..f062536433a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala @@ -29,9 +29,10 @@ import org.apache.spark.sql.types._ * a single partition, and we use a single reducer to do the aggregation.). */ @ExpressionDescription( - usage = """_FUNC_(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows. + usage = """_FUNC_(expr) - Returns the last value of `child` for a group of rows. _FUNC_(expr,isIgnoreNull=false) - Returns the last value of `child` for a group of rows. - If isIgnoreNull is true, returns only non-null values. + If isIgnoreNull is true, returns only non-null values. Note that in most cases the + result is nondeterministic. """) case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { From 4c3f043087297383ec8e52bcc083746309121257 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Tue, 18 Oct 2016 18:13:17 +0800 Subject: [PATCH 3/3] Improve usage string format. --- .../sql/catalyst/expressions/aggregate/First.scala | 12 ++++++++---- .../sql/catalyst/expressions/aggregate/Last.scala | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala index 093a242d8600..6c9b7d559c3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala @@ -29,10 +29,14 @@ import org.apache.spark.sql.types._ * a single partition, and we use a single reducer to do the aggregation.). */ @ExpressionDescription( - usage = """_FUNC_(expr) - Returns the first value of `child` for a group of rows. - _FUNC_(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows. - If isIgnoreNull is true, returns only non-null values. Note that in most cases the - result is nondeterministic. + usage = + """ + _FUNC_(expr) - Returns the first value of `child` for a group of rows. + + _FUNC_(expr, isIgnoreNull=false) - Returns the first value of `child` for a group of rows. + If isIgnoreNull is true, returns only non-null values. + + Note that in most cases the result is nondeterministic. """) case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala index f062536433a9..b8453f79f48a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala @@ -29,10 +29,14 @@ import org.apache.spark.sql.types._ * a single partition, and we use a single reducer to do the aggregation.). */ @ExpressionDescription( - usage = """_FUNC_(expr) - Returns the last value of `child` for a group of rows. - _FUNC_(expr,isIgnoreNull=false) - Returns the last value of `child` for a group of rows. - If isIgnoreNull is true, returns only non-null values. Note that in most cases the - result is nondeterministic. + usage = + """ + _FUNC_(expr) - Returns the last value of `child` for a group of rows. + + _FUNC_(expr, isIgnoreNull) - Returns the last value of `child` for a group of rows. + If isIgnoreNull is true, returns only non-null values. + + Note that in most cases the result is nondeterministic. """) case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {