Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions sql/core/src/main/scala/org/apache/spark/sql/Column.scala
Original file line number Diff line number Diff line change
Expand Up @@ -466,29 +466,56 @@ class Column(protected[sql] val expr: Expression) extends Logging {
(this >= lowerBound) && (this <= upperBound)
}

/**
* @group expr_ops
* @deprecated As of 1.6.0, replaced by `isnan`. This will be removed in Spark 2.0.
*/
@deprecated("Use isnan. This will be removed in Spark 2.0.", "1.6.0")
def isNaN: Column = isnan

/**
* True if the current expression is NaN.
*
* @group expr_ops
* @since 1.5.0
* @since 1.6.0
*/
def isNaN: Column = withExpr { IsNaN(expr) }
def isnan: Column = withExpr { IsNaN(expr) }

/**
* @group expr_ops
* @deprecated As of 1.6.0, replaced by `isnull`. This will be removed in Spark 2.0.
*/
def isNull: Column = isnull

/**
* True if the current expression is null.
*
* @group expr_ops
* @since 1.3.0
* @since 1.6.0
*/
def isNull: Column = withExpr { IsNull(expr) }
def isnull: Column = withExpr { IsNull(expr) }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine to be uniform with SQL in the functions that we call with columns as arguments, but it seems like here we are breaking user code for no reason. And if we are going to go lowercase, then why are these isnull and not is_null?

For things that are just standard methods on objects, I don't see a need to break with Scala/Java conventions. We can have aliases in Python to be compatible with pandas if that is the motivation.

/cc @rxin

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left isnull unchanged in my earlier pull request because I felt it was too commonly used, and decided not to change it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marmbrus @rxin Thanks for your comments. The conclusion is clear; I will close this PR and send a follow-up one (#10097) to add an alias on the Python side in order to be compatible with pandas.


/**
* @group expr_ops
* @deprecated As of 1.6.0, replaced by `isnotnull`. This will be removed in Spark 2.0.
*/
def isNotNull: Column = isnotnull

/**
* True if the current expression is NOT null.
*
* @group expr_ops
* @since 1.3.0
* @since 1.6.0
*/
def isnotnull: Column = withExpr { IsNotNull(expr) }

/**
* True if the current expression is NOT null.
*
* @group expr_ops
* @since 1.6.0
*/
def isNotNull: Column = withExpr { IsNotNull(expr) }
def notnull: Column = isnotnull

/**
* Boolean OR.
Expand Down
16 changes: 16 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,22 @@ object functions extends LegacyFunctions {
*/
def isnull(e: Column): Column = withExpr { IsNull(e.expr) }

/**
* Return true iff the column is NOT null.
*
* @group normal_funcs
* @since 1.6.0
*/
def isnotnull(e: Column): Column = withExpr { IsNotNull(e.expr) }

/**
* Return true iff the column is NOT null.
*
* @group normal_funcs
* @since 1.6.0
*/
def notnull(e: Column): Column = isnotnull(e)

/**
* A column expression that generates monotonically increasing 64-bit integers.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,27 +265,27 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
complexData.collect().toSeq.map(r => Row(!r.getBoolean(3))))
}

test("isNull") {
test("isnull") {
checkAnswer(
nullStrings.toDF.where($"s".isNull),
nullStrings.toDF.where($"s".isnull),
nullStrings.collect().toSeq.filter(r => r.getString(1) eq null))

checkAnswer(
sql("select isnull(null), isnull(1)"),
Row(true, false))
}

test("isNotNull") {
test("isnotnull") {
checkAnswer(
nullStrings.toDF.where($"s".isNotNull),
nullStrings.toDF.where($"s".isnotnull),
nullStrings.collect().toSeq.filter(r => r.getString(1) ne null))

checkAnswer(
sql("select isnotnull(null), isnotnull('a')"),
Row(false, true))
}

test("isNaN") {
test("isnan") {
val testData = sqlContext.createDataFrame(sparkContext.parallelize(
Row(Double.NaN, Float.NaN) ::
Row(math.log(-1), math.log(-3).toFloat) ::
Expand All @@ -294,11 +294,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
StructType(Seq(StructField("a", DoubleType), StructField("b", FloatType))))

checkAnswer(
testData.select($"a".isNaN, $"b".isNaN),
testData.select($"a".isnan, $"b".isnan),
Row(true, true) :: Row(true, true) :: Row(false, false) :: Row(false, false) :: Nil)

checkAnswer(
testData.select(isNaN($"a"), isNaN($"b")),
testData.select(isnan($"a"), isnan($"b")),
Row(true, true) :: Row(true, true) :: Row(false, false) :: Row(false, false) :: Nil)

checkAnswer(
Expand Down