-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15192][SQL] null check for SparkSession.createDataFrame #13008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8f0a0bf
a5815e5
7419a52
0915a71
3acf24f
0c81f39
4a84982
225128d
57efddb
f533188
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -113,8 +113,8 @@ object ScalaReflection extends ScalaReflection { | |
| * Returns true if the value of this data type is same between internal and external. | ||
| */ | ||
| def isNativeType(dt: DataType): Boolean = dt match { | ||
| case BooleanType | ByteType | ShortType | IntegerType | LongType | | ||
| FloatType | DoubleType | BinaryType => true | ||
| case NullType | BooleanType | ByteType | ShortType | IntegerType | LongType | | ||
| FloatType | DoubleType | BinaryType | CalendarIntervalType => true | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why `CalendarIntervalType`?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because we don't have an external representation of it. |
||
| case _ => false | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -477,8 +477,8 @@ class SparkSession private( | |
| // TODO: use MutableProjection when rowRDD is another DataFrame and the applied | ||
| // schema differs from the existing schema on any field data type. | ||
| val catalystRows = if (needsConversion) { | ||
| val converter = CatalystTypeConverters.createToCatalystConverter(schema) | ||
| rowRDD.map(converter(_).asInstanceOf[InternalRow]) | ||
| val encoder = RowEncoder(schema) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So, we already do a null check in `RowEncoder`, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yea |
||
| rowRDD.map(encoder.toRow) | ||
| } else { | ||
| rowRDD.map{r: Row => InternalRow.fromSeq(r.toSeq)} | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -507,7 +507,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext { | |
| val schema = StructType(Seq( | ||
| StructField("f", StructType(Seq( | ||
| StructField("a", StringType, nullable = true), | ||
| StructField("b", IntegerType, nullable = false) | ||
| StructField("b", IntegerType, nullable = true) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why change this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. With the new null check, we will trigger the error earlier than this test expected. This test is testing the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ok. so the new test (
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (just want to make sure we are not losing test coverage)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yea,
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. thanks! |
||
| )), nullable = true) | ||
| )) | ||
|
|
||
|
|
@@ -684,7 +684,16 @@ class DatasetSuite extends QueryTest with SharedSQLContext { | |
| val message = intercept[Exception] { | ||
| df.collect() | ||
| }.getMessage | ||
| assert(message.contains("The 0th field of input row cannot be null")) | ||
| assert(message.contains("The 0th field 'i' of input row cannot be null")) | ||
| } | ||
|
|
||
| test("row nullability mismatch") { | ||
| val schema = new StructType().add("a", StringType, true).add("b", StringType, false) | ||
| val rdd = sqlContext.sparkContext.parallelize(Row(null, "123") :: Row("234", null) :: Nil) | ||
| val message = intercept[Exception] { | ||
| sqlContext.createDataFrame(rdd, schema).collect() | ||
| }.getMessage | ||
| assert(message.contains("The 1th field 'b' of input row cannot be null")) | ||
| } | ||
|
|
||
| test("createTempView") { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to call `toSeq` here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need. This is a special case:
`FPGrowthModel` has a type parameter and we use `FPGrowthModel[_]` here. So `x.items` returns `Object[]` instead of `T[]` as we expected, and doesn't match the schema.