3 changes: 1 addition & 2 deletions python/pyspark/sql/tests/test_types.py
@@ -511,8 +511,7 @@ def test_struct_type(self):
     def test_parse_datatype_string(self):
         from pyspark.sql.types import _all_atomic_types, _parse_datatype_string
         for k, t in _all_atomic_types.items():
-            if t != NullType:
-                self.assertEqual(t(), _parse_datatype_string(k))
+            self.assertEqual(t(), _parse_datatype_string(k))
         self.assertEqual(IntegerType(), _parse_datatype_string("int"))
         self.assertEqual(DecimalType(1, 1), _parse_datatype_string("decimal(1 ,1)"))
         self.assertEqual(DecimalType(10, 1), _parse_datatype_string("decimal( 10,1 )"))
4 changes: 3 additions & 1 deletion python/pyspark/sql/types.py
@@ -107,7 +107,9 @@ class NullType(DataType, metaclass=DataTypeSingleton):
 
     The data type representing None, used for the types that cannot be inferred.
     """
-    pass
+    @classmethod
+    def typeName(cls):
+        return 'void'
 
 
 class AtomicType(DataType):
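
A minimal sketch of the new behavior from the PySpark side, assuming a build that includes this patch (_parse_datatype_string needs an active SparkSession):

from pyspark.sql import SparkSession
from pyspark.sql.types import NullType, _parse_datatype_string

spark = SparkSession.builder.getOrCreate()

# The type name of NullType is now "void" rather than "null" ...
assert NullType.typeName() == 'void'
# ... so NullType round-trips through the DDL parser like the other atomic types,
# which is exactly what the test above now covers by dropping the NullType exclusion.
assert _parse_datatype_string('void') == NullType()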
@@ -195,6 +195,8 @@ object DataType {
     case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, scale.toInt)
     case CHAR_TYPE(length) => CharType(length.toInt)
     case VARCHAR_TYPE(length) => VarcharType(length.toInt)
+    // For backwards compatibility, previously the type name of NullType was "null"
+    case "null" => NullType
     case other => otherTypes.getOrElse(
       other,
       throw new IllegalArgumentException(
@@ -32,6 +32,8 @@ class NullType private() extends DataType {
   override def defaultSize: Int = 1
 
   private[spark] override def asNullable: NullType = this
+
+  override def typeName: String = "void"
 }
 
 /**
@@ -183,6 +183,10 @@ class DataTypeSuite extends SparkFunSuite {
     assert(!arrayType.existsRecursively(_.isInstanceOf[IntegerType]))
   }
 
+  test("SPARK-36224: Backwards compatibility test for NullType.json") {
+    assert(DataType.fromJson("\"null\"") == NullType)
+  }
+
   def checkDataTypeFromJson(dataType: DataType): Unit = {
     test(s"from Json - $dataType") {
       assert(DataType.fromJson(dataType.json) === dataType)
@@ -198,6 +202,7 @@
   }
 
   checkDataTypeFromJson(NullType)
+  checkDataTypeFromDDL(NullType)
 
   checkDataTypeFromJson(BooleanType)
   checkDataTypeFromDDL(BooleanType)
@@ -424,6 +429,7 @@
     i => StructField(s"col$i", IntegerType, nullable = true)
   })
 
+  checkCatalogString(NullType)
   checkCatalogString(BooleanType)
   checkCatalogString(ByteType)
   checkCatalogString(ShortType)
@@ -34,7 +34,7 @@
 | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct<ascii(222):int> |
 | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct<ASIN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct<ASINH(0):double> |
-| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):null> |
+| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):void> |
 | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct<ATAN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct<ATAN2(0, 0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct<ATANH(0):double> |
@@ -223,7 +223,7 @@
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
-| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(custom error message):null> |
+| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(custom error message):void> |
 | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> |
 | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<rand():double> |
 | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> |
@@ -366,4 +366,4 @@
 | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
-| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
\ No newline at end of file
+| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL	NULL	NULL
 
@@ -74,7 +74,7 @@ select left(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of null type.; line 1 pos 7
+cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of void type.; line 1 pos 7
 
 
 -- !query
@@ -101,7 +101,7 @@ select right(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of null type.; line 1 pos 7
+cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of void type.; line 1 pos 7
 
 
 -- !query
@@ -49,7 +49,7 @@ two	2
 -- !query
 select * from values ("one", null), ("two", null) as data(a, b)
 -- !query schema
-struct<a:string,b:null>
+struct<a:string,b:void>
 -- !query output
 one	NULL
 two	NULL
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL	NULL	NULL
 
@@ -7,7 +7,7 @@ select typeof(null)
 -- !query schema
 struct<typeof(NULL):string>
 -- !query output
-null
+void
 
 
 -- !query
@@ -61,7 +61,7 @@ array<int>	map<int,int>	struct<a:int,b:string>
 -- !query
 SELECT assert_true(true), assert_true(boolean(1))
 -- !query schema
-struct<assert_true(true, 'true' is not true!):null,assert_true(1, 'cast(1 as boolean)' is not true!):null>
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as boolean)' is not true!):void>
 -- !query output
 NULL	NULL
 
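
For illustration, the same typeof behavior exercised from PySpark (a sketch; assumes a build with this patch):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.sql("SELECT typeof(NULL)").show()
# +------------+
# |typeof(NULL)|
# +------------+
# |        void|
# +------------+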
@@ -308,15 +308,15 @@ struct<1:int>
 -- !query
 select foo.* from (select null) as foo
 -- !query schema
-struct<NULL:null>
+struct<NULL:void>
 -- !query output
 NULL
 
 
 -- !query
 select foo.* from (select 'xyzzy',1,null) as foo
 -- !query schema
-struct<xyzzy:string,1:int,NULL:null>
+struct<xyzzy:string,1:int,NULL:void>
 -- !query output
 xyzzy	1	NULL
 
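
The schema strings above surface through the DataFrame API as well; a small sketch of the expected output under this change:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.sql("SELECT 'xyzzy' AS a, 1 AS b, NULL AS c")
print(df.schema.simpleString())  # struct<a:string,b:int,c:void>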
@@ -130,7 +130,7 @@ select concat_ws(',',10,20,null,30)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws(',', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array<string> or string) type, however, '10' is of int type. argument 3 requires (array<string> or string) type, however, '20' is of int type. argument 4 requires (array<string> or string) type, however, 'NULL' is of null type. argument 5 requires (array<string> or string) type, however, '30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws(',', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array<string> or string) type, however, '10' is of int type. argument 3 requires (array<string> or string) type, however, '20' is of int type. argument 4 requires (array<string> or string) type, however, 'NULL' is of void type. argument 5 requires (array<string> or string) type, however, '30' is of int type.; line 1 pos 7
 
 
 -- !query
@@ -139,7 +139,7 @@ select concat_ws('',10,20,null,30)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws('', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array<string> or string) type, however, '10' is of int type. argument 3 requires (array<string> or string) type, however, '20' is of int type. argument 4 requires (array<string> or string) type, however, 'NULL' is of null type. argument 5 requires (array<string> or string) type, however, '30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws('', 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array<string> or string) type, however, '10' is of int type. argument 3 requires (array<string> or string) type, however, '20' is of int type. argument 4 requires (array<string> or string) type, however, 'NULL' is of void type. argument 5 requires (array<string> or string) type, however, '30' is of int type.; line 1 pos 7
 
 
 -- !query
@@ -148,7 +148,7 @@ select concat_ws(NULL,10,20,null,30) is null
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws(CAST(NULL AS STRING), 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array<string> or string) type, however, '10' is of int type. argument 3 requires (array<string> or string) type, however, '20' is of int type. argument 4 requires (array<string> or string) type, however, 'NULL' is of null type. argument 5 requires (array<string> or string) type, however, '30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws(CAST(NULL AS STRING), 10, 20, NULL, 30)' due to data type mismatch: argument 2 requires (array<string> or string) type, however, '10' is of int type. argument 3 requires (array<string> or string) type, however, '20' is of int type. argument 4 requires (array<string> or string) type, however, 'NULL' is of void type. argument 5 requires (array<string> or string) type, however, '30' is of int type.; line 1 pos 7
 
 
 -- !query
@@ -5,7 +5,7 @@
 -- !query
 SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)
 -- !query schema
-struct<ifnull(NULL, x):string,ifnull(y, x):string,ifnull(NULL, NULL):null>
+struct<ifnull(NULL, x):string,ifnull(y, x):string,ifnull(NULL, NULL):void>
 -- !query output
 x	y	NULL
 
@@ -21,15 +21,15 @@ NULL	x
 -- !query
 SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)
 -- !query schema
-struct<nvl(NULL, x):string,nvl(y, x):string,nvl(NULL, NULL):null>
+struct<nvl(NULL, x):string,nvl(y, x):string,nvl(NULL, NULL):void>
 -- !query output
 x	y	NULL
 
 
 -- !query
 SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)
 -- !query schema
-struct<nvl2(NULL, x, y):string,nvl2(n, x, y):string,nvl2(NULL, NULL, NULL):null>
+struct<nvl2(NULL, x, y):string,nvl2(n, x, y):string,nvl2(NULL, NULL, NULL):void>
 -- !query output
 y	x	NULL
 
@@ -89,7 +89,7 @@ Table-valued function range with alternatives:
 range(start: long, end: long, step: long)
 range(start: long, end: long)
 range(end: long)
-cannot be applied to (integer, null): Incompatible input data type. Expected: long; Found: null; line 1 pos 14
+cannot be applied to (integer, void): Incompatible input data type. Expected: long; Found: void; line 1 pos 14
 
 
 -- !query
@@ -49,7 +49,7 @@ two	2
 -- !query
 select udf(a), b from values ("one", null), ("two", null) as data(a, b)
 -- !query schema
-struct<udf(a):string,b:null>
+struct<udf(a):string,b:void>
 -- !query output
 one	NULL
 two	NULL
@@ -421,7 +421,7 @@ class FileBasedDataSourceSuite extends QueryTest
        ""
      }
      def errorMessage(format: String): String = {
-       s"$format data source does not support null data type."
+       s"$format data source does not support void data type."
      }
      withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> useV1List) {
        withTempDir { dir =>
@@ -375,7 +375,6 @@ object SparkExecuteStatementOperation {
   def getTableSchema(structType: StructType): TableSchema = {
     val schema = structType.map { field =>
       val attrTypeString = field.dataType match {
-        case NullType => "void"
         case CalendarIntervalType => StringType.catalogString
         case _: YearMonthIntervalType => "interval_year_month"
         case _: DayTimeIntervalType => "interval_day_time"
@@ -1000,7 +1000,7 @@ private[hive] object HiveClientImpl extends Logging {
       // When reading data in parquet, orc, or avro file format with string type for char,
       // the tailing spaces may lost if we are not going to pad it.
       val typeString = CharVarcharUtils.getRawTypeString(c.metadata)
-        .getOrElse(HiveVoidType.replaceVoidType(c.dataType).catalogString)
+        .getOrElse(c.dataType.catalogString)
       new FieldSchema(c.name, typeString, c.getComment().orNull)
     }
 
@@ -1278,22 +1278,3 @@
     hiveConf
   }
 }
-
-private[hive] case object HiveVoidType extends DataType {
-  override def defaultSize: Int = 1
-  override def asNullable: DataType = HiveVoidType
-  override def simpleString: String = "void"
-
-  def replaceVoidType(dt: DataType): DataType = dt match {
-    case ArrayType(et, nullable) =>
-      ArrayType(replaceVoidType(et), nullable)
-    case MapType(kt, vt, nullable) =>
-      MapType(replaceVoidType(kt), replaceVoidType(vt), nullable)
-    case StructType(fields) =>
-      StructType(fields.map { field =>
-        field.copy(dataType = replaceVoidType(field.dataType))
-      })
-    case _: NullType => HiveVoidType
-    case _ => dt
-  }
-}
@@ -2393,12 +2393,12 @@ class HiveDDLSuite
     }
   }
 
-  test("SPARK-36241: support creating tables with null datatype") {
-    // CTAS with null type
+  test("SPARK-36241: support creating tables with void datatype") {
+    // CTAS with void type
     withTable("t1", "t2", "t3") {
       assertAnalysisError(
         "CREATE TABLE t1 USING PARQUET AS SELECT NULL AS null_col",
-        "Parquet data source does not support null data type")
+        "Parquet data source does not support void data type")
 
       assertAnalysisError(
         "CREATE TABLE t2 STORED AS PARQUET AS SELECT null as null_col",
@@ -2408,11 +2408,11 @@
       checkAnswer(sql("SELECT * FROM t3"), Row(null))
     }
 
-    // Create table with null type
+    // Create table with void type
    withTable("t1", "t2", "t3", "t4") {
      assertAnalysisError(
        "CREATE TABLE t1 (v VOID) USING PARQUET",
-        "Parquet data source does not support null data type")
+        "Parquet data source does not support void data type")
 
      assertAnalysisError(
        "CREATE TABLE t2 (v VOID) STORED AS PARQUET",
@@ -2425,7 +2425,7 @@
       checkAnswer(sql("SELECT * FROM t4"), Seq.empty)
     }
 
-    // Create table with null type using spark.catalog.createTable
+    // Create table with void type using spark.catalog.createTable
     withTable("t") {
       val schema = new StructType().add("c", NullType)
       spark.catalog.createTable(
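
The renamed error message can be reproduced end to end from PySpark; a sketch (assumes a patched build and a writable warehouse directory, table name t1 is arbitrary):

from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException

spark = SparkSession.builder.getOrCreate()
try:
    spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT NULL AS null_col")
except AnalysisException as e:
    print(e)  # ... Parquet data source does not support void data type ...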
@@ -121,7 +121,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
     msg = intercept[AnalysisException] {
       sql("select null").write.mode("overwrite").orc(orcDir)
     }.getMessage
-    assert(msg.contains("ORC data source does not support null data type."))
+    assert(msg.contains("ORC data source does not support void data type."))
 
     msg = intercept[AnalysisException] {
       spark.udf.register("testType", () => new IntervalData())