
Commit 0b5ed68

[DOP-30265] Add tests for all supported data types (#19)
1 parent 1af8582 commit 0b5ed68

3 files changed: +133 −14 lines changed


docs/data_type_mappings.md

Lines changed: 16 additions & 12 deletions
@@ -32,18 +32,22 @@ Primitive types:
 
 ``Array(T)`` `->` ``ArrayType(T)`` (without this extension Spark does not support Arrays for GenericJDBC dialect):
 
-| ClickHouse Type (Read) | Spark Type | ClickHouse Type (Write) | ClickHouse Type (Create) |
-|----------------------------------------------|--------------------------------|-------------------------|--------------------------|
-| `Array(String)` | `ArrayType(StringType)` | `Array(String)` | `Array(String)` |
-| `Array(Int8) (Only 0.9.x)` | `ArrayType(ByteType)` | `Array(Int8)` | `Array(Int8)` |
-| `Array(Int16) (Only 0.9.x)` | `ArrayType(ShortType)` | `Array(Int16)` | `Array(Int16)` |
-| `Array(Int32) (Only 0.9.x)` | `ArrayType(IntegerType)` | `Array(Int32)` | `Array(Int32)` |
-| `Array(Int64) (Only 0.9.x)` | `ArrayType(LongType)` | `Array(Int64)` | `Array(Int64)` |
-| `Array(Decimal(M, N)) (Only 0.6.x or 0.7.x)` | `ArrayType(DecimalType(M, N))` | `Array(Decimal(M, N))` | `Array(Decimal(M, N))` |
-| unsupported | `ArrayType(FloatType)` | `Array(Float32)` | `Array(Float32)` |
-| `Array(Float64) (Only 0.9.x)` | `ArrayType(DoubleType)` | `Array(Float64)` | `Array(Float64)` |
-| unsupported | `ArrayType(Date)` | `Array(Date)` | `Array(Date)` |
-| unsupported | `ArrayType(TimestampType)` | `Array(DateTime64(6))` | `Array(DateTime64(6))` |
+| ClickHouse Type (Read) | Spark Type | ClickHouse Type (Write) | ClickHouse Type (Create) |
+|----------------------------------------------|----------------------------------|-------------------------|--------------------------|
+| `Array(String)` | `ArrayType(StringType)` | `Array(String)` | `Array(String)` |
+| `Array(Int8) (Only 0.9.x)` | `ArrayType(ByteType)` | `Array(Int8)` | `Array(Int8)` |
+| `Array(Int16) (Only 0.9.x)` | `ArrayType(ShortType)` | `Array(Int16)` | `Array(Int16)` |
+| `Array(Int32) (Only 0.9.x)` | `ArrayType(IntegerType)` | `Array(Int32)` | `Array(Int32)` |
+| `Array(Int64) (Only 0.9.x)` | `ArrayType(LongType)` | `Array(Int64)` | `Array(Int64)` |
+| `Array(UInt8) (Only 0.9.x)` | `ArrayType(ShortType)` | `Array(UInt8)` | `Array(UInt8)` |
+| `Array(UInt16) (Only 0.9.x)` | `ArrayType(IntegerType)` | `Array(UInt16)` | `Array(UInt16)` |
+| `Array(UInt32) (Only 0.9.x)` | `ArrayType(LongType)` | `Array(Int64)` | `Array(Int64)` |
+| `Array(UInt64)` unsupported | `ArrayType(DecimalType(20, 0))` | `Array(Decimal(20, 0))` | `Array(Decimal(20, 0))` |
+| `Array(Decimal(M, N)) (Only 0.6.x or 0.7.x)` | `ArrayType(DecimalType(M, N))` | `Array(Decimal(M, N))` | `Array(Decimal(M, N))` |
+| unsupported | `ArrayType(FloatType)` | `Array(Float32)` | `Array(Float32)` |
+| `Array(Float64) (Only 0.9.x)` | `ArrayType(DoubleType)` | `Array(Float64)` | `Array(Float64)` |
+| unsupported | `ArrayType(Date)` | `Array(Date)` | `Array(Date)` |
+| unsupported | `ArrayType(TimestampType)` | `Array(DateTime64(6))` | `Array(DateTime64(6))` |
 
 Reading issues are caused by Clickhouse JDBC implementation:
 * https://github.com/ClickHouse/clickhouse-java/issues/1754
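
For orientation, here is a minimal sketch of how these mappings surface to a Spark user once the dialect extension is on the classpath and registered; the JDBC URL, credentials, and table name are placeholders, and the commented-out schema simply restates the mapping table above.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("clickhouse-mapping-sketch").getOrCreate()

// Hypothetical ClickHouse table: events(id UInt64, tags Array(String), counts Array(Int64))
val df = spark.read
  .format("jdbc")
  .option("url", "jdbc:clickhouse://localhost:8123/default") // placeholder URL
  .option("user", "default")                                 // placeholder credentials
  .option("password", "")
  .option("dbtable", "events")                               // placeholder table name
  .load()

// Expected schema per the mapping table above:
//   id     -> DecimalType(20, 0)   (UInt64 is read as an unscaled decimal)
//   tags   -> ArrayType(StringType)
//   counts -> ArrayType(LongType)  (rows marked "Only 0.9.x" need that driver line)
df.printSchema()
```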

src/main/scala/io/github/mtsongithub/doetl/sparkdialectextensions/clickhouse/spark35/ClickhouseDialectExtension.scala

Lines changed: 3 additions & 0 deletions
@@ -88,6 +88,9 @@ private object ClickhouseDialectExtension extends JdbcDialect {
       case "UInt32" | "Int64" =>
         logger.debug(s"Custom mapping applied: LongType for '${_typeName}'")
         Some(LongType)
+      case "UInt64" =>
+        logger.debug(s"Custom mapping applied: DecimalType for '${_typeName}'")
+        Some(DecimalType(20, 0))
       case "Int128" | "Int256" | "UInt256" =>
         logger.debug(s"Type '${_typeName}' is not supported")
         None
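
Why `DecimalType(20, 0)`: UInt64 ranges up to 2^64 − 1 = 18446744073709551615, a 20-digit value that overflows Spark's signed 64-bit `LongType`, so an unscaled 20-digit decimal is the smallest Catalyst type that represents every UInt64 exactly. A standalone sanity check (not part of the diff):

```scala
import org.apache.spark.sql.types.DecimalType

// UInt64 max does not fit in a signed Long, but has exactly 20 decimal digits.
val uint64Max = BigInt(2).pow(64) - 1
assert(uint64Max > BigInt(Long.MaxValue)) // LongType would overflow
assert(uint64Max.toString.length == 20)   // hence precision 20, scale 0

val catalystType = DecimalType(20, 0)
```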

src/test/scala/io/github/mtsongithub/doetl/sparkdialectextensions/clickhouse/ClickhouseDialectTest.scala

Lines changed: 114 additions & 2 deletions
@@ -181,6 +181,42 @@ class ClickhouseDialectTest
     assert(data sameElements Array(-32768, 32767))
   }
 
+  test("read ClickHouse Int32 as Spark IntegerType") {
+    setupTable("integerColumn Int32")
+    insertTestData(Seq("(-2147483648)", "(2147483647)")) // min and max values for a signed integer
+
+    val df = spark.read
+      .format("jdbc")
+      .option("url", jdbcUrl)
+      .option("user", jdbcUser)
+      .option("password", jdbcPassword)
+      .option("dbtable", tableName)
+      .load()
+
+    assert(df.schema.fields.head.dataType == IntegerType)
+
+    val data = df.collect().map(_.getInt(0)).sorted
+    assert(data sameElements Array(-2147483648, 2147483647))
+  }
+
+  test("read ClickHouse Int64 as Spark LongType") {
+    setupTable("longColumn Int64")
+    insertTestData(Seq("(-9223372036854775808)", "(9223372036854775807)")) // min and max values for a signed long
+
+    val df = spark.read
+      .format("jdbc")
+      .option("url", jdbcUrl)
+      .option("user", jdbcUser)
+      .option("password", jdbcPassword)
+      .option("dbtable", tableName)
+      .load()
+
+    assert(df.schema.fields.head.dataType == LongType)
+
+    val data = df.collect().map(_.getLong(0)).sorted
+    assert(data sameElements Array(-9223372036854775808L, 9223372036854775807L))
+  }
+
   test("write Spark ShortType as ClickHouse Int16") {
     val schema = StructType(Seq(StructField("shortColumn", ShortType, nullable = false)))
     val data = Seq(Row(Short.MinValue), Row(Short.MaxValue))
@@ -264,6 +300,24 @@ class ClickhouseDialectTest
     assert(data sameElements Array(0L, 4294967295L))
   }
 
+  test("read Clickhouse UInt64 as Spark DecimalType") {
+    setupTable("UInt64Column UInt64")
+    insertTestData(Seq("(0)", "(18446744073709551615)")) // min and max values
+
+    val df = spark.read
+      .format("jdbc")
+      .option("url", jdbcUrl)
+      .option("user", jdbcUser)
+      .option("password", jdbcPassword)
+      .option("dbtable", tableName)
+      .load()
+
+    assert(df.schema.fields.head.dataType == DecimalType(20,0))
+
+    val data = df.collect().map(_.getDecimal(0)).sorted
+    assert(data sameElements Array(new java.math.BigDecimal(0), new java.math.BigDecimal("18446744073709551615")))
+  }
+
   test("read ClickHouse Float32 as Spark FloatType") {
     setupTable("floatColumn Float32")
     insertTestData(Seq("(-1.23)", "(4.56)"))
@@ -572,7 +626,24 @@ class ClickhouseDialectTest
     (
       "longArrayColumn Array(Int64)",
       "([1, 2, 3, 4, 5])",
-      ArrayType(LongType, containsNull = false)))
+      ArrayType(LongType, containsNull = false)),
+    (
+      "doubleArrayColumn Array(Float64)",
+      "([1.1, 2.2, 3.3, 4.4, 5.5])",
+      ArrayType(DoubleType, containsNull = false)),
+    (
+      "shortArrayColumn Array(UInt8)",
+      "([1, 2, 3, 4, 5])",
+      ArrayType(ShortType, containsNull = false)),
+    (
+      "intArrayColumn Array(UInt16)",
+      "([1, 2, 3, 4, 5])",
+      ArrayType(IntegerType, containsNull = false)),
+    (
+      "longArrayColumn Array(UInt32)",
+      "([1, 2, 3, 4, 5])",
+      ArrayType(LongType, containsNull = false)),
+  )
 
   val testReadArrayCasesV0_7_X = Table(
     ("columnDefinition", "insertedData", "expectedType"),
@@ -629,6 +700,37 @@ class ClickhouseDialectTest
       "([1, 2, 3, 4, 5])",
       ArrayType(LongType, containsNull = false),
       "class [J cannot be cast to class [Ljava.lang.Object"),
+    (
+      "floatArrayColumn Array(Float32)",
+      "([1.1, 2.2, 3.3, 4.4, 5.5])",
+      ArrayType(FloatType, containsNull = false),
+      "class [F cannot be cast to class [Ljava.lang.Object;"),
+    (
+      "doubleArrayColumn Array(Float64)",
+      "([1.1, 2.2, 3.3, 4.4, 5.5])",
+      ArrayType(DoubleType, containsNull = false),
+      "class [D cannot be cast to class [Ljava.lang.Object;"),
+    (
+      "shortArrayColumn Array(UInt8)",
+      "([1, 2, 3, 4, 5])",
+      ArrayType(ShortType, containsNull = false),
+      "class [B cannot be cast to class [Ljava.lang.Object"),
+    (
+      "intArrayColumn Array(UInt16)",
+      "([1, 2, 3, 4, 5])",
+      ArrayType(IntegerType, containsNull = false),
+      "class [S cannot be cast to class [Ljava.lang.Object"),
+    (
+      "longArrayColumn Array(UInt32)",
+      "([1, 2, 3, 4, 5])",
+      ArrayType(LongType, containsNull = false),
+      "class [I cannot be cast to class [Ljava.lang.Object"),
+    (
+      "UInt64Column Array(UInt64)",
+      "([1, 2, 3, 4, 5])",
+      ArrayType(DecimalType(20,0), containsNull = false),
+      "class [J cannot be cast to class [Ljava.math.BigDecimal"
+    ),
     // https://github.com/ClickHouse/clickhouse-java/issues/1409
     (
       "dateArrayColumn Array(Date)",
@@ -658,7 +760,17 @@ class ClickhouseDialectTest
       "datetimeArrayColumn Array(DateTime64(6))",
       "(['2024-01-01T00:00:00.000000', '2024-01-02T11:11:11.111111', '2024-01-03.2222222'])",
       ArrayType(TimestampType, containsNull = false),
-      "class [Ljava.lang.Object; cannot be cast to class [Ljava.sql.Timestamp;"))
+      "class [Ljava.lang.Object; cannot be cast to class [Ljava.sql.Timestamp;"),
+    (
+      "floatColumn Array(Float32)",
+      "([(1.1), (2.2), (3.3), (4.4), (5.5)])",
+      ArrayType(FloatType, containsNull = false),
+      "class java.lang.Double cannot be cast to class java.lang.Float"),
+    (
+      "UInt64Column Array(UInt64)",
+      "([(1), (2), (3), (4), (5)])",
+      ArrayType(DecimalType(20, 0), containsNull = false),
+      "class [Ljava.lang.Object; cannot be cast to class [Ljava.math.BigDecimal;"))
 
   forAll(if (driverVersion.startsWith("0.9")) testReadArrayUnsupportedCasesV0_9_X else testReadArrayUnsupportedCasesV0_7_X) {
     (
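
For readers decoding the expected error strings in the unsupported cases: `[J`, `[F`, `[D`, `[B`, `[S`, and `[I` are the JVM's class names for the primitive arrays `long[]`, `float[]`, `double[]`, `byte[]`, `short[]`, and `int[]`, which is evidently what the ClickHouse JDBC driver hands back for these columns and what then fails to cast to the object or `BigDecimal` array Spark expects. A standalone sketch (not part of the test suite) that prints those names:

```scala
// JVM class names of primitive arrays, as they appear in the expected ClassCastException messages.
val descriptors = Seq(
  "Array(Int64) / Array(UInt64)" -> Array.emptyLongArray.getClass.getName,   // [J
  "Array(Float32)"               -> Array.emptyFloatArray.getClass.getName,  // [F
  "Array(Float64)"               -> Array.emptyDoubleArray.getClass.getName, // [D
  "Array(UInt8)"                 -> Array.emptyByteArray.getClass.getName,   // [B
  "Array(UInt16)"                -> Array.emptyShortArray.getClass.getName,  // [S
  "Array(UInt32)"                -> Array.emptyIntArray.getClass.getName     // [I
)
descriptors.foreach { case (chType, jvmName) => println(s"$chType -> $jvmName") }
```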
