@@ -631,6 +631,35 @@ statement ok
631631drop table foo
632632
633633
634+ # Tests for int96 timestamps written by spark
635+ # See https://github.com/apache/datafusion/issues/9981
636+
637+ statement ok
638+ CREATE EXTERNAL TABLE int96_from_spark
639+ STORED AS PARQUET
640+ LOCATION '../../parquet-testing/data/int96_from_spark.parquet';
641+
642+ # by default the value is read as nanosecond precision
643+ query TTT
644+ describe int96_from_spark
645+ ----
646+ a Timestamp(Nanosecond, None) YES
647+
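648+ # Note that the values are read as nanosecond precision, so the two far-future
648+ # dates in the file (years 9999 and 290000) do not fit and come back as the
648+ # incorrect 1816 / 1815 values below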
649+ query P
650+ select * from int96_from_spark
651+ ----
652+ 2024-01-01T20:34:56.123456
653+ 2024-01-01T01:00:00
654+ 1816-03-29T08:56:08.066277376
655+ 2024-12-30T23:00:00
656+ NULL
657+ 1815-11-08T16:01:01.191053312
658+
659+ statement ok
660+ drop table int96_from_spark;
661+
662+ # Enable coercion of int96 to milliseconds
634663statement ok
635664set datafusion.execution.parquet.coerce_int96 = ms;
636665
@@ -645,5 +674,33 @@ describe int96_from_spark;
645674----
646675a Timestamp(Millisecond, None) YES
647676
677+ # Per https://github.com/apache/parquet-testing/blob/6e851ddd768d6af741c7b15dc594874399fc3cff/data/int96_from_spark.md?plain=1#L37
678+ # these values should be
679+ #
680+ # Some("2024-01-01T12:34:56.123456"),
681+ # Some("2024-01-01T01:00:00Z"),
682+ # Some("9999-12-31T01:00:00-02:00"),
683+ # Some("2024-12-31T01:00:00+02:00"),
684+ # None,
685+ # Some("290000-12-31T01:00:00+02:00"))
686+ #
687+ # However, printing the largest date (290000-12-31) is not supported by
688+ # arrow yet, so the last row is expected to be a cast error
689+ #
690+ # See https://github.com/apache/arrow-rs/issues/7287
691+ query P
692+ select * from int96_from_spark
693+ ----
694+ 2024-01-01T20:34:56.123
695+ 2024-01-01T01:00:00
696+ 9999-12-31T03:00:00
697+ 2024-12-30T23:00:00
698+ NULL
699+ ERROR: Cast error: Failed to convert -9357363680509551 to datetime for Timestamp(Millisecond, None)
700+
701+ # Cleanup / reset default setting
702+ statement ok
703+ drop table int96_from_spark;
704+
648705statement ok
649706set datafusion.execution.parquet.coerce_int96 = ns;
0 commit comments