Skip to content

Commit 2eb8315

Browse files
findepi authored and alexanderbianchi committed
Fix discrepancy in Float64 to timestamp(9) casts for constants (apache#16639)
* Fix discrepancy in Float64 to timestamp(9) casts

  Before the change, when casting a `Float64` value to `Timestamp(Nanosecond, None)`, the result would depend on whether the source value was a constant-foldable scalar. This is because `ScalarValue.cast_to` had special treatment for that source & destination type pair, producing a result different from the canonical one.

* Test Float32 cast to timestamp ntz too

* Restore to_timestamp(double) behavior

  The function was not meant to be changed.

(cherry picked from commit 4e32ab9)
1 parent 60417fb commit 2eb8315

File tree

3 files changed

+129
-10
lines changed

3 files changed

+129
-10
lines changed

datafusion/common/src/scalar/mod.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3059,11 +3059,6 @@ impl ScalarValue {
30593059
cast_options: &CastOptions<'static>,
30603060
) -> Result<Self> {
30613061
let scalar_array = match (self, target_type) {
3062-
(
3063-
ScalarValue::Float64(Some(float_ts)),
3064-
DataType::Timestamp(TimeUnit::Nanosecond, None),
3065-
) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64))
3066-
.to_array()?,
30673062
(
30683063
ScalarValue::Decimal128(Some(decimal_value), _, scale),
30693064
DataType::Timestamp(time_unit, None),

datafusion/functions/src/datetime/to_timestamp.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@ use std::any::Any;
1919
use std::sync::Arc;
2020

2121
use crate::datetime::common::*;
22+
use arrow::array::Float64Array;
2223
use arrow::datatypes::DataType::*;
2324
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
2425
use arrow::datatypes::{
2526
ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
2627
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
2728
};
29+
use datafusion_common::format::DEFAULT_CAST_OPTIONS;
2830
use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
2931
use datafusion_expr::{
3032
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
@@ -319,9 +321,22 @@ impl ScalarUDFImpl for ToTimestampFunc {
319321
Int32 | Int64 => args[0]
320322
.cast_to(&Timestamp(Second, None), None)?
321323
.cast_to(&Timestamp(Nanosecond, None), None),
322-
Null | Float64 | Timestamp(_, None) => {
324+
Null | Timestamp(_, None) => {
323325
args[0].cast_to(&Timestamp(Nanosecond, None), None)
324326
}
327+
Float64 => {
328+
let rescaled = arrow::compute::kernels::numeric::mul(
329+
&args[0].to_array(1)?,
330+
&arrow::array::Scalar::new(Float64Array::from(vec![
331+
1_000_000_000f64,
332+
])),
333+
)?;
334+
Ok(ColumnarValue::Array(arrow::compute::cast_with_options(
335+
&rescaled,
336+
&Timestamp(Nanosecond, None),
337+
&DEFAULT_CAST_OPTIONS,
338+
)?))
339+
}
325340
Timestamp(_, Some(tz)) => {
326341
args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
327342
}

datafusion/sqllogictest/test_files/timestamps.slt

Lines changed: 113 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,115 @@ SELECT TIMESTAMPTZ '2000-01-01T01:01:01'
176176
2000-01-01T01:01:01Z
177177

178178

179+
##########
180+
## cast tests
181+
##########
182+
183+
query BPPPPPP
184+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
185+
FROM (SELECT
186+
(SELECT CAST(CAST(1 AS float) AS timestamp(0))) AS t1,
187+
(SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t2,
188+
(SELECT CAST(CAST(one AS float) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t3,
189+
(SELECT CAST(CAST(1 AS double) AS timestamp(0))) AS t4,
190+
(SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (SELECT 1 AS one)) AS t5,
191+
(SELECT CAST(CAST(one AS double) AS timestamp(0)) FROM (VALUES (1)) t(one)) AS t6
192+
)
193+
----
194+
true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01
195+
196+
query BPPPPPP
197+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
198+
FROM (SELECT
199+
(SELECT CAST(CAST(1 AS float) AS timestamp(3))) AS t1,
200+
(SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t2,
201+
(SELECT CAST(CAST(one AS float) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t3,
202+
(SELECT CAST(CAST(1 AS double) AS timestamp(3))) AS t4,
203+
(SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (SELECT 1 AS one)) AS t5,
204+
(SELECT CAST(CAST(one AS double) AS timestamp(3)) FROM (VALUES (1)) t(one)) AS t6
205+
)
206+
----
207+
true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001
208+
209+
query BPPPPPP
210+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
211+
FROM (SELECT
212+
(SELECT CAST(CAST(1 AS float) AS timestamp(6))) AS t1,
213+
(SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (SELECT 1 AS one)) AS t2,
214+
(SELECT CAST(CAST(one AS float) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t3,
215+
(SELECT CAST(CAST(1 AS double) AS timestamp(6))) AS t4,
216+
(SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (SELECT 1 AS one)) AS t5,
217+
(SELECT CAST(CAST(one AS double) AS timestamp(6)) FROM (VALUES (1)) t(one)) AS t6
218+
)
219+
----
220+
true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001
221+
222+
query BPPPPPP
223+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
224+
FROM (SELECT
225+
(SELECT CAST(CAST(1 AS float) AS timestamp(9))) AS t1,
226+
(SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t2,
227+
(SELECT CAST(CAST(one AS float) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t3,
228+
(SELECT CAST(CAST(1 AS double) AS timestamp(9))) AS t4,
229+
(SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (SELECT 1 AS one)) AS t5,
230+
(SELECT CAST(CAST(one AS double) AS timestamp(9)) FROM (VALUES (1)) t(one)) AS t6
231+
)
232+
----
233+
true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001
234+
235+
query BPPPPPP
236+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
237+
FROM (SELECT
238+
(SELECT CAST(CAST(1.125 AS float) AS timestamp(0))) AS t1,
239+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
240+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
241+
(SELECT CAST(CAST(1.125 AS double) AS timestamp(0))) AS t4,
242+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
243+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(0)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
244+
)
245+
----
246+
true 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01 1970-01-01T00:00:01
247+
248+
query BPPPPPP
249+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
250+
FROM (SELECT
251+
(SELECT CAST(CAST(1.125 AS float) AS timestamp(3))) AS t1,
252+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
253+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
254+
(SELECT CAST(CAST(1.125 AS double) AS timestamp(3))) AS t4,
255+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
256+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(3)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
257+
)
258+
----
259+
true 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001 1970-01-01T00:00:00.001
260+
261+
query BPPPPPP
262+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
263+
FROM (SELECT
264+
(SELECT CAST(CAST(1.125 AS float) AS timestamp(6))) AS t1,
265+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
266+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
267+
(SELECT CAST(CAST(1.125 AS double) AS timestamp(6))) AS t4,
268+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
269+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(6)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
270+
)
271+
----
272+
true 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001 1970-01-01T00:00:00.000001
273+
274+
query BPPPPPP
275+
SELECT t1 = t2 AND t1 = t3 AND t1 = t4 AND t1 = t5 AND t1 = t6, *
276+
FROM (SELECT
277+
(SELECT CAST(CAST(1.125 AS float) AS timestamp(9))) AS t1,
278+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t2,
279+
(SELECT CAST(CAST(one_and_a_bit AS float) AS timestamp(9)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t3,
280+
(SELECT CAST(CAST(1.125 AS double) AS timestamp(9))) AS t4,
281+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (SELECT 1.125 AS one_and_a_bit)) AS t5,
282+
(SELECT CAST(CAST(one_and_a_bit AS double) AS timestamp(9)) FROM (VALUES (1.125)) t(one_and_a_bit)) AS t6
283+
)
284+
----
285+
true 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001
286+
287+
179288
##########
180289
## to_timestamp tests
181290
##########
@@ -394,12 +503,12 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08 12
394503
query PPP
395504
SELECT to_timestamp(1.1) as c1, cast(1.1 as timestamp) as c2, 1.1::timestamp as c3;
396505
----
397-
1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100
506+
1970-01-01T00:00:01.100 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001
398507

399508
query PPP
400509
SELECT to_timestamp(-1.1) as c1, cast(-1.1 as timestamp) as c2, (-1.1)::timestamp as c3;
401510
----
402-
1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900
511+
1969-12-31T23:59:58.900 1969-12-31T23:59:59.999999999 1969-12-31T23:59:59.999999999
403512

404513
query PPP
405514
SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as c3;
@@ -409,12 +518,12 @@ SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as
409518
query PPP
410519
SELECT to_timestamp(1.23456789) as c1, cast(1.23456789 as timestamp) as c2, 1.23456789::timestamp as c3;
411520
----
412-
1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890
521+
1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001
413522

414523
query PPP
415524
SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as timestamp) as c2, 123456789.123456789::timestamp as c3;
416525
----
417-
1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784
526+
1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 1970-01-01T00:00:00.123456789
418527

419528
# to_timestamp Decimal128 inputs
420529

0 commit comments

Comments
 (0)