diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index ac3b89e0ba02..3806f0adc5d6 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -277,16 +277,11 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result { to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc()) } -/// Defensive check to prevent chrono-rs panics when nanosecond conversion happens on non-supported dates +/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds #[inline] fn to_timestamp_nanos(dt: NaiveDateTime) -> Result { - if dt.timestamp().checked_mul(1_000_000_000).is_none() { - return Err(ArrowError::ParseError( - ERR_NANOSECONDS_NOT_SUPPORTED.to_string(), - )); - } - - Ok(dt.timestamp_nanos()) + dt.timestamp_nanos_opt() + .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string())) } /// Accepts a string in ISO8601 standard format and some @@ -1313,12 +1308,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29.190855").unwrap() ); assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29.190855").unwrap() ); @@ -1331,12 +1326,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime_whole_secs.timestamp_nanos(), + naive_datetime_whole_secs.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29").unwrap() ); assert_eq!( - naive_datetime_whole_secs.timestamp_nanos(), + naive_datetime_whole_secs.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29").unwrap() ); @@ -1349,7 +1344,7 @@ mod tests { ); assert_eq!( - naive_datetime_no_time.timestamp_nanos(), + naive_datetime_no_time.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08").unwrap() ) } @@ -1463,12 +1458,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29.190855").unwrap() ); assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29.190855").unwrap() ); @@ -1479,12 +1474,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29").unwrap() ); assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29").unwrap() ); diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 695e3d47965d..17db7a34e06f 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -984,20 +984,26 @@ fn build_timestamp_array_impl( return Ok(None); } - let date = string_to_datetime(timezone, s).map_err(|e| { - ArrowError::ParseError(format!( - "Error parsing column {col_idx} at line {}: {}", - line_number + row_index, - e - )) - })?; - - Ok(Some(match T::UNIT { - TimeUnit::Second => date.timestamp(), - TimeUnit::Millisecond => date.timestamp_millis(), - TimeUnit::Microsecond => date.timestamp_micros(), - TimeUnit::Nanosecond => date.timestamp_nanos(), - })) + let date = string_to_datetime(timezone, s) + .and_then(|date| match T::UNIT { + TimeUnit::Second => Ok(date.timestamp()), + TimeUnit::Millisecond => Ok(date.timestamp_millis()), + TimeUnit::Microsecond => Ok(date.timestamp_micros()), + TimeUnit::Nanosecond => date.timestamp_nanos_opt().ok_or_else(|| { + ArrowError::ParseError(format!( + "{} would overflow 64-bit signed nanoseconds", + date.to_rfc3339(), + )) + }), + }) + .map_err(|e| { + ArrowError::ParseError(format!( + "Error parsing column {col_idx} at line {}: {}", + line_number + row_index, + e + )) + })?; + Ok(Some(date)) }) .collect() } diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs index ef69deabce2d..b80915f6a56a 100644 --- a/arrow-json/src/reader/timestamp_array.rs +++ b/arrow-json/src/reader/timestamp_array.rs @@ -71,7 +71,14 @@ where TimeUnit::Second => date.timestamp(), TimeUnit::Millisecond => date.timestamp_millis(), TimeUnit::Microsecond => date.timestamp_micros(), - TimeUnit::Nanosecond => date.timestamp_nanos(), + TimeUnit::Nanosecond => { + date.timestamp_nanos_opt().ok_or_else(|| { + ArrowError::ParseError(format!( + "{} would overflow 64-bit signed nanoseconds", + date.to_rfc3339(), + )) + })? + } }; builder.append_value(value) } diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index a5b5a78190b3..db371b59080a 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -757,7 +757,8 @@ mod tests { let ts_nanos = ts_string .parse::() .unwrap() - .timestamp_nanos(); + .timestamp_nanos_opt() + .unwrap(); let ts_micros = ts_nanos / 1000; let ts_millis = ts_micros / 1000; let ts_secs = ts_millis / 1000; @@ -809,7 +810,8 @@ mod tests { let ts_nanos = ts_string .parse::() .unwrap() - .timestamp_nanos(); + .timestamp_nanos_opt() + .unwrap(); let ts_micros = ts_nanos / 1000; let ts_millis = ts_micros / 1000; let ts_secs = ts_millis / 1000;