diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index e01de6b70470e..5679ea9306c72 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -50,7 +50,9 @@ from pandas._libs.tslibs.conversion cimport (
     _TSObject,
     cast_from_unit,
     convert_datetime_to_tsobject,
+    convert_timezone,
     get_datetime64_nanos,
+    parse_pydatetime,
     precision_from_unit,
 )
 from pandas._libs.tslibs.nattype cimport (
@@ -59,7 +61,6 @@ from pandas._libs.tslibs.nattype cimport (
     c_nat_strings as nat_strings,
 )
 from pandas._libs.tslibs.timestamps cimport _Timestamp
-from pandas._libs.tslibs.timezones cimport tz_compare
 
 from pandas._libs.tslibs import (
     Resolution,
@@ -525,35 +526,17 @@ cpdef array_to_datetime(
                     seen_datetime = True
                     if val.tzinfo is not None:
                         found_tz = True
-                        if utc_convert:
-                            _ts = convert_datetime_to_tsobject(val, None)
-                            _ts.ensure_reso(NPY_FR_ns)
-                            iresult[i] = _ts.value
-                        elif found_naive:
-                            raise ValueError('Tz-aware datetime.datetime '
-                                             'cannot be converted to '
-                                             'datetime64 unless utc=True')
-                        elif tz_out is not None and not tz_compare(tz_out, val.tzinfo):
-                            raise ValueError('Tz-aware datetime.datetime '
-                                             'cannot be converted to '
-                                             'datetime64 unless utc=True')
-                        else:
-                            found_tz = True
-                            tz_out = val.tzinfo
-                            _ts = convert_datetime_to_tsobject(val, None)
-                            _ts.ensure_reso(NPY_FR_ns)
-                            iresult[i] = _ts.value
-
                     else:
                         found_naive = True
-                        if found_tz and not utc_convert:
-                            raise ValueError('Cannot mix tz-aware with '
-                                             'tz-naive values')
-                        if isinstance(val, _Timestamp):
-                            iresult[i] = val.as_unit("ns").value
-                        else:
-                            iresult[i] = pydatetime_to_dt64(val, &dts)
-                            check_dts_bounds(&dts)
+                    # Validate tz consistency with the elements seen so far.
+                    tz_out = convert_timezone(
+                        val.tzinfo,
+                        tz_out,
+                        found_naive,
+                        found_tz,
+                        utc_convert,
+                    )
+                    iresult[i] = parse_pydatetime(val, &dts, utc_convert)
 
                 elif PyDate_Check(val):
                     seen_datetime = True
diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
index c285b248f7a5b..dfb8b2009f0ec 100644
--- a/pandas/_libs/tslibs/conversion.pxd
+++ b/pandas/_libs/tslibs/conversion.pxd
@@ -12,6 +12,8 @@ from pandas._libs.tslibs.np_datetime cimport (
     NPY_DATETIMEUNIT,
     npy_datetimestruct,
 )
+from pandas._libs.tslibs.timestamps cimport _Timestamp
+from pandas._libs.tslibs.timezones cimport tz_compare
 
 
 cdef class _TSObject:
@@ -22,7 +24,7 @@ cdef class _TSObject:
         bint fold
         NPY_DATETIMEUNIT creso
 
-    cdef void ensure_reso(self, NPY_DATETIMEUNIT creso)
+    cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1
 
 
 cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
@@ -40,3 +42,17 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1
 cpdef (int64_t, int) precision_from_unit(str unit)
 
 cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
+
+cdef tzinfo convert_timezone(
+    tzinfo tz_in,
+    tzinfo tz_out,
+    bint found_naive,
+    bint found_tz,
+    bint utc_convert,
+)
+
+cdef int64_t parse_pydatetime(
+    object val,
+    npy_datetimestruct *dts,
+    bint utc_convert,
+) except? -1
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 923dfa3c54d26..d0d6dc3f42d85 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -41,6 +41,7 @@ from pandas._libs.tslibs.np_datetime cimport (
     npy_datetimestruct,
     npy_datetimestruct_to_datetime,
     pandas_datetime_to_datetimestruct,
+    pydatetime_to_dt64,
     pydatetime_to_dtstruct,
     string_to_dts,
 )
@@ -65,6 +66,7 @@ from pandas._libs.tslibs.nattype cimport (
     c_NaT as NaT,
     c_nat_strings as nat_strings,
 )
+from pandas._libs.tslibs.timestamps cimport _Timestamp
 from pandas._libs.tslibs.tzconversion cimport (
     Localizer,
     tz_localize_to_utc_single,
@@ -208,9 +210,10 @@ cdef class _TSObject:
         self.fold = 0
         self.creso = NPY_FR_ns  # default value
 
-    cdef void ensure_reso(self, NPY_DATETIMEUNIT creso):
+    cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1:
         if self.creso != creso:
             self.value = convert_reso(self.value, self.creso, creso, False)
+        return self.value
 
 
 cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
@@ -642,3 +645,102 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
     elif isinstance(dt, ABCTimestamp):
         return dt.tz_localize(tz)
     return _localize_pydatetime(dt, tz)
+
+
+cdef tzinfo convert_timezone(
+    tzinfo tz_in,
+    tzinfo tz_out,
+    bint found_naive,
+    bint found_tz,
+    bint utc_convert,
+):
+    """
+    Validate that ``tz_in`` can be converted/localized to ``tz_out``.
+
+    Parameters
+    ----------
+    tz_in : tzinfo or None
+        Timezone info of element being processed.
+    tz_out : tzinfo or None
+        Timezone info of output.
+    found_naive : bool
+        Whether a timezone-naive element has been found so far.
+    found_tz : bool
+        Whether a timezone-aware element has been found so far.
+    utc_convert : bool
+        Whether to convert/localize to UTC.
+
+    Returns
+    -------
+    tzinfo or None
+        Timezone info of output.
+
+    Raises
+    ------
+    ValueError
+        If ``tz_in`` can't be converted/localized to ``tz_out``.
+    """
+    if tz_in is not None:
+        if utc_convert:
+            # Nothing to validate; tz_out is returned unchanged.
+            pass
+        elif found_naive:
+            raise ValueError('Tz-aware datetime.datetime '
+                             'cannot be converted to '
+                             'datetime64 unless utc=True')
+        elif tz_out is not None and not tz_compare(tz_out, tz_in):
+            raise ValueError('Tz-aware datetime.datetime '
+                             'cannot be converted to '
+                             'datetime64 unless utc=True')
+        else:
+            # tz_out is unset or compares equal to tz_in.
+            tz_out = tz_in
+    else:
+        if found_tz and not utc_convert:
+            raise ValueError('Cannot mix tz-aware with '
+                             'tz-naive values')
+    return tz_out
+
+
+cdef int64_t parse_pydatetime(
+    object val,
+    npy_datetimestruct *dts,
+    bint utc_convert,
+) except? -1:
+    """
+    Convert pydatetime to datetime64.
+
+    Parameters
+    ----------
+    val
+        Element being processed.
+    dts : npy_datetimestruct*
+        Needed to use in pydatetime_to_dt64, which writes to it.
+    utc_convert : bool
+        Whether to convert/localize to UTC. Currently has no effect on the
+        result: tz-aware values take the same path either way.
+
+    Returns
+    -------
+    int64_t
+        Integer value of ``val``; the tz-aware and ``_Timestamp`` paths
+        explicitly use nanosecond resolution.
+
+    Raises
+    ------
+    OutOfBoundsDatetime
+    """
+    cdef:
+        _TSObject _ts
+        int64_t result
+
+    if val.tzinfo is not None:
+        _ts = convert_datetime_to_tsobject(val, None)
+        # ensure_reso returns the (possibly rescaled) value.
+        result = _ts.ensure_reso(NPY_FR_ns)
+    elif isinstance(val, _Timestamp):
+        result = val.as_unit("ns").value
+    else:
+        result = pydatetime_to_dt64(val, dts)
+        check_dts_bounds(dts)
+    return result