Skip to content

REF factor out parse_pydatetime from array_to_datetime #49866

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 10 additions & 28 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ from pandas._libs.tslibs.conversion cimport (
_TSObject,
cast_from_unit,
convert_datetime_to_tsobject,
convert_timezone,
get_datetime64_nanos,
parse_pydatetime,
precision_from_unit,
)
from pandas._libs.tslibs.nattype cimport (
Expand All @@ -59,7 +61,6 @@ from pandas._libs.tslibs.nattype cimport (
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport tz_compare

from pandas._libs.tslibs import (
Resolution,
Expand Down Expand Up @@ -525,35 +526,16 @@ cpdef array_to_datetime(
seen_datetime = True
if val.tzinfo is not None:
found_tz = True
if utc_convert:
_ts = convert_datetime_to_tsobject(val, None)
_ts.ensure_reso(NPY_FR_ns)
iresult[i] = _ts.value
elif found_naive:
raise ValueError('Tz-aware datetime.datetime '
'cannot be converted to '
'datetime64 unless utc=True')
elif tz_out is not None and not tz_compare(tz_out, val.tzinfo):
raise ValueError('Tz-aware datetime.datetime '
'cannot be converted to '
'datetime64 unless utc=True')
else:
found_tz = True
tz_out = val.tzinfo
_ts = convert_datetime_to_tsobject(val, None)
_ts.ensure_reso(NPY_FR_ns)
iresult[i] = _ts.value

else:
found_naive = True
if found_tz and not utc_convert:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
if isinstance(val, _Timestamp):
iresult[i] = val.as_unit("ns").value
else:
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
tz_out = convert_timezone(
val.tzinfo,
tz_out,
found_naive,
found_tz,
utc_convert,
)
result[i] = parse_pydatetime(val, &dts, utc_convert)

elif PyDate_Check(val):
seen_datetime = True
Expand Down
18 changes: 17 additions & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
npy_datetimestruct,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport tz_compare


cdef class _TSObject:
Expand All @@ -22,7 +24,7 @@ cdef class _TSObject:
bint fold
NPY_DATETIMEUNIT creso

cdef void ensure_reso(self, NPY_DATETIMEUNIT creso)
cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand All @@ -40,3 +42,17 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1
cpdef (int64_t, int) precision_from_unit(str unit)

cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)

# Declaration of convert_timezone (implemented in conversion.pyx).
# Checks whether an element with timezone ``tz_in`` is compatible with the
# output timezone ``tz_out`` given what has been seen so far, raising
# ValueError if not, and returns the (possibly updated) output timezone.
cdef tzinfo convert_timezone(
tzinfo tz_in,
tzinfo tz_out,
bint found_naive,
bint found_tz,
bint utc_convert,
)

# Declaration of parse_pydatetime (implemented in conversion.pyx).
# Converts a datetime-like ``val`` to its int64 representation; ``dts`` is
# scratch space written to by pydatetime_to_dt64 on the tz-naive path.
# ``except? -1``: -1 is a possible valid result, so callers check for a
# pending exception only when -1 is returned.
cdef int64_t parse_pydatetime(
object val,
npy_datetimestruct *dts,
bint utc_convert,
) except? -1
101 changes: 100 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct,
npy_datetimestruct_to_datetime,
pandas_datetime_to_datetimestruct,
pydatetime_to_dt64,
pydatetime_to_dtstruct,
string_to_dts,
)
Expand All @@ -65,6 +66,7 @@ from pandas._libs.tslibs.nattype cimport (
c_NaT as NaT,
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.tzconversion cimport (
Localizer,
tz_localize_to_utc_single,
Expand Down Expand Up @@ -208,9 +210,10 @@ cdef class _TSObject:
self.fold = 0
self.creso = NPY_FR_ns # default value

cdef void ensure_reso(self, NPY_DATETIMEUNIT creso):
cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1:
    """
    Cast ``self.value`` to the resolution ``creso`` in place.

    Parameters
    ----------
    creso : NPY_DATETIMEUNIT
        Target resolution.

    Returns
    -------
    int64_t
        ``self.value`` after the (possible) conversion.

    Notes
    -----
    Declared ``except? -1`` so that an exception raised by ``convert_reso``
    propagates to the caller while -1 remains a legal return value.
    """
    if self.creso != creso:
        self.value = convert_reso(self.value, self.creso, creso, False)
        # Keep the recorded resolution in sync with the rescaled value;
        # otherwise the object would claim the old unit and a repeated
        # call would rescale the already-converted value a second time.
        self.creso = creso
    return self.value


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand Down Expand Up @@ -642,3 +645,99 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
elif isinstance(dt, ABCTimestamp):
return dt.tz_localize(tz)
return _localize_pydatetime(dt, tz)


cdef tzinfo convert_timezone(
    tzinfo tz_in,
    tzinfo tz_out,
    bint found_naive,
    bint found_tz,
    bint utc_convert,
):
    """
    Check that an element's timezone is compatible with the output timezone.

    Parameters
    ----------
    tz_in : tzinfo
        Timezone of the element currently being processed (None if naive).
    tz_out : tzinfo
        Output timezone determined so far (None if not yet set).
    found_naive : bool
        Whether a timezone-naive element has been seen so far.
    found_tz : bool
        Whether a timezone-aware element has been seen so far.
    utc_convert : bool
        Whether to convert/localize to UTC.

    Returns
    -------
    tzinfo
        Output timezone: ``tz_in`` when a tz-aware element is accepted
        without ``utc_convert``, otherwise ``tz_out`` unchanged.

    Raises
    ------
    ValueError
        If ``tz_in`` can't be converted/localized to ``tz_out``.
    """
    if utc_convert:
        # With utc=True no compatibility check is made on either path.
        return tz_out

    if tz_in is None:
        # Naive element: only a problem if an aware one was already seen.
        if found_tz:
            raise ValueError('Cannot mix tz-aware with '
                             'tz-naive values')
        return tz_out

    # Aware element: reject if a naive one was already seen, or if it
    # disagrees with an already-established output timezone.
    if found_naive or (tz_out is not None and not tz_compare(tz_out, tz_in)):
        raise ValueError('Tz-aware datetime.datetime '
                         'cannot be converted to '
                         'datetime64 unless utc=True')
    return tz_in

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one more newline

Copy link
Member Author

@MarcoGorelli MarcoGorelli Nov 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're right (and that's another one for cython-lint!)

done


cdef int64_t parse_pydatetime(
    object val,
    npy_datetimestruct *dts,
    bint utc_convert,
) except? -1:
    """
    Convert pydatetime to datetime64.

    Parameters
    ----------
    val
        Element being processed.
    dts : *npy_datetimestruct
        Needed to use in pydatetime_to_dt64, which writes to it.
    utc_convert : bool
        Whether to convert/localize to UTC. The conversion performed here
        is the same for either setting; the parameter is kept so the
        signature stays stable for callers.

    Returns
    -------
    int64_t
        Integer representation of ``val`` (cast to nanosecond resolution on
        the tz-aware and ``_Timestamp`` paths).

    Raises
    ------
    OutOfBoundsDatetime
    """
    cdef:
        _TSObject _ts
        int64_t result

    if val.tzinfo is not None:
        # tz-aware: the utc_convert and non-utc_convert cases were
        # identical, so they share a single code path.
        _ts = convert_datetime_to_tsobject(val, None)
        result = _ts.ensure_reso(NPY_FR_ns)
    elif isinstance(val, _Timestamp):
        result = val.as_unit("ns").value
    else:
        # pydatetime_to_dt64 fills ``dts``, which is then bounds-checked.
        result = pydatetime_to_dt64(val, dts)
        check_dts_bounds(dts)
    return result