Skip to content

Commit 01d72bf

Browse files
committed
Merge remote-tracking branch 'upstream/main' into flaky/32/rounding
2 parents 52ea391 + bedd8f0 commit 01d72bf

File tree

19 files changed

+497
-88
lines changed

19 files changed

+497
-88
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,7 @@ Missing
914914
^^^^^^^
915915
- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``downcast`` keyword not being respected in some cases where there are no NA values present (:issue:`45423`)
916916
- Bug in :meth:`Series.fillna` and :meth:`DataFrame.fillna` with :class:`IntervalDtype` and incompatible value raising instead of casting to a common (usually object) dtype (:issue:`45796`)
917+
- Bug in :meth:`Series.map` not respecting ``na_action`` argument if mapper is a ``dict`` or :class:`Series` (:issue:`47527`)
917918
- Bug in :meth:`DataFrame.interpolate` with object-dtype column not returning a copy with ``inplace=False`` (:issue:`45791`)
918919
- Bug in :meth:`DataFrame.dropna` allowing the incompatible arguments ``how`` and ``thresh`` to both be set (:issue:`46575`)
919920
- Bug in :meth:`DataFrame.fillna` ignoring ``axis`` when the :class:`DataFrame` is a single block (:issue:`47713`)

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,4 +127,4 @@ dependencies:
127127
# build the interactive terminal
128128
- jupyterlab >=3.4,<4
129129
- pip:
130-
- jupyterlite==0.1.0b9
130+
- jupyterlite==0.1.0b10

pandas/_libs/tslibs/np_datetime.pxd

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,10 @@ cdef bint cmp_dtstructs(npy_datetimestruct* left, npy_datetimestruct* right, int
109109
cdef get_implementation_bounds(
110110
NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper
111111
)
112+
113+
cdef int64_t convert_reso(
114+
int64_t value,
115+
NPY_DATETIMEUNIT from_reso,
116+
NPY_DATETIMEUNIT to_reso,
117+
bint round_ok,
118+
) except? -1

pandas/_libs/tslibs/np_datetime.pyx

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
cimport cython
22
from cpython.datetime cimport (
3+
PyDateTime_CheckExact,
34
PyDateTime_DATE_GET_HOUR,
45
PyDateTime_DATE_GET_MICROSECOND,
56
PyDateTime_DATE_GET_MINUTE,
@@ -229,7 +230,13 @@ def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit):
229230

230231

231232
cdef inline void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts):
232-
dts.year = PyDateTime_GET_YEAR(dt)
233+
if PyDateTime_CheckExact(dt):
234+
dts.year = PyDateTime_GET_YEAR(dt)
235+
else:
236+
# We use dt.year instead of PyDateTime_GET_YEAR because with Timestamp
237+
# we override year such that PyDateTime_GET_YEAR is incorrect.
238+
dts.year = dt.year
239+
233240
dts.month = PyDateTime_GET_MONTH(dt)
234241
dts.day = PyDateTime_GET_DAY(dt)
235242
dts.hour = PyDateTime_DATE_GET_HOUR(dt)
@@ -541,5 +548,59 @@ cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT
541548
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
542549
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
543550
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
551+
552+
553+
cdef int64_t convert_reso(
554+
int64_t value,
555+
NPY_DATETIMEUNIT from_reso,
556+
NPY_DATETIMEUNIT to_reso,
557+
bint round_ok,
558+
) except? -1:
559+
cdef:
560+
int64_t res_value, mult, div, mod
561+
562+
if from_reso == to_reso:
563+
return value
564+
565+
elif to_reso < from_reso:
566+
# e.g. ns -> us, no risk of overflow, but can be lossy rounding
567+
mult = get_conversion_factor(to_reso, from_reso)
568+
div, mod = divmod(value, mult)
569+
if mod > 0 and not round_ok:
570+
raise ValueError("Cannot losslessly convert units")
571+
572+
# Note that when mod > 0, we follow np.timedelta64 in always
573+
# rounding down.
574+
res_value = div
575+
576+
elif (
577+
from_reso == NPY_FR_Y
578+
or from_reso == NPY_FR_M
579+
or to_reso == NPY_FR_Y
580+
or to_reso == NPY_FR_M
581+
):
582+
# Converting by multiplying isn't _quite_ right bc the number of
583+
# seconds in a month/year isn't fixed.
584+
res_value = _convert_reso_with_dtstruct(value, from_reso, to_reso)
585+
544586
else:
545-
raise ValueError(from_unit, to_unit)
587+
# e.g. us -> ns, risk of overflow, but no risk of lossy rounding
588+
mult = get_conversion_factor(from_reso, to_reso)
589+
with cython.overflowcheck(True):
590+
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta
591+
res_value = value * mult
592+
593+
return res_value
594+
595+
596+
cdef int64_t _convert_reso_with_dtstruct(
597+
int64_t value,
598+
NPY_DATETIMEUNIT from_unit,
599+
NPY_DATETIMEUNIT to_unit,
600+
) except? -1:
601+
cdef:
602+
npy_datetimestruct dts
603+
604+
pandas_datetime_to_datetimestruct(value, from_unit, &dts)
605+
check_dts_bounds(&dts, to_unit)
606+
return npy_datetimestruct_to_datetime(to_unit, &dts)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 20 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ from pandas._libs.tslibs.np_datetime cimport (
4747
NPY_FR_ns,
4848
cmp_dtstructs,
4949
cmp_scalar,
50+
convert_reso,
5051
get_conversion_factor,
5152
get_datetime64_unit,
5253
get_timedelta64_value,
@@ -57,7 +58,10 @@ from pandas._libs.tslibs.np_datetime cimport (
5758
pandas_timedeltastruct,
5859
)
5960

60-
from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta
61+
from pandas._libs.tslibs.np_datetime import (
62+
OutOfBoundsDatetime,
63+
OutOfBoundsTimedelta,
64+
)
6165

6266
from pandas._libs.tslibs.offsets cimport is_tick_object
6367
from pandas._libs.tslibs.util cimport (
@@ -240,6 +244,11 @@ cpdef int64_t delta_to_nanoseconds(
240244

241245
elif is_timedelta64_object(delta):
242246
in_reso = get_datetime64_unit(delta)
247+
if in_reso == NPY_DATETIMEUNIT.NPY_FR_Y or in_reso == NPY_DATETIMEUNIT.NPY_FR_M:
248+
raise ValueError(
249+
"delta_to_nanoseconds does not support Y or M units, "
250+
"as their duration in nanoseconds is ambiguous."
251+
)
243252
n = get_timedelta64_value(delta)
244253

245254
elif PyDelta_Check(delta):
@@ -256,26 +265,15 @@ cpdef int64_t delta_to_nanoseconds(
256265
else:
257266
raise TypeError(type(delta))
258267

259-
if reso < in_reso:
260-
# e.g. ns -> us
261-
factor = get_conversion_factor(reso, in_reso)
262-
div, mod = divmod(n, factor)
263-
if mod > 0 and not round_ok:
264-
raise ValueError("Cannot losslessly convert units")
265-
266-
# Note that when mod > 0, we follow np.timedelta64 in always
267-
# rounding down.
268-
value = div
269-
else:
270-
factor = get_conversion_factor(in_reso, reso)
271-
try:
272-
with cython.overflowcheck(True):
273-
value = n * factor
274-
except OverflowError as err:
275-
unit_str = npy_unit_to_abbrev(reso)
276-
raise OutOfBoundsTimedelta(
277-
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
278-
) from err
268+
try:
269+
return convert_reso(n, in_reso, reso, round_ok=round_ok)
270+
except (OutOfBoundsDatetime, OverflowError) as err:
271+
# Catch OutOfBoundsDatetime bc convert_reso can call check_dts_bounds
272+
# for Y/M-resolution cases
273+
unit_str = npy_unit_to_abbrev(reso)
274+
raise OutOfBoundsTimedelta(
275+
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
276+
) from err
279277

280278
return value
281279

@@ -1538,21 +1536,7 @@ cdef class _Timedelta(timedelta):
15381536
if reso == self._reso:
15391537
return self
15401538

1541-
if reso < self._reso:
1542-
# e.g. ns -> us
1543-
mult = get_conversion_factor(reso, self._reso)
1544-
div, mod = divmod(self.value, mult)
1545-
if mod > 0 and not round_ok:
1546-
raise ValueError("Cannot losslessly convert units")
1547-
1548-
# Note that when mod > 0, we follow np.timedelta64 in always
1549-
# rounding down.
1550-
value = div
1551-
else:
1552-
mult = get_conversion_factor(self._reso, reso)
1553-
with cython.overflowcheck(True):
1554-
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta
1555-
value = self.value * mult
1539+
value = convert_reso(self.value, self._reso, reso, round_ok=round_ok)
15561540
return type(self)._from_value_and_reso(value, reso=reso)
15571541

15581542

pandas/_libs/tslibs/timestamps.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ cdef _Timestamp create_timestamp_from_ts(int64_t value,
2222

2323
cdef class _Timestamp(ABCTimestamp):
2424
cdef readonly:
25-
int64_t value, nanosecond
25+
int64_t value, nanosecond, year
2626
BaseOffset _freq
2727
NPY_DATETIMEUNIT _reso
2828

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ from pandas._libs.tslibs.np_datetime cimport (
8282
NPY_FR_ns,
8383
cmp_dtstructs,
8484
cmp_scalar,
85+
convert_reso,
8586
get_conversion_factor,
8687
get_datetime64_unit,
8788
get_datetime64_value,
@@ -143,12 +144,27 @@ cdef inline _Timestamp create_timestamp_from_ts(
143144
""" convenience routine to construct a Timestamp from its parts """
144145
cdef:
145146
_Timestamp ts_base
146-
147-
ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month,
147+
int64_t pass_year = dts.year
148+
149+
# We pass year=1970/1972 here and set year below because with non-nanosecond
150+
# resolution we may have datetimes outside of the stdlib pydatetime
151+
# implementation bounds, which would raise.
152+
# NB: this means the C-API macro PyDateTime_GET_YEAR is unreliable.
153+
if 1 <= pass_year <= 9999:
154+
# we are in-bounds for pydatetime
155+
pass
156+
elif ccalendar.is_leapyear(dts.year):
157+
pass_year = 1972
158+
else:
159+
pass_year = 1970
160+
161+
ts_base = _Timestamp.__new__(Timestamp, pass_year, dts.month,
148162
dts.day, dts.hour, dts.min,
149163
dts.sec, dts.us, tz, fold=fold)
164+
150165
ts_base.value = value
151166
ts_base._freq = freq
167+
ts_base.year = dts.year
152168
ts_base.nanosecond = dts.ps // 1000
153169
ts_base._reso = reso
154170

@@ -179,6 +195,40 @@ def integer_op_not_supported(obj):
179195
return TypeError(int_addsub_msg)
180196

181197

198+
class MinMaxReso:
199+
"""
200+
We need to define min/max/resolution on both the Timestamp _instance_
201+
and Timestamp class. On an instance, these depend on the object's _reso.
202+
On the class, we default to the values we would get with nanosecond _reso.
203+
204+
See also: timedeltas.MinMaxReso
205+
"""
206+
def __init__(self, name):
207+
self._name = name
208+
209+
def __get__(self, obj, type=None):
210+
cls = Timestamp
211+
if self._name == "min":
212+
val = np.iinfo(np.int64).min + 1
213+
elif self._name == "max":
214+
val = np.iinfo(np.int64).max
215+
else:
216+
assert self._name == "resolution"
217+
val = 1
218+
cls = Timedelta
219+
220+
if obj is None:
221+
# i.e. this is on the class, default to nanos
222+
return cls(val)
223+
elif self._name == "resolution":
224+
return Timedelta._from_value_and_reso(val, obj._reso)
225+
else:
226+
return Timestamp._from_value_and_reso(val, obj._reso, tz=None)
227+
228+
def __set__(self, obj, value):
229+
raise AttributeError(f"{self._name} is not settable.")
230+
231+
182232
# ----------------------------------------------------------------------
183233

184234
cdef class _Timestamp(ABCTimestamp):
@@ -188,6 +238,10 @@ cdef class _Timestamp(ABCTimestamp):
188238
dayofweek = _Timestamp.day_of_week
189239
dayofyear = _Timestamp.day_of_year
190240

241+
min = MinMaxReso("min")
242+
max = MinMaxReso("max")
243+
resolution = MinMaxReso("resolution") # GH#21336, GH#21365
244+
191245
cpdef void _set_freq(self, freq):
192246
# set the ._freq attribute without going through the constructor,
193247
# which would issue a warning
@@ -248,10 +302,12 @@ cdef class _Timestamp(ABCTimestamp):
248302
def __hash__(_Timestamp self):
249303
if self.nanosecond:
250304
return hash(self.value)
305+
if not (1 <= self.year <= 9999):
306+
# out of bounds for pydatetime
307+
return hash(self.value)
251308
if self.fold:
252309
return datetime.__hash__(self.replace(fold=0))
253310
return datetime.__hash__(self)
254-
# TODO(non-nano): what if we are out of bounds for pydatetime?
255311

256312
def __richcmp__(_Timestamp self, object other, int op):
257313
cdef:
@@ -968,6 +1024,9 @@ cdef class _Timestamp(ABCTimestamp):
9681024
"""
9691025
base_ts = "microseconds" if timespec == "nanoseconds" else timespec
9701026
base = super(_Timestamp, self).isoformat(sep=sep, timespec=base_ts)
1027+
# We need to replace the fake year 1970 with our real year
1028+
base = f"{self.year}-" + base.split("-", 1)[1]
1029+
9711030
if self.nanosecond == 0 and timespec != "nanoseconds":
9721031
return base
9731032

@@ -1043,7 +1102,6 @@ cdef class _Timestamp(ABCTimestamp):
10431102
# -----------------------------------------------------------------
10441103
# Conversion Methods
10451104

1046-
# TODO: share with _Timedelta?
10471105
@cython.cdivision(False)
10481106
cdef _Timestamp _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
10491107
cdef:
@@ -1052,21 +1110,7 @@ cdef class _Timestamp(ABCTimestamp):
10521110
if reso == self._reso:
10531111
return self
10541112

1055-
if reso < self._reso:
1056-
# e.g. ns -> us
1057-
mult = get_conversion_factor(reso, self._reso)
1058-
div, mod = divmod(self.value, mult)
1059-
if mod > 0 and not round_ok:
1060-
raise ValueError("Cannot losslessly convert units")
1061-
1062-
# Note that when mod > 0, we follow np.datetime64 in always
1063-
# rounding down.
1064-
value = div
1065-
else:
1066-
mult = get_conversion_factor(self._reso, reso)
1067-
with cython.overflowcheck(True):
1068-
# Note: caller is responsible for re-raising as OutOfBoundsDatetime
1069-
value = self.value * mult
1113+
value = convert_reso(self.value, self._reso, reso, round_ok=round_ok)
10701114
return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo)
10711115

10721116
def _as_unit(self, str unit, bint round_ok=True):
@@ -2332,29 +2376,24 @@ default 'raise'
23322376
Return the day of the week represented by the date.
23332377
Monday == 1 ... Sunday == 7.
23342378
"""
2335-
return super().isoweekday()
2379+
# same as super().isoweekday(), but that breaks because of how
2380+
# we have overridden year, see note in create_timestamp_from_ts
2381+
return self.weekday() + 1
23362382

23372383
def weekday(self):
23382384
"""
23392385
Return the day of the week represented by the date.
23402386
Monday == 0 ... Sunday == 6.
23412387
"""
2342-
return super().weekday()
2388+
# same as super().weekday(), but that breaks because of how
2389+
# we have overridden year, see note in create_timestamp_from_ts
2390+
return ccalendar.dayofweek(self.year, self.month, self.day)
23432391

23442392

23452393
# Aliases
23462394
Timestamp.weekofyear = Timestamp.week
23472395
Timestamp.daysinmonth = Timestamp.days_in_month
23482396

2349-
# Add the min and max fields at the class level
2350-
cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max
2351-
cdef int64_t _NS_LOWER_BOUND = NPY_NAT + 1
2352-
2353-
# Resolution is in nanoseconds
2354-
Timestamp.min = Timestamp(_NS_LOWER_BOUND)
2355-
Timestamp.max = Timestamp(_NS_UPPER_BOUND)
2356-
Timestamp.resolution = Timedelta(nanoseconds=1) # GH#21336, GH#21365
2357-
23582397

23592398
# ----------------------------------------------------------------------
23602399
# Scalar analogues to functions in vectorized.pyx

0 commit comments

Comments
 (0)