Skip to content

Commit 41db572

Browse files
API: Timestamp(pydatetime) microsecond reso (#49034)
* API: Timedelta(td64_obj) retain resolution
* BUG: preserve DTA/TDA+timedeltalike scalar with mismatched resos
* BUG: DatetimeArray-datetimelike mixed resos
* API: Timestamp(pydatetime) microsecond reso
* use willayd suggestion
* ci fixup
* mypy fixup
* ignore pyright
* fix doctest
* un-xfail
* Merge main follow-up
* s reso for pydate
* typo fixup
* post-merge fixups
* suggestion json validation
* extra Py_DECREF
* requested refactor
* fix doctest
* unit keyword
* Update pandas/_libs/tslibs/conversion.pyx

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* dedicate pydate reso test
* fix failing resample test

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
1 parent 13f758c commit 41db572

26 files changed: 193 additions (+193) and 57 deletions (-57)

pandas/_libs/lib.pyx

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,16 @@ from pandas._libs.missing cimport (
109109
is_null_datetime64,
110110
is_null_timedelta64,
111111
)
112-
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
112+
from pandas._libs.tslibs.conversion cimport (
113+
_TSObject,
114+
convert_to_tsobject,
115+
)
113116
from pandas._libs.tslibs.nattype cimport (
114117
NPY_NAT,
115118
c_NaT as NaT,
116119
checknull_with_nat,
117120
)
121+
from pandas._libs.tslibs.np_datetime cimport NPY_FR_ns
118122
from pandas._libs.tslibs.offsets cimport is_offset_object
119123
from pandas._libs.tslibs.period cimport is_period_object
120124
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
@@ -2378,6 +2382,7 @@ def maybe_convert_objects(ndarray[object] objects,
23782382
ndarray[uint8_t] bools
23792383
Seen seen = Seen()
23802384
object val
2385+
_TSObject tsobj
23812386
float64_t fnan = np.nan
23822387

23832388
if dtype_if_all_nat is not None:
@@ -2470,7 +2475,8 @@ def maybe_convert_objects(ndarray[object] objects,
24702475
else:
24712476
seen.datetime_ = True
24722477
try:
2473-
convert_to_tsobject(val, None, None, 0, 0)
2478+
tsobj = convert_to_tsobject(val, None, None, 0, 0)
2479+
tsobj.ensure_reso(NPY_FR_ns)
24742480
except OutOfBoundsDatetime:
24752481
seen.object_ = True
24762482
break

pandas/_libs/src/ujson/python/objToJSON.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,42 @@ static int is_simple_frame(PyObject *obj) {
278278
}
279279

280280
static npy_int64 get_long_attr(PyObject *o, const char *attr) {
281+
// NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT
282+
281283
npy_int64 long_val;
282284
PyObject *value = PyObject_GetAttrString(o, attr);
283285
long_val =
284286
(PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value));
287+
285288
Py_DECREF(value);
289+
290+
if (object_is_nat_type(o)) {
291+
// i.e. o is NaT, long_val will be NPY_MIN_INT64
292+
return long_val;
293+
}
294+
295+
// ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit
296+
PyObject* reso = PyObject_GetAttrString(o, "_creso");
297+
if (!PyLong_Check(reso)) {
298+
// https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139
299+
Py_DECREF(reso);
300+
return -1;
301+
}
302+
303+
long cReso = PyLong_AsLong(reso);
304+
Py_DECREF(reso);
305+
if (cReso == -1 && PyErr_Occurred()) {
306+
return -1;
307+
}
308+
309+
if (cReso == NPY_FR_us) {
310+
long_val = long_val * 1000L;
311+
} else if (cReso == NPY_FR_ms) {
312+
long_val = long_val * 1000000L;
313+
} else if (cReso == NPY_FR_s) {
314+
long_val = long_val * 1000000000L;
315+
}
316+
286317
return long_val;
287318
}
288319

@@ -1265,6 +1296,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
12651296
} else if (PyDate_Check(item) || PyDelta_Check(item)) {
12661297
is_datetimelike = 1;
12671298
if (PyObject_HasAttrString(item, "value")) {
1299+
// see test_date_index_and_values for case with non-nano
12681300
nanosecVal = get_long_attr(item, "value");
12691301
} else {
12701302
if (PyDelta_Check(item)) {

pandas/_libs/tslib.pyx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -841,16 +841,19 @@ cdef _array_to_datetime_object(
841841
cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc):
842842
# We delay this check for as long as possible
843843
# because it catches relatively rare cases
844+
845+
# Multiply by 1000 to convert to nanos, since these methods naturally have
846+
# microsecond resolution
844847
if val == "now":
845848
if utc:
846-
iresult[0] = Timestamp.utcnow().value
849+
iresult[0] = Timestamp.utcnow().value * 1000
847850
else:
848851
# GH#18705 make sure to_datetime("now") matches Timestamp("now")
849852
# Note using Timestamp.now() is faster than Timestamp("now")
850-
iresult[0] = Timestamp.now().value
853+
iresult[0] = Timestamp.now().value * 1000
851854
return True
852855
elif val == "today":
853-
iresult[0] = Timestamp.today().value
856+
iresult[0] = Timestamp.today().value * 1000
854857
return True
855858
return False
856859

pandas/_libs/tslibs/conversion.pyx

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ from pandas._libs.tslibs.dtypes cimport (
3232
from pandas._libs.tslibs.np_datetime cimport (
3333
NPY_DATETIMEUNIT,
3434
NPY_FR_ns,
35+
NPY_FR_us,
3536
check_dts_bounds,
3637
convert_reso,
3738
get_datetime64_unit,
@@ -212,7 +213,12 @@ cdef class _TSObject:
212213

213214
cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1:
214215
if self.creso != creso:
215-
self.value = convert_reso(self.value, self.creso, creso, False)
216+
try:
217+
self.value = convert_reso(self.value, self.creso, creso, False)
218+
except OverflowError as err:
219+
raise OutOfBoundsDatetime from err
220+
221+
self.creso = creso
216222
return self.value
217223

218224

@@ -288,11 +294,22 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
288294
obj.value = ts
289295
pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
290296
elif PyDateTime_Check(ts):
291-
return convert_datetime_to_tsobject(ts, tz, nanos)
297+
if nanos == 0:
298+
if isinstance(ts, ABCTimestamp):
299+
reso = abbrev_to_npy_unit(ts.unit) # TODO: faster way to do this?
300+
else:
301+
# TODO: what if user explicitly passes nanos=0?
302+
reso = NPY_FR_us
303+
else:
304+
reso = NPY_FR_ns
305+
return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso)
292306
elif PyDate_Check(ts):
293307
# Keep the converter same as PyDateTime's
308+
# For date object we give the lowest supported resolution, i.e. "s"
294309
ts = datetime.combine(ts, time())
295-
return convert_datetime_to_tsobject(ts, tz)
310+
return convert_datetime_to_tsobject(
311+
ts, tz, nanos=0, reso=NPY_DATETIMEUNIT.NPY_FR_s
312+
)
296313
else:
297314
from .period import Period
298315
if isinstance(ts, Period):
@@ -346,6 +363,7 @@ cdef _TSObject convert_datetime_to_tsobject(
346363
_TSObject obj = _TSObject()
347364
int64_t pps
348365

366+
obj.creso = reso
349367
obj.fold = ts.fold
350368
if tz is not None:
351369
tz = maybe_get_tz(tz)

pandas/_libs/tslibs/offsets.pyx

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ def apply_wraps(func):
162162

163163
result = func(self, other)
164164

165-
result = Timestamp(result)
165+
result = (<_Timestamp>Timestamp(result))._as_creso(other._creso)
166+
166167
if self._adjust_dst:
167168
result = result.tz_localize(tz)
168169

@@ -175,9 +176,10 @@ def apply_wraps(func):
175176
if result.nanosecond != nano:
176177
if result.tz is not None:
177178
# convert to UTC
178-
value = result.tz_localize(None).value
179+
res = result.tz_localize(None)
179180
else:
180-
value = result.value
181+
res = result
182+
value = res.as_unit("ns").value
181183
result = Timestamp(value + nano)
182184

183185
if tz is not None and result.tzinfo is None:

pandas/_libs/tslibs/timestamps.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@ cdef class _Timestamp(ABCTimestamp):
3333
cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
3434
int op) except -1
3535
cdef bint _compare_mismatched_resos(_Timestamp self, _Timestamp other, int op)
36-
cdef _Timestamp _as_creso(_Timestamp self, NPY_DATETIMEUNIT reso, bint round_ok=*)
36+
cdef _Timestamp _as_creso(_Timestamp self, NPY_DATETIMEUNIT creso, bint round_ok=*)

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -497,9 +497,9 @@ cdef class _Timestamp(ABCTimestamp):
497497
# Matching numpy, we cast to the higher resolution. Unlike numpy,
498498
# we raise instead of silently overflowing during this casting.
499499
if self._creso < other._creso:
500-
self = (<_Timestamp>self)._as_creso(other._creso, round_ok=False)
500+
self = (<_Timestamp>self)._as_creso(other._creso, round_ok=True)
501501
elif self._creso > other._creso:
502-
other = (<_Timestamp>other)._as_creso(self._creso, round_ok=False)
502+
other = (<_Timestamp>other)._as_creso(self._creso, round_ok=True)
503503

504504
# scalar Timestamp/datetime - Timestamp/datetime -> yields a
505505
# Timedelta
@@ -983,15 +983,22 @@ cdef class _Timestamp(ABCTimestamp):
983983
# Conversion Methods
984984

985985
@cython.cdivision(False)
986-
cdef _Timestamp _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
986+
cdef _Timestamp _as_creso(self, NPY_DATETIMEUNIT creso, bint round_ok=True):
987987
cdef:
988988
int64_t value
989989

990-
if reso == self._creso:
990+
if creso == self._creso:
991991
return self
992992

993-
value = convert_reso(self.value, self._creso, reso, round_ok=round_ok)
994-
return type(self)._from_value_and_reso(value, reso=reso, tz=self.tzinfo)
993+
try:
994+
value = convert_reso(self.value, self._creso, creso, round_ok=round_ok)
995+
except OverflowError as err:
996+
unit = npy_unit_to_abbrev(creso)
997+
raise OutOfBoundsDatetime(
998+
f"Cannot cast {self} to unit='{unit}' without overflow."
999+
) from err
1000+
1001+
return type(self)._from_value_and_reso(value, reso=creso, tz=self.tzinfo)
9951002

9961003
def as_unit(self, str unit, bint round_ok=True):
9971004
"""
@@ -1025,7 +1032,7 @@ cdef class _Timestamp(ABCTimestamp):
10251032
--------
10261033
>>> ts = pd.Timestamp(2020, 3, 14, 15)
10271034
>>> ts.asm8
1028-
numpy.datetime64('2020-03-14T15:00:00.000000000')
1035+
numpy.datetime64('2020-03-14T15:00:00.000000')
10291036
"""
10301037
return self.to_datetime64()
10311038

pandas/core/array_algos/take.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,14 @@ def wrapper(
360360
if out_dtype is not None:
361361
out = out.view(out_dtype)
362362
if fill_wrap is not None:
363+
# FIXME: if we get here with dt64/td64 we need to be sure we have
364+
# matching resos
365+
if fill_value.dtype.kind == "m":
366+
fill_value = fill_value.astype("m8[ns]")
367+
else:
368+
fill_value = fill_value.astype("M8[ns]")
363369
fill_value = fill_wrap(fill_value)
370+
364371
f(arr, indexer, out, fill_value=fill_value)
365372

366373
return wrapper

pandas/core/arrays/datetimes.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ def _generate_range( # type: ignore[override]
445445
i8values = generate_regular_range(start, end, periods, freq, unit=unit)
446446
else:
447447
xdr = _generate_range(
448-
start=start, end=end, periods=periods, offset=freq
448+
start=start, end=end, periods=periods, offset=freq, unit=unit
449449
)
450450
i8values = np.array([x.value for x in xdr], dtype=np.int64)
451451

@@ -508,7 +508,10 @@ def _unbox_scalar(self, value) -> np.datetime64:
508508
if not isinstance(value, self._scalar_type) and value is not NaT:
509509
raise ValueError("'value' should be a Timestamp.")
510510
self._check_compatible_with(value)
511-
return value.asm8
511+
if value is NaT:
512+
return np.datetime64(value.value, self.unit)
513+
else:
514+
return value.as_unit(self.unit).asm8
512515

513516
def _scalar_from_string(self, value) -> Timestamp | NaTType:
514517
return Timestamp(value, tz=self.tz)
@@ -2475,6 +2478,8 @@ def _generate_range(
24752478
end: Timestamp | None,
24762479
periods: int | None,
24772480
offset: BaseOffset,
2481+
*,
2482+
unit: str,
24782483
):
24792484
"""
24802485
Generates a sequence of dates corresponding to the specified time
@@ -2486,7 +2491,8 @@ def _generate_range(
24862491
start : Timestamp or None
24872492
end : Timestamp or None
24882493
periods : int or None
2489-
offset : DateOffset,
2494+
offset : DateOffset
2495+
unit : str
24902496
24912497
Notes
24922498
-----
@@ -2506,13 +2512,20 @@ def _generate_range(
25062512
start = Timestamp(start) # type: ignore[arg-type]
25072513
# Non-overlapping identity check (left operand type: "Timestamp", right
25082514
# operand type: "NaTType")
2509-
start = start if start is not NaT else None # type: ignore[comparison-overlap]
2515+
if start is not NaT: # type: ignore[comparison-overlap]
2516+
start = start.as_unit(unit)
2517+
else:
2518+
start = None
2519+
25102520
# Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
25112521
# expected "Union[integer[Any], float, str, date, datetime64]"
25122522
end = Timestamp(end) # type: ignore[arg-type]
25132523
# Non-overlapping identity check (left operand type: "Timestamp", right
25142524
# operand type: "NaTType")
2515-
end = end if end is not NaT else None # type: ignore[comparison-overlap]
2525+
if end is not NaT: # type: ignore[comparison-overlap]
2526+
end = end.as_unit(unit)
2527+
else:
2528+
end = None
25162529

25172530
if start and not offset.is_on_offset(start):
25182531
# Incompatible types in assignment (expression has type "datetime",
@@ -2553,7 +2566,7 @@ def _generate_range(
25532566
break
25542567

25552568
# faster than cur + offset
2556-
next_date = offset._apply(cur)
2569+
next_date = offset._apply(cur).as_unit(unit)
25572570
if next_date <= cur:
25582571
raise ValueError(f"Offset {offset} did not increment date")
25592572
cur = next_date
@@ -2567,7 +2580,7 @@ def _generate_range(
25672580
break
25682581

25692582
# faster than cur + offset
2570-
next_date = offset._apply(cur)
2583+
next_date = offset._apply(cur).as_unit(unit)
25712584
if next_date >= cur:
25722585
raise ValueError(f"Offset {offset} did not decrement date")
25732586
cur = next_date

pandas/core/computation/pytables.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import numpy as np
1212

1313
from pandas._libs.tslibs import (
14+
NaT,
1415
Timedelta,
1516
Timestamp,
1617
)
@@ -216,6 +217,8 @@ def stringify(value):
216217
v = stringify(v)
217218
v = ensure_decoded(v)
218219
v = Timestamp(v)
220+
if v is not NaT:
221+
v = v.as_unit("ns") # pyright: ignore[reportGeneralTypeIssues]
219222
if v.tz is not None:
220223
v = v.tz_convert("UTC")
221224
return TermValue(v, v.value, kind)

pandas/core/dtypes/cast.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -754,16 +754,21 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
754754
elif isinstance(val, (np.datetime64, dt.datetime)):
755755
try:
756756
val = Timestamp(val)
757+
# error: Non-overlapping identity check (left operand type:
758+
# "Timestamp", right operand type: "NaTType")
759+
if val is not NaT: # type: ignore[comparison-overlap]
760+
val = val.as_unit("ns")
757761
except OutOfBoundsDatetime:
758762
return _dtype_obj, val
759763

760764
# error: Non-overlapping identity check (left operand type: "Timestamp",
761765
# right operand type: "NaTType")
762766
if val is NaT or val.tz is None: # type: ignore[comparison-overlap]
763-
dtype = np.dtype("M8[ns]")
764767
val = val.to_datetime64()
768+
dtype = val.dtype
765769
# TODO(2.0): this should be dtype = val.dtype
766770
# to get the correct M8 resolution
771+
# TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
767772
else:
768773
if pandas_dtype:
769774
dtype = DatetimeTZDtype(unit="ns", tz=val.tz)

pandas/core/indexes/datetimes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import warnings
1010

1111
import numpy as np
12+
import pytz
1213

1314
from pandas._libs import (
1415
NaT,
@@ -578,7 +579,7 @@ def get_loc(self, key, method=None, tolerance=None):
578579

579580
try:
580581
parsed, reso = self._parse_with_reso(key)
581-
except ValueError as err:
582+
except (ValueError, pytz.NonExistentTimeError) as err:
582583
raise KeyError(key) from err
583584
self._disallow_mismatched_indexing(parsed)
584585

0 commit comments

Comments
 (0)