diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e4879a6c41515..c418801c0a9f2 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -581,6 +581,8 @@ Period ^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) +- Bug in inferring an incorrect ``freq`` when passing a string to :class:`Period` with microseconds that are a multiple of 1000 (:issue:`46811`) +- Bug in constructing a :class:`Period` from a :class:`Timestamp` or ``np.datetime64`` object with non-zero nanoseconds and ``freq="ns"`` incorrectly truncating the nanoseconds (:issue:`46811`) - Plotting diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c96a65cdff525..0b35347fbd618 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -27,6 +27,7 @@ cnp.import_array() import pytz from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, check_dts_bounds, dt64_to_dtstruct, dtstruct_to_dt64, @@ -75,6 +76,7 @@ def _test_parse_iso8601(ts: str): cdef: _TSObject obj int out_local = 0, out_tzoffset = 0 + NPY_DATETIMEUNIT out_bestunit obj = _TSObject() @@ -83,7 +85,7 @@ def _test_parse_iso8601(ts: str): elif ts == 'today': return Timestamp.now().normalize() - string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True) + string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True) obj.value = dtstruct_to_dt64(&obj.dts) check_dts_bounds(&obj.dts) if out_local == 1: @@ -428,6 +430,7 @@ cpdef array_to_datetime( ndarray[int64_t] iresult ndarray[object] oresult npy_datetimestruct dts + NPY_DATETIMEUNIT out_bestunit bint utc_convert = bool(utc) bint seen_integer = False bint seen_string = False @@ -516,7 +519,7 @@ cpdef array_to_datetime( continue string_to_dts_failed = string_to_dts( - val, &dts, &out_local, + val, 
&dts, &out_bestunit, &out_local, &out_tzoffset, False ) if string_to_dts_failed: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ff54c399f6435..c876cc55be0be 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -586,6 +586,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, int out_local = 0, out_tzoffset = 0, string_to_dts_failed datetime dt int64_t ival + NPY_DATETIMEUNIT out_bestunit if len(ts) == 0 or ts in nat_strings: ts = NaT @@ -604,7 +605,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, # equiv: datetime.today().replace(tzinfo=tz) else: string_to_dts_failed = string_to_dts( - ts, &dts, &out_local, + ts, &dts, &out_bestunit, &out_local, &out_tzoffset, False ) if not string_to_dts_failed: diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index ecb318026f97b..f072dab3763aa 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -90,6 +90,7 @@ cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil cdef int string_to_dts( str val, npy_datetimestruct* dts, + NPY_DATETIMEUNIT* out_bestunit, int* out_local, int* out_tzoffset, bint want_exc, diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index c611fec260ed0..07d198193464f 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -46,6 +46,7 @@ cdef extern from "src/datetime/np_datetime.h": cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset) @@ -255,6 +256,7 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts): cdef inline int string_to_dts( str val, npy_datetimestruct* dts, + NPY_DATETIMEUNIT* out_bestunit, int* out_local, int* out_tzoffset, bint 
want_exc, @@ -265,7 +267,7 @@ cdef inline int string_to_dts( buf = get_c_string_buf_and_size(val, &length) return parse_iso_8601_datetime(buf, length, want_exc, - dts, out_local, out_tzoffset) + dts, out_bestunit, out_local, out_tzoffset) cpdef ndarray astype_overflowsafe( diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 242a39b67fc44..8b42ed195957b 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -53,6 +53,11 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + npy_datetimestruct, + string_to_dts, +) from pandas._libs.tslibs.offsets cimport is_offset_object from pandas._libs.tslibs.util cimport ( get_c_string_buf_and_size, @@ -350,6 +355,11 @@ cdef parse_datetime_string_with_reso( """ cdef: object parsed, reso + bint string_to_dts_failed + npy_datetimestruct dts + NPY_DATETIMEUNIT out_bestunit + int out_local + int out_tzoffset if not _does_string_look_like_datetime(date_string): raise ValueError('Given date string not likely a datetime.') @@ -358,6 +368,33 @@ cdef parse_datetime_string_with_reso( if parsed is not None: return parsed, reso + # Try iso8601 first, as it handles nanoseconds + # TODO: does this render some/all of parse_delimited_date redundant? 
+ string_to_dts_failed = string_to_dts( + date_string, &dts, &out_bestunit, &out_local, + &out_tzoffset, False + ) + if not string_to_dts_failed: + if dts.ps != 0 or out_local: + # TODO: the not-out_local case we could do without Timestamp; + # avoid circular import + from pandas import Timestamp + parsed = Timestamp(date_string) + else: + parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) + reso = { + NPY_DATETIMEUNIT.NPY_FR_Y: "year", + NPY_DATETIMEUNIT.NPY_FR_M: "month", + NPY_DATETIMEUNIT.NPY_FR_D: "day", + NPY_DATETIMEUNIT.NPY_FR_h: "hour", + NPY_DATETIMEUNIT.NPY_FR_m: "minute", + NPY_DATETIMEUNIT.NPY_FR_s: "second", + NPY_DATETIMEUNIT.NPY_FR_ms: "millisecond", + NPY_DATETIMEUNIT.NPY_FR_us: "microsecond", + NPY_DATETIMEUNIT.NPY_FR_ns: "nanosecond", + }[out_bestunit] + return parsed, reso + try: return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) except DateParseError: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 3d04562cb73c3..cfd358e61af9c 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2584,10 +2584,13 @@ class Period(_Period): dt = value if freq is None: raise ValueError('Must supply freq for datetime value') + if isinstance(dt, Timestamp): + nanosecond = dt.nanosecond elif util.is_datetime64_object(value): dt = Timestamp(value) if freq is None: raise ValueError('Must supply freq for datetime value') + nanosecond = dt.nanosecond elif PyDate_Check(value): dt = datetime(year=value.year, month=value.month, day=value.day) if freq is None: diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index 847e84b21c06c..f787a26ab51fb 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -68,11 +68,13 @@ This file implements string parsing and creation for NumPy datetime. 
*/ int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset) { int year_leap = 0; int i, numdigits; const char *substr; int sublen; + NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; /* If year-month-day are separated by a valid separator, * months/days without leading zeroes will be parsed @@ -137,6 +139,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (out_local != NULL) { *out_local = 0; } + bestunit = NPY_FR_Y; goto finish; } @@ -182,6 +185,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* Next character must be the separator, start of day, or end of string */ if (sublen == 0) { + bestunit = NPY_FR_M; /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ if (!has_ymd_sep) { goto parse_error; @@ -231,6 +235,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (out_local != NULL) { *out_local = 0; } + bestunit = NPY_FR_D; goto finish; } @@ -269,6 +274,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (!hour_was_2_digits) { goto parse_error; } + bestunit = NPY_FR_h; goto finish; } @@ -310,6 +316,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } if (sublen == 0) { + bestunit = NPY_FR_m; goto finish; } @@ -354,6 +361,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, ++substr; --sublen; } else { + bestunit = NPY_FR_s; goto parse_timezone; } @@ -370,6 +378,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_us; + } else { + bestunit = NPY_FR_ms; + } goto parse_timezone; } @@ -386,6 +399,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_ps; + } else { + bestunit = NPY_FR_ns; + } goto parse_timezone; } @@ 
-401,8 +419,14 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } } + if (numdigits > 3) { + bestunit = NPY_FR_as; + } else { + bestunit = NPY_FR_fs; + } + parse_timezone: - /* trim any whitespace between time/timeezone */ + /* trim any whitespace between time/timezone */ while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; @@ -521,6 +545,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } finish: + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } return 0; parse_error: diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h index 200a71ff0c2b7..2cc032c0e278c 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -56,6 +56,7 @@ This file implements string parsing and creation for NumPy datetime. int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, + NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset); diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 91578cbd7d614..f23d332c7e279 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -113,6 +113,21 @@ def test_construction(self): with pytest.raises(TypeError, match="pass as a string instead"): Period("1982", freq=("Min", 1)) + def test_construction_from_timestamp_nanos(self): + # GH#46811 don't drop nanos from Timestamp + ts = Timestamp("2022-04-20 09:23:24.123456789") + per = Period(ts, freq="ns") + + # should losslessly round-trip, not lose the 789 + rt = per.to_timestamp() + assert rt == ts + + # same thing but from a datetime64 object + dt64 = ts.asm8 + per2 = Period(dt64, freq="ns") + rt2 = per2.to_timestamp() + assert rt2.asm8 == dt64 + def test_construction_bday(self): # Biz day construction, roll forward if non-weekday @@ -324,8 +339,10 @@ def 
test_constructor_infer_freq(self): p = Period("2007-01-01 07:10:15.123") assert p.freq == "L" + # We see that there are 6 digits after the decimal, so get microsecond + # even though the trailing three digits are all zeros. p = Period("2007-01-01 07:10:15.123000") - assert p.freq == "L" + assert p.freq == "U" p = Period("2007-01-01 07:10:15.123400") assert p.freq == "U" diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 279a84b174e36..4dae6c586e306 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -23,6 +23,12 @@ def test_parse_time_string(): assert parsed == parsed_lower +def test_parse_time_string_nanosecond_reso(): + # GH#46811 + parsed, reso = parse_time_string("2022-04-20 09:19:19.123456789") + assert reso == "nanosecond" + + def test_parse_time_string_invalid_type(): # Raise on invalid input, don't just return it msg = "Argument 'arg' has incorrect type (expected str, got tuple)"