From 866475710038c9034a9169640e1ab6dcbfea7d9c Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 16 Oct 2016 07:00:14 -0500 Subject: [PATCH 1/3] BUG: underflow on Timestamp creation --- doc/source/whatsnew/v0.19.1.txt | 1 + pandas/src/datetime/np_datetime.c | 21 ++++++++++++++------- pandas/tseries/tests/test_timeseries.py | 9 +++++++++ pandas/tslib.pyx | 12 ++++++++---- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 30593c1b204e7..b2facd4e2d0ec 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -46,6 +46,7 @@ Bug Fixes - ``pd.merge()`` will raise ``ValueError`` with non-boolean parameters in passed boolean type arguments (:issue:`14434`) +- Bug in ``Timestamp`` where dates very near the minimum (1677-09) could underflow on creation (:issue:`14415`) - Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`) - Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`) diff --git a/pandas/src/datetime/np_datetime.c b/pandas/src/datetime/np_datetime.c index 80703c8b08de6..d4b9de45618f3 100644 --- a/pandas/src/datetime/np_datetime.c +++ b/pandas/src/datetime/np_datetime.c @@ -846,7 +846,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, dt = dt % perday; } else { - set_datetimestruct_days((dt - (perday-1)) / perday, out); + set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1), + out); dt = (perday-1) + (dt + 1) % perday; } out->hour = dt; @@ -860,7 +861,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, dt = dt % perday; } else { - set_datetimestruct_days((dt - (perday-1)) / perday, out); + set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1), + out); dt = (perday-1) + (dt + 1) % perday; } out->hour = dt / 60; @@ -875,7 +877,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, dt = dt % perday; } else { - set_datetimestruct_days((dt - (perday-1)) / perday, out); + set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1), + out); dt = (perday-1) + (dt + 1) % perday; } out->hour = dt / (60*60); @@ -891,7 +894,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, dt = dt % perday; } else { - set_datetimestruct_days((dt - (perday-1)) / perday, out); + set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1), + out); dt = (perday-1) + (dt + 1) % perday; } out->hour = dt / (60*60*1000LL); @@ -908,7 +912,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, dt = dt % perday; } else { - set_datetimestruct_days((dt - (perday-1)) / perday, out); + set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1), + out); dt = (perday-1) + (dt + 1) % perday; } out->hour = dt / (60*60*1000000LL); @@ -925,7 +930,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, dt = dt % perday; } else { - set_datetimestruct_days((dt - (perday-1)) / perday, out); + set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1), + out); dt = (perday-1) + (dt + 1) % perday; } out->hour = dt / (60*60*1000000000LL); @@ -943,7 +949,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, dt = dt % perday; } else { - set_datetimestruct_days((dt - (perday-1)) / perday, out); + set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1), + out); dt = (perday-1) + (dt + 1) % perday; } out->hour = dt / (60*60*1000000000000LL); diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index f640b3974b360..c13805d383e5d 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4463,6 +4463,15 @@ def test_basics_nanos(self): self.assertEqual(stamp.microsecond, 0) self.assertEqual(stamp.nanosecond, 500) + # GH 14415 + val = np.iinfo(np.int64).min + 80000000000000 + stamp = Timestamp(val) + self.assertEqual(stamp.year, 1677) + self.assertEqual(stamp.month, 9) + self.assertEqual(stamp.day, 21) + self.assertEqual(stamp.microsecond, 145224) + self.assertEqual(stamp.nanosecond, 192) + def test_unit(self): def check(val, unit=None, h=1, s=1, us=0): diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index bab45595cd60f..464479e29f09c 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -24,6 +24,11 @@ from cpython cimport ( PyUnicode_AsUTF8String, ) + +cdef extern from "headers/stdint.h": + enum: INT64_MAX + enum: INT64_MIN + # Cython < 0.17 doesn't have this in cpython cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) @@ -904,10 +909,9 @@ cpdef object get_value_box(ndarray arr, object loc): # Add the min and max fields at the class level -# These are defined as magic numbers due to strange -# wraparound behavior when using the true int64 lower boundary -cdef int64_t _NS_LOWER_BOUND = -9223285636854775000LL -cdef int64_t _NS_UPPER_BOUND = 9223372036854775807LL +# INT64_MIN is reserved for NaT +cdef int64_t _NS_LOWER_BOUND = INT64_MIN + 1 +cdef int64_t _NS_UPPER_BOUND = INT64_MAX cdef pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS pandas_datetime_to_datetimestruct(_NS_LOWER_BOUND, PANDAS_FR_ns, &_NS_MIN_DTS) From df279e5486c3591cd401510fb76f0bc2c3654894 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 16 Oct 2016 16:07:56 -0500 Subject: [PATCH 2/3] undo change to lower bound --- pandas/lib.pyx | 9 ++------- pandas/src/inference.pyx | 16 +++------------- pandas/src/util.pxd | 14 ++++++++++++++ pandas/tslib.pyx | 13 ++++++------- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index b56a02b245d69..ef3407ffd5388 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -65,13 +65,8 @@ cdef int64_t NPY_NAT = util.get_nat() ctypedef unsigned char UChar cimport util -from util cimport is_array, _checknull, _checknan - -cdef extern from "headers/stdint.h": - enum: UINT8_MAX - enum: INT64_MAX - enum: INT64_MIN - +from util cimport (is_array, _checknull, _checknan, INT64_MAX, + INT64_MIN, UINT8_MAX) cdef extern from "math.h": double sqrt(double x) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 4fa730eac0fd1..5ac2c70bb1808 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -6,19 +6,9 @@ iNaT = util.get_nat() cdef bint PY2 = sys.version_info[0] == 2 -cdef extern from "headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN +from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX, + INT8_MIN, INT8_MAX, INT16_MIN, INT16_MAX, + INT32_MAX, INT32_MIN, INT64_MAX, INT64_MIN) # core.common import for fast inference checks diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index fcb5583a0a6e7..fdbfbf62af7d2 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -38,6 +38,20 @@ ctypedef fused numeric: cnp.float32_t cnp.float64_t +cdef extern from "headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + cdef inline object get_value_at(ndarray arr, object loc): cdef: Py_ssize_t i, sz diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 464479e29f09c..595e5f9545757 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -25,10 +25,6 @@ from cpython cimport ( ) -cdef extern from "headers/stdint.h": - enum: INT64_MAX - enum: INT64_MIN - # Cython < 0.17 doesn't have this in cpython cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) @@ -42,7 +38,7 @@ from datetime cimport cmp_pandas_datetimestruct from libc.stdlib cimport free from util cimport (is_integer_object, is_float_object, is_datetime64_object, - is_timedelta64_object) + is_timedelta64_object, INT64_MAX) cimport util from datetime cimport * @@ -909,9 +905,12 @@ cpdef object get_value_box(ndarray arr, object loc): # Add the min and max fields at the class level -# INT64_MIN is reserved for NaT -cdef int64_t _NS_LOWER_BOUND = INT64_MIN + 1 cdef int64_t _NS_UPPER_BOUND = INT64_MAX +# smallest value we could actually represent is +# INT64_MIN + 1 == -9223372036854775807 +# but to allow overflow free conversion with a microsecond resolution +# use the smallest value with a 0 nanosecond unit +cdef int64_t _NS_LOWER_BOUND = -9223285636854775000LL cdef pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS pandas_datetime_to_datetimestruct(_NS_LOWER_BOUND, PANDAS_FR_ns, &_NS_MIN_DTS) From 4b7674ca11fc027fbca4f7878bddb1c64f6f2733 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 16 Oct 2016 17:26:31 -0500 Subject: [PATCH 3/3] change lower bound; but keep rounding to us --- pandas/tslib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 595e5f9545757..81e721e610cc6 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -906,11 +906,11 @@ cpdef object get_value_box(ndarray arr, object loc): # Add the min and max fields at the class level cdef int64_t _NS_UPPER_BOUND = INT64_MAX -# smallest value we could actually represent is +# the smallest value we could actually represent is # INT64_MIN + 1 == -9223372036854775807 # but to allow overflow free conversion with a microsecond resolution -# use the smallest value with a 0 nanosecond unit -cdef int64_t _NS_LOWER_BOUND = -9223285636854775000LL +# use the smallest value with a 0 nanosecond unit (0s in last 3 digits) +cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 cdef pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS pandas_datetime_to_datetimestruct(_NS_LOWER_BOUND, PANDAS_FR_ns, &_NS_MIN_DTS)