From 73411ece7bfde51b055a08c4d68ea1cf38b35899 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 24 Jun 2019 17:08:24 -0700 Subject: [PATCH 1/4] Restrict DTA to 1D --- pandas/core/algorithms.py | 6 ++++++ pandas/core/arrays/datetimes.py | 2 ++ pandas/io/formats/format.py | 5 +++++ pandas/tests/arrays/test_datetimes.py | 12 ++++++++++++ 4 files changed, 25 insertions(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 932ac71a23ed0..036d90be56b96 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -104,6 +104,12 @@ def _ensure_data(values, dtype=None): dtype = values.dtype else: # Datetime + if values.ndim > 1: + # Avoid calling the DatetimeIndex constructor as it is 1D only + asi8 = values.view('i8') + dtype = values.dtype + return asi8, dtype, 'int64' + from pandas import DatetimeIndex values = DatetimeIndex(values) dtype = values.dtype diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6e7217762a3fb..eaa0278da6dc3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -309,6 +309,8 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): "ndarray, or Series or Index containing one of those." 
) raise ValueError(msg.format(type(values).__name__)) + if values.ndim != 1: + raise ValueError("Only 1-dimensional input arrays are supported.") if values.dtype == 'i8': # for compat with datetime/timedelta/period shared methods, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b2ef45b15e549..efa0b5ac14508 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1273,6 +1273,11 @@ def format_percentiles(percentiles): def _is_dates_only(values): # return a boolean if we are only dates (and don't have a timezone) + if isinstance(values, np.ndarray) and values.ndim > 1: + # We don't actaully care about the order of values, and DatetimeIndex + # only accepts 1D values + values = values.ravel() + values = DatetimeIndex(values) if values.tz is not None: return False diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1218527f6fd9b..c7c0e1180ce46 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -15,6 +15,18 @@ class TestDatetimeArrayConstructor: + + def test_only_1dim_accepted(self): + arr = np.array([0, 1, 2, 3], dtype='M8[h]').astype('M8[ns]') + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 2-dim + DatetimeArray(arr.reshape(2, 2)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + DatetimeArray(arr[[0]].squeeze()) + def test_freq_validation(self): # GH#24623 check that invalid instances cannot be created with the # public constructor From e8615300b2cd9ad2b01649205a6c1fa672ec415e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 13:28:08 -0500 Subject: [PATCH 2/4] address comments --- pandas/core/algorithms.py | 14 +++++++------- pandas/io/formats/format.py | 11 +++++++---- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 036d90be56b96..6740f3728618f 100644 --- a/pandas/core/algorithms.py +++ 
b/pandas/core/algorithms.py @@ -16,12 +16,12 @@ from pandas.core.dtypes.common import ( ensure_float64, ensure_int64, ensure_object, ensure_platform_int, ensure_uint64, is_array_like, is_bool_dtype, is_categorical_dtype, - is_complex_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, - is_datetimelike, is_extension_array_dtype, is_float_dtype, is_integer, - is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype, - is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype, - is_sparse, is_timedelta64_dtype, is_unsigned_integer_dtype, - needs_i8_conversion) + is_complex_dtype, is_datetime64_any_dtype, is_datetime64ns_dtype, + is_datetime64tz_dtype, is_datetimelike, is_extension_array_dtype, + is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype, + is_list_like, is_numeric_dtype, is_object_dtype, is_period_dtype, + is_scalar, is_signed_integer_dtype, is_sparse, is_timedelta64_dtype, + is_unsigned_integer_dtype, needs_i8_conversion) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, na_value_for_dtype @@ -104,7 +104,7 @@ def _ensure_data(values, dtype=None): dtype = values.dtype else: # Datetime - if values.ndim > 1: + if values.ndim > 1 and is_datetime64ns_dtype(values): # Avoid calling the DatetimeIndex constructor as it is 1D only asi8 = values.view('i8') dtype = values.dtype diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index efa0b5ac14508..1efcb63269454 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1273,10 +1273,7 @@ def format_percentiles(percentiles): def _is_dates_only(values): # return a boolean if we are only dates (and don't have a timezone) - if isinstance(values, np.ndarray) and values.ndim > 1: - # We don't actaully care about the order of values, and DatetimeIndex - # only accepts 1D values - values = values.ravel() + assert values.ndim == 1 values = DatetimeIndex(values) if values.tz is not 
None: @@ -1329,6 +1326,12 @@ def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None): def _get_format_datetime64_from_values(values, date_format): """ given values and a date_format, return a string format """ + + if isinstance(values, np.ndarray) and values.ndim > 1: + # We don't actually care about the order of values, and DatetimeIndex + # only accepts 1D values + values = values.ravel() + is_dates_only = _is_dates_only(values) if is_dates_only: return date_format or "%Y-%m-%d" From d59f41cb8b7720f80c33880be70425c8bdd4b5d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 14:45:59 -0500 Subject: [PATCH 3/4] typo fixup --- pandas/core/algorithms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6740f3728618f..9303f40f8422e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -16,7 +16,7 @@ from pandas.core.dtypes.common import ( ensure_float64, ensure_int64, ensure_object, ensure_platform_int, ensure_uint64, is_array_like, is_bool_dtype, is_categorical_dtype, - is_complex_dtype, is_datetime64_any_dtype, is_datetime64ns_dtype, + is_complex_dtype, is_datetime64_any_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimelike, is_extension_array_dtype, is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype, is_object_dtype, is_period_dtype, @@ -104,7 +104,7 @@ def _ensure_data(values, dtype=None): dtype = values.dtype else: # Datetime - if values.ndim > 1 and is_datetime64ns_dtype(values): + if values.ndim > 1 and is_datetime64_ns_dtype(values): # Avoid calling the DatetimeIndex constructor as it is 1D only asi8 = values.view('i8') dtype = values.dtype From 8f99a00a907f9bb114d6c1d7b5fda84e94671c00 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 27 Jun 2019 15:27:58 -0500 Subject: [PATCH 4/4] rank comment --- pandas/core/algorithms.py | 1 + 1 file changed, 1 insertion(+) 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9303f40f8422e..bdb73fa246d4f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -106,6 +106,7 @@ def _ensure_data(values, dtype=None): # Datetime if values.ndim > 1 and is_datetime64_ns_dtype(values): # Avoid calling the DatetimeIndex constructor as it is 1D only + # Note: this is reached by DataFrame.rank calls; see GH#27027 asi8 = values.view('i8') dtype = values.dtype return asi8, dtype, 'int64'