From dc87874475ef006682cb8e09876733fcb035fa95 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 21 Dec 2020 13:33:11 +0000 Subject: [PATCH 1/2] Revert "REF: use astype_nansafe in Index.astype (#38518)" This reverts commit 7043f8fa9d4d97782ec0d0d1a4c3b57573a7fc21. --- pandas/core/indexes/base.py | 20 ++++++++++---------- pandas/core/indexes/numeric.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6e2bbc5e3a0e6..2a5d6db41a56d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -33,7 +33,6 @@ from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.cast import ( - astype_nansafe, find_common_type, maybe_cast_to_integer_array, maybe_promote, @@ -694,21 +693,22 @@ def astype(self, dtype, copy=True): if is_dtype_equal(self.dtype, dtype): return self.copy() if copy else self - if needs_i8_conversion(dtype) and is_float_dtype(self.dtype): - # We can't put this into astype_nansafe bc astype_nansafe allows - # casting np.nan to NaT - raise TypeError( - f"Cannot convert {type(self).__name__} to dtype {dtype}; integer " - "values are required for conversion" + elif is_categorical_dtype(dtype): + from pandas.core.indexes.category import CategoricalIndex + + return CategoricalIndex( + self._values, name=self.name, dtype=dtype, copy=copy ) + elif is_extension_array_dtype(dtype): + return Index(np.asarray(self), name=self.name, dtype=dtype, copy=copy) + try: - casted = astype_nansafe(self._values, dtype=dtype, copy=True) - except TypeError as err: + casted = self._values.astype(dtype, copy=copy) + except (TypeError, ValueError) as err: raise TypeError( f"Cannot cast {type(self).__name__} to dtype {dtype}" ) from err - return Index(casted, name=self.name, dtype=dtype) _index_shared_docs[ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index d6f91c9a06739..91d27d9922aa5 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -7,10 +7,12 @@ from pandas._typing import Dtype, DtypeObj, Label from pandas.util._decorators import doc +from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( is_bool, is_bool_dtype, is_dtype_equal, + is_extension_array_dtype, is_float, is_float_dtype, is_integer_dtype, @@ -19,6 +21,8 @@ is_scalar, is_signed_integer_dtype, is_unsigned_integer_dtype, + needs_i8_conversion, + pandas_dtype, ) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna @@ -328,6 +332,21 @@ def inferred_type(self) -> str: """ return "floating" + @doc(Index.astype) + def astype(self, dtype, copy=True): + dtype = pandas_dtype(dtype) + if needs_i8_conversion(dtype): + raise TypeError( + f"Cannot convert Float64Index to dtype {dtype}; integer " + "values are required for conversion" + ) + elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): + # TODO(jreback); this can change once we have an EA Index type + # GH 13149 + arr = astype_nansafe(self._values, dtype=dtype) + return Int64Index(arr, name=self.name) + return super().astype(dtype, copy=copy) + # ---------------------------------------------------------------- # Indexing Methods From ae7f748ed7ce6bb6b3b025a109bfb34d055f0f2c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 21 Dec 2020 13:50:09 +0000 Subject: [PATCH 2/2] add test --- pandas/tests/indexes/object/test_astype.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 pandas/tests/indexes/object/test_astype.py diff --git a/pandas/tests/indexes/object/test_astype.py b/pandas/tests/indexes/object/test_astype.py new file mode 100644 index 0000000000000..42c7b8eb4aeec --- /dev/null +++ b/pandas/tests/indexes/object/test_astype.py @@ -0,0 +1,10 @@ +from pandas import Index +import pandas.testing as tm + + +def test_astype_str_from_bytes(): + # https://github.com/pandas-dev/pandas/issues/38607 + idx = Index(["あ", b"a"], dtype="object") + result = idx.astype(str) + expected = Index(["あ", "a"], dtype="object") + tm.assert_index_equal(result, expected)