Skip to content

Commit 6b9512b

Browse files
committed
CLN: replace _interleaved_dtype with _find_common_type
1 parent 783ae69 commit 6b9512b

File tree

6 files changed

+53
-59
lines changed

6 files changed

+53
-59
lines changed

Diff for: doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ Bug Fixes
826826

827827

828828
- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
829-
- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`)
829+
- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`)
830830

831831
- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)
832832

Diff for: pandas/core/internals.py

+9-50
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99

1010
from pandas.core.base import PandasObject
1111

12-
from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype
12+
from pandas.types.dtypes import (ExtensionDtype, DatetimeTZDtype,
13+
CategoricalDtype)
1314
from pandas.types.common import (_TD_DTYPE, _NS_DTYPE,
1415
_ensure_int64, _ensure_platform_int,
1516
is_integer,
@@ -4496,55 +4497,13 @@ def _interleaved_dtype(blocks):
44964497
if not len(blocks):
44974498
return None
44984499

4499-
counts = defaultdict(list)
4500-
for x in blocks:
4501-
counts[type(x)].append(x)
4502-
4503-
have_int = len(counts[IntBlock]) > 0
4504-
have_bool = len(counts[BoolBlock]) > 0
4505-
have_object = len(counts[ObjectBlock]) > 0
4506-
have_float = len(counts[FloatBlock]) > 0
4507-
have_complex = len(counts[ComplexBlock]) > 0
4508-
have_dt64 = len(counts[DatetimeBlock]) > 0
4509-
have_dt64_tz = len(counts[DatetimeTZBlock]) > 0
4510-
have_td64 = len(counts[TimeDeltaBlock]) > 0
4511-
have_cat = len(counts[CategoricalBlock]) > 0
4512-
# TODO: have_sparse is not used
4513-
have_sparse = len(counts[SparseBlock]) > 0 # noqa
4514-
have_numeric = have_float or have_complex or have_int
4515-
has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat
4516-
4517-
if (have_object or
4518-
(have_bool and
4519-
(have_numeric or have_dt64 or have_dt64_tz or have_td64)) or
4520-
(have_numeric and has_non_numeric) or have_cat or have_dt64 or
4521-
have_dt64_tz or have_td64):
4522-
return np.dtype(object)
4523-
elif have_bool:
4524-
return np.dtype(bool)
4525-
elif have_int and not have_float and not have_complex:
4526-
# if we are mixing unsigned and signed, then return
4527-
# the next biggest int type (if we can)
4528-
lcd = _find_common_type([b.dtype for b in counts[IntBlock]])
4529-
kinds = set([i.dtype.kind for i in counts[IntBlock]])
4530-
if len(kinds) == 1:
4531-
return lcd
4532-
4533-
if lcd == 'uint64' or lcd == 'int64':
4534-
return np.dtype('int64')
4535-
4536-
# return 1 bigger on the itemsize if unsigned
4537-
if lcd.kind == 'u':
4538-
return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
4539-
return lcd
4540-
4541-
elif have_int and have_float and not have_complex:
4542-
return np.dtype('float64')
4543-
elif have_complex:
4544-
return np.dtype('c16')
4545-
else:
4546-
introspection_blks = counts[FloatBlock] + counts[SparseBlock]
4547-
return _find_common_type([b.dtype for b in introspection_blks])
4500+
dtype = _find_common_type([b.dtype for b in blocks])
4501+
4502+
# only numpy compat
4503+
if isinstance(dtype, ExtensionDtype):
4504+
dtype = np.object
4505+
4506+
return dtype
45484507

45494508

45504509
def _consolidate(blocks):

Diff for: pandas/tests/indexing/test_coercion.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -1190,12 +1190,9 @@ def _assert_replace_conversion(self, from_key, to_key, how):
11901190
pytest.skip("windows platform buggy: {0} -> {1}".format
11911191
(from_key, to_key))
11921192

1193-
if ((from_key == 'float64' and to_key in ('bool', 'int64')) or
1193+
if ((from_key == 'float64' and to_key in ('int64')) or
11941194
(from_key == 'complex128' and
1195-
to_key in ('bool', 'int64', 'float64')) or
1196-
1197-
# GH12747 The result must be int?
1198-
(from_key == 'int64' and to_key in ('bool'))):
1195+
to_key in ('bool', 'int64', 'float64'))):
11991196

12001197
# buggy on 32-bit
12011198
if tm.is_platform_32bit():

Diff for: pandas/tests/series/test_replace.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,8 @@ def check_replace(to_rep, val, expected):
152152
tr, v = [3, 4], [3.5, pd.Timestamp('20130101')]
153153
check_replace(tr, v, e)
154154

155-
# casts to float
156-
e = pd.Series([0, 1, 2, 3.5, 1])
155+
# casts to object
156+
e = pd.Series([0, 1, 2, 3.5, True], dtype='object')
157157
tr, v = [3, 4], [3.5, True]
158158
check_replace(tr, v, e)
159159

Diff for: pandas/tests/types/test_cast.py

+14
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,20 @@ def test_numpy_dtypes(self):
238238
((np.object, np.float32), np.object),
239239
((np.object, np.int16), np.object),
240240

241+
# bool with int
242+
((np.dtype('bool'), np.int64), np.object),
243+
((np.dtype('bool'), np.int32), np.object),
244+
((np.dtype('bool'), np.int16), np.object),
245+
((np.dtype('bool'), np.int8), np.object),
246+
((np.dtype('bool'), np.uint64), np.object),
247+
((np.dtype('bool'), np.uint32), np.object),
248+
((np.dtype('bool'), np.uint16), np.object),
249+
((np.dtype('bool'), np.uint8), np.object),
250+
251+
# bool with float
252+
((np.dtype('bool'), np.float64), np.object),
253+
((np.dtype('bool'), np.float32), np.object),
254+
241255
((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')),
242256
np.dtype('datetime64[ns]')),
243257
((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')),

Diff for: pandas/types/cast.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -892,12 +892,28 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):
892892

893893

894894
def _find_common_type(types):
895-
"""Find a common data type among the given dtypes."""
895+
"""
896+
Find a common data type among the given dtypes.
897+
898+
Parameters
899+
----------
900+
types : list of dtypes
901+
902+
Returns
903+
-------
904+
pandas extension or numpy dtype
905+
906+
See Also
907+
--------
908+
numpy.find_common_type
909+
910+
"""
896911

897912
if len(types) == 0:
898913
raise ValueError('no types given')
899914

900915
first = types[0]
916+
901917
# workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
902918
# => object
903919
if all(is_dtype_equal(first, t) for t in types[1:]):
@@ -912,4 +928,12 @@ def _find_common_type(types):
912928
if all(is_timedelta64_dtype(t) for t in types):
913929
return np.dtype('timedelta64[ns]')
914930

931+
# don't mix bool / int or float
932+
# this is different from numpy, which casts bool/int as int
933+
has_bools = any(is_bool_dtype(t) for t in types)
934+
has_ints = any(is_integer_dtype(t) for t in types)
935+
has_floats = any(is_float_dtype(t) for t in types)
936+
if has_bools and (has_ints or has_floats):
937+
return np.object
938+
915939
return np.find_common_type(types, [])

0 commit comments

Comments
 (0)