Skip to content

Commit 7a587f6

Browse files
committed
Use argument dtype to inform coercion
Master:

```python
>>> import dask.dataframe as dd
>>> s = dd.core.Scalar({('s', 0): 10}, 's', 'i8')
>>> pdf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
...                     'b': [7, 6, 5, 4, 3, 2, 1]})
>>> (pdf + s).dtypes
a    object
b    object
dtype: object
```

Head:

```python
>>> (pdf + s).dtypes
a    int64
b    int64
dtype: object
```

This is more consistent with 0.20.3, while still keeping most of the changes in pandas-dev#16821.

Closes pandas-dev#17767
1 parent 2ff1241 commit 7a587f6

File tree

3 files changed

+114
-20
lines changed

3 files changed

+114
-20
lines changed

Diff for: pandas/core/dtypes/cast.py

+34
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,39 @@ def infer_dtype_from_array(arr, pandas_dtype=False):
483483
return arr.dtype, arr
484484

485485

486+
def _maybe_infer_dtype_type(element):
    """Try to infer an object's dtype's type, for use in arithmetic ops

    Uses `element.dtype` if that's available.
    Objects implementing the iterator protocol are cast to a NumPy array,
    and from there the array's type is used.

    Parameters
    ----------
    element : object
        Possibly has a `.dtype` attribute, and possibly the iterator
        protocol.

    Returns
    -------
    tipo : type or None
        ``element.dtype.type`` when `element` exposes a dtype, the scalar
        type of ``np.asarray(element)`` when `element` is list-like, and
        None when neither applies.

    Examples
    --------
    >>> from collections import namedtuple
    >>> Foo = namedtuple("Foo", "dtype")
    >>> _maybe_infer_dtype_type(Foo(np.dtype("i8"))) is np.int64
    True
    >>> _maybe_infer_dtype_type([1.0, 2.0]) is np.float64
    True
    >>> _maybe_infer_dtype_type(1) is None
    True
    """
    tipo = None
    if hasattr(element, 'dtype'):
        # Use the declared dtype directly; the element itself is not
        # converted to an array in this branch.
        tipo = element.dtype.type
    elif is_list_like(element):
        # No dtype attribute: let NumPy infer one from the contents.
        element = np.asarray(element)
        tipo = element.dtype.type
    return tipo
517+
518+
486519
def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
487520
""" provide explict type promotion and coercion
488521
@@ -509,6 +542,7 @@ def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
509542
return values, fill_value
510543

511544

545+
512546
def maybe_cast_item(obj, item, dtype):
513547
chunk = obj[item]
514548

Diff for: pandas/core/internals.py

+19-20
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@
4444
soft_convert_objects,
4545
maybe_convert_objects,
4646
astype_nansafe,
47-
find_common_type)
47+
find_common_type,
48+
_maybe_infer_dtype_type)
4849
from pandas.core.dtypes.missing import (
4950
isna, notna, array_equivalent,
5051
_isna_compat,
@@ -629,9 +630,8 @@ def convert(self, copy=True, **kwargs):
629630
def _can_hold_element(self, element):
630631
""" require the same dtype as ourselves """
631632
dtype = self.values.dtype.type
632-
if is_list_like(element):
633-
element = np.asarray(element)
634-
tipo = element.dtype.type
633+
tipo = _maybe_infer_dtype_type(element)
634+
if tipo:
635635
return issubclass(tipo, dtype)
636636
return isinstance(element, dtype)
637637

@@ -1806,9 +1806,8 @@ class FloatBlock(FloatOrComplexBlock):
18061806
_downcast_dtype = 'int64'
18071807

18081808
def _can_hold_element(self, element):
1809-
if is_list_like(element):
1810-
element = np.asarray(element)
1811-
tipo = element.dtype.type
1809+
tipo = _maybe_infer_dtype_type(element)
1810+
if tipo:
18121811
return (issubclass(tipo, (np.floating, np.integer)) and
18131812
not issubclass(tipo, (np.datetime64, np.timedelta64)))
18141813
return (isinstance(element, (float, int, np.floating, np.int_)) and
@@ -1856,9 +1855,9 @@ class ComplexBlock(FloatOrComplexBlock):
18561855
is_complex = True
18571856

18581857
def _can_hold_element(self, element):
1859-
if is_list_like(element):
1860-
element = np.array(element)
1861-
return issubclass(element.dtype.type,
1858+
tipo = _maybe_infer_dtype_type(element)
1859+
if tipo:
1860+
return issubclass(tipo,
18621861
(np.floating, np.integer, np.complexfloating))
18631862
return (isinstance(element,
18641863
(float, int, complex, np.float_, np.int_)) and
@@ -1874,9 +1873,8 @@ class IntBlock(NumericBlock):
18741873
_can_hold_na = False
18751874

18761875
def _can_hold_element(self, element):
1877-
if is_list_like(element):
1878-
element = np.array(element)
1879-
tipo = element.dtype.type
1876+
tipo = _maybe_infer_dtype_type(element)
1877+
if tipo:
18801878
return (issubclass(tipo, np.integer) and
18811879
not issubclass(tipo, (np.datetime64, np.timedelta64)) and
18821880
self.dtype.itemsize >= element.dtype.itemsize)
@@ -1917,9 +1915,8 @@ def _box_func(self):
19171915
return lambda x: tslib.Timedelta(x, unit='ns')
19181916

19191917
def _can_hold_element(self, element):
1920-
if is_list_like(element):
1921-
element = np.array(element)
1922-
tipo = element.dtype.type
1918+
tipo = _maybe_infer_dtype_type(element)
1919+
if tipo:
19231920
return issubclass(tipo, np.timedelta64)
19241921
return isinstance(element, (timedelta, np.timedelta64))
19251922

@@ -2018,9 +2015,9 @@ class BoolBlock(NumericBlock):
20182015
_can_hold_na = False
20192016

20202017
def _can_hold_element(self, element):
2021-
if is_list_like(element):
2022-
element = np.asarray(element)
2023-
return issubclass(element.dtype.type, np.bool_)
2018+
tipo = _maybe_infer_dtype_type(element)
2019+
if tipo:
2020+
return issubclass(tipo, np.bool_)
20242021
return isinstance(element, (bool, np.bool_))
20252022

20262023
def should_store(self, value):
@@ -2450,7 +2447,9 @@ def _astype(self, dtype, mgr=None, **kwargs):
24502447
return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs)
24512448

24522449
def _can_hold_element(self, element):
2453-
if is_list_like(element):
2450+
tipo = _maybe_infer_dtype_type(element)
2451+
if tipo:
2452+
# TODO: this still uses asarray, instead of dtype.type
24542453
element = np.array(element)
24552454
return element.dtype == _NS_DTYPE or element.dtype == np.int64
24562455
return (is_integer(element) or isinstance(element, datetime) or

Diff for: pandas/tests/internals/test_internals.py

+61
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=W0102
33

44
from datetime import datetime, date
5+
import operator
56
import sys
67
import pytest
78
import numpy as np
@@ -1213,3 +1214,63 @@ def assert_add_equals(val, inc, result):
12131214

12141215
with pytest.raises(ValueError):
12151216
BlockPlacement(slice(2, None, -1)).add(-1)
1217+
1218+
1219+
class DummyElement(object):
    """Minimal scalar-like test double that carries an explicit dtype.

    Wraps a single `value` and exposes a NumPy `dtype` attribute plus the
    few array-style methods defined below (`__array__`, `astype`, `view`,
    `any`).

    Parameters
    ----------
    value : object
        The wrapped value.
    dtype : str or numpy.dtype
        Anything accepted by ``np.dtype``; stored normalized on `dtype`.
    """

    def __init__(self, value, dtype):
        self.value = value
        self.dtype = np.dtype(dtype)

    def __array__(self, dtype=None, copy=None):
        """Return the wrapped value as a NumPy array.

        `dtype` and `copy` are optional so that the method can be called
        both with no arguments and with the arguments of NumPy's
        ``__array__`` protocol. `copy` is accepted for signature
        compatibility only; a new array is always built.
        """
        arr = np.array(self.value, dtype=self.dtype)
        if dtype is not None:
            arr = arr.astype(dtype, copy=False)
        return arr

    def __str__(self):
        return "DummyElement({}, {})".format(self.value, self.dtype)

    def __repr__(self):
        return str(self)

    def astype(self, dtype, copy=False):
        """Set `dtype` in place and return this same instance.

        `copy` is ignored, and `dtype` is stored exactly as given (it is
        not passed through ``np.dtype`` here).
        """
        self.dtype = dtype
        return self

    def view(self, dtype):
        """Return a new instance wrapping ``self.value.view(dtype)``.

        Requires the wrapped value to provide a ``view`` method.
        """
        return type(self)(self.value.view(dtype), dtype)

    def any(self, axis=None):
        """Return the truthiness of the wrapped value; `axis` is ignored."""
        return bool(self.value)
1242+
1243+
1244+
class TestCanHoldElement(object):
    """Binary ops between a DataFrame and a dtype-carrying DummyElement."""

    @pytest.mark.parametrize('value, dtype', [
        (1, 'i8'),
        (1.0, 'f8'),
        (1j, 'complex128'),
        (True, 'bool'),
        # (np.timedelta64(20, 'ns'), '<m8[ns]'),
        (np.datetime64(20, 'ns'), '<M8[ns]'),
    ])
    @pytest.mark.parametrize('op', [
        operator.add,
        operator.sub,
        operator.mul,
        operator.truediv,
        operator.mod,
        operator.pow,
    ], ids=lambda x: x.__name__)
    def test_binop_other(self, op, value, dtype):
        """The result dtypes of ``op(frame, element)`` must match those of
        ``op(frame, value)`` for the plain scalar."""
        # Operators that are skipped, grouped by the dtype they are
        # skipped for.
        invalid_ops = {
            'bool': (operator.add, operator.sub, operator.mul,
                     operator.truediv, operator.pow),
            'i8': (operator.mod,),
            'complex128': (operator.mod,),
            '<M8[ns]': (operator.mod,),
        }
        if op in invalid_ops.get(dtype, ()):
            pytest.skip("Invalid combination {},{}".format(op, dtype))

        element = DummyElement(value, dtype)
        frame = pd.DataFrame({"A": [element.value, element.value]},
                             dtype=element.dtype)
        result = op(frame, element).dtypes
        expected = op(frame, value).dtypes
        assert_series_equal(result, expected)

0 commit comments

Comments
 (0)