
Commit f04e2da

sinhrks authored and jreback committed
CLN/BUG: fix ndarray assignment may cause unexpected cast
supersedes pandas-dev#14145 closes pandas-dev#14001
1 parent ea487fc commit f04e2da

File tree

11 files changed  (+315 -77 lines)


doc/source/whatsnew/v0.21.0.txt  (+10)

@@ -107,6 +107,13 @@ Other API Changes
 - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`)
 - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`)
 - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)
+
+
+.. _whatsnew_0210.api:
+
+Other API Changes
+^^^^^^^^^^^^^^^^^
+
 - Moved definition of ``MergeError`` to the ``pandas.errors`` module.
 
 
@@ -148,6 +155,9 @@ Bug Fixes
 Conversion
 ^^^^^^^^^^
 
+- Bug in assignment against datetime-like data where an ``int`` may be incorrectly converted to datetime-like (:issue:`14145`)
+- Bug in assignment against ``int64`` data with an ``np.ndarray`` of ``float64`` dtype, which may incorrectly keep the ``int64`` dtype (:issue:`14001`)
+
 
 Indexing
 ^^^^^^^^
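For readers skimming the diff, here is a rough repro of the two whatsnew entries above. These are hypothetical snippets, not taken verbatim from the linked issues; the "expected" comments describe the intended behaviour around pandas 0.21 and may differ in other versions.

import numpy as np
import pandas as pd

# GH 14145: an int assigned into datetime64 data should no longer be
# silently reinterpreted as a nanosecond timestamp; the data is upcast
# so the integer survives as-is.
s = pd.Series([pd.Timestamp('2016-01-01')] * 3)
s[1] = 1
print(s.dtype)          # expected: object, with the literal 1 preserved

# GH 14001: a float64 ndarray assigned into int64 data should upcast the
# column to float64 instead of truncating into the existing int64 block.
df = pd.DataFrame({'a': [1, 2, 3]})
df.loc[:, 'a'] = np.array([1.1, 2.2, 3.3])
print(df['a'].dtype)    # expected: float64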

pandas/core/dtypes/cast.py  (+20 -4)

@@ -273,7 +273,7 @@ def maybe_promote(dtype, fill_value=np.nan):
     else:
         if issubclass(dtype.type, np.datetime64):
             try:
-                fill_value = lib.Timestamp(fill_value).value
+                fill_value = Timestamp(fill_value).value
             except:
                 # the proper thing to do here would probably be to upcast
                 # to object (but numpy 1.6.1 doesn't do this properly)
@@ -350,9 +350,9 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
 
     # a 1-element ndarray
     if isinstance(val, np.ndarray):
+        msg = "invalid ndarray passed to _infer_dtype_from_scalar"
         if val.ndim != 0:
-            raise ValueError(
-                "invalid ndarray passed to _infer_dtype_from_scalar")
+            raise ValueError(msg)
 
         dtype = val.dtype
         val = val.item()
@@ -553,7 +553,7 @@ def conv(r, dtype):
         if isnull(r):
             pass
         elif dtype == _NS_DTYPE:
-            r = lib.Timestamp(r)
+            r = Timestamp(r)
         elif dtype == _TD_DTYPE:
             r = _coerce_scalar_to_timedelta_type(r)
         elif dtype == np.bool_:
@@ -1027,3 +1027,19 @@ def find_common_type(types):
         return np.object
 
     return np.find_common_type(types, [])
+
+
+def _cast_scalar_to_array(shape, value, dtype=None):
+    """
+    create np.ndarray of specified shape and dtype, filled with values
+    """
+
+    if dtype is None:
+        dtype, fill_value = _infer_dtype_from_scalar(value)
+    else:
+        fill_value = value
+
+    values = np.empty(shape, dtype=dtype)
+    values.fill(fill_value)
+
+    return values
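The new private helper `_cast_scalar_to_array` centralizes the "broadcast a scalar into an ndarray of a given shape" pattern that was previously duplicated in frame.py and panel.py (see the hunks below). A standalone sketch of what it does, using only public numpy APIs and a deliberately simplified dtype inference (the real helper defers to the private `_infer_dtype_from_scalar`):

import numpy as np


def cast_scalar_to_array(shape, value, dtype=None):
    """Simplified stand-in for the private helper added in this commit:
    build an ndarray of `shape` filled with `value`, inferring the dtype
    from the scalar when none is given."""
    if dtype is None:
        inferred = np.array(value).dtype
        # pandas stores strings in object arrays rather than fixed-width
        # numpy string dtypes; mimic that here.
        dtype = np.object_ if inferred.kind in 'US' else inferred
    values = np.empty(shape, dtype=dtype)
    values.fill(value)
    return values


print(cast_scalar_to_array((2, 3), 5).dtype)              # int64 on 64-bit platforms
print(cast_scalar_to_array((2, 3), 'x').dtype)            # object
print(cast_scalar_to_array((2, 2), 1, dtype='float64'))   # 2x2 array of 1.0

The `dtype is None` branch is what lets a string scalar land on object dtype without the constructor-level special case that the frame.py hunk below removes.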

pandas/core/frame.py  (+8 -13)

@@ -59,6 +59,7 @@
                                        is_named_tuple)
 from pandas.core.dtypes.missing import isnull, notnull
 
+
 from pandas.core.common import (_try_sort,
                                 _default_index,
                                 _values_from_object,
@@ -385,15 +386,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 raise_with_traceback(exc)
 
             if arr.ndim == 0 and index is not None and columns is not None:
-                if isinstance(data, compat.string_types) and dtype is None:
-                    dtype = np.object_
-                if dtype is None:
-                    dtype, data = infer_dtype_from_scalar(data)
-
-                values = np.empty((len(index), len(columns)), dtype=dtype)
-                values.fill(data)
-                mgr = self._init_ndarray(values, index, columns, dtype=dtype,
-                                         copy=False)
+                values = _cast_scalar_to_array((len(index), len(columns)),
+                                               data, dtype=dtype)
+                mgr = self._init_ndarray(values, index, columns,
+                                         dtype=values.dtype, copy=False)
             else:
                 raise ValueError('DataFrame constructor not properly called!')
 
@@ -507,7 +503,7 @@ def _get_axes(N, K, index=index, columns=columns):
         values = _prep_ndarray(values, copy=copy)
 
         if dtype is not None:
-            if values.dtype != dtype:
+            if not is_dtype_equal(values.dtype, dtype):
                 try:
                     values = values.astype(dtype)
                 except Exception as orig:
@@ -2689,9 +2685,8 @@ def reindexer(value):
 
         else:
             # upcast the scalar
-            dtype, value = infer_dtype_from_scalar(value)
-            value = np.repeat(value, len(self.index)).astype(dtype)
-            value = maybe_cast_to_datetime(value, dtype)
+            value = _cast_scalar_to_array(len(self.index), value)
+            value = _possibly_cast_to_datetime(value, value.dtype)
 
         # return internal types directly
         if is_extension_type(value):
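With the helper in place, the scalar branch of the DataFrame constructor shrinks to two calls, and the block manager is built from the dtype the helper actually produced (`values.dtype`) rather than the possibly-None `dtype` argument. This part is largely a refactor; the scalar-constructor behaviour is easy to check (dtypes in the comments are the typical 64-bit results):

import numpy as np
import pandas as pd

# scalar data broadcast over the given index/columns, dtype inferred
print(pd.DataFrame(5, index=range(3), columns=['a', 'b']).dtypes)    # int64
# strings no longer need the removed compat.string_types special case
print(pd.DataFrame('x', index=range(2), columns=['a']).dtypes)       # object
# an explicit dtype still wins
print(pd.DataFrame(5, index=range(2), columns=['a'],
                   dtype=np.float32).dtypes)                         # float32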

pandas/core/internals.py  (+88 -32)

@@ -388,7 +388,8 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
 
         # fillna, but if we cannot coerce, then try again as an ObjectBlock
         try:
-            values, _, value, _ = self._try_coerce_args(self.values, value)
+            values, _, _, _ = self._try_coerce_args(self.values, value)
+            # value may be converted to internal, thus drop
             blocks = self.putmask(mask, value, inplace=inplace)
             blocks = [b.make_block(values=self._try_coerce_result(b.values))
                       for b in blocks]
@@ -682,8 +683,43 @@ def setitem(self, indexer, value, mgr=None):
             if self.is_numeric:
                 value = np.nan
 
-        # coerce args
-        values, _, value, _ = self._try_coerce_args(self.values, value)
+        # coerce if block dtype can store value
+        values = self.values
+        try:
+            values, _, value, _ = self._try_coerce_args(values, value)
+            # can keep its own dtype
+            if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
+                                                          value.dtype):
+                dtype = self.dtype
+            else:
+                dtype = 'infer'
+
+        except (TypeError, ValueError):
+            # current dtype cannot store value, coerce to common dtype
+            find_dtype = False
+
+            if hasattr(value, 'dtype'):
+                dtype = value.dtype
+                find_dtype = True
+
+            elif is_scalar(value):
+                if isnull(value):
+                    # NaN promotion is handled in latter path
+                    dtype = False
+                else:
+                    dtype, _ = _infer_dtype_from_scalar(value,
+                                                        pandas_dtype=True)
+                    find_dtype = True
+            else:
+                dtype = 'infer'
+
+            if find_dtype:
+                dtype = _find_common_type([values.dtype, dtype])
+                if not is_dtype_equal(self.dtype, dtype):
+                    b = self.astype(dtype)
+                    return b.setitem(indexer, value, mgr=mgr)
+
+        # value must be storeable at this moment
         arr_value = np.array(value)
 
         # cast the values to a type that can hold nan (if necessary)
@@ -713,19 +749,8 @@ def setitem(self, indexer, value, mgr=None):
             raise ValueError("cannot set using a slice indexer with a "
                              "different length than the value")
 
-        try:
-
-            def _is_scalar_indexer(indexer):
-                # return True if we are all scalar indexers
-
-                if arr_value.ndim == 1:
-                    if not isinstance(indexer, tuple):
-                        indexer = tuple([indexer])
-                    return all([is_scalar(idx) for idx in indexer])
-                return False
-
-            def _is_empty_indexer(indexer):
-                # return a boolean if we have an empty indexer
+        def _is_scalar_indexer(indexer):
+            # return True if we are all scalar indexers
 
             if arr_value.ndim == 1:
                 if not isinstance(indexer, tuple):
@@ -777,23 +802,43 @@ def _is_empty_indexer(indexer):
             raise
         except TypeError:
 
-            # cast to the passed dtype if possible
-            # otherwise raise the original error
-            try:
-                # e.g. we are uint32 and our value is uint64
-                # this is for compat with older numpies
-                block = self.make_block(transf(values.astype(value.dtype)))
-                return block.setitem(indexer=indexer, value=value, mgr=mgr)
+        def _is_empty_indexer(indexer):
+            # return a boolean if we have an empty indexer
 
-            except:
-                pass
-
-            raise
+            if arr_value.ndim == 1:
+                if not isinstance(indexer, tuple):
+                    indexer = tuple([indexer])
+                return any(isinstance(idx, np.ndarray) and len(idx) == 0
+                           for idx in indexer)
+            return False
 
-        except Exception:
+        # empty indexers
+        # 8669 (empty)
+        if _is_empty_indexer(indexer):
             pass
 
-            return [self]
+        # setting a single element for each dim and with a rhs that could
+        # be say a list
+        # GH 6043
+        elif _is_scalar_indexer(indexer):
+            values[indexer] = value
+
+        # if we are an exact match (ex-broadcasting),
+        # then use the resultant dtype
+        elif (len(arr_value.shape) and
+              arr_value.shape[0] == values.shape[0] and
+              np.prod(arr_value.shape) == np.prod(values.shape)):
+            values[indexer] = value
+            values = values.astype(arr_value.dtype)
+
+        # set
+        else:
+            values[indexer] = value
+
+        # coerce and try to infer the dtypes of the result
+        values = self._try_coerce_and_cast_result(values, dtype)
+        block = self.make_block(transf(values), fastpath=True)
+        return block
 
     def putmask(self, mask, new, align=True, inplace=False, axis=0,
                 transpose=False, mgr=None):
@@ -1264,6 +1309,7 @@ def func(cond, values, other):
 
         values, values_mask, other, other_mask = self._try_coerce_args(
             values, other)
+
         try:
             return self._try_coerce_result(expressions.where(
                 cond, values, other, raise_on_error=True))
@@ -1543,6 +1589,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
             new = new[mask]
 
         mask = _safe_reshape(mask, new_values.shape)
+
        new_values[mask] = new
         new_values = self._try_coerce_result(new_values)
         return [self.make_block(values=new_values)]
@@ -1712,7 +1759,7 @@ def fillna(self, value, **kwargs):
 
         # allow filling with integers to be
         # interpreted as seconds
-        if not isinstance(value, np.timedelta64) and is_integer(value):
+        if not isinstance(value, np.timedelta64):
             value = Timedelta(value, unit='s')
         return super(TimeDeltaBlock, self).fillna(value, **kwargs)
 
@@ -1949,6 +1996,15 @@ def _maybe_downcast(self, blocks, downcast=None):
     def _can_hold_element(self, element):
         return True
 
+    def _try_coerce_args(self, values, other):
+        """ provide coercion to our input arguments """
+
+        if isinstance(other, ABCDatetimeIndex):
+            # to store DatetimeTZBlock as object
+            other = other.asobject.values
+
+        return values, False, other, False
+
     def _try_cast(self, element):
         return element
 
@@ -2288,8 +2344,6 @@ def _try_coerce_args(self, values, other):
                                 "naive Block")
             other_mask = isnull(other)
             other = other.asm8.view('i8')
-        elif hasattr(other, 'dtype') and is_integer_dtype(other):
-            other = other.view('i8')
         else:
             try:
                 other = np.asarray(other)
@@ -2466,6 +2520,8 @@ def _try_coerce_args(self, values, other):
                 raise ValueError("incompatible or non tz-aware value")
             other_mask = isnull(other)
             other = other.value
+        else:
+            raise TypeError
 
         return values, values_mask, other, other_mask
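The core of the rewritten `Block.setitem` is the question: can the block's current dtype store the incoming value? If `_try_coerce_args` raises, a common dtype is computed and the block is `astype`d before the assignment is retried, which is how a value the block cannot hold now triggers an upcast instead of being forced into the existing dtype. A small sketch of that dtype decision using only public APIs (the real code goes through the private `_infer_dtype_from_scalar` and `_find_common_type`, which for plain numpy dtypes defers to numpy's promotion rules, as the cast.py hunk above shows):

import numpy as np
from pandas.api.types import is_dtype_equal


def resolve_setitem_dtype(block_dtype, value):
    """Sketch of the dtype decision in the new Block.setitem: keep the
    block's dtype if the value already matches it, otherwise promote to
    a common dtype so the assignment cannot silently truncate."""
    value_dtype = np.asarray(value).dtype
    if is_dtype_equal(block_dtype, value_dtype):
        return block_dtype
    # numpy promotion as a stand-in for pandas' common-type helper
    return np.promote_types(block_dtype, value_dtype)


print(resolve_setitem_dtype(np.dtype('int64'), np.array([1.5, 2.5])))  # float64
print(resolve_setitem_dtype(np.dtype('int64'), np.array([1, 2])))      # int64
print(resolve_setitem_dtype(np.dtype('float64'), 3))                   # float64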

pandas/core/panel.py  (+4 -8)

@@ -178,11 +178,9 @@ def _init_data(self, data, copy, dtype, **kwargs):
             copy = False
             dtype = None
         elif is_scalar(data) and all(x is not None for x in passed_axes):
-            if dtype is None:
-                dtype, data = infer_dtype_from_scalar(data)
-            values = np.empty([len(x) for x in passed_axes], dtype=dtype)
-            values.fill(data)
-            mgr = self._init_matrix(values, passed_axes, dtype=dtype,
+            values = _cast_scalar_to_array([len(x) for x in passed_axes],
+                                           data, dtype=dtype)
+            mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
                                     copy=False)
             copy = False
         else:  # pragma: no cover
@@ -584,9 +582,7 @@ def __setitem__(self, key, value):
                                  shape[1:], tuple(map(int, value.shape))))
             mat = np.asarray(value)
         elif is_scalar(value):
-            dtype, value = infer_dtype_from_scalar(value)
-            mat = np.empty(shape[1:], dtype=dtype)
-            mat.fill(value)
+            mat = _cast_scalar_to_array(shape[1:], value)
         else:
             raise TypeError('Cannot set item of type: %s' % str(type(value)))
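panel.py gets the same consolidation: both the scalar constructor branch and scalar `__setitem__` now route through the shared helper. `Panel` was removed from pandas in 0.25, so this sketch only runs on the pandas generation this commit targets; the "expected" comments describe that era's behaviour.

import pandas as pd  # requires pandas < 0.25, where Panel still exists

p = pd.Panel(5, items=['i'], major_axis=[0, 1], minor_axis=['a', 'b'])
print(p.values.dtype)   # expected: int64, inferred from the scalar

p['j'] = 1.5            # scalar __setitem__ also goes through the helper
print(p['j'].dtypes)    # expected: float64 for both columns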
