Skip to content

Commit a30fe18

Browse files
committed
Clobber commits together to ease merge.
1 parent b133eef commit a30fe18

File tree

7 files changed

+350
-45
lines changed

7 files changed

+350
-45
lines changed

Diff for: pandas/core/dtypes/common.py

-24
Original file line numberDiff line numberDiff line change
@@ -739,27 +739,3 @@ def pandas_dtype(dtype):
739739
return dtype
740740

741741
return np.dtype(dtype)
742-
743-
744-
def _is_fillable_value(value):
    """
    Report whether ``value`` is acceptable as a fill value.

    A value is rejected when it is a list or dict, when it is callable,
    when none of the scalar-like checks below accept it, or when its type
    is named ``Series`` or ``DataFrame``.

    Parameters
    ----------
    value : object
        The candidate fill value.

    Returns
    -------
    bool
        True if the value passed every check, False otherwise.
    """
    kind = type(value).__name__

    # Every check is evaluated up front (no short-circuit between them),
    # in the same order as the checks were originally listed.
    is_container = isinstance(value, (list, dict))
    is_function = callable(value)
    scalar_like = (
        isinstance(value, string_types) or
        isinstance(value, (int, float, complex, str, type(None))) or
        is_numeric_dtype(value) or
        is_datetime_or_timedelta_dtype(value) or
        is_period_dtype(value) or
        kind in ('Timestamp', 'Period', 'Timedelta')
    )
    is_block = kind in ('Series', 'DataFrame')

    rejected = is_container or is_function or not scalar_like or is_block
    return not rejected
760-
761-
762-
def validate_fill_value(value):
    """
    Raise if ``value`` is not accepted by ``_is_fillable_value``.

    Parameters
    ----------
    value : object
        The candidate fill value.

    Raises
    ------
    TypeError
        If ``_is_fillable_value(value)`` is falsy; the message names the
        type of the offending value.
    """
    if _is_fillable_value(value):
        return
    passed_type = type(value).__name__
    raise TypeError('"value" parameter must be a scalar, but '
                    'you passed a "{0}"'.format(passed_type))

Diff for: pandas/core/dtypes/missing.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
is_object_dtype,
2020
is_integer,
2121
_TD_DTYPE,
22-
_NS_DTYPE)
22+
_NS_DTYPE,
23+
is_datetime64_any_dtype, is_float,
24+
is_numeric_dtype, is_complex, is_period_arraylike)
25+
from datetime import datetime, timedelta
2326
from .inference import is_list_like
2427

2528

@@ -394,3 +397,32 @@ def na_value_for_dtype(dtype):
394397
elif is_bool_dtype(dtype):
395398
return False
396399
return np.nan
400+
401+
402+
def is_valid_fill_value(value, dtype):
    """
    Check whether the fill value is appropriate for the given dtype.

    Parameters
    ----------
    value : scalar or dict
        The candidate fill value.
    dtype : string / dtype
        The dtype the value would be filled into.

    Returns
    -------
    bool
        True if ``value`` may be used to fill data of ``dtype``:

        * a dict is always accepted (its contents are not inspected here)
        * any other non-scalar is rejected
        * a null scalar is accepted for every dtype
        * bool dtypes accept ``bool`` / ``np.bool_`` only
        * numeric dtypes accept float, integer or complex scalars
        * datetime64 dtypes accept ``datetime`` / ``np.datetime64``
        * timedelta64 dtypes accept ``timedelta`` / ``np.timedelta64``
        * every other dtype accepts any scalar
    """
    if isinstance(value, dict):
        return True
    if not is_scalar(value):
        # Left open by the author: this could raise a TypeError
        # ('"value" parameter must be a scalar or dict ...') instead of
        # returning False.
        return False
    elif isnull(value):
        return True
    elif is_bool_dtype(dtype):
        # ``np.bool`` was only an alias of the builtin ``bool`` and has been
        # removed from some NumPy releases; ``np.bool_`` is the NumPy
        # boolean scalar type.
        return isinstance(value, (np.bool_, bool))
    elif is_numeric_dtype(dtype):
        return is_float(value) or is_integer(value) or is_complex(value)
    elif is_datetime64_any_dtype(dtype):
        return isinstance(value, (np.datetime64, datetime))
    elif is_timedelta64_dtype(dtype):
        return isinstance(value, (np.timedelta64, timedelta))
    return True

Diff for: pandas/core/generic.py

+28-8
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
is_re_compilable)
2727
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
2828
from pandas.core.dtypes.missing import isnull, notnull
29-
from pandas.core.dtypes.generic import ABCSeries, ABCPanel
29+
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
3030

3131
from pandas.core.common import (_values_from_object,
3232
_maybe_box_datetimelike,
@@ -3443,9 +3443,24 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
34433443

34443444
@Appender(_shared_docs['fillna'] % _shared_doc_kwargs)
34453445
def fillna(self, value=None, method=None, axis=None, inplace=False,
3446-
limit=None, downcast=None):
3446+
limit=None, downcast=None, errors=None):
34473447
inplace = validate_bool_kwarg(inplace, 'inplace')
34483448

3449+
# if a singular fill value is provided, validate it
3450+
if value is not None:
3451+
# fill values by column, not all at once, to respect dtypes
3452+
if not isinstance(value, (dict, ABCSeries)) and \
3453+
isinstance(self, ABCDataFrame):
3454+
value = {col: value for col in self.columns}
3455+
try:
3456+
missing.validate_fill_value(self, value)
3457+
except TypeError:
3458+
if errors == 'ignore':
3459+
return self
3460+
elif errors == 'raise':
3461+
raise
3462+
# if errors == 'coerce' continue
3463+
34493464
if isinstance(value, (list, tuple)):
34503465
raise TypeError('"value" parameter must be a scalar or dict, but '
34513466
'you passed a "{0}"'.format(type(value).__name__))
@@ -3464,7 +3479,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
34643479
if self._is_mixed_type and axis == 1:
34653480
if inplace:
34663481
raise NotImplementedError()
3467-
result = self.T.fillna(method=method, limit=limit).T
3482+
result = self.T.fillna(method=method, limit=limit,
3483+
errors=errors).T
34683484

34693485
# need to downcast here because of all of the transposes
34703486
result._data = result._data.downcast()
@@ -3480,7 +3496,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
34803496
elif self.ndim == 3:
34813497

34823498
# fill in 2d chunks
3483-
result = dict([(col, s.fillna(method=method, value=value))
3499+
result = dict([(col, s.fillna(method=method, value=value,
3500+
errors=errors))
34843501
for col, s in self.iteritems()])
34853502
new_obj = self._constructor.\
34863503
from_dict(result).__finalize__(self)
@@ -3512,7 +3529,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
35123529

35133530
new_data = self._data.fillna(value=value, limit=limit,
35143531
inplace=inplace,
3515-
downcast=downcast)
3532+
downcast=downcast,
3533+
errors=errors)
35163534

35173535
elif isinstance(value, (dict, ABCSeries)):
35183536
if axis == 1:
@@ -3525,12 +3543,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
35253543
if k not in result:
35263544
continue
35273545
obj = result[k]
3528-
obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
3529-
return result
3546+
obj.fillna(v, limit=limit, inplace=True,
3547+
downcast=downcast, errors=errors)
3548+
return None if inplace else result
35303549
elif not is_list_like(value):
35313550
new_data = self._data.fillna(value=value, limit=limit,
35323551
inplace=inplace,
3533-
downcast=downcast)
3552+
downcast=downcast,
3553+
errors=errors)
35343554
elif isinstance(value, DataFrame) and self.ndim == 2:
35353555
new_data = self.where(self.notnull(), value)
35363556
else:

Diff for: pandas/core/internals.py

+21-9
Original file line numberDiff line numberDiff line change
@@ -362,10 +362,13 @@ def apply(self, func, mgr=None, **kwargs):
362362
return result
363363

364364
def fillna(self, value, limit=None, inplace=False, downcast=None,
365-
mgr=None):
365+
errors=None, mgr=None):
366366
""" fillna on the block with the value. If we fail, then convert to
367367
ObjectBlock and try again
368368
"""
369+
if not errors:
370+
errors = 'coerce'
371+
369372
inplace = validate_bool_kwarg(inplace, 'inplace')
370373

371374
if not self._can_hold_na:
@@ -399,12 +402,16 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
399402
if not mask.any():
400403
return self if inplace else self.copy()
401404

402-
# we cannot coerce the underlying object, so
403-
# make an ObjectBlock
404-
return self.to_object_block(mgr=mgr).fillna(original_value,
405-
limit=limit,
406-
inplace=inplace,
407-
downcast=False)
405+
if errors == 'coerce':
406+
# we cannot coerce the underlying object, so
407+
# make an ObjectBlock
408+
return self.to_object_block(mgr=mgr).fillna(original_value,
409+
limit=limit,
410+
inplace=inplace,
411+
downcast=False,
412+
errors='ignore')
413+
else: # errors == 'ignore'
414+
return self
408415

409416
def _maybe_downcast(self, blocks, downcast=None):
410417

@@ -2132,11 +2139,14 @@ def _try_coerce_result(self, result):
21322139
return result
21332140

21342141
def fillna(self, value, limit=None, inplace=False, downcast=None,
2135-
mgr=None):
2142+
errors=None, mgr=None):
21362143
# we may need to upcast our fill to match our dtype
21372144
if limit is not None:
21382145
raise NotImplementedError("specifying a limit for 'fillna' has "
21392146
"not been implemented yet")
2147+
if errors is not None:
2148+
raise NotImplementedError("specifying error handling for 'fillna' "
2149+
"has not been implemented yet")
21402150

21412151
values = self.values if inplace else self.values.copy()
21422152
values = self._try_coerce_result(values.fillna(value=value,
@@ -2626,11 +2636,13 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
26262636
placement=self.mgr_locs)
26272637

26282638
def fillna(self, value, limit=None, inplace=False, downcast=None,
2629-
mgr=None):
2639+
errors=None, mgr=None):
26302640
# we may need to upcast our fill to match our dtype
26312641
if limit is not None:
26322642
raise NotImplementedError("specifying a limit for 'fillna' has "
26332643
"not been implemented yet")
2644+
if errors is not None:
2645+
raise NotImplementedError
26342646
values = self.values if inplace else self.values.copy()
26352647
values = values.fillna(value, downcast=downcast)
26362648
return [self.make_block_same_class(values=values,

Diff for: pandas/core/missing.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
_ensure_float64)
2121

2222
from pandas.core.dtypes.cast import infer_dtype_from_array
23-
from pandas.core.dtypes.missing import isnull
23+
from pandas.core.dtypes.missing import isnull, is_valid_fill_value
24+
from pandas.core.dtypes.generic import ABCSeries
2425

2526

2627
def mask_missing(arr, values_to_mask):
@@ -624,3 +625,35 @@ def fill_zeros(result, x, y, name, fill):
624625
result = result.reshape(shape)
625626

626627
return result
628+
629+
630+
def validate_fill_value(obj, value):
    """
    Check that ``value`` is a valid fill value for a Series.

    Parameters
    ----------
    obj : Series or DataFrame
        The Series or DataFrame for which a fill value is being evaluated.
        Only a Series is validated. Any other object (e.g. a DataFrame) is
        accepted without checks; per the original author's note, a
        DataFrame is expected to be broken up and handled as a sequence of
        sub-Series later on.
    value : object
        The value to be used as a fill for the object.

    Returns
    -------
    None
        The function returns nothing; it signals an invalid fill value
        only by raising.

    Raises
    ------
    TypeError
        If ``obj`` is a Series and ``is_valid_fill_value`` rejects
        ``value`` for the Series' dtype.
    """
    if not isinstance(obj, ABCSeries):
        return
    if not is_valid_fill_value(value, obj.dtype):
        raise TypeError('"value" parameter must be compatible '
                        'with the {0} dtype, but you passed a '
                        '"{1}"'.format(obj.dtype,
                                       type(value).__name__))

Diff for: pandas/tests/dtypes/test_missing.py

+35-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
from warnings import catch_warnings
44
import numpy as np
5-
from datetime import datetime
5+
from datetime import datetime, timedelta
66
from pandas.util import testing as tm
7+
import pytest
78

89
import pandas as pd
910
from pandas.core import config as cf
@@ -14,7 +15,7 @@
1415
from pandas.core.dtypes.dtypes import DatetimeTZDtype
1516
from pandas.core.dtypes.missing import (
1617
array_equivalent, isnull, notnull,
17-
na_value_for_dtype)
18+
na_value_for_dtype, is_valid_fill_value)
1819

1920

2021
def test_notnull():
@@ -312,3 +313,35 @@ def test_na_value_for_dtype():
312313

313314
for dtype in ['O']:
314315
assert np.isnan(na_value_for_dtype(np.dtype(dtype)))
316+
317+
318+
@pytest.mark.parametrize(
    ('value', 'dtype'),
    [
        # bool dtype
        (False, bool),
        (np.nan, bool),
        # int dtype
        (0, int),
        (0.0, int),
        (0j, int),
        (np.nan, int),
        # float dtype
        (0, float),
        (0.0, float),
        (0j, float),
        (np.nan, float),
        # complex dtype
        (0, complex),
        (0.0, complex),
        (0j, complex),
        (np.nan, complex),
        # str dtype
        (False, str),
        (0, str),
        (0.0, str),
        (0j, str),
        (np.nan, str),
        ('0', str),
        # datetime64
        (datetime(1970, 1, 1), np.datetime64),
        (pd.Timestamp('1970-01-01'), np.datetime64),
        # timedelta64
        (timedelta(0), np.timedelta64),
        (pd.Timedelta(0), np.timedelta64),
    ])
def test_valid_fill_value(value, dtype):
    """Each (value, dtype) pair must be accepted as a fill value."""
    accepted = is_valid_fill_value(value, dtype)
    assert accepted
333+
334+
335+
@pytest.mark.parametrize(
    ('value', 'dtype'),
    [
        # bool dtype
        (0, bool),
        (0.0, bool),
        (0j, bool),
        ('0', bool),
        # numeric dtypes
        ('0', int),
        ('0', float),
        ('0', complex),
        # datetime64
        ('0', np.dtype('datetime64')),
        (timedelta(0), np.dtype('datetime64')),
        (pd.Period('1970-01-01'), np.dtype('datetime64')),
        # timedelta64
        ('0', np.dtype('timedelta64')),
        (datetime(1970, 1, 1), np.dtype('timedelta64')),
        (pd.Period('1970-01-01'), np.dtype('timedelta64')),
    ])
def test_invalid_fill_value(value, dtype):
    """Each (value, dtype) pair must be rejected as a fill value."""
    accepted = is_valid_fill_value(value, dtype)
    assert not accepted

0 commit comments

Comments
 (0)