
API: Series.sum() will now return 0.0 for all-NaN series #10815


Closed · wants to merge 1 commit
14 changes: 14 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
@@ -551,6 +551,20 @@ Other API Changes

- Improved error message when concatenating an empty iterable of dataframes (:issue:`9157`)
+
+- ``Series.sum()`` will now return 0.0, and ``Series.prod()`` will return 1.0 for all-NaN series rather than ``NaN``; this is for compat with ``numpy`` >= 1.8.2 and ``bottleneck`` >= 1.0 (:issue:`9422`).
+
+  .. ipython:: python
+
+     s = Series([np.nan])
+     s.sum()
+     s.sum(skipna=False)
+     s.prod()
+     s.prod(skipna=False)
+
+  .. warning::
+
+     ``bottleneck`` is used for these calculations. If you have ``bottleneck`` < 1.0, then these will all return ``NaN``.

.. _whatsnew_0170.deprecations:

Deprecations
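For reference, a minimal sketch of the behavior the whatsnew entry describes; the commented outputs are what the entry implies for a build with this patch (and `bottleneck` >= 1.0), not a recorded session:

```python
import numpy as np
import pandas as pd

s = pd.Series([np.nan])

s.sum()               # 0.0 under the new semantics (previously NaN)
s.prod()              # 1.0 under the new semantics (previously NaN)
s.sum(skipna=False)   # nan -- skipna=False still propagates NaN
s.prod(skipna=False)  # nan
```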
1 change: 1 addition & 0 deletions pandas/__init__.py
@@ -27,6 +27,7 @@
_np_version = np.version.short_version
_np_version_under1p8 = LooseVersion(_np_version) < '1.8'
_np_version_under1p9 = LooseVersion(_np_version) < '1.9'
+_np_version_under1p10 = LooseVersion(_np_version) < '1.10'


from pandas.info import __doc__
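The new flag follows the existing `_np_version_under1p8`/`_np_version_under1p9` pattern. A self-contained sketch of that version-gating idiom (using the `distutils` `LooseVersion` import that pandas relied on at the time):

```python
from distutils.version import LooseVersion

import numpy as np

# LooseVersion compares version components numerically, so '1.10' is
# correctly treated as newer than '1.9' (plain string comparison is not).
_np_version = np.version.short_version
_np_version_under1p10 = LooseVersion(_np_version) < '1.10'

if _np_version_under1p10:
    # e.g. np.nanprod only exists on numpy >= 1.10
    print("older numpy: gate version-dependent behavior here")
```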
30 changes: 19 additions & 11 deletions pandas/core/groupby.py
@@ -102,11 +102,11 @@ class SpecificationError(GroupByError):


def _groupby_function(name, alias, npfunc, numeric_only=True,
-                      _convert=False):
+                      fillna=None, _convert=False):
    def f(self):
        self._set_selection_from_grouper()
        try:
-            return self._cython_agg_general(alias, numeric_only=numeric_only)
+            return self._cython_agg_general(alias, numeric_only=numeric_only, fillna=fillna)
        except AssertionError as e:
            raise SpecificationError(str(e))
        except Exception:
@@ -793,8 +793,8 @@ def size(self):
        """
        return self.grouper.size()

-    sum = _groupby_function('sum', 'add', np.sum)
-    prod = _groupby_function('prod', 'prod', np.prod)
+    sum = _groupby_function('sum', 'add', np.sum, fillna=0.0)
+    prod = _groupby_function('prod', 'prod', np.prod, fillna=1.0)
    min = _groupby_function('min', 'min', np.min, numeric_only=False)
    max = _groupby_function('max', 'max', np.max, numeric_only=False)
    first = _groupby_function('first', 'first', _first_compat,
@@ -1118,15 +1118,15 @@ def _try_cast(self, result, obj):

        return result

-    def _cython_agg_general(self, how, numeric_only=True):
+    def _cython_agg_general(self, how, numeric_only=True, fillna=None):
        output = {}
        for name, obj in self._iterate_slices():
            is_numeric = is_numeric_dtype(obj.dtype)
            if numeric_only and not is_numeric:
                continue

            try:
-                result, names = self.grouper.aggregate(obj.values, how)
+                result, names = self.grouper.aggregate(obj.values, how, fillna=fillna)
            except AssertionError as e:
                raise GroupByError(str(e))
            output[name] = self._try_cast(result, obj)
@@ -1511,7 +1511,7 @@ def wrapper(*args, **kwargs):
                                      (how, dtype_str))
        return func, dtype_str

-    def aggregate(self, values, how, axis=0):
+    def aggregate(self, values, how, axis=0, fillna=None):
        arity = self._cython_arity.get(how, 1)

        vdim = values.ndim
@@ -1534,14 +1534,18 @@ def aggregate(self, values, how, axis=0):
            values = values.view('int64')
            # GH 7754
            is_numeric = True
+            fillna = None
        elif is_bool_dtype(values.dtype):
            values = _algos.ensure_float64(values)
+            fillna = None
        elif com.is_integer_dtype(values):
            values = values.astype('int64', copy=False)
+            fillna = None
        elif is_numeric:
            values = _algos.ensure_float64(values)
        else:
            values = values.astype(object)
+            fillna = None

        try:
            agg_func, dtype_str = self._get_aggregate_function(how, values)
@@ -1564,6 +1568,10 @@

        result = self._aggregate(result, counts, values, agg_func, is_numeric)

+        # if we have a non-None fillna, then replace
+        if fillna is not None:
+            result[np.isnan(result)] = fillna
+
        if com.is_integer_dtype(result):
            if len(result[result == tslib.iNaT]) > 0:
                result = result.astype('float64')
@@ -2581,8 +2589,8 @@ def _iterate_slices(self):
                continue
            yield val, slicer(val)

-    def _cython_agg_general(self, how, numeric_only=True):
-        new_items, new_blocks = self._cython_agg_blocks(how, numeric_only=numeric_only)
+    def _cython_agg_general(self, how, numeric_only=True, fillna=None):
+        new_items, new_blocks = self._cython_agg_blocks(how, numeric_only=numeric_only, fillna=fillna)
        return self._wrap_agged_blocks(new_items, new_blocks)

    def _wrap_agged_blocks(self, items, blocks):
@@ -2608,7 +2616,7 @@ def _wrap_agged_blocks(self, items, blocks):

    _block_agg_axis = 0

-    def _cython_agg_blocks(self, how, numeric_only=True):
+    def _cython_agg_blocks(self, how, numeric_only=True, fillna=None):
        data, agg_axis = self._get_data_to_aggregate()

        new_blocks = []
@@ -2620,7 +2628,7 @@

            values = block._try_operate(block.values)

-            result, _ = self.grouper.aggregate(values, how, axis=agg_axis)
+            result, _ = self.grouper.aggregate(values, how, axis=agg_axis, fillna=fillna)

            # see if we can cast the block back to the original dtype
            result = block._try_coerce_and_cast_result(result)
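To make the groupby change concrete, a sketch of the intended user-visible effect; the commented outputs assume this patch is applied (group `'b'` is all-NaN):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({'key': ['a', 'a', 'b'],
                   'val': [1.0, 2.0, np.nan]})
grouped = df.groupby('key')['val']

# With fillna=0.0 / fillna=1.0 threaded through the cython aggregation
# path above, the all-NaN group gets the reduction identity, not NaN:
grouped.sum()   # a -> 3.0, b -> 0.0 (NaN before this patch)
grouped.prod()  # a -> 2.0, b -> 1.0 (NaN before this patch)
```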
46 changes: 40 additions & 6 deletions pandas/core/nanops.py
@@ -9,7 +9,7 @@
_USE_BOTTLENECK = False

import pandas.hashtable as _hash
-from pandas import compat, lib, algos, tslib
+from pandas import compat, lib, algos, tslib, _np_version_under1p10
from pandas.compat import builtins
from pandas.core.common import (isnull, notnull, _values_from_object,
                                _maybe_upcast_putmask,
@@ -243,12 +243,14 @@ def nanall(values, axis=None, skipna=True):
@disallow('M8')
@bottleneck_switch(zero_value=0)
def nansum(values, axis=None, skipna=True):
+    dtype = values.dtype
    values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
    dtype_sum = dtype_max
    if is_float_dtype(dtype):
        dtype_sum = dtype
    the_sum = values.sum(axis, dtype=dtype_sum)
-    the_sum = _maybe_null_out(the_sum, axis, mask)
+    the_sum = _maybe_null_out(the_sum, axis, mask, allow_all_null=not skipna,
+                              dtype=dtype, fill_value=0)

    return _wrap_results(the_sum, dtype)

@@ -549,12 +551,14 @@ def nankurt(values, axis=None, skipna=True):

@disallow('M8','m8')
def nanprod(values, axis=None, skipna=True):
+    dtype = values.dtype
    mask = isnull(values)
    if skipna and not is_any_int_dtype(values):
        values = values.copy()
        values[mask] = 1
    result = values.prod(axis)
-    return _maybe_null_out(result, axis, mask)
+    return _maybe_null_out(result, axis, mask, allow_all_null=not skipna, dtype=dtype,
+                           fill_value=1)


def _maybe_arg_null_out(result, axis, mask, skipna):
@@ -588,19 +592,49 @@ def _get_counts(mask, axis, dtype=float):
    return np.array(count, dtype=dtype)


-def _maybe_null_out(result, axis, mask):
+def _maybe_null_out(result, axis, mask, allow_all_null=True, dtype=None, fill_value=None):
+
+
+    # 9422
+    # if we have all nulls we normally return a
+    # null, but for numpy >= 1.8.2 and bottleneck >= 1.0
+    # nansum/nanprod are set to be the fill_values
+    if not allow_all_null and dtype is not None:
+
+        if is_complex_dtype(dtype) or not is_float_dtype(dtype):
+
+            # we don't mask complex
+            # object or non-floats
+            # if numpy changes this, we will as well
+
+            # IOW, np.nansum(np.array([np.nan],dtype='object')) is np.nan
+            # https://github.com/numpy/numpy/issues/6209
+            allow_all_null = True
+            fill_value = np.nan
+
+        else:
+            fill_value = np.nan
+
    if axis is not None and getattr(result, 'ndim', False):
        null_mask = (mask.shape[axis] - mask.sum(axis)) == 0
        if np.any(null_mask):
            if np.iscomplexobj(result):
                result = result.astype('c16')
            else:
                result = result.astype('f8')
+
+            # mark nans
            result[null_mask] = np.nan
+
+        # fill only the all-nan slices
+        if not allow_all_null:
+            null_mask = mask.all(axis)
+            if null_mask.any():
+                result[null_mask] = fill_value
    else:
        null_mask = mask.size - mask.sum()
-        if null_mask == 0:
-            result = np.nan
+        if null_mask == 0 and (mask.size > 0 or allow_all_null):
+            result = fill_value

    return result

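The comments in `_maybe_null_out` refer to numpy's own behavior; a short sketch of the semantics being matched (note `np.nanprod` only exists on numpy >= 1.10, which is presumably why `_np_version_under1p10` is imported above):

```python
import numpy as np

# numpy >= 1.8.2 returns the reduction identity for an all-NaN float array:
np.nansum(np.array([np.nan]))    # 0.0
np.nanprod(np.array([np.nan]))   # 1.0 (numpy >= 1.10 only)

# ... but object arrays are not masked, which is why the code above
# falls back to NaN for non-float dtypes (see numpy/numpy#6209):
np.nansum(np.array([np.nan], dtype='object'))  # nan
```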
44 changes: 34 additions & 10 deletions pandas/tests/test_frame.py
@@ -12230,10 +12230,10 @@ def test_count(self):
        assert_series_equal(result, expected)

    def test_sum(self):
-        self._check_stat_op('sum', np.sum, has_numeric_only=True)
+        self._check_stat_op('sum', np.sum, has_numeric_only=True, fillna=0.0)

        # mixed types (with upcasting happening)
-        self._check_stat_op('sum', np.sum, frame=self.mixed_float.astype('float32'),
+        self._check_stat_op('sum', np.sum, frame=self.mixed_float.astype('float32'), fillna=0.0,
                            has_numeric_only=True, check_dtype=False, check_less_precise=True)

    def test_stat_operators_attempt_obj_array(self):
@@ -12247,23 +12247,32 @@ def test_stat_operators_attempt_obj_array(self):
        df1 = DataFrame(data, index=['foo', 'bar', 'baz'],
                        dtype='O')
        methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']
+        fills = [0.0, np.nan, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan]

        # GH #676
        df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3],
                         2: [np.nan, 4]}, dtype=object)

        for df in [df1, df2]:
-            for meth in methods:
+            for meth, fill in zip(methods, fills):
                self.assertEqual(df.values.dtype, np.object_)
                result = getattr(df, meth)(1)
+
+                # 9422
+                # all-NaN object array is still NaN, while floats are not :<
                expected = getattr(df.astype('f8'), meth)(1)
+                if not np.isnan(fill):
+                    mask = df.isnull().all(1)
+                    if mask.any():
+                        expected[mask] = np.nan
+
                assert_series_equal(result, expected)

    def test_mean(self):
        self._check_stat_op('mean', np.mean, check_dates=True)

    def test_product(self):
-        self._check_stat_op('product', np.prod)
+        self._check_stat_op('product', np.prod, fillna=1.0)

    def test_median(self):
        def wrapper(x):
@@ -12435,7 +12444,7 @@ def alt(x):

    def _check_stat_op(self, name, alternative, frame=None, has_skipna=True,
                       has_numeric_only=False, check_dtype=True, check_dates=False,
-                      check_less_precise=False):
+                      check_less_precise=False, fillna=None):
        if frame is None:
            frame = self.frame
        # set some NAs
@@ -12478,11 +12487,20 @@ def wrapper(x):
            wrapper = alternative

        result0 = f(axis=0)
-        result1 = f(axis=1)
-        assert_series_equal(result0, frame.apply(skipna_wrapper),
+        expected0 = frame.apply(skipna_wrapper)
+        assert_series_equal(result0, expected0,
                            check_dtype=check_dtype,
                            check_less_precise=check_less_precise)
-        assert_series_equal(result1, frame.apply(skipna_wrapper, axis=1),
+
+        result1 = f(axis=1)
+
+        # 9422
+        # all-nan rows get the fillna
+        expected1 = frame.apply(skipna_wrapper, axis=1)
+        if fillna is not None:
+            expected1[isnull(frame).all(axis=1)] = fillna
+
+        assert_series_equal(result1, expected1,
                            check_dtype=False,
                            check_less_precise=check_less_precise)

@@ -12513,8 +12531,14 @@ def wrapper(x):
        all_na = self.frame * np.NaN
        r0 = getattr(all_na, name)(axis=0)
        r1 = getattr(all_na, name)(axis=1)
-        self.assertTrue(np.isnan(r0).all())
-        self.assertTrue(np.isnan(r1).all())
+
+        # 9422
+        if fillna is not None:
+            self.assertTrue((r0==fillna).all())
+            self.assertTrue((r1==fillna).all())
+        else:
+            self.assertTrue(np.isnan(r0).all())
+            self.assertTrue(np.isnan(r1).all())

    def test_mode(self):
        df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11],
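A standalone distillation of the all-NaN assertions added to `_check_stat_op` (a hypothetical snippet mirroring the test, not part of the suite; it passes only with this patch applied):

```python
import numpy as np
import pandas as pd

# every row and column of this frame reduces over no non-missing
# values, so sum/prod should yield their fill values
all_na = pd.DataFrame(np.nan, index=range(3), columns=list('AB'))

assert (all_na.sum(axis=0) == 0.0).all()
assert (all_na.sum(axis=1) == 0.0).all()
assert (all_na.prod(axis=0) == 1.0).all()
assert (all_na.prod(axis=1) == 1.0).all()
```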