Skip to content

Commit a14cbd0

Browse files
committed
Merge pull request #3600 from jreback/modulo
BUG: Fix integer modulo and division to make integer and float dtypes work similarly for invalid values
2 parents 9ae47f9 + 555af4c commit a14cbd0

File tree

7 files changed

+159
-21
lines changed

7 files changed

+159
-21
lines changed

RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ pandas 0.11.1
4848
to append an index with a different name than the existing
4949
- support datelike columns with a timezone as data_columns (GH2852_)
5050
- table writing performance improvements.
51+
- Add modulo operator to Series, DataFrame
5152

5253
**API Changes**
5354

@@ -111,6 +112,8 @@ pandas 0.11.1
111112
is a ``list`` or ``tuple``.
112113
- Fixed bug where a time-series was being selected in preference to an actual column name
113114
in a frame (GH3594_)
115+
- Fix modulo and integer division on Series, DataFrames to act similarly to ``float`` dtypes to return
116+
``np.nan`` or ``np.inf`` as appropriate (GH3590_)
114117

115118
.. _GH3164: https://github.com/pydata/pandas/issues/3164
116119
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -155,6 +158,7 @@ pandas 0.11.1
155158
.. _GH3593: https://github.com/pydata/pandas/issues/3593
156159
.. _GH3556: https://github.com/pydata/pandas/issues/3556
157160
.. _GH3594: https://github.com/pydata/pandas/issues/3594
161+
.. _GH3590: https://github.com/pydata/pandas/issues/3590
158162
.. _GH3435: https://github.com/pydata/pandas/issues/3435
159163

160164

doc/source/v0.11.1.txt

+12
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,17 @@ enhancements along with a large number of bug fixes.
99
API changes
1010
~~~~~~~~~~~
1111

12+
- Fix modulo and integer division on Series, DataFrames to act similarly to ``float`` dtypes to return
13+
``np.nan`` or ``np.inf`` as appropriate (GH3590_). This corrects a numpy bug that treats ``integer``
14+
and ``float`` dtypes differently.
15+
16+
.. ipython:: python
17+
18+
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
19+
p % 0
20+
p % p
21+
p / p
22+
p / 0
1223

1324
Enhancements
1425
~~~~~~~~~~~~
@@ -33,4 +44,5 @@ on GitHub for a complete list.
3344
.. _GH3477: https://github.com/pydata/pandas/issues/3477
3445
.. _GH3492: https://github.com/pydata/pandas/issues/3492
3546
.. _GH3499: https://github.com/pydata/pandas/issues/3499
47+
.. _GH3590: https://github.com/pydata/pandas/issues/3590
3648
.. _GH3435: https://github.com/pydata/pandas/issues/3435

pandas/core/common.py

+31-7
Original file line numberDiff line numberDiff line change
@@ -793,13 +793,16 @@ def changeit():
793793

794794
# try to directly set by expanding our array to full
795795
# length of the boolean
796-
om = other[mask]
797-
om_at = om.astype(result.dtype)
798-
if (om == om_at).all():
799-
new_other = result.values.copy()
800-
new_other[mask] = om_at
801-
result[:] = new_other
802-
return result, False
796+
try:
797+
om = other[mask]
798+
om_at = om.astype(result.dtype)
799+
if (om == om_at).all():
800+
new_other = result.values.copy()
801+
new_other[mask] = om_at
802+
result[:] = new_other
803+
return result, False
804+
except:
805+
pass
803806

804807
# we are forced to change the dtype of the result as the input isn't compatible
805808
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
@@ -948,6 +951,27 @@ def _lcd_dtypes(a_dtype, b_dtype):
948951
return np.float64
949952
return np.object
950953

954+
def _fill_zeros(result, y, fill):
955+
""" if we have an integer value (or array in y)
956+
and we have 0's, fill them with the fill,
957+
return the result """
958+
959+
if fill is not None:
960+
if not isinstance(y, np.ndarray):
961+
dtype, value = _infer_dtype_from_scalar(y)
962+
y = pa.empty(result.shape,dtype=dtype)
963+
y.fill(value)
964+
965+
if is_integer_dtype(y):
966+
967+
mask = y.ravel() == 0
968+
if mask.any():
969+
shape = result.shape
970+
result, changed = _maybe_upcast_putmask(result.ravel(),mask,fill)
971+
result = result.reshape(shape)
972+
973+
return result
974+
951975
def _interp_wrapper(f, wrap_dtype, na_override=None):
952976
def wrapper(arr, mask, limit=None):
953977
view = arr.view(wrap_dtype)

pandas/core/frame.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,12 @@ class DataConflictError(Exception):
189189
# Factory helper methods
190190

191191

192-
def _arith_method(op, name, str_rep = None, default_axis='columns'):
192+
def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None):
193193
def na_op(x, y):
194194
try:
195195
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True)
196+
result = com._fill_zeros(result,y,fill_zeros)
197+
196198
except TypeError:
197199
xrav = x.ravel()
198200
result = np.empty(x.size, dtype=x.dtype)
@@ -841,20 +843,23 @@ def __contains__(self, key):
841843
__sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None)
842844
__mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None)
843845
__truediv__ = _arith_method(operator.truediv, '__truediv__', '/',
844-
default_axis=None)
846+
default_axis=None, fill_zeros=np.inf)
845847
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__',
846-
default_axis=None)
848+
default_axis=None, fill_zeros=np.inf)
847849
__pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None)
848850

851+
# str_rep is the operator string passed to expressions.evaluate inside na_op, so it must be '%' (not '*') for modulo.
__mod__ = _arith_method(operator.mod, '__mod__', '%', default_axis=None, fill_zeros=np.nan)
852+
849853
__radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None)
850854
__rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None)
851855
__rsub__ = _arith_method(lambda x, y: y - x, '__rsub__', default_axis=None)
852856
__rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__',
853-
default_axis=None)
857+
default_axis=None, fill_zeros=np.inf)
854858
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__',
855-
default_axis=None)
859+
default_axis=None, fill_zeros=np.inf)
856860
__rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__',
857861
default_axis=None)
862+
# Reflected modulo: swap the operands (y % x), matching the other reflected operators such as __rsub__ and __rfloordiv__.
__rmod__ = _arith_method(lambda x, y: y % x, '__rmod__', default_axis=None, fill_zeros=np.nan)
858863

859864
# boolean operators
860865
__and__ = _arith_method(operator.and_, '__and__', '&')
@@ -863,9 +868,10 @@ def __contains__(self, key):
863868

864869
# Python 2 division methods
865870
if not py3compat.PY3:
866-
__div__ = _arith_method(operator.div, '__div__', '/', default_axis=None)
871+
__div__ = _arith_method(operator.div, '__div__', '/',
872+
default_axis=None, fill_zeros=np.inf)
867873
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__',
868-
default_axis=None)
874+
default_axis=None, fill_zeros=np.inf)
869875

870876
def __neg__(self):
871877
arr = operator.neg(self.values)

pandas/core/series.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,17 @@
5555
# Wrapper function for Series arithmetic methods
5656

5757

58-
def _arith_method(op, name):
58+
def _arith_method(op, name, fill_zeros=None):
5959
"""
6060
Wrapper function for Series arithmetic operations, to avoid
6161
code duplication.
6262
"""
6363
def na_op(x, y):
6464
try:
65+
6566
result = op(x, y)
67+
result = com._fill_zeros(result,y,fill_zeros)
68+
6669
except TypeError:
6770
result = pa.empty(len(x), dtype=x.dtype)
6871
if isinstance(y, pa.Array):
@@ -1258,16 +1261,18 @@ def iteritems(self):
12581261
__add__ = _arith_method(operator.add, '__add__')
12591262
__sub__ = _arith_method(operator.sub, '__sub__')
12601263
__mul__ = _arith_method(operator.mul, '__mul__')
1261-
__truediv__ = _arith_method(operator.truediv, '__truediv__')
1262-
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
1264+
__truediv__ = _arith_method(operator.truediv, '__truediv__', fill_zeros=np.inf)
1265+
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__', fill_zeros=np.inf)
12631266
__pow__ = _arith_method(operator.pow, '__pow__')
1267+
__mod__ = _arith_method(operator.mod, '__mod__', fill_zeros=np.nan)
12641268

12651269
__radd__ = _arith_method(_radd_compat, '__add__')
12661270
__rmul__ = _arith_method(operator.mul, '__mul__')
12671271
__rsub__ = _arith_method(lambda x, y: y - x, '__sub__')
1268-
__rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__')
1269-
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__')
1272+
__rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__', fill_zeros=np.inf)
1273+
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__', fill_zeros=np.inf)
12701274
__rpow__ = _arith_method(lambda x, y: y ** x, '__pow__')
1275+
# Reflected modulo: swap the operands (y % x), matching the other reflected operators such as __rsub__ and __rfloordiv__.
__rmod__ = _arith_method(lambda x, y: y % x, '__mod__', fill_zeros=np.nan)
12711276

12721277
# comparisons
12731278
__gt__ = _comp_method(operator.gt, '__gt__')
@@ -1301,8 +1306,8 @@ def __invert__(self):
13011306

13021307
# Python 2 division operators
13031308
if not py3compat.PY3:
1304-
__div__ = _arith_method(operator.div, '__div__')
1305-
__rdiv__ = _arith_method(lambda x, y: y / x, '__div__')
1309+
__div__ = _arith_method(operator.div, '__div__', fill_zeros=np.inf)
1310+
__rdiv__ = _arith_method(lambda x, y: y / x, '__div__', fill_zeros=np.inf)
13061311
__idiv__ = __div__
13071312

13081313
#----------------------------------------------------------------------

pandas/tests/test_frame.py

+44
Original file line numberDiff line numberDiff line change
@@ -4011,6 +4011,50 @@ def test_operators_none_as_na(self):
40114011
result = op(df.fillna(7), df)
40124012
assert_frame_equal(result, expected)
40134013

4014+
def test_modulo(self):
4015+
4016+
# GH3590, modulo as ints
4017+
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
4018+
4019+
### this is technically wrong as the integer portion is coerced to float ###
4020+
expected = DataFrame({ 'first' : Series([0,0,0,0],dtype='float64'), 'second' : Series([np.nan,np.nan,np.nan,0]) })
4021+
result = p % p
4022+
assert_frame_equal(result,expected)
4023+
4024+
# numpy has a slightly different (wrong) treatment
4025+
result2 = DataFrame(p.values % p.values,index=p.index,columns=p.columns,dtype='float64')
4026+
result2.iloc[0:3,1] = np.nan
4027+
assert_frame_equal(result2,expected)
4028+
4029+
result = p % 0
4030+
expected = DataFrame(np.nan,index=p.index,columns=p.columns)
4031+
assert_frame_equal(result,expected)
4032+
4033+
# numpy has a slightly different (wrong) treatment
4034+
result2 = DataFrame(p.values.astype('float64') % 0,index=p.index,columns=p.columns)
4035+
assert_frame_equal(result2,expected)
4036+
4037+
def test_div(self):
4038+
4039+
# integer div, but deal with the 0's
4040+
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
4041+
result = p / p
4042+
4043+
### this is technically wrong as the integer portion is coerced to float ###
4044+
expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) })
4045+
assert_frame_equal(result,expected)
4046+
4047+
result2 = DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf)
4048+
assert_frame_equal(result2,expected)
4049+
4050+
result = p / 0
4051+
expected = DataFrame(np.inf,index=p.index,columns=p.columns)
4052+
assert_frame_equal(result,expected)
4053+
4054+
# numpy has a slightly different (wrong) treatment
4055+
result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf)
4056+
assert_frame_equal(result2,expected)
4057+
40144058
def test_logical_operators(self):
40154059
import operator
40164060

pandas/tests/test_series.py

+43
Original file line numberDiff line numberDiff line change
@@ -1766,6 +1766,49 @@ def test_neg(self):
17661766
def test_invert(self):
17671767
assert_series_equal(-(self.series < 0), ~(self.series < 0))
17681768

1769+
def test_modulo(self):
1770+
1771+
# GH3590, modulo as ints
1772+
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
1773+
result = p['first'] % p['second']
1774+
expected = Series(p['first'].values % p['second'].values,dtype='float64')
1775+
expected.iloc[0:3] = np.nan
1776+
assert_series_equal(result,expected)
1777+
1778+
result = p['first'] % 0
1779+
expected = Series(np.nan,index=p.index)
1780+
assert_series_equal(result,expected)
1781+
1782+
p = p.astype('float64')
1783+
result = p['first'] % p['second']
1784+
expected = Series(p['first'].values % p['second'].values)
1785+
assert_series_equal(result,expected)
1786+
1787+
def test_div(self):
1788+
1789+
# integer div, but deal with the 0's
1790+
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
1791+
result = p['first'] / p['second']
1792+
expected = Series(p['first'].values / p['second'].values,dtype='float64')
1793+
expected.iloc[0:3] = np.inf
1794+
assert_series_equal(result,expected)
1795+
1796+
result = p['first'] / 0
1797+
expected = Series(np.inf,index=p.index)
1798+
assert_series_equal(result,expected)
1799+
1800+
p = p.astype('float64')
1801+
result = p['first'] / p['second']
1802+
expected = Series(p['first'].values / p['second'].values)
1803+
assert_series_equal(result,expected)
1804+
1805+
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [1,1,1,1] })
1806+
result = p['first'] / p['second']
1807+
if py3compat.PY3:
1808+
assert_series_equal(result,p['first'].astype('float64'))
1809+
else:
1810+
assert_series_equal(result,p['first'])
1811+
17691812
def test_operators(self):
17701813

17711814
def _check_op(series, other, op, pos_only=False):

0 commit comments

Comments
 (0)