Skip to content

WRN: let + * and - pass thru on boolean with a warning #7245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 27, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ API Changes
- ``to_excel`` now converts ``np.inf`` into a string representation,
customizable by the ``inf_rep`` keyword argument (Excel has no native inf
representation) (:issue:`6782`)
- Arithmetic ops are now disallowed when passed two bool dtype Series or
DataFrames (:issue:`6762`).
- The arithmetic ops ``+``, ``-`` and ``*`` on bool dtype arrays/scalars now give a warning indicating
that they are evaluated in Python space; all other arithmetic ops on bool dtypes still raise (:issue:`6762`, :issue:`7210`).
- Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`)

- ``DataFrame.plot`` and ``Series.plot`` now support a ``table`` keyword for plotting ``matplotlib.Table``. The ``table`` keyword can receive the following values.
Expand Down
11 changes: 6 additions & 5 deletions doc/source/v0.14.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,17 +186,18 @@ API changes
- Added ``factorize`` functions to ``Index`` and ``Series`` to get indexer and unique values (:issue:`7090`)
- ``describe`` on a DataFrame with a mix of Timestamp and string like objects returns a different Index (:issue:`7088`).
Previously the index was unintentionally sorted.
- arithmetic operations with **only** ``bool`` dtypes now raise an error
(:issue:`7011`, :issue:`6762`, :issue:`7015`)
- arithmetic operations with **only** ``bool`` dtypes warn for ``+``, ``-``,
and ``*`` operations and raise for all others (:issue:`7011`, :issue:`6762`,
:issue:`7015`, :issue:`7210`)

.. code-block:: python

x = pd.Series(np.random.rand(10) > 0.5)
y = True
x * y
x + y # warning generated: should do x | y instead
x / y # this raises because it doesn't make sense

# this now raises for arith ops like ``+``, ``*``, etc.
NotImplementedError: operator '*' not implemented for bool dtypes
NotImplementedError: operator '/' not implemented for bool dtypes


.. _whatsnew_0140.display:
Expand Down
25 changes: 19 additions & 6 deletions pandas/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

"""

import warnings
import numpy as np
from pandas.core.common import _values_from_object
from distutils.version import LooseVersion
Expand Down Expand Up @@ -170,11 +171,23 @@ def _has_bool_dtype(x):
return isinstance(x, (bool, np.bool_))


def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('+', '*', '-', '/',
                                                           '//', '**'))):
    """Raise if ``op_str`` is a disallowed operator applied to two bool operands.

    Parameters
    ----------
    op_str : operator symbol to validate (e.g. ``'+'``)
    a, b : operands; each is tested with ``_has_bool_dtype``
    not_allowed : collection of operator symbols rejected for bool operands

    Raises
    ------
    NotImplementedError
        when ``op_str`` is in ``not_allowed`` and both operands are bool
    """
    # Operators outside the rejected set never need the dtype inspection.
    if op_str not in not_allowed:
        return
    if _has_bool_dtype(a) and _has_bool_dtype(b):
        raise NotImplementedError("operator %r not implemented for bool "
                                  "dtypes" % op_str)
def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')),
                      unsupported=None):
    """Decide whether ``op_str`` on ``a`` and ``b`` may be handed to numexpr.

    Parameters
    ----------
    op_str : operator symbol being evaluated (e.g. ``'+'``)
    a, b : operands; each is tested with ``_has_bool_dtype``
    not_allowed : operator symbols that raise when both operands are bool
    unsupported : mapping from an operator symbol to the operator suggested
        in its place; defaults to ``{'+': '|', '*': '&', '-': '^'}``

    Returns
    -------
    bool
        ``False`` when both operands are bool and ``op_str`` is in
        ``unsupported`` (a warning is emitted in that case), ``True``
        otherwise.

    Raises
    ------
    NotImplementedError
        when both operands are bool and ``op_str`` is in ``not_allowed``
    """
    if unsupported is None:
        unsupported = {'+': '|', '*': '&', '-': '^'}

    both_bool = _has_bool_dtype(a) and _has_bool_dtype(b)
    if not both_bool:
        # Nothing bool-specific to check for mixed or non-bool operands.
        return True

    if op_str in unsupported:
        suggestion = unsupported[op_str]
        message = ("evaluating in Python space because the %r operator"
                   " is not supported by numexpr for the bool "
                   "dtype, use %r instead" % (op_str, suggestion))
        warnings.warn(message)
        return False

    if op_str in not_allowed:
        raise NotImplementedError("operator %r not implemented for bool "
                                  "dtypes" % op_str)
    return True


def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True,
Expand All @@ -193,7 +206,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True,
return the results
use_numexpr : whether to try to use numexpr (default True)
"""
_bool_arith_check(op_str, a, b)
use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
if use_numexpr:
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
**eval_kwargs)
Expand Down
47 changes: 45 additions & 2 deletions pandas/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,8 @@ def testit():
def test_bool_ops_raise_on_arithmetic(self):
df = DataFrame({'a': np.random.rand(10) > 0.5,
'b': np.random.rand(10) > 0.5})
names = 'add', 'mul', 'sub', 'div', 'truediv', 'floordiv', 'pow'
ops = '+', '*', '-', '/', '/', '//', '**'
names = 'div', 'truediv', 'floordiv', 'pow'
ops = '/', '/', '//', '**'
msg = 'operator %r not implemented for bool dtypes'
for op, name in zip(ops, names):
if not compat.PY3 or name != 'div':
Expand All @@ -369,6 +369,49 @@ def test_bool_ops_raise_on_arithmetic(self):
with tm.assertRaisesRegexp(TypeError, err_msg):
f(df, True)

def test_bool_ops_warn_on_arithmetic(self):
    """Check that ``+``, ``*`` and ``-`` on bool operands warn and match the
    results of ``|``, ``&`` and ``^`` respectively."""
    size = 10
    df = DataFrame({'a': np.random.rand(size) > 0.5,
                    'b': np.random.rand(size) > 0.5})
    arith_names = 'add', 'mul', 'sub'
    symbols = '+', '*', '-'
    # arithmetic symbol -> logical symbol -> name of the function in ``operator``
    subs = {'+': '|', '*': '&', '-': '^'}
    sub_funcs = {'|': 'or_', '&': 'and_', '^': 'xor'}

    for symbol, arith_name in zip(symbols, arith_names):
        arith = getattr(operator, arith_name)
        logical = getattr(operator, sub_funcs[subs[symbol]])

        with tm.use_numexpr(True, min_elements=5):
            # (left operand, right operand, equality assertion for the result)
            cases = [
                (df, df, tm.assert_frame_equal),
                (df.a, df.b, tm.assert_series_equal),
                (df.a, True, tm.assert_series_equal),
                (False, df.a, tm.assert_series_equal),
                (False, df, tm.assert_frame_equal),
                (df, True, tm.assert_frame_equal),
            ]
            for lhs, rhs, assert_equal in cases:
                # Both evaluations run inside the warning check; the
                # comparison of the two results happens after it.
                with tm.assert_produces_warning():
                    result = arith(lhs, rhs)
                    expected = logical(lhs, rhs)
                assert_equal(result, expected)


if __name__ == '__main__':
import nose
Expand Down
13 changes: 13 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
raise_with_traceback, httplib
)

from pandas.computation import expressions as expr

from pandas import bdate_range
from pandas.tseries.index import DatetimeIndex
from pandas.tseries.period import PeriodIndex
Expand Down Expand Up @@ -1576,3 +1578,14 @@ def __enter__(self):
def __exit__(self, exc_type, exc_value, traceback):

np.random.set_state(self.start_state)


@contextmanager
def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
    """Temporarily override the numexpr settings held by ``expr``.

    Parameters
    ----------
    use : value passed to ``expr.set_use_numexpr`` for the duration of the
        ``with`` block
    min_elements : value assigned to ``expr._MIN_ELEMENTS`` inside the block;
        defaults to the value ``expr._MIN_ELEMENTS`` had when this function
        was defined

    On exit the previous ``expr._MIN_ELEMENTS`` is restored and
    ``expr.set_use_numexpr`` is called with the previous ``expr._USE_NUMEXPR``.
    """
    olduse = expr._USE_NUMEXPR
    oldmin = expr._MIN_ELEMENTS
    expr.set_use_numexpr(use)
    expr._MIN_ELEMENTS = min_elements
    try:
        yield
    finally:
        # Restore even when the body raises (e.g. a failing assertion), so
        # the override cannot leak into code that runs afterwards.
        expr._MIN_ELEMENTS = oldmin
        expr.set_use_numexpr(olduse)