Skip to content

[CLN] parametrize and cleanup a bunch of tests #22093

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 31, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,29 +74,29 @@ def test_corr_non_numeric(self):
tm.assert_frame_equal(result, expected)

@td.skip_if_no_scipy
def test_corr_nooverlap(self):
@pytest.mark.parametrize('meth', ['pearson', 'kendall', 'spearman'])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should make this a fixture

def test_corr_nooverlap(self, meth):
# nothing in common
for meth in ['pearson', 'kendall', 'spearman']:
df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan],
'B': [np.nan, np.nan, np.nan, 1, 1.5, 1],
'C': [np.nan, np.nan, np.nan, np.nan,
np.nan, np.nan]})
rs = df.corr(meth)
assert isna(rs.loc['A', 'B'])
assert isna(rs.loc['B', 'A'])
assert rs.loc['A', 'A'] == 1
assert rs.loc['B', 'B'] == 1
assert isna(rs.loc['C', 'C'])
df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan],
'B': [np.nan, np.nan, np.nan, 1, 1.5, 1],
'C': [np.nan, np.nan, np.nan, np.nan,
np.nan, np.nan]})
rs = df.corr(meth)
assert isna(rs.loc['A', 'B'])
assert isna(rs.loc['B', 'A'])
assert rs.loc['A', 'A'] == 1
assert rs.loc['B', 'B'] == 1
assert isna(rs.loc['C', 'C'])

@td.skip_if_no_scipy
def test_corr_constant(self):
@pytest.mark.parametrize('meth', ['pearson', 'spearman'])
def test_corr_constant(self, meth):
# constant --> all NA

for meth in ['pearson', 'spearman']:
df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan],
'B': [np.nan, np.nan, np.nan, 1, 1, 1]})
rs = df.corr(meth)
assert isna(rs.values).all()
df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan],
'B': [np.nan, np.nan, np.nan, 1, 1, 1]})
rs = df.corr(meth)
assert isna(rs.values).all()

def test_corr_int(self):
# dtypes other than float64 #1761
Expand Down Expand Up @@ -658,21 +658,21 @@ def test_numeric_only_flag(self, meth):
pytest.raises(TypeError, lambda: getattr(df2, meth)(
axis=1, numeric_only=False))

def test_mixed_ops(self):
@pytest.mark.parametrize('op', ['mean', 'std', 'var',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have these as fixtures?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not AFAICT

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cython_table_items yields these (well it yields them all, maybe need to make a subset)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well it yields them all, maybe need to make a subset

Consider:

@pytest.mark.parametrize('op', SPECIFIC_EXPLICIT_LIST)
def test_thing(self, op):

versus:

def test_thing(self, cython_table_items):
     if cython_table_items not in SPECIFIC_EXPLICIT_LIST:
          return

There are good use cases for fixtures, but this isn't near the top of that list.

'skew', 'kurt', 'sem'])
def test_mixed_ops(self, op):
# GH 16116
df = DataFrame({'int': [1, 2, 3, 4],
'float': [1., 2., 3., 4.],
'str': ['a', 'b', 'c', 'd']})

for op in ['mean', 'std', 'var', 'skew',
'kurt', 'sem']:
result = getattr(df, op)()
assert len(result) == 2

with pd.option_context('use_bottleneck', False):
result = getattr(df, op)()
assert len(result) == 2

with pd.option_context('use_bottleneck', False):
result = getattr(df, op)()
assert len(result) == 2

def test_cumsum(self):
self.tsframe.loc[5:10, 0] = nan
self.tsframe.loc[10:15, 1] = nan
Expand Down
17 changes: 8 additions & 9 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,15 @@ def test_apply_standard_nonunique(self):
rs = df.T.apply(lambda s: s[0], axis=0)
assert_series_equal(rs, xp)

def test_with_string_args(self):

for arg in ['sum', 'mean', 'min', 'max', 'std']:
result = self.frame.apply(arg)
expected = getattr(self.frame, arg)()
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('arg', ['sum', 'mean', 'min', 'max', 'std'])
def test_with_string_args(self, arg):
result = self.frame.apply(arg)
expected = getattr(self.frame, arg)()
tm.assert_series_equal(result, expected)

result = self.frame.apply(arg, axis=1)
expected = getattr(self.frame, arg)(axis=1)
tm.assert_series_equal(result, expected)
result = self.frame.apply(arg, axis=1)
expected = getattr(self.frame, arg)(axis=1)
tm.assert_series_equal(result, expected)

def test_apply_broadcast_deprecated(self):
with tm.assert_produces_warning(FutureWarning):
Expand Down
29 changes: 6 additions & 23 deletions pandas/tests/frame/test_axis_select_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,29 +674,12 @@ def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
assert_frame_equal(aa, ea)
assert_frame_equal(ab, eb)

def test_align_fill_method_inner(self):
for meth in ['pad', 'bfill']:
for ax in [0, 1, None]:
for fax in [0, 1]:
self._check_align_fill('inner', meth, ax, fax)

def test_align_fill_method_outer(self):
for meth in ['pad', 'bfill']:
for ax in [0, 1, None]:
for fax in [0, 1]:
self._check_align_fill('outer', meth, ax, fax)

def test_align_fill_method_left(self):
for meth in ['pad', 'bfill']:
for ax in [0, 1, None]:
for fax in [0, 1]:
self._check_align_fill('left', meth, ax, fax)

def test_align_fill_method_right(self):
for meth in ['pad', 'bfill']:
for ax in [0, 1, None]:
for fax in [0, 1]:
self._check_align_fill('right', meth, ax, fax)
@pytest.mark.parametrize('meth', ['pad', 'bfill'])
@pytest.mark.parametrize('ax', [0, 1, None])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have the axis arg as a fixture (though may need to add another one which adds None to it)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok for sure need to use the fixture (but again can come back later on this)

@pytest.mark.parametrize('fax', [0, 1])
@pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right'])
def test_align_fill_method(self, how, meth, ax, fax):
self._check_align_fill(how, meth, ax, fax)

def _check_align_fill(self, kind, meth, ax, fax):
left = self.frame.iloc[0:4, :10]
Expand Down
74 changes: 39 additions & 35 deletions pandas/tests/frame/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,18 @@ def test_operators(self):
assert (df + df).equals(df)
assert_frame_equal(df + df, df)

def test_ops_np_scalar(self):
vals, xs = np.random.rand(5, 3), [nan, 7, -23, 2.718, -3.14, np.inf]
@pytest.mark.parametrize('other', [nan, 7, -23, 2.718, -3.14, np.inf])
def test_ops_np_scalar(self, other):
vals = np.random.randn(5, 3)
f = lambda x: DataFrame(x, index=list('ABCDE'),
columns=['jim', 'joe', 'jolie'])

df = f(vals)

for x in xs:
assert_frame_equal(df / np.array(x), f(vals / x))
assert_frame_equal(np.array(x) * df, f(vals * x))
assert_frame_equal(df + np.array(x), f(vals + x))
assert_frame_equal(np.array(x) - df, f(x - vals))
assert_frame_equal(df / np.array(other), f(vals / other))
assert_frame_equal(np.array(other) * df, f(vals * other))
assert_frame_equal(df + np.array(other), f(vals + other))
assert_frame_equal(np.array(other) - df, f(other - vals))

def test_operators_boolean(self):

Expand Down Expand Up @@ -116,41 +116,40 @@ def test_operators_boolean(self):
True, index=[1], columns=['A'])
assert_frame_equal(result, DataFrame(1, index=[1], columns=['A']))

def f():
DataFrame(1.0, index=[1], columns=['A']) | DataFrame(
True, index=[1], columns=['A'])
pytest.raises(TypeError, f)
df1 = DataFrame(1.0, index=[1], columns=['A'])
df2 = DataFrame(True, index=[1], columns=['A'])
with pytest.raises(TypeError):
df1 | df2

def f():
DataFrame('foo', index=[1], columns=['A']) | DataFrame(
True, index=[1], columns=['A'])
pytest.raises(TypeError, f)
df1 = DataFrame('foo', index=[1], columns=['A'])
df2 = DataFrame(True, index=[1], columns=['A'])
with pytest.raises(TypeError):
df1 | df2

def test_operators_none_as_na(self):
@pytest.mark.parametrize('op', [operator.add, operator.sub,
operator.mul, operator.truediv])
def test_operators_none_as_na(self, op):
df = DataFrame({"col1": [2, 5.0, 123, None],
"col2": [1, 2, 3, 4]}, dtype=object)

ops = [operator.add, operator.sub, operator.mul, operator.truediv]

# since filling converts dtypes from object, changed expected to be
# object
for op in ops:
filled = df.fillna(np.nan)
result = op(df, 3)
expected = op(filled, 3).astype(object)
expected[com.isna(expected)] = None
assert_frame_equal(result, expected)
filled = df.fillna(np.nan)
result = op(df, 3)
expected = op(filled, 3).astype(object)
expected[com.isna(expected)] = None
assert_frame_equal(result, expected)

result = op(df, df)
expected = op(filled, filled).astype(object)
expected[com.isna(expected)] = None
assert_frame_equal(result, expected)
result = op(df, df)
expected = op(filled, filled).astype(object)
expected[com.isna(expected)] = None
assert_frame_equal(result, expected)

result = op(df, df.fillna(7))
assert_frame_equal(result, expected)
result = op(df, df.fillna(7))
assert_frame_equal(result, expected)

result = op(df.fillna(7), df)
assert_frame_equal(result, expected, check_dtype=False)
result = op(df.fillna(7), df)
assert_frame_equal(result, expected, check_dtype=False)

def test_comparison_invalid(self):

Expand Down Expand Up @@ -978,8 +977,11 @@ def test_boolean_comparison(self):
result = df.values > b_r
assert_numpy_array_equal(result, expected.values)

pytest.raises(ValueError, df.__gt__, b_c)
pytest.raises(ValueError, df.values.__gt__, b_c)
with pytest.raises(ValueError):
df > b_c

with pytest.raises(ValueError):
df.values > b_c

# ==
expected = DataFrame([[False, False], [True, False], [False, False]])
Expand All @@ -998,7 +1000,9 @@ def test_boolean_comparison(self):
result = df.values == b_r
assert_numpy_array_equal(result, expected.values)

pytest.raises(ValueError, lambda: df == b_c)
with pytest.raises(ValueError):
df == b_c

assert df.values.shape != b_c.shape

# with alignment
Expand Down
13 changes: 6 additions & 7 deletions pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,11 +1029,10 @@ def test_bool_arith_expr(self, parser, engine):
expect = self.frame.a[self.frame.a < 1] + self.frame.b
assert_series_equal(res, expect)

def test_invalid_type_for_operator_raises(self, parser, engine):
@pytest.mark.parametrize('op', ['+', '-', '*', '/'])
def test_invalid_type_for_operator_raises(self, parser, engine, op):
df = DataFrame({'a': [1, 2], 'b': ['c', 'd']})
ops = '+', '-', '*', '/'
for op in ops:
with tm.assert_raises_regex(TypeError,
r"unsupported operand type\(s\) "
"for .+: '.+' and '.+'"):
df.eval('a {0} b'.format(op), engine=engine, parser=parser)
with tm.assert_raises_regex(TypeError,
r"unsupported operand type\(s\) "
"for .+: '.+' and '.+'"):
df.eval('a {0} b'.format(op), engine=engine, parser=parser)
14 changes: 6 additions & 8 deletions pandas/tests/frame/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,14 +547,12 @@ def test_regex_replace_numeric_to_object_conversion(self):
assert_frame_equal(res, expec)
assert res.a.dtype == np.object_

def test_replace_regex_metachar(self):
metachars = '[]', '()', r'\d', r'\w', r'\s'

for metachar in metachars:
df = DataFrame({'a': [metachar, 'else']})
result = df.replace({'a': {metachar: 'paren'}})
expected = DataFrame({'a': ['paren', 'else']})
assert_frame_equal(result, expected)
@pytest.mark.parametrize('metachar', ['[]', '()', r'\d', r'\w', r'\s'])
def test_replace_regex_metachar(self, metachar):
df = DataFrame({'a': [metachar, 'else']})
result = df.replace({'a': {metachar: 'paren'}})
expected = DataFrame({'a': ['paren', 'else']})
assert_frame_equal(result, expected)

def test_replace(self):
self.tsframe['A'][:5] = nan
Expand Down
28 changes: 14 additions & 14 deletions pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,21 +855,21 @@ def _test_stack_with_multiindex(multiindex):
dtype=df.dtypes[0])
assert_frame_equal(result, expected)

def test_stack_preserve_categorical_dtype(self):
@pytest.mark.parametrize('ordered', [False, True])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think ordered is a fixture already

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in conftest.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pandas/tests/arrays/categorical/conftest.py

maybe can simply move it to top level and use it

Copy link
Member

@gfyoung gfyoung Jul 30, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I would love to create a helper in utils/testing.py to generate boolean fixtures like these. Not sure if it's possible, but just throwing it out there for thought.

@pytest.mark.parametrize('labels', [list("yxz"), list("yxy")])
def test_stack_preserve_categorical_dtype(self, ordered, labels):
# GH13854
for ordered in [False, True]:
for labels in [list("yxz"), list("yxy")]:
cidx = pd.CategoricalIndex(labels, categories=list("xyz"),
ordered=ordered)
df = DataFrame([[10, 11, 12]], columns=cidx)
result = df.stack()

# `MultiIndex.from_product` preserves categorical dtype -
# it's tested elsewhere.
midx = pd.MultiIndex.from_product([df.index, cidx])
expected = Series([10, 11, 12], index=midx)

tm.assert_series_equal(result, expected)
cidx = pd.CategoricalIndex(labels, categories=list("xyz"),
ordered=ordered)
df = DataFrame([[10, 11, 12]], columns=cidx)
result = df.stack()

# `MultiIndex.from_product` preserves categorical dtype -
# it's tested elsewhere.
midx = pd.MultiIndex.from_product([df.index, cidx])
expected = Series([10, 11, 12], index=midx)

tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("level", [0, 'baz'])
def test_unstack_swaplevel_sortlevel(self, level):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,9 +1038,10 @@ def test_add_raises(self):
dt1 + dt2

boxes = [lambda x: x, lambda x: pd.Series([x]), lambda x: pd.Index([x])]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe move these 2 lines to the top of the file?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we use them more than once it makes sense, but ATM they are defined right next to where they are used, which is pretty ideal.

ids = ['identity', 'Series', 'Index']

@pytest.mark.parametrize('lbox', boxes)
@pytest.mark.parametrize('rbox', boxes)
@pytest.mark.parametrize('lbox', boxes, ids=ids)
@pytest.mark.parametrize('rbox', boxes, ids=ids)
def test_add_timestamp_raises(self, rbox, lbox):
# GH 17983
ts = pd.Timestamp('2017')
Expand Down
Loading