From 69f2e6fd31bc4d917ddc2f5f419c023d3d1bb820 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Oct 2019 11:52:07 -0700 Subject: [PATCH 1/5] Removed generate_bins_generic --- pandas/core/groupby/ops.py | 53 ------------------------ pandas/tests/groupby/test_bin_groupby.py | 45 +++++--------------- 2 files changed, 11 insertions(+), 87 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e6f4f2f056058..e8dc0378060b0 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -52,59 +52,6 @@ ) -def generate_bins_generic(values, binner, closed): - """ - Generate bin edge offsets and bin labels for one array using another array - which has bin edge values. Both arrays must be sorted. - - Parameters - ---------- - values : array of values - binner : a comparable array of values representing bins into which to bin - the first array. Note, 'values' end-points must fall within 'binner' - end-points. - closed : which end of bin is closed; left (default), right - - Returns - ------- - bins : array of offsets (into 'values' argument) of bins. - Zero and last edge are excluded in result, so for instance the first - bin is values[0:bin[0]] and the last is values[bin[-1]:] - """ - lenidx = len(values) - lenbin = len(binner) - - if lenidx <= 0 or lenbin <= 0: - raise ValueError("Invalid length for values or for binner") - - # check binner fits data - if values[0] < binner[0]: - raise ValueError("Values falls before first bin") - - if values[lenidx - 1] > binner[lenbin - 1]: - raise ValueError("Values falls after last bin") - - bins = np.empty(lenbin - 1, dtype=np.int64) - - j = 0 # index into values - bc = 0 # bin count - - # linear scan, presume nothing about values/binner except that it fits ok - for i in range(0, lenbin - 1): - r_bin = binner[i + 1] - - # count values in current bin, advance to next bin - while j < lenidx and ( - values[j] < r_bin or (closed == "right" and values[j] == r_bin) - ): - j += 1 - - bins[bc] = j - bc += 1 - - return bins - - class BaseGrouper: """ This is an internal Grouper class, which actually holds diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 8da03a7f61029..2972639b404c2 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -1,12 +1,10 @@ import numpy as np -import pytest from pandas._libs import groupby, lib, reduction as libreduction from pandas.core.dtypes.common import ensure_int64 from pandas import Index, Series, isna -from pandas.core.groupby.ops import generate_bins_generic import pandas.util.testing as tm from pandas.util.testing import assert_almost_equal @@ -43,42 +41,21 @@ def test_series_bin_grouper(): assert_almost_equal(counts, exp_counts) -class TestBinGroupers: - def setup_method(self, method): - self.obj = np.random.randn(10, 1) - self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64) - self.bins = np.array([3, 6], dtype=np.int64) +def test_generate_bins(): + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + binner = np.array([0, 3, 6, 9], dtype=np.int64) - def test_generate_bins(self): - values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) - binner = np.array([0, 3, 6, 9], dtype=np.int64) + bins = lib.generate_bins_dt64(values, binner, closed="left") + assert (bins == np.array([2, 5, 6])).all() - for func in [lib.generate_bins_dt64, generate_bins_generic]: - bins = func(values, binner, closed="left") - assert (bins == np.array([2, 5, 6])).all() + bins = lib.generate_bins_dt64(values, binner, closed="right") + assert (bins == np.array([3, 6, 6])).all() - bins = func(values, binner, closed="right") - assert (bins == np.array([3, 6, 6])).all() + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + binner = np.array([0, 3, 6], dtype=np.int64) - for func in [lib.generate_bins_dt64, generate_bins_generic]: - values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) - binner = np.array([0, 3, 6], dtype=np.int64) - - bins = func(values, binner, closed="right") - assert (bins == np.array([3, 6])).all() - - msg = "Invalid length for values or for binner" - with pytest.raises(ValueError, match=msg): - generate_bins_generic(values, [], "right") - with pytest.raises(ValueError, match=msg): - generate_bins_generic(values[:0], binner, "right") - - msg = "Values falls before first bin" - with pytest.raises(ValueError, match=msg): - generate_bins_generic(values, [4], "right") - msg = "Values falls after last bin" - with pytest.raises(ValueError, match=msg): - generate_bins_generic(values, [-3, -1], "right") + bins = lib.generate_bins_dt64(values, binner, closed="right") + assert (bins == np.array([3, 6])).all() def test_group_ohlc(): From b1908c9b9a1b5dd213c08606f6c837f8fce68f22 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Oct 2019 13:11:32 -0700 Subject: [PATCH 2/5] Parametrized --- pandas/tests/groupby/test_bin_groupby.py | 27 ++++++++++-------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 2972639b404c2..4a8261d382a09 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas._libs import groupby, lib, reduction as libreduction @@ -6,7 +7,7 @@ from pandas import Index, Series, isna import pandas.util.testing as tm -from pandas.util.testing import assert_almost_equal +from pandas.util.testing import assert_almost_equal, assert_numpy_array_equal def test_series_grouper(): @@ -41,21 +42,15 @@ def test_series_bin_grouper(): assert_almost_equal(counts, exp_counts) -def test_generate_bins(): - values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) - binner = np.array([0, 3, 6, 9], dtype=np.int64) - - bins = lib.generate_bins_dt64(values, binner, closed="left") - assert (bins == np.array([2, 5, 6])).all() - - bins = lib.generate_bins_dt64(values, binner, closed="right") - assert (bins == np.array([3, 6, 6])).all() - - values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) - binner = np.array([0, 3, 6], dtype=np.int64) - - bins = lib.generate_bins_dt64(values, binner, closed="right") - assert (bins == np.array([3, 6])).all() +@pytest.mark.parametrize("binner,closed,expected", [ + (np.array([0, 3, 6, 9]), "left", np.array([2, 5, 6])), + (np.array([0, 3, 6, 9]), "right", np.array([3, 6, 6])), + (np.array([0, 3, 6]), "left", np.array([2, 5])), + (np.array([0, 3, 6]), "right", np.array([3, 6]))]) +def test_generate_bins(binner, closed, expected): + values = np.array([1, 2, 3, 4, 5, 6]) + result = lib.generate_bins_dt64(values, binner, closed=closed) + assert_numpy_array_equal(result, expected) def test_group_ohlc(): From 92286a1c43152905dd2f26c86b4cba0459a34fc5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Oct 2019 13:13:24 -0700 Subject: [PATCH 3/5] Reverted back conf merge? --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index cdabf2d470839..13d3324caf249 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -378,7 +378,7 @@ "index", "pandas.tex", "pandas: powerful Python data analysis toolkit", - r"Wes McKinney\n\& PyData Development Team", + "Wes McKinney and the Pandas Development Team", "manual", ) ] From 654d1a5aa06bc9e8c476bbccbef8e6929d601a6b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Oct 2019 14:24:29 -0700 Subject: [PATCH 4/5] 32bit / windows compat --- pandas/tests/groupby/test_bin_groupby.py | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 4a8261d382a09..2022b6b540ed3 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -42,13 +42,29 @@ def test_series_bin_grouper(): assert_almost_equal(counts, exp_counts) -@pytest.mark.parametrize("binner,closed,expected", [ - (np.array([0, 3, 6, 9]), "left", np.array([2, 5, 6])), - (np.array([0, 3, 6, 9]), "right", np.array([3, 6, 6])), - (np.array([0, 3, 6]), "left", np.array([2, 5])), - (np.array([0, 3, 6]), "right", np.array([3, 6]))]) +@pytest.mark.parametrize( + "binner,closed,expected", + [ + ( + np.array([0, 3, 6, 9], dtype=np.int64), + "left", + np.array([2, 5, 6], dtype=np.int64), + ), + ( + np.array([0, 3, 6, 9], dtype=np.int64), + "right", + np.array([3, 6, 6], dtype=np.int64), + ), + (np.array([0, 3, 6], dtype=np.int64), "left", np.array([2, 5], dtype=np.int64)), + ( + np.array([0, 3, 6], dtype=np.int64), + "right", + np.array([3, 6], dtype=np.int64), + ), + ], +) def test_generate_bins(binner, closed, expected): - values = np.array([1, 2, 3, 4, 5, 6]) + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) result = lib.generate_bins_dt64(values, binner, closed=closed) assert_numpy_array_equal(result, expected) From 320681961a095b601dd279c2453700b3c871449a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 24 Oct 2019 12:25:50 -0700 Subject: [PATCH 5/5] Consistent test util import --- pandas/tests/groupby/test_bin_groupby.py | 25 ++++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 2022b6b540ed3..0e7a66769d2d4 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -7,7 +7,6 @@ from pandas import Index, Series, isna import pandas.util.testing as tm -from pandas.util.testing import assert_almost_equal, assert_numpy_array_equal def test_series_grouper(): @@ -20,10 +19,10 @@ def test_series_grouper(): result, counts = grouper.get_result() expected = np.array([obj[3:6].mean(), obj[6:].mean()]) - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) exp_counts = np.array([3, 4], dtype=np.int64) - assert_almost_equal(counts, exp_counts) + tm.assert_almost_equal(counts, exp_counts) def test_series_bin_grouper(): @@ -36,10 +35,10 @@ def test_series_bin_grouper(): result, counts = grouper.get_result() expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) exp_counts = np.array([3, 3, 4], dtype=np.int64) - assert_almost_equal(counts, exp_counts) + tm.assert_almost_equal(counts, exp_counts) @pytest.mark.parametrize( @@ -66,7 +65,7 @@ def test_series_bin_grouper(): def test_generate_bins(binner, closed, expected): values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) result = lib.generate_bins_dt64(values, binner, closed=closed) - assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) def test_group_ohlc(): @@ -88,13 +87,13 @@ def _ohlc(group): expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])]) - assert_almost_equal(out, expected) + tm.assert_almost_equal(out, expected) tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64)) obj[:6] = np.nan func(out, counts, obj[:, None], labels) expected[0] = np.nan - assert_almost_equal(out, expected) + tm.assert_almost_equal(out, expected) _check("float32") _check("float64") @@ -109,29 +108,29 @@ def test_int_index(self): arr = np.random.randn(100, 4) result = libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4))) expected = arr.sum(0) - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) result = libreduction.compute_reduction( arr, np.sum, axis=1, labels=Index(np.arange(100)) ) expected = arr.sum(1) - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(100)) result = libreduction.compute_reduction( arr, np.sum, dummy=dummy, labels=Index(np.arange(4)) ) expected = arr.sum(0) - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(4)) result = libreduction.compute_reduction( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) expected = arr.sum(1) - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected) result = libreduction.compute_reduction( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) - assert_almost_equal(result, expected) + tm.assert_almost_equal(result, expected)