From bd9011aaa04f16be785672e2f6c6f3e0862584d6 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 18 Apr 2020 18:16:33 +0000
Subject: [PATCH 01/31] made nan count when dropna=False

---
 doc/source/whatsnew/v1.0.3.rst         | 1 +
 pandas/core/algorithms.py              | 5 ++---
 pandas/core/groupby/generic.py         | 3 ++-
 pandas/tests/base/test_value_counts.py | 8 ++++++++
 4 files changed, 13 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 pandas/core/groupby/generic.py

diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst
index 26d06433bda0c..0418845022b5c 100644
--- a/doc/source/whatsnew/v1.0.3.rst
+++ b/doc/source/whatsnew/v1.0.3.rst
@@ -22,6 +22,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
+:issue: `25970` Fixed Series.value_counts so that normalize excludes NA values when dropna=False.
 
 Contributors
 ~~~~~~~~~~~~
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 62a3808d36ba2..18a839d71af11 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -695,7 +695,6 @@ def value_counts(
 
         # count, remove nulls (from the index), and but the bins
         result = ii.value_counts(dropna=dropna)
-        result = result[result.index.notna()]
         result.index = result.index.astype("interval")
         result = result.sort_index()
 
@@ -703,8 +702,8 @@ def value_counts(
         if dropna and (result._values == 0).all():
             result = result.iloc[0:0]
 
-        # normalizing is by len of all (regardless of dropna)
-        counts = np.array([len(ii)])
+        # normalizing is by len of what gets included in the bins
+        counts = result._values
 
     else:
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
old mode 100644
new mode 100755
index c007d4920cbe7..594ebc4e4570c
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -675,7 +675,7 @@ def value_counts(
         from pandas.core.reshape.tile import cut
         from pandas.core.reshape.merge import _get_join_indexers
 
-        if bins is not None and not np.iterable(bins):
+        if bins is not None:# and not np.iterable(bins):
             # scalar bins cannot be done at top level
             # in a backward compatible way
             return self.apply(
@@ -684,6 +684,7 @@ def value_counts(
                 sort=sort,
                 ascending=ascending,
                 bins=bins,
+                dropna=dropna
             )
 
         ids, _, _ = self.grouper.group_info
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index d45feaff68dde..a66a2d1dafd11 100644
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -190,6 +190,14 @@ def test_value_counts_bins(index_or_series):
 
     assert s.nunique() == 0
 
+    # handle normalizing bins with NA's properly
+    # see GH25970
+    s2 = Series([1,2,2,3,3,3, np.nan, np.nan, 4, 5])
+    intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
+    expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1,0,2]))
+    expected_keepna_vals = np.array([0.3, 0.3, 0.2, 0.2])
+    tm.assert_series_equal(s2.value_counts(dropna=True, normalize=True, bins=3), expected_dropna)
+    tm.assert_numpy_array_equal(s2.value_counts(dropna=False, normalize=True, bins=3).values, expected_keepna_vals)
 
 def test_value_counts_datetime64(index_or_series):
     klass = index_or_series

From d9d5ec15bb1b258e327163498864155becba8857 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 18 Apr 2020 18:18:37 +0000
Subject: [PATCH 02/31] updated changelog

---
 doc/source/whatsnew/v1.0.3.rst         | 0
 pandas/tests/base/test_value_counts.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 doc/source/whatsnew/v1.0.3.rst
 mode change 100644 => 100755 pandas/tests/base/test_value_counts.py

diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst
old mode 100644
new mode 100755
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
old mode 100644
new mode 100755

From 86fe7f9a44c150c08e92bbbe62c8829ad270e7b3 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 18 Apr 2020 18:20:19 +0000
Subject: [PATCH 03/31] trivial

---
 pandas/core/algorithms.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 pandas/core/algorithms.py

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
old mode 100644
new mode 100755

From c34a863abd60649ebef8b1c812f8f0d958c478df Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 18 Apr 2020 23:43:06 +0000
Subject: [PATCH 04/31] added specific test for groupby valuecount interval fix

---
 doc/source/whatsnew/v1.0.3.rst            |  4 ++--
 pandas/tests/groupby/test_value_counts.py | 22 +++++++++++++++++++---
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst
index 0418845022b5c..8184d979d2c50 100755
--- a/doc/source/whatsnew/v1.0.3.rst
+++ b/doc/source/whatsnew/v1.0.3.rst
@@ -22,8 +22,8 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
-:issue: `25970` Fixed Series.value_counts so that normalize excludes NA values when dropna=False.
-
+Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
+Fixed Dataframe Groupby value_counts with bins (:issue:`32471')
 Contributors
 ~~~~~~~~~~~~
 
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index c86cb4532bc26..4b12a1e0b2da4 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
+from pandas import DataFrame, Grouper, MultiIndex, Series, cut, date_range, to_datetime
 import pandas._testing as tm
 
 
@@ -41,13 +41,12 @@ def seed_df(seed_nans, n, m):
 ids = []
 for seed_nans in [True, False]:
     for n, m in product((100, 1000), (5, 20)):
-
         df = seed_df(seed_nans, n, m)
         bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2)
         keys = "1st", "2nd", ["1st", "2nd"]
         for k, b in product(keys, bins):
             binned.append((df, k, b, n, m))
-            ids.append(f"{k}-{n}-{m}")
+            ids.append(f"{k}-{n}-{m}-{seed_nans} ")
 
 
 @pytest.mark.slow
@@ -71,6 +70,7 @@ def rebuild_index(df):
 
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
+    left.index.names = left.index.names[:-1] + ["3rd"]
 
     gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
@@ -81,6 +81,22 @@ def rebuild_index(df):
     tm.assert_series_equal(left.sort_index(), right.sort_index())
 
 
+def test_groubpy_value_counts_bins():
+    # GH32471
+    BINS = [0, 20, 80, 100]
+    df = DataFrame(
+        [[0, 0], [1, 100], [0, 100], [2, 0], [3, 100]], columns=["key", "score"]
+    )
+    result = df.groupby("key")["score"].value_counts(bins=BINS)
+    result.sort_index(inplace=True)
+    intervals = cut(Series([0]), bins=BINS, include_lowest=True).cat.categories
+    index = MultiIndex.from_product(
+        [[0, 1, 2, 3], sorted(intervals)], names=("key", None)
+    )
+    expected = Series([1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1], index, name="score")
+    tm.assert_series_equal(result, expected)
+
+
 def test_series_groupby_value_counts_with_grouper():
     # GH28479
     df = DataFrame(

From 5f8eb1d775633732e00e26de66c21199645c6081 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 19 Apr 2020 12:35:30 +0000
Subject: [PATCH 05/31] updated value_count docstrings

---
 pandas/core/algorithms.py      |  14 ++--
 pandas/core/base.py            |  28 +++++--
 pandas/core/groupby/generic.py | 135 ++-------------------------------
 3 files changed, 38 insertions(+), 139 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 110f8f95927ee..c2f9f1aa73922 100755
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -663,12 +663,16 @@ def value_counts(
     ascending : bool, default False
         Sort in ascending order
     normalize: bool, default False
-        If True then compute a relative histogram
-    bins : integer, optional
-        Rather than count values, group them into half-open bins,
-        convenience for pd.cut, only works with numeric data
+        If True, then compute a relative histogram that outputs the
+        proportion of each value.
+    bins : integer or iterable of numeric, optional
+        Rather than count values, group them into half-open bins.
+        Only works with numeric data.
+        If int, interpreted as number of bins and will use pd.cut.
+        If iterable of numeric, will use provided numbers as bin endpoints.
     dropna : bool, default True
-        Don't include counts of NaN
+        Don't include counts of NaN.
+        If False and NaNs are present, NaN will be a key in the output.
 
     Returns
     -------
diff --git a/pandas/core/base.py b/pandas/core/base.py
index ee514888c6331..122cfabd20768 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1176,17 +1176,20 @@ def value_counts(
         Parameters
         ----------
         normalize : bool, default False
-            If True then the object returned will contain the relative
-            frequencies of the unique values.
+            If True, outputs the relative frequencies of the unique values.
         sort : bool, default True
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
-        bins : int, optional
-            Rather than count values, group them into half-open bins,
-            a convenience for ``pd.cut``, only works with numeric data.
+         bins : integer or iterable of numeric, optional
+            Rather than count individual values, group them into half-open bins.
+            Only works with numeric data.
+            If int, interpreted as number of bins and will use ``pd.cut``.
+            If interable of numeric, will use provided numbers as bin endpoints.
+
         dropna : bool, default True
             Don't include counts of NaN.
+            If False and NaNs are present, NaN will be a key in the output.
 
         Returns
         -------
@@ -1223,8 +1226,10 @@ def value_counts(
 
         Bins can be useful for going from a continuous variable to a
         categorical variable; instead of counting unique
-        apparitions of values, divide the index in the specified
-        number of half-open bins.
+        instances of values, count the number of values that fall
+        into half-open intervals.
+
+        Bins can be an int.
 
         >>> s.value_counts(bins=3)
         (2.0, 3.0]      2
@@ -1232,6 +1237,15 @@ def value_counts(
         (3.0, 4.0]      1
         dtype: int64
 
+        Bins can also be an iterable of numbers.  These numbers are treated
+        as endpoints for the intervals.
+
+        >>> s.value_counts(bins=[0,2,4,9])
+        (2.0, 4.0]       3
+        (-0.001, 2.0]    2
+        (4.0, 9.0]       0
+        dtype: int64
+
         **dropna**
 
         With `dropna` set to `False` we can also see NaN index values.
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 594ebc4e4570c..6eaf652ff6ab8 100755
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -7,7 +7,6 @@
 """
 from collections import abc, namedtuple
 import copy
-from functools import partial
 from textwrap import dedent
 import typing
 from typing import (
@@ -41,11 +40,8 @@
     maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
-    ensure_int64,
     ensure_platform_int,
     is_bool,
-    is_integer_dtype,
-    is_interval_dtype,
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
@@ -671,129 +667,14 @@ def describe(self, **kwargs):
     def value_counts(
         self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
     ):
-
-        from pandas.core.reshape.tile import cut
-        from pandas.core.reshape.merge import _get_join_indexers
-
-        if bins is not None:# and not np.iterable(bins):
-            # scalar bins cannot be done at top level
-            # in a backward compatible way
-            return self.apply(
-                Series.value_counts,
-                normalize=normalize,
-                sort=sort,
-                ascending=ascending,
-                bins=bins,
-                dropna=dropna
-            )
-
-        ids, _, _ = self.grouper.group_info
-        val = self.obj._values
-
-        # groupby removes null keys from groupings
-        mask = ids != -1
-        ids, val = ids[mask], val[mask]
-
-        if bins is None:
-            lab, lev = algorithms.factorize(val, sort=True)
-            llab = lambda lab, inc: lab[inc]
-        else:
-
-            # lab is a Categorical with categories an IntervalIndex
-            lab = cut(Series(val), bins, include_lowest=True)
-            lev = lab.cat.categories
-            lab = lev.take(lab.cat.codes)
-            llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
-
-        if is_interval_dtype(lab):
-            # TODO: should we do this inside II?
-            sorter = np.lexsort((lab.left, lab.right, ids))
-        else:
-            sorter = np.lexsort((lab, ids))
-
-        ids, lab = ids[sorter], lab[sorter]
-
-        # group boundaries are where group ids change
-        idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
-
-        # new values are where sorted labels change
-        lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
-        inc = np.r_[True, lchanges]
-        inc[idx] = True  # group boundaries are also new values
-        out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
-
-        # num. of times each group should be repeated
-        rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
-
-        # multi-index components
-        codes = self.grouper.reconstructed_codes
-        codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
-        levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
-        names = self.grouper.names + [self._selection_name]
-
-        if dropna:
-            mask = codes[-1] != -1
-            if mask.all():
-                dropna = False
-            else:
-                out, codes = out[mask], [level_codes[mask] for level_codes in codes]
-
-        if normalize:
-            out = out.astype("float")
-            d = np.diff(np.r_[idx, len(ids)])
-            if dropna:
-                m = ids[lab == -1]
-                np.add.at(d, m, -1)
-                acc = rep(d)[mask]
-            else:
-                acc = rep(d)
-            out /= acc
-
-        if sort and bins is None:
-            cat = ids[inc][mask] if dropna else ids[inc]
-            sorter = np.lexsort((out if ascending else -out, cat))
-            out, codes[-1] = out[sorter], codes[-1][sorter]
-
-        if bins is None:
-            mi = MultiIndex(
-                levels=levels, codes=codes, names=names, verify_integrity=False
-            )
-
-            if is_integer_dtype(out):
-                out = ensure_int64(out)
-            return Series(out, index=mi, name=self._selection_name)
-
-        # for compat. with libgroupby.value_counts need to ensure every
-        # bin is present at every index level, null filled with zeros
-        diff = np.zeros(len(out), dtype="bool")
-        for level_codes in codes[:-1]:
-            diff |= np.r_[True, level_codes[1:] != level_codes[:-1]]
-
-        ncat, nbin = diff.sum(), len(levels[-1])
-
-        left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
-
-        right = [diff.cumsum() - 1, codes[-1]]
-
-        _, idx = _get_join_indexers(left, right, sort=False, how="left")
-        out = np.where(idx != -1, out[idx], 0)
-
-        if sort:
-            sorter = np.lexsort((out if ascending else -out, left[0]))
-            out, left[-1] = out[sorter], left[-1][sorter]
-
-        # build the multi-index w/ full levels
-        def build_codes(lev_codes: np.ndarray) -> np.ndarray:
-            return np.repeat(lev_codes[diff], nbin)
-
-        codes = [build_codes(lev_codes) for lev_codes in codes[:-1]]
-        codes.append(left[-1])
-
-        mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
-
-        if is_integer_dtype(out):
-            out = ensure_int64(out)
-        return Series(out, index=mi, name=self._selection_name)
+        return self.apply(
+            Series.value_counts,
+            normalize=normalize,
+            sort=sort,
+            ascending=ascending,
+            bins=bins,
+            dropna=dropna,
+        )
 
     def count(self) -> Series:
         """

From 127616697d7af37eed5bcc7d7d13e79dbb843db4 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 19 Apr 2020 13:47:58 +0000
Subject: [PATCH 06/31] fixed pep8 style

---
 doc/source/whatsnew/v1.0.3.rst         |  3 ++-
 pandas/core/base.py                    |  1 +
 pandas/tests/base/test_value_counts.py | 14 ++++++++++----
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst
index 8184d979d2c50..7972948d1d08a 100755
--- a/doc/source/whatsnew/v1.0.3.rst
+++ b/doc/source/whatsnew/v1.0.3.rst
@@ -23,7 +23,8 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
-Fixed Dataframe Groupby value_counts with bins (:issue:`32471')
+Fixed Dataframe Groupby value_counts with bins (:issue:`32471`)
+
 Contributors
 ~~~~~~~~~~~~
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 122cfabd20768..d745aada64cbf 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1258,6 +1258,7 @@ def value_counts(
         1.0    1
         dtype: int64
         """
+
         result = value_counts(
             self,
             sort=sort,
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index a66a2d1dafd11..3b7c8dea2576f 100755
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -192,12 +192,18 @@ def test_value_counts_bins(index_or_series):
 
     # handle normalizing bins with NA's properly
     # see GH25970
-    s2 = Series([1,2,2,3,3,3, np.nan, np.nan, 4, 5])
+    s2 = Series([1, 2, 2, 3, 3, 3, np.nan, np.nan, 4, 5])
     intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
-    expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1,0,2]))
+    expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1, 0, 2]))
     expected_keepna_vals = np.array([0.3, 0.3, 0.2, 0.2])
-    tm.assert_series_equal(s2.value_counts(dropna=True, normalize=True, bins=3), expected_dropna)
-    tm.assert_numpy_array_equal(s2.value_counts(dropna=False, normalize=True, bins=3).values, expected_keepna_vals)
+    tm.assert_series_equal(
+        s2.value_counts(dropna=True, normalize=True, bins=3), expected_dropna
+    )
+    tm.assert_numpy_array_equal(
+        s2.value_counts(dropna=False, normalize=True, bins=3).values,
+        expected_keepna_vals,
+    )
+
 
 def test_value_counts_datetime64(index_or_series):
     klass = index_or_series

From a1b7197d3caa598bf0573d753f260c667dbe9768 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 19 Apr 2020 15:06:28 +0000
Subject: [PATCH 07/31] fixed more minor style

---
 pandas/core/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index d745aada64cbf..0d93967d6ee05 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1184,7 +1184,7 @@ def value_counts(
          bins : integer or iterable of numeric, optional
             Rather than count individual values, group them into half-open bins.
             Only works with numeric data.
-            If int, interpreted as number of bins and will use ``pd.cut``.
+            If int, interpreted as number of bins and will use ``pd.cut``
             If interable of numeric, will use provided numbers as bin endpoints.
 
         dropna : bool, default True

From 9c3ede33c598475e29bb041d3f0e3e8e98276c5c Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Mon, 20 Apr 2020 21:00:59 +0000
Subject: [PATCH 08/31] added test for na in bins

---
 pandas/tests/base/test_value_counts.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index 3b7c8dea2576f..69dcd687f8505 100755
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -190,8 +190,22 @@ def test_value_counts_bins(index_or_series):
 
     assert s.nunique() == 0
 
-    # handle normalizing bins with NA's properly
-    # see GH25970
+
+def test_value_counts_bins_nas():
+    # GH25970, handle normalizing bins with NA's properly
+    # First test that NA's are included appropriately
+    rand_data = np.append(
+        np.random.randint(1, 5, 50), [np.nan] * np.random.randint(1, 20)
+    )
+    s = Series(rand_data)
+    assert s.value_counts(dropna=False).index.hasnans
+    assert not s.value_counts(dropna=True).index.hasnans
+    assert s.value_counts(dropna=False, bins=3).index.hasnans
+    assert not s.value_counts(dropna=True, bins=3).index.hasnans
+    assert s.value_counts(dropna=False, bins=[0, 1, 3, 6]).index.hasnans
+    assert not s.value_counts(dropna=True, bins=[0, 1, 3, 6]).index.hasnans
+
+    # then verify specific example
     s2 = Series([1, 2, 2, 3, 3, 3, np.nan, np.nan, 4, 5])
     intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
     expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1, 0, 2]))

From 0cff92b85862b069b26c7e3be82ae18ef3556ab2 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Mon, 20 Apr 2020 21:03:54 +0000
Subject: [PATCH 09/31] added release notes to 1.1

---
 doc/source/whatsnew/v1.0.3.rst | 3 +--
 doc/source/whatsnew/v1.1.0.rst | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst
index 7972948d1d08a..b7fd746eefba1 100755
--- a/doc/source/whatsnew/v1.0.3.rst
+++ b/doc/source/whatsnew/v1.0.3.rst
@@ -22,8 +22,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
-Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
-Fixed Dataframe Groupby value_counts with bins (:issue:`32471`)
+
 
 Contributors
 ~~~~~~~~~~~~
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 03a547fadd7ca..7335245eae1a8 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -434,7 +434,8 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-
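+- Fixed :meth:`Series.value_counts` with ``bins``: NA values are now counted when ``dropna=False``, and ``normalize=True`` divides by the total of the counts actually returned (:issue:`25970`)
+- Fixed ``DataFrame`` groupby ``value_counts`` with ``bins`` (:issue:`32471`)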
 
 Categorical
 ^^^^^^^^^^^

From 27aa4603a9e30cbd7ea86c32d3a1d781a870dbe3 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 25 Apr 2020 15:25:10 +0000
Subject: [PATCH 10/31] trying to avoid docstring warning

---
 pandas/core/algorithms.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c2f9f1aa73922..c0569dab903b2 100755
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -415,7 +415,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
     if is_categorical_dtype(comps):
         # TODO(extension)
         # handle categoricals
-        return comps.isin(values)  # type: ignore
+        return comps.isin(values)
 
     comps, dtype = _ensure_data(comps)
     values, _ = _ensure_data(values, dtype=dtype)
@@ -678,6 +678,7 @@ def value_counts(
     -------
     Series
     """
+
     from pandas.core.series import Series
 
     name = getattr(values, "name", None)

From 27c985612bb62185b737f6debdfab6768aabd1a8 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 25 Apr 2020 15:48:37 +0000
Subject: [PATCH 11/31] trying to avoid docstring warning

---
 pandas/core/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 0d93967d6ee05..c9cb06e90e3e3 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1184,9 +1184,8 @@ def value_counts(
          bins : integer or iterable of numeric, optional
             Rather than count individual values, group them into half-open bins.
             Only works with numeric data.
-            If int, interpreted as number of bins and will use ``pd.cut``
+            If int, interpreted as number of bins and will use `pd.cut`.
             If interable of numeric, will use provided numbers as bin endpoints.
-
         dropna : bool, default True
             Don't include counts of NaN.
             If False and NaNs are present, NaN will be a key in the output.

From f5e9aeb08a6085e114aa38238dcc4affe588ffd8 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 27 Jun 2020 15:57:09 -0400
Subject: [PATCH 12/31] include nan count when dropna=False

---
 pandas/core/algorithms.py              | 21 ++++++++++---------
 pandas/core/base.py                    | 16 ++++++++++++---
 pandas/tests/base/test_value_counts.py | 28 ++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 9e3ca4cc53363..e9313b1988cad 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -693,12 +693,16 @@ def value_counts(
     ascending : bool, default False
         Sort in ascending order
     normalize: bool, default False
-        If True then compute a relative histogram
-    bins : integer, optional
-        Rather than count values, group them into half-open bins,
-        convenience for pd.cut, only works with numeric data
+        If True, then compute a relative histogram that outputs the
+        proportion of each value.
+    bins : integer or iterable of numeric, optional
+        Rather than count values, group them into half-open bins.
+        Only works with numeric data.
+        If int, interpreted as number of bins and will use pd.cut.
+        If iterable of numeric, will use provided numbers as bin endpoints.
     dropna : bool, default True
-        Don't include counts of NaN
+        Don't include counts of NaN.
+        If False and NaNs are present, NaN will be a key in the output.
 
     Returns
     -------
@@ -717,9 +721,8 @@ def value_counts(
         except TypeError as err:
             raise TypeError("bins argument only works with numeric data.") from err
 
-        # count, remove nulls (from the index), and but the bins
+        # count the binned values; nulls stay in the index unless dropna=True
         result = ii.value_counts(dropna=dropna)
-        result = result[result.index.notna()]
         result.index = result.index.astype("interval")
         result = result.sort_index()
 
@@ -727,8 +730,8 @@ def value_counts(
         if dropna and (result._values == 0).all():
             result = result.iloc[0:0]
 
-        # normalizing is by len of all (regardless of dropna)
-        counts = np.array([len(ii)])
+        # normalizing is by len of what gets included in the bins
+        counts = result._values
 
     else:
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
index b62ef668df5e1..16c6938ced539 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1180,11 +1180,14 @@ def value_counts(
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
-        bins : int, optional
-            Rather than count values, group them into half-open bins,
-            a convenience for ``pd.cut``, only works with numeric data.
+        bins : integer or iterable of numeric, optional
+            Rather than count individual values, group them into half-open bins.
+            Only works with numeric data.
+            If int, interpreted as number of bins and will use `pd.cut`.
+            If iterable of numeric, will use provided numbers as bin endpoints.
         dropna : bool, default True
             Don't include counts of NaN.
+            If False and NaNs are present, NaN will be a key in the output.
 
         Returns
         -------
@@ -1230,6 +1233,13 @@ def value_counts(
         (3.0, 4.0]      1
         dtype: int64
 
+        Bins can also be an iterable of numbers.  These numbers are treated
+        as endpoints for the intervals.
+        >>> s.value_counts(bins=[0,2,4,9])
+        (2.0, 4.0]      3
+        (-0.001, 2.0]    2
+        (4.0, 9.0]       0
+        dtype: int64
         **dropna**
 
         With `dropna` set to `False` we can also see NaN index values.
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index de04c30432e6f..17e1afe906b0a 100644
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -191,6 +191,34 @@ def test_value_counts_bins(index_or_series):
     assert s.nunique() == 0
 
 
+def test_value_counts_bins_nas():
+    # GH25970, handle normalizing bins with NA's properly
+    # First test that NA's are included appropriately
+    rand_data = np.append(
+        np.random.randint(1, 5, 50), [np.nan] * np.random.randint(1, 20)
+    )
+    s = Series(rand_data)
+    assert s.value_counts(dropna=False).index.hasnans
+    assert not s.value_counts(dropna=True).index.hasnans
+    assert s.value_counts(dropna=False, bins=3).index.hasnans
+    assert not s.value_counts(dropna=True, bins=3).index.hasnans
+    assert s.value_counts(dropna=False, bins=[0, 1, 3, 6]).index.hasnans
+    assert not s.value_counts(dropna=True, bins=[0, 1, 3, 6]).index.hasnans
+
+    # then verify specific example
+    s2 = Series([1, 2, 2, 3, 3, 3, np.nan, np.nan, 4, 5])
+    intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
+    expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1, 0, 2]))
+    expected_keepna_vals = np.array([0.3, 0.3, 0.2, 0.2])
+    tm.assert_series_equal(
+        s2.value_counts(dropna=True, normalize=True, bins=3), expected_dropna
+    )
+    tm.assert_numpy_array_equal(
+        s2.value_counts(dropna=False, normalize=True, bins=3).values,
+        expected_keepna_vals,
+    )
+
+
 def test_value_counts_datetime64(index_or_series):
     klass = index_or_series
 

From 99b7112e51cad358af0e40b7fe9a98ffd6cf6a9e Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 27 Jun 2020 16:56:34 -0400
Subject: [PATCH 13/31] listed bugfix

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index c5eb2febe8ae9..61d68064e6b0d 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -921,6 +921,7 @@ Numeric
 - Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
 - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`)
 - Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`)
+- Bug in :meth:`Series.value_counts` with ``normalize=True`` for NA values (:issue:`25970`)
 
 Conversion
 ^^^^^^^^^^

From 75374b26b7a892ad240e0e7f19196d9667eee702 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 27 Jun 2020 18:16:54 -0400
Subject: [PATCH 14/31] avoided tests that highlight groupby.value_count bug

---
 pandas/core/base.py                       | 6 ++++--
 pandas/tests/groupby/test_value_counts.py | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 16c6938ced539..56faaa80ffb74 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1180,7 +1180,7 @@ def value_counts(
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
-        bins : integer or iterable of numeric, optional
+        bins : int or iterable of numeric, optional
             Rather than count individual values, group them into half-open bins.
             Only works with numeric data.
             If int, interpreted as number of bins and will use `pd.cut`.
@@ -1235,11 +1235,13 @@ def value_counts(
 
         Bins can also be an iterable of numbers.  These numbers are treated
         as endpoints for the intervals.
+
         >>> s.value_counts(bins=[0,2,4,9])
-        (2.0, 4.0]      3
+        (2.0, 4.0]       3
         (-0.001, 2.0]    2
         (4.0, 9.0]       0
         dtype: int64
+
         **dropna**
 
         With `dropna` set to `False` we can also see NaN index values.
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index c86cb4532bc26..f8640c63ecc6e 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -53,10 +53,10 @@ def seed_df(seed_nans, n, m):
 @pytest.mark.slow
 @pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
 @pytest.mark.parametrize("isort", [True, False])
-@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize("normalize", [False])
 @pytest.mark.parametrize("sort", [True, False])
 @pytest.mark.parametrize("ascending", [True, False])
-@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize("dropna", [True])
 def test_series_groupby_value_counts(
     df, keys, bins, n, m, isort, normalize, sort, ascending, dropna
 ):
@@ -71,6 +71,7 @@ def rebuild_index(df):
 
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
+    # left.index.names = left.index.names[:-1] + ["3rd"]
 
     gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)

From 25b6c143b95661b2b426ff9d62e9071c0fd91277 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 4 Jul 2020 13:15:16 -0400
Subject: [PATCH 15/31] Revert "avoided tests that highlight
 groupby.value_count bug"

This reverts commit 75374b26b7a892ad240e0e7f19196d9667eee702.
---
 pandas/core/base.py                       | 6 ++----
 pandas/tests/groupby/test_value_counts.py | 5 ++---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 56faaa80ffb74..16c6938ced539 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1180,7 +1180,7 @@ def value_counts(
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
-        bins : int or iterable of numeric, optional
+        bins : integer or iterable of numeric, optional
             Rather than count individual values, group them into half-open bins.
             Only works with numeric data.
             If int, interpreted as number of bins and will use `pd.cut`.
@@ -1235,13 +1235,11 @@ def value_counts(
 
         Bins can also be an iterable of numbers.  These numbers are treated
         as endpoints for the intervals.
-
         >>> s.value_counts(bins=[0,2,4,9])
-        (2.0, 4.0]       3
+        (2.0, 4.0]      3
         (-0.001, 2.0]    2
         (4.0, 9.0]       0
         dtype: int64
-
         **dropna**
 
         With `dropna` set to `False` we can also see NaN index values.
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index f8640c63ecc6e..c86cb4532bc26 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -53,10 +53,10 @@ def seed_df(seed_nans, n, m):
 @pytest.mark.slow
 @pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
 @pytest.mark.parametrize("isort", [True, False])
-@pytest.mark.parametrize("normalize", [False])
+@pytest.mark.parametrize("normalize", [True, False])
 @pytest.mark.parametrize("sort", [True, False])
 @pytest.mark.parametrize("ascending", [True, False])
-@pytest.mark.parametrize("dropna", [True])
+@pytest.mark.parametrize("dropna", [True, False])
 def test_series_groupby_value_counts(
     df, keys, bins, n, m, isort, normalize, sort, ascending, dropna
 ):
@@ -71,7 +71,6 @@ def rebuild_index(df):
 
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
-    # left.index.names = left.index.names[:-1] + ["3rd"]
 
     gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)

From 277ce5275ffcf33d0ccc7c513c5a553fd2597661 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 4 Jul 2020 14:49:22 -0400
Subject: [PATCH 16/31] use series value_counts for groupby

---
 pandas/core/groupby/generic.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index dab8475d9580c..6a97760a92086 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -664,7 +664,16 @@ def describe(self, **kwargs):
     def value_counts(
         self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
     ):
+        return self.apply(
+            Series.value_counts,
+            normalize=normalize,
+            sort=sort,
+            ascending=ascending,
+            bins=bins,
+            dropna=dropna,
+        )
 
+    """
         from pandas.core.reshape.tile import cut
         from pandas.core.reshape.merge import _get_join_indexers
 
@@ -786,6 +795,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:
         if is_integer_dtype(out):
             out = ensure_int64(out)
         return self.obj._constructor(out, index=mi, name=self._selection_name)
+    """
 
     def count(self) -> Series:
         """

From 797f66849cc021c405f3527d90b04d2e150b8429 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 4 Jul 2020 16:45:17 -0400
Subject: [PATCH 17/31] added groupby bin test

---
 pandas/tests/groupby/test_value_counts.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index c86cb4532bc26..4b12a1e0b2da4 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
+from pandas import DataFrame, Grouper, MultiIndex, Series, cut, date_range, to_datetime
 import pandas._testing as tm
 
 
@@ -41,13 +41,12 @@ def seed_df(seed_nans, n, m):
 ids = []
 for seed_nans in [True, False]:
     for n, m in product((100, 1000), (5, 20)):
-
         df = seed_df(seed_nans, n, m)
         bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2)
         keys = "1st", "2nd", ["1st", "2nd"]
         for k, b in product(keys, bins):
             binned.append((df, k, b, n, m))
-            ids.append(f"{k}-{n}-{m}")
+            ids.append(f"{k}-{n}-{m}-{seed_nans} ")
 
 
 @pytest.mark.slow
@@ -71,6 +70,7 @@ def rebuild_index(df):
 
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
+    left.index.names = left.index.names[:-1] + ["3rd"]
 
     gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
@@ -81,6 +81,22 @@ def rebuild_index(df):
     tm.assert_series_equal(left.sort_index(), right.sort_index())
 
 
+def test_groubpy_value_counts_bins():
+    # GH32471
+    BINS = [0, 20, 80, 100]
+    df = DataFrame(
+        [[0, 0], [1, 100], [0, 100], [2, 0], [3, 100]], columns=["key", "score"]
+    )
+    result = df.groupby("key")["score"].value_counts(bins=BINS)
+    result.sort_index(inplace=True)
+    intervals = cut(Series([0]), bins=BINS, include_lowest=True).cat.categories
+    index = MultiIndex.from_product(
+        [[0, 1, 2, 3], sorted(intervals)], names=("key", None)
+    )
+    expected = Series([1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1], index, name="score")
+    tm.assert_series_equal(result, expected)
+
+
 def test_series_groupby_value_counts_with_grouper():
     # GH28479
     df = DataFrame(

From fce6998fa8459f38850c2ec431ffef0bc7e72bc4 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Thu, 16 Jul 2020 15:36:37 -0400
Subject: [PATCH 18/31] passing groupby value_counts tests

---
 pandas/core/algorithms.py                     |  15 +-
 pandas/core/groupby/generic.py                | 241 +++++++++++-------
 pandas/tests/groupby/test_value_counts.py     |  49 +++-
 .../tests/series/methods/test_value_counts.py |   7 +
 4 files changed, 206 insertions(+), 106 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index e9313b1988cad..ba80dfa165f65 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -723,15 +723,15 @@ def value_counts(
 
         # count, remove nulls (from the index), and use the bins
         result = ii.value_counts(dropna=dropna)
+        print(f"{result=}")
         result.index = result.index.astype("interval")
         result = result.sort_index()
 
+        """
         # if we are dropna and we have NO values
         if dropna and (result._values == 0).all():
             result = result.iloc[0:0]
-
-        # normalizing is by len of what gets included in the bins
-        counts = result._values
+        """
 
     else:
 
@@ -740,19 +740,18 @@ def value_counts(
             # handle Categorical and sparse,
             result = Series(values)._values.value_counts(dropna=dropna)
             result.name = name
-            counts = result._values
 
         else:
             keys, counts = _value_counts_arraylike(values, dropna)
 
             result = Series(counts, index=keys, name=name)
 
-    if sort:
-        result = result.sort_values(ascending=ascending)
-
     if normalize:
-        result = result / float(counts.sum())
+        counts = result._values
+        result = result / float(max(counts.sum(), 1))
 
+    if sort:
+        result = result.sort_values(ascending=ascending)
     return result
 
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index eea076044eeeb..a81be6b28bfce 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -51,6 +51,7 @@
     is_scalar,
     needs_i8_conversion,
 )
+from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core.aggregation import (
@@ -664,16 +665,7 @@ def describe(self, **kwargs):
     def value_counts(
         self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
     ):
-        return self.apply(
-            Series.value_counts,
-            normalize=normalize,
-            sort=sort,
-            ascending=ascending,
-            bins=bins,
-            dropna=dropna,
-        )
 
-    """
         from pandas.core.reshape.tile import cut
         from pandas.core.reshape.merge import _get_join_indexers
 
@@ -687,115 +679,184 @@ def value_counts(
                 ascending=ascending,
                 bins=bins,
             )
-
+        keys = [k for k in self.groups]
+        # print(f'{self.groups=}')
         ids, _, _ = self.grouper.group_info
+        # print(f'{ids=}')
         val = self.obj._values
+        print(f"{keys=}")
+        codes = self.grouper.reconstructed_codes  # this will track the groups
+        print("codes: ", codes)
 
         # groupby removes null keys from groupings
         mask = ids != -1
         ids, val = ids[mask], val[mask]
+        if dropna:
+            mask = ~np.isnan(val)
+            if not mask.all():
+                ids, val = ids[mask], val[mask]
+                # codes = [code[mask] for code in codes]
 
+        print(f"{ids=}")
+        print(f"{val=}")
+
+        print(f"{bins=}")
         if bins is None:
-            lab, lev = algorithms.factorize(val, sort=True)
-            llab = lambda lab, inc: lab[inc]
+            val_lab, val_lev = algorithms.factorize(val, sort=True, dropna=dropna)
+            print(f"{val_lab=}")
         else:
+            # val_lab is a Categorical with categories an IntervalIndex
+            print(f"{Series(val)=}")
+            val_lab = cut(Series(val), bins, include_lowest=True)
+            # cut excludes NaN from its categories, so need to manually add
+            print(f"{val_lab=}")
+            print((not dropna) and (val_lab.hasnans))
+            """if (not dropna) and (val_lab.hasnans):
+                # val_lab =
+                cat_nan = CategoricalDtype(val_lab.cat.add_categories('NaN').cat.categories)
+                print(cat_nan)
+                val_lab = val_lab.astype(cat_nan).fillna('NaN')
+            """
+            print(f"{val_lab=}")
+            val_lev = val_lab.cat.categories
+            val_lab = val_lab.cat.codes.values
+            print(f"{val_lab=}")
+            if dropna:
+                included = val_lab != -1
+                ids, val_lab = ids[included], val_lab[included]
 
-            # lab is a Categorical with categories an IntervalIndex
-            lab = cut(Series(val), bins, include_lowest=True)
-            lev = lab.cat.categories
-            lab = lev.take(lab.cat.codes)
-            llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
-
-        if is_interval_dtype(lab.dtype):
+            # print('1st val_lab: ', val_lab.cat.codes)
+            # llab = lambda val_lab, inc: val_lab[inc]._multiindex.codes[-1]
+        print(f"{val_lev=}")
+        if is_interval_dtype(val_lab.dtype):
             # TODO: should we do this inside II?
-            sorter = np.lexsort((lab.left, lab.right, ids))
+            sorter = np.lexsort((val_lab.right, val_lab.left, ids))
         else:
-            sorter = np.lexsort((lab, ids))
-
-        ids, lab = ids[sorter], lab[sorter]
+            sorter = np.lexsort((val_lab, ids))
+        ids, val_lab = ids[sorter], val_lab[sorter]
 
+        print("ids: ", ids)
+        print(f"{val_lab=}")
+        # val_lab = val_lab.values
+        # print(f'{val_lab=}')
         # group boundaries are where group ids change
-        idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
-
         # new values are where sorted labels change
-        lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
-        inc = np.r_[True, lchanges]
-        inc[idx] = True  # group boundaries are also new values
-        out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
-
-        # num. of times each group should be repeated
-        rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
-
-        # multi-index components
-        codes = self.grouper.reconstructed_codes
-        codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
-        levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
+        change_ids = ids[1:] != ids[:-1]
+        print((val_lab[1:] != val_lab[:-1]))
+        changes = np.logical_or(change_ids, (val_lab[1:] != val_lab[:-1]))
+        """
+        changes = [(ids[i] != ids[i+1]) or (val_lab[i] != val_lab[i+1])
+                       for i in range(len(ids)-1)] #((ids[1:] != ids[:-1]) or (val_lab[1:] != val_lab[:-1]))
+        """
+        print(f"{changes=}")
+        print(np.diff(np.nonzero(changes), append=len(changes))[0])
+        changes = np.r_[True, changes]
+        cts = np.diff(np.nonzero(np.r_[changes, True]))[0]  # , append=len(changes))[0]
+        print(f"{cts=}")
+        val_lab = val_lab[changes]
+        ids = ids[changes]
+        print("ids: ", ids)
+
+        change_ids = (
+            ids[1:] != ids[:-1]
+        )  # need to update now that we removed full repeats
+        # num_id_rep = np.diff(np.nonzero(np.r_[True, chan]))
+        print(f"{change_ids=}")
+        print(f"{val_lab=}")
+
+        num_repeats = np.diff(np.nonzero(np.r_[True, change_ids, True]))[0]
+        rep = partial(np.repeat, repeats=num_repeats)
+        print(f"{rep=}")
+        if (not dropna) and (-1 in val_lab):
+            val_lev = np.r_[Index([np.nan]), val_lev]
+            val_lab += 1
+        levels = [ping.group_index for ping in self.grouper.groupings] + [
+            Index(val_lev)
+        ]
+        print(f"{levels=}")
         names = self.grouper.names + [self._selection_name]
-
-        if dropna:
-            mask = codes[-1] != -1
-            if mask.all():
-                dropna = False
-            else:
-                out, codes = out[mask], [level_codes[mask] for level_codes in codes]
+        print(f"{names=}")
 
         if normalize:
-            out = out.astype("float")
-            d = np.diff(np.r_[idx, len(ids)])
-            if dropna:
-                m = ids[lab == -1]
-                np.add.at(d, m, -1)
-                acc = rep(d)[mask]
-            else:
-                acc = rep(d)
-            out /= acc
-
-        if sort and bins is None:
-            cat = ids[inc][mask] if dropna else ids[inc]
-            sorter = np.lexsort((out if ascending else -out, cat))
-            out, codes[-1] = out[sorter], codes[-1][sorter]
+            num_vals = []
+            ix = 0
+            print(f"{num_repeats=}")
+            for i, r in enumerate(num_repeats):
+                num_vals.append(np.sum(cts[ix : ix + r]))
+                # print(out[ix:ix+r])
+                ix += r
+            # print(f'{ix=}')
+            # [np.sum(out[i:i+r]) ]
+            print(f"{num_vals=}")
+            print(f"{cts=}")
+            cts = cts.astype("float")
+            cts /= rep(num_vals)  # each divisor is the number of repeats for that index
+            print(f"{cts=}")
 
         if bins is None:
+            print("codes: ", codes)
+            # codes = [code[changes] for code in codes]
+            used_ids = np.unique(ids)
+            codes = [code[used_ids] for code in codes]
+            codes = [rep(level_codes) for level_codes in codes] + [val_lab]
+            print(f"{codes=}")
+
+            if sort:
+                indices = tuple(reversed(codes[:-1]))
+                sorter = np.lexsort(
+                    np.r_[[val_lab], [cts if ascending else -cts], indices]
+                )  # sorts using right columns first
+                cts = cts[sorter]
+                codes = [code[sorter] for code in codes]
+            print(f"{cts=}")
             mi = MultiIndex(
                 levels=levels, codes=codes, names=names, verify_integrity=False
             )
+            # print(f'{mi=}')
+            if is_integer_dtype(cts):
+                cts = ensure_int64(cts)
+            return self.obj._constructor(cts, index=mi, name=self._selection_name)
 
-            if is_integer_dtype(out):
-                out = ensure_int64(out)
-            return self.obj._constructor(out, index=mi, name=self._selection_name)
+        nbin = len(levels[-1])
+        # print(f'{codes=}')
+        print(len(cts), len(codes[0]), len(sorter))
 
         # for compat. with libgroupby.value_counts need to ensure every
         # bin is present at every index level, null filled with zeros
-        diff = np.zeros(len(out), dtype="bool")
-        for level_codes in codes[:-1]:
-            diff |= np.r_[True, level_codes[1:] != level_codes[:-1]]
-
-        ncat, nbin = diff.sum(), len(levels[-1])
-
-        left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
-
-        right = [diff.cumsum() - 1, codes[-1]]
-
-        _, idx = _get_join_indexers(left, right, sort=False, how="left")
-        out = np.where(idx != -1, out[idx], 0)
-
+        print(f"{ids=}")
+        ncat = len(codes[0])
+        # ncat = len(ids)
+        print(f"{nbin=}")
+        fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
+        for i, ct in enumerate(cts):
+            fout[ids[i] * nbin + val_lab[i]] = ct
+        print(f"{fout=}", len(fout))
+
+        ncodes = [np.repeat(code, nbin) for code in codes]
+        print(f"{ncodes=}")
+        ncodes.append(np.tile(range(nbin), len(codes[0])))
+        """
+        fout = cts
+        ncodes = [rep(level_codes) for level_codes in codes] + [val_lab]
+        """
+        print(f"{ncodes=}")
         if sort:
-            sorter = np.lexsort((out if ascending else -out, left[0]))
-            out, left[-1] = out[sorter], left[-1][sorter]
-
-        # build the multi-index w/ full levels
-        def build_codes(lev_codes: np.ndarray) -> np.ndarray:
-            return np.repeat(lev_codes[diff], nbin)
-
-        codes = [build_codes(lev_codes) for lev_codes in codes[:-1]]
-        codes.append(left[-1])
-
-        mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
-
-        if is_integer_dtype(out):
-            out = ensure_int64(out)
-        return self.obj._constructor(out, index=mi, name=self._selection_name)
-    """
+            indices = tuple(reversed(ncodes[:-1]))
+            print(f"{indices=}")
+            # print(np.r_[[fout if ascending else -fout], indices])
+            sorter = np.lexsort(
+                np.r_[[fout if ascending else -fout], indices]
+            )  # sorts using right columns first
+            # print(sorter)
+            fout = fout[sorter]
+            ncodes = [code[sorter] for code in ncodes]
+        mi = MultiIndex(
+            levels=levels, codes=ncodes, names=names, verify_integrity=False
+        )
+        print(f"{mi=}")
+        if is_integer_dtype(fout):
+            fout = ensure_int64(fout)
+        return self.obj._constructor(fout, index=mi, name=self._selection_name)
 
     def count(self) -> Series:
         """
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 4b12a1e0b2da4..6d221fa89de6b 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -40,7 +40,7 @@ def seed_df(seed_nans, n, m):
 binned = []
 ids = []
 for seed_nans in [True, False]:
-    for n, m in product((100, 1000), (5, 20)):
+    for n, m in product((10, 1000), (5, 20)):
         df = seed_df(seed_nans, n, m)
         bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2)
         keys = "1st", "2nd", ["1st", "2nd"]
@@ -68,32 +68,65 @@ def rebuild_index(df):
         normalize=normalize, sort=sort, ascending=ascending, dropna=dropna, bins=bins
     )
 
+    print(f"{df=}")
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
     left.index.names = left.index.names[:-1] + ["3rd"]
 
-    gr = df.groupby(keys, sort=isort)
+    # gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
     right.index.names = right.index.names[:-1] + ["3rd"]
+    print(f"{left=}")
+    print(f"{right=}")
 
     # have to sort on index because of unstable sort on values
     left, right = map(rebuild_index, (left, right))  # xref GH9212
+    # have to ignore 0 counts to be consistent with individual column value_counts
+    left = left[left.astype(bool)]
+    right = right[right.astype(bool)]
     tm.assert_series_equal(left.sort_index(), right.sort_index())
 
 
 def test_groubpy_value_counts_bins():
     # GH32471
     BINS = [0, 20, 80, 100]
-    df = DataFrame(
-        [[0, 0], [1, 100], [0, 100], [2, 0], [3, 100]], columns=["key", "score"]
+    values = [
+        [0, 5, 0],
+        [1, 5, 100],
+        [0, 5, 100],
+        [2, 5, 0],
+        [3, 6, 100],
+        [3, 5, 100],
+        [1, 5, 100],
+    ]
+    df = DataFrame(values, columns=["key1", "key2", "score"])
+    result = df.groupby(["key1", "key2"])["score"].value_counts(bins=BINS)
+    print(f"{result=}")
+    print(
+        df.groupby(["key1", "key2"])["score"].apply(
+            Series.value_counts,
+            bins=BINS,
+            sort=True,
+            normalize=True,
+            ascending=True,
+            dropna=True,
+        )
     )
-    result = df.groupby("key")["score"].value_counts(bins=BINS)
+
     result.sort_index(inplace=True)
     intervals = cut(Series([0]), bins=BINS, include_lowest=True).cat.categories
-    index = MultiIndex.from_product(
-        [[0, 1, 2, 3], sorted(intervals)], names=("key", None)
+    # groups = [(0,5), (1,5), (2,5), (3,5), (3,6)]
+    groups = set((v[1], v[2], i) for v in values for i in intervals)
+    # {val[:-1]: 0 for val in values}
+    index = product([], intervals)
+
+    """index = MultiIndex.from_product(
+        [groups, sorted(intervals)], names=("key1", "key2", "score")
+    )"""
+    expected = Series(
+        [1, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1], result.index, name="score"
     )
-    expected = Series([1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1], index, name="score")
+    # expected = [2, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1]
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py
index f97362ce9c2a9..d965fe4faa075 100644
--- a/pandas/tests/series/methods/test_value_counts.py
+++ b/pandas/tests/series/methods/test_value_counts.py
@@ -179,6 +179,13 @@ def test_value_counts_categorical_with_nan(self):
             res = ser.value_counts(dropna=False, sort=False)
             tm.assert_series_equal(res, exp)
 
+    def test_value_counts_interval_bins(self):
+        ser = Series([1, 2, 3, 0, 1, 4], ["a", "a", "a", "b", "b", "c"])
+        res = ser.value_counts(bins=[0, 1, 2])
+        print(res)
+        exp = Series([2, 3, 4])
+        tm.assert_series_equal(res, exp)
+
     @pytest.mark.parametrize(
         "ser, dropna, exp",
         [

From 637a6098fb058871b40a9b45bff37cd155b4869d Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 25 Jul 2020 23:07:23 -0400
Subject: [PATCH 19/31] np.isnan doesn't work for times

---
 pandas/core/groupby/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index a81be6b28bfce..b980a985c3077 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -692,7 +692,7 @@ def value_counts(
         mask = ids != -1
         ids, val = ids[mask], val[mask]
         if dropna:
-            mask = ~np.isnan(val)
+            mask = ~isna(val)
             if not mask.all():
                 ids, val = ids[mask], val[mask]
                 # codes = [code[mask] for code in codes]
@@ -797,7 +797,7 @@ def value_counts(
             print("codes: ", codes)
             # codes = [code[changes] for code in codes]
             used_ids = np.unique(ids)
-            codes = [code[used_ids] for code in codes]
+            # codes = [code[used_ids] for code in codes]
             codes = [rep(level_codes) for level_codes in codes] + [val_lab]
             print(f"{codes=}")
 

From c9a4383563c315b9b6f45eb1c9a20393adee8739 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 26 Jul 2020 21:39:53 -0400
Subject: [PATCH 20/31] passing all value count tests

---
 pandas/core/groupby/generic.py            | 138 ++++++----------------
 pandas/tests/base/test_value_counts.py    |  29 +++--
 pandas/tests/groupby/test_value_counts.py |  14 +--
 3 files changed, 52 insertions(+), 129 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index b980a985c3077..3d5e13dbe658d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -45,13 +45,11 @@
     ensure_platform_int,
     is_bool,
     is_integer_dtype,
-    is_interval_dtype,
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
     needs_i8_conversion,
 )
-from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core.aggregation import (
@@ -61,6 +59,7 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
+from pandas.core.algorithms import unique
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -78,6 +77,7 @@
 import pandas.core.indexes.base as ibase
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
+from pandas.core.sorting import compress_group_index
 from pandas.core.util.numba_ import (
     NUMBA_FUNC_CACHE,
     generate_numba_func,
@@ -667,7 +667,6 @@ def value_counts(
     ):
 
         from pandas.core.reshape.tile import cut
-        from pandas.core.reshape.merge import _get_join_indexers
 
         if bins is not None and not np.iterable(bins):
             # scalar bins cannot be done at top level
@@ -679,14 +678,9 @@ def value_counts(
                 ascending=ascending,
                 bins=bins,
             )
-        keys = [k for k in self.groups]
-        # print(f'{self.groups=}')
         ids, _, _ = self.grouper.group_info
-        # print(f'{ids=}')
         val = self.obj._values
-        print(f"{keys=}")
         codes = self.grouper.reconstructed_codes  # this will track the groups
-        print("codes: ", codes)
 
         # groupby removes null keys from groupings
         mask = ids != -1
@@ -695,111 +689,63 @@ def value_counts(
             mask = ~isna(val)
             if not mask.all():
                 ids, val = ids[mask], val[mask]
-                # codes = [code[mask] for code in codes]
 
-        print(f"{ids=}")
-        print(f"{val=}")
-
-        print(f"{bins=}")
         if bins is None:
             val_lab, val_lev = algorithms.factorize(val, sort=True, dropna=dropna)
-            print(f"{val_lab=}")
         else:
             # val_lab is a Categorical with categories an IntervalIndex
-            print(f"{Series(val)=}")
             val_lab = cut(Series(val), bins, include_lowest=True)
-            # cut excludes NaN from its categories, so need to manually add
-            print(f"{val_lab=}")
-            print((not dropna) and (val_lab.hasnans))
-            """if (not dropna) and (val_lab.hasnans):
-                # val_lab =
-                cat_nan = CategoricalDtype(val_lab.cat.add_categories('NaN').cat.categories)
-                print(cat_nan)
-                val_lab = val_lab.astype(cat_nan).fillna('NaN')
-            """
-            print(f"{val_lab=}")
             val_lev = val_lab.cat.categories
             val_lab = val_lab.cat.codes.values
-            print(f"{val_lab=}")
-            if dropna:
-                included = val_lab != -1
-                ids, val_lab = ids[included], val_lab[included]
-
-            # print('1st val_lab: ', val_lab.cat.codes)
-            # llab = lambda val_lab, inc: val_lab[inc]._multiindex.codes[-1]
-        print(f"{val_lev=}")
-        if is_interval_dtype(val_lab.dtype):
-            # TODO: should we do this inside II?
-            sorter = np.lexsort((val_lab.right, val_lab.left, ids))
-        else:
-            sorter = np.lexsort((val_lab, ids))
+
+        if dropna:
+            included = val_lab != -1
+            ids, val_lab = ids[included], val_lab[included]
+
+        sorter = np.lexsort((val_lab, ids))
         ids, val_lab = ids[sorter], val_lab[sorter]
+        used_ids = unique(ids)
+        if max(used_ids) >= len(
+            codes[0]
+        ):  # this means we had something skipped from the start
+            used_ids = compress_group_index(used_ids)[0]
+        codes = [code[used_ids] for code in codes]  # drop what was taken out for n/a
 
-        print("ids: ", ids)
-        print(f"{val_lab=}")
-        # val_lab = val_lab.values
-        # print(f'{val_lab=}')
         # group boundaries are where group ids change
         # new values are where sorted labels change
         change_ids = ids[1:] != ids[:-1]
-        print((val_lab[1:] != val_lab[:-1]))
         changes = np.logical_or(change_ids, (val_lab[1:] != val_lab[:-1]))
-        """
-        changes = [(ids[i] != ids[i+1]) or (val_lab[i] != val_lab[i+1])
-                       for i in range(len(ids)-1)] #((ids[1:] != ids[:-1]) or (val_lab[1:] != val_lab[:-1]))
-        """
-        print(f"{changes=}")
-        print(np.diff(np.nonzero(changes), append=len(changes))[0])
         changes = np.r_[True, changes]
-        cts = np.diff(np.nonzero(np.r_[changes, True]))[0]  # , append=len(changes))[0]
-        print(f"{cts=}")
         val_lab = val_lab[changes]
         ids = ids[changes]
-        print("ids: ", ids)
-
-        change_ids = (
-            ids[1:] != ids[:-1]
-        )  # need to update now that we removed full repeats
-        # num_id_rep = np.diff(np.nonzero(np.r_[True, chan]))
-        print(f"{change_ids=}")
-        print(f"{val_lab=}")
+        cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
 
+        idx = np.r_[0, 1 + np.nonzero(change_ids)[0]]
+        rep = partial(np.repeat, repeats=np.add.reduceat(changes, idx))
         num_repeats = np.diff(np.nonzero(np.r_[True, change_ids, True]))[0]
-        rep = partial(np.repeat, repeats=num_repeats)
-        print(f"{rep=}")
+
+        change_ids = np.r_[  # need to update now that we removed full repeats
+            ids[1:] != ids[:-1], True
+        ]
+
         if (not dropna) and (-1 in val_lab):
+            # in this case we need to explicitly add NaN as a level
             val_lev = np.r_[Index([np.nan]), val_lev]
             val_lab += 1
+
         levels = [ping.group_index for ping in self.grouper.groupings] + [
             Index(val_lev)
         ]
-        print(f"{levels=}")
         names = self.grouper.names + [self._selection_name]
-        print(f"{names=}")
 
         if normalize:
-            num_vals = []
-            ix = 0
-            print(f"{num_repeats=}")
-            for i, r in enumerate(num_repeats):
-                num_vals.append(np.sum(cts[ix : ix + r]))
-                # print(out[ix:ix+r])
-                ix += r
-            # print(f'{ix=}')
-            # [np.sum(out[i:i+r]) ]
-            print(f"{num_vals=}")
-            print(f"{cts=}")
             cts = cts.astype("float")
-            cts /= rep(num_vals)  # each divisor is the number of repeats for that index
-            print(f"{cts=}")
+            cts /= rep(
+                num_repeats
+            )  # each divisor is the number of repeats for that index
 
         if bins is None:
-            print("codes: ", codes)
-            # codes = [code[changes] for code in codes]
-            used_ids = np.unique(ids)
-            # codes = [code[used_ids] for code in codes]
             codes = [rep(level_codes) for level_codes in codes] + [val_lab]
-            print(f"{codes=}")
 
             if sort:
                 indices = tuple(reversed(codes[:-1]))
@@ -808,52 +754,36 @@ def value_counts(
                 )  # sorts using right columns first
                 cts = cts[sorter]
                 codes = [code[sorter] for code in codes]
-            print(f"{cts=}")
+
             mi = MultiIndex(
                 levels=levels, codes=codes, names=names, verify_integrity=False
             )
-            # print(f'{mi=}')
             if is_integer_dtype(cts):
                 cts = ensure_int64(cts)
             return self.obj._constructor(cts, index=mi, name=self._selection_name)
 
-        nbin = len(levels[-1])
-        # print(f'{codes=}')
-        print(len(cts), len(codes[0]), len(sorter))
-
         # for compat. with libgroupby.value_counts need to ensure every
         # bin is present at every index level, null filled with zeros
-        print(f"{ids=}")
+        nbin = len(levels[-1])
         ncat = len(codes[0])
-        # ncat = len(ids)
-        print(f"{nbin=}")
         fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
-        for i, ct in enumerate(cts):
-            fout[ids[i] * nbin + val_lab[i]] = ct
-        print(f"{fout=}", len(fout))
-
+        id = 0
+        for i, ct in enumerate(cts):  # fill in nonzero values of fout
+            fout[id * nbin + val_lab[i]] = cts[i]
+            id += change_ids[i]
         ncodes = [np.repeat(code, nbin) for code in codes]
-        print(f"{ncodes=}")
         ncodes.append(np.tile(range(nbin), len(codes[0])))
-        """
-        fout = cts
-        ncodes = [rep(level_codes) for level_codes in codes] + [val_lab]
-        """
-        print(f"{ncodes=}")
+
         if sort:
             indices = tuple(reversed(ncodes[:-1]))
-            print(f"{indices=}")
-            # print(np.r_[[fout if ascending else -fout], indices])
             sorter = np.lexsort(
                 np.r_[[fout if ascending else -fout], indices]
             )  # sorts using right columns first
-            # print(sorter)
             fout = fout[sorter]
             ncodes = [code[sorter] for code in ncodes]
         mi = MultiIndex(
             levels=levels, codes=ncodes, names=names, verify_integrity=False
         )
-        print(f"{mi=}")
         if is_integer_dtype(fout):
             fout = ensure_int64(fout)
         return self.obj._constructor(fout, index=mi, name=self._selection_name)
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index 17e1afe906b0a..558a66952d074 100644
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -191,31 +191,34 @@ def test_value_counts_bins(index_or_series):
     assert s.nunique() == 0
 
 
-def test_value_counts_bins_nas():
+@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize("bins", [None, 3, [0, 1, 3, 6]])
+def test_value_counts_bins_nas(dropna, bins):
     # GH25970, handle normalizing bins with NA's properly
     # First test that NA's are included appropriately
     rand_data = np.append(
         np.random.randint(1, 5, 50), [np.nan] * np.random.randint(1, 20)
     )
     s = Series(rand_data)
-    assert s.value_counts(dropna=False).index.hasnans
-    assert not s.value_counts(dropna=True).index.hasnans
-    assert s.value_counts(dropna=False, bins=3).index.hasnans
-    assert not s.value_counts(dropna=True, bins=3).index.hasnans
-    assert s.value_counts(dropna=False, bins=[0, 1, 3, 6]).index.hasnans
-    assert not s.value_counts(dropna=True, bins=[0, 1, 3, 6]).index.hasnans
-
-    # then verify specific example
+    if dropna:
+        assert not s.value_counts(dropna=dropna, bins=bins).index.hasnans
+    else:
+        assert s.value_counts(dropna=dropna, bins=bins).index.hasnans
+
+
+def test_value_counts_bins_specific_na():
+    # verify specific NA example
     s2 = Series([1, 2, 2, 3, 3, 3, np.nan, np.nan, 4, 5])
     intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
     expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1, 0, 2]))
-    expected_keepna_vals = np.array([0.3, 0.3, 0.2, 0.2])
     tm.assert_series_equal(
         s2.value_counts(dropna=True, normalize=True, bins=3), expected_dropna
     )
-    tm.assert_numpy_array_equal(
-        s2.value_counts(dropna=False, normalize=True, bins=3).values,
-        expected_keepna_vals,
+    keys = list(intervals.take([1, 0, 2]))
+    keys.insert(2, np.nan)
+    expected_keepna = Series([0.3, 0.3, 0.2, 0.2], keys)
+    tm.assert_series_equal(
+        s2.value_counts(dropna=False, normalize=True, bins=3), expected_keepna
     )
 
 
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 6d221fa89de6b..fe084a7a941c6 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Grouper, MultiIndex, Series, cut, date_range, to_datetime
+from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
 import pandas._testing as tm
 
 
@@ -40,7 +40,7 @@ def seed_df(seed_nans, n, m):
 binned = []
 ids = []
 for seed_nans in [True, False]:
-    for n, m in product((10, 1000), (5, 20)):
+    for n, m in product((100, 1000), (5, 20)):
         df = seed_df(seed_nans, n, m)
         bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2)
         keys = "1st", "2nd", ["1st", "2nd"]
@@ -114,19 +114,9 @@ def test_groubpy_value_counts_bins():
     )
 
     result.sort_index(inplace=True)
-    intervals = cut(Series([0]), bins=BINS, include_lowest=True).cat.categories
-    # groups = [(0,5), (1,5), (2,5), (3,5), (3,6)]
-    groups = set((v[1], v[2], i) for v in values for i in intervals)
-    # {val[:-1]: 0 for val in values}
-    index = product([], intervals)
-
-    """index = MultiIndex.from_product(
-        [groups, sorted(intervals)], names=("key1", "key2", "score")
-    )"""
     expected = Series(
         [1, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1], result.index, name="score"
     )
-    # expected = [2, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1]
     tm.assert_series_equal(result, expected)
 
 

From d2399ea4a7bdf934ff8b6303a34f93ace8aa01a1 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Mon, 27 Jul 2020 12:17:15 -0400
Subject: [PATCH 21/31] speedups 1

---
 pandas/core/groupby/generic.py            | 13 +++++++------
 pandas/tests/groupby/test_value_counts.py | 15 ---------------
 2 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 513f47302df40..999729bf4dbab 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -684,6 +684,7 @@ def value_counts(
 
         from pandas.core.reshape.tile import cut
 
+        """
         if bins is not None and not np.iterable(bins):
             # scalar bins cannot be done at top level
             # in a backward compatible way
@@ -694,6 +695,7 @@ def value_counts(
                 ascending=ascending,
                 bins=bins,
             )
+        """
         ids, _, _ = self.grouper.group_info
         val = self.obj._values
         codes = self.grouper.reconstructed_codes  # this will track the groups
@@ -735,14 +737,9 @@ def value_counts(
         val_lab = val_lab[changes]
         ids = ids[changes]
         cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
-
         idx = np.r_[0, 1 + np.nonzero(change_ids)[0]]
+        # how many times each index gets repeated
         rep = partial(np.repeat, repeats=np.add.reduceat(changes, idx))
-        num_repeats = np.diff(np.nonzero(np.r_[True, change_ids, True]))[0]
-
-        change_ids = np.r_[  # need to update now that we removed full repeats
-            ids[1:] != ids[:-1], True
-        ]
 
         if (not dropna) and (-1 in val_lab):
             # in this case we need to explicitly add NaN as a level
@@ -755,6 +752,7 @@ def value_counts(
         names = self.grouper.names + [self._selection_name]
 
         if normalize:
+            num_repeats = np.diff(idx, append=len(ids))
             cts = cts.astype("float")
             cts /= rep(
                 num_repeats
@@ -784,6 +782,9 @@ def value_counts(
         ncat = len(codes[0])
         fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
         id = 0
+        change_ids = np.r_[  # need to update now that we removed full repeats
+            ids[1:] != ids[:-1], True
+        ]
         for i, ct in enumerate(cts):  # fill in nonzero values of fout
             fout[id * nbin + val_lab[i]] = cts[i]
             id += change_ids[i]
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index fe084a7a941c6..5b83119f89903 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -68,7 +68,6 @@ def rebuild_index(df):
         normalize=normalize, sort=sort, ascending=ascending, dropna=dropna, bins=bins
     )
 
-    print(f"{df=}")
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
     left.index.names = left.index.names[:-1] + ["3rd"]
@@ -76,8 +75,6 @@ def rebuild_index(df):
     # gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
     right.index.names = right.index.names[:-1] + ["3rd"]
-    print(f"{left=}")
-    print(f"{right=}")
 
     # have to sort on index because of unstable sort on values
     left, right = map(rebuild_index, (left, right))  # xref GH9212
@@ -101,18 +98,6 @@ def test_groubpy_value_counts_bins():
     ]
     df = DataFrame(values, columns=["key1", "key2", "score"])
     result = df.groupby(["key1", "key2"])["score"].value_counts(bins=BINS)
-    print(f"{result=}")
-    print(
-        df.groupby(["key1", "key2"])["score"].apply(
-            Series.value_counts,
-            bins=BINS,
-            sort=True,
-            normalize=True,
-            ascending=True,
-            dropna=True,
-        )
-    )
-
     result.sort_index(inplace=True)
     expected = Series(
         [1, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1], result.index, name="score"

From ec92f15a260c0a72d2df564f9e376b3cb4d02fb7 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 1 Aug 2020 20:00:20 -0400
Subject: [PATCH 22/31] speedup?

---
 pandas/core/groupby/generic.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 54bb17f4e9d8a..9455b37ce62f7 100755
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -769,13 +769,17 @@ def value_counts(
         nbin = len(levels[-1])
         ncat = len(codes[0])
         fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
-        id = 0
+        """
         change_ids = np.r_[  # need to update now that we removed full repeats
             ids[1:] != ids[:-1], True
         ]
+        """
+        id = 0
+        ct_len = len(cts)
         for i, ct in enumerate(cts):  # fill in nonzero values of fout
             fout[id * nbin + val_lab[i]] = cts[i]
-            id += change_ids[i]
+            if i < ct_len - 1:  # avoid index error
+                id += ids[i] != ids[i + 1]
         ncodes = [np.repeat(code, nbin) for code in codes]
         ncodes.append(np.tile(range(nbin), len(codes[0])))
 

From d6179b0510582c8c09945eab6f1c2a868c04fdf7 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 1 Aug 2020 21:25:15 -0400
Subject: [PATCH 23/31] Revert "speedup?"

This reverts commit ec92f15a260c0a72d2df564f9e376b3cb4d02fb7.
---
 pandas/core/groupby/generic.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9455b37ce62f7..54bb17f4e9d8a 100755
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -769,17 +769,13 @@ def value_counts(
         nbin = len(levels[-1])
         ncat = len(codes[0])
         fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
-        """
+        id = 0
         change_ids = np.r_[  # need to update now that we removed full repeats
             ids[1:] != ids[:-1], True
         ]
-        """
-        id = 0
-        ct_len = len(cts)
         for i, ct in enumerate(cts):  # fill in nonzero values of fout
             fout[id * nbin + val_lab[i]] = cts[i]
-            if i < ct_len - 1:  # avoid index error
-                id += ids[i] != ids[i + 1]
+            id += change_ids[i]
         ncodes = [np.repeat(code, nbin) for code in codes]
         ncodes.append(np.tile(range(nbin), len(codes[0])))
 

From 5abfb16455be8644ba37906c44a9daec430b20c9 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Mon, 10 Aug 2020 10:48:49 -0400
Subject: [PATCH 24/31] fixed comments

---
 pandas/core/groupby/generic.py            | 2 --
 pandas/tests/base/test_value_counts.py    | 2 +-
 pandas/tests/groupby/test_value_counts.py | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 54bb17f4e9d8a..7c70bf8d98de1 100755
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -726,7 +726,6 @@ def value_counts(
         ids = ids[changes]
         cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
         idx = np.r_[0, 1 + np.nonzero(change_ids)[0]]
-        print(idx)
         # how many times each index gets repeated
         rep = partial(np.repeat, repeats=np.add.reduceat(changes, idx))
 
@@ -742,7 +741,6 @@ def value_counts(
 
         if normalize:
             num_repeats = np.diff(idx, append=len(change_ids) + 1)
-            print(num_repeats)
             cts = cts.astype("float") / rep(num_repeats)
             # each divisor is the number of repeats for that index
 
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index 558a66952d074..8ea7f0fe3fc98 100755
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -207,7 +207,7 @@ def test_value_counts_bins_nas(dropna, bins):
 
 
 def test_value_counts_bins_specific_na():
-    # verify specific NA example
+    # GH25970 case where proportions were incorrect for dropna and normalize=True
     s2 = Series([1, 2, 2, 3, 3, 3, np.nan, np.nan, 4, 5])
     intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
     expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1, 0, 2]))
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 5b83119f89903..94e19b93368d8 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -72,12 +72,12 @@ def rebuild_index(df):
     left = gr["3rd"].value_counts(**kwargs)
     left.index.names = left.index.names[:-1] + ["3rd"]
 
-    # gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
     right.index.names = right.index.names[:-1] + ["3rd"]
 
     # have to sort on index because of unstable sort on values
     left, right = map(rebuild_index, (left, right))  # xref GH9212
+
     # have to ignore 0 counts to be consistent with individual column value_counts
     left = left[left.astype(bool)]
     right = right[right.astype(bool)]

From 5f33834181831f5061e1e55b481457a1eb762ef1 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 12 Sep 2020 08:09:45 -0400
Subject: [PATCH 25/31] removed unneeded import

---
 pandas/core/groupby/generic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index b9868e1538c66..2a67dfaa41c73 100755
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -685,7 +685,6 @@ def value_counts(
         self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
     ):
 
-        from pandas.core.reshape.merge import _get_join_indexers
         from pandas.core.reshape.tile import cut
 
         if bins is not None and not np.iterable(bins):

From f685cb2bec8dce856d83f2b1b1ddf5b0425b306c Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 12 Sep 2020 08:32:03 -0400
Subject: [PATCH 26/31] updated to use na_sentinel param

---
 pandas/core/groupby/generic.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index a3e7bf38fcd3f..fff8ce6da2c87 100755
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -712,7 +712,9 @@ def value_counts(
                 ids, val = ids[mask], val[mask]
 
         if bins is None:
-            val_lab, val_lev = algorithms.factorize(val, sort=True, dropna=dropna)
+            val_lab, val_lev = algorithms.factorize(
+                val, sort=True, na_sentinel=(None if dropna else -1)
+            )
         else:
             # val_lab is a Categorical with categories an IntervalIndex
             val_lab = cut(Series(val), bins, include_lowest=True)

From c21bdbb3a767efdae7dfd91a9395da799aa35f06 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sat, 12 Sep 2020 09:31:52 -0400
Subject: [PATCH 27/31] fixed bad test

---
 pandas/tests/series/methods/test_value_counts.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py
index d965fe4faa075..270f29914442a 100644
--- a/pandas/tests/series/methods/test_value_counts.py
+++ b/pandas/tests/series/methods/test_value_counts.py
@@ -182,8 +182,7 @@ def test_value_counts_categorical_with_nan(self):
     def test_value_counts_interval_bins(self):
         ser = Series([1, 2, 3, 0, 1, 4], ["a", "a", "a", "b", "b", "c"])
         res = ser.value_counts(bins=[0, 1, 2])
-        print(res)
-        exp = Series([2, 3, 4])
+        exp = Series([3, 1], res.index)
         tm.assert_series_equal(res, exp)
 
     @pytest.mark.parametrize(

From e4c255213593e75be45e185944239cfce1996072 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 13 Sep 2020 12:02:39 -0400
Subject: [PATCH 28/31] moved doc, reverted permissions

---
 doc/source/whatsnew/v1.0.3.rst | 1 -
 doc/source/whatsnew/v1.1.0.rst | 3 +--
 doc/source/whatsnew/v1.2.0.rst | 4 +++-
 3 files changed, 4 insertions(+), 4 deletions(-)
 mode change 100755 => 100644 doc/source/whatsnew/v1.0.3.rst

diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst
old mode 100755
new mode 100644
index cc08329934897..62e6ae5b1c5cc
--- a/doc/source/whatsnew/v1.0.3.rst
+++ b/doc/source/whatsnew/v1.0.3.rst
@@ -23,7 +23,6 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 
-
 Contributors
 ~~~~~~~~~~~~
 
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index f88a788aa03cf..7529c73aa14f2 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -876,8 +876,7 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
-Fixed Dataframe Groupby value_counts with bins (:issue:`32471`)
+
 
 Categorical
 ^^^^^^^^^^^
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index bce6a735b7b07..4052046125baa 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -215,6 +215,7 @@ Performance improvements
 Bug fixes
 ~~~~~~~~~
 
+
 Categorical
 ^^^^^^^^^^^
 
@@ -315,7 +316,8 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
 - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
 - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
--
+- Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
+- Fixed Dataframe Groupby value_counts with bins (:issue:`32471`)
 
 Reshaping
 ^^^^^^^^^

From 74b13d8c8201807fa1b121e3cfccabdf125c3c38 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 13 Sep 2020 12:18:28 -0400
Subject: [PATCH 29/31] more doc and permission fix

---
 doc/source/whatsnew/v1.1.0.rst | 1 -
 doc/source/whatsnew/v1.2.0.rst | 3 +--
 pandas/core/algorithms.py      | 2 +-
 pandas/core/groupby/generic.py | 5 ++---
 4 files changed, 4 insertions(+), 7 deletions(-)
 mode change 100755 => 100644 pandas/core/algorithms.py
 mode change 100755 => 100644 pandas/core/groupby/generic.py

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 7529c73aa14f2..a49b29d691692 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -955,7 +955,6 @@ Numeric
 - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`)
 - Bug in arithmetic operations between :class:`DataFrame` objects with non-overlapping columns with duplicate labels causing an infinite loop (:issue:`35194`)
 - Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`)
-- Bug in :meth:`Series.value_counts` with ``normalize=True`` for NA values (:issue:`25970`)
 - Bug in :meth:`Index.difference` giving incorrect results when comparing a :class:`Float64Index` and object :class:`Index` (:issue:`35217`)
 - Bug in :class:`DataFrame` reductions (e.g. ``df.min()``, ``df.max()``) with ``ExtensionArray`` dtypes (:issue:`34520`, :issue:`32651`)
 - :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` now raise a ValueError if ``limit_direction`` is ``'forward'`` or ``'both'`` and ``method`` is ``'backfill'`` or ``'bfill'`` or ``limit_direction`` is ``'backward'`` or ``'both'`` and ``method`` is ``'pad'`` or ``'ffill'`` (:issue:`34746`)
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 4052046125baa..98304a9fcbe26 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -215,7 +215,6 @@ Performance improvements
 Bug fixes
 ~~~~~~~~~
 
-
 Categorical
 ^^^^^^^^^^^
 
@@ -248,6 +247,7 @@ Numeric
 ^^^^^^^
 - Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`)
 - Bug in :meth:`DataFrame.any` with ``axis=1`` and ``bool_only=True`` ignoring the ``bool_only`` keyword (:issue:`32432`)
+- Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
 -
 
 Conversion
@@ -316,7 +316,6 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
 - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
 - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-- Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
 - Fixed Dataframe Groupby value_counts with bins (:issue:`32471`)
 
 Reshaping
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
old mode 100755
new mode 100644
index dedcbcd8c8a21..532427dd69782
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -758,7 +758,7 @@ def value_counts(
 
         if is_extension_array_dtype(values):
 
-            # handle Categorical and sparse,
+            # handle Categorical and sparse data,
             result = Series(values)._values.value_counts(dropna=dropna)
             result.name = name
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
old mode 100755
new mode 100644
index fff8ce6da2c87..cff9a24bd1540
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -728,9 +728,8 @@ def value_counts(
         sorter = np.lexsort((val_lab, ids))
         ids, val_lab = ids[sorter], val_lab[sorter]
         used_ids = unique(ids)
-        if max(used_ids) >= len(
-            codes[0]
-        ):  # this means we had something skipped from the start
+        if max(used_ids) >= len(codes[0]):
+            # this means we had something skipped from the start
             used_ids = compress_group_index(used_ids)[0]
         codes = [code[used_ids] for code in codes]  # drop what was taken out for n/a
 

From f0e630a4b0788a266c2a7187c85bd021d7a56071 Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 13 Sep 2020 22:04:57 -0400
Subject: [PATCH 30/31] fixed docstrings

---
 doc/source/whatsnew/v1.0.0.rst | 0
 doc/source/whatsnew/v1.2.0.rst | 6 +++---
 pandas/core/algorithms.py      | 3 ++-
 pandas/core/base.py            | 5 +++--
 4 files changed, 8 insertions(+), 6 deletions(-)
 mode change 100755 => 100644 doc/source/whatsnew/v1.0.0.rst

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
old mode 100755
new mode 100644
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 98304a9fcbe26..fc2aaba6e9c0c 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -247,8 +247,8 @@ Numeric
 ^^^^^^^
 - Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`)
 - Bug in :meth:`DataFrame.any` with ``axis=1`` and ``bool_only=True`` ignoring the ``bool_only`` keyword (:issue:`32432`)
-- Fixed Series.value_counts so that normalize excludes NA values when dropna=False. (:issue:`25970`)
--
+- Bug in :meth:`Series.value_counts` with ``dropna=True`` and ``normalize=True`` where value counts did not sum to 1. (:issue:`25970`)
+
 
 Conversion
 ^^^^^^^^^^
@@ -316,7 +316,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
 - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
 - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-- Fixed Dataframe Groupby value_counts with bins (:issue:`32471`)
+- Bug in :meth:`DataFrameGroupBy.value_counts` outputting wrong index labels when ``bins`` is passed (:issue:`32471`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 532427dd69782..5e2944c80de92 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -725,11 +725,12 @@ def value_counts(
     bins : integer or iterable of numeric, optional
         Rather than count values, group them into half-open bins.
         Only works with numeric data.
-        If int, interpreted as number of bins and will use pd.cut.
+        If int, interpreted as number of bins.
         If interable of numeric, will use provided numbers as bin endpoints.
     dropna : bool, default True
         Don't include counts of NaN.
         If False and NaNs are present, NaN will be a key in the output.
+        .. versionchanged:: 1.2.0
 
     Returns
     -------
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 68454f50def09..55ca1259bb188 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1182,11 +1182,12 @@ def value_counts(
         bins : integer or iterable of numeric, optional
             Rather than count individual values, group them into half-open bins.
             Only works with numeric data.
-            If int, interpreted as number of bins and will use `pd.cut`.
+            If int, interpreted as number of bins.
             If interable of numeric, will use provided numbers as bin endpoints.
         dropna : bool, default True
             Don't include counts of NaN.
             If False and NaNs are present, NaN will be a key in the output.
+            .. versionchanged:: 1.2.0
 
         Returns
         -------
@@ -1237,7 +1238,7 @@ def value_counts(
         Bins can also be an iterable of numbers.  These numbers are treated
         as endpoints for the intervals.
 
-        >>> s.value_counts(bins=[0,2,4,9])
+        >>> s.value_counts(bins=[0, 2, 4, 9])
         (2.0, 4.0]      3
         (-0.001, 2.0]    2
         (4.0, 9.0]       0

From 9763e83b02995d706f967923b231b0776f09f60c Mon Sep 17 00:00:00 2001
From: DataInformer <fuller.evan@gmail.com>
Date: Sun, 13 Sep 2020 22:10:19 -0400
Subject: [PATCH 31/31] file perm

---
 doc/source/whatsnew/v1.0.0.rst | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 doc/source/whatsnew/v1.0.0.rst

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
old mode 100644
new mode 100755