From e22de7ebe9263786a35dee44050cbaf4b80f163c Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Tue, 18 Jun 2019 15:17:15 +0100
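Subject: [PATCH 1/3] Calculate normalized frequencies in value_counts
 correctly when bins is not None

When bins is given, normalize by the sum of the binned counts instead of
len(ii), and no longer filter NaN entries out of the binned result
unconditionally (GH25970).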

---
 pandas/core/algorithms.py  |  4 +---
 pandas/tests/test_algos.py | 23 +++++++++++++++--------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 932ac71a23ed0..e7b46309b95a1 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -670,7 +670,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
 
         # count, remove nulls (from the index), and but the bins
         result = ii.value_counts(dropna=dropna)
-        result = result[result.index.notna()]
         result.index = result.index.astype('interval')
         result = result.sort_index()
 
@@ -678,8 +677,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
         if dropna and (result.values == 0).all():
             result = result.iloc[0:0]
 
-        # normalizing is by len of all (regardless of dropna)
-        counts = np.array([len(ii)])
+        counts = np.array([result.sum()])
 
     else:
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 64d8436dd5fe3..90c9b7a3591a4 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -967,21 +967,28 @@ def test_dropna(self):
             expected = Series([2, 1, 1], index=[5., 10.3, np.nan])
             tm.assert_series_equal(result, expected)
 
-    def test_value_counts_normalized(self):
+    @pytest.mark.parametrize('dropna, vals, index', [
+        (False, [0.6, 0.2, 0.2], [np.nan, 2.0, 1.0]),
+        (True, [0.5, 0.5], [2.0, 1.0])])
+    def test_value_counts_normalized(self, dropna, vals, index):
         # GH12558
         s = Series([1, 2, np.nan, np.nan, np.nan])
         dtypes = (np.float64, np.object, 'M8[ns]')
         for t in dtypes:
             s_typed = s.astype(t)
-            result = s_typed.value_counts(normalize=True, dropna=False)
-            expected = Series([0.6, 0.2, 0.2],
-                              index=Series([np.nan, 2.0, 1.0], dtype=t))
+            result = s_typed.value_counts(normalize=True, dropna=dropna)
+            expected = Series(vals, index=Series(index, dtype=t))
             tm.assert_series_equal(result, expected)
 
-            result = s_typed.value_counts(normalize=True, dropna=True)
-            expected = Series([0.5, 0.5],
-                              index=Series([2.0, 1.0], dtype=t))
-            tm.assert_series_equal(result, expected)
+    @pytest.mark.parametrize('dropna, vals, tuples', [
+        (False, [0.5, 0.3, 0.2], [(-0.005, 2.0), (2.0, 4.0), np.nan]),
+        (True, [0.625, 0.375], [(-0.005, 2.0), (2.0, 4.0)])])
+    def test_value_counts_normalized_bins(self, dropna, vals, tuples):
+        # GH25970
+        s = Series([1, 1, 2, 0, 1, np.nan, 4, 4, np.nan, 3])
+        result = s.value_counts(normalize=True, bins=2, dropna=dropna)
+        expected = Series(vals, index=IntervalIndex.from_tuples(tuples))
+        tm.assert_series_equal(result, expected)
 
     def test_value_counts_uint64(self):
         arr = np.array([2**63], dtype=np.uint64)

From 5d1e2f88a84841aceb803eb4d01611400c78b22d Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Wed, 19 Jun 2019 19:51:30 +0100
Subject: [PATCH 2/3] Parametrize dtype in test_value_counts_normalized

---
 pandas/tests/test_algos.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 90c9b7a3591a4..16d75cad5f837 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -967,18 +967,18 @@ def test_dropna(self):
             expected = Series([2, 1, 1], index=[5., 10.3, np.nan])
             tm.assert_series_equal(result, expected)
 
+
     @pytest.mark.parametrize('dropna, vals, index', [
         (False, [0.6, 0.2, 0.2], [np.nan, 2.0, 1.0]),
         (True, [0.5, 0.5], [2.0, 1.0])])
-    def test_value_counts_normalized(self, dropna, vals, index):
+    @pytest.mark.parametrize('dtype', [np.float64, np.object, 'M8[ns]'])
+    def test_value_counts_normalized(self, dropna, vals, index, dtype):
         # GH12558
         s = Series([1, 2, np.nan, np.nan, np.nan])
-        dtypes = (np.float64, np.object, 'M8[ns]')
-        for t in dtypes:
-            s_typed = s.astype(t)
-            result = s_typed.value_counts(normalize=True, dropna=dropna)
-            expected = Series(vals, index=Series(index, dtype=t))
-            tm.assert_series_equal(result, expected)
+        s_typed = s.astype(dtype)
+        result = s_typed.value_counts(normalize=True, dropna=dropna)
+        expected = Series(vals, index=Series(index, dtype=dtype))
+        tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize('dropna, vals, tuples', [
         (False, [0.5, 0.3, 0.2], [(-0.005, 2.0), (2.0, 4.0), np.nan]),

From 0496b799e4e0ed62262b694a89d8ddef6a0dcd21 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Wed, 19 Jun 2019 19:52:51 +0100
Subject: [PATCH 3/3] Remove extra empty line

---
 pandas/tests/test_algos.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 16d75cad5f837..0991072c442c2 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -967,7 +967,6 @@ def test_dropna(self):
             expected = Series([2, 1, 1], index=[5., 10.3, np.nan])
             tm.assert_series_equal(result, expected)
 
-
     @pytest.mark.parametrize('dropna, vals, index', [
         (False, [0.6, 0.2, 0.2], [np.nan, 2.0, 1.0]),
         (True, [0.5, 0.5], [2.0, 1.0])])