diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index bce6a735b7b07..fc2aaba6e9c0c 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -247,7 +247,8 @@ Numeric
 ^^^^^^^
 - Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`)
 - Bug in :meth:`DataFrame.any` with ``axis=1`` and ``bool_only=True`` ignoring the ``bool_only`` keyword (:issue:`32432`)
--
+- Bug in :meth:`Series.value_counts` with ``dropna=True`` and ``normalize=True`` where the value counts did not sum to 1 (:issue:`25970`)
+
 
 Conversion
 ^^^^^^^^^^
@@ -315,7 +316,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
 - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
 - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
--
+- Bug in :meth:`DataFrameGroupBy.value_counts` producing incorrect index labels when ``bins`` is passed (:issue:`32471`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 872c51c7dfa75..5e2944c80de92 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -720,17 +720,23 @@ def value_counts(
     ascending : bool, default False
         Sort in ascending order
     normalize: bool, default False
-        If True then compute a relative histogram
-    bins : integer, optional
-        Rather than count values, group them into half-open bins,
-        convenience for pd.cut, only works with numeric data
+        If True, then compute a relative histogram that outputs the
+        proportion of each value.
+    bins : integer or iterable of numeric, optional
+        Rather than count values, group them into half-open bins.
+        Only works with numeric data.
+        If int, interpreted as the number of bins.
+        If iterable of numeric, the provided numbers are used as bin endpoints.
    dropna : bool, default True
-        Don't include counts of NaN
+        Don't include counts of NaN.
+        If False and NaNs are present, NaN will be a key in the output.
+
+        .. versionchanged:: 1.2

     Returns
     -------
     Series
     """
+    from pandas.core.series import Series
 
     name = getattr(values, "name", None)
 
@@ -744,39 +750,30 @@ def value_counts(
         except TypeError as err:
             raise TypeError("bins argument only works with numeric data.") from err
 
-        # count, remove nulls (from the index), and but the bins
+        # count, remove nulls (from the index), and use the bins
         result = ii.value_counts(dropna=dropna)
-        result = result[result.index.notna()]
         result.index = result.index.astype("interval")
         result = result.sort_index()
 
-        # if we are dropna and we have NO values
-        if dropna and (result._values == 0).all():
-            result = result.iloc[0:0]
-
-        # normalizing is by len of all (regardless of dropna)
-        counts = np.array([len(ii)])
-
     else:
 
         if is_extension_array_dtype(values):
 
-            # handle Categorical and sparse,
+            # handle Categorical and sparse data,
             result = Series(values)._values.value_counts(dropna=dropna)
             result.name = name
-            counts = result._values
 
         else:
             keys, counts = value_counts_arraylike(values, dropna)
 
             result = Series(counts, index=keys, name=name)
 
-    if sort:
-        result = result.sort_values(ascending=ascending)
-
     if normalize:
-        result = result / float(counts.sum())
+        counts = result._values
+        result = result / float(max(counts.sum(), 1))
+
+    if sort:
+        result = result.sort_values(ascending=ascending)
 
     return result
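Note on the ``algorithms.value_counts`` change above: normalization now divides by the sum of the retained counts (rather than the length of the pre-``cut`` interval index) and happens before sorting. A doctest-style sketch of the intended post-fix behavior, reusing the data from the new tests further down (exact interval formatting may differ):

    >>> import numpy as np
    >>> import pandas as pd
    >>> s = pd.Series([1, 2, 2, 3, 3, 3, np.nan, np.nan, 4, 5])
    >>> s.value_counts(dropna=True, normalize=True, bins=3)
    (2.333, 3.667]    0.375
    (0.995, 2.333]    0.375
    (3.667, 5.0]      0.250
    dtype: float64

Previously the two NaNs stayed in the denominator, so these proportions summed to 0.8 instead of 1 (GH25970).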
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 1926803d8f04b..55ca1259bb188 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1174,17 +1174,20 @@ def value_counts(
         Parameters
         ----------
         normalize : bool, default False
-            If True then the object returned will contain the relative
-            frequencies of the unique values.
+            If True, outputs the relative frequencies of the unique values.
         sort : bool, default True
             Sort by frequencies.
         ascending : bool, default False
             Sort in ascending order.
-        bins : int, optional
-            Rather than count values, group them into half-open bins,
-            a convenience for ``pd.cut``, only works with numeric data.
+        bins : integer or iterable of numeric, optional
+            Rather than count individual values, group them into half-open bins.
+            Only works with numeric data.
+            If int, interpreted as the number of bins.
+            If iterable of numeric, the provided numbers are used as bin endpoints.
         dropna : bool, default True
             Don't include counts of NaN.
+            If False and NaNs are present, NaN will be a key in the output.
+
+            .. versionchanged:: 1.2
 
         Returns
         -------
@@ -1221,8 +1224,10 @@ def value_counts(
 
         Bins can be useful for going from a continuous variable to a
         categorical variable; instead of counting unique
-        apparitions of values, divide the index in the specified
-        number of half-open bins.
+        instances of values, count the number of values that fall
+        into half-open intervals.
+
+        Bins can be an int.
 
         >>> s.value_counts(bins=3)
         (2.0, 3.0]       2
         (-0.001, 1.0]    2
         (3.0, 4.0]       1
         dtype: int64
 
+        Bins can also be an iterable of numbers. These numbers are treated
+        as endpoints for the intervals.
+
+        >>> s.value_counts(bins=[0, 2, 4, 9])
+        (2.0, 4.0]       3
+        (-0.001, 2.0]    2
+        (4.0, 9.0]       0
+        dtype: int64
+
         **dropna**
 
         With `dropna` set to `False` we can also see NaN index values.
@@ -1242,6 +1256,7 @@ def value_counts(
         1.0    1
         dtype: int64
         """
+
         result = value_counts(
             self,
             sort=sort,
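The ``dropna`` docstring addition is exercised by the new tests below; with ``dropna=False`` the NaN bucket becomes its own key and still participates in normalization. A sketch for the same series as above (formatting approximate):

    >>> s.value_counts(dropna=False, normalize=True, bins=3)
    (2.333, 3.667]    0.3
    (0.995, 2.333]    0.3
    NaN               0.2
    (3.667, 5.0]      0.2
    dtype: float64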
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index e870187fc7952..cff9a24bd1540 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -45,7 +45,6 @@
     ensure_platform_int,
     is_bool,
     is_integer_dtype,
-    is_interval_dtype,
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
@@ -59,6 +58,7 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
+from pandas.core.algorithms import unique
 from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
@@ -79,6 +79,7 @@
 import pandas.core.indexes.base as ibase
 from pandas.core.internals import BlockManager
 from pandas.core.series import Series
+from pandas.core.sorting import compress_group_index
 from pandas.core.util.numba_ import NUMBA_FUNC_CACHE, maybe_use_numba
 from pandas.plotting import boxplot_frame_groupby
 
@@ -685,7 +686,6 @@ def value_counts(
         self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
     ):
 
-        from pandas.core.reshape.merge import get_join_indexers
         from pandas.core.reshape.tile import cut
 
         if bins is not None and not np.iterable(bins):
@@ -701,111 +701,111 @@ def value_counts(
 
         ids, _, _ = self.grouper.group_info
         val = self.obj._values
+        codes = self.grouper.reconstructed_codes  # this will track the groups
 
         # groupby removes null keys from groupings
         mask = ids != -1
         ids, val = ids[mask], val[mask]
+        if dropna:
+            mask = ~isna(val)
+            if not mask.all():
+                ids, val = ids[mask], val[mask]
 
         if bins is None:
-            lab, lev = algorithms.factorize(val, sort=True)
-            llab = lambda lab, inc: lab[inc]
+            val_lab, val_lev = algorithms.factorize(
+                val, sort=True, na_sentinel=(None if dropna else -1)
+            )
         else:
+            # val_lab is a Categorical with categories an IntervalIndex
+            val_lab = cut(Series(val), bins, include_lowest=True)
+            val_lev = val_lab.cat.categories
+            val_lab = val_lab.cat.codes.values
 
-            # lab is a Categorical with categories an IntervalIndex
-            lab = cut(Series(val), bins, include_lowest=True)
-            lev = lab.cat.categories
-            lab = lev.take(lab.cat.codes)
-            llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
-
-        if is_interval_dtype(lab.dtype):
-            # TODO: should we do this inside II?
-            sorter = np.lexsort((lab.left, lab.right, ids))
-        else:
-            sorter = np.lexsort((lab, ids))
+            if dropna:
+                included = val_lab != -1
+                ids, val_lab = ids[included], val_lab[included]
 
-        ids, lab = ids[sorter], lab[sorter]
+        sorter = np.lexsort((val_lab, ids))
+        ids, val_lab = ids[sorter], val_lab[sorter]
+        used_ids = unique(ids)
+        if max(used_ids) >= len(codes[0]):
+            # this means we had something skipped from the start
+            used_ids = compress_group_index(used_ids)[0]
+        codes = [code[used_ids] for code in codes]  # drop what was taken out for n/a
 
         # group boundaries are where group ids change
-        idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
-
-        # new values are where sorted labels change
-        lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
-        inc = np.r_[True, lchanges]
-        inc[idx] = True  # group boundaries are also new values
-        out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
-
-        # num. of times each group should be repeated
-        rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
-
-        # multi-index components
-        codes = self.grouper.reconstructed_codes
-        codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
-        levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
+        change_ids = ids[1:] != ids[:-1]
+        changes = np.logical_or(change_ids, (val_lab[1:] != val_lab[:-1]))
+        changes = np.r_[True, changes]
+        val_lab = val_lab[changes]
+        ids = ids[changes]
+        cts = np.diff(np.nonzero(np.r_[changes, True]))[0]
+        idx = np.r_[0, 1 + np.nonzero(change_ids)[0]]
+        # how many times each index gets repeated
+        rep = partial(np.repeat, repeats=np.add.reduceat(changes, idx))
+
+        if (not dropna) and (-1 in val_lab):
+            # in this case we need to explicitly add NaN as a level
+            val_lev = np.r_[Index([np.nan]), val_lev]
+            val_lab += 1
+
+        levels = [ping.group_index for ping in self.grouper.groupings] + [
+            Index(val_lev)
+        ]
         names = self.grouper.names + [self._selection_name]
 
-        if dropna:
-            mask = codes[-1] != -1
-            if mask.all():
-                dropna = False
-            else:
-                out, codes = out[mask], [level_codes[mask] for level_codes in codes]
-
         if normalize:
-            out = out.astype("float")
-            d = np.diff(np.r_[idx, len(ids)])
-            if dropna:
-                m = ids[lab == -1]
-                np.add.at(d, m, -1)
-                acc = rep(d)[mask]
-            else:
-                acc = rep(d)
-            out /= acc
-
-        if sort and bins is None:
-            cat = ids[inc][mask] if dropna else ids[inc]
-            sorter = np.lexsort((out if ascending else -out, cat))
-            out, codes[-1] = out[sorter], codes[-1][sorter]
+            num_repeats = np.diff(idx, append=len(change_ids) + 1)
+            cts = cts.astype("float") / rep(num_repeats)
+            # each divisor is the number of repeats for that index
 
         if bins is None:
+            codes = [rep(level_codes) for level_codes in codes] + [val_lab]
+
+            if sort:
+                indices = tuple(reversed(codes[:-1]))
+                sorter = np.lexsort(
+                    np.r_[[val_lab], [cts if ascending else -cts], indices]
+                )  # sorts using right columns first
+                cts = cts[sorter]
+                codes = [code[sorter] for code in codes]
+
             mi = MultiIndex(
                 levels=levels, codes=codes, names=names, verify_integrity=False
             )
-
-            if is_integer_dtype(out):
-                out = ensure_int64(out)
-            return self.obj._constructor(out, index=mi, name=self._selection_name)
+            if is_integer_dtype(cts):
+                cts = ensure_int64(cts)
+            return self.obj._constructor(cts, index=mi, name=self._selection_name)
 
         # for compat. with libgroupby.value_counts need to ensure every
         # bin is present at every index level, null filled with zeros
-        diff = np.zeros(len(out), dtype="bool")
-        for level_codes in codes[:-1]:
-            diff |= np.r_[True, level_codes[1:] != level_codes[:-1]]
-
-        ncat, nbin = diff.sum(), len(levels[-1])
-
-        left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
-
-        right = [diff.cumsum() - 1, codes[-1]]
-
-        _, idx = get_join_indexers(left, right, sort=False, how="left")
-        out = np.where(idx != -1, out[idx], 0)
+        nbin = len(levels[-1])
+        ncat = len(codes[0])
+        fout = np.zeros((ncat * nbin), dtype=float if normalize else np.int64)
+        id = 0
+        change_ids = np.r_[  # need to update now that we removed full repeats
+            ids[1:] != ids[:-1], True
+        ]
+        for i, ct in enumerate(cts):  # fill in nonzero values of fout
+            fout[id * nbin + val_lab[i]] = cts[i]
+            id += change_ids[i]
+        ncodes = [np.repeat(code, nbin) for code in codes]
+        ncodes.append(np.tile(range(nbin), len(codes[0])))
 
         if sort:
-            sorter = np.lexsort((out if ascending else -out, left[0]))
-            out, left[-1] = out[sorter], left[-1][sorter]
-
-        # build the multi-index w/ full levels
-        def build_codes(lev_codes: np.ndarray) -> np.ndarray:
-            return np.repeat(lev_codes[diff], nbin)
-
-        codes = [build_codes(lev_codes) for lev_codes in codes[:-1]]
-        codes.append(left[-1])
-
-        mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
-
-        if is_integer_dtype(out):
-            out = ensure_int64(out)
-        return self.obj._constructor(out, index=mi, name=self._selection_name)
+            indices = tuple(reversed(ncodes[:-1]))
+            sorter = np.lexsort(
+                np.r_[[fout if ascending else -fout], indices]
+            )  # sorts using right columns first
+            fout = fout[sorter]
+            ncodes = [code[sorter] for code in ncodes]
+        mi = MultiIndex(
+            levels=levels, codes=ncodes, names=names, verify_integrity=False
+        )
+        if is_integer_dtype(fout):
+            fout = ensure_int64(fout)
+        return self.obj._constructor(fout, index=mi, name=self._selection_name)
 
     def count(self) -> Series:
         """
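The rewritten groupby ``value_counts`` above counts runs after a single ``np.lexsort`` over (group id, value label) pairs: a run ends wherever either key changes, and the run lengths are the counts. A minimal self-contained sketch of just that trick (the arrays are made up for illustration; the real code operates on ``ids`` from ``grouper.group_info`` and the factorized/binned ``val_lab``):

    >>> import numpy as np
    >>> ids = np.array([0, 0, 0, 1, 1])      # group id per row
    >>> val_lab = np.array([2, 0, 2, 1, 1])  # factorized value label per row
    >>> sorter = np.lexsort((val_lab, ids))  # sort by group, then by value
    >>> ids, val_lab = ids[sorter], val_lab[sorter]
    >>> changes = np.r_[True, (ids[1:] != ids[:-1]) | (val_lab[1:] != val_lab[:-1])]
    >>> np.diff(np.nonzero(np.r_[changes, True]))[0]  # run lengths = counts
    array([1, 2, 2])

That is, group 0 contains value 0 once and value 2 twice, and group 1 contains value 1 twice.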
diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
index de04c30432e6f..8ea7f0fe3fc98 100644
--- a/pandas/tests/base/test_value_counts.py
+++ b/pandas/tests/base/test_value_counts.py
@@ -191,6 +191,37 @@ def test_value_counts_bins(index_or_series):
     assert s.nunique() == 0
 
 
+@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize("bins", [None, 3, [0, 1, 3, 6]])
+def test_value_counts_bins_nas(dropna, bins):
+    # GH25970, handle normalizing bins with NA's properly
+    # First test that NA's are included appropriately
+    rand_data = np.append(
+        np.random.randint(1, 5, 50), [np.nan] * np.random.randint(1, 20)
+    )
+    s = Series(rand_data)
+    if dropna:
+        assert not s.value_counts(dropna=dropna, bins=bins).index.hasnans
+    else:
+        assert s.value_counts(dropna=dropna, bins=bins).index.hasnans
+
+
+def test_value_counts_bins_specific_na():
+    # GH25970 case where proportions were incorrect with dropna and normalize=True
+    s2 = Series([1, 2, 2, 3, 3, 3, np.nan, np.nan, 4, 5])
+    intervals = IntervalIndex.from_breaks([0.995, 2.333, 3.667, 5.0])
+    expected_dropna = Series([0.375, 0.375, 0.25], intervals.take([1, 0, 2]))
+    tm.assert_series_equal(
+        s2.value_counts(dropna=True, normalize=True, bins=3), expected_dropna
+    )
+    keys = list(intervals.take([1, 0, 2]))
+    keys.insert(2, np.nan)
+    expected_keepna = Series([0.3, 0.3, 0.2, 0.2], keys)
+    tm.assert_series_equal(
+        s2.value_counts(dropna=False, normalize=True, bins=3), expected_keepna
+    )
+
+
 def test_value_counts_datetime64(index_or_series):
     klass = index_or_series
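For reference, the expected values in ``test_value_counts_bins_specific_na`` follow directly from the bin counts (3, 3, 2) of the 8 non-null values: with ``dropna=True`` the denominator is 8, while with ``dropna=False`` all 10 values count and the 2 NaNs form their own key:

    >>> 3 / 8, 3 / 8, 2 / 8
    (0.375, 0.375, 0.25)
    >>> 3 / 10, 3 / 10, 2 / 10, 2 / 10
    (0.3, 0.3, 0.2, 0.2)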
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index c86cb4532bc26..94e19b93368d8 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -41,13 +41,12 @@ def seed_df(seed_nans, n, m):
 ids = []
 for seed_nans in [True, False]:
     for n, m in product((100, 1000), (5, 20)):
-
         df = seed_df(seed_nans, n, m)
         bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2)
         keys = "1st", "2nd", ["1st", "2nd"]
         for k, b in product(keys, bins):
             binned.append((df, k, b, n, m))
-            ids.append(f"{k}-{n}-{m}")
+            ids.append(f"{k}-{n}-{m}-{seed_nans}")
 
 
 @pytest.mark.slow
@@ -71,16 +70,41 @@ def rebuild_index(df):
     gr = df.groupby(keys, sort=isort)
     left = gr["3rd"].value_counts(**kwargs)
+    left.index.names = left.index.names[:-1] + ["3rd"]
 
-    gr = df.groupby(keys, sort=isort)
     right = gr["3rd"].apply(Series.value_counts, **kwargs)
     right.index.names = right.index.names[:-1] + ["3rd"]
 
     # have to sort on index because of unstable sort on values
     left, right = map(rebuild_index, (left, right))  # xref GH9212
+
+    # have to ignore 0 counts to be consistent with individual column value_counts
+    left = left[left.astype(bool)]
+    right = right[right.astype(bool)]
     tm.assert_series_equal(left.sort_index(), right.sort_index())
 
 
+def test_groupby_value_counts_bins():
+    # GH32471
+    BINS = [0, 20, 80, 100]
+    values = [
+        [0, 5, 0],
+        [1, 5, 100],
+        [0, 5, 100],
+        [2, 5, 0],
+        [3, 6, 100],
+        [3, 5, 100],
+        [1, 5, 100],
+    ]
+    df = DataFrame(values, columns=["key1", "key2", "score"])
+    result = df.groupby(["key1", "key2"])["score"].value_counts(bins=BINS)
+    result.sort_index(inplace=True)
+    expected = Series(
+        [1, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1], result.index, name="score"
+    )
+    tm.assert_series_equal(result, expected)
+
+
 def test_series_groupby_value_counts_with_grouper():
     # GH28479
     df = DataFrame(
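A quick sanity check on ``test_groupby_value_counts_bins`` (a sketch, not part of the test suite): the five observed ``(key1, key2)`` groups times three bins give 15 rows, zero-filled where a group has no scores in a bin, and the counts sum to the 7 input rows:

    >>> import pandas as pd
    >>> values = [[0, 5, 0], [1, 5, 100], [0, 5, 100], [2, 5, 0],
    ...           [3, 6, 100], [3, 5, 100], [1, 5, 100]]
    >>> df = pd.DataFrame(values, columns=["key1", "key2", "score"])
    >>> out = df.groupby(["key1", "key2"])["score"].value_counts(bins=[0, 20, 80, 100])
    >>> len(out), int(out.sum())
    (15, 7)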
diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py
index f97362ce9c2a9..270f29914442a 100644
--- a/pandas/tests/series/methods/test_value_counts.py
+++ b/pandas/tests/series/methods/test_value_counts.py
@@ -179,6 +179,12 @@ def test_value_counts_categorical_with_nan(self):
         res = ser.value_counts(dropna=False, sort=False)
         tm.assert_series_equal(res, exp)
 
+    def test_value_counts_interval_bins(self):
+        ser = Series([1, 2, 3, 0, 1, 4], ["a", "a", "a", "b", "b", "c"])
+        res = ser.value_counts(bins=[0, 1, 2])
+        exp = Series([3, 1], res.index)
+        tm.assert_series_equal(res, exp)
+
     @pytest.mark.parametrize(
         "ser, dropna, exp",
         [
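In ``test_value_counts_interval_bins``, ``bins=[0, 1, 2]`` yields the two intervals ``(-0.001, 1.0]`` and ``(1.0, 2.0]`` (the first endpoint is nudged down because of ``include_lowest``); the values 3 and 4 fall outside both intervals and are dropped under the default ``dropna=True``, leaving counts of 3 and 1. A sketch of the expected output (formatting approximate):

    >>> import pandas as pd
    >>> ser = pd.Series([1, 2, 3, 0, 1, 4], ["a", "a", "a", "b", "b", "c"])
    >>> ser.value_counts(bins=[0, 1, 2])
    (-0.001, 1.0]    3
    (1.0, 2.0]       1
    dtype: int64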