From f197aeaeaf7d9f3e713abd845f6ee6eb47ab7ed6 Mon Sep 17 00:00:00 2001
From: Jonas Schulze <jonas.schulze@st.ovgu.de>
Date: Sat, 10 Mar 2018 16:05:23 +0100
Subject: [PATCH 1/2] DOC: update the pandas.DataFrame.plot.kde and
 pandas.Series.plot.kde docstrings

Unfortunately, I was not able to compute a kernel density estimate of a
two-dimensional random variable. Hence, the DataFrame example analyzes
each column as an independent one-dimensional data series.
---
 pandas/plotting/_core.py | 94 ++++++++++++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 19 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 520c6cecce6d7..d586f06c46e94 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -2618,13 +2618,16 @@ def hist(self, bins=10, **kwds):
 
     def kde(self, bw_method=None, ind=None, **kwds):
         """
-        Kernel Density Estimate plot using Gaussian kernels.
+        Generate Kernel Density Estimate plot using Gaussian kernels.
 
-        In statistics, kernel density estimation (KDE) is a non-parametric way
-        to estimate the probability density function (PDF) of a random
+        In statistics, `kernel density estimation`_ (KDE) is a non-parametric
+        way to estimate the probability density function (PDF) of a random
         variable. This function uses Gaussian kernels and includes automatic
         bandwith determination.
 
+        .. _kernel density estimation:
+            https://en.wikipedia.org/wiki/Kernel_density_estimation
+
         Parameters
         ----------
         bw_method : str, scalar or callable, optional
@@ -2635,9 +2638,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
         ind : NumPy array or integer, optional
             Evaluation points for the estimated PDF. If None (default),
             1000 equally spaced points are used. If `ind` is a NumPy array, the
-            kde is evaluated at the points passed. If `ind` is an integer,
+            KDE is evaluated at the points passed. If `ind` is an integer,
             `ind` number of equally spaced points are used.
-        kwds : optional
+        **kwds : optional
             Additional keyword arguments are documented in
             :meth:`pandas.Series.plot`.
 
@@ -2645,16 +2648,17 @@ def kde(self, bw_method=None, ind=None, **kwds):
         -------
         axes : matplotlib.AxesSubplot or np.array of them
 
-        See also
+        See Also
         --------
         scipy.stats.gaussian_kde : Representation of a kernel-density
             estimate using Gaussian kernels. This is the function used
             internally to estimate the PDF.
+        DataFrame.plot.kde : Generate a KDE plot for a DataFrame.
 
         Examples
         --------
         Given a Series of points randomly sampled from an unknown
-        distribution, estimate this distribution using KDE with automatic
+        distribution, estimate its distribution using KDE with automatic
         bandwidth determination and plot the results, evaluating them at
         1000 equally spaced points (default):
 
@@ -2664,10 +2668,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
             >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
             >>> ax = s.plot.kde()
 
-
-        An scalar fixed bandwidth can be specified. Using a too small bandwidth
-        can lead to overfitting, while a too large bandwidth can result in
-        underfitting:
+        A scalar bandwidth can be specified. Using a small bandwidth value can
+        lead to overfitting, while using a large bandwidth value may result
+        in underfitting:
 
         .. plot::
             :context: close-figs
@@ -2851,27 +2854,80 @@ def hist(self, by=None, bins=10, **kwds):
 
     def kde(self, bw_method=None, ind=None, **kwds):
         """
-        Kernel Density Estimate plot
+        Generate Kernel Density Estimate plot using Gaussian kernels.
+
+        In statistics, `kernel density estimation`_ (KDE) is a non-parametric
+        way to estimate the probability density function (PDF) of a random
+        variable. This function uses Gaussian kernels and includes automatic
+        bandwith determination.
+
+        .. _kernel density estimation:
+            https://en.wikipedia.org/wiki/Kernel_density_estimation
 
         Parameters
         ----------
-        bw_method: str, scalar or callable, optional
-            The method used to calculate the estimator bandwidth.  This can be
+        bw_method : str, scalar or callable, optional
+            The method used to calculate the estimator bandwidth. This can be
             'scott', 'silverman', a scalar constant or a callable.
             If None (default), 'scott' is used.
             See :class:`scipy.stats.gaussian_kde` for more information.
         ind : NumPy array or integer, optional
-            Evaluation points. If None (default), 1000 equally spaced points
-            are used. If `ind` is a NumPy array, the kde is evaluated at the
-            points passed. If `ind` is an integer, `ind` number of equally
-            spaced points are used.
-        `**kwds` : optional
+            Evaluation points for the estimated PDF. If None (default),
+            1000 equally spaced points are used. If `ind` is a NumPy array, the
+            KDE is evaluated at the points passed. If `ind` is an integer,
+            `ind` number of equally spaced points are used.
+        **kwds : optional
             Additional keyword arguments are documented in
             :meth:`pandas.DataFrame.plot`.
 
         Returns
         -------
         axes : matplotlib.AxesSubplot or np.array of them
+
+        See Also
+        --------
+        scipy.stats.gaussian_kde : Representation of a kernel-density
+            estimate using Gaussian kernels. This is the function used
+            internally to estimate the PDF.
+        Series.plot.kde : Generate a KDE plot for a Series.
+
+        Examples
+        --------
+        Given several Series of points randomly sampled from unknown
+        distributions, estimate their distribution using KDE with automatic
+        bandwidth determination and plot the results, evaluating them at
+        1000 equally spaced points (default):
+
+        .. plot::
+            :context: close-figs
+
+            >>> df = pd.DataFrame({
+            ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
+            ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
+            ... })
+            >>> ax = df.plot.kde()
+
+        A scalar bandwidth can be specified. Using a small bandwidth value can
+        lead to overfitting, while using a large bandwidth value may result
+        in underfitting:
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.kde(bw_method=0.3)
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.kde(bw_method=3)
+
+        Finally, the `ind` parameter determines the evaluation points for the
+        plot of the estimated PDF:
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
         """
         return self(kind='kde', bw_method=bw_method, ind=ind, **kwds)
 

From a95751e90de542ae8c7216139d173f405ea3d04b Mon Sep 17 00:00:00 2001
From: Jonas Schulze <jonas.schulze@st.ovgu.de>
Date: Sun, 11 Mar 2018 01:05:08 +0100
Subject: [PATCH 2/2] DOC: extract similarities of kde docstrings

`DataFrame.plot.kde` and `Series.plot.kde` now share a common docstring
template (`_kde_docstring`); the parts that differ are substituted in.
---
 pandas/plotting/_core.py | 146 ++++++++++++++++-----------------------
 1 file changed, 60 insertions(+), 86 deletions(-)

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index d586f06c46e94..f587cb91ab932 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1380,6 +1380,50 @@ def orientation(self):
             return 'vertical'
 
 
+_kde_docstring = """
+        Generate Kernel Density Estimate plot using Gaussian kernels.
+
+        In statistics, `kernel density estimation`_ (KDE) is a non-parametric
+        way to estimate the probability density function (PDF) of a random
+        variable. This function uses Gaussian kernels and includes automatic
+        bandwidth determination.
+
+        .. _kernel density estimation:
+            https://en.wikipedia.org/wiki/Kernel_density_estimation
+
+        Parameters
+        ----------
+        bw_method : str, scalar or callable, optional
+            The method used to calculate the estimator bandwidth. This can be
+            'scott', 'silverman', a scalar constant or a callable.
+            If None (default), 'scott' is used.
+            See :class:`scipy.stats.gaussian_kde` for more information.
+        ind : NumPy array or integer, optional
+            Evaluation points for the estimated PDF. If None (default),
+            1000 equally spaced points are used. If `ind` is a NumPy array, the
+            KDE is evaluated at the points passed. If `ind` is an integer,
+            `ind` number of equally spaced points are used.
+        **kwds : optional
+            Additional keyword arguments are documented in
+            :meth:`pandas.%(this-datatype)s.plot`.
+
+        Returns
+        -------
+        axes : matplotlib.AxesSubplot or np.array of them
+
+        See Also
+        --------
+        scipy.stats.gaussian_kde : Representation of a kernel-density
+            estimate using Gaussian kernels. This is the function used
+            internally to estimate the PDF.
+        %(sibling-datatype)s.plot.kde : Generate a KDE plot for a
+            %(sibling-datatype)s.
+
+        Examples
+        --------
+        %(examples)s
+        """
+
 class KdePlot(HistPlot):
     _kind = 'kde'
     orientation = 'vertical'
@@ -2616,49 +2660,12 @@ def hist(self, bins=10, **kwds):
         """
         return self(kind='hist', bins=bins, **kwds)
 
-    def kde(self, bw_method=None, ind=None, **kwds):
-        """
-        Generate Kernel Density Estimate plot using Gaussian kernels.
-
-        In statistics, `kernel density estimation`_ (KDE) is a non-parametric
-        way to estimate the probability density function (PDF) of a random
-        variable. This function uses Gaussian kernels and includes automatic
-        bandwith determination.
-
-        .. _kernel density estimation:
-            https://en.wikipedia.org/wiki/Kernel_density_estimation
-
-        Parameters
-        ----------
-        bw_method : str, scalar or callable, optional
-            The method used to calculate the estimator bandwidth. This can be
-            'scott', 'silverman', a scalar constant or a callable.
-            If None (default), 'scott' is used.
-            See :class:`scipy.stats.gaussian_kde` for more information.
-        ind : NumPy array or integer, optional
-            Evaluation points for the estimated PDF. If None (default),
-            1000 equally spaced points are used. If `ind` is a NumPy array, the
-            KDE is evaluated at the points passed. If `ind` is an integer,
-            `ind` number of equally spaced points are used.
-        **kwds : optional
-            Additional keyword arguments are documented in
-            :meth:`pandas.Series.plot`.
-
-        Returns
-        -------
-        axes : matplotlib.AxesSubplot or np.array of them
-
-        See Also
-        --------
-        scipy.stats.gaussian_kde : Representation of a kernel-density
-            estimate using Gaussian kernels. This is the function used
-            internally to estimate the PDF.
-        DataFrame.plot.kde : Generate a KDE plot for a DataFrame.
-
-        Examples
-        --------
+    @Appender(_kde_docstring % {
+        'this-datatype': 'Series',
+        'sibling-datatype': 'DataFrame',
+        'examples': """
         Given a Series of points randomly sampled from an unknown
-        distribution, estimate its distribution using KDE with automatic
+        distribution, estimate its PDF using KDE with automatic
         bandwidth determination and plot the results, evaluating them at
         1000 equally spaced points (default):
 
@@ -2689,7 +2696,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
             :context: close-figs
 
             >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])
-        """
+        """.strip()
+    })
+    def kde(self, bw_method=None, ind=None, **kwds):
         return self(kind='kde', bw_method=bw_method, ind=ind, **kwds)
 
     density = kde
@@ -2852,49 +2861,12 @@ def hist(self, by=None, bins=10, **kwds):
         """
         return self(kind='hist', by=by, bins=bins, **kwds)
 
-    def kde(self, bw_method=None, ind=None, **kwds):
-        """
-        Generate Kernel Density Estimate plot using Gaussian kernels.
-
-        In statistics, `kernel density estimation`_ (KDE) is a non-parametric
-        way to estimate the probability density function (PDF) of a random
-        variable. This function uses Gaussian kernels and includes automatic
-        bandwith determination.
-
-        .. _kernel density estimation:
-            https://en.wikipedia.org/wiki/Kernel_density_estimation
-
-        Parameters
-        ----------
-        bw_method : str, scalar or callable, optional
-            The method used to calculate the estimator bandwidth. This can be
-            'scott', 'silverman', a scalar constant or a callable.
-            If None (default), 'scott' is used.
-            See :class:`scipy.stats.gaussian_kde` for more information.
-        ind : NumPy array or integer, optional
-            Evaluation points for the estimated PDF. If None (default),
-            1000 equally spaced points are used. If `ind` is a NumPy array, the
-            KDE is evaluated at the points passed. If `ind` is an integer,
-            `ind` number of equally spaced points are used.
-        **kwds : optional
-            Additional keyword arguments are documented in
-            :meth:`pandas.DataFrame.plot`.
-
-        Returns
-        -------
-        axes : matplotlib.AxesSubplot or np.array of them
-
-        See Also
-        --------
-        scipy.stats.gaussian_kde : Representation of a kernel-density
-            estimate using Gaussian kernels. This is the function used
-            internally to estimate the PDF.
-        Series.plot.kde : Generate a KDE plot for a Series.
-
-        Examples
-        --------
+    @Appender(_kde_docstring % {
+        'this-datatype': 'DataFrame',
+        'sibling-datatype': 'Series',
+        'examples': """
         Given several Series of points randomly sampled from unknown
-        distributions, estimate their distribution using KDE with automatic
+        distributions, estimate their PDFs using KDE with automatic
         bandwidth determination and plot the results, evaluating them at
         1000 equally spaced points (default):
 
@@ -2928,7 +2900,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
             :context: close-figs
 
             >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
-        """
+        """.strip()
+    })
+    def kde(self, bw_method=None, ind=None, **kwds):
         return self(kind='kde', bw_method=bw_method, ind=ind, **kwds)
 
     density = kde