Skip to content

Commit

Permalink
include statistics indices in documentation, cleanup docstrings (#1958)
Browse files Browse the repository at this point in the history
### What kind of change does this PR introduce?

* Adds the `xclim.indices.stats` module to the `indices` documentation
* Updates the `source_suffix` entry to be consistent with changes to
`Sphinx`

### Does this PR introduce a breaking change?

No.

### Other information:

I made some light changes to the docstrings, but there are a lot of
violations of `numpydoc` standard in general throughout the library.
Will be opening another PR to address these.
  • Loading branch information
Zeitsperre authored Oct 16, 2024
2 parents bdd4b18 + 98039e9 commit b630a8a
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Internal changes
* Added the `bugbear`, `pyupgrade` checks to the `ruff` formatter.
* Adjusted `mypy` checks to be more standardized.
* Renamed annual deprecated frequency alias `"A"` to `"Y"` (:pull:`1930`).
* The ``indices`` documentation now includes the members of ``xclim.indices.stats``. (:issue:`1913`, :pull:`1958`).

CI changes
^^^^^^^^^^
Expand Down
3 changes: 1 addition & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,7 @@ class XCStyle(AlphaStyle):
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# If a list of string, all suffixes will be understood as restructured text variants.
source_suffix = [".rst"]
source_suffix = {".rst": "restructuredtext", ".md": "markdown"}

# The root toctree document.
root_doc = "index"
Expand Down
5 changes: 5 additions & 0 deletions docs/indices.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ Indices submodules
:undoc-members:
:show-inheritance:

.. automodule:: xclim.indices.stats
:members:
:undoc-members:
:show-inheritance:

Fire indices submodule
^^^^^^^^^^^^^^^^^^^^^^
Indices related to fire and fire weather. Currently, submodules exist for calculating indices from the Canadian Forest Fire Weather Index System and the McArthur Forest Fire Danger (Mark 5) System. All fire indices can be accessed from the :py:mod:`xclim.indices` module.
Expand Down
91 changes: 55 additions & 36 deletions xclim/indices/stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
"""Statistic-related functions. See the `frequency_analysis` notebook for examples."""
"""
Statistical indices module
==========================
Functions to aid in computing various statistical indices.
See the `frequency_analysis` notebook for working examples.
"""

from __future__ import annotations

Expand All @@ -10,6 +17,7 @@
import numpy as np
import scipy.stats
import xarray as xr
from scipy.stats import rv_continuous

from xclim.core import DateStr, Quantified
from xclim.core.calendar import compare_offsets, resample_doy, select_time
Expand Down Expand Up @@ -70,7 +78,7 @@ def _fitfunc_1d(arr, *, dist, nparams, method, **fitkwargs):

def fit(
da: xr.DataArray,
dist: str | scipy.stats.rv_continuous = "norm",
dist: str | rv_continuous = "norm",
method: str = "ML",
dim: str = "time",
**fitkwargs: Any,
Expand Down Expand Up @@ -171,7 +179,7 @@ def fit(
def parametric_quantile(
p: xr.DataArray,
q: float | Sequence[float],
dist: str | scipy.stats.rv_continuous | None = None,
dist: str | rv_continuous | None = None,
) -> xr.DataArray:
"""Return the value corresponding to the given distribution parameters and quantile.
Expand All @@ -183,7 +191,7 @@ def parametric_quantile(
and attribute `scipy_dist`, storing the name of the distribution.
q : float or Sequence of float
Quantile to compute, which must be between `0` and `1`, inclusive.
dist: str, rv_continuous instance, optional
dist : str or rv_continuous distribution object, optional
The distribution name or instance if the `scipy_dist` attribute is not available on `p`.
Returns
Expand Down Expand Up @@ -244,7 +252,7 @@ def func(x):
def parametric_cdf(
p: xr.DataArray,
v: float | Sequence[float],
dist: str | scipy.stats.rv_continuous | None = None,
dist: str | rv_continuous | None = None,
) -> xr.DataArray:
"""Return the cumulative distribution function corresponding to the given distribution parameters and value.
Expand All @@ -256,7 +264,7 @@ def parametric_cdf(
and attribute `scipy_dist`, storing the name of the distribution.
v : float or Sequence of float
Value to compute the CDF.
dist: str, rv_continuous instance, optional
dist : str or rv_continuous distribution object, optional
The distribution name or instance if the `scipy_dist` attribute is not available on `p`.
Returns
Expand Down Expand Up @@ -306,7 +314,7 @@ def func(x):
def fa(
da: xr.DataArray,
t: int | Sequence,
dist: str | scipy.stats.rv_continuous = "norm",
dist: str | rv_continuous = "norm",
mode: str = "max",
method: str = "ML",
) -> xr.DataArray:
Expand All @@ -319,7 +327,7 @@ def fa(
t : int or Sequence of int
Return period. The period depends on the resolution of the input data. If the input array's resolution is
yearly, then the return period is in years.
dist : str or rv_continuous instance
dist : str or rv_continuous distribution object
Name of the univariate distribution, such as:
`beta`, `expon`, `genextreme`, `gamma`, `gumbel_r`, `lognorm`, `norm`
Or the distribution instance itself.
Expand Down Expand Up @@ -366,7 +374,7 @@ def frequency_analysis(
da: xr.DataArray,
mode: str,
t: int | Sequence[int],
dist: str | scipy.stats.rv_continuous,
dist: str | rv_continuous,
window: int = 1,
freq: str | None = None,
method: str = "ML",
Expand All @@ -383,7 +391,7 @@ def frequency_analysis(
t : int or sequence
Return period. The period depends on the resolution of the input data. If the input array's resolution is
yearly, then the return period is in years.
dist : str or rv_continuous
dist : str or rv_continuous distribution object
Name of the univariate distribution, e.g. `beta`, `expon`, `genextreme`, `gamma`, `gumbel_r`, `lognorm`, `norm`.
Or an instance of the distribution.
window : int
Expand All @@ -397,8 +405,8 @@ def frequency_analysis(
The PWM method is usually more robust to outliers.
\*\*indexer
Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values,
month=1 to select January, or month=[6,7,8] to select summer months. If indexer is not provided, all values are
considered.
month=1 to select January, or month=[6,7,8] to select summer months.
If indexer is not provided, all values are considered.
Returns
-------
Expand Down Expand Up @@ -427,9 +435,17 @@ def frequency_analysis(
return fa(sel, t, dist=dist, mode=mode, method=method)


def get_dist(dist: str | scipy.stats.rv_continuous):
"""Return a distribution object from `scipy.stats`."""
if isinstance(dist, scipy.stats.rv_continuous):
def get_dist(dist: str | rv_continuous):
"""
Return a distribution object from `scipy.stats`.
Parameters
----------
dist : str or rv_continuous distribution object
Name of the univariate distribution, e.g. `beta`, `expon`, `genextreme`, `gamma`, `gumbel_r`, `lognorm`, `norm`.
Or an instance of the distribution.
"""
if isinstance(dist, rv_continuous):
return dist

dc = getattr(scipy.stats, dist, None)
Expand All @@ -450,7 +466,8 @@ def _fit_start(x, dist: str, **fitkwargs: Any) -> tuple[tuple, dict]:
Input data.
dist : str
Name of the univariate distribution, e.g. `beta`, `expon`, `genextreme`, `gamma`, `gumbel_r`, `lognorm`, `norm`.
(see :py:mod:scipy.stats). Only `genextreme` and `weibull_exp` distributions are supported.
(see :py:mod:`scipy.stats`).
Only `genextreme` and `weibull_exp` distributions are supported.
\*\*fitkwargs
Kwargs passed to fit.
Expand All @@ -460,8 +477,7 @@ def _fit_start(x, dist: str, **fitkwargs: Any) -> tuple[tuple, dict]:
References
----------
:cite:cts:`coles_introduction_2001,cohen_parameter_2019, thom_1958, cooke_1979, muralidhar_1992`
:cite:cts:`coles_introduction_2001,cohen_parameter_2019,thom_1958,cooke_1979,muralidhar_1992`
"""
x = np.asarray(x)
m = x.mean()
Expand Down Expand Up @@ -542,7 +558,7 @@ def _fit_start(x, dist: str, **fitkwargs: Any) -> tuple[tuple, dict]:


def _dist_method_1D( # noqa: N802
*args, dist: str | scipy.stats.rv_continuous, function: str, **kwargs: Any
*args, dist: str | rv_continuous, function: str, **kwargs: Any
) -> xr.DataArray:
r"""Statistical function for given argument on given distribution initialized with params.
Expand All @@ -553,7 +569,7 @@ def _dist_method_1D( # noqa: N802
----------
\*args
The arguments for the requested scipy function.
dist : str
dist : str or rv_continuous distribution object
The scipy name of the distribution.
function : str
The name of the function to call.
Expand All @@ -572,10 +588,11 @@ def dist_method(
function: str,
fit_params: xr.DataArray,
arg: xr.DataArray | None = None,
dist: str | scipy.stats.rv_continuous | None = None,
dist: str | rv_continuous | None = None,
**kwargs: Any,
) -> xr.DataArray:
r"""Vectorized statistical function for given argument on given distribution initialized with params.
r"""
Vectorized statistical function for given argument on given distribution initialized with params.
Methods where `"*args"` are the distribution parameters can be wrapped, except those that reduce dimensions (
e.g. `nnlf`) or create new dimensions (e.g. 'rvs' with size != 1, 'stats' with more than one moment, 'interval',
Expand All @@ -589,7 +606,7 @@ def dist_method(
Distribution parameters are along `dparams`, in the same order as given by :py:func:`fit`.
arg : array_like, optional
The first argument for the requested function if different from `fit_params`.
dist : str pr rv_continuous, optional
dist : str or rv_continuous distribution object, optional
The distribution name or instance. Defaults to the `scipy_dist` attribute of `fit_params`.
\*\*kwargs
Other parameters to pass to the function call.
Expand Down Expand Up @@ -629,11 +646,12 @@ def dist_method(
def preprocess_standardized_index(
da: xr.DataArray, freq: str | None, window: int, **indexer
):
r"""Perform resample and roll operations involved in computing a standardized index.
r"""
Perform resample and roll operations involved in computing a standardized index.
da : xarray.DataArray
Input array.
freq : {D, MS}, optional
freq : {'D', 'MS'}, optional
Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling
has already been applied to the input dataset and will skip the resampling step.
window : int
Expand Down Expand Up @@ -693,7 +711,7 @@ def standardized_index_fit_params(
da: xr.DataArray,
freq: str | None,
window: int,
dist: str | scipy.stats.rv_continuous,
dist: str | rv_continuous,
method: str,
zero_inflated: bool = False,
fitkwargs: dict | None = None,
Expand All @@ -702,9 +720,9 @@ def standardized_index_fit_params(
r"""Standardized Index fitting parameters.
A standardized index measures the deviation of a variable averaged over a rolling temporal window and
fitted with a given distribution `dist` with respect to a calibration dataset. The comparison is done by porting
back results to a normalized distribution. The fitting parameters of the calibration dataset fitted with `dist`
are obtained here.
fitted with a given distribution `dist` with respect to a calibration dataset.
The comparison is done by porting back results to a normalized distribution.
The fitting parameters of the calibration dataset fitted with `dist` are obtained here.
Parameters
----------
Expand All @@ -716,7 +734,7 @@ def standardized_index_fit_params(
window : int
Averaging window length relative to the resampling frequency. For example, if `freq="MS"`,
i.e. a monthly resampling, the window is an integer number of months.
dist : {'gamma', 'fisk'} or rv_continuous instance
dist : {'gamma', 'fisk'} or rv_continuous distribution object
Name of the univariate distribution. (see :py:mod:`scipy.stats`).
method : {'ML', 'APP', 'PWM'}
Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method
Expand All @@ -732,7 +750,7 @@ def standardized_index_fit_params(
Returns
-------
xarray.DataArray
Standardized Index fitting parameters. The time dimension of the initial array is reduced to
Standardized Index fitting parameters. The time dimension of the initial array is reduced to.
Notes
-----
Expand All @@ -743,9 +761,10 @@ def standardized_index_fit_params(
When using the zero inflated option, a probability density function :math:`\texttt{pdf}_0(X)` is fitted for :math:`X \neq 0`
and a supplementary parameter :math:`\pi` takes into account the probability of :math:`X = 0`. The full probability density
function is a piecewise function
function is a piecewise function:
.. math::
\texttt{pdf}(X) = \pi \texttt{ if } X=0 \texttt{ else } (1-\pi) \texttt{pdf}_0(X)
"""
fitkwargs = fitkwargs or {}
Expand Down Expand Up @@ -799,7 +818,7 @@ def standardized_index(
da: xr.DataArray,
freq: str | None,
window: int | None,
dist: str | scipy.stats.rv_continuous | None,
dist: str | rv_continuous | None,
method: str | None,
zero_inflated: bool | None,
fitkwargs: dict | None,
Expand All @@ -825,11 +844,11 @@ def standardized_index(
window : int
Averaging window length relative to the resampling frequency. For example, if `freq="MS"`,
i.e. a monthly resampling, the window is an integer number of months.
dist : str or rv_continuous
dist : str or rv_continuous instance
Name of the univariate distribution. (see :py:mod:`scipy.stats`).
method : str
Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method
uses a deterministic function that doesn't involve any optimization.
Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate).
The approximate method uses a deterministic function that doesn't involve any optimization.
zero_inflated : bool
If True, the zeroes of `da` are treated separately.
fitkwargs : dict
Expand Down

0 comments on commit b630a8a

Please sign in to comment.