refactor: statistical functions (#1420)

* refactor: statistical bound functions * test: add and update test for statistical bound functions * refactor: exponential distribution functions * test: add and update test for exponential distribution functions * refactor: abstract s-distribution functions * enhancement: add check for empty data array in do_fit() * test: update and add tests for refactored functions * fix: fix mypy error * fix: fix pylint errors
ReliaQualAssociates · Oct 19, 2024 · 1862211 · 1862211
1 parent c609bcf
commit 1862211
Show file tree

Hide file tree

Showing 14 changed files with 727 additions and 275 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -431,6 +431,7 @@ ignore = [
 	'D204',
 	'D213',
 ]
+match='.*(?<!_test)\.py'
 
 [tool.ruff]
 exclude = ["*.pyi"]

diff --git a/src/ramstk/analyses/derating/models/capacitor.pyi b/src/ramstk/analyses/derating/models/capacitor.pyi
@@ -1,5 +1,5 @@
 # Standard Library Imports
-from typing import Dict, List, Tuple
+from typing import Dict, List, Optional, Tuple
 
 def do_derating_analysis(
     environment_id: int,

diff --git a/src/ramstk/analyses/statistics/bounds.py b/src/ramstk/analyses/statistics/bounds.py
@@ -3,7 +3,7 @@
 #       ramstk.analyses.statistics.bounds.py is part of The RAMSTK Project
 #
 # All rights reserved.
-# Copyright 2007 - 2017 Doyle Rowland doyle.rowland <AT> reliaqual <DOT> com
+# Copyright since 2007 Doyle Rowland doyle.rowland <AT> reliaqual <DOT> com
 """Functions for performing calculations associated with statistical bounds."""
 
 # Standard Library Imports
@@ -12,8 +12,7 @@
 
 # Third Party Imports
 import numpy as np
-from scipy import misc
-from scipy.stats import norm
+from scipy import misc, stats
 
 
 def do_calculate_beta_bounds(
@@ -23,26 +22,32 @@ def do_calculate_beta_bounds(
 
     These are the project management estimators, not exact calculations.
 
-    :param minimum: the minimum expected value.
-    :param likely: most likely value.
-    :param maximum: the maximum expected value.
-    :param alpha: the desired confidence level.
-    :return: _meanll, _mean, _meanul, _sd; the calculated mean, bounds, and standard
-        error.
-    :rtype: tuple of floats
+    :param minimum: The minimum expected value.
+    :param likely: The most likely value.
+    :param maximum: The maximum expected value.
+    :param alpha: The desired confidence level (0 < alpha < 1).
+    :return: A tuple containing the lower mean bound, mean, upper mean bound, and
+        standard deviation.
+    :rtype: Tuple[float, float, float, float]
     """
-    if alpha > 1.0:
-        _z_norm = norm.ppf(1.0 - ((1.0 - alpha / 100.0) / 2.0))
-    else:
-        _z_norm = norm.ppf(1.0 - ((1.0 - alpha) / 2.0))
+    if alpha < 0.0 or alpha > 100.0:
+        raise ValueError(
+            f"Confidence level (alpha) must be between 0 and 100.  alpha: {alpha}"
+        )
+
+    _z_norm = (
+        stats.norm.ppf(1.0 - ((1.0 - alpha / 100.0) / 2.0))
+        if alpha > 1.0
+        else 1.0 - ((1.0 - alpha) / 2.0)
+    )
 
     _mean = (minimum + 4.0 * likely + maximum) / 6.0
     _sd = (maximum - minimum) / 6.0
 
-    _meanll = _mean - _z_norm * _sd
-    _meanul = _mean + _z_norm * _sd
+    _mean_lower_bound = _mean - _z_norm * _sd
+    _mean_upper_bound = _mean + _z_norm * _sd
 
-    return _meanll, _mean, _meanul, _sd
+    return _mean_lower_bound, _mean, _mean_upper_bound, _sd
 
 
 def do_calculate_fisher_information(
@@ -64,31 +69,43 @@ def do_calculate_fisher_information(
         array([[8.67337390e+05, 9.89376513e+01],
                [9.89376513e+01, 1.12858721e-02]])
 
-    :param model: the model function, f(x, ...). This function must take the
-        data set as the first argument.  The remaining arguments of the
-        function should be the scale, shape, and location parameters.
-    :param p0: point in parameter space where Fisher information matrix is
-        evaluated.  Passed as a list in the same order as the parameter
-        arguments to the model.  See the example above.
-    :param data: the data set to use for calculating the information matrix.
-    :param noise: squared variance of the noise in data.
-    :returns: _fisher; the Fisher information matrix.
-    :rtype: ndarray
+    :param model: The model function f(x, ...), which takes the data set
+                  as the first argument. Remaining arguments should be the
+                  scale, shape, and location parameters.
+    :param p0: The point in parameter space where the Fisher information
+                matrix is evaluated. Passed as a list in the same order
+                as the model's parameter arguments.
+    :param data: The data set used for calculating the information matrix.
+    :param noise: The squared variance of the noise in data (default is 1.0).
+    :return: The Fisher information matrix.
+    :rtype: np.ndarray
     """
-    _labels = inspect.getfullargspec(model)[0][1:]
-    _p0dict = dict(zip(_labels, p0))
-
-    _D = np.zeros((len(p0), data.size))
-
-    for i, argname in enumerate(_labels):
-        # pylint: disable=cell-var-from-loop
-        _D[i, :] = [
-            misc.derivative(
-                lambda p: model(_record, **dict(_p0dict, **{argname: p})),
-                _p0dict[argname],
-                dx=1.0e-6,
-            )
-            for _record in data
-        ]
-
-    return 1.0 / noise**2 * np.einsum("mk, nk", _D, _D)
+    if not isinstance(data, np.ndarray):
+        raise TypeError(
+            f"Expected data to be of type numpy.ndarray, got {type(data)} instead."
+        )
+
+    # Get the parameter names from the model's signature
+    _param_labels = inspect.getfullargspec(model)[0][1:]  # Skip the first arg (data)
+    _param_dict = dict(zip(_param_labels, p0))  # Create a dictionary of parameters
+
+    # Initialize the derivative matrix
+    _num_params = len(p0)
+    _num_data_points = data.shape[0]
+    _D = np.zeros((_num_params, _num_data_points))
+
+    # Calculate the derivatives for each parameter
+    for i, _param_name in enumerate(_param_labels):
+        for j, _record in enumerate(data):
+            _D[i, j] = _partial_derivative(_record, model, _param_dict, _param_name)
+
+    return 1.0 / noise**2 * np.dot(_D, _D.T)
+
+
+def _partial_derivative(record, model, param_dict, param_name):
+    """Calculate the partial derivative."""
+    return misc.derivative(
+        lambda p: model(record, **{**param_dict, param_name: p}),
+        param_dict[param_name],
+        dx=1.0e-6,
+    )
diff --git a/src/ramstk/analyses/statistics/distributions.py b/src/ramstk/analyses/statistics/distributions.py
@@ -0,0 +1,110 @@
+# type: ignore
+# -*- coding: utf-8 -*-
+#
+#       ramstk.analyses.statistics.distributions.py is part of the RAMSTK Project
+#
+# All rights reserved.
+# Copyright since 2007 Doyle "weibullguy" Rowland doyle.rowland <AT> reliaqual <DOT> com
+"""Exponential Module."""
+
+# Standard Library Imports
+from typing import Optional
+
+# Third Party Imports
+from scipy.stats import expon, lognorm, norm, weibull_min
+
+
+def calculate_hazard_rate(
+    time: float,
+    location: float = 0.0,
+    scale: Optional[float] = None,
+    shape: Optional[float] = None,
+    dist_type: str = "exponential",
+) -> float:
+    """Calculate the hazard rate for a given distribution."""
+    if time <= 0.0:
+        return 0.0
+
+    # Exponential distribution doesn't use the `shape` parameter.
+    if dist_type == "exponential":
+        return 1.0 / expon.mean(loc=location, scale=scale)
+
+    # Lognormal distribution uses `shape`, `location`, and `scale`.
+    elif dist_type == "lognormal":
+        return lognorm.pdf(time, shape, loc=location, scale=scale) / lognorm.cdf(
+            time, shape, loc=location, scale=scale
+        )
+
+    # Normal distribution uses 'scale' and 'location'.
+    elif dist_type == "normal":
+        return norm.pdf(time, loc=location, scale=scale) / norm.sf(
+            time, loc=location, scale=scale
+        )
+
+    # Weibull distribution uses `shape`, `location`, and `scale`.
+    elif dist_type == "weibull":
+        return (
+            0.0
+            if time <= 0.0
+            else weibull_min.pdf(time, shape, loc=location, scale=scale)
+            / weibull_min.cdf(time, shape, loc=location, scale=scale)
+        )
+
+    else:
+        raise ValueError(f"Unsupported distribution: {dist_type}")
+
+
+def calculate_mtbf(
+    shape: float = None,
+    location: float = 0.0,
+    scale: float = 1.0,
+    dist_type: str = "exponential",
+) -> float:
+    """Calculate the MTBF for a given distribution.
+
+    :param shape: the shape parameter.
+    :param location: the location parameter.
+    :param scale: the scale (MTBF) parameter.
+    :param dist_type: the type of distribution.
+    :return: the MTBF value.
+    :rtype: float
+    """
+    if dist_type == "exponential":
+        return expon.mean(loc=location, scale=scale)
+    elif dist_type == "lognormal":
+        return lognorm.mean(shape, loc=location, scale=scale)
+    elif dist_type == "normal":
+        return norm.mean(loc=location, scale=scale)
+    elif dist_type == "weibull":
+        return weibull_min.mean(shape, loc=location, scale=scale)
+    else:
+        raise ValueError(f"Unsupported distribution type: {dist_type}")
+
+
+def calculate_survival(
+    shape: float = None,
+    time: float = 0.0,
+    location: float = 0.0,
+    scale: float = 1.0,
+    dist_type: str = "exponential",
+) -> float:
+    """Calculate the survival function at time T for a given distribution.
+
+    :param shape: the shape parameter.
+    :param time: the time at which to calculate the survival function.
+    :param location: the location parameter.
+    :param scale: the scale parameter.
+    :param dist_type: the type of distribution.
+    :return: the survival function value at time T.
+    :rtype: float
+    """
+    if dist_type == "exponential":
+        return expon.sf(time, loc=location, scale=scale)
+    elif dist_type == "lognormal":
+        return lognorm.sf(time, shape, loc=location, scale=scale)
+    elif dist_type == "normal":
+        return norm.sf(time, location, scale)
+    elif dist_type == "weibull":
+        return weibull_min.sf(time, shape, loc=location, scale=scale)
+    else:
+        raise ValueError(f"Unsupported distribution type: {dist_type}")
diff --git a/src/ramstk/analyses/statistics/exponential.py b/src/ramstk/analyses/statistics/exponential.py
@@ -12,7 +12,9 @@
 
 # Third Party Imports
 import scipy
-from scipy.stats import expon
+
+# RAMSTK Local Imports
+from .distributions import calculate_hazard_rate, calculate_mtbf, calculate_survival
 
 
 def get_hazard_rate(scale: float, location: float = 0.0) -> float:
@@ -32,8 +34,9 @@ def get_hazard_rate(scale: float, location: float = 0.0) -> float:
     :return: _hazard_rate; the hazard rate.
     :rtype: float
     """
-    return 1.0 / expon.mean(
-        loc=location,
+    return calculate_hazard_rate(
+        1.0,
+        location=location,
         scale=scale,
     )
 
@@ -53,14 +56,12 @@ def get_mtbf(rate: float, location: float = 0.0) -> float:
     :rtype: float
     """
     try:
-        _mtbf = expon.mean(
-            loc=location,
+        return calculate_mtbf(
+            location=location,
             scale=1.0 / rate,
         )
     except ZeroDivisionError:
-        _mtbf = 0.0
-
-    return _mtbf
+        return 0.0
 
 
 def get_survival(scale: float, time: float, location: float = 0.0) -> float:
@@ -81,7 +82,11 @@ def get_survival(scale: float, time: float, location: float = 0.0) -> float:
     :return: _surv; the value of the survival function at time.
     :rtype: float
     """
-    return expon.sf(time, loc=location, scale=scale)
+    return calculate_survival(
+        time=time,
+        location=location,
+        scale=scale,
+    )
 
 
 def do_fit(data, **kwargs) -> Tuple[float, float]:
@@ -96,36 +101,16 @@ def do_fit(data, **kwargs) -> Tuple[float, float]:
     _scale = kwargs.get("scale", 0.0)  # Initial guess for scale.
     _method = kwargs.get("method", "MLE")  # One of MLE or MM.
 
+    if data.size == 0:
+        raise ValueError("No data provided to perform fit.")
+
     # method is not an argument to fit() until scipy-1.7.1.
-    if _floc is None:
-        if scipy.__version__ >= "1.7.1":
-            _location, _scale = expon.fit(
-                data,
-                loc=_location,
-                scale=_scale,
-                method=_method,
-            )
-        else:
-            _location, _scale = expon.fit(
-                data,
-                loc=_location,
-                scale=_scale,
-            )
+    if scipy.__version__ >= "1.7.1":
+        _fit_args = {"loc": _location, "scale": _scale, "method": _method}
     else:
-        if scipy.__version__ >= "1.7.1":
-            _location, _scale = expon.fit(
-                data,
-                loc=_location,
-                floc=_floc,
-                scale=_scale,
-                method=_method,
-            )
-        else:
-            _location, _scale = expon.fit(
-                data,
-                loc=_location,
-                floc=_floc,
-                scale=_scale,
-            )
-
-    return _location, _scale
+        _fit_args = {"loc": _location, "scale": _scale}
+
+    if _floc is not None:
+        _fit_args["floc"] = _floc
+
+    return scipy.stats.expon.fit(data, **_fit_args)
-Original file line number
+Diff line change
@@ Expand Up / @@ -431,6 +431,7 @@ ignore = [ @@
     	'D204',
     	'D213',
     ]
+    match='.*(?<!_test)\.py'
     [tool.ruff]
     exclude = ["*.pyi"]
@@ Expand Down @@