Skip to content

Commit

Permalink
refactor: statistical functions (#1420)
Browse files Browse the repository at this point in the history
* refactor: statistical bound functions

* test: add and update test for statistical bound functions

* refactor: exponential distribution functions

* test: add and update test for exponential distribution functions

* refactor: abstract s-distribution functions

* enhancement: add check for empty data array in do_fit()

* test: update and add tests for refactored functions

* fix: fix mypy error

* fix: fix pylint errors
  • Loading branch information
weibullguy authored Oct 19, 2024
1 parent c609bcf commit 1862211
Show file tree
Hide file tree
Showing 14 changed files with 727 additions and 275 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ ignore = [
'D204',
'D213',
]
match='.*(?<!_test)\.py'

[tool.ruff]
exclude = ["*.pyi"]
Expand Down
2 changes: 1 addition & 1 deletion src/ramstk/analyses/derating/models/capacitor.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Standard Library Imports
from typing import Dict, List, Tuple
from typing import Dict, List, Optional, Tuple

def do_derating_analysis(
environment_id: int,
Expand Down
105 changes: 61 additions & 44 deletions src/ramstk/analyses/statistics/bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# ramstk.analyses.statistics.bounds.py is part of The RAMSTK Project
#
# All rights reserved.
# Copyright 2007 - 2017 Doyle Rowland doyle.rowland <AT> reliaqual <DOT> com
# Copyright since 2007 Doyle Rowland doyle.rowland <AT> reliaqual <DOT> com
"""Functions for performing calculations associated with statistical bounds."""

# Standard Library Imports
Expand All @@ -12,8 +12,7 @@

# Third Party Imports
import numpy as np
from scipy import misc
from scipy.stats import norm
from scipy import misc, stats


def do_calculate_beta_bounds(
Expand All @@ -23,26 +22,32 @@ def do_calculate_beta_bounds(
These are the project management estimators, not exact calculations.
:param minimum: the minimum expected value.
:param likely: most likely value.
:param maximum: the maximum expected value.
:param alpha: the desired confidence level.
:return: _meanll, _mean, _meanul, _sd; the calculated mean, bounds, and standard
error.
:rtype: tuple of floats
:param minimum: The minimum expected value.
:param likely: The most likely value.
:param maximum: The maximum expected value.
:param alpha: The desired confidence level (0 < alpha < 1).
:return: A tuple containing the lower mean bound, mean, upper mean bound, and
standard deviation.
:rtype: Tuple[float, float, float, float]
"""
if alpha > 1.0:
_z_norm = norm.ppf(1.0 - ((1.0 - alpha / 100.0) / 2.0))
else:
_z_norm = norm.ppf(1.0 - ((1.0 - alpha) / 2.0))
if alpha < 0.0 or alpha > 100.0:
raise ValueError(
f"Confidence level (alpha) must be between 0 and 100. alpha: {alpha}"
)

_z_norm = (
stats.norm.ppf(1.0 - ((1.0 - alpha / 100.0) / 2.0))
if alpha > 1.0
else 1.0 - ((1.0 - alpha) / 2.0)
)

_mean = (minimum + 4.0 * likely + maximum) / 6.0
_sd = (maximum - minimum) / 6.0

_meanll = _mean - _z_norm * _sd
_meanul = _mean + _z_norm * _sd
_mean_lower_bound = _mean - _z_norm * _sd
_mean_upper_bound = _mean + _z_norm * _sd

return _meanll, _mean, _meanul, _sd
return _mean_lower_bound, _mean, _mean_upper_bound, _sd


def do_calculate_fisher_information(
Expand All @@ -64,31 +69,43 @@ def do_calculate_fisher_information(
array([[8.67337390e+05, 9.89376513e+01],
[9.89376513e+01, 1.12858721e-02]])
:param model: the model function, f(x, ...). This function must take the
data set as the first argument. The remaining arguments of the
function should be the scale, shape, and location parameters.
:param p0: point in parameter space where Fisher information matrix is
evaluated. Passed as a list in the same order as the parameter
arguments to the model. See the example above.
:param data: the data set to use for calculating the information matrix.
:param noise: squared variance of the noise in data.
:returns: _fisher; the Fisher information matrix.
:rtype: ndarray
:param model: The model function f(x, ...), which takes the data set
as the first argument. Remaining arguments should be the
scale, shape, and location parameters.
:param p0: The point in parameter space where the Fisher information
matrix is evaluated. Passed as a list in the same order
as the model's parameter arguments.
:param data: The data set used for calculating the information matrix.
:param noise: The squared variance of the noise in data (default is 1.0).
:return: The Fisher information matrix.
:rtype: np.ndarray
"""
_labels = inspect.getfullargspec(model)[0][1:]
_p0dict = dict(zip(_labels, p0))

_D = np.zeros((len(p0), data.size))

for i, argname in enumerate(_labels):
# pylint: disable=cell-var-from-loop
_D[i, :] = [
misc.derivative(
lambda p: model(_record, **dict(_p0dict, **{argname: p})),
_p0dict[argname],
dx=1.0e-6,
)
for _record in data
]

return 1.0 / noise**2 * np.einsum("mk, nk", _D, _D)
if not isinstance(data, np.ndarray):
raise TypeError(
f"Expected data to be of type numpy.ndarray, got {type(data)} instead."
)

# Get the parameter names from the model's signature
_param_labels = inspect.getfullargspec(model)[0][1:] # Skip the first arg (data)
_param_dict = dict(zip(_param_labels, p0)) # Create a dictionary of parameters

# Initialize the derivative matrix
_num_params = len(p0)
_num_data_points = data.shape[0]
_D = np.zeros((_num_params, _num_data_points))

# Calculate the derivatives for each parameter
for i, _param_name in enumerate(_param_labels):
for j, _record in enumerate(data):
_D[i, j] = _partial_derivative(_record, model, _param_dict, _param_name)

return 1.0 / noise**2 * np.dot(_D, _D.T)


def _partial_derivative(record, model, param_dict, param_name):
"""Calculate the partial derivative."""
return misc.derivative(
lambda p: model(record, **{**param_dict, param_name: p}),
param_dict[param_name],
dx=1.0e-6,
)
110 changes: 110 additions & 0 deletions src/ramstk/analyses/statistics/distributions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# type: ignore
# -*- coding: utf-8 -*-
#
# ramstk.analyses.statistics.distributions.py is part of the RAMSTK Project
#
# All rights reserved.
# Copyright since 2007 Doyle "weibullguy" Rowland doyle.rowland <AT> reliaqual <DOT> com
"""Exponential Module."""

# Standard Library Imports
from typing import Optional

# Third Party Imports
from scipy.stats import expon, lognorm, norm, weibull_min


def calculate_hazard_rate(
time: float,
location: float = 0.0,
scale: Optional[float] = None,
shape: Optional[float] = None,
dist_type: str = "exponential",
) -> float:
"""Calculate the hazard rate for a given distribution."""
if time <= 0.0:
return 0.0

# Exponential distribution doesn't use the `shape` parameter.
if dist_type == "exponential":
return 1.0 / expon.mean(loc=location, scale=scale)

# Lognormal distribution uses `shape`, `location`, and `scale`.
elif dist_type == "lognormal":
return lognorm.pdf(time, shape, loc=location, scale=scale) / lognorm.cdf(
time, shape, loc=location, scale=scale
)

# Normal distribution uses 'scale' and 'location'.
elif dist_type == "normal":
return norm.pdf(time, loc=location, scale=scale) / norm.sf(
time, loc=location, scale=scale
)

# Weibull distribution uses `shape`, `location`, and `scale`.
elif dist_type == "weibull":
return (
0.0
if time <= 0.0
else weibull_min.pdf(time, shape, loc=location, scale=scale)
/ weibull_min.cdf(time, shape, loc=location, scale=scale)
)

else:
raise ValueError(f"Unsupported distribution: {dist_type}")


def calculate_mtbf(
shape: float = None,
location: float = 0.0,
scale: float = 1.0,
dist_type: str = "exponential",
) -> float:
"""Calculate the MTBF for a given distribution.
:param shape: the shape parameter.
:param location: the location parameter.
:param scale: the scale (MTBF) parameter.
:param dist_type: the type of distribution.
:return: the MTBF value.
:rtype: float
"""
if dist_type == "exponential":
return expon.mean(loc=location, scale=scale)
elif dist_type == "lognormal":
return lognorm.mean(shape, loc=location, scale=scale)
elif dist_type == "normal":
return norm.mean(loc=location, scale=scale)
elif dist_type == "weibull":
return weibull_min.mean(shape, loc=location, scale=scale)
else:
raise ValueError(f"Unsupported distribution type: {dist_type}")


def calculate_survival(
shape: float = None,
time: float = 0.0,
location: float = 0.0,
scale: float = 1.0,
dist_type: str = "exponential",
) -> float:
"""Calculate the survival function at time T for a given distribution.
:param shape: the shape parameter.
:param time: the time at which to calculate the survival function.
:param location: the location parameter.
:param scale: the scale parameter.
:param dist_type: the type of distribution.
:return: the survival function value at time T.
:rtype: float
"""
if dist_type == "exponential":
return expon.sf(time, loc=location, scale=scale)
elif dist_type == "lognormal":
return lognorm.sf(time, shape, loc=location, scale=scale)
elif dist_type == "normal":
return norm.sf(time, location, scale)
elif dist_type == "weibull":
return weibull_min.sf(time, shape, loc=location, scale=scale)
else:
raise ValueError(f"Unsupported distribution type: {dist_type}")
65 changes: 25 additions & 40 deletions src/ramstk/analyses/statistics/exponential.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@

# Third Party Imports
import scipy
from scipy.stats import expon

# RAMSTK Local Imports
from .distributions import calculate_hazard_rate, calculate_mtbf, calculate_survival


def get_hazard_rate(scale: float, location: float = 0.0) -> float:
Expand All @@ -32,8 +34,9 @@ def get_hazard_rate(scale: float, location: float = 0.0) -> float:
:return: _hazard_rate; the hazard rate.
:rtype: float
"""
return 1.0 / expon.mean(
loc=location,
return calculate_hazard_rate(
1.0,
location=location,
scale=scale,
)

Expand All @@ -53,14 +56,12 @@ def get_mtbf(rate: float, location: float = 0.0) -> float:
:rtype: float
"""
try:
_mtbf = expon.mean(
loc=location,
return calculate_mtbf(
location=location,
scale=1.0 / rate,
)
except ZeroDivisionError:
_mtbf = 0.0

return _mtbf
return 0.0


def get_survival(scale: float, time: float, location: float = 0.0) -> float:
Expand All @@ -81,7 +82,11 @@ def get_survival(scale: float, time: float, location: float = 0.0) -> float:
:return: _surv; the value of the survival function at time.
:rtype: float
"""
return expon.sf(time, loc=location, scale=scale)
return calculate_survival(
time=time,
location=location,
scale=scale,
)


def do_fit(data, **kwargs) -> Tuple[float, float]:
Expand All @@ -96,36 +101,16 @@ def do_fit(data, **kwargs) -> Tuple[float, float]:
_scale = kwargs.get("scale", 0.0) # Initial guess for scale.
_method = kwargs.get("method", "MLE") # One of MLE or MM.

if data.size == 0:
raise ValueError("No data provided to perform fit.")

# method is not an argument to fit() until scipy-1.7.1.
if _floc is None:
if scipy.__version__ >= "1.7.1":
_location, _scale = expon.fit(
data,
loc=_location,
scale=_scale,
method=_method,
)
else:
_location, _scale = expon.fit(
data,
loc=_location,
scale=_scale,
)
if scipy.__version__ >= "1.7.1":
_fit_args = {"loc": _location, "scale": _scale, "method": _method}
else:
if scipy.__version__ >= "1.7.1":
_location, _scale = expon.fit(
data,
loc=_location,
floc=_floc,
scale=_scale,
method=_method,
)
else:
_location, _scale = expon.fit(
data,
loc=_location,
floc=_floc,
scale=_scale,
)

return _location, _scale
_fit_args = {"loc": _location, "scale": _scale}

if _floc is not None:
_fit_args["floc"] = _floc

return scipy.stats.expon.fit(data, **_fit_args)
Loading

0 comments on commit 1862211

Please sign in to comment.