From 062f740d322d86589e0b7080fcf941b140fc3562 Mon Sep 17 00:00:00 2001
From: mhostetter
Date: Sat, 27 Jul 2024 12:58:01 -0400
Subject: [PATCH] Add `sdr.sum_distributions()`

Fixes #423
---
 src/sdr/_probability.py | 137 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/src/sdr/_probability.py b/src/sdr/_probability.py
index 655d95715..79fb274ef 100644
--- a/src/sdr/_probability.py
+++ b/src/sdr/_probability.py
@@ -75,3 +75,140 @@ def Qinv(p: npt.ArrayLike) -> npt.NDArray[np.float64]:
 
     x = np.sqrt(2) * scipy.special.erfcinv(2 * p)
     return convert_output(x)
+
+
+@export
+def sum_distributions(
+    dist1: scipy.stats.rv_continuous | scipy.stats.rv_histogram,
+    dist2: scipy.stats.rv_continuous | scipy.stats.rv_histogram,
+    p: float = 1e-16,
+) -> scipy.stats.rv_histogram:
+    r"""
+    Numerically calculates the distribution of the sum of two independent random variables.
+
+    Arguments:
+        dist1: The distribution of the first random variable $X$.
+        dist2: The distribution of the second random variable $Y$.
+        p: The probability of exceeding the x axis, on either side, for each distribution. This is used to determine
+            the bounds on the x axis for the numerical convolution. Smaller values of $p$ will result in more accurate
+            analysis, but will require more computation.
+
+    Returns:
+        The distribution of the sum $Z = X + Y$.
+
+    Notes:
+        The PDF of the sum of two independent random variables is the convolution of the PDFs of the two distributions.
+
+        $$f_{X+Y}(t) = (f_X * f_Y)(t)$$
+
+    Examples:
+        Compute the distribution of the sum of two normal distributions.
+
+        .. ipython:: python
+
+            X = scipy.stats.norm(loc=-1, scale=0.5)
+            Y = scipy.stats.norm(loc=2, scale=1.5)
+            x = np.linspace(-5, 10, 1000)
+
+            @savefig sdr_sum_distributions_1.png
+            plt.figure(); \
+            plt.plot(x, X.pdf(x), label="X"); \
+            plt.plot(x, Y.pdf(x), label="Y"); \
+            plt.plot(x, sdr.sum_distributions(X, Y).pdf(x), label="X + Y"); \
+            plt.hist(X.rvs(100_000) + Y.rvs(100_000), bins=101, density=True, histtype="step", label="X + Y empirical"); \
+            plt.legend(); \
+            plt.xlabel("Random variable"); \
+            plt.ylabel("Probability density"); \
+            plt.title("Sum of two Normal distributions");
+
+        Compute the distribution of the sum of two Rayleigh distributions.
+
+        .. ipython:: python
+
+            X = scipy.stats.rayleigh(scale=1)
+            Y = scipy.stats.rayleigh(loc=1, scale=2)
+            x = np.linspace(0, 12, 1000)
+
+            @savefig sdr_sum_distributions_2.png
+            plt.figure(); \
+            plt.plot(x, X.pdf(x), label="X"); \
+            plt.plot(x, Y.pdf(x), label="Y"); \
+            plt.plot(x, sdr.sum_distributions(X, Y).pdf(x), label="X + Y"); \
+            plt.hist(X.rvs(100_000) + Y.rvs(100_000), bins=101, density=True, histtype="step", label="X + Y empirical"); \
+            plt.legend(); \
+            plt.xlabel("Random variable"); \
+            plt.ylabel("Probability density"); \
+            plt.title("Sum of two Rayleigh distributions");
+
+        Compute the distribution of the sum of two Rician distributions.
+
+        .. ipython:: python
+
+            X = scipy.stats.rice(2)
+            Y = scipy.stats.rice(3)
+            x = np.linspace(0, 12, 1000)
+
+            @savefig sdr_sum_distributions_3.png
+            plt.figure(); \
+            plt.plot(x, X.pdf(x), label="X"); \
+            plt.plot(x, Y.pdf(x), label="Y"); \
+            plt.plot(x, sdr.sum_distributions(X, Y).pdf(x), label="X + Y"); \
+            plt.hist(X.rvs(100_000) + Y.rvs(100_000), bins=101, density=True, histtype="step", label="X + Y empirical"); \
+            plt.legend(); \
+            plt.xlabel("Random variable"); \
+            plt.ylabel("Probability density"); \
+            plt.title("Sum of two Rician distributions");
+
+    Group:
+        probability
+    """
+    # Determine the x axis of each distribution such that the probability of exceeding the x axis, on either side,
+    # is p.
+    x1_min, x1_max = _x_range(dist1, p)
+    x2_min, x2_max = _x_range(dist2, p)
+    dx1 = (x1_max - x1_min) / 1_000
+    dx2 = (x2_max - x2_min) / 1_000
+    dx = np.min([dx1, dx2])  # Use the smaller delta x -- must use the same dx for both distributions
+    x1 = np.arange(x1_min, x1_max, dx)
+    x2 = np.arange(x2_min, x2_max, dx)
+
+    # Compute the PDF of each distribution
+    y1 = dist1.pdf(x1)
+    y2 = dist2.pdf(x2)
+
+    # The PDF of the sum of two independent random variables is the convolution of the PDFs of the two distributions
+    y = np.convolve(y1, y2, mode="full") * dx
+
+    # Determine the x axis for the output convolution
+    x = np.arange(y.size) * dx + x1[0] + x2[0]
+
+    # Adjust the histogram bins to be on either side of each point, so there is one extra point added.
+    x = np.append(x, x[-1] + dx)
+    x -= dx / 2
+
+    return scipy.stats.rv_histogram((y, x))
+
+
+def _x_range(dist: scipy.stats.rv_continuous, p: float) -> tuple[float, float]:
+    r"""
+    Determines the range of x values for a given distribution such that the probability of exceeding the x axis, on
+    either side, is p.
+    """
+    # Need to have these loops because for very small p, sometimes SciPy will return NaN instead of a valid value.
+    # The while loops will increase the p value until a valid value is returned.
+
+    pp = p
+    while True:
+        x_min = dist.ppf(pp)
+        if not np.isnan(x_min):
+            break
+        pp *= 10
+
+    pp = p
+    while True:
+        x_max = dist.isf(pp)
+        if not np.isnan(x_max):
+            break
+        pp *= 10
+
+    return x_min, x_max
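
A quick sanity check on the numerical convolution above: the sum of two independent normal random variables is itself normal, with the means and variances adding. The snippet below is a minimal sketch, not part of the patch, that compares `sdr.sum_distributions()` against that closed-form result; it assumes the patch has been applied and that `numpy`, `scipy`, and `sdr` are importable.

    import numpy as np
    import scipy.stats
    import sdr

    # Two independent normal random variables, matching the first docstring example
    X = scipy.stats.norm(loc=-1, scale=0.5)
    Y = scipy.stats.norm(loc=2, scale=1.5)

    # Closed form: X + Y ~ Normal(mu_X + mu_Y, sqrt(sigma_X^2 + sigma_Y^2))
    Z_exact = scipy.stats.norm(loc=-1 + 2, scale=np.sqrt(0.5**2 + 1.5**2))

    # Numerical result from the new function
    Z_numeric = sdr.sum_distributions(X, Y)

    # Compare the PDFs on a common grid -- the maximum error should be small
    x = np.linspace(-5, 10, 1000)
    max_error = np.max(np.abs(Z_numeric.pdf(x) - Z_exact.pdf(x)))
    print(f"Maximum PDF error: {max_error:.2e}")

Because the returned object is a `scipy.stats.rv_histogram`, the same comparison could be made on the CDF via `Z_numeric.cdf(x)`; the histogram bin width, and therefore the accuracy, is controlled indirectly through the `p` argument.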