diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..188a2df
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,13 @@
+version: 2
+
+build:
+ os: "ubuntu-22.04"
+ tools:
+ python: "3.10"
+
+python:
+ install:
+ - requirements: docs/requirements.txt
+
+sphinx:
+ configuration: docs/source/conf.py
\ No newline at end of file
diff --git a/README.md b/README.md
index 119c3cf..29812cd 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,10 @@
-
+
+
here.
## Table of contents
@@ -101,6 +108,7 @@ Further examples can be found here.
+- We are continuing to expand our library, and are open to suggestions for new models to implement. If you have a model you would like to see implemented, please open an issue on our GitHub page.
## Testing
All tests are written using pytest and cover all user accessible code.
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d0c3cbf
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = source
+BUILDDIR = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..6247f7e
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..53fc1f3
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+sphinx==7.1.2
+sphinx-rtd-theme==1.3.0rc1
diff --git a/docs/source/Copulas.rst b/docs/source/Copulas.rst
new file mode 100644
index 0000000..30f740d
--- /dev/null
+++ b/docs/source/Copulas.rst
@@ -0,0 +1,342 @@
+.. _copulas:
+
+##############
+Copula Models
+##############
+
+This SklarPy package contains many different copula models.
+Unlike univariate distributions, these are not wrappers of scipy objects.
+
+All implemented copula models are able to be fitted to both multivariate numpy and pandas data and contain easy saving and plotting methods.
+
+An important concept to remember when using these models is that they are composed of 2 overall parts:
+
+1. The marginal distributions. These are univariate distributions of each random variable.
+2. The copula distribution. This multivariate model captures the dependence structure between the variables.
+
+The overall multivariate joint distribution is created by combining these two parts,
+which is handled by SklarPy's copula models.
+
+Which copula models are implemented?
+------------------------------------
+Currently, the following copula models are implemented:
+
+.. csv-table:: Copula Models
+ :file: copula_table.csv
+ :header-rows: 1
+
+All Normal-Mixture models use the parameterization specified by McNeil, Frey and Embrechts (2005).
+
+MarginalFitter
+--------------
+This class is used to fit multiple univariate distributions to data easily and evaluate their methods.
+It implements the following methods and attributes:
+
+- marginal_logpdf (log of the probability density functions of the marginal distributions)
+- marginal_pdfs (the probability density functions of the fitted marginal distributions)
+- marginal_cdfs (the cumulative distribution functions of the fitted marginal distributions)
+- marginal_ppfs (the percent point functions / inverse cdfs of the fitted marginal distributions)
+- marginal_rvs (random variate generators / samplers of the fitted marginal distributions)
+- pairplot (pairplot of the fitted marginal distributions)
+- marginals (the fitted marginal distributions as a dictionary)
+- summary (a summary of the fitted marginal distributions)
+- num_variables (the number of variables present in the original dataset)
+- fitted (whether the marginal distributions have been fitted to data)
+- fit (fitting the marginal distributions to data)
+
+PreFitCopula
+-------------
+This is the base class for all copula models. It implements the following methods and attributes:
+
+- logpdf (log of the probability density function of the overall joint distribution)
+- pdf (probability density function of the overall joint distribution)
+- cdf (cumulative distribution function of the overall joint distribution)
+- mc_cdf (Monte Carlo approximation of the cumulative distribution function of the overall joint distribution)
+- rvs (random variate generator / sampler of the overall joint distribution)
+- copula_logpdf (log of the probability density function of the copula distribution)
+- copula_pdf (probability density function of the copula distribution)
+- copula_cdf (cumulative distribution function of the copula distribution)
+- copula_mc_cdf (Monte Carlo approximation of the cumulative distribution function of the copula distribution)
+- copula_rvs (random variate generator / sampler of the copula distribution)
+- num_marginal_params (number of parameters in the marginal distributions)
+- num_copula_params (number of parameters in the copula distribution)
+- num_scalar_params (number of scalar parameters in the overall joint distribution)
+- num_params (number of parameters in the overall joint distribution)
+- likelihood (likelihood of the overall joint distribution)
+- loglikelihood (log of the likelihood of the overall joint distribution)
+- aic (Akaike Information Criterion of the overall joint distribution)
+- bic (Bayesian Information Criterion of the overall joint distribution)
+- marginal_pairplot (pairplot of the marginal distributions)
+- pdf_plot (plot of the probability density function of the overall joint distribution)
+- cdf_plot (plot of the cumulative distribution function of the overall joint distribution)
+- mc_cdf_plot (plot of the Monte Carlo approximation of the cumulative distribution function of the overall joint distribution)
+- copula_pdf_plot (plot of the probability density function of the copula distribution)
+- copula_cdf_plot (plot of the cumulative distribution function of the copula distribution)
+- copula_mc_cdf_plot (plot of the Monte Carlo approximation of the cumulative distribution function of the copula distribution)
+- fit (fitting the overall joint distribution to data)
+
+mc_cdf and copula_mc_cdf are numerical approximations of their respective cumulative distribution functions.
+These are usually necessary as the analytical forms of these functions are often not available and numerical integration is computationally expensive.
+
+Also note that pdf and cdf plots are only implemented for 2-dimensional distributions.
+
+FittedCopula
+------------
+This class is the fitted version of PreFitCopula's subclasses.
+It implements the same methods as PreFitCopula, but does not require copula_params or mdists as arguments.
+It also implements the following additional methods and attributes:
+
+- copula_params (the fitted parameters of the copula distribution)
+- mdists (the fitted univariate marginal distributions)
+- num_variables (the number of variables the distribution is fitted to)
+- fitted_num_data_points (the number of observations used to fit the distribution)
+- converged (whether the fitting algorithm converged)
+- summary (a summary of the overall fitted distribution)
+- save (save the overall fitted distribution object)
+
+MarginalFitter Example
+-----------------------
+Generating data and fitting marginal distributions::
+
+ import numpy as np
+ import pandas as pd
+
+ # specifying the parameters of the multivariate normal distribution we are
+ # sampling from
+ num_generate: int = 1000
+ my_mu: np.ndarray = np.array([33, 44], dtype=float)
+ my_corr: np.ndarray = np.array([[1, 0.7], [0.7, 1]], dtype=float)
+ my_sig: np.ndarray = np.array([1.3, 2.5])
+ my_cov: np.ndarray = np.diag(my_sig) @ my_corr @ np.diag(my_sig)
+ my_mvn_params: tuple = (my_mu, my_cov)
+
+ # generating multivariate random normal variables
+ from sklarpy.multivariate import mvt_normal
+
+ rvs: np.ndarray = mvt_normal.rvs(num_generate, my_mvn_params)
+ rvs_df: pd.DataFrame = pd.DataFrame(rvs, columns=['Wife Age', 'Husband Age'
+ ], dtype=float)
+
+ # applying MarginalFitter to our random variables
+ from sklarpy.copulas import MarginalFitter
+
+ mfitter: MarginalFitter = MarginalFitter(rvs_df)
+ mfitter.fit({'pvalue': 0.01})
+
+ # printing out a summary of our fits
+ from sklarpy import print_full
+ print_full()
+
+ print(mfitter.summary)
+
+
+.. code-block:: text
+
+ Wife Age Husband Age
+ Parametric/Non-Parametric Parametric Parametric
+ Discrete/Continuous continuous continuous
+ Distribution lognorm lognorm
+ #Params 3 3
+ param0 0.000005 0.000001
+ param1 -262115.561308 -2097116.799667
+ param2 262148.497841 2097160.700641
+ Support (-262115.56130758836, inf) (-2097116.7996667635, inf)
+ Fitted Domain (28.438692411392555, 36.673753788627785) (35.20033323448715, 51.735336956575935)
+ Cramér-von Mises statistic 0.124954 0.102395
+ Cramér-von Mises p-value 0.475847 0.573349
+ Cramér-von Mises @ 10% True True
+ Cramér-von Mises @ 5% True True
+ Cramér-von Mises @ 1% True True
+ Kolmogorov-Smirnov statistic 0.032827 0.024709
+ Kolmogorov-Smirnov p-value 0.226385 0.56612
+ Kolmogorov-Smirnov @ 10% True True
+ Kolmogorov-Smirnov @ 5% True True
+ Kolmogorov-Smirnov @ 1% True True
+ Likelihood 0.0 0.0
+ Log-Likelihood -1666.824453 -2382.153726
+ AIC 3339.648906 4770.307452
+ BIC 3354.372172 4785.030718
+ Sum of Squared Error 16.819752 6.322994
+ #Fitted Data Points 1000 1000
+
+Printing Marginals::
+
+ print(mfitter.marginals)
+
+.. code-block:: text
+
+ {0: lognorm(0.0, -262115.56, 262148.5), 1: lognorm(0.0, -2097116.8, 2097160.7)}
+
+Calculating marginal cdf values::
+
+ mcdf_values: pd.DataFrame = mfitter.marginal_cdfs()
+ print(mcdf_values)
+
+.. code-block:: text
+
+ Wife Age Husband Age
+ 0 0.446886 0.676438
+ 1 0.162115 0.107338
+ 2 0.631869 0.461236
+ 3 0.182751 0.589056
+ 4 0.827908 0.870150
+ .. ... ...
+ 995 0.732827 0.523818
+ 996 0.457342 0.372388
+ 997 0.319827 0.598163
+ 998 0.476477 0.350149
+ 999 0.353060 0.323429
+
+Producing a pairplot of the marginals::
+
+ data: np.ndarray = np.full((num_generate, 10), np.NaN)
+ data[:, :2] = np.random.poisson(4, (num_generate, 2))
+ data[:, 2] = np.random.randint(-5, 5, (num_generate,))
+ data[:, 3] = data[:, :2].sum(axis=1)
+ data[:, 4] = data[:, 0] + data[:, 3]
+ data[:, 5] = np.random.normal(4, 2, (num_generate,))
+ data[:, 6] = np.random.gamma(2, 1, (num_generate,))
+ data[:, 7:9] = np.random.standard_t(3, (num_generate, 2))
+ data[:, 9] = np.random.uniform(0, 1, (num_generate,))
+
+ mfitter2: MarginalFitter = MarginalFitter(data).fit()
+
+ mfitter2.pairplot()
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/mfitter_pairplot.png?raw=true
+ :alt: MarginalFitter Pair-Plot
+ :scale: 60%
+ :align: center
+
+Copula Example
+--------------
+Here we use the generalized hyperbolic copula, though all methods and attributes are generalized::
+
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+ # specifying the parameters of the multivariate hyperbolic distribution we are
+ # generating from
+ my_loc = np.array([1, -3], dtype=float)
+ my_shape = np.array([[1, 0.7], [0.7, 1]], dtype=float)
+ my_chi = 1.7
+ my_psi = 4.5
+ my_gamma = np.array([2.3, -4.3], dtype=float)
+ my_params = (my_chi, my_psi, my_loc, my_shape, my_gamma)
+
+ # generating multivariate hyperbolic random variables
+ from sklarpy.multivariate import mvt_hyperbolic
+
+ num_generate: int = 1000
+ rvs: np.ndarray = mvt_hyperbolic.rvs(num_generate, my_params)
+ rvs_df: pd.DataFrame = pd.DataFrame(rvs, columns=['Process A', 'Process B'],
+ dtype=float)
+
+ # fitting a generalized hyperbolic copula to our generated data using
+ # Maximum Likelihood Estimation
+ from sklarpy.copulas import gh_copula
+
+ fitted_copula = gh_copula.fit(
+ data=rvs_df, method='mle',
+ univariate_fitter_options={'significant': False}, show_progress=True)
+
+ # printing our fitted parameters
+ from sklarpy import print_full
+ print_full()
+
+ print(fitted_copula.copula_params.to_dict)
+
+.. code-block:: text
+
+ {'lamb': -10.0, 'chi': 8.460830761870396, 'psi': 10.0,
+ 'loc': array([[0.], [0.]]),
+ 'shape': array([[ 1. , -0.5214283],
+ [-0.5214283, 1. ]]),
+ 'gamma': array([[0.99848424], [0.94696141]])}
+
+Printing marginal distributions::
+
+ print(fitted_copula.mdists)
+
+.. code-block:: text
+
+ {0: lognorm(0.38, -0.78, 4.02), 1: lognorm(0.0, -1276.15, 1268.45)}
+
+Printing covariance parameters::
+
+ print(fitted_copula.copula_params.cov)
+
+.. code-block:: text
+
+ [[ 0.39404386 -0.18821382]
+ [-0.18821382 0.3928638 ]]
+
+Printing a summary of our joint fit::
+
+ print(fitted_copula.summary)
+
+.. code-block:: text
+
+ Joint Distribution gh summary summary
+ Distribution Joint Distribution mvt_gh lognorm cauchy
+ #Variables 2 2 NaN NaN
+ #Params 11 6 3 2
+ #Scalar Params 11 6 NaN NaN
+ Converged True True NaN NaN
+ Likelihood 0.0 0.0 0.0 0.0
+ Log-Likelihood -4298.311941 -1032.490682 -1880.434874 -2561.765741
+ AIC 8618.623881 2076.981365 3766.869748 5127.531482
+ BIC 8672.609189 2106.427896 3781.593014 5137.346993
+ #Fitted Data Points 1000 1000 1000 1000
+ Parametric/Non-Parametric NaN NaN Parametric Parametric
+ Discrete/Continuous NaN NaN continuous continuous
+ param0 NaN NaN 0.328725 -6.937913
+ param1 NaN NaN -1.596967 1.485756
+ param2 NaN NaN 4.826054 NaN
+ Support NaN NaN (-1.5969673012994325, inf) (-inf, inf)
+ Fitted Domain NaN NaN (0.030085402918948567, 10.416203209871883) (-28.483718062724616, -2.8836636097027206)
+ Cramér-von Mises statistic NaN NaN 0.055878 3.834238
+ Cramér-von Mises p-value NaN NaN 0.840024 0.0
+ Cramér-von Mises @ 10% NaN NaN True False
+ Cramér-von Mises @ 5% NaN NaN True False
+ Cramér-von Mises @ 1% NaN NaN True False
+ Kolmogorov-Smirnov statistic NaN NaN 0.018599 0.128949
+ Kolmogorov-Smirnov p-value NaN NaN 0.872994 0.0
+ Kolmogorov-Smirnov @ 10% NaN NaN True False
+ Kolmogorov-Smirnov @ 5% NaN NaN True False
+ Kolmogorov-Smirnov @ 1% NaN NaN True False
+ Sum of Squared Error NaN NaN 11.475127 8.464622
+
+Plotting our fit::
+
+ fitted_copula.copula_pdf_plot(show=False)
+ fitted_copula.pdf_plot(show=False)
+ fitted_copula.mc_cdf_plot(show=False)
+ plt.show()
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/PDF_Gh_PDF_Plot_Plot2.png?raw=true
+ :alt: Generalized Hyperbolic PDF
+ :scale: 60%
+ :align: center
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/Copula_PDF_Gh_Copula_PDF_Plot_Plot2.png?raw=true
+ :alt: Generalized Hyperbolic Copula PDF
+ :scale: 60%
+ :align: center
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/MC_CDF_Gh_MC_CDF_Plot_Plot2.png?raw=true
+ :alt: Generalized Hyperbolic CDF
+ :scale: 60%
+ :align: center
+
+Saving our fitted copula::
+
+ fitted_copula.save()
+
+We can then easily reload this object later::
+
+ from sklarpy import load
+
+ loaded_copula = load('gh.pickle')
+ print(loaded_copula.summary)
\ No newline at end of file
diff --git a/docs/source/Installation.rst b/docs/source/Installation.rst
new file mode 100644
index 0000000..2d551e9
--- /dev/null
+++ b/docs/source/Installation.rst
@@ -0,0 +1,19 @@
+.. _installation:
+
+Installation
+------------
+
+To use SklarPy, first install it using pip:
+
+.. code-block:: text
+
+ pip install sklarpy
+
+For Developers
+--------------
+
+If you wish to add your own significant modifications to SklarPy, you can clone the current repository using:
+
+.. code-block:: text
+
+ git clone https://github.com/tfm000/sklarpy
diff --git a/docs/source/Misc.rst b/docs/source/Misc.rst
new file mode 100644
index 0000000..07e8b0d
--- /dev/null
+++ b/docs/source/Misc.rst
@@ -0,0 +1,93 @@
+.. _misc:
+
+####################
+Miscellaneous Tools
+####################
+
+This SklarPy package contains functions / objects which are both implemented across SklarPy and also intended for user use.
+
+CorrelationMatrix
+------------------
+CorrelationMatrix is a SklarPy class which allows the user to estimate correlation and covariance matrices using a number of different estimators.
+
+This code is inspired by the methods described by Xu, Brin (2016) and implements the following estimators:
+
+- pearson
+- spearman
+- kendall
+- pp-kendall
+- rm-pearson
+- rm-spearman
+- rm-kendall
+- rm-pp-kendall
+- laloux-pearson
+- laloux-spearman
+- laloux-kendall
+- laloux-pp-kendall
+
+rm stands for the technique described by Rousseeuw and Molenberghs (1993) and laloux for that by Laloux et al. (2000).
+
+The corr method allows you to calculate correlation matrices, whilst cov allows you to calculate covariance matrices.
+
+debye
+-----
+This function allows the user to easily evaluate any member of the Debye function family.
+
+gradient_1d
+------------
+This function allows the user to calculate the numerical first derivative / gradient of a given 1-d function.
+
+kv
+---
+This class allows the user to easily evaluate the Modified Bessel function of the 2nd kind, in addition to its log-values.
+Limiting cases of the family parameter, v, and value, z, are also implemented.
+
+CorrelationMatrix Example
+--------------------------
+
+Here we calculate both the covariance and correlation matrix estimators::
+
+ import numpy as np
+ import pandas as pd
+
+ # specifying the parameters of the multivariate hyperbolic distribution we are
+ # generating from
+ my_loc = np.array([1, -3], dtype=float)
+ my_shape = np.array([[1, 0.7], [0.7, 1]], dtype=float)
+ my_chi = 1.7
+ my_psi = 4.5
+ my_gamma = np.array([2.3, -4.3], dtype=float)
+ my_params = (my_chi, my_psi, my_loc, my_shape, my_gamma)
+
+ # generating multivariate hyperbolic random variables
+ from sklarpy.multivariate import mvt_hyperbolic
+
+ num_generate: int = 1000
+ rvs: np.ndarray = mvt_hyperbolic.rvs(num_generate, my_params)
+ rvs_df: pd.DataFrame = pd.DataFrame(rvs, columns=['Process A', 'Process B'],
+ dtype=float)
+
+ # calculating covariance matrix and correlation matrix estimators
+ from sklarpy.misc import CorrelationMatrix
+
+ cmatrix: CorrelationMatrix = CorrelationMatrix(rvs_df)
+
+Calculating PP-Kendall Correlation Matrix with Laloux's adjustments::
+
+ corr_estimator: np.ndarray = cmatrix.corr(method='laloux_pp_kendall')
+ print(corr_estimator)
+
+.. code-block:: text
+
+ [[ 1. -0.53750912]
+ [-0.53750912 1. ]]
+
+Calculating Spearman's Covariance Matrix::
+
+ cov_estimator: np.ndarray = cmatrix.cov(method='spearman')
+ print(cov_estimator)
+
+.. code-block:: text
+
+ [[ 3.02797258 -2.68535942]
+ [-2.68535942 8.68778502]]
\ No newline at end of file
diff --git a/docs/source/Multivariate.rst b/docs/source/Multivariate.rst
new file mode 100644
index 0000000..e091343
--- /dev/null
+++ b/docs/source/Multivariate.rst
@@ -0,0 +1,158 @@
+.. _multivariate:
+
+############################
+Multivariate Distributions
+############################
+
+This SklarPy package contains many different multivariate distributions.
+Unlike univariate distributions, these are not wrappers of scipy objects (with the exceptions of mvt_normal and mvt_student_t).
+
+All implemented multivariate distributions are able to be fitted to both multivariate numpy and pandas data and contain easy saving and plotting methods.
+
+Which multivariate distributions are implemented?
+------------------------------------------------
+Currently, the following multivariate distributions are implemented:
+
+.. csv-table:: Multivariate Distributions
+ :file: mvt_table.csv
+ :header-rows: 1
+
+All Normal-Mixture models use the parameterization specified by McNeil, Frey and Embrechts (2005).
+
+PreFitContinuousMultivariate
+----------------------------
+This is the base class for all multivariate distributions. It implements the following methods and attributes:
+
+- logpdf (log of the probability density function)
+- pdf (probability density function)
+- cdf (cumulative distribution function)
+- mc_cdf (Monte Carlo approximation of the cumulative distribution function)
+- rvs (random variate generator / sampler)
+- likelihood (likelihood function)
+- loglikelihood (log of the likelihood function)
+- aic (Akaike Information Criterion)
+- bic (Bayesian Information Criterion)
+- marginal_pairplot (pairplot of the marginal distributions)
+- pdf_plot (plot of the probability density function)
+- cdf_plot (plot of the cumulative distribution function)
+- mc_cdf_plot (plot of the Monte Carlo approximation of the cumulative distribution function)
+- num_params (The number of parameters in the distribution)
+- num_scalar_params (The number of scalar values across all parameters in the distribution)
+- fit (fitting the distribution to data)
+
+mc_cdf is a numerical approximation of the cumulative distribution function.
+This is usually necessary for distributions that do not have a closed form cumulative distribution function,
+as the numerical integration alternative is computationally expensive.
+
+num_params is the number of parameter objects in the distribution, i.e. a vector / matrix is counted as 1.
+num_scalar_params counts the number of unique scalar values across all parameter objects.
+
+Also note that pdf and cdf plots are only implemented for 2-dimensional distributions.
+
+FittedContinuousMultivariate
+----------------------------
+This class is the fitted version of PreFitContinuousMultivariate's subclasses.
+It implements the same methods as PreFitContinuousMultivariate, but does not require params as an argument.
+It also implements the following additional methods and attributes:
+
+- params (the fitted parameters)
+- num_variables (the number of variables the distribution is fitted to)
+- fitted_num_data_points (the number of observations used to fit the distribution)
+- converged (whether the fitting algorithm converged)
+- summary (a summary of the fitted distribution)
+- save (save the fitted distribution object)
+
+Multivariate Example
+---------------------
+Here we use the multivariate normal and multivariate symmetric hyperbolic
+distributions, though all methods and attributes are generalized::
+
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+ # specifying the parameters of the multivariate normal distribution we are
+ # sampling from
+ my_mu: np.ndarray = np.array([33, 44], dtype=float)
+ my_corr: np.ndarray = np.array([[1, 0.7], [0.7, 1]], dtype=float)
+ my_sig: np.ndarray = np.array([1.3, 2.5])
+ my_cov: np.ndarray = np.diag(my_sig) @ my_corr @ np.diag(my_sig)
+ my_mvn_params: tuple = (my_mu, my_cov)
+
+ # generating multivariate random normal variables
+ from sklarpy.multivariate import mvt_normal
+
+ rvs: np.ndarray = mvt_normal.rvs(1000, my_mvn_params)
+ rvs_df: pd.DataFrame = pd.DataFrame(rvs, columns=['Wife Age', 'Husband Age'],
+ dtype=float)
+
+ # fitting a symmetric hyperbolic dist to our generated data using
+ # Maximum Likelihood Estimation
+ from sklarpy.multivariate import mvt_shyperbolic
+
+ fitted_msh = mvt_shyperbolic.fit(rvs_df, method='mle', show_progress=True)
+
+ # printing our fitted parameters
+ print(fitted_msh.params.to_dict)
+ print(fitted_msh.params.cov)
+
+
+.. code-block:: text
+
+ {'chi': 6.817911964473556, 'psi': 10.0, 'loc': array([[32.99012429],
+ [43.91822886]]), 'shape': array([[1.72408489, 2.27711492],
+ [2.27711492, 6.27443288]])}
+
+ [[1.78702958 2.36025021]
+ [2.36025021 6.50350643]]
+
+Printing a summary of our fit::
+
+ print(fitted_msh.summary())
+
+.. code-block:: text
+
+ summary
+ Distribution mvt_shyperbolic
+ #Variables 2
+ #Params 4
+ #Scalar Params 7
+ Converged True
+ Likelihood 0.0
+ Log-Likelihood -3664.49604
+ AIC 7342.99208
+ BIC 7377.346367
+ #Fitted Data Points 1000
+
+Plotting our fitted distribution::
+
+ fitted_msh.pdf_plot(show=False)
+ fitted_msh.mc_cdf_plot(show=False)
+ fitted_msh.marginal_pairplot(show=False)
+ plt.show()
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/PDF_Mvt_Shyperbolic_PDF_Plot_Plot.png?raw=true
+ :alt: Symmetric Hyperbolic PDF
+ :scale: 60%
+ :align: center
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/MC_CDF_Mvt_Shyperbolic_MC_CDF_Plot_Plot.png?raw=true
+ :alt: Symmetric Hyperbolic PDF
+ :scale: 60%
+ :align: center
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/mvt_shyperbolic_marginal_pair_plot.png?raw=true
+ :alt: Symmetric Hyperbolic PDF
+ :scale: 60%
+ :align: center
+
+Saving our fitted parameters::
+
+ fitted_msh.params.save()
+
+Reloading and fitting to another distribution of the same type::
+
+ from sklarpy import load
+
+ loaded_msh_params = load('mvt_shyperbolic.pickle')
+ param_fitted_msh = mvt_shyperbolic.fit(params=loaded_msh_params)
diff --git a/docs/source/Univariate.rst b/docs/source/Univariate.rst
new file mode 100644
index 0000000..4d79d44
--- /dev/null
+++ b/docs/source/Univariate.rst
@@ -0,0 +1,363 @@
+.. _univariate:
+
+#########################
+Univariate Distributions
+#########################
+
+This SklarPy package contains many different univariate distributions in addition to objects allowing for easy fitting.
+With the exception of a handful of distributions, all univariate distribution objects are wrappers of scipy.stats univariate distributions, with added functionalities for plotting, fitting and saving.
+This means that the distributions available in SklarPy are the same as those available in your installed version of scipy.
+
+There is also the UnivariateFitter object, which allows for easy fitting of univariate distributions to data and for determining the best / statistically significant distribution(s).
+
+Why is my interpreter unable to find univariate distributions?
+--------------------------------------------------------------
+
+If you try::
+
+ from sklarpy.univariate import normal
+
+You will likely find that your interpreter flags an error along the lines of "cannot find reference 'normal' in __init__.py".
+Do not worry, this is to be expected as a side effect of the dynamic way SklarPy univariate distributions are created from scipy.stats distributions.
+At runtime, *your code will work without any errors*!
+
+But how do I know which distributions are available?
+----------------------------------------------------
+Good question! You can use the following code to print out a list of all available univariate distributions::
+
+ from sklarpy.univariate import distributions_map
+ print(distributions_map)
+
+For scipy version 1.11.4 you should get an output along the lines of:
+
+.. code-block:: text
+
+ {'all': ('ksone', 'kstwo', 'kstwobign', 'alpha', 'anglit', 'arcsine', 'beta', 'betaprime', 'bradford', 'burr', 'burr12', 'fisk', 'cauchy', 'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'expon', 'exponnorm', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f', 'foldnorm', 'weibull_min', 'truncweibull_min', 'weibull_max', 'genlogistic', 'genpareto', 'genexpon', 'genextreme', 'gamma', 'erlang', 'gengamma', 'genhalflogistic', 'genhyperbolic', 'gompertz', 'gumbel_r', 'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant', 'gausshyper', 'invgamma', 'invgauss', 'geninvgauss', 'norminvgauss', 'invweibull', 'johnsonsb', 'johnsonsu', 'laplace', 'laplace_asymmetric', 'levy', 'levy_l', 'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gibrat', 'maxwell', 'mielke', 'kappa4', 'kappa3', 'moyal', 'nakagami', 'ncx2', 'ncf', 'nct', 'pareto', 'lomax', 'pearson3', 'powerlaw', 'powerlognorm', 'powernorm', 'rdist', 'rayleigh', 'loguniform', 'reciprocal', 'rice', 'recipinvgauss', 'semicircular', 'skewcauchy', 'skewnorm', 'trapezoid', 'trapz', 'triang', 'truncexpon', 'truncnorm', 'truncpareto', 'tukeylambda', 'uniform', 'vonmises', 'vonmises_line', 'wald', 'wrapcauchy', 'gennorm', 'halfgennorm', 'crystalball', 'argus', 'studentized_range', 'rel_breitwigner', 'gh', 'gig', 'ig', 'normal', 'student_t', 'gaussian_kde', 'empirical', 'poisson', 'planck', 'discrete_laplace', 'discrete_uniform', 'geometric', 'discrete_empirical'), 'all continuous': ('ksone', 'kstwo', 'kstwobign', 'alpha', 'anglit', 'arcsine', 'beta', 'betaprime', 'bradford', 'burr', 'burr12', 'fisk', 'cauchy', 'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'expon', 'exponnorm', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f', 'foldnorm', 'weibull_min', 'truncweibull_min', 'weibull_max', 'genlogistic', 'genpareto', 'genexpon', 'genextreme', 'gamma', 'erlang', 'gengamma', 'genhalflogistic', 'genhyperbolic', 'gompertz', 'gumbel_r', 'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant', 
'gausshyper', 'invgamma', 'invgauss', 'geninvgauss', 'norminvgauss', 'invweibull', 'johnsonsb', 'johnsonsu', 'laplace', 'laplace_asymmetric', 'levy', 'levy_l', 'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gibrat', 'maxwell', 'mielke', 'kappa4', 'kappa3', 'moyal', 'nakagami', 'ncx2', 'ncf', 'nct', 'pareto', 'lomax', 'pearson3', 'powerlaw', 'powerlognorm', 'powernorm', 'rdist', 'rayleigh', 'loguniform', 'reciprocal', 'rice', 'recipinvgauss', 'semicircular', 'skewcauchy', 'skewnorm', 'trapezoid', 'trapz', 'triang', 'truncexpon', 'truncnorm', 'truncpareto', 'tukeylambda', 'uniform', 'vonmises', 'vonmises_line', 'wald', 'wrapcauchy', 'gennorm', 'halfgennorm', 'crystalball', 'argus', 'studentized_range', 'rel_breitwigner', 'gh', 'gig', 'ig', 'normal', 'student_t', 'gaussian_kde', 'empirical'), 'all discrete': ('poisson', 'planck', 'discrete_laplace', 'discrete_uniform', 'geometric', 'discrete_empirical'), 'all common': ('cauchy', 'chi2', 'expon', 'gamma', 'lognorm', 'powerlaw', 'rayleigh', 'uniform', 'discrete_laplace', 'discrete_uniform', 'geometric', 'poisson'), 'all multimodal': ('arcsine', 'beta'), 'all parametric': ('ksone', 'kstwo', 'kstwobign', 'alpha', 'anglit', 'arcsine', 'beta', 'betaprime', 'bradford', 'burr', 'burr12', 'fisk', 'cauchy', 'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'expon', 'exponnorm', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f', 'foldnorm', 'weibull_min', 'truncweibull_min', 'weibull_max', 'genlogistic', 'genpareto', 'genexpon', 'genextreme', 'gamma', 'erlang', 'gengamma', 'genhalflogistic', 'genhyperbolic', 'gompertz', 'gumbel_r', 'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant', 'gausshyper', 'invgamma', 'invgauss', 'geninvgauss', 'norminvgauss', 'invweibull', 'johnsonsb', 'johnsonsu', 'laplace', 'laplace_asymmetric', 'levy', 'levy_l', 'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gibrat', 'maxwell', 'mielke', 'kappa4', 'kappa3', 'moyal', 'nakagami', 'ncx2', 'ncf', 'nct', 'pareto', 'lomax', 
'pearson3', 'powerlaw', 'powerlognorm', 'powernorm', 'rdist', 'rayleigh', 'loguniform', 'reciprocal', 'rice', 'recipinvgauss', 'semicircular', 'skewcauchy', 'skewnorm', 'trapezoid', 'trapz', 'triang', 'truncexpon', 'truncnorm', 'truncpareto', 'tukeylambda', 'uniform', 'vonmises', 'vonmises_line', 'wald', 'wrapcauchy', 'gennorm', 'halfgennorm', 'crystalball', 'argus', 'studentized_range', 'rel_breitwigner', 'gh', 'gig', 'ig', 'normal', 'student_t', 'poisson', 'planck', 'discrete_laplace', 'discrete_uniform', 'geometric'), 'all numerical': ('gaussian_kde', 'empirical', 'discrete_empirical'), 'all continuous parametric': ('ksone', 'kstwo', 'kstwobign', 'alpha', 'anglit', 'arcsine', 'beta', 'betaprime', 'bradford', 'burr', 'burr12', 'fisk', 'cauchy', 'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'expon', 'exponnorm', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f', 'foldnorm', 'weibull_min', 'truncweibull_min', 'weibull_max', 'genlogistic', 'genpareto', 'genexpon', 'genextreme', 'gamma', 'erlang', 'gengamma', 'genhalflogistic', 'genhyperbolic', 'gompertz', 'gumbel_r', 'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant', 'gausshyper', 'invgamma', 'invgauss', 'geninvgauss', 'norminvgauss', 'invweibull', 'johnsonsb', 'johnsonsu', 'laplace', 'laplace_asymmetric', 'levy', 'levy_l', 'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gibrat', 'maxwell', 'mielke', 'kappa4', 'kappa3', 'moyal', 'nakagami', 'ncx2', 'ncf', 'nct', 'pareto', 'lomax', 'pearson3', 'powerlaw', 'powerlognorm', 'powernorm', 'rdist', 'rayleigh', 'loguniform', 'reciprocal', 'rice', 'recipinvgauss', 'semicircular', 'skewcauchy', 'skewnorm', 'trapezoid', 'trapz', 'triang', 'truncexpon', 'truncnorm', 'truncpareto', 'tukeylambda', 'uniform', 'vonmises', 'vonmises_line', 'wald', 'wrapcauchy', 'gennorm', 'halfgennorm', 'crystalball', 'argus', 'studentized_range', 'rel_breitwigner', 'gh', 'gig', 'ig', 'normal', 'student_t'), 'all discrete parametric': ('poisson', 'planck', 
'discrete_laplace', 'discrete_uniform', 'geometric'), 'all continuous numerical': ('gaussian_kde', 'empirical'), 'all discrete numerical': ('discrete_empirical',), 'common continuous': ('cauchy', 'chi2', 'expon', 'gamma', 'lognorm', 'powerlaw', 'rayleigh', 'uniform'), 'common discrete': ('discrete_laplace', 'discrete_uniform', 'geometric', 'poisson'), 'continuous multimodal': ('arcsine', 'beta'), 'discrete multimodal': ()}
+
+So you have a lot to choose from!
+
+Name differences between SklarPy and SciPy
+-------------------------------------------
+Whilst we have generally kept most of the distribution names consistent with SciPy, there are a few notable exceptions.
+These are:
+
+.. csv-table:: Distribution Name Discrepancies
+ :file: univariate_table.csv
+ :header-rows: 1
+
+PreFitUnivariateBase
+---------------------
+This class and its subclasses contain the following methods / functions:
+
+- pdf (probability density function)
+- cdf (cumulative distribution function)
+- ppf (percent point function / cumulative inverse function)
+- support
+- ppf_approx (approximate ppf)
+- cdf_approx (approximate cdf)
+- rvs (random variate generator / sampler)
+- logpdf (log of the probability density function)
+- likelihood (likelihood function)
+- loglikelihood (log of the likelihood function)
+- aic (Akaike information criterion)
+- bic (Bayesian information criterion)
+- sse (Sum of squared errors)
+- gof (goodness of fit)
+- plot (plotting)
+- fit (fitting the distribution to data)
+
+Many / all of these methods take params as an argument. This is a tuple containing the parameters of the associated scipy.stats distribution object.
+
+ppf_approx and cdf_approx are approximations of the ppf and cdf functions respectively, which may be useful for distributions where the cdf and therefore ppf functions require numerical integration to evaluate.
+
+FittedUnivariateBase
+---------------------
+This class is the fitted version of PreFitUnivariateBase's subclasses.
+It implements the same methods as PreFitUnivariateBase, but does not require params as an argument.
+It also implements the following additional methods and attributes:
+
+- summary (summary of the distribution fit)
+- params (the fitted parameters)
+- fitted_domain (the domain over which the distribution is fitted)
+- fitted_num_data_points (the number of data points used to fit the distribution)
+- save (save the fitted distribution to a pickle file)
+
+.. automodule:: sklarpy.univariate.univariate_fitter
+ :members:
+ :exclude-members: UnivariateFitter
+
+ .. autoclass:: UnivariateFitter
+ :members:
+
+ .. automethod:: __init__
+
+ .. automethod:: fit
+
+ .. caution::
+
+ If 'use_processpoolexecutor' is set to True, the UnivariateFitter object will use the ProcessPoolExecutor to parallelize the fitting process. However, if the code is run outside 'if __name__ == "__main__":', you may receive a runtime error.
+
+ .. automethod:: get_summary
+
+ .. automethod:: get_best
+
+ .. automethod:: plot
+
+ .. automethod:: fitted_distributions
+
+
+.. automodule:: sklarpy.univariate._prefit_dists
+ :members:
+ :exclude-members: PreFitUnivariateBase, PreFitNumericalUnivariateBase
+
+
+.. automodule:: sklarpy.univariate._fitted_dists
+ :members:
+ :exclude-members: FittedUnivariateBase
+
+Continuous Example
+---------------------
+Here we use the normal and gamma distributions, though all methods and attributes are generalized::
+
+ import numpy as np
+ import pandas as pd
+
+ # generating random variables
+ from sklarpy.univariate import normal
+
+ num_generate: int = 1000
+
+ # generating a 1d array of N(1, 1) random variables
+ normal_rvs1: np.ndarray = normal.rvs((num_generate,), (1, 1))
+ # generating a 1d array of N(2, 3) random variables
+ normal_rvs2: np.ndarray = normal.rvs((num_generate,), (0, 3))
+ rvs = normal_rvs1 * normal_rvs2
+
+ # fitting a gamma distribution to our product of normal random variables
+ from sklarpy.univariate import gamma
+
+ fitted_gamma = gamma.fit(rvs)
+
+ # we can easily retrieve the fitted parameters
+ fitted_gamma_params: tuple = fitted_gamma.params
+ print(fitted_gamma_params)
+
+.. code-block:: text
+
+ (9754.44976841112, -411.8704014945831, 0.042211986922603084)
+
+We can also print a summary of our fit::
+
+ summary: pd.DataFrame = fitted_gamma.summary
+ print(summary)
+
+.. code-block:: text
+
+ summary
+ Parametric/Non-Parametric Parametric
+ Discrete/Continuous continuous
+ Distribution gamma
+ #Params 3
+ param0 9754.449768
+ param1 -411.870401
+ param2 0.042212
+ Support (-411.8704014945831, inf)
+ Fitted Domain (-20.13664960054484, 17.86802768972715)
+ Cramér-von Mises statistic 3.411862
+ Cramér-von Mises p-value 0.0
+ Cramér-von Mises @ 10% False
+ Cramér-von Mises @ 5% False
+ Cramér-von Mises @ 1% False
+ Kolmogorov-Smirnov statistic 0.094371
+ Kolmogorov-Smirnov p-value 0.0
+ Kolmogorov-Smirnov @ 10% False
+ Kolmogorov-Smirnov @ 5% False
+ Kolmogorov-Smirnov @ 1% False
+ Likelihood 0.0
+ Log-Likelihood -2846.513514
+ AIC 5699.027028
+ BIC 5713.750294
+ Sum of Squared Error 12.319097
+ #Fitted Data Points 1000
+
+And plot our fitted distribution::
+
+ fitted_gamma.plot()
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/univariate_continuous_example_figure1.png?raw=true
+ :alt: gamma plot
+ :align: center
+
+And save::
+
+ fitted_gamma.save()
+
+
+We can then easily reload our saved model::
+
+ from sklarpy import load
+
+ loaded_fitted_gamma = load('gamma.pickle')
+
+
+
+Discrete Example
+---------------------
+Here we use the poisson distribution, though all methods and attributes are generalized.
+We see this works in exactly the same way as continuous distributions::
+
+ import numpy as np
+ import pandas as pd
+
+ # generating random variables
+ from sklarpy.univariate import poisson
+
+ num_generate: int = 1000
+ poisson_rvs: np.ndarray = poisson.rvs((num_generate, ), (4,))
+ rvs_df: pd.DataFrame = pd.DataFrame(poisson_rvs, columns=['rvs'], dtype=int)
+
+ # fitting a poisson distribution to a dataframe of rvs
+ fitted_poisson = poisson.fit(rvs_df)
+
+ # we can easily retrieve the fitted parameters
+ fitted_poisson_params: tuple = fitted_poisson.params
+ print(fitted_poisson_params)
+
+.. code-block:: text
+
+ (3.992,)
+
+We can also print a summary of our fit::
+
+ summary: pd.DataFrame = fitted_poisson.summary
+ print(summary)
+
+.. code-block:: text
+
+ summary
+ Parametric/Non-Parametric Parametric
+ Discrete/Continuous discrete
+ Distribution poisson
+ #Params 1
+ param0 3.992
+ Support (0, inf)
+ Fitted Domain (0, 12)
+ chi-square statistic 7.059903
+ chi-square p-value 1.0
+ chi-square @ 10% True
+ chi-square @ 5% True
+ chi-square @ 1% True
+ Likelihood 0.0
+ Log-Likelihood -2100.955867
+ AIC 4203.911734
+ BIC 4208.819489
+ Sum of Squared Error 0.044802
+ #Fitted Data Points 1000
+
+And plot our fitted distribution::
+
+ fitted_poisson.plot()
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/univariate_discrete_example_figure1.png?raw=true
+ :alt: poisson plot
+ :align: center
+
+And save::
+
+ fitted_poisson.save()
+
+
+We can then easily reload our saved model::
+
+ from sklarpy import load
+
+ loaded_fitted_poisson = load('poisson.pickle')
+
+UnivariateFitter Example
+-------------------------
+Here we use the UnivariateFitter object to fit a distribution to a dataset::
+
+ import numpy as np
+
+ # generating random variables
+ from sklarpy.univariate import normal
+
+ num_generate: int = 1000
+ # generating a 1d array of N(1, 1) random variables
+ normal_rvs1: np.ndarray = normal.rvs((num_generate,), (1, 1))
+ # generating a 1d array of N(2, 3) random variables
+ normal_rvs2: np.ndarray = normal.rvs((num_generate,), (0, 3))
+ rvs = normal_rvs1 * normal_rvs2
+
+ # applying UnivariateFitter to our product of normal random variables
+ from sklarpy.univariate import UnivariateFitter
+
+ ufitter: UnivariateFitter = UnivariateFitter(rvs)
+ ufitter.fit()
+
+ # printing out the summary of our fits
+ from sklarpy import print_full
+ print_full()
+
+ print(ufitter.get_summary())
+
+.. code-block:: text
+
+ Parametric/Non-Parametric Discrete/Continuous Distribution #Params param0 param1 param2 Support Fitted Domain Cramér-von Mises statistic Cramér-von Mises p-value Cramér-von Mises @ 10% Cramér-von Mises @ 5% Cramér-von Mises @ 1% Kolmogorov-Smirnov statistic Kolmogorov-Smirnov p-value Kolmogorov-Smirnov @ 10% Kolmogorov-Smirnov @ 5% Kolmogorov-Smirnov @ 1% Likelihood Log-Likelihood AIC BIC Sum of Squared Error #Fitted Data Points
+ chi2 Parametric continuous chi2 3 448.683161 -68.423622 0.15222 (-68.42362151895298, inf) (-24.241200503425766, 21.971575538054054) 3.955007 0.0 False False False 0.099469 0.0 False False False 0.0 -2916.834582 5839.669164 5854.39243 12.84073 1000
+ powerlaw Parametric continuous powerlaw 3 1.485383 -24.284621 46.256197 (-24.28462141839885, 21.97157553805406) (-24.241200503425766, 21.971575538054054) 53.515366 0.0 False False False 0.393459 0.0 False False False 0.0 -3765.295723 7536.591446 7551.314712 23.1246 1000
+ cauchy Parametric continuous cauchy 2 -0.141171 1.744522 NaN (-inf, inf) (-24.241200503425766, 21.971575538054054) 0.223919 0.225566 True True True 0.03747 0.117619 True True True 0.0 -2848.628202 5701.256403 5711.071914 7.057125 1000
+ expon Parametric continuous expon 2 -24.241201 24.121323 NaN (-24.241200503425766, inf) (-24.241200503425766, 21.971575538054054) 68.507136 0.0 False False False 0.465333 0.0 False False False 0.0 -4183.09624 8370.19248 8380.007991 24.962541 1000
+ lognorm Parametric continuous lognorm 3 0.024195 -185.928209 185.754474 (-185.92820884247777, inf) (-24.241200503425766, 21.971575538054054) 3.726801 0.0 False False False 0.093801 0.0 False False False 0.0 -2910.878606 5827.757211 5842.480477 12.702458 1000
+ rayleigh Parametric continuous rayleigh 2 -24.268255 17.360527 NaN (-24.268254515672, inf) (-24.241200503425766, 21.971575538054054) 45.036613 0.0 False False False 0.364332 0.0 False False False 0.0 -3548.608918 7101.217836 7111.033346 21.635708 1000
+ gamma Parametric continuous gamma 3 614.186953 -110.593183 0.179857 (-110.5931825074225, inf) (-24.241200503425766, 21.971575538054054) 3.612011 0.0 False False False 0.094024 0.0 False False False 0.0 -2911.657958 5829.315916 5844.039182 12.618159 1000
+ uniform Parametric continuous uniform 2 -24.241201 46.212776 NaN (-24.241200503425766, 21.971575538054054) (-24.241200503425766, 21.971575538054054) 43.325309 0.0 False False False 0.328626 0.0 False False False 0.0 -3833.256298 7670.512595 7680.328106 23.507262 1000
+
+finding our best fit::
+
+ best_fit = ufitter.get_best(significant=False)
+ print(best_fit.summary)
+ best_fit.plot()
+
+.. code-block:: text
+
+ summary
+ Parametric/Non-Parametric Parametric
+ Discrete/Continuous continuous
+ Distribution cauchy
+ #Params 2
+ param0 -0.070741
+ param1 1.642212
+ Support (-inf, inf)
+ Fitted Domain (-16.627835918238397, 20.41344998969709)
+ Cramér-von Mises statistic 0.272381
+ Cramér-von Mises p-value 0.162046
+ Cramér-von Mises @ 10% True
+ Cramér-von Mises @ 5% True
+ Cramér-von Mises @ 1% True
+ Kolmogorov-Smirnov statistic 0.034967
+ Kolmogorov-Smirnov p-value 0.169277
+ Kolmogorov-Smirnov @ 10% True
+ Kolmogorov-Smirnov @ 5% True
+ Kolmogorov-Smirnov @ 1% True
+ Likelihood 0.0
+ Log-Likelihood -2791.769256
+ AIC 5587.538511
+ BIC 5597.354022
+ Sum of Squared Error 9.18869
+ #Fitted Data Points 1000
+
+.. image:: https://github.com/tfm000/sklarpy/blob/docs/readthedocs/media/univariate_fitter_example_figure1.png?raw=true
+ :alt: best fit plot
+ :align: center
+
+We can also save our UnivariateFitter object::
+
+ ufitter.save()
+
+We can then easily reload this::
+
+ from sklarpy import load
+
+ loaded_ufitter = load('UnivariateFitter.pickle')
+ loaded_best_fit = loaded_ufitter.get_best(significant=False)
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..6dbbbe7
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,41 @@
+# Configuration file for the Sphinx documentation builder.
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath(os.path.join('..', '..')))
+
+# -- Project information
+
+project = 'SklarPy'
+copyright = '2023, Tyler Mitchell'
+author = 'Tyler Mitchell'
+
+release = '1.0.0'
+version = '1.0.0'
+
+# -- General configuration
+
+extensions = [
+ 'sphinx.ext.duration',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.napoleon',
+ 'sphinx.ext.viewcode',
+]
+
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3/', None),
+ 'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
+}
+intersphinx_disabled_domains = ['std']
+
+templates_path = ['_templates']
+
+# -- Options for HTML output
+
+html_theme = 'sphinx_rtd_theme'
+
+# -- Options for EPUB output
+epub_show_urls = 'footnote'
diff --git a/docs/source/copula_table.csv b/docs/source/copula_table.csv
new file mode 100644
index 0000000..4b60d1b
--- /dev/null
+++ b/docs/source/copula_table.csv
@@ -0,0 +1,16 @@
+Family,Name,Dimensions,SklarPy Model
+Normal Mixture,Normal / Gaussian,Multivariate,gaussian_copula
+Normal Mixture,Student-T,Multivariate,student_t_copula
+Normal Mixture,Skewed-T,Multivariate,skewed_t_copula
+Normal Mixture,Generalized Hyperbolic,Multivariate,gh_copula
+Normal Mixture,Symmetric Generalized Hyperbolic,Multivariate,sgh_copula
+Normal Mixture,Hyperbolic,Multivariate,hyperbolic_copula
+Normal Mixture,Symmetric Hyperbolic,Multivariate,shyperbolic_copula
+Normal Mixture,Normal-Inverse Gaussian (NIG),Multivariate,nig_copula
+Normal Mixture,Symmetric Normal-Inverse Gaussian,Multivariate,snig_copula
+Normal Mixture,Marginal Hyperbolic,Multivariate,mh_copula
+Normal Mixture,Symmetric Marginal Hyperbolic,Multivariate,smh_copula
+Archimedean,Clayton,Multivariate,clayton_copula
+Archimedean,Gumbel,Multivariate,gumbel_copula
+Archimedean,Frank,Bivariate,frank_copula
+Numerical,Gaussian KDE,Multivariate,gaussian_kde_copula
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..55a20bf
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,120 @@
+.. image:: https://github.com/tfm000/sklarpy/blob/main/media/logo.png?raw=true
+ :alt: SklarPy logo
+ :scale: 60%
+ :align: center
+
+.. raw:: html
+
+
+
+
+
+
+
+SklarPy (pronounced 'Sky-Lar-Pee' or 'Sky-La-Pie') is an open-source software for probability distribution fitting.
+It contains useful tools for fitting Copula, Multivariate and Univariate probability distributions.
+In addition to over 100 univariate distributions, we implement many multivariate normal mixture distributions and their copulas, including Gaussian, Student-T, Skewed-T and Generalized Hyperbolic distributions.
+SklarPy is named after Sklar's theorem and Abe Sklar, the American mathematician who proved that multivariate cumulative distribution functions can be expressed in terms of copulas and their marginals.
+
+This library has many different possible use cases, ranging from machine learning to finance.
+
+Contents
+--------
+
+.. toctree::
+ :maxdepth: 2
+
+ Installation
+ Univariate
+ Multivariate
+ Copulas
+ Misc
+
+Why we are better
+-----------------
+- Unlike other Python implementations of copulas, we implement more than the Gaussian and Archimedean copulas. A full list of our implemented copula models can be found in the documentation, though it includes many normal mean-variance mixture models as well as Archimedean and non-parametric models.
+- We allow for easy parameter fitting of both the univariate marginals and the multivariate copula distribution.
+- We allow for easy plotting of all our distributions, allowing you to visualize your models.
+- We use scipy.stats as a backend for all our univariate models, meaning as scipy expands and improves their model selection, so will ours!
+- We provide multivariate and univariate distributions, in addition to our copula models, meaning SklarPy can act as a one-stop-shop for all probability distribution fitting. A full list of our implemented multivariate distributions can be found in the documentation.
+- We are continuing to expand our library, and are open to suggestions for new models to implement. If you have a model you would like to see implemented, please open an issue on our GitHub page.
+
+Example
+--------
+
+Here we show a quick example of working with SklarPy.
+For more information, see the specific documentation::
+
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from sklarpy.copulas import gh_copula
+
+ # generating random data
+ n: int = 1000
+ obs: np.ndarray = np.full((n, 2), np.nan)
+ obs[:, 0] = np.random.normal(3,4, size=(n,))
+ obs[:, 1] = obs[:, 0] + 0.5 * np.random.normal(3, 5, size=(n,))
+ obvs_df: pd.DataFrame = pd.DataFrame(obs, columns=['Process A', 'Process B'])
+
+ # fitting our copula model
+ fitted_copula = gh_copula.fit(obvs_df)
+
+ # printing our fitted copula parameters
+ print(fitted_copula.copula_params.to_dict)
+
+ # printing our fitted marginal distributions
+ print(fitted_copula.mdists)
+
+ # plotting our fit
+ fitted_copula.pdf_plot(show=False)
+ fitted_copula.copula_pdf_plot(show=False)
+ plt.show()
+
+This outputs:
+
+.. code-block:: text
+
+ {'lamb': -10.0, 'chi': 4.227038325195731, 'psi': 10.0,
+ 'loc': array([[0.], [0.]]),
+ 'shape': array([[1. , 0.84273015],
+ [0.84273015, 1.]]),
+ 'gamma': array([[0.99696041], [0.99913161]])}
+
+ {0: lognorm(0.02, -203.22, 206.18), 1: lognorm(0.04, -110.89, 115.4)}
+
+.. image:: https://github.com/tfm000/sklarpy/blob/main/media/PDF_Gh_PDF_Plot_Plot.png?raw=true
+ :alt: GH PDF
+ :scale: 60%
+ :align: center
+
+.. image:: https://github.com/tfm000/sklarpy/blob/main/media/Copula_PDF_Gh_Copula_PDF_Plot_Plot.png?raw=true
+ :alt: GH Copula PDF
+ :scale: 60%
+ :align: center
diff --git a/docs/source/mvt_table.csv b/docs/source/mvt_table.csv
new file mode 100644
index 0000000..3b41026
--- /dev/null
+++ b/docs/source/mvt_table.csv
@@ -0,0 +1,13 @@
+Family,Name,SklarPy Model
+Normal Mixture,Normal / Gaussian,mvt_normal
+Normal Mixture,Student-T,mvt_student_t
+Normal Mixture,Skewed-T,mvt_skewed_t
+Normal Mixture,Generalized Hyperbolic,mvt_gh
+Normal Mixture,Symmetric Generalized Hyperbolic,mvt_sgh
+Normal Mixture,Hyperbolic,mvt_hyperbolic
+Normal Mixture,Symmetric Hyperbolic,mvt_shyperbolic
+Normal Mixture,Normal-Inverse Gaussian (NIG),mvt_nig
+Normal Mixture,Symmetric Normal-Inverse Gaussian,mvt_snig
+Normal Mixture,Marginal Hyperbolic,mvt_mh
+Normal Mixture,Symmetric Marginal Hyperbolic,mvt_smh
+Numerical,Gaussian KDE,mvt_gaussian_kde
\ No newline at end of file
diff --git a/docs/source/univariate_table.csv b/docs/source/univariate_table.csv
new file mode 100644
index 0000000..2e409cb
--- /dev/null
+++ b/docs/source/univariate_table.csv
@@ -0,0 +1,6 @@
+SciPy Stats,SklarPy Univariate
+norm,normal
+t,student_t
+dlaplace,discrete_laplace
+randint,discrete_uniform
+geom,geometric
\ No newline at end of file
diff --git a/examples/multivariate_examples/multivariate_example.py b/examples/multivariate_examples/multivariate_example.py
index ed1093b..ad6b928 100644
--- a/examples/multivariate_examples/multivariate_example.py
+++ b/examples/multivariate_examples/multivariate_example.py
@@ -34,7 +34,7 @@
# printing a summary of our fit
print(fitted_msh.summary)
-# can plot
+# # can plot
fitted_msh.pdf_plot(show=False)
fitted_msh.mc_cdf_plot(show=False)
fitted_msh.marginal_pairplot(show=False)
diff --git a/examples/univariate_examples/discrete_example.py b/examples/univariate_examples/discrete_example.py
index e29b1a3..409cc8b 100644
--- a/examples/univariate_examples/discrete_example.py
+++ b/examples/univariate_examples/discrete_example.py
@@ -8,7 +8,7 @@
# generating random variables
from sklarpy.univariate import poisson
-num_generate: int = 10000
+num_generate: int = 100
poisson_rvs: np.ndarray = poisson.rvs((num_generate, ), (4,))
rvs_df: pd.DataFrame = pd.DataFrame(poisson_rvs, columns=['rvs'], dtype=int)
diff --git a/examples/univariate_examples/univariate_fitter_example.py b/examples/univariate_examples/univariate_fitter_example.py
index 9b88fa1..9ceb4c4 100644
--- a/examples/univariate_examples/univariate_fitter_example.py
+++ b/examples/univariate_examples/univariate_fitter_example.py
@@ -5,7 +5,7 @@
# generating random variables
from sklarpy.univariate import normal
-num_generate: int = 10000
+num_generate: int = 1000
# generating a 1d array of N(1, 1) random variables
normal_rvs1: np.ndarray = normal.rvs((num_generate,), (1, 1))
# generating a 1d array of N(2, 3) random variables
diff --git a/media/Copula_PDF_Gh_Copula_PDF_Plot_Plot2.png b/media/Copula_PDF_Gh_Copula_PDF_Plot_Plot2.png
new file mode 100644
index 0000000..54c42f5
Binary files /dev/null and b/media/Copula_PDF_Gh_Copula_PDF_Plot_Plot2.png differ
diff --git a/media/MC_CDF_Gh_MC_CDF_Plot_Plot2.png b/media/MC_CDF_Gh_MC_CDF_Plot_Plot2.png
new file mode 100644
index 0000000..f15de01
Binary files /dev/null and b/media/MC_CDF_Gh_MC_CDF_Plot_Plot2.png differ
diff --git a/media/MC_CDF_Mvt_Shyperbolic_MC_CDF_Plot_Plot.png b/media/MC_CDF_Mvt_Shyperbolic_MC_CDF_Plot_Plot.png
new file mode 100644
index 0000000..b15ab55
Binary files /dev/null and b/media/MC_CDF_Mvt_Shyperbolic_MC_CDF_Plot_Plot.png differ
diff --git a/media/PDF_Gh_PDF_Plot_Plot2.png b/media/PDF_Gh_PDF_Plot_Plot2.png
new file mode 100644
index 0000000..2d79e89
Binary files /dev/null and b/media/PDF_Gh_PDF_Plot_Plot2.png differ
diff --git a/media/PDF_Mvt_Shyperbolic_PDF_Plot_Plot.png b/media/PDF_Mvt_Shyperbolic_PDF_Plot_Plot.png
new file mode 100644
index 0000000..dc91230
Binary files /dev/null and b/media/PDF_Mvt_Shyperbolic_PDF_Plot_Plot.png differ
diff --git a/media/mfitter_pairplot.png b/media/mfitter_pairplot.png
new file mode 100644
index 0000000..d4bb98c
Binary files /dev/null and b/media/mfitter_pairplot.png differ
diff --git a/media/mvt_shyperbolic_marginal_pair_plot.png b/media/mvt_shyperbolic_marginal_pair_plot.png
new file mode 100644
index 0000000..52b5330
Binary files /dev/null and b/media/mvt_shyperbolic_marginal_pair_plot.png differ
diff --git a/media/univariate_continuous_example_figure1.png b/media/univariate_continuous_example_figure1.png
new file mode 100644
index 0000000..fa8310e
Binary files /dev/null and b/media/univariate_continuous_example_figure1.png differ
diff --git a/media/univariate_discrete_example_figure1.png b/media/univariate_discrete_example_figure1.png
new file mode 100644
index 0000000..a34b324
Binary files /dev/null and b/media/univariate_discrete_example_figure1.png differ
diff --git a/media/univariate_fitter_example_figure1.png b/media/univariate_fitter_example_figure1.png
new file mode 100644
index 0000000..b2853e5
Binary files /dev/null and b/media/univariate_fitter_example_figure1.png differ
diff --git a/pyproject.toml b/pyproject.toml
index 20fe848..406b08d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,9 @@ dev = [
"pytest==7.1.2",
"tox==3.25.1",
]
+docs = [
+ "sphinx~=4.2.0",
+]
[project.urls] # Optional
"Homepage" = "https://github.com/tfm000/sklarpy"
diff --git a/sklarpy/univariate/_fitted_dists.py b/sklarpy/univariate/_fitted_dists.py
index 05866ab..372bd93 100644
--- a/sklarpy/univariate/_fitted_dists.py
+++ b/sklarpy/univariate/_fitted_dists.py
@@ -393,7 +393,7 @@ def plot(self, xrange: np.ndarray = None, include_empirical: bool = True,
"no empirical data to display.")
# getting xrange and qrange
- eps: float = 10 ** -4
+ eps: float = 0.05
prob_bounds: tuple = (eps, 1 - eps)
if xrange is None:
if not (isinstance(num_to_plot, int) and num_to_plot >= 1):
@@ -446,8 +446,10 @@ def plot(self, xrange: np.ndarray = None, include_empirical: bool = True,
alpha=empirical_alpha, label=empirical_label)
ax[3].plot(xrange, xrange, color=qqplot_yx_color,
alpha=qqplot_yx_alpha, label='y=x')
- ax[3].plot(self.ppf(qrange), empirical_ppf_values, color=color,
+ ppf_values = self.ppf(qrange)
+ ax[3].plot(ppf_values, empirical_ppf_values, color=color,
alpha=alpha, label=self.name)
+ ax[3].set_xlim([ppf_values.min(), ppf_values.max()])
# plotting distribution
ax[0].plot(xrange, self.pdf(xrange), color=color,
diff --git a/sklarpy/univariate/_prefit_dists.py b/sklarpy/univariate/_prefit_dists.py
index addac3a..29d4aad 100644
--- a/sklarpy/univariate/_prefit_dists.py
+++ b/sklarpy/univariate/_prefit_dists.py
@@ -598,7 +598,7 @@ def plot(self, params: tuple, xrange: np.ndarray = None,
"show are all boolean.")
# getting xrange and qrange
- eps: float = 10 ** -4
+ eps: float = 0.05
prob_bounds: tuple = (eps, 1 - eps)
if xrange is None:
if not (isinstance(num_to_plot, int) and num_to_plot >= 1):