Use DocumenterCitations.jl for references #302

Merged (29 commits, Aug 25, 2023)

Commits
581b55f
Export `InformationMeasureEstimator` (it is used in devdocs)
kahaaga Aug 24, 2023
a073528
Fix differential entropy doc example
kahaaga Aug 24, 2023
4537984
Use DocumenterCitations.jl
kahaaga Aug 24, 2023
107a568
Use @cite and @citet properly, and remove manual citations
kahaaga Aug 24, 2023
33e6754
Fix a few bib items
kahaaga Aug 24, 2023
2ce2ef1
Fix typos
kahaaga Aug 24, 2023
6c535a2
Clean old docstring
kahaaga Aug 24, 2023
61302e1
Fix citation
kahaaga Aug 24, 2023
62b7828
Fix special symbols.
kahaaga Aug 24, 2023
90ad56f
Fix ref
kahaaga Aug 24, 2023
f50f013
Add dois/urls to all citation elements
kahaaga Aug 24, 2023
f206b28
Remove Literate
kahaaga Aug 24, 2023
449ddc5
Fix citations in examples.md
kahaaga Aug 24, 2023
ab39237
Fix Amigo citation
kahaaga Aug 24, 2023
e887945
Add StatsBase to docs/Project.toml
kahaaga Aug 24, 2023
eba52dd
Fix examples
kahaaga Aug 24, 2023
03ac4ac
Typo
kahaaga Aug 25, 2023
497d341
Extra space
kahaaga Aug 25, 2023
33b1db6
Typos
kahaaga Aug 25, 2023
437e6ec
Formatting
kahaaga Aug 25, 2023
3210f46
Singh is referenced later in description
kahaaga Aug 25, 2023
5891e8c
Remove manual citation
kahaaga Aug 25, 2023
aac152b
Use new citation format
kahaaga Aug 25, 2023
f2e1b17
Cross-reference `Schürmann` estimator
kahaaga Aug 25, 2023
09668fe
Add proper citation to Datseris & Parlitz book
kahaaga Aug 25, 2023
ff0705e
Fix reference
kahaaga Aug 25, 2023
0c74d11
Fix references
kahaaga Aug 25, 2023
2656dc2
Merge branch 'main' into bibcitations
kahaaga Aug 25, 2023
9fd002d
Use new citation style
kahaaga Aug 25, 2023
1 change: 1 addition & 0 deletions Project.toml
@@ -20,6 +20,7 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StateSpaceSets = "40b095a5-5852-4c12-98c7-d43bf788e795"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Wavelets = "29a6e085-ba6d-5f35-a997-948ac2efa89a"

[compat]
3 changes: 2 additions & 1 deletion docs/Project.toml
@@ -6,11 +6,12 @@ DelayEmbeddings = "5732040d-69e3-5649-938a-b6b4f237613f"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
Rotations = "6038ab10-8711-5258-84ad-4b1120ba62dc"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StateSpaceSets = "40b095a5-5852-4c12-98c7-d43bf788e795"
13 changes: 12 additions & 1 deletion docs/make.jl
@@ -14,13 +14,24 @@ pages = [
"convenience.md",
"examples.md",
"devdocs.md",
"references.md",
]


import Downloads
Downloads.download(
    "https://raw.githubusercontent.com/JuliaDynamics/doctheme/master/build_docs_with_style.jl",
    joinpath(@__DIR__, "build_docs_with_style.jl")
)
include("build_docs_with_style.jl")

build_docs_with_style(pages, ComplexityMeasures, StateSpaceSets)
using DocumenterCitations

bib = CitationBibliography(
    joinpath(@__DIR__, "refs.bib");
    style=:authoryear
)

build_docs_with_style(pages, ComplexityMeasures, StateSpaceSets;
    expandfirst = ["index.md"], bib,
)
835 changes: 835 additions & 0 deletions docs/refs.bib

Large diffs are not rendered by default.
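The 835-line `docs/refs.bib` is not rendered above. As a rough, hypothetical sketch of how the pieces fit together (the struct name `MyMeasure` is made up for illustration; the `[Key](@cite)` / `[Key](@citet)` syntax is the one used throughout this PR): once `refs.bib` contains an entry with key `Pincus1991`, any docstring or markdown page can cite it and DocumenterCitations links it to the new References page.

```julia
# Hypothetical docstring sketch (MyMeasure is not part of this PR or package).
# DocumenterCitations resolves `[Pincus1991](@cite)` against docs/refs.bib and
# renders a link to the References page added in this PR.
"""
    MyMeasure()

An estimator for some complexity measure [Pincus1991](@cite), used with `complexity`.
See [Pincus1991](@citet) for the original definition.
"""
struct MyMeasure end
```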

75 changes: 27 additions & 48 deletions docs/src/examples.md
@@ -60,12 +60,13 @@ kldivergence(px, py)
```@example MAIN
kldivergence(py, px)
```

(`Inf` because there are events with 0 probability in `px`)

## Differential entropy: estimator comparison

Here, we compare how the nearest neighbor differential entropy estimators
([`Kraskov`](@ref), [`KozachenkoLeonenko`](@ref), [`Zhu`](@ref) and [`ZhuSingh`](@ref))
([`Kraskov`](@ref), [`KozachenkoLeonenko`](@ref), [`Zhu`](@ref), [`ZhuSingh`](@ref), etc.)
converge towards the true entropy value for increasing time series length.

ComplexityMeasures.jl also provides entropy estimators based on
@@ -88,17 +89,18 @@ e = Shannon(; base = MathConstants.e)
# kNN estimators
# --------------------------
w = 0 # Theiler window of 0 (only exclude the point itself during neighbor searches)
ent = Shannon(; base = ℯ)
knn_estimators = [
# with k = 1, Kraskov is virtually identical to
# Kozachenko-Leonenko, so pick a higher number of neighbors for Kraskov
Kraskov(; k = 3, base = ℯ, w),
KozachenkoLeonenko(; base = ℯ, w),
Zhu(; k = 3, base = ℯ, w),
ZhuSingh(; k = 3, base = ℯ, w),
Gao(; k = 3, base = ℯ, corrected = false, w),
Gao(; k = 3, base = ℯ, corrected = true, w),
Goria(; k = 3, w, base = ℯ),
Lord(; k = 20, w, base = ℯ) # more neighbors for accurate ellipsoid estimation
Kraskov(ent; k = 3, w),
KozachenkoLeonenko(ent; w),
Zhu(ent; k = 3, w),
ZhuSingh(ent; k = 3, w),
Gao(ent; k = 3, corrected = false, w),
Gao(ent; k = 3, corrected = true, w),
Goria(ent; k = 3, w),
Lord(ent; k = 20, w) # more neighbors for accurate ellipsoid estimation
]

# Test each estimator `nreps` times over time series of varying length.
@@ -124,7 +126,7 @@ for (i, est_os) in enumerate(estimators_os)
pts = randn(maximum(Ns)) # raw timeseries, not a `StateSpaceSet`
for (k, N) in enumerate(Ns)
m = floor(Int, N / 100) # Scale `m` to timeseries length
est = est_os(; m, base = ℯ) # Instantiate estimator with current `m`
est = est_os(ent; m) # Instantiate estimator with current `m`
Hs_uniform_os[i][k][j] = information(est, pts[1:N])
end
end
@@ -247,7 +249,7 @@ Here, we show the sensitivity of the various entropies to variations in their parameters.

### Curado entropy

Here, we reproduce Figure 2 from Curado & Nobre (2004)[^Curado2004], showing
Here, we reproduce Figure 2 from [Curado2004](@citet), showing
how the [`Curado`](@ref) entropy changes as a function of the parameter `a` for a range of two-element probability distributions given by
`Probabilities([p, 1 - p] for p in 0.0:0.01:1.0)`.

@@ -266,9 +268,6 @@ axislegend(ax)
fig
```

[^Curado2004]: Curado, E. M., & Nobre, F. D. (2004). On the stability of analytic
entropic forms. Physica A: Statistical Mechanics and its Applications, 335(1-2), 94-106.

### Kaniadakis entropy

Here, we show how [`Kaniadakis`](@ref) entropy changes as a function of the parameter `a` for
@@ -298,7 +297,7 @@ fig

### Stretched exponential entropy

Here, we reproduce the example from Anteneodo & Plastino (1999)[^Anteneodo1999], showing
Here, we reproduce the example from [Anteneodo1999](@citet), showing
how the stretched exponential entropy changes as a function of the parameter `η` for a range
of two-element probability distributions given by
`Probabilities([p, 1 - p] for p in 0.0:0.01:1.0)`.
@@ -320,19 +319,15 @@ axislegend(ax)
fig
```

[^Anteneodo1999]: Anteneodo, C., & Plastino, A. R. (1999). Maximum entropy approach to
stretched exponential probability distributions. Journal of Physics A: Mathematical
and General, 32(7), 1089.

## [Discrete entropy: dispersion entropy](@id dispersion_example)

Here we compute dispersion entropy (Rostaghi et al. 2016)[^Rostaghi2016],
Here we compute dispersion entropy [Rostaghi2016](@cite),
using the [`Dispersion`](@ref) probabilities estimator, for a time
series consisting of normally distributed noise with a single spike in the middle of the
signal.
We compute the entropies over a range of subsets of the data, using a sliding window
consisting of 70 data points, stepping the window 10 time steps at a time.
This example is adapted from Li et al. (2021)[^Li2019].
This example is adapted from [Li2019](@citet).

```@example MAIN
using ComplexityMeasures
@@ -374,13 +369,6 @@ ylims!(0, max(maximum(pes), 1))
fig
```

[^Rostaghi2016]:
Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series
analysis. IEEE Signal Processing Letters, 23(5), 610-614.
[^Li2019]:
Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.

## Discrete entropy: normalized entropy for comparing different signals

When comparing different signals or signals that have different lengths, it is best to normalize entropies so that the "complexity" or "disorder" quantification is directly comparable between signals. Here is an example based on the wavelet entropy example where we use the spectral entropy instead of the wavelet entropy:
@@ -492,12 +480,12 @@ for [`SpatialDispersion`](@ref)).

## Complexity: reverse dispersion entropy

Here, we compare regular dispersion entropy (Rostaghi et al., 2016)[^Rostaghi2016], and
reverse dispersion entropy Li et al. (2021)[^Li2019] for a time series consisting
Here, we compare regular dispersion entropy [Rostaghi2016](@cite), and
reverse dispersion entropy [Li2019](@cite) for a time series consisting
of normally distributed noise with a single spike in the middle of the signal.
We compute the entropies over a range of subsets of the data, using a sliding window
consisting of 70 data points, stepping the window 10 time steps at a time.
This example reproduces parts of figure 3 in Li et al. (2021), but results here are not
This example reproduces parts of figure 3 in [Li2019](@cite), but results here are not
exactly the same as in the original paper, because their examples are based on randomly
generated numbers and do not provide code that specify random number seeds.

@@ -550,13 +538,6 @@ ylims!(0, max(maximum(pes), 1))
fig
```

[^Rostaghi2016]:
Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series
analysis. IEEE Signal Processing Letters, 23(5), 610-614.
[^Li2019]:
Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.

## Complexity: missing dispersion patterns

```@example MAIN
@@ -628,21 +609,17 @@ for this signal.
Of course, to robustly reject the null hypothesis, we'd need to generate a sufficient number
of surrogate realizations, and actually compute quantiles to compare with.

[^Zhou2022]:
Zhou, Q., Shang, P., & Zhang, B. (2022). Using missing dispersion patterns
to detect determinism and nonlinearity in time series data. Nonlinear Dynamics, 1-20.

## Complexity: approximate entropy

Here, we reproduce the Henon map example with ``R=0.8`` from Pincus (1991),
comparing our values with relevant values from table 1 in Pincus (1991).
Here, we reproduce the Henon map example with ``R=0.8`` from [Pincus1991](@citet),
comparing our values with relevant values from table 1 in [Pincus1991](@citet).

We use `DiscreteDynamicalSystem` from `DynamicalSystemsBase` to represent the map,
and use the `trajectory` function from the same package to iterate the map
for different initial conditions, for multiple time series lengths.

Finally, we summarize our results in box plots and compare the values to those
obtained by Pincus (1991).
obtained by [Pincus1991](@citet).

```@example MAIN
using ComplexityMeasures
Expand Down Expand Up @@ -692,7 +669,7 @@ fig = Figure()
# Example time series
a1 = Axis(fig[1,1]; xlabel = "Time (t)", ylabel = "Value")
sys = henon(u₀ = [0.5, 0.1], R = 0.8)
x, y = columns(trajectory(sys, 100, Ttr = 500))
x, y = columns(first(trajectory(sys, 100, Ttr = 500))) # we don't need time indices
lines!(a1, 1:length(x), x, label = "x")
lines!(a1, 1:length(y), y, label = "y")

@@ -778,7 +755,9 @@ fig

## Statistical complexity of iterated maps

In this example, we reproduce parts of Fig. 1 in Rosso et al. (2007): We compute the statistical complexity of the Henon, logistic and Schuster map, as well as that of k-noise.
In this example, we reproduce parts of Fig. 1 in [Rosso2007](@citet): We compute the
statistical complexity of the Henon, logistic and Schuster map, as well as that of k-noise.

```@example MAIN
using ComplexityMeasures
using Distances
Expand Down Expand Up @@ -877,4 +856,4 @@ lines!(ax, min_curve; color=:black)
lines!(ax, max_curve; color=:black)
axislegend(; position=:lt)
fig
```
```
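The dispersion-entropy examples above compute entropies over a sliding window of 70 points, stepped 10 samples at a time. Below is a minimal sketch of that windowing scheme; the signal length, spike amplitude, and default `Dispersion()` / `Shannon()` settings are assumptions for illustration, not taken from the diff.

```julia
using ComplexityMeasures

x = randn(2000)                       # noise ...
x[1000] += 20.0                       # ... with a single spike in the middle
windows = [i:(i + 69) for i in 1:10:(length(x) - 69)]   # 70-point windows, step 10
# Shannon dispersion entropy in each window (default Dispersion parameters assumed)
hs = [information(Shannon(), Dispersion(), x[w]) for w in windows]
```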
18 changes: 3 additions & 15 deletions docs/src/multiscale.md
@@ -29,18 +29,6 @@ combination.

| Method | Syntax | Reference |
| ------------- | ------------- | ------------- |
| Refined composite multiscale dispersion entropy | `multiscale(Composite(), Dispersion(), est, x, normalized = true)` | Azami et al. (2017)[^Azami2017] |
| Multiscale sample entropy (first moment) | `multiscale(Regular(f = mean), SampleEntropy(), x)` | Costa et al. (2002)[^Costa2002] |
| Generalized multiscale sample entropy (second moment) | `multiscale(Regular(f = std), SampleEntropy(), x)` | Costa et al. (2015)[^Costa2015] |

[^Azami2017]:
Azami, H., Rostaghi, M., Abásolo, D., & Escudero, J. (2017). Refined
composite multiscale dispersion entropy and its application to biomedical signals.
IEEE Transactions on Biomedical Engineering, 64(12), 2872-2879.
[^Costa2002]:
Costa, M., Goldberger, A. L., & Peng, C. K. (2002). Multiscale entropy
analysis of complex physiologic time series. Physical review letters, 89(6), 068102.
[^Costa2015]:
Costa, M. D., & Goldberger, A. L. (2015). Generalized multiscale entropy
analysis: Application to quantifying the complex volatility of human heartbeat time
series. Entropy, 17(3), 1197-1203.
| Refined composite multiscale dispersion entropy | `multiscale(Composite(), Dispersion(), est, x, normalized = true)` | Azami et al. (2017)[Azami2017](@cite) |
| Multiscale sample entropy (first moment) | `multiscale(Regular(f = mean), SampleEntropy(), x)` | Costa et al. (2002)[Costa2002](@cite) |
| Generalized multiscale sample entropy (second moment) | `multiscale(Regular(f = std), SampleEntropy(), x)` | Costa et al. (2015)[Costa2015](@cite) |
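As a rough usage sketch mirroring the syntax listed in the table above (the timeseries `x` is an assumption for illustration, and we pass it to `SampleEntropy` so a tolerance can be inferred; this is not verified against the exact API at this version):

```julia
using ComplexityMeasures, Statistics

x = randn(10_000)

# Multiscale sample entropy (first moment): coarse-grain with the mean.
mse = multiscale(Regular(f = mean), SampleEntropy(x), x)

# Generalized multiscale sample entropy (second moment): coarse-grain with the std.
gmse = multiscale(Regular(f = std), SampleEntropy(x), x)
```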
4 changes: 4 additions & 0 deletions docs/src/references.md
@@ -0,0 +1,4 @@
# References

```@bibliography
```
5 changes: 3 additions & 2 deletions docs/src/tutorial.jl
@@ -9,7 +9,8 @@
# !!! note
# The documentation and exposition of ComplexityMeasures.jl is inspired by chapter 5 of
# [Nonlinear Dynamics](https://link.springer.com/book/10.1007/978-3-030-91032-7),
# Datseris & Parlitz, Springer 2022, and expanded to cover more content.
# Datseris & Parlitz, Springer 2022 [Datseris2022](@cite), and expanded to cover more
# content.


# ## First things first: "complexity measures"
@@ -178,7 +179,7 @@ perm_ext_y_jack = information(Jackknife(extdef), ospace, y)

# Discrete entropies (or in general, information measures) are functions of probability mass functions.
# It is also possible to compute entropies of probability density functions.
# In ComplexityMeasures.jl, this is done by calling [`entropy`](@ref) (or the more general [`information`](@ref)) with a differential information estimator, a subtype of [`DiffInfoEstimator`](@ref).
# In ComplexityMeasures.jl, this is done by calling [`entropy`](@ref) (or the more general [`information`](@ref)) with a differential information estimator, a subtype of [`DifferentialInfoEstimator`](@ref).
# These estimators are given directly to [`information`](@ref) without assigning an outcome space, because the probability density is approximated implicitly, not explicitly.
# For example, the [`Correa`](@ref) estimator approximates the differential Shannon entropy by utilizing order statistics of the timeseries data:

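The tutorial hunk above ends just before the corresponding code. A hedged sketch of what such a call looks like, assuming default `Correa` parameters (not taken from the diff):

```julia
using ComplexityMeasures

x = randn(10_000)
# Differential Shannon entropy estimated from the order statistics of `x`,
# as described in the tutorial text above (default estimator parameters assumed).
h = information(Correa(), x)
```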
7 changes: 2 additions & 5 deletions src/complexity_measures/approximate_entropy.jl
@@ -9,7 +9,7 @@ export ApproximateEntropy
ApproximateEntropy <: ComplexityEstimator
ApproximateEntropy([x]; r = 0.2std(x), kwargs...)

An estimator for the approximate entropy (ApEn; Pincus, 1991)[^Pincus1991] complexity
An estimator for the approximate entropy [Pincus1991](@cite) complexity
measure, used with [`complexity`](@ref).

The keyword argument `r` is mandatory if an input timeseries `x` is not provided.
@@ -25,7 +25,7 @@ The keyword argument `r` is mandatory if an input timeseries `x` is not provided

## Description

Approximate entropy is defined as
Approximate entropy (ApEn) is defined as

```math
ApEn(m ,r) = \\lim_{N \\to \\infty} \\left[ \\phi(x, m, r) - \\phi(x, m + 1, r) \\right].
@@ -64,9 +64,6 @@ constructed from the input timeseries ``x(t)`` as
!!! note "Flexible embedding lag"
In the original paper, they fix `τ = 1`. In our implementation, the normalization
constant is modified to account for embeddings with `τ != 1`.

[^Pincus1991]: Pincus, S. M. (1991). Approximate entropy as a measure of system complexity.
Proceedings of the National Academy of Sciences, 88(6), 2297-2301.
"""
Base.@kwdef struct ApproximateEntropy{I, B, R} <: ComplexityEstimator
m::I = 2
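A minimal usage sketch for the `ApproximateEntropy` docstring above; the input data is an illustrative assumption:

```julia
using ComplexityMeasures, Statistics

x = randn(10_000)
# `r` is mandatory unless a timeseries is given to the constructor, in which case
# it defaults to 0.2 * std(x), as stated in the docstring above.
est = ApproximateEntropy(x)                    # infers r = 0.2 * std(x)
# est = ApproximateEntropy(r = 0.2 * std(x))   # equivalent, explicit form
apen = complexity(est, x)
```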
16 changes: 4 additions & 12 deletions src/complexity_measures/lempel_ziv.jl
@@ -4,28 +4,20 @@ export LempelZiv76
LempelZiv76 <: ComplexityEstimator
LempelZiv76()

The Lempel-Ziv, or `LempelZiv76`, complexity measure (Lempel & Ziv, 1976) [^LempelZiv1976],
The Lempel-Ziv, or `LempelZiv76`, complexity measure [LempelZiv1976](@cite),
which is used with [`complexity`](@ref) and [`complexity_normalized`](@ref).

For results to be comparable across sequences with different length, use the normalized
version. Normalized LempelZiv76-complexity is implemented as given in Amigó et al.
(2004)[^Amigó2004]. The normalized measure is close to zero for very regular signals, while
version. Normalized `LempelZiv76`-complexity is implemented as given in [Amigó2004](@citet).
The normalized measure is close to zero for very regular signals, while
for random sequences, it is close to 1 with high probability[^Amigó2004]. Note: the
normalized LempelZiv76 complexity can be higher than 1[^Amigó2004].
normalized `LempelZiv76` complexity can be higher than 1[^Amigó2004].

The `LempelZiv76` measure applies only to binary sequences, i.e. sequences with a
two-element alphabet (precisely two distinct outcomes). For performance optimization,
we do not check the number of unique elements in the input. If your input sequence is not
binary, you must [`encode`](@ref) it first using one of the implemented [`Encoding`](@ref)
schemes (or encode your data manually).

[^LempelZiv1976]:
Lempel, A., & Ziv, J. (1976). On the complexity of finite sequences. IEEE Transactions
on information theory, 22(1), 75-81.
[^Amigó2004]:
Amigó, J. M., Szczepański, J., Wajnryb, E., & Sanchez-Vives, M. V. (2004). Estimating
the entropy rate of spike trains via Lempel-Ziv complexity. Neural Computation, 16(4),
717-736.
"""
struct LempelZiv76 <: ComplexityEstimator end

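A short sketch of the `LempelZiv76` usage described in the docstring above; the random binary input is an illustrative assumption:

```julia
using ComplexityMeasures

x = rand(0:1, 10_000)                           # LempelZiv76 expects a binary sequence
lz  = complexity(LempelZiv76(), x)
lzn = complexity_normalized(LempelZiv76(), x)   # close to 1 for random sequences
```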
10 changes: 3 additions & 7 deletions src/complexity_measures/missing_dispersion.jl
@@ -9,8 +9,7 @@ export MissingDispersionPatterns
MissingDispersionPatterns(est = Dispersion())

An estimator for the number of missing dispersion patterns (``N_{MDP}``), a complexity
measure which can be used to detect nonlinearity in time series (Zhou et al.,
2022)[^Zhou2022].
measure which can be used to detect nonlinearity in time series [Zhou2023](@cite).

Used with [`complexity`](@ref) or [`complexity_normalized`](@ref), whose implementation
uses [`missing_outcomes`](@ref).
Expand All @@ -36,20 +35,17 @@ undersampling.
!!! note "Encoding"
[`Dispersion`](@ref)'s linear mapping from CDFs to integers is based on equidistant
partitioning of the interval `[0, 1]`. This is slightly different from Zhou et
al. (2022), which uses the linear mapping ``s_i := \\text{round}(y + 0.5)``.
al. (2023)[Zhou2023](@ref), which uses the linear mapping ``s_i := \\text{round}(y + 0.5)``.

## Usage

In Zhou et al. (2022), [`MissingDispersionPatterns`](@ref) is used to detect nonlinearity
In Zhou et al. (2023)[Zhou2023](@ref), [`MissingDispersionPatterns`](@ref) is used to detect nonlinearity
in time series by comparing the ``N_{MDP}`` for a time series `x` to ``N_{MDP}`` values for
an ensemble of surrogates of `x`. If ``N_{MDP} > q_{MDP}^{WIAAFT}``, where
``q_{MDP}^{WIAAFT}`` is some `q`-th quantile of the surrogate ensemble, then it is
taken as evidence for nonlinearity.

See also: [`Dispersion`](@ref), [`ReverseDispersion`](@ref), [`total_outcomes`](@ref).

[^Zhou2022]: Zhou, Q., Shang, P., & Zhang, B. (2022). Using missing dispersion patterns
to detect determinism and nonlinearity in time series data. Nonlinear Dynamics, 1-20.
"""
Base.@kwdef struct MissingDispersionPatterns{D} <: ComplexityEstimator
est::D = Dispersion()
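And a corresponding sketch for `MissingDispersionPatterns`, with default `Dispersion()` parameters assumed; for the surrogate-based nonlinearity test described in the docstring above, the same computation would be repeated over an ensemble of surrogates of `x`:

```julia
using ComplexityMeasures

x = randn(10_000)
est = MissingDispersionPatterns(Dispersion())
n_mdp      = complexity(est, x)              # number of missing dispersion patterns
n_mdp_norm = complexity_normalized(est, x)   # normalized to [0, 1]
```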