
Complexity API #134

Merged
merged 6 commits into from
Oct 18, 2022
Changes from 5 commits
9 changes: 7 additions & 2 deletions docs/src/complexity_measures.md
@@ -1,8 +1,13 @@
# [Complexity measures](@id complexity_measures)
# [Complexity API](@id complexity_measures)
Member

Suggested change
# [Complexity API](@id complexity_measures)
# [Complexity measures API](@id complexity_measures)

Complexity is the name of a scientific field (the study of complex systems).


```@docs
complexity
complexity_normalized
```

## Reverse dispersion entropy

```@docs
reverse_dispersion
ReverseDispersion
distance_to_whitenoise
```
2 changes: 1 addition & 1 deletion src/Entropies.jl
@@ -21,7 +21,7 @@ include("entropy.jl")
include("symbolization/symbolize.jl")
include("probabilities_estimators/probabilities_estimators.jl")
include("entropies/entropies.jl")
include("complexity_measures/complexity_measures.jl")
include("complexity.jl")

include("deprecations.jl")

36 changes: 36 additions & 0 deletions src/complexity.jl
@@ -0,0 +1,36 @@
export ComplexityMeasure
export complexity
export complexity_normalized

"""
ComplexityMeasure

Abstract type for (entropy-like) complexity measures.
Member

Suggested change
Abstract type for (entropy-like) complexity measures.
Supertype for (entropy-like) complexity measures.

(that's how it's typically called, see e.g. `AbstractDict`)

"""
abstract type ComplexityMeasure end
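A sketch of what a new measure plugging into this interface might look like (`MyMeasure` and its formula are illustrative only, not part of this PR):

```julia
using Entropies

# Hypothetical measure: mean absolute increment of the timeseries.
struct MyMeasure <: ComplexityMeasure end

# New measures subtype `ComplexityMeasure` and add a method to `complexity`
# (and, if a meaningful normalization exists, to `complexity_normalized`).
Entropies.complexity(::MyMeasure, x) = sum(abs, diff(x)) / (length(x) - 1)

complexity(MyMeasure(), rand(100))
```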

"""
complexity(c::ComplexityMeasure, x)

Estimate the complexity measure `c` for input data `x`, where `c` can be any of the
following measures:

- [`ReverseDispersion`](@ref).

Member

Shall we add the standard 'see Input data for how Entropies.jl expects input types' note here as well, like in `probabilities`?

"""
function complexity end
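A minimal usage sketch of this dispatch-based API, assuming the package is loaded as `Entropies` and using the `ReverseDispersion` measure defined later in this PR:

```julia
using Entropies

x = rand(10_000)                     # example input timeseries
c = ReverseDispersion(m = 2, τ = 1)  # the measure carries its own parameters

complexity(c, x)
```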

"""
complexity_normalized(c::ComplexityMeasure, x) → m ∈ [a, b]

Estimate the normalized complexity measure `c` for input data `x`, where `c` can
be any of the following measures:

- [`ReverseDispersion`](@ref).

The potential range `[a, b]` of the output value depends on `c`. See the documentation
strings for the individual measures to get the normalized ranges.
Member

We should avoid duplicating information here. We shouldn't list the available measures; instead, cross-reference the `complexity` docstring.

I'm also not sure how much value the entire last sentence adds, and I would remove it.

But what even is the normalized complexity measure? For entropy this is clearly defined: the value of the entropy divided by the maximum possible value for the given estimator. Isn't it the same here? If so, maybe just say this?

Member Author

> We should avoid duplicating information here. We shouldn't list the available measures; instead, cross-reference the `complexity` docstring.

That makes sense.

> But what even is the normalized complexity measure? For entropy this is clearly defined: the value of the entropy divided by the maximum possible value for the given estimator. Isn't it the same here? If so, maybe just say this?

Given that there are many different concepts that fall under the umbrella "complexity measure", there is no general way, as there is for entropy, to know how one would like to normalize it. It would be method-specific.

Member Author

See my latest commit for a suggestion on improved docstrings.

"""
function complexity_normalized end
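A sketch contrasting the raw and normalized variants for `ReverseDispersion` (the normalized range is measure-specific, as discussed in the review comments above):

```julia
x = rand(10_000)
c = ReverseDispersion()

complexity(c, x)             # raw value, ≥ 0
complexity_normalized(c, x)  # for ReverseDispersion, lies in [0, 1]
```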

include("complexity_measures/complexity_measures.jl")
104 changes: 61 additions & 43 deletions src/complexity_measures/reverse_dispersion_entropy.jl
@@ -1,42 +1,13 @@
export reverse_dispersion
export ReverseDispersion
export distance_to_whitenoise

# Note: this is not an entropy estimator, so we don't use the entropy_xxx_norm interface
# for normalization, even though we rely on `alphabet_length`.
"""
distance_to_whitenoise(p::Probabilities, estimator::Dispersion; normalize = false)
ReverseDispersion <: ComplexityMeasure
ReverseDispersion(; m = 2, τ = 1, check_unique = true,
symbolization::SymbolizationScheme = GaussianSymbolization(c = 5)
)

Compute the distance of the probability distribution `p` from a uniform distribution,
given the parameters of `estimator` (which must be known beforehand).

If `normalize == true`, then normalize the value to the interval `[0, 1]` by using the
parameters of `estimator`.

Used to compute reverse dispersion entropy([`reverse_dispersion`](@ref);
Li et al., 2019[^Li2019]).

[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
"""
function distance_to_whitenoise(p::Probabilities, est::Dispersion; normalize = false)
# We can safely skip non-occurring symbols, because they don't contribute
# to the sum in eq. 3 in Li et al. (2019)
Hrde = sum(abs2, p) - (1 / alphabet_length(est))

if normalize
return Hrde / (1 - (1 / alphabet_length(est)))
else
return Hrde
end
end

# Note again: this is a *complexity measure*, not an entropy estimator, so we don't use
# the entropy_xxx_norm interface for normalization, even though we rely on `alphabet_length`.
"""
reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion();
normalize = true) where T <: Real

Compute the reverse dispersion entropy complexity measure (Li et al., 2019)[^Li2019].
Estimator for the reverse dispersion entropy complexity measure (Li et al., 2019)[^Li2019].

## Description

@@ -54,23 +25,70 @@ embedding dimension `m` and embedding delay `τ`.
Recommended parameter values[^Li2018] are `m ∈ [2, 3]`, `τ = 1` for the embedding, and
`c ∈ [3, 4, …, 8]` categories for the Gaussian mapping.

If `normalize == true`, then the reverse dispersion entropy is normalized to `[0, 1]`.
If normalizing (see [`complexity_normalized`](@ref)), the reverse dispersion entropy is normalized to `[0, 1]`.

The minimum value of ``H_{rde}`` is zero and occurs precisely when the dispersion
pattern distribution is flat, which occurs when all ``p_i``s are equal to ``1/c^m``.
Because ``H_{rde} \\geq 0``, ``H_{rde}`` can therefore be said to be a measure of how far
the dispersion pattern probability distribution is from white noise.

## Data requirements

Like for [`Dispersion`](@ref), the input must have more than one unique element for the
default Gaussian mapping symbolization to be well-defined. Li et al. (2018) recommend
that `x` has at least 1000 data points.

If `check_unique == true` (default), then it is checked that the input has
more than one unique value. If `check_unique == false` and the input only has one
unique element, then an `InexactError` is thrown when trying to compute probabilities.

[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
"""
function reverse_dispersion(x::AbstractVector{T}, est::Dispersion = Dispersion();
normalize = true) where T <: Real
Base.@kwdef struct ReverseDispersion{S <: SymbolizationScheme} <: ComplexityMeasure
symbolization::S = GaussianSymbolization(c = 5)
m::Int = 2
τ::Int = 1
check_unique::Bool = false
end
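A sketch of constructing the estimator with non-default parameters, following the recommended values quoted in the docstring (`m ∈ [2, 3]`, `τ = 1`, `c ∈ [3, 4, …, 8]`):

```julia
est = ReverseDispersion(
    symbolization = GaussianSymbolization(c = 4),
    m = 3,
    τ = 1,
    check_unique = true,
)
```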

alphabet_length(est::ReverseDispersion)::Int = alphabet_length(est.symbolization) ^ est.m

"""
distance_to_whitenoise(p::Probabilities, estimator::ReverseDispersion; normalize = false)

Compute the distance of the probability distribution `p` from a uniform distribution,
given the parameters of `estimator` (which must be known beforehand).

p = probabilities(x, est)
If `normalize == true`, then normalize the value to the interval `[0, 1]` by using the
parameters of `estimator`.

Used to compute reverse dispersion entropy ([`ReverseDispersion`](@ref);
Li et al., 2019[^Li2019]).

[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
"""
function distance_to_whitenoise(p::Probabilities, est::ReverseDispersion; normalize = false)
# We can safely skip non-occurring symbols, because they don't contribute
# to the sum in eq. 3 in Li et al. (2019)
Hrde = sum(abs2, p) - (1 / alphabet_length(est))

if normalize
return Hrde / (1 - (1 / alphabet_length(est)))
else
return Hrde
end
end
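A quick numerical check of the two limiting cases described in the docstring, assuming `Probabilities` is used as in the test file below:

```julia
est = ReverseDispersion(m = 2, symbolization = GaussianSymbolization(c = 3))
L = 3^2  # c^m = 9 possible dispersion patterns (alphabet_length(est))

# Flat distribution: Hrde = sum(p.^2) - 1/L = L*(1/L)^2 - 1/L = 0.
flat = Probabilities(fill(1 / L, L))
distance_to_whitenoise(flat, est)                      # ≈ 0, up to floating-point error

# Single non-zero pattern: Hrde = 1 - 1/L, so the normalized value is 1.
peaked = Probabilities([1.0; zeros(L - 1)])
distance_to_whitenoise(peaked, est, normalize = true)  # == 1.0
```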

function complexity(c::ReverseDispersion, x)
(; symbolization, m, τ, check_unique) = c
p = probabilities(x, Dispersion(; symbolization, m, τ, check_unique))
return distance_to_whitenoise(p, c, normalize = false)
end

# The following step combines distance information with the probabilities, so
# from here on, it is not possible to use `renyi_entropy` or similar methods, because
# we're not dealing with probabilities anymore.
Hrde = distance_to_whitenoise(p, est, normalize = normalize)
function complexity_normalized(c::ReverseDispersion, x)
(; symbolization, m, τ, check_unique) = c
p = probabilities(x, Dispersion(; symbolization, m, τ, check_unique))
return distance_to_whitenoise(p, c, normalize = true)
end
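For reference, a sketch of how the old standalone-function calls map onto the new generic API (mirroring the test updates below):

```julia
x = rand(1_000)

# Previously: reverse_dispersion(x, normalize = false)
complexity(ReverseDispersion(), x)

# Previously: reverse_dispersion(x, normalize = true)
complexity_normalized(ReverseDispersion(), x)
```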
2 changes: 2 additions & 0 deletions src/symbolization/GaussianSymbolization.jl
@@ -53,6 +53,8 @@ Base.@kwdef struct GaussianSymbolization{I <: Integer} <: SymbolizationScheme
c::I = 3
end

alphabet_length(symbolization::GaussianSymbolization) = symbolization.c

g(xᵢ, μ, σ) = exp((-(xᵢ - μ)^2)/(2σ^2))

"""
7 changes: 7 additions & 0 deletions src/symbolization/symbolize.jl
@@ -5,6 +5,13 @@ An abstract type for symbolization schemes.
"""
abstract type SymbolizationScheme end

# The internal structure of different symbolization schemes may be different, so use
# `alphabet_length` to have a consistent way of getting the total number of possible states.
# The default behaviour is to throw an ArgumentError when a normalized quantity that
# depends on `alphabet_length` is computed for a scheme that does not define it.
alphabet_length(s::S) where S <: SymbolizationScheme =
throw(ArgumentError("`alphabet_length` not defined for $S."))
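A sketch of how a hypothetical new symbolization scheme would opt in to this interface (`MySymbolization` is illustrative only, not part of this PR):

```julia
# Hypothetical scheme with `n` possible symbols per element.
struct MySymbolization <: SymbolizationScheme
    n::Int
end

# Defining `alphabet_length` enables normalization-related functionality;
# without it, the fallback above throws an ArgumentError.
alphabet_length(s::MySymbolization) = s.n
```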

"""
symbolize(x, scheme::SymbolizationScheme) → Vector{Int}
symbolize!(s, x, scheme::SymbolizationScheme) → Vector{Int}
8 changes: 4 additions & 4 deletions test/complexity_measures/reverse_dispersion.jl
@@ -1,18 +1,18 @@
x = rand(100)
@test reverse_dispersion(x) isa Real
@test 0.0 <= reverse_dispersion(x, normalize = true) <= 1.0
@test complexity(ReverseDispersion(), x) isa Real
@test 0.0 <= complexity_normalized(ReverseDispersion(), x) <= 1.0

@testset "Distance to whitenoise" begin
m, n_classes = 2, 2
est = Dispersion(m = m, symbolization = GaussianSymbolization(c = n_classes))
est = ReverseDispersion(m = m, symbolization = GaussianSymbolization(c = n_classes))

# Reverse dispersion entropy is 0 when all probabilities are identical and equal
# to 1/(n_classes^m).
flat_dist = Probabilities(repeat([1/m^n_classes], m^n_classes))
Hrde_minimal = distance_to_whitenoise(flat_dist, est, normalize = false)
@test round(Hrde_minimal, digits = 7) ≈ 0.0

# Reverse dispersion entropy is maximal when there is only one non-zero dispersal
# Reverse dispersion entropy is maximal when there is only one non-zero dispersal
# pattern. Then reverse dispersion entropy is
# 1 - 1/(n_classes^m). When normalizing to this value, the RDE should be 1.0.
m, n_classes = 2, 2