diff --git a/README.md b/README.md index 2efd2e8191df8..923b40b68a134 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Basic statistics functions for Julia * `midrange(a)`: Compute the mid point of the range of `a` (e.g `(max(a) + min(a) / 2)`). * `modes(a)`: Compute all modes of `a`. Be warned that every element of an array with no repeated elements is considered a mode. * `indicators(a)`: Encode categories using one-hot scheme aka one-of-C encoding, indicator matrix or dummy variables. Optionally, you can provide a list of possible values, e.g. ["A", "B, "C"] or [1:3]. +* `pacf(A, l, method)`: Compute partial autocorrelation of an array `A` at lag(s) `l`. The computational method acn either be :regression (default) or :yulewalker. * `percentile(a)`: Compute the percentiles (0%, 10%, ..., 100%) of `a`. * `quantile(a)`: Compute any desired quantile of `a`. * `quartile(a): Compute the quartiles of `a`. diff --git a/src/Stats.jl b/src/Stats.jl index 401bc7e5bfdc2..74dbe4ba2a267 100644 --- a/src/Stats.jl +++ b/src/Stats.jl @@ -41,21 +41,24 @@ module Stats acf, ccf, cor_spearman, - cor_kendall, + cor_kendall, + pacf, # others ## Types StatisticalModel, RegressionModel, - ## Fcuntions + ## Functions coef, coeftable, confint, + durbin, ecdf, findat, indicators, inverse_rle, + levinson, loglikelihood, nobs, predict, @@ -68,6 +71,7 @@ module Stats include("means.jl") include("scalar_stats.jl") include("intstats.jl") + include("toeplitzsolvers.jl") include("corr.jl") include("others.jl") diff --git a/src/corr.jl b/src/corr.jl index f496eabeb49e6..69d1f5906078e 100644 --- a/src/corr.jl +++ b/src/corr.jl @@ -234,4 +234,50 @@ ccf{T<:Real}(X::AbstractMatrix{T}, args1...; args2...) = ccf(float(X), args1...; ccf{T<:Real}(x::AbstractMatrix{T}, lags::Integer; correlation::Bool=true, demean::Bool=true) = reshape(ccf(x, lags:lags, correlation=correlation, demean=demean), size(x, 2), size(x, 2)) - +# Partial autoroccelation +function pacf{T<:BlasReal}(X::AbstractMatrix{T}, lags::AbstractVector{Int} = 0:min(size(X,1)-1, int(10log10(size(X,1)))); + method::Symbol = :regression) + n, p = size(X) + nk = length(lags) + mk = max(lags) + if min(lags) < 0 error("Negative autoroccelations not allowed") end + if 2mk >= n error("Can at most calculate pacf for $(div(n,2) - 1) lags, you requested $mk") end + val = Array(T, nk, p) + if method == :regression + tmpX = ones(T, n, mk + 1) + for j = 1:p + for l = 1:mk + for i = 1+l:n + tmpX[i,l+1] = X[i-l,j] + end + end + i = 1 + for l in lags + sX = sub(tmpX, 1+l:n, 1:l+1) + val[i,j] = (cholfact!(sX'sX)\(sX'sub(X, 1+l:n, j)))[end] + i += 1 + end + end + elseif method == :yulewalker + tmp = Array(T, mk) + for j = 1:p + acfs = acf(sub(X,1:n,j),1:mk) + i = 1 + for l in lags + if l == 0 + val[i,j] = one(T) + elseif l == 1 + val[i,j] = acfs[i] + else + val[i,j] = -durbin!(sub(acfs, 1:l), tmp)[l] + end + i += 1 + end + end + else + error("No such method") + end + return val +end +pacf{T<:Real}(X::AbstractMatrix{T}, args1...; args2...) = pacf(float(X), args1...; args2...) +pacf(x::AbstractVector, args1...; args2...) = squeeze(pacf(reshape(x, length(x), 1), args1...; args2...),2) diff --git a/src/toeplitzsolvers.jl b/src/toeplitzsolvers.jl new file mode 100644 index 0000000000000..8e90364addf5e --- /dev/null +++ b/src/toeplitzsolvers.jl @@ -0,0 +1,66 @@ +# Symmetric Toeplitz solver +function durbin!{T<:BlasReal}(r::AbstractVector{T}, y::AbstractVector{T}) + n = length(r) + n <= length(y) || throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) + y[1] = -r[1] + β = one(T) + α = -r[1] + for k = 1:n-1 + β *= one(T) - α*α + α = -r[k+1] + for j = 1:k + α -= r[k-j+1]*y[j] + end + α /= β + for j = 1:div(k,2) + tmp = y[j] + y[j] += α*y[k-j+1] + y[k-j+1] += α*tmp + end + if isodd(k) y[div(k,2)+1] *= one(T) + α end + y[k+1] = α + end + return y +end +durbin{T<:BlasReal}(r::AbstractVector{T}) = durbin!(r, zeros(T, length(r))) + +function levinson!{T<:BlasReal}(r::AbstractVector{T}, b::AbstractVector{T}, x::AbstractVector{T}) + n = length(b) + n == length(r) || throw(DimensionMismatch("Vectors must have same length")) + n <= length(x) || throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) + x[1] = b[1] + b[1] = -r[2]/r[1] + β = one(T) + α = -r[2]/r[1] + for k = 1:n-1 + β *= one(T) - α*α + μ = b[k+1] + for j = 2:k+1 + μ -= r[j]/r[1]*x[k-j+2] + end + μ /= β + for j = 1:k + x[j] += μ*b[k-j+1] + end + x[k+1] = μ + if k < n - 1 + α = -r[k+2] + for j = 2:k+1 + α -= r[j]*b[k-j+2] + end + α /= β*r[1] + for j = 1:div(k,2) + tmp = b[j] + b[j] += α*b[k-j+1] + b[k-j+1] += α*tmp + end + if isodd(k) b[div(k,2)+1] *= one(T) + α end + b[k+1] = α + end + end + for i = 1:n + x[i] /= r[1] + end + return x +end +levinson{T<:BlasReal}(r::AbstractVector{T}, b::AbstractVector{T}) = levinson!(r, copy(b), zeros(T, length(b))) diff --git a/test/01.jl b/test/01.jl index 147ac55a96d2e..ad4dfa046aa39 100644 --- a/test/01.jl +++ b/test/01.jl @@ -180,6 +180,22 @@ for i =1:length(racvx12nodemean) @test_approx_eq racvx12nodemean[i] jacvx12nodemean[i] end println("OK") +print("Test partial autocorrelation (regression): ") + @test_approx_eq pacf(x[:,1], 1:4) [-0.218158122381419, + 0.195015316828711, + 0.144315804606139, + -0.199791229449779] + + + +println("OK") + +print("Test partial autocorrelation (Yule Walker): ") + @test_approx_eq pacf(x[:,1], 1:4, method = :yulewalker) [-0.221173011668873, + 0.189683314308021, + 0.111857020733719, + -0.175020669835420] +println("OK") @test iqr([1, 2, 3, 4, 5]) == [2.0, 4.0]