Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed linear model with perfectly collinear rhs variables and weights #432

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
26 changes: 21 additions & 5 deletions src/linpred.jl
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,27 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) w
end

function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
cf = cholfactors(p.chol)
piv = p.chol.p
cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv]
cholesky!(Hermitian(cf, Symbol(p.chol.uplo)))
ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r))
ch = p.chol
rnk = rank(ch)
if rnk == length(ch.p)
cf = cholfactors(ch)
piv = ch.p
cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv]
cholesky!(Hermitian(cf, Symbol(ch.uplo)))
ldiv!(ch, mul!(p.delbeta, transpose(p.scratchm1), r))
else
xc = p.X
scr = mul!(p.scratchm1, Diagonal(wt), xc)
scr2 = mul!(p.scratchm2, adjoint(xc), scr)
ch2 = cholesky!(scr2, Val(true), tol = -one(T), check = false)
delbeta = mul!(p.delbeta, adjoint(scr), r)
permute!(delbeta, ch.p)
for k=(rnk+1):length(delbeta)
delbeta[k] = -zero(T)
end
LAPACK.potrs!(ch2.uplo, view(ch2.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk))
invpermute!(delbeta, ch.p)
end
p
end

Expand Down
18 changes: 18 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,24 @@ end
@test isapprox(coef(m2p_dep_pos_kw), coef(m2p))
end

@testset "collinearity and weights" begin
rng = StableRNG(1234321)
x1 = randn(100)
x1_2 = 3 * x1
x2 = 10 * randn(100)
x2_2 = -2.4 * x2
y = 1 .+ randn() * x1 + randn() * x2 + 2 * randn(100)
df = DataFrame(y = y, x1 = x1, x2 = x1_2, x3 = x2, x4 = x2_2, weights = repeat([1, 0.5],50))
f = @formula(y ~ x1 + x2 + x3 + x4)
lm_model = lm(f, df, wts = df.weights)
X = [ones(length(y)) x1_2 x2_2]
W = Diagonal(df.weights)
coef_naive = (X'W*X)\X'W*y
@test lm_model.model.pp.chol isa CholeskyPivoted
@test rank(lm_model.model.pp.chol) == 3
@test filter(!=(0.0), coef(lm_model)) ≈ coef_naive atol=1e-6
end

@testset "saturated linear model" begin
df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])
model = lm(@formula(y ~ x), df)
Expand Down