Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed linear model with perfectly collinear rhs variables and weights #432

Closed
wants to merge 11 commits into from
Prev Previous commit
Next Next commit
Merge branch 'master' into master
nalimilan authored Aug 28, 2022

Verified

This commit was signed with the committer’s verified signature.
yanmxa Meng Yan
commit dd58d18ae4c81ba12f65587ca6905cd4cab8e754
135 changes: 134 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -157,7 +157,140 @@ end
coef_naive = (X'W*X)\X'W*y
@test lm_model.model.pp.chol isa CholeskyPivoted
@test rank(lm_model.model.pp.chol) == 3
@test isapprox(filter(!=(0.0), coef(lm_model)), coef_naive)
@test filter(!=(0.0), coef(lm_model)) ≈ coef_naive atol=1e-6
end

@testset "saturated linear model" begin
    df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])

    # For a saturated fit the dispersion is infinite, so every coefficient row
    # of the coeftable (columns 2:end) reads: stderr=Inf, z=0, p=1, CI=(-Inf, Inf).
    saturated_rows = [Inf 0.0 1.0 -Inf Inf
                      Inf 0.0 1.0 -Inf Inf
                      Inf 0.0 1.0 -Inf Inf]

    # Each pair is (fitted model, expected coefficients); lm and glm with a
    # Normal/identity specification must agree.
    fits = [(lm(@formula(y ~ x), df), [1, 1, 2]),
            (lm(@formula(y ~ 0 + x), df), [1, 2, 3]),
            (glm(@formula(y ~ x), df, Normal(), IdentityLink()), [1, 1, 2]),
            (glm(@formula(y ~ 0 + x), df, Normal(), IdentityLink()), [1, 2, 3])]
    for (model, expected_coef) in fits
        ct = coeftable(model)
        @test dof_residual(model) == 0
        @test dof(model) == 4
        @test isinf(GLM.dispersion(model.model))
        @test coef(model) ≈ expected_coef
        @test isequal(hcat(ct.cols[2:end]...), saturated_rows)
    end

    # Saturated and rank-deficient model: the redundant collinear column gets
    # zero coefficients and all-NaN inference rows.
    df2 = DataFrame(x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3])
    model = lm(@formula(y ~ x1 + x2), df2)
    ct = coeftable(model)
    @test dof_residual(model) == 0
    @test dof(model) == 4
    @test isinf(GLM.dispersion(model.model))
    @test coef(model) ≈ [1, 1, 2, 0, 0]
    @test isequal(hcat(ct.cols[2:end]...),
                  vcat(saturated_rows, fill(NaN, 2, 5)))

    # TODO: add tests similar to the one above once this model can be fitted
    @test_broken glm(@formula(y ~ x1 + x2), df2, Normal(), IdentityLink())
end

@testset "Linear model with no intercept" begin
    @testset "Test with NoInt1 Dataset" begin
        # NIST reference dataset for R² of a no-intercept model:
        # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat
        noint1 = DataFrame(x = 60:70, y = 130:140)
        fit1 = lm(@formula(y ~ 0 + x), noint1)
        @test coef(fit1) ≈ [2.07438016528926]
        @test stderror(fit1) ≈ [0.165289256198347E-01]
        @test GLM.dispersion(fit1.model) ≈ 3.56753034006338
        @test dof(fit1) == 2
        @test dof_residual(fit1) == 10
        @test r2(fit1) ≈ 0.999365492298663
        @test adjr2(fit1) ≈ 0.9993020415285
        @test nulldeviance(fit1) ≈ 200585.00000000000
        @test deviance(fit1) ≈ 127.2727272727272
        @test aic(fit1) ≈ 62.149454400575
        @test loglikelihood(fit1) ≈ -29.07472720028775
        @test nullloglikelihood(fit1) ≈ -69.56936343308669
        @test predict(fit1) ≈ [124.4628099173554, 126.5371900826446, 128.6115702479339,
                               130.6859504132231, 132.7603305785124, 134.8347107438017,
                               136.9090909090909, 138.9834710743802, 141.0578512396694,
                               143.1322314049587, 145.2066115702479]
    end
    @testset "Test with NoInt2 Dataset" begin
        # NIST reference dataset for R² of a no-intercept model:
        # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat
        noint2 = DataFrame(x = [4, 5, 6], y = [3, 4, 4])
        fit2 = lm(@formula(y ~ 0 + x), noint2)
        @test coef(fit2) ≈ [0.727272727272727]
        @test stderror(fit2) ≈ [0.420827318078432E-01]
        @test GLM.dispersion(fit2.model) ≈ 0.369274472937998
        @test dof(fit2) == 2
        @test dof_residual(fit2) == 2
        @test r2(fit2) ≈ 0.993348115299335
        @test adjr2(fit2) ≈ 0.990022172949
        @test nulldeviance(fit2) ≈ 41.00000000000000
        @test deviance(fit2) ≈ 0.27272727272727
        @test aic(fit2) ≈ 5.3199453808329
        @test loglikelihood(fit2) ≈ -0.6599726904164597
        @test nullloglikelihood(fit2) ≈ -8.179255266668315
        @test predict(fit2) ≈ [2.909090909090908, 3.636363636363635, 4.363636363636362]
    end
    @testset "Test with without formula" begin
        X = [4 5 6]'
        y = [3, 4, 4]

        tbl = DataFrame(x = [4, 5, 6], y = [3, 4, 4])
        formula_fit = lm(@formula(y ~ 0 + x), tbl)
        matrix_fit = lm(X, y)

        # A formula-based fit and a raw design-matrix fit of the same data must
        # agree on every reported statistic.
        @test GLM.dispersion(formula_fit.model) ≈ GLM.dispersion(matrix_fit)
        for accessor in (coef, stderror, dof, dof_residual, r2, adjr2,
                         nulldeviance, deviance, aic, loglikelihood,
                         nullloglikelihood, predict)
            @test accessor(formula_fit) ≈ accessor(matrix_fit)
        end
    end
end

dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12],
You are viewing a condensed version of this merge commit. You can view the full changes here.