Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed linear model with perfectly collinear rhs variables and weights #432

Closed
wants to merge 11 commits into from
Prev Previous commit
Next Next commit
Merge branch 'master' into master
nalimilan authored Aug 28, 2022

Verified

This commit was signed with the committer’s verified signature.
yanmxa Meng Yan
commit dd58d18ae4c81ba12f65587ca6905cd4cab8e754
135 changes: 134 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -157,7 +157,140 @@ end
coef_naive = (X'W*X)\X'W*y
@test lm_model.model.pp.chol isa CholeskyPivoted
@test rank(lm_model.model.pp.chol) == 3
@test isapprox(filter(!=(0.0), coef(lm_model)), coef_naive)
@test filter(!=(0.0), coef(lm_model)) ≈ coef_naive atol=1e-6
end

@testset "saturated linear model" begin
    df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])

    # For a saturated fit the dispersion is infinite, so every coefficient row
    # of the coeftable (columns 2:end) reads: stderr=Inf, z=0, p=1, CI=(-Inf, Inf).
    saturated_rows = [Inf 0.0 1.0 -Inf Inf
                      Inf 0.0 1.0 -Inf Inf
                      Inf 0.0 1.0 -Inf Inf]

    # Each pair is (fitted model, expected coefficients); lm and glm with a
    # Normal/identity specification must agree.
    fits = [(lm(@formula(y ~ x), df), [1, 1, 2]),
            (lm(@formula(y ~ 0 + x), df), [1, 2, 3]),
            (glm(@formula(y ~ x), df, Normal(), IdentityLink()), [1, 1, 2]),
            (glm(@formula(y ~ 0 + x), df, Normal(), IdentityLink()), [1, 2, 3])]
    for (model, expected_coef) in fits
        ct = coeftable(model)
        @test dof_residual(model) == 0
        @test dof(model) == 4
        @test isinf(GLM.dispersion(model.model))
        @test coef(model) ≈ expected_coef
        @test isequal(hcat(ct.cols[2:end]...), saturated_rows)
    end

    # Saturated and rank-deficient model: the redundant collinear column gets
    # zero coefficients and all-NaN inference rows.
    df2 = DataFrame(x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3])
    model = lm(@formula(y ~ x1 + x2), df2)
    ct = coeftable(model)
    @test dof_residual(model) == 0
    @test dof(model) == 4
    @test isinf(GLM.dispersion(model.model))
    @test coef(model) ≈ [1, 1, 2, 0, 0]
    @test isequal(hcat(ct.cols[2:end]...),
                  vcat(saturated_rows, fill(NaN, 2, 5)))

    # TODO: add tests similar to the one above once this model can be fitted
    @test_broken glm(@formula(y ~ x1 + x2), df2, Normal(), IdentityLink())
end

@testset "Linear model with no intercept" begin
    @testset "Test with NoInt1 Dataset" begin
        # NIST reference dataset for R² of a no-intercept model:
        # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat
        noint1 = DataFrame(x = 60:70, y = 130:140)
        fit1 = lm(@formula(y ~ 0 + x), noint1)
        @test coef(fit1) ≈ [2.07438016528926]
        @test stderror(fit1) ≈ [0.165289256198347E-01]
        @test GLM.dispersion(fit1.model) ≈ 3.56753034006338
        @test dof(fit1) == 2
        @test dof_residual(fit1) == 10
        @test r2(fit1) ≈ 0.999365492298663
        @test adjr2(fit1) ≈ 0.9993020415285
        @test nulldeviance(fit1) ≈ 200585.00000000000
        @test deviance(fit1) ≈ 127.2727272727272
        @test aic(fit1) ≈ 62.149454400575
        @test loglikelihood(fit1) ≈ -29.07472720028775
        @test nullloglikelihood(fit1) ≈ -69.56936343308669
        @test predict(fit1) ≈ [124.4628099173554, 126.5371900826446, 128.6115702479339,
                               130.6859504132231, 132.7603305785124, 134.8347107438017,
                               136.9090909090909, 138.9834710743802, 141.0578512396694,
                               143.1322314049587, 145.2066115702479]
    end
    @testset "Test with NoInt2 Dataset" begin
        # NIST reference dataset for R² of a no-intercept model:
        # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat
        noint2 = DataFrame(x = [4, 5, 6], y = [3, 4, 4])
        fit2 = lm(@formula(y ~ 0 + x), noint2)
        @test coef(fit2) ≈ [0.727272727272727]
        @test stderror(fit2) ≈ [0.420827318078432E-01]
        @test GLM.dispersion(fit2.model) ≈ 0.369274472937998
        @test dof(fit2) == 2
        @test dof_residual(fit2) == 2
        @test r2(fit2) ≈ 0.993348115299335
        @test adjr2(fit2) ≈ 0.990022172949
        @test nulldeviance(fit2) ≈ 41.00000000000000
        @test deviance(fit2) ≈ 0.27272727272727
        @test aic(fit2) ≈ 5.3199453808329
        @test loglikelihood(fit2) ≈ -0.6599726904164597
        @test nullloglikelihood(fit2) ≈ -8.179255266668315
        @test predict(fit2) ≈ [2.909090909090908, 3.636363636363635, 4.363636363636362]
    end
    @testset "Test with without formula" begin
        X = [4 5 6]'
        y = [3, 4, 4]

        tbl = DataFrame(x = [4, 5, 6], y = [3, 4, 4])
        formula_fit = lm(@formula(y ~ 0 + x), tbl)
        matrix_fit = lm(X, y)

        # A formula-based fit and a raw design-matrix fit of the same data must
        # agree on every reported statistic.
        @test GLM.dispersion(formula_fit.model) ≈ GLM.dispersion(matrix_fit)
        for accessor in (coef, stderror, dof, dof_residual, r2, adjr2,
                         nulldeviance, deviance, aic, loglikelihood,
                         nullloglikelihood, predict)
            @test accessor(formula_fit) ≈ accessor(matrix_fit)
        end
    end
end

dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12],
You are viewing a condensed version of this merge commit. You can view the full changes here.