Skip to content

Commit

Permalink
Remove white space
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasnoack committed May 7, 2024
1 parent e2f6c98 commit 827a7e2
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 43 deletions.
2 changes: 1 addition & 1 deletion src/ftest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ function show(io::IO, ftr::FTestResult{N}) where N

# get rid of negative zero -- doesn't matter mathematically,
# but messes up doctests and various other things
# cf. Issue #461
# cf. Issue #461
r2vals = [replace(@sprintf("%.4f", val), "-0.0000" => "0.0000") for val in ftr.r2]

outrows[2, :] = ["[1]", @sprintf("%.0d", ftr.dof[1]), " ",
Expand Down
2 changes: 1 addition & 1 deletion src/glmfit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ function fit(::Type{M},
off = offset === nothing ? similar(y, 0) : offset
wts = wts === nothing ? similar(y, 0) : wts
rr = GlmResp(y, d, l, off, wts)

if method === :cholesky
res = M(rr, cholpred(X, dropcollinear), f, false)
elseif method === :qr
Expand Down
4 changes: 2 additions & 2 deletions src/glmtools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ dispersion_parameter(::Union{Bernoulli, Binomial, Poisson}) = false

"""
_safe_int(x::T)
Convert to Int, when `x` is within 1 eps of an integer.
"""
function _safe_int(x::T) where {T<:AbstractFloat}
Expand All @@ -527,7 +527,7 @@ function loglik_obs end
loglik_obs(::Bernoulli, y, μ, wt, ϕ) = wt*logpdf(Bernoulli(μ), y)
loglik_obs(::Binomial, y, μ, wt, ϕ) = logpdf(Binomial(Int(wt), μ), _safe_int(y*wt))
loglik_obs(::Gamma, y, μ, wt, ϕ) = wt*logpdf(Gamma(inv(ϕ), μ*ϕ), y)
# In Distributions.jl, a Geometric distribution characterizes the number of failures before
# In Distributions.jl, a Geometric distribution characterizes the number of failures before
# the first success in a sequence of independent Bernoulli trials with success rate p.
# The mean of Geometric distribution is (1 - p) / p.
# Hence, p = 1 / (1 + μ).
Expand Down
6 changes: 3 additions & 3 deletions src/linpred.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY, QRPivoted}} <: Dens
scratchbeta::Vector{T}
qr::Q
scratchm1::Matrix{T}

function DensePredQR(X::AbstractMatrix, pivot::Bool=false)
n, p = size(X)
T = typeof(float(zero(eltype(X))))
Expand Down Expand Up @@ -74,7 +74,7 @@ function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T})
rnk == length(p.delbeta) || throw(RankDeficientException(rnk))
X = p.X
W = Diagonal(wt)
sqrtW = Diagonal(sqrt.(wt))
sqrtW = Diagonal(sqrt.(wt))
mul!(p.scratchm1, sqrtW, X)
mul!(p.delbeta, X'W, r)
qnr = qr(p.scratchm1)
Expand All @@ -88,7 +88,7 @@ function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal
if rnk == length(p.delbeta)
p.delbeta = p.qr\r
else
R = @view p.qr.R[:, 1:rnk]
R = @view p.qr.R[:, 1:rnk]
Q = @view p.qr.Q[:, 1:size(R, 1)]
piv = p.qr.p
p.delbeta = zeros(size(p.delbeta))
Expand Down
4 changes: 2 additions & 2 deletions src/lm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<
"argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
dropcollinear = allowrankdeficient_dep
end

if method === :cholesky
fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), nothing))
elseif method === :qr
Expand Down Expand Up @@ -299,7 +299,7 @@ function StatsModels.predict!(res::Union{AbstractVector,
length(res) == size(newx, 1) ||
throw(DimensionMismatch("length of `res` must equal the number of rows in `newx`"))
res .= newx * coef(mm)
elseif mm.pp isa DensePredChol &&
elseif mm.pp isa DensePredChol &&
mm.pp.chol isa CholeskyPivoted &&
mm.pp.chol.rank < size(mm.pp.chol, 2)
throw(ArgumentError("prediction intervals are currently not implemented " *
Expand Down
69 changes: 35 additions & 34 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ using CategoricalArrays, CSV, DataFrames, LinearAlgebra, SparseArrays, StableRNG
using GLM
using StatsFuns: logistic
using Distributions: TDist
using Downloads

test_show(x) = show(IOBuffer(), x)

Expand All @@ -28,7 +29,7 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y
lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form; method=dmethod)
test_show(lm1)
@test isapprox(coef(lm1), linreg(form.Carb, form.OptDen))

@test isapprox(vcov(lm1), Σ)
@test isapprox(cor(lm1), Diagonal(diag(Σ))^(-1/2)*Σ*Diagonal(diag(Σ))^(-1/2))
@test dof(lm1) == 3
Expand Down Expand Up @@ -72,22 +73,22 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y
end

@testset "Cook's Distance in Linear Model with $dmethod" for dmethod in (:cholesky, :qr)
st_df = DataFrame(
st_df = DataFrame(
Y=[6.4, 7.4, 10.4, 15.1, 12.3 , 11.4],
XA=[1.5, 6.5, 11.5, 19.9, 17.0, 15.5],
XB=[1.8, 7.8, 11.8, 20.5, 17.3, 15.8],
XB=[1.8, 7.8, 11.8, 20.5, 17.3, 15.8],
XC=[3., 13., 23., 39.8, 34., 31.],
# values from SAS proc reg
CooksD_base=[1.4068501943, 0.176809102, 0.0026655177, 1.0704009915, 0.0875726457, 0.1331183932],
CooksD_noint=[0.0076891801, 0.0302993877, 0.0410262965, 0.0294348488, 0.0691589296, 0.0273045538],
CooksD_base=[1.4068501943, 0.176809102, 0.0026655177, 1.0704009915, 0.0875726457, 0.1331183932],
CooksD_noint=[0.0076891801, 0.0302993877, 0.0410262965, 0.0294348488, 0.0691589296, 0.0273045538],
CooksD_multi=[1.7122291956, 18.983407026, 0.000118078, 0.8470797843, 0.0715921999, 0.1105843157],
)

# linear regression
t_lm_base = lm(@formula(Y ~ XA), st_df; method=dmethod)
@test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base))

# linear regression, no intercept
# linear regression, no intercept
t_lm_noint = lm(@formula(Y ~ XA +0), st_df; method=dmethod)
@test isapprox(st_df.CooksD_noint, cooksdistance(t_lm_noint))

Expand All @@ -102,20 +103,20 @@ end
@test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli))
end

@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr)
@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr)
df = dataset("quantreg", "engel")
N = nrow(df)
df.weights = repeat(1:5, Int(N/5))
f = @formula(FoodExp ~ Income)

lm_model = lm(f, df, wts = df.weights; method=dmethod)
glm_model = glm(f, df, Normal(), wts = df.weights; method=dmethod)
@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
@test isapprox(r2(lm_model), 0.8330258148644486)
@test isapprox(adjr2(lm_model), 0.832788298242634)
@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813;
@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813;
-0.06772589439264813 6.670664781664879e-5])
@test isapprox(first(predict(lm_model)), 357.57694841780994)
@test isapprox(loglikelihood(lm_model), -4353.946729075838)
Expand Down Expand Up @@ -153,7 +154,7 @@ end
X = ModelMatrix(ModelFrame(f, dfrm)).m
y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1))
inds = deleteat!(collect(1:length(y)), 7:8)

m1 = fit(LinearModel, X, y; method=dmethod)
@test isapprox(deviance(m1), 0.12160301538297297)
Xmissingcell = X[inds, :]
Expand All @@ -170,23 +171,23 @@ end
@test isa(m2p.pp.chol, CholeskyPivoted)
@test isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281,
3.9661379199401257, 5.079410103608552, 6.1944618141188625,
0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915,
0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915,
10.84972230524356, 11.844809275711485])

@test isa(m2p_dep_pos.pp.chol, CholeskyPivoted)
@test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted)
elseif dmethod == :qr
@test_throws RankDeficientException m2 = fit(LinearModel, Xmissingcell, ymissingcell;
method = dmethod, dropcollinear=false)
@test isapprox(coef(m2p), [0.9772643585228962, 11.889730016918342, 3.027347397503282,
3.9661379199401177, 5.079410103608539, 6.194461814118862,
-2.9863884084219015, 7.930328728005132, 8.87999491860477,
-2.9863884084219015, 7.930328728005132, 8.87999491860477,
0.0, 10.849722305243555, 11.844809275711487]) ||
isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281,
3.9661379199401257, 5.079410103608552, 6.1944618141188625,
0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915,
0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915,
10.84972230524356, 11.844809275711485])

@test isa(m2p.pp.qr, QRPivoted)

@test isa(m2p_dep_pos.pp.qr, QRPivoted)
Expand All @@ -198,7 +199,7 @@ end

@test GLM.linpred_rank(m2p.pp) == 11
@test isapprox(deviance(m2p), 0.1215758392280204)

@test GLM.linpred_rank(m2p_dep_pos.pp) == GLM.linpred_rank(m2p.pp)
@test isapprox(deviance(m2p_dep_pos), deviance(m2p))
@test isapprox(coef(m2p_dep_pos), coef(m2p))
Expand All @@ -210,7 +211,7 @@ end

@testset "Saturated linear model with $dmethod" for dmethod in (:cholesky, :qr)
df1 = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])

model = lm(@formula(y ~ x), df1; method=dmethod)
ct = coeftable(model)
@test dof_residual(model) == 0
Expand Down Expand Up @@ -279,7 +280,7 @@ end
# https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat

data = DataFrame(x = 60:70, y = 130:140)

mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky)
@test coef(mdl) [2.07438016528926]
@test stderror(mdl) [0.165289256198347E-01]
Expand All @@ -303,7 +304,7 @@ end
# https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat

data = DataFrame(x = [4, 5, 6], y = [3, 4, 4])

mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky)
@test coef(mdl) [0.727272727272727]
@test stderror(mdl) [0.420827318078432E-01]
Expand All @@ -324,7 +325,7 @@ end
X = [4 5 6]'
y = [3, 4, 4]
data = DataFrame(x = [4, 5, 6], y = [3, 4, 4])

mdl1 = lm(@formula(y ~ 0 + x), data; method=dmethod)
mdl2 = lm(X, y)

Expand Down Expand Up @@ -811,8 +812,8 @@ end
@test aic(gm22) 1112.7422553284146
@test aicc(gm22) 1113.7933502189255
@test bic(gm22) 1136.6111083020812
@test coef(gm22)[1:7] [2.8978546663153897, -0.5701067649409168, 0.08040181505082235,
-0.4497584898742737, 0.08622664933901254, 0.3558996662512287,
@test coef(gm22)[1:7] [2.8978546663153897, -0.5701067649409168, 0.08040181505082235,
-0.4497584898742737, 0.08622664933901254, 0.3558996662512287,
0.29016080736927813]
@test stderror(gm22) [0.22754287093719366, 0.15274755092180423, 0.15928431669166637,
0.23853372776980591, 0.2354231414867577, 0.24750780320597515,
Expand Down Expand Up @@ -1405,7 +1406,7 @@ end
────────────────────────────────────────────────────────────────
DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
────────────────────────────────────────────────────────────────
[1] 2 3.2292 0.0000
[1] 2 3.2292 0.0000
[2] 3 1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-7
────────────────────────────────────────────────────────────────"""
end
Expand Down Expand Up @@ -1852,7 +1853,7 @@ end
end

@testset "dropcollinear in GLM with Cholesky" begin
data = DataFrame(x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1],
data = DataFrame(x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1],
x3=[4.2, 4.6, 8.4, 6.2, 4.2], y=[14, 14, 24, 20, 11])

@testset "Check normal with identity link against equivalent linear model" begin
Expand Down Expand Up @@ -2001,7 +2002,7 @@ end
# see issue 503
y, μ, wt, ϕ = 0.6376811594202898, 0.8492925285671102, 69.0, NaN
# due to floating point:
# 1. y * wt == 43.99999999999999
# 1. y * wt == 43.99999999999999
# 2. 44 / y == wt
# 3. 44 / wt == y
@test GLM.loglik_obs(Binomial(), y, μ, wt, ϕ) GLM.logpdf(Binomial(Int(wt), μ), 44)
Expand All @@ -2018,8 +2019,8 @@ end
# > data(Duncan)
# > lm1 = lm(prestige ~ 1 + income + education, Duncan)
# > vif(lm1)
# income education
# 2.1049 2.1049
# income education
# 2.1049 2.1049
# > lm2 = lm(prestige ~ 1 + income + education + type, Duncan)
# > vif(lm2)
# GVIF Df GVIF^(1/(2*Df))
Expand All @@ -2030,26 +2031,26 @@ end
lm1 = lm(@formula(Prestige ~ 1 + Income + Education), duncan)
@test termnames(lm1)[2] == coefnames(lm1)
@test vif(lm1) gvif(lm1)

lm1_noform = lm(modelmatrix(lm1), response(lm1))
@test vif(lm1) vif(lm1_noform)
@test_throws ArgumentError("model was fitted without a formula") gvif(lm1_noform)

lm1log = lm(@formula(Prestige ~ 1 + exp(log(Income)) + exp(log(Education))), duncan)
@test termnames(lm1log)[2] == coefnames(lm1log) == ["(Intercept)", "exp(log(Income))", "exp(log(Education))"]
@test vif(lm1) vif(lm1log)

gm1 = glm(modelmatrix(lm1), response(lm1), Normal())
@test vif(lm1) vif(gm1)

lm2 = lm(@formula(Prestige ~ 1 + Income + Education + Type), duncan)
@test termnames(lm2)[2] != coefnames(lm2)
@test gvif(lm2; scale=true) [1.486330, 2.301648, 1.502666] atol=1e-4

gm2 = glm(@formula(Prestige ~ 1 + Income + Education + Type), duncan, Normal())
@test termnames(gm2)[2] != coefnames(gm2)
@test gvif(gm2; scale=true) [1.486330, 2.301648, 1.502666] atol=1e-4
@test gvif(gm2; scale=true) [1.486330, 2.301648, 1.502666] atol=1e-4

# the VIF definition depends on modelmatrix, vcov and stderror returning valid
# values. It doesn't care about links, offsets, etc. as long as the model matrix,
# vcov matrix and stderrors are well defined.
Expand Down

0 comments on commit 827a7e2

Please sign in to comment.