-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsantaR.R
100 lines (85 loc) · 4.23 KB
/
santaR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#' Source: https://rdrr.io/cran/santaR/src/R/df_search.R
#'
#' Calculate the penalised loglikelihood of a smooth.spline
#'
#' Calculate the penalised loglikelihood of a \code{\link[stats]{smooth.spline}} using the integrated second derivative. The likelihood consists of 1) the (weighted) residuals sum of squares, 2) a penalty term (integrated second derivative = total curvature). The smaller the penalised loglikelihood, the better the fit as the residuals and penalty on roughness are minimised. Adapted from \code{aroma.light::likelihood.smooth.spline}.
#'
#' @param fittedSmoothSpline A fitted \code{\link[stats]{smooth.spline}}
#'
#' @return The penalised loglikelihood.
loglik_smooth_spline <- function(fittedSmoothSpline) {
## Return the penalised loglikelihood
#
# FDA (Functional Data Analysis) uses the integrated squared second derivative as roughness, here we use the integrated second derivative.
#
# Calculate the (log) likelihood of a spline given the data used to fit the spline.
# The likelihood consists of two main parts:
# 1) (weighted) residuals sum of squares
# 2) a penalty term - roughness is the integrated second derivative aka total curvature
# The penalty term consists of a smoothing parameter lambda and a roughness measure of the spline
# J(s) = INTEGRAL s''(t) dt.
# The overall log likelihood is log L(s|x) = (y-s(x))'W(y-s(x)) + lambda J(s)
s <- fittedSmoothSpline
x <- fittedSmoothSpline$x
y <- fittedSmoothSpline$y
w <- fittedSmoothSpline$w
yinput <- fittedSmoothSpline$yin
# weighted residuals sum of squares (weighted euclidian dist yinput to ypred) = loglikelihood without penality (but then forgets the smoothing part of the equation)
wrss <- sum(w * (yinput-y)^2)
# smoothing parameter
lambda <- s$lambda
# roughness score
sDeriv <- stats::smooth.spline(stats::predict(s, x, deriv=2))
ab <- range(x, na.rm=TRUE)
Js <- stats::integrate(function(x) stats::predict(sDeriv, x=x)$y,lower=ab[1], upper=ab[2], rel.tol=.Machine$double.eps^(1/8), stop.on.error=FALSE)$value
# penalty term
penalty <- -lambda * Js
# penalised loglikelihood (as it's a sum, if it wasn't a log it would be multiplied)
l <- (wrss + penalty)
# The smoothing spline estimate of a function is defined to be the minimizer of l
return(l)
}
#' Calculate the Akaike Information Criterion for a smooth.spline
#'
#' Calculate the Akaike Information Criterion (\emph{AIC}) for a fitted \code{\link[stats]{smooth.spline}}. The smaller the AIC, the better the spline fit.
#'
#' @param fittedSmoothSpline A fitted \code{\link[stats]{smooth.spline}}
#'
#' @return The AIC value.
AIC_smooth_spline <- function(fittedSmoothSpline) {
# AIC = -2*logLik + k*npar
# for AIC k=2
# npar = df
df = fittedSmoothSpline$df
AIC = -2 * -loglik_smooth_spline(fittedSmoothSpline) + 2 * df
return(AIC)
}
#' Calculate the Akaike Information Criterion Corrected for small observation numbers for a smooth.spline
#'
#' Calculate the Akaike Information Criterion Corrected for small observation numbers (\emph{AICc}) for a fitted \code{\link[stats]{smooth.spline}}. The smaller the AICc, the better the spline fit.
#'
#' @param fittedSmoothSpline A fitted \code{\link[stats]{smooth.spline}}
#'
#' @return The AICc value.
AICc_smooth_spline <- function(fittedSmoothSpline) {
df = fittedSmoothSpline$df
nobs = length(fittedSmoothSpline$yin)
AICc = AIC_smooth_spline(fittedSmoothSpline) + 2 * df * (df + 1) / (nobs - df - 1)
return(AICc)
}
#' Calculate the Bayesian Information Criterion for a smooth.spline
#'
#' Calculate the Bayesian Information Criterion (\emph{BIC}) for a fitted \code{\link[stats]{smooth.spline}}. The smaller the BIC, the better the spline fit.
#'
#' @param fittedSmoothSpline A fitted \code{\link[stats]{smooth.spline}}
#'
#' @return The BIC value.
BIC_smooth_spline <- function(fittedSmoothSpline) {
# BIC = -2*logLik + k*npar
# for BIC k = log(nobs(object)) # nobs being the total number of observations (across all tp and all individuals used for this fitting)
# npar = df
df = fittedSmoothSpline$df
nobs = length(fittedSmoothSpline$yin)
BIC = -2 * -loglik_smooth_spline(fittedSmoothSpline) + log(nobs) * df
return(BIC)
}