Skip to content

Commit

Permalink
Merge pull request #16 from hendersontrent/trent-dev
Browse files Browse the repository at this point in the history
Two-tailed test as default w/ user specified options for directional hypothesis tests
  • Loading branch information
hendersontrent authored Mar 13, 2024
2 parents c295124 + 05aed8b commit bed541d
Show file tree
Hide file tree
Showing 19 changed files with 325 additions and 124 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: correctR
Type: Package
Title: Corrected Test Statistics for Comparing Machine Learning Models on Correlated Samples
Version: 0.1.3
Date: 2023-08-20
Version: 0.2.1
Date: 2024-03-13
Authors@R: c(
person("Trent", "Henderson", email = "then6675@uni.sydney.edu.au", role = c("cre", "aut"))
)
Expand Down
50 changes: 36 additions & 14 deletions R/kfold_ttest.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,25 @@
#' @param y \code{numeric} vector of values for model B
#' @param n \code{integer} denoting total sample size
#' @param k \code{integer} denoting number of folds used in k-fold
#' @return object of class \code{data.frame}
#' @param tailed \code{character} denoting whether to perform a two-tailed or one-tailed test. Can be one of \code{"two"} or \code{"one"}. Defaults to \code{"two"}
#' @param greater \code{character} specifying whether \code{"x"} or \code{"y"} is greater for the one-tailed test if \code{tailed = "one"}. Defaults to \code{NULL}
#' @return \code{data.frame} containing the test statistic and p-value
#' @references Nadeau, C., and Bengio, Y. Inference for the Generalization Error. Machine Learning 52, (2003).
#' @references Corani, G., Benavoli, A., Demsar, J., Mangili, F., and Zaffalon, M. Statistical comparison of classifiers through Bayesian hierarchical modelling. Machine Learning, 106, (2017).
#' @author Trent Henderson
#' @export
#' @examples
#' x <- rnorm(100, mean = 95, sd = 0.5)
#' y <- rnorm(100, mean = 90, sd = 1)
#' kfold_ttest(x = x, y = y, n = 100, k = 5, tailed = "two")
#'

kfold_ttest <- function(x, y, n, k){
kfold_ttest <- function(x, y, n, k, tailed = c("two", "one"), greater = NULL){

# Arg checks

tailed <- match.arg(tailed)

if(length(x) != length(y)){
stop("x and y are not the same length.")
}
Expand All @@ -31,25 +39,39 @@ kfold_ttest <- function(x, y, n, k){
stop("n and k should be integer scalars.")
}

# Calculations

d <- x - y # Calculate differences
if(tailed == "one"){
if(!greater %in% c("x", "y")){
stop("If tailed = 'one', greater must be either 'x' or 'y'")
}
}

# Catch for when there is zero difference(s) between the models
#--------- Calculations ---------

if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
if(tailed == "two"){
d <- x - y
if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
return(tmp)
} else{
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * ((1 / n + (1 / k)) / (1 - 1 / k)))
p.value <- 2 * stats::pt(statistic, n - 1, lower.tail = FALSE)
}
} else{
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * ((1/n + (1/k)) / (1 - 1/k))) # Calculate t-statistic

if(statistic < 0){
p.value <- stats::pt(statistic, n - 1) # p-value for left tail
if(greater == "x"){
d <- x - y
} else{
p.value <- stats::pt(statistic, n - 1, lower.tail = FALSE) # p-value for right tail
d <- y - x
}

tmp <- data.frame(statistic = statistic, p.value = p.value)
if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
return(tmp)
} else{
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * ((1 / n + (1 / k)) / (1 - 1 / k)))
p.value <- stats::pt(statistic, n - 1, lower.tail = FALSE)
}
}

tmp <- data.frame(statistic = statistic, p.value = p.value)
return(tmp)
}
71 changes: 51 additions & 20 deletions R/repkfold_ttest.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,28 @@
#' @param n2 \code{integer} denoting test set size
#' @param k \code{integer} denoting number of folds used in k-fold
#' @param r \code{integer} denoting number of repeats per fold
#' @return object of class \code{data.frame}
#' @param tailed \code{character} denoting whether to perform a two-tailed or one-tailed test. Can be one of \code{"two"} or \code{"one"}. Defaults to \code{"two"}
#' @param greater value specifying which value in the \code{"model"} column is greater for the one-tailed test if \code{tailed = "one"}. Defaults to \code{NULL}
#' @return \code{data.frame} containing the test statistic and p-value
#' @references Nadeau, C., and Bengio, Y. Inference for the Generalization Error. Machine Learning 52, (2003).
#' @references Bouckaert, R. R., and Frank, E. Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms. Advances in Knowledge Discovery and Data Mining. PAKDD 2004. Lecture Notes in Computer Science, 3056, (2004).
#' @author Trent Henderson
#' @export
#' @examples
#' tmp <- data.frame(model = rep(c(1, 2), each = 60),
#' values = c(stats::rnorm(60, mean = 0.6, sd = 0.1),
#' stats::rnorm(60, mean = 0.4, sd = 0.1)),
#' k = rep(c(1, 1, 2, 2), times = 15),
#' r = rep(c(1, 2), times = 30))
#'
#' repkfold_ttest(data = tmp, n1 = 80, n2 = 20, k = 2, r = 2, tailed = "two")
#'

repkfold_ttest <- function(data, n1, n2, k, r){
repkfold_ttest <- function(data, n1, n2, k, r, tailed = c("two", "one"), greater = NULL){

# Arg checks

tailed <- match.arg(tailed)
'%ni%' <- Negate('%in%')

if("model" %ni% colnames(data) || "values" %ni% colnames(data) || "k" %ni% colnames(data) || "r" %ni% colnames(data)){
Expand All @@ -35,34 +46,54 @@ repkfold_ttest <- function(data, n1, n2, k, r){
stop("Column 'model' in data should only have two unique labels (one for each model to compare).")
}

# Calculations

d <- c()

for(i in 1:k){
for(j in 1:r){
x <- data[data$k == i, ]
x <- x[x$r == j, ]
d <- c(d, x[x$model == unique(x$model)[1], c("values")] - x[x$model == unique(x$model)[2], c("values")]) # Differences
if(tailed == "one"){
if(!greater %in% unique(data$model)){
stop("greater must correspond to a value in the 'model' column of data")
}
}

# Catch for when there is zero difference(s) between the models
#--------- Calculations ---------

if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
} else{
if(tailed == "two"){

statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * ((1/(k * r)) + (n2/n1))) # Calculate t-statistic
d <- c()

if(statistic < 0){
p.value <- stats::pt(statistic, (k * r) - 1) # p-value for left tail
for(i in 1:k){
for(j in 1:r){
x <- data[data$k == i, ]
x <- x[x$r == j, ]
d <- append(d, x[x$model == unique(x$model)[1], c("values")] - x[x$model == unique(x$model)[2], c("values")])
}
}
if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
return(tmp)
} else{
p.value <- stats::pt(statistic, (k * r) - 1, lower.tail = FALSE) # p-value for right tail
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * ((1 / (k * r)) + (n2 / n1)))
p.value <- 2 * stats::pt(statistic, (k * r) - 1, lower.tail = FALSE)
}

} else{

d <- c()

for(i in 1:k){
for(j in 1:r){
x <- data[data$k == i, ]
x <- x[x$r == j, ]
d <- append(d, x[x$model == greater, c("values")] - x[x$model != greater, c("values")])
}
}

tmp <- data.frame(statistic = statistic, p.value = p.value)
if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
return(tmp)
} else{
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * ((1 / (k * r)) + (n2 / n1)))
p.value <- stats::pt(statistic, (k * r) - 1, lower.tail = FALSE)
}
}

tmp <- data.frame(statistic = statistic, p.value = p.value)
return(tmp)
}
54 changes: 38 additions & 16 deletions R/resampled_ttest.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,25 @@
#' @param n \code{integer} denoting number of repeat samples. Defaults to \code{length(x)}
#' @param n1 \code{integer} denoting train set size
#' @param n2 \code{integer} denoting test set size
#' @return object of class \code{data.frame}
#' @param tailed \code{character} denoting whether to perform a two-tailed or one-tailed test. Can be one of \code{"two"} or \code{"one"}. Defaults to \code{"two"}
#' @param greater \code{character} specifying whether \code{"x"} or \code{"y"} is greater for the one-tailed test if \code{tailed = "one"}. Defaults to \code{NULL}
#' @return \code{data.frame} containing the test statistic and p-value
#' @references Nadeau, C., and Bengio, Y. Inference for the Generalization Error. Machine Learning 52, (2003).
#' @references Bouckaert, R. R., and Frank, E. Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms. Advances in Knowledge Discovery and Data Mining. PAKDD 2004. Lecture Notes in Computer Science, 3056, (2004).
#' @author Trent Henderson
#' @export
#' @examples
#' x <- rnorm(100, mean = 95, sd = 0.5)
#' y <- rnorm(100, mean = 90, sd = 1)
#' resampled_ttest(x = x, y = y, n = 100, n1 = 80, n2 = 20, tailed = "two")
#'

resampled_ttest <- function(x, y, n, n1, n2){
resampled_ttest <- function(x, y, n, n1, n2, tailed = c("two", "one"), greater = NULL){

# Arg checks

tailed <- match.arg(tailed)

if(length(x) != length(y)){
stop("x and y are not the same length.")
}
Expand All @@ -34,25 +42,39 @@ resampled_ttest <- function(x, y, n, n1, n2){
message("n argument missing. Using length(x) as default.")
}

# Calculations

d <- x - y # Calculate differences
if(tailed == "one"){
if(!greater %in% c("x", "y")){
stop("If tailed = 'one', greater must be either 'x' or 'y'")
}
}

# Catch for when there is zero difference(s) between the models
#--------- Calculations ---------

if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
} else{
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * (1/n + n2/n1)) # Calculate t-statistic

if(statistic < 0){
p.value <- stats::pt(statistic, n - 1) # p-value for left tail
if(tailed == "two"){
d <- x - y
if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
return(tmp)
} else{
p.value <- stats::pt(statistic, n - 1, lower.tail = FALSE) # p-value for right tail
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * (1 / n + n2 / n1))
p.value <- 2 * stats::pt(statistic, n - 1, lower.tail = FALSE)
}
} else{
if(greater == "x"){
d <- x - y
} else{
d <- y - x
}

tmp <- data.frame(statistic = statistic, p.value = p.value)
}
if (sum(d) == 0) {
tmp <- data.frame(statistic = 0, p.value = 1)
return(tmp)
} else{
statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * (1 / n + n2 / n1))
p.value <- stats::pt(statistic, n - 1, lower.tail = FALSE)
}
}

tmp <- data.frame(statistic = statistic, p.value = p.value)
return(tmp)
}
5 changes: 5 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

.onAttach <- function(libname, pkgname) {
packageStartupMessage("This is version ", utils::packageVersion(pkgname),
" of ", pkgname, ". All functions now use two-tailed hypothesis tests by default instead of one-tailed.\nOne-tailed tests can be manually specified through the new 'tailed' and 'greater' arguments.\nPlease consult the help files and vignette for more information.")
}
11 changes: 6 additions & 5 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bed541d

Please sign in to comment.