Skip to content

Commit

Permalink
Merge pull request #5 from wevanjohnson/master
Browse files Browse the repository at this point in the history
Added unit tests for ComBat
  • Loading branch information
jtleek committed Mar 3, 2015
2 parents 97f8f19 + 7632f0c commit a7cb4b2
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 10 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sva
Title: Surrogate Variable Analysis
Version: 3.13.2
Version: 3.13.3
Author: Jeffrey T. Leek <jtleek@gmail.com>, W. Evan Johnson <wej@bu.edu>,
Hilary S. Parker <hiparker@jhsph.edu>, Elana J. Fertig <ejfertig@jhmi.edu>,
Andrew E. Jaffe <ajaffe@jhsph.edu>, John D. Storey <jstorey@princeton.edu>
Expand All @@ -25,17 +25,18 @@ Description: The sva package contains functions for removing batch
reproducibility, see (Leek and Storey 2007 PLoS Genetics, 2008
PNAS or Leek et al. 2011 Nat. Reviews Genetics).
Maintainer: Jeffrey T. Leek <jtleek@gmail.com>, John D. Storey
<jstorey@princeton.edu>
Depends:
<jstorey@princeton.edu>, W. Evan Johnson <wej@bu.edu>
Depends:
R (>= 2.8),
mgcv,
genefilter
Suggests:
Suggests:
limma,
pamr,
bladderbatch,
BiocStyle,
zebrafishRNASeq
zebrafishRNASeq,
testthat
License: Artistic-2.0
biocViews: Microarray, StatisticalMethod, Preprocessing,
MultipleComparison, Sequencing, RNASeq, BatchEffect, Normalization
10 changes: 5 additions & 5 deletions R/ComBat.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#'

ComBat <- function(dat, batch, mod=NULL, par.prior=TRUE,prior.plots=FALSE) {

# make batch a factor and make a set of indicators for batch
if(length(dim(batch))>1){stop("This version of ComBat only allows one batch variable")} ## to be updated soon!
batch <- as.factor(batch)
batchmod <- model.matrix(~-1+batch)
cat("Found",nlevels(batch),'batches\n')
Expand All @@ -42,11 +42,11 @@ ComBat <- function(dat, batch, mod=NULL, par.prior=TRUE,prior.plots=FALSE) {

# Check if the design is confounded
if(qr(design)$rank<ncol(design)){
if(ncol(design)<=(n.batch)){stop("your batch variables are redundant or nested! Please remove one or more of the batch variables so they are no longer confounded.")}
if(ncol(design)==(n.batch+1)){stop("your covariate is confounded with batch! Please remove the confounded covariate and rerun ComBat.")}
#if(ncol(design)<=(n.batch)){stop("Batch variables are redundant! Remove one or more of the batch variables so they are no longer confounded")}
if(ncol(design)==(n.batch+1)){stop("The covariate is confounded with batch! Remove the covariate and rerun ComBat")}
if(ncol(design)>(n.batch+1)){
if((qr(design[,-c(1:n.batch)])$rank<ncol(design[,-c(1:n.batch)]))){stop('the experimental design of your covariates is confounded! Please remove one (or more) of the covariates so that your design is no longer confounded.')
}else{stop("one (or more) of your covariates is confounded with batch! Please remove the confounded covariate and rerun ComBat.")}}
if((qr(design[,-c(1:n.batch)])$rank<ncol(design[,-c(1:n.batch)]))){stop('The covariates are confounded! Please remove one or more of the covariates so the design is not confounded')
}else{stop("At least one covariate is confounded with batch! Please remove confounded covariates and rerun ComBat")}}
}

## Check for missing values
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(sva)

test_check("sva")
43 changes: 43 additions & 0 deletions tests/testthat/test_combat_bladderbatch.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Tests the ComBat output in the bladder batch dataset
library(sva)
library(bladderbatch)
data(bladderdata)
library(testthat)
context("ComBat test on bladder batch data")

test_that("check ComBat output with several different parameters on bladder cancer data",{
# get expression data, phenotype, and batch
pheno = pData(bladderEset)
edata = exprs(bladderEset)
batch = pheno$batch

# set up full and reduced models for testing
mod = model.matrix(~as.factor(cancer), data=pheno)
mod0 = model.matrix(~1, data=pheno)

#run ComBat without covariates:
combat_edata = ComBat(dat=edata, batch=batch, mod=NULL, par.prior=TRUE, prior.plots=FALSE)
pValuesComBat = f.pvalue(combat_edata,mod,mod0)
expect_equal(sum(pValuesComBat<=.05),12468)

#run ComBat without covariates (using the NULL model):
combat_edata = ComBat(dat=edata, batch=batch, mod=NULL, par.prior=TRUE, prior.plots=FALSE)
pValuesComBat_null = f.pvalue(combat_edata,mod,mod0)
expect_equal(pValuesComBat_null,pValuesComBat)

#Check to see if the prior.plots option works
combat_edata = ComBat(dat=edata, batch=batch, prior.plots=TRUE)
pValuesComBat_null = f.pvalue(combat_edata,mod,mod0)
expect_equal(pValuesComBat_null,pValuesComBat)

#run ComBat without covariates non-parametric (small dataset):
combat_edata = ComBat(dat=edata[1:100,], batch=batch, par.prior=FALSE, prior.plots=FALSE)
pValuesComBat = f.pvalue(combat_edata,mod,mod0)
expect_equal(sum(pValuesComBat<=.05),78)

#run ComBat with covariates:
combat_edata = ComBat(dat=edata, batch=batch, mod=mod, par.prior=TRUE, prior.plots=FALSE)
pValuesComBat = f.pvalue(combat_edata,mod,mod0)
expect_equal(sum(pValuesComBat<.05),18507)

})
27 changes: 27 additions & 0 deletions tests/testthat/test_combat_errors.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
library(sva)
library(testthat)
context("ComBat Error Check")

test_that("Check ComBat errors for too many batch variables and confounded designs",{
# set up number of batches (nb), total sample size (n), and batch/treatment vaariables
nb <- 2
ns <- 2 # one of nb or ns needs to be even (see treat variable)
n <- ns*nb
batch <- as.factor(rep(1:nb, each=ns))
treat <- rep(c(0,1), n/2)

#generate random data
X <- cbind(model.matrix(~-1+batch), treat)
beta <- c(3*(1:nb-1), 2)
sim <- 100
y <- matrix(rnorm(n*sim,X%*%beta),nrow=sim, byrow=TRUE)

## user gives multiple batch variables
expect_error(ComBat(y,cbind(batch, batch)),"This version of ComBat only allows one batch variable")

## test confounded errors:
expect_error(ComBat(y,batch, batch), "The covariate is confounded with batch! Remove the covariate and rerun ComBat")
expect_error(ComBat(y,batch, cbind(treat,batch)), "At least one covariate is confounded with batch! Please remove confounded covariates and rerun ComBat")
expect_error(ComBat(y,batch, cbind(treat,treat)), "The covariates are confounded! Please remove one or more of the covariates so the design is not confounded")
})

56 changes: 56 additions & 0 deletions tests/testthat/test_combat_sim.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
library(sva)
library(testthat)
context("ComBat test on simulated data")


### Run ComBat on a simple simulated dataset
test_that("check ComBat output with several different parameters on simulated genes",{
# generate simulated data
nb <- 3 # number of batches
nw <- 5 # samples within batch
n <- nw*nb # total number of sampes
batch <- rep(1:nb, each = nw) # batch variable for ComBat
set.seed(0)
treat <- rbinom(n,1,c(.5,.5)) # treatment variable--generated at random
beta <- 1:(nb+1) # batch effects + treatment effect (last one)
X <- model.matrix(~-1+as.factor(batch)+treat)
genes <- 100
set.seed(0)
y <- matrix(rnorm(n*genes,X%*%beta),nrow=genes, byrow=TRUE) # generate data

### Test ComBat with different parameters:
# without covariates
batch_only <- ComBat(y,batch)
gene1_batch_only <- c(6.5275687,1.9434116,3.3016615,6.5353399,5.8317968,-0.3314552,3.9485709,4.5368716,4.8050618,7.0422784,4.2176928,2.6528653,2.3037212,7.1688336,3.1533711)
expect_equal(as.numeric(batch_only[1,]),gene1_batch_only)

# without covariates non-parametric
batch_only_non <- ComBat(y,batch,par.prior=FALSE)
gene1_batch_only_non <- c(6.5041159,1.9798940,3.3203856,6.5117856,5.8174409,-0.2101846,3.9735703,4.5486383,4.8107961,6.9976908,4.1925793,2.6502177,2.3060862,7.1013511,3.1435378)
expect_equal(as.numeric(batch_only_non[1,]),gene1_batch_only_non)

# with covariates
batch_treat <- ComBat(y,batch,treat,par.prior=TRUE)
gene1_batch_treat <- c(6.9550540,1.3750580,3.0128757,6.9644248,6.1160719,0.8808531,5.5188802,6.0339045,6.2686899,8.2272474,2.8499106,1.3062056,0.9617743,5.8187037,1.7999553)
expect_equal(as.numeric(batch_treat[1,]),gene1_batch_treat)

# with covariates non-parametric
batch_treat_non <- ComBat(y,batch,treat,par.prior=FALSE)
gene1_batch_treat_non <- c(6.9621810,1.4557761, 2.9477604, 6.9707174, 6.1979029, 0.9018672, 5.5466543, 6.0315322, 6.2525748, 8.0964907, 2.7563817, 1.3122575, 0.9900446, 5.8403030, 1.7741566)
expect_equal(as.numeric(batch_treat_non[1,]),gene1_batch_treat_non)

# with covariates missing data
## introduce missing values 10%
set.seed(1)
m <- matrix(rbinom(n*genes,1,.9),nrow=genes, byrow=TRUE)
ymis <- y
ymis[m==0] <- NA
batch_treat_mis <- ComBat(ymis,batch,treat)
gene1_batch_treat_mis <- c(7.0249052, 1.4302766, 3.0979753, NA, 6.1706164, 0.7848869, NA, 5.9650280, 6.2100799, 8.2542787, 2.8907924, 1.3319739, 0.9841704, 5.8422657, 1.8305576)
expect_equal(as.numeric(batch_treat_mis[1,]),gene1_batch_treat_mis)

})




0 comments on commit a7cb4b2

Please sign in to comment.