Skip to content

Commit

Permalink
Commit made by the Bioconductor Git-SVN bridge.
Browse files Browse the repository at this point in the history
Commit id: a7cb4b2

    Merge pull request #5 from wevanjohnson/master

    Added unit tests for ComBat

Commit id: 7632f0c

    Made the following changes: 1) added unit tests for ComBat to check ComBat errors for confounded designs and for the output on simulated and the bladder cancer data. 2) Streamlined some of the ComBat errors (just shortened the text). 3) Added myself as a maintainer of the package since I am the only one updating/maintaining the ComBat part of the package.



git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/sva@100106 bc3139a8-67e5-0310-9ffc-ced21a209358
  • Loading branch information
j.leek committed Mar 3, 2015
1 parent 5358098 commit 3076e0c
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 10 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sva
Title: Surrogate Variable Analysis
Version: 3.13.2
Version: 3.13.3
Author: Jeffrey T. Leek <jtleek@gmail.com>, W. Evan Johnson <wej@bu.edu>,
Hilary S. Parker <hiparker@jhsph.edu>, Elana J. Fertig <ejfertig@jhmi.edu>,
Andrew E. Jaffe <ajaffe@jhsph.edu>, John D. Storey <jstorey@princeton.edu>
Expand All @@ -25,17 +25,18 @@ Description: The sva package contains functions for removing batch
reproducibility, see (Leek and Storey 2007 PLoS Genetics, 2008
PNAS or Leek et al. 2011 Nat. Reviews Genetics).
Maintainer: Jeffrey T. Leek <jtleek@gmail.com>, John D. Storey
<jstorey@princeton.edu>
Depends:
<jstorey@princeton.edu>, W. Evan Johnson <wej@bu.edu>
Depends:
R (>= 2.8),
mgcv,
genefilter
Suggests:
Suggests:
limma,
pamr,
bladderbatch,
BiocStyle,
zebrafishRNASeq
zebrafishRNASeq,
testthat
License: Artistic-2.0
biocViews: Microarray, StatisticalMethod, Preprocessing,
MultipleComparison, Sequencing, RNASeq, BatchEffect, Normalization
10 changes: 5 additions & 5 deletions R/ComBat.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#'

ComBat <- function(dat, batch, mod=NULL, par.prior=TRUE,prior.plots=FALSE) {

# make batch a factor and make a set of indicators for batch
if(length(dim(batch))>1){stop("This version of ComBat only allows one batch variable")} ## to be updated soon!
batch <- as.factor(batch)
batchmod <- model.matrix(~-1+batch)
cat("Found",nlevels(batch),'batches\n')
Expand All @@ -42,11 +42,11 @@ ComBat <- function(dat, batch, mod=NULL, par.prior=TRUE,prior.plots=FALSE) {

# Check if the design is confounded
if(qr(design)$rank<ncol(design)){
if(ncol(design)<=(n.batch)){stop("your batch variables are redundant or nested! Please remove one or more of the batch variables so they are no longer confounded.")}
if(ncol(design)==(n.batch+1)){stop("your covariate is confounded with batch! Please remove the confounded covariate and rerun ComBat.")}
#if(ncol(design)<=(n.batch)){stop("Batch variables are redundant! Remove one or more of the batch variables so they are no longer confounded")}
if(ncol(design)==(n.batch+1)){stop("The covariate is confounded with batch! Remove the covariate and rerun ComBat")}
if(ncol(design)>(n.batch+1)){
if((qr(design[,-c(1:n.batch)])$rank<ncol(design[,-c(1:n.batch)]))){stop('the experimental design of your covariates is confounded! Please remove one (or more) of the covariates so that your design is no longer confounded.')
}else{stop("one (or more) of your covariates is confounded with batch! Please remove the confounded covariate and rerun ComBat.")}}
if((qr(design[,-c(1:n.batch)])$rank<ncol(design[,-c(1:n.batch)]))){stop('The covariates are confounded! Please remove one or more of the covariates so the design is not confounded')
}else{stop("At least one covariate is confounded with batch! Please remove confounded covariates and rerun ComBat")}}
}

## Check for missing values
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(sva)

test_check("sva")
43 changes: 43 additions & 0 deletions tests/testthat/test_combat_bladderbatch.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Tests the ComBat output in the bladder batch dataset
library(sva)
library(bladderbatch)
data(bladderdata)
library(testthat)
context("ComBat test on bladder batch data")

test_that("check ComBat output with several different parameters on bladder cancer data",{
# get expression data, phenotype, and batch
pheno = pData(bladderEset)
edata = exprs(bladderEset)
batch = pheno$batch

# set up full and reduced models for testing
mod = model.matrix(~as.factor(cancer), data=pheno)
mod0 = model.matrix(~1, data=pheno)

#run ComBat without covariates:
combat_edata = ComBat(dat=edata, batch=batch, mod=NULL, par.prior=TRUE, prior.plots=FALSE)
pValuesComBat = f.pvalue(combat_edata,mod,mod0)
expect_equal(sum(pValuesComBat<=.05),12468)

#run ComBat without covariates (using the NULL model):
combat_edata = ComBat(dat=edata, batch=batch, mod=NULL, par.prior=TRUE, prior.plots=FALSE)
pValuesComBat_null = f.pvalue(combat_edata,mod,mod0)
expect_equal(pValuesComBat_null,pValuesComBat)

#Check to see if the prior.plots option works
combat_edata = ComBat(dat=edata, batch=batch, prior.plots=TRUE)
pValuesComBat_null = f.pvalue(combat_edata,mod,mod0)
expect_equal(pValuesComBat_null,pValuesComBat)

#run ComBat without covariates non-parametric (small dataset):
combat_edata = ComBat(dat=edata[1:100,], batch=batch, par.prior=FALSE, prior.plots=FALSE)
pValuesComBat = f.pvalue(combat_edata,mod,mod0)
expect_equal(sum(pValuesComBat<=.05),78)

#run ComBat with covariates:
combat_edata = ComBat(dat=edata, batch=batch, mod=mod, par.prior=TRUE, prior.plots=FALSE)
pValuesComBat = f.pvalue(combat_edata,mod,mod0)
expect_equal(sum(pValuesComBat<.05),18507)

})
27 changes: 27 additions & 0 deletions tests/testthat/test_combat_errors.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
library(sva)
library(testthat)
context("ComBat Error Check")

test_that("Check ComBat errors for too many batch variables and confounded designs",{
# set up number of batches (nb), total sample size (n), and batch/treatment vaariables
nb <- 2
ns <- 2 # one of nb or ns needs to be even (see treat variable)
n <- ns*nb
batch <- as.factor(rep(1:nb, each=ns))
treat <- rep(c(0,1), n/2)

#generate random data
X <- cbind(model.matrix(~-1+batch), treat)
beta <- c(3*(1:nb-1), 2)
sim <- 100
y <- matrix(rnorm(n*sim,X%*%beta),nrow=sim, byrow=TRUE)

## user gives multiple batch variables
expect_error(ComBat(y,cbind(batch, batch)),"This version of ComBat only allows one batch variable")

## test confounded errors:
expect_error(ComBat(y,batch, batch), "The covariate is confounded with batch! Remove the covariate and rerun ComBat")
expect_error(ComBat(y,batch, cbind(treat,batch)), "At least one covariate is confounded with batch! Please remove confounded covariates and rerun ComBat")
expect_error(ComBat(y,batch, cbind(treat,treat)), "The covariates are confounded! Please remove one or more of the covariates so the design is not confounded")
})

56 changes: 56 additions & 0 deletions tests/testthat/test_combat_sim.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
library(sva)
library(testthat)
context("ComBat test on simulated data")


### Run ComBat on a simple simulated dataset
test_that("check ComBat output with several different parameters on simulated genes",{
# generate simulated data
nb <- 3 # number of batches
nw <- 5 # samples within batch
n <- nw*nb # total number of sampes
batch <- rep(1:nb, each = nw) # batch variable for ComBat
set.seed(0)
treat <- rbinom(n,1,c(.5,.5)) # treatment variable--generated at random
beta <- 1:(nb+1) # batch effects + treatment effect (last one)
X <- model.matrix(~-1+as.factor(batch)+treat)
genes <- 100
set.seed(0)
y <- matrix(rnorm(n*genes,X%*%beta),nrow=genes, byrow=TRUE) # generate data

### Test ComBat with different parameters:
# without covariates
batch_only <- ComBat(y,batch)
gene1_batch_only <- c(6.5275687,1.9434116,3.3016615,6.5353399,5.8317968,-0.3314552,3.9485709,4.5368716,4.8050618,7.0422784,4.2176928,2.6528653,2.3037212,7.1688336,3.1533711)
expect_equal(as.numeric(batch_only[1,]),gene1_batch_only)

# without covariates non-parametric
batch_only_non <- ComBat(y,batch,par.prior=FALSE)
gene1_batch_only_non <- c(6.5041159,1.9798940,3.3203856,6.5117856,5.8174409,-0.2101846,3.9735703,4.5486383,4.8107961,6.9976908,4.1925793,2.6502177,2.3060862,7.1013511,3.1435378)
expect_equal(as.numeric(batch_only_non[1,]),gene1_batch_only_non)

# with covariates
batch_treat <- ComBat(y,batch,treat,par.prior=TRUE)
gene1_batch_treat <- c(6.9550540,1.3750580,3.0128757,6.9644248,6.1160719,0.8808531,5.5188802,6.0339045,6.2686899,8.2272474,2.8499106,1.3062056,0.9617743,5.8187037,1.7999553)
expect_equal(as.numeric(batch_treat[1,]),gene1_batch_treat)

# with covariates non-parametric
batch_treat_non <- ComBat(y,batch,treat,par.prior=FALSE)
gene1_batch_treat_non <- c(6.9621810,1.4557761, 2.9477604, 6.9707174, 6.1979029, 0.9018672, 5.5466543, 6.0315322, 6.2525748, 8.0964907, 2.7563817, 1.3122575, 0.9900446, 5.8403030, 1.7741566)
expect_equal(as.numeric(batch_treat_non[1,]),gene1_batch_treat_non)

# with covariates missing data
## introduce missing values 10%
set.seed(1)
m <- matrix(rbinom(n*genes,1,.9),nrow=genes, byrow=TRUE)
ymis <- y
ymis[m==0] <- NA
batch_treat_mis <- ComBat(ymis,batch,treat)
gene1_batch_treat_mis <- c(7.0249052, 1.4302766, 3.0979753, NA, 6.1706164, 0.7848869, NA, 5.9650280, 6.2100799, 8.2542787, 2.8907924, 1.3319739, 0.9841704, 5.8422657, 1.8305576)
expect_equal(as.numeric(batch_treat_mis[1,]),gene1_batch_treat_mis)

})




0 comments on commit 3076e0c

Please sign in to comment.