Skip to content

Commit

Permalink
Add argument stopOnError to xcmsSet function
Browse files Browse the repository at this point in the history
o Add argument stopOnError to xcmsSet to enable feature detection in all
  provided files without stopping on errors. This relates to issue #55.
o Add related documentation.
  • Loading branch information
jorainer committed Sep 19, 2016
1 parent ea0e449 commit 5313459
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 38 deletions.
8 changes: 4 additions & 4 deletions R/MPI.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ findPeaksPar <- function(arg) {
includeMSn=params$includeMSn, mslevel=params$mslevel,
scanrange=params$scanrange)
if(params$lockMassFreq == TRUE){
xRaw<-stitch(xRaw, AutoLockMass(xRaw))
xRaw <- stitch(xRaw, AutoLockMass(xRaw))
}

## remove parameters which are not used by method() from the parameter list
Expand All @@ -37,13 +37,13 @@ findPeaksPar <- function(arg) {
params["includeMSn"] <- NULL
params["lockMassFreq"] <- NULL
params["mslevel"] <- NULL
params["scanrange"] <- NULL ## avoid filtering scanrange twice, first in xRaw then in findPeaks
params["scanrange"] <- NULL ## avoid filtering scanrange twice.

## Could wrap this inside a tryCatch.
peaks <- do.call(method, args = c(list(object = xRaw), params))

list(scantime=xRaw@scantime,
peaks=cbind(peaks, sample = rep.int(myID, nrow(peaks))))
peaks=cbind(peaks,
sample = rep.int(myID, nrow(peaks))))
}

############################################################
Expand Down
75 changes: 51 additions & 24 deletions R/do_detectFeatures-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -573,22 +573,23 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect,
do_detectFeatures_massifquant <- function() {
}
## The original code.
## Not much to speed up here; it's more code tidying.
.massifquant_orig <- function(mz,
int,
scantime,
valsPerSpect,
ppm=10,
peakwidth=c(20,50),
snthresh=10,
prefilter=c(3,100),
mzCenterFun="wMean",
integrate=1,
mzdiff=-0.001,
fitgauss=FALSE,
scanrange= numeric(),
noise=0, ## noise.local=TRUE,
sleep=0,
verbose.columns=FALSE,
ppm = 10,
peakwidth = c(20,50),
snthresh = 10,
prefilter = c(3,100),
mzCenterFun = "wMean",
integrate = 1,
mzdiff = -0.001,
fitgauss = FALSE,
scanrange = numeric(),
noise = 0, ## noise.local=TRUE,
sleep = 0,
verboseColumns = FALSE,
criticalValue = 1.125,
consecMissedLimit = 2,
unions = 1,
Expand All @@ -598,18 +599,40 @@ do_detectFeatures_massifquant <- function() {
cat("\n Massifquant comes with ABSOLUTELY NO WARRANTY. See LICENSE for details.\n");
flush.console();

##keeep this check since massifquant doesn't check internally
if (!isCentroided(object))
warning("It looks like this file is in profile mode. massifquant can process only centroid mode data !\n")
## TODO @jo Ensure in upstream method that data is in centroided mode!
## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange
## Input argument checking.
if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect))
stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'",
" are required!")
if (length(mz) != length(int) | length(valsPerSpect) != length(scantime)
| length(mz) != sum(valsPerSpect))
stop("Lengths of 'mz', 'int' and of 'scantime','valsPerSpect'",
" have to much. Also, 'length(mz)' should be equal to",
" 'sum(valsPerSpect)'.")
scanindex <- valueCount2ScanIndex(valsPerSpect) ## Get index vector for C calls
mz <- as.double(mz)
int <- as.double(int)
## Fix the mzCenterFun
mzCenterFun <- paste("mzCenter",
gsub(mzCenterFun, pattern = "mzCenter.",
replacement = "", fixed = TRUE), sep=".")
if (!exists(mzCenterFun, mode="function"))
stop("Error: >", mzCenterFun, "< not defined !")

cat("\n Detecting mass traces at",ppm,"ppm ... \n"); flush.console();
massifquantROIs = findKalmanROI(object, minIntensity = prefilter[2], minCentroids = peakwidth[1], criticalVal = criticalValue,
consecMissedLim = consecMissedLimit, segs = unions, scanBack = checkBack,ppm=ppm);
## LLLL
massifquantROIs = findKalmanROI(object, minIntensity = prefilter[2],
minCentroids = peakwidth[1], criticalVal = criticalValue,
consecMissedLim = consecMissedLimit,
segs = unions, scanBack = checkBack, ppm = ppm);

if (withWave == 1) {
featlist = findPeaks.centWave(object, ppm, peakwidth, snthresh,
prefilter, mzCenterFun, integrate, mzdiff, fitgauss,
scanrange, noise, sleep, verbose.columns, ROI.list= massifquantROIs);
## LLLL
featlist = do_detectFeatures_centWave()
## featlist = findPeaks.centWave(object, ppm, peakwidth, snthresh,
## prefilter, mzCenterFun, integrate, mzdiff, fitgauss,
## scanrange, noise, sleep, verbose.columns, ROI.list= massifquantROIs);
}
else {
basenames <- c("mz","mzmin","mzmax","rtmin","rtmax","rt", "into")
Expand Down Expand Up @@ -644,9 +667,11 @@ do_detectFeatures_massifquant <- function() {
uindex <- rectUnique(pm,uorder,mzdiff,ydiff = -0.00001) ## allow adjacent peaks;
featlist <- p[uindex,,drop=FALSE];
cat("\n",dim(featlist)[1]," Peaks.\n");
invisible(new("xcmsPeaks", featlist));
## invisible(new("xcmsPeaks", featlist));
return(featlist)
}
return(invisible(featlist));
## return(invisible(featlist));
return(featlist)
}

## The version of matchedFilter:
Expand Down Expand Up @@ -1575,11 +1600,13 @@ do_detectFeatures_MSW <- function(int, mz, snthresh = 3,
stop("Length of 'int' and 'mz' do not match!")
if (!is.numeric(int) | !is.numeric(mz))
stop("'int' and 'mz' are supposed to be numeric vectors!")
.MSW_orig(int = int, mz = mz, snthresh = snthresh,
verboseColumns = verboseColumns, ...)

.MSW(int = int, mz = mz, snthresh = snthresh,
verboseColumns = verboseColumns, ...)
}
############################################################
## The original code
## This should be removed at some point.
.MSW_orig <- function(int, mz, snthresh = 3, verboseColumns = FALSE, ...) {

## MassSpecWavelet Calls
Expand Down
51 changes: 43 additions & 8 deletions R/functions-xcmsSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,23 @@

## The "constructor"
## The "new" xcmsSet method using BiocParallel.
xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, phenoData = NULL,
profmethod = "bin", profparam = list(),
polarity = NULL, lockMassFreq=FALSE,
mslevel=NULL, nSlaves=0, progressCallback=NULL,
scanrange=NULL, BPPARAM=bpparam(), ...) {
xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL,
phenoData = NULL, profmethod = "bin",
profparam = list(), polarity = NULL,
lockMassFreq=FALSE, mslevel=NULL, nSlaves=0,
progressCallback=NULL, scanrange=NULL,
BPPARAM=bpparam(), stopOnError = TRUE, ...) {

if (nSlaves != 0)
warning("Use of argument 'nSlaves' is deprecated!",
" Please use 'BPPARAM' instead.")
if (!is.logical(stopOnError))
stop("'stopOnError' has to be a logical.")
## Overwriting the stop.on.error in BPPARAM:
## bpstopOnError(BPPARAM) <- stopOnError
orig <- bpstopOnError(BPPARAM)
on.exit(BPPARAM@.xData$stop.on.error <- orig)
BPPARAM@.xData$stop.on.error <- stopOnError

object <- new("xcmsSet")

Expand Down Expand Up @@ -125,8 +133,35 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, phenoData = NULL
argList <- apply(ft ,1 ,function(x) list(file = x["file"],
id = as.numeric(x["id"]),
params = params))
## Use BiocParallel:
res <- bplapply(argList, findPeaksPar, BPPARAM = BPPARAM)
## Use BiocParallel: bplapply along with bptry
res <- bptry(bplapply(argList, findPeaksPar, BPPARAM = BPPARAM))
## Catch the error.
if (stopOnError) {
if (inherits(res, "bperror"))
stop(res)
} else {
isOK <- bpok(res)
if (any(!isOK)) {
## OK; process the errors:
if (all(!isOK))
stop("Feature detection failed for all files!",
" The first error was: ", res[[1]])
## Keep the results and mention the errors as warnings
warning("Feature detection failed in ", sum(!isOK), " of ",
length(isOK), " files!")
## Fix each of the errors to allow proceeding with the valid results
notErr <- res[[which(isOK)[1]]]$peaks
sct <- res[[which(isOK)[1]]]$scantime
emptyMat <- matrix(ncol = ncol(notErr), nrow = 0)
colnames(emptyMat) <- colnames(notErr)
for (i in which(!isOK)) {
warning("Feature detection failed in file: ", files[i],
"! Error was: ", res[[i]])
res[[i]] <- list(scantime = sct, peaks = emptyMat)
}
}
}

peaklist <- lapply(res, function(x) x$peaks)
rtlist$raw <- rtlist$corrected <- lapply(res, function(x) x$scantime)
if(lockMassFreq){
Expand All @@ -136,7 +171,7 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, phenoData = NULL
lapply(1:length(peaklist), function(i) {
if (is.null(peaklist[[i]]))
warning("No peaks found in sample ", snames[i], call. = FALSE)
else if (nrow(peaklist[[i]]) == 0)
else if (nrow(peaklist[[i]]) == 0)
warning("No peaks found in sample ", snames[i], call. = FALSE)
else if (nrow(peaklist[[i]]) == 1)
warning("Only 1 peak found in sample ", snames[i], call. = FALSE)
Expand Down
27 changes: 27 additions & 0 deletions inst/unitTests/test_stopOnError.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
############################################################
## Unit tests related to stopIfNot parameters (translated to
## into the stop.if.not BPPARAM argument.

dontrun_test_xcmsSet_stopIfNot <- function() {

## Run the feature detection using xcmsSet in parallel mode
## and evaluate the stopOnError parameter
## We need some problematic test files here; we're using the ones
## from issue #55

mzfs <- paste0("../../local_data/mzML-files/",
c("123.mzML", "261.mzML", "263.mzML"))

## Default: stops on the first encountered error.
checkException(xcmsSet(mzfs, method = "centWave", ppm = 2.5,
peakwidth = c(2.5, 9), mzdiff = -0.001,
snthresh = 10, stopOnError = TRUE))
suppressWarnings(
res <- xcmsSet(mzfs, method = "centWave", ppm = 2.5,
peakwidth = c(2.5, 9), mzdiff = -0.001,
snthresh = 10, stopOnError = FALSE)
)
checkTrue(all(res@peaks[, "sample"] == 1))
}


11 changes: 9 additions & 2 deletions man/xcmsSet.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
xcmsSet(files = NULL, snames = NULL, sclass = NULL, phenoData = NULL,
profmethod = "bin", profparam = list(),
polarity = NULL, lockMassFreq=FALSE,
mslevel=NULL, nSlaves=0, progressCallback=NULL,
scanrange = NULL, BPPARAM = bpparam(), ...)
mslevel=NULL, nSlaves=0, progressCallback=NULL,
scanrange = NULL, BPPARAM = bpparam(),
stopOnError = TRUE, ...)
}
\arguments{
\item{files}{path names of the NetCDF/mzXML files to read}
Expand Down Expand Up @@ -44,6 +45,12 @@ xcmsSet(files = NULL, snames = NULL, sclass = NULL, phenoData = NULL,
\code{\link[BiocParallel]{MulticoreParam}} or
\code{\link[BiocParallel]{SnowParam}} functions.
}
\item{stopOnError}{Logical specifying whether the feature detection
call should stop on the first encountered error (the default), or
whether feature detection is performed in all files regardless
eventual failures for individual files in which case all errors are
reported as \code{warnings}.
}
\item{\dots}{
further arguments to the \code{findPeaks} method of the
\code{xcmsRaw} class
Expand Down

0 comments on commit 5313459

Please sign in to comment.