diff --git a/CHANGELOG.md b/CHANGELOG.md index ebaa4f8..81b9b8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,11 +24,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - exported functions in NAMESPACE [#154](https://github.com/RECETOX/recetox-aplcms/pull/154) - docstrings and documentation files for refactored functions [#160](https://github.com/RECETOX/recetox-aplcms/pull/160) - refactored parameter names to keep them more harmonized [#167](https://github.com/RECETOX/recetox-aplcms/pull/167) -- moved some utility functions to a more suitable locations [#164](https://github.com/RECETOX/recetox-aplcms/pull/160) +- moved some utility functions to more suitable locations [#164](https://github.com/RECETOX/recetox-aplcms/pull/164) ### Removed - `extract_features` and `feature.align` [#154](https://github.com/RECETOX/recetox-aplcms/pull/154) - improper usage of `@examples` [#160](https://github.com/RECETOX/recetox-aplcms/pull/160) -- several obsolete utility functions [#164](https://github.com/RECETOX/recetox-aplcms/pull/160) +- several obsolete utility functions [#164](https://github.com/RECETOX/recetox-aplcms/pull/164) +- several outdated `.Rd` files [#168](https://github.com/RECETOX/recetox-aplcms/pull/168) +- default argument values from low-level functions [#168](https://github.com/RECETOX/recetox-aplcms/pull/168) ## [0.9.4] - 2022-05-10 diff --git a/R/adaptive.bin.R b/R/adaptive.bin.R index 63f2cdc..a6d2b0d 100644 --- a/R/adaptive.bin.R +++ b/R/adaptive.bin.R @@ -75,7 +75,7 @@ adaptive.bin <- function(features, min_pres, mz_tol, baseline_correct, - intensity_weighted = FALSE) { + intensity_weighted) { # order inputs after mz values features <- features |> dplyr::arrange_at("mz") diff --git a/R/compute_clusters.R b/R/compute_clusters.R index ece73ce..b1dfd76 100644 --- a/R/compute_clusters.R +++ b/R/compute_clusters.R @@ -25,7 +25,7 @@ compute_clusters <- function(feature_tables, mz_tol_absolute, mz_max_diff, 
rt_tol_relative, - do.plot = FALSE, + do.plot, sample_names = NA) { number_of_samples <- length(feature_tables) all <- concatenate_feature_tables(feature_tables, sample_names) @@ -34,6 +34,9 @@ compute_clusters <- function(feature_tables, mz_tol_relative <- find.tol( all$mz, mz_max_diff = mz_max_diff, + aver.bin.size = 4000, + min.bins = 50, + max.bins = 200, do.plot = do.plot ) if (length(mz_tol_relative) == 0) { @@ -60,7 +63,11 @@ compute_clusters <- function(feature_tables, number_of_samples = number_of_samples, mz_tol_relative = mz_tol_relative, rt_tol_relative = rt_tol_relative, + aver.bin.size = 200, + min.bins = 50, + max.bins = 100, mz_tol_absolute = mz_tol_absolute, + max.num.segments = 10000, do.plot = do.plot ) diff --git a/R/find.match.R b/R/find.match.R index a0770b4..cc0e69c 100644 --- a/R/find.match.R +++ b/R/find.match.R @@ -5,7 +5,7 @@ #' @param a A matrix of distances. #' @param unacceptable A distance larger than which cannot be accepted as pairs. #' @return A matrix the same dimension as the input matrix, with matched position taking value 1, and all other positions taking value 0. -find.match <- function(a, unacceptable=4) { +find.match <- function(a, unacceptable) { find.min.pos<-function(d) { pos<-which(d==min(d))[1] diff --git a/R/find.tol.R b/R/find.tol.R index 170a068..b7b10e3 100644 --- a/R/find.tol.R +++ b/R/find.tol.R @@ -15,11 +15,11 @@ NULL #' @return The tolerance level is returned. 
#' @export find.tol <- function(mz, - mz_max_diff = 1e-4, - aver.bin.size = 4000, - min.bins = 50, - max.bins = 200, - do.plot = TRUE) { + mz_max_diff, + aver.bin.size, + min.bins, + max.bins, + do.plot) { mz <- mz[order(mz)] l <- length(mz) # pairwise m/z difference divided by their average, filtered outside of tolerance limit diff --git a/R/find.tol.time.R b/R/find.tol.time.R index 988dbb7..eeb5f1a 100644 --- a/R/find.tol.time.R +++ b/R/find.tol.time.R @@ -151,14 +151,14 @@ compute_rt_tol_relative <- function(breaks, #' spectrum label, and peak group label. The rows are ordered by the median m/z of each peak group, and with each peak group the rows are ordered find.tol.time <- function(features, number_of_samples, - mz_tol_relative = 2e-5, - rt_tol_relative = NA, - aver.bin.size = 200, - min.bins = 50, - max.bins = 100, - mz_tol_absolute = 0.01, - max.num.segments = 10000, - do.plot = TRUE) { + mz_tol_relative, + rt_tol_relative, + aver.bin.size, + min.bins, + max.bins, + mz_tol_absolute, + max.num.segments, + do.plot) { features <- dplyr::arrange_at(features, "mz") min_mz_tol <- compute_min_mz_tolerance( diff --git a/R/hybrid.R b/R/hybrid.R index ca219a5..de77ac7 100644 --- a/R/hybrid.R +++ b/R/hybrid.R @@ -295,6 +295,7 @@ augment_known_table <- function( #' @param use_observed_range If the value is TRUE, the actual range of the observed locations of the feature in all the spectra will be used. #' @param recover_min_count Minimum number of raw data points to support a recovery. #' @param intensity_weighted Whether to use intensity to weight mass density estimation. +#' @param do_plot Indicates whether plot should be drawn. 
#' @param cluster The number of CPU cores to be used #' @export hybrid <- function( @@ -326,6 +327,7 @@ hybrid <- function( use_observed_range = TRUE, recover_min_count = 3, intensity_weighted = FALSE, + do_plot = FALSE, cluster = 4 ) { if (!is(cluster, 'cluster')) { @@ -351,7 +353,7 @@ hybrid <- function( baseline_correct = baseline_correct, baseline_correct_noise_percentile = baseline_correct_noise_percentile, intensity_weighted = intensity_weighted, - do.plot = FALSE, + do.plot = do_plot, cache = FALSE ) }) @@ -369,7 +371,7 @@ hybrid <- function( component_eliminate = component_eliminate, moment_power = moment_power, BIC_factor = BIC_factor, - do.plot = FALSE + do.plot = do_plot ) }) @@ -380,6 +382,7 @@ hybrid <- function( mz_tol_absolute = mz_tol_absolute, mz_max_diff = 10 * mz_tol, rt_tol_relative = rt_tol_relative, + do.plot = do_plot, sample_names = sample_names ) @@ -401,7 +404,8 @@ hybrid <- function( mz_tol_relative = extracted_clusters$mz_tol_relative, mz_tol_absolute = extracted_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, - rt_tol_relative = rt_tol_relative + rt_tol_relative = rt_tol_relative, + do.plot = do_plot ) message("**** feature alignment ****") @@ -455,7 +459,8 @@ hybrid <- function( mz_tol_relative = mz_tol_relative, mz_tol_absolute = mz_tol_absolute, mz_max_diff = 10 * mz_tol, - rt_tol_relative = rt_tol_relative + rt_tol_relative = rt_tol_relative, + do.plot = do_plot ) message("**** computing template ****") @@ -476,7 +481,8 @@ hybrid <- function( mz_tol_relative = recovered_clusters$mz_tol_relative, mz_tol_absolute = recovered_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, - rt_tol_relative = rt_tol_relative + rt_tol_relative = rt_tol_relative, + do.plot = do_plot ) message("**** second feature alignment ****") diff --git a/R/proc.cdf.R b/R/proc.cdf.R index b9210eb..eff2317 100644 --- a/R/proc.cdf.R +++ b/R/proc.cdf.R @@ -65,14 +65,14 @@ load_data <- function(filename, #' @return A matrix with four columns: m/z value, 
retention time, intensity, and group number. #' @export proc.cdf <- function(filename, - min_pres = 0.5, - min_run = 12, - mz_tol = 1e-05, - baseline_correct = 0.0, - baseline_correct_noise_percentile = 0.05, - intensity_weighted = FALSE, - do.plot = FALSE, - cache = FALSE) { + min_pres, + min_run, + mz_tol, + baseline_correct, + baseline_correct_noise_percentile, + intensity_weighted, + do.plot, + cache) { raw.prof <- load_data( filename, cache, diff --git a/R/prof.to.features.R b/R/prof.to.features.R index c5f8bd3..383d8a9 100644 --- a/R/prof.to.features.R +++ b/R/prof.to.features.R @@ -826,17 +826,17 @@ normix.bic <- function(x, y, moment_power = 2, do.plot = FALSE, bw = c(15, 30, 6 #' curve), and estimated total signal strength (total area of the estimated normal curve). #' @export prof.to.features <- function(profile, - bandwidth = 0.5, - min_bandwidth = NA, - max_bandwidth = NA, - sd_cut = c(0.01, 500), - sigma_ratio_lim = c(0.01, 100), - shape_model = "bi-Gaussian", - peak_estim_method = "moment", - moment_power = 1, - component_eliminate = 0.01, - BIC_factor = 2, - do.plot = TRUE) { + bandwidth, + min_bandwidth, + max_bandwidth, + sd_cut, + sigma_ratio_lim, + shape_model, + peak_estim_method, + moment_power, + component_eliminate, + BIC_factor, + do.plot) { validate_inputs(shape_model, peak_estim_method) profile <- preprocess_profile(profile) diff --git a/R/recover.weaker.R b/R/recover.weaker.R index fbcd10f..93e3491 100644 --- a/R/recover.weaker.R +++ b/R/recover.weaker.R @@ -659,15 +659,15 @@ recover.weaker <- function(filename, rt_tol_relative, extracted_features, adjusted_features, - recover_mz_range = NA, - recover_rt_range = NA, - use_observed_range = TRUE, - mz_tol = 1e-5, - min_bandwidth = NA, - max_bandwidth = NA, - bandwidth = .5, - recover_min_count = 3, - intensity_weighted = FALSE) { + recover_mz_range, + recover_rt_range, + use_observed_range, + mz_tol, + min_bandwidth, + max_bandwidth, + bandwidth, + recover_min_count, + intensity_weighted) { 
# load raw data data_table <- load_file(filename) |> dplyr::arrange_at("mz") times <- sort(unique(data_table$rt)) diff --git a/R/run_filter.R b/R/run_filter.R index 54718e7..01e4d19 100644 --- a/R/run_filter.R +++ b/R/run_filter.R @@ -5,7 +5,7 @@ #' @param profile The matrix containing m/z, retention time, intensity, and EIC label as columns. #' @return unique_grp. #' @export -compute_uniq_grp <- function(profile, min_count_run, min_pres = 0.6) { +compute_uniq_grp <- function(profile, min_count_run, min_pres) { grps <- profile ttt <- table(grps) ttt <- ttt[ttt >= max(min_count_run * min_pres, 2)] @@ -18,7 +18,7 @@ compute_uniq_grp <- function(profile, min_count_run, min_pres = 0.6) { #' @param times. Retention times vector. #' @return predicted rt. #' @export -predict_smoothed_rt <- function(min_run = 5, times) { +predict_smoothed_rt <- function(min_run, times) { # ksmooth(x, y, kernel, bandwidth, range, n.points, x.points) smooth <- ksmooth( seq(-min_run + 1, length(times) + min_run), @@ -78,8 +78,8 @@ label_val_to_keep <- function(min_run, timeline, min_pres, this_times, times) { #' @return A list is returned. new_rec - The matrix containing m/z, retention time, intensity, and EIC label as columns after applying the run filter. 
#' @export run_filter <- function(newprof, - min_pres = 0.6, - min_run = 5) { + min_pres, + min_run) { newprof <- tibble::tibble(mz = newprof[,1], rt = newprof[,2], intensi = newprof[,3], grps = newprof[,4]) diff --git a/R/semi.sup.R b/R/semi.sup.R index 89cc2aa..9dba450 100644 --- a/R/semi.sup.R +++ b/R/semi.sup.R @@ -144,7 +144,7 @@ semi.sup <- function( that.name<-paste(strsplit(tolower(files[j]),"\\.")[[1]][1],suf.prof,".profile",sep="_") processable<-"goodgood" - processable<-try(this.prof<-proc.cdf(files[j], min.pres=min.pres, min.run=min.run, tol=mz.tol, baseline.correct=baseline.correct, baseline.correct.noise.percentile=baseline.correct.noise.percentile, do.plot=FALSE, intensity.weighted=intensity.weighted)) + processable<-try(this.prof<-proc.cdf(files[j], min_pres=min.pres, min_run=min.run, mz_tol=mz.tol, baseline_correct=baseline.correct, baseline_correct_noise_percentile=baseline.correct.noise.percentile, do.plot=FALSE, intensity_weighted=intensity.weighted, cache=FALSE)) if(substr(processable,1,5)=="Error") { file.copy(from=files[j], to="error_files") @@ -156,7 +156,7 @@ semi.sup <- function( if(substr(processable,1,5)!="Error") { processable.2<-"goodgood" - processable.2<-try(this.feature<-prof.to.features(this.prof, min.bw=min.bw, max.bw=max.bw, sd.cut=sd.cut, shape.model=shape.model, estim.method=peak.estim.method, do.plot=FALSE, component.eliminate=component.eliminate, power=moment.power, BIC.factor=BIC.factor)) + processable.2<-try(this.feature<-prof.to.features(this.prof, bandwidth = 0.5, min_bandwidth=min.bw, max_bandwidth=max.bw, sd_cut=sd.cut, sigma_ratio_lim = c(0.01, 100), shape_model=shape.model, peak_estim_method=peak.estim.method, do.plot=FALSE, component_eliminate=component.eliminate, moment_power=moment.power, BIC_factor=BIC.factor)) if(substr(processable.2,1,5)=="Error") { diff --git a/R/unsupervised.R b/R/unsupervised.R index 7ac3855..564fc01 100644 --- a/R/unsupervised.R +++ b/R/unsupervised.R @@ -88,6 +88,7 @@ get_sample_name <- 
function(filename) { #' the spectra will be used. #' @param recover_min_count Minimum number of raw data points to support a recovery. #' @param intensity_weighted Whether to use intensity to weight mass density estimation. +#' @param do_plot Indicates whether plot should be drawn. #' @param cluster The number of CPU cores to be used #' @export unsupervised <- function( @@ -116,6 +117,7 @@ unsupervised <- function( use_observed_range = TRUE, recover_min_count = 3, intensity_weighted = FALSE, + do_plot = FALSE, cluster = 4 ) { if (!is(cluster, 'cluster')) { @@ -141,7 +143,7 @@ unsupervised <- function( baseline_correct = baseline_correct, baseline_correct_noise_percentile = baseline_correct_noise_percentile, intensity_weighted = intensity_weighted, - do.plot = FALSE, + do.plot = do_plot, cache = FALSE ) }) @@ -159,7 +161,7 @@ unsupervised <- function( component_eliminate = component_eliminate, moment_power = moment_power, BIC_factor = BIC_factor, - do.plot = FALSE + do.plot = do_plot ) }) @@ -170,6 +172,7 @@ unsupervised <- function( mz_tol_absolute = mz_tol_absolute, mz_max_diff = 10 * mz_tol, rt_tol_relative = rt_tol_relative, + do.plot = do_plot, sample_names = sample_names ) @@ -191,7 +194,8 @@ unsupervised <- function( mz_tol_relative = extracted_clusters$mz_tol_relative, mz_tol_absolute = extracted_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, - rt_tol_relative = rt_tol_relative + rt_tol_relative = rt_tol_relative, + do.plot = do_plot ) message("**** feature alignment ****") @@ -235,7 +239,8 @@ unsupervised <- function( mz_tol_relative = adjusted_clusters$mz_tol_relative, mz_tol_absolute = adjusted_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, - rt_tol_relative = rt_tol_relative + rt_tol_relative = rt_tol_relative, + do.plot = do_plot ) message("**** feature alignment ****") diff --git a/man/adaptive.bin.Rd b/man/adaptive.bin.Rd index f625305..51fd488 100644 --- a/man/adaptive.bin.Rd +++ b/man/adaptive.bin.Rd @@ -10,7 +10,7 @@ adaptive.bin( 
min_pres, mz_tol, baseline_correct, - intensity_weighted = FALSE + intensity_weighted ) } \arguments{ diff --git a/man/aligned.Rd b/man/aligned.Rd deleted file mode 100644 index c6712cf..0000000 --- a/man/aligned.Rd +++ /dev/null @@ -1,21 +0,0 @@ -\name{aligned} -\alias{aligned} -\docType{data} -\title{ sample data after alignment } -\description{ - This is the aligned feature data from 4 sample LC/MS profiles. -} -\usage{data(aligned)} -\format{ - The format is: -List of 4 - $ aligned.ftrs: A matrix with m/z, median elution time, and signal strength in each profile. - $ pk.times : A matrix with m/z, median elution time, and elution time in each profile. - $ mz.tol : the m/z tolerance level in alignment - $ chr.tol : the retention time tolerance level in alignment -} -\source{Dean Jones lab. Emory University, School of Medicine.} -\examples{ -data(aligned) -} -\keyword{datasets} diff --git a/man/compute_clusters.Rd b/man/compute_clusters.Rd index 78520bb..0fd7453 100644 --- a/man/compute_clusters.Rd +++ b/man/compute_clusters.Rd @@ -10,7 +10,7 @@ compute_clusters( mz_tol_absolute, mz_max_diff, rt_tol_relative, - do.plot = FALSE, + do.plot, sample_names = NA ) } diff --git a/man/features.Rd b/man/features.Rd deleted file mode 100644 index 3e1518e..0000000 --- a/man/features.Rd +++ /dev/null @@ -1,18 +0,0 @@ -\name{features} -\alias{features} -\docType{data} -\title{ Sample feature tables from 4 profiles } -\description{ - A list object containing 4 matrices, each of which is the feature table from a profile. -} -\usage{data(features)} -\format{ - List object containing multiple matrices. One matrix from each spectrum. -} -\source{ -Data from Dean Jones lab, Emory University School of Medicine. 
-} -\examples{ -data(features) -} -\keyword{datasets} diff --git a/man/features.learn.Rd b/man/features.learn.Rd deleted file mode 100644 index 563d454..0000000 --- a/man/features.learn.Rd +++ /dev/null @@ -1,18 +0,0 @@ -\name{features.learn} -\alias{features.learn} -\docType{data} -\title{ Sample feature tables from 4 profiles. The original feature detection is done by machine learning approach. } -\description{ - A list object containing 4 matrices, each of which is the feature table from a profile. -} -\usage{data(features)} -\format{ - List object containing multiple matrices. One matrix from each spectrum. -} -\source{ -Data from Dean Jones lab, Emory University School of Medicine. -} -\examples{ -data(features.learn) -} -\keyword{datasets} diff --git a/man/features2.Rd b/man/features2.Rd deleted file mode 100644 index fc9a4b5..0000000 --- a/man/features2.Rd +++ /dev/null @@ -1,17 +0,0 @@ -\name{features2} -\alias{features2} -\docType{data} -\title{ Feature tables after elution time correction. } -\description{ - A list object containing 4 matrices, each of which is from a profile. The elution times in the matrices are corrected. -} -\usage{data(features2)} -\format{ -A list object. Each component of the list is a matrix. -} -\source{ -Raw data from Dean Jones lab, Emory School of Medicine.} -\examples{ -data(features2) -} -\keyword{datasets} diff --git a/man/features2.learn.Rd b/man/features2.learn.Rd deleted file mode 100644 index 55971f9..0000000 --- a/man/features2.learn.Rd +++ /dev/null @@ -1,17 +0,0 @@ -\name{features2.learn} -\alias{features2.learn} -\docType{data} -\title{ Feature tables after elution time correction. The original feature detection is done by machine learning approach. } -\description{ - A list object containing 4 matrices, each of which is from a profile. The elution times in the matrices are corrected. -} -\usage{data(features2)} -\format{ -A list object. Each component of the list is a matrix. 
-} -\source{ -Raw data from Dean Jones lab, Emory School of Medicine.} -\examples{ -data(features2.learn) -} -\keyword{datasets} diff --git a/man/find.match.Rd b/man/find.match.Rd index 30ef09c..742a240 100644 --- a/man/find.match.Rd +++ b/man/find.match.Rd @@ -1,14 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find.match.R \name{find.match} \alias{find.match} -\title{ Internal function: finding the best match between a set of detected features and a set of known features. } -\description{ Given a small matrix of distances, find the best column-row pairing that minimize the sum of distances of the matched pairs. } +\title{Internal function: finding the best match between a set of detected features and a set of known features.} \usage{ -find.match(a, unacceptable = 4) +find.match(a, unacceptable) } \arguments{ - \item{a}{ A matrix of distances. } - \item{unacceptable}{ A distance larger than which cannot be accepted as pairs. } +\item{a}{A matrix of distances.} + +\item{unacceptable}{A distance larger than which cannot be accepted as pairs.} +} +\value{ +A matrix the same dimension as the input matrix, with matched position taking value 1, and all other positions taking value 0. +} +\description{ +Given a small matrix of distances, find the best column-row pairing that minimize the sum of distances of the matched pairs. } -\value{ A matrix the same dimension as the input matrix, with matched position taking value 1, and all other positions taking value 0. } -\author{ Tianwei Yu } -\keyword{ models } \ No newline at end of file diff --git a/man/find.tol.Rd b/man/find.tol.Rd index 9fce3b4..c693e88 100644 --- a/man/find.tol.Rd +++ b/man/find.tol.Rd @@ -4,14 +4,7 @@ \alias{find.tol} \title{An internal function that is not supposed to be directly accessed by the user. 
Find m/z tolerance level.} \usage{ -find.tol( - mz, - mz_max_diff = 1e-04, - aver.bin.size = 4000, - min.bins = 50, - max.bins = 200, - do.plot = TRUE -) +find.tol(mz, mz_max_diff, aver.bin.size, min.bins, max.bins, do.plot) } \arguments{ \item{mz}{The vector of observed m/z values.} diff --git a/man/find.tol.time.Rd b/man/find.tol.time.Rd index a1ed881..60ee71f 100644 --- a/man/find.tol.time.Rd +++ b/man/find.tol.time.Rd @@ -7,14 +7,14 @@ find.tol.time( features, number_of_samples, - mz_tol_relative = 2e-05, - rt_tol_relative = NA, - aver.bin.size = 200, - min.bins = 50, - max.bins = 100, - mz_tol_absolute = 0.01, - max.num.segments = 10000, - do.plot = TRUE + mz_tol_relative, + rt_tol_relative, + aver.bin.size, + min.bins, + max.bins, + mz_tol_absolute, + max.num.segments, + do.plot ) } \arguments{ diff --git a/man/find.turn.point.Rd b/man/find.turn.point.Rd index 055b45f..1e678f5 100644 --- a/man/find.turn.point.Rd +++ b/man/find.turn.point.Rd @@ -1,24 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/find.turn.point.R \name{find.turn.point} \alias{find.turn.point} -%- Also NEED an '\alias' for EACH other topic documented here. -\title{ Find peaks and valleys of a curve. } -\description{ - This is an internal function which is not supposed to be directly accessed by the user. Finds the peaks and valleys of a smooth curve. -} +\title{Find peaks and valleys of a curve.} \usage{ find.turn.point(y) } \arguments{ - \item{y}{ The y values of a curve in x-y plane. } +\item{y}{The y values of a curve in x-y plane.} } \value{ A list object: - \item{pks}{The peak positions.} - \item{vlys}{The valley positions} +\itemize{ + \item pks - The peak positions + \item vlys - The valley positions } -\references{ -Bioinformatics. 25(15):1930-36. -BMC Bioinformatics. 11:559. } -\author{ Tianwei Yu } -\keyword{ models } \ No newline at end of file +\description{ +This is an internal function which finds the peaks and valleys of a smooth curve. 
+} diff --git a/man/hybrid.Rd b/man/hybrid.Rd index 6c44aed..b94e425 100644 --- a/man/hybrid.Rd +++ b/man/hybrid.Rd @@ -33,6 +33,7 @@ hybrid( use_observed_range = TRUE, recover_min_count = 3, intensity_weighted = FALSE, + do_plot = FALSE, cluster = 4 ) } @@ -107,6 +108,8 @@ in which case 0.5 times the retention time tolerance in the aligned object will \item{intensity_weighted}{Whether to use intensity to weight mass density estimation.} \item{cluster}{The number of CPU cores to be used} + +\item{do.plot}{Indicates whether plot should be drawn.} } \description{ features extraction in hybrid mode. diff --git a/man/interpol.area.Rd b/man/interpol.area.Rd index e01a2cd..f6d4ac4 100644 --- a/man/interpol.area.Rd +++ b/man/interpol.area.Rd @@ -1,29 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interpol.area.R \name{interpol.area} \alias{interpol.area} -%- Also NEED an '\alias' for EACH other topic documented here. -\title{ - Interpolate missing intensities and calculate the area for a single EIC. -} -\description{ - This is an internal function that's not supposed to be called directly by the user. -} +\title{Interpolate missing intensities and calculate the area for a single EIC.} \usage{ interpol.area(x, y, all.x, all.w) } -%- maybe also 'usage' for other objects documented here. \arguments{ - \item{x}{the positions of x(retention time) where non-NA y is observed.} - \item{y}{the observed intensities.} - \item{all.x}{all possible x(retention time) in the LCMS profile.} - \item{all.w}{the "footprint" of each measured retention time, used as weight for the corresponding y.} -} -\details{ - This is an internal function. It interpolates missing y using linear interpolation, and then calculates the area under the curve. 
+\item{x}{the positions of x(retention time) where non-NA y is observed.} + +\item{y}{the observed intensities.} + +\item{all.x}{all possible x(retention time) in the LCMS profile.} + +\item{all.w}{the "footprint" of each measured retention time, used as weight for the corresponding y.} } \value{ - The area is returned. +The area is returned. } -\author{ - Tianwei Yu +\description{ +This is an internal function. } -\keyword{ models } \ No newline at end of file diff --git a/man/known.table.common.pos.Rd b/man/known.table.common.pos.Rd deleted file mode 100644 index 7fc3f19..0000000 --- a/man/known.table.common.pos.Rd +++ /dev/null @@ -1,37 +0,0 @@ -\name{known.table.common.pos} -\alias{known.table.common.pos} -\docType{data} -\title{ -A known feature table based on HMDB. -} -\description{ -This table was compiled from HMDB metabolites. It contains only the H+, Na+, K+ and NH4+ derivative of known metabolites. It suites the need for analyzing LC/MS data generated with anion exchange chromatography. -} -\usage{data(known.table.common.pos)} -\format{ -A data frame containing the known metabolite ions and previously found features. 
It contains 18 columns: - "chemical_formula": the chemical formula if knonw; - "HMDB_ID": HMDB ID if known; - "KEGG_compound_ID": KEGG compound ID if known; - "neutral.mass": the neutral mass if known: - "ion.type": the ion form, such as H+, Na+, ..., if known; - "m.z": m/z value, either theoretical for known metabolites, or mean observed value for unknown but previously found features; - "Number_profiles_processed": the total number of LC/MS profiles that were used to build this database; - "Percent_found": in what percentage was this feature found historically amount all data processed in building this database; - "mz_min": the minimum m/z value observed for this feature; - "mz_max": the maximum m/z value observed for this feature; - "RT_mean": the mean retention time observed for this feature; - "RT_sd": the standard deviation of retention time observed for this feature; - "RT_min": the minimum retention time observed for this feature; - "RT_max": the maximum retention time observed for this feature; - "int_mean.log.": the mean log intensity observed for this feature; - "int_sd.log.": the standard deviation of log intensity observed for this feature; - "int_min.log.": the minimum log intensity observed for this feature; - "int_max.log.": the maximum log intensity observed for this feature; -} -\source{ -Wishart, D. S., et al. (2009). HMDB: a knowledgebase for the human metabolome. Nucleic Acids Res 37, D603-10.} -\examples{ -data(known.table.common.pos) -} -\keyword{datasets} \ No newline at end of file diff --git a/man/known.table.hplus.Rd b/man/known.table.hplus.Rd deleted file mode 100644 index 6eb63e8..0000000 --- a/man/known.table.hplus.Rd +++ /dev/null @@ -1,37 +0,0 @@ -\name{known.table.hplus} -\alias{known.table.hplus} -\docType{data} -\title{ -A known feature table based on HMDB. -} -\description{ -This table was compiled from HMDB metabolites. It contains only the H+ derivative of known metabolites. 
It suites the need for analyzing LC/MS data generated with anion exchange chromatography. -} -\usage{data(known.table.hplus)} -\format{ -A data frame containing the known metabolite ions and previously found features. It contains 18 columns: - "chemical_formula": the chemical formula if knonw; - "HMDB_ID": HMDB ID if known; - "KEGG_compound_ID": KEGG compound ID if known; - "neutral.mass": the neutral mass if known: - "ion.type": the ion form, such as H+, Na+, ..., if known; - "m.z": m/z value, either theoretical for known metabolites, or mean observed value for unknown but previously found features; - "Number_profiles_processed": the total number of LC/MS profiles that were used to build this database; - "Percent_found": in what percentage was this feature found historically amount all data processed in building this database; - "mz_min": the minimum m/z value observed for this feature; - "mz_max": the maximum m/z value observed for this feature; - "RT_mean": the mean retention time observed for this feature; - "RT_sd": the standard deviation of retention time observed for this feature; - "RT_min": the minimum retention time observed for this feature; - "RT_max": the maximum retention time observed for this feature; - "int_mean.log.": the mean log intensity observed for this feature; - "int_sd.log.": the standard deviation of log intensity observed for this feature; - "int_min.log.": the minimum log intensity observed for this feature; - "int_max.log.": the maximum log intensity observed for this feature; -} -\source{ -Wishart, D. S., et al. (2009). HMDB: a knowledgebase for the human metabolome. 
Nucleic Acids Res 37, D603-10.} -\examples{ -data(known.table.hplus) -} -\keyword{datasets} diff --git a/man/load.lcms.Rd b/man/load.lcms.Rd index 3f3b4ed..7d72106 100644 --- a/man/load.lcms.Rd +++ b/man/load.lcms.Rd @@ -1,24 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load.lcms.R \name{load.lcms} \alias{load.lcms} -\title{ Loading LC/MS data. } -\description{ This is an internal function. It loads LC/MS data into memory. } +\title{Loading LC/MS data.} \usage{ load.lcms(filename) } \arguments{ - \item{filename}{ The CDF file name. } +\item{filename}{The CDF file name.} } -\details{ The function uses functionality provided by the mzR package from Bioconductor. } \value{ A list is returned. - \item{masses}{ The vector of m/z values. } - \item{labels}{ The vector of retention times. } - \item{intensi}{ The vector of intensity values. } - \item{times}{ The vector of unique time points. } +\itemize{ + \item masses - The vector of m/z values. + \item labels - The vector of retention times. + \item intensi - The vector of intensity values. + \item times - The vector of unique time points. } -\references{ -Bioinformatics. 25(15):1930-36. -BMC Bioinformatics. 11:559. } -\author{ Tianwei Yu } -\keyword{ models } +\description{ +This is an internal function. It loads LC/MS data into memory. +} diff --git a/man/new.aligned.Rd b/man/new.aligned.Rd deleted file mode 100644 index dbc2b26..0000000 --- a/man/new.aligned.Rd +++ /dev/null @@ -1,22 +0,0 @@ -\name{new.aligned} -\alias{new.aligned} -\docType{data} -\title{ Feature data after alignment and weak signal recovery } -\description{ - This is the aligned feature data from 4 sample profiles after weaker signal recovery. -} -\usage{data(new.aligned)} -\format{ - The format is: -List of 4 - $ aligned.ftrs: A matrix with m/z, median elution time, and signal strength in each spectrum. - $ pk.times : A matrix with m/z, median elution time, and elution time in each spectrum. 
- $ mz.tol : num 1.14e-05 (the m/z tolerance level in alignment) - $ chr.tol : num 79.21 (the elution time tolerance level in alignment) -} -\source{ -Dean Jones lab. Emory University, School of Medicine.} -\examples{ -data(new.aligned) -} -\keyword{datasets} diff --git a/man/new.aligned.learn.Rd b/man/new.aligned.learn.Rd deleted file mode 100644 index b3167fe..0000000 --- a/man/new.aligned.learn.Rd +++ /dev/null @@ -1,22 +0,0 @@ -\name{new.aligned.learn} -\alias{new.aligned.learn} -\docType{data} -\title{ Feature data after alignment and weak signal recovery. The initial peak detection is done by machine learning approach. } -\description{ - This is the aligned feature data from 4 sample profiles after weaker signal recovery. -} -\usage{data(new.aligned)} -\format{ - The format is: -List of 4 - $ aligned.ftrs: A matrix with m/z, median elution time, and signal strength in each spectrum. - $ pk.times : A matrix with m/z, median elution time, and elution time in each spectrum. - $ mz.tol : num 1.14e-05 (the m/z tolerance level in alignment) - $ chr.tol : num 79.21 (the elution time tolerance level in alignment) -} -\source{ -Dean Jones lab. 
Emory University, School of Medicine.} -\examples{ -data(new.aligned.learn) -} -\keyword{datasets} diff --git a/man/proc.cdf.Rd b/man/proc.cdf.Rd index 728f31e..4632382 100644 --- a/man/proc.cdf.Rd +++ b/man/proc.cdf.Rd @@ -6,14 +6,14 @@ \usage{ proc.cdf( filename, - min_pres = 0.5, - min_run = 12, - mz_tol = 1e-05, - baseline_correct = 0, - baseline_correct_noise_percentile = 0.05, - intensity_weighted = FALSE, - do.plot = FALSE, - cache = FALSE + min_pres, + min_run, + mz_tol, + baseline_correct, + baseline_correct_noise_percentile, + intensity_weighted, + do.plot, + cache ) } \arguments{ diff --git a/man/prof.Rd b/man/prof.Rd deleted file mode 100644 index b7e5754..0000000 --- a/man/prof.Rd +++ /dev/null @@ -1,17 +0,0 @@ -\name{prof} -\alias{prof} -\docType{data} -\title{ Sample profile data after noise filtration by the run filter } -\description{ - A list object containing 4 matrices. Each matrix is from an LC/MS profile. -} -\usage{data(prof)} -\format{ -Each matrix contains 4 columns: m/z, retention time, intensity, and group number. -} -\source{Data from Dean Jones lab, Emory University School of Medicine.} -\examples{ -data(prof) -this.feature<-prof.to.features(prof[[1]]) -} -\keyword{datasets} diff --git a/man/prof.learn.Rd b/man/prof.learn.Rd deleted file mode 100644 index 245a01a..0000000 --- a/man/prof.learn.Rd +++ /dev/null @@ -1,17 +0,0 @@ -\name{prof.learn} -\alias{prof.learn} -\docType{data} -\title{ Sample profile data after noise filtration by the machine learning approach. } -\description{ - A list object containing 4 matrices. Each matrix is from an LC/MS profile. -} -\usage{data(prof)} -\format{ -Each matrix contains 4 columns: m/z, retention time, intensity, and group number. 
-} -\source{Data from Dean Jones lab, Emory University School of Medicine.} -\examples{ -data(prof.learn) -this.feature<-prof.to.features(prof.learn[[1]][[1]]) -} -\keyword{datasets} diff --git a/man/prof.to.features.Rd b/man/prof.to.features.Rd index 13883f5..74de75d 100644 --- a/man/prof.to.features.Rd +++ b/man/prof.to.features.Rd @@ -6,17 +6,17 @@ \usage{ prof.to.features( profile, - bandwidth = 0.5, - min_bandwidth = NA, - max_bandwidth = NA, - sd_cut = c(0.01, 500), - sigma_ratio_lim = c(0.01, 100), - shape_model = "bi-Gaussian", - peak_estim_method = "moment", - moment_power = 1, - component_eliminate = 0.01, - BIC_factor = 2, - do.plot = TRUE + bandwidth, + min_bandwidth, + max_bandwidth, + sd_cut, + sigma_ratio_lim, + shape_model, + peak_estim_method, + moment_power, + component_eliminate, + BIC_factor, + do.plot ) } \arguments{ diff --git a/man/recover.weaker.Rd b/man/recover.weaker.Rd index 245191e..2be7dc5 100644 --- a/man/recover.weaker.Rd +++ b/man/recover.weaker.Rd @@ -14,15 +14,15 @@ recover.weaker( rt_tol_relative, extracted_features, adjusted_features, - recover_mz_range = NA, - recover_rt_range = NA, - use_observed_range = TRUE, - mz_tol = 1e-05, - min_bandwidth = NA, - max_bandwidth = NA, - bandwidth = 0.5, - recover_min_count = 3, - intensity_weighted = FALSE + recover_mz_range, + recover_rt_range, + use_observed_range, + mz_tol, + min_bandwidth, + max_bandwidth, + bandwidth, + recover_min_count, + intensity_weighted ) } \arguments{ diff --git a/man/recovered.Rd b/man/recovered.Rd deleted file mode 100644 index 7c336fd..0000000 --- a/man/recovered.Rd +++ /dev/null @@ -1,19 +0,0 @@ -\name{recovered} -\alias{recovered} -\docType{data} -\title{Sample data after weak signal recovery} -\description{This is a list object with four items, each from a sample LC/MS profile.} -\usage{data(recovered)} -\format{ - The format is: -List of 4, each item is in turn a list of 4. - $this.ftrs : the signal strength of each feature identified. 
- $this.times : the retention time of each feature identified. - $this.f1 : A matrix with m/z, retention time, retention time spread, and peak area - this.f2 : A matrix with m/z, retention time, retention time spread, peak area, file identifier, and feature identifier -} -\source{Dean Jones lab. Emory University, School of Medicine.} -\examples{ -data(recovered) -} -\keyword{datasets} \ No newline at end of file diff --git a/man/recovered.learn.Rd b/man/recovered.learn.Rd deleted file mode 100644 index a86f65b..0000000 --- a/man/recovered.learn.Rd +++ /dev/null @@ -1,19 +0,0 @@ -\name{recovered.learn} -\alias{recovered.learn} -\docType{data} -\title{Sample data after weak signal recovery. The original peak detection was conducted using machine learning approach} -\description{This is a list object with four items, each from a sample LC/MS profile.} -\usage{data(recovered)} -\format{ - The format is: -List of 4, each item is in turn a list of 4. - $this.ftrs : the signal strength of each feature identified. - $this.times : the retention time of each feature identified. - $this.f1 : A matrix with m/z, retention time, retention time spread, and peak area - this.f2 : A matrix with m/z, retention time, retention time spread, peak area, file identifier, and feature identifier -} -\source{Dean Jones lab. Emory University, School of Medicine.} -\examples{ -data(recovered.learn) -} -\keyword{datasets} \ No newline at end of file diff --git a/man/rm.ridge.Rd b/man/rm.ridge.Rd index a238152..3317dbd 100644 --- a/man/rm.ridge.Rd +++ b/man/rm.ridge.Rd @@ -1,19 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rm.ridge.R \name{rm.ridge} \alias{rm.ridge} \title{Removing long ridges at the same m/z.} -\description{ This is an internal function. It substracts a background estimated through kernel smoothing when an EIC continuously span more than half the retention time range. 
} \usage{ rm.ridge(x, y2, bw) } \arguments{ - \item{x}{ Retention time vector. } - \item{y2}{ Intensity vector. } - \item{bw}{ Bandwidth for the kernel smoother. A very wide one is used here. } +\item{x}{Retention time vector.} + +\item{y2}{Intensity vector.} + +\item{bw}{Bandwidth for the kernel smoother. A very wide one is used here.} } -\value{ A vector of intensity value is returned. } -\references{ -Bioinformatics. 25(15):1930-36. -BMC Bioinformatics. 11:559. +\value{ +A vector of intensity values is returned. +} +\description{ +This is an internal function. It subtracts a background estimated through kernel smoothing when an EIC continuously +spans more than half the retention time range. } -\author{ Tianwei Yu } -\keyword{ models } \ No newline at end of file diff --git a/man/run_filter.Rd b/man/run_filter.Rd index 04806ee..911e5c7 100644 --- a/man/run_filter.Rd +++ b/man/run_filter.Rd @@ -4,7 +4,7 @@ \alias{run_filter} \title{Continuity index.} \usage{ -run_filter(newprof, min_pres = 0.6, min_run = 5) +run_filter(newprof, min_pres, min_run) } \arguments{ \item{newprof}{The matrix containing m/z, retention time, intensity, and EIC label as columns.} diff --git a/man/unsupervised.Rd b/man/unsupervised.Rd index 1315cef..aa0527a 100644 --- a/man/unsupervised.Rd +++ b/man/unsupervised.Rd @@ -30,6 +30,7 @@ unsupervised( use_observed_range = TRUE, recover_min_count = 3, intensity_weighted = FALSE, + do_plot = FALSE, cluster = 4 ) } @@ -103,6 +104,8 @@ the spectra will be used.} \item{intensity_weighted}{Whether to use intensity to weight mass density estimation.} \item{cluster}{The number of CPU cores to be used} + +\item{do_plot}{Indicates whether plot should be drawn.} } \description{ features extraction in unsupervised mode. 
diff --git a/tests/testthat/test-benchmark-extract_features.R b/tests/testthat/test-benchmark-extract_features.R index 71b11cb..ed2754e 100644 --- a/tests/testthat/test-benchmark-extract_features.R +++ b/tests/testthat/test-benchmark-extract_features.R @@ -41,6 +41,7 @@ patrick::with_parameters_test_that( actual <- snow::parLapply(cluster, profiles, function(profile) { prof.to.features( profile = profile, + bandwidth = 0.5, min_bandwidth = NA, max_bandwidth = NA, sd_cut = sd_cut, @@ -96,7 +97,7 @@ patrick::with_parameters_test_that( intensity_weighted = FALSE, sd_cut = c(0.01, 500), sigma_ratio_lim = c(0.01, 100), - skip = TRUE + skip = FALSE ) ) ) diff --git a/tests/testthat/test-compute_clusters.R b/tests/testthat/test-compute_clusters.R index 69069dc..1f1613d 100644 --- a/tests/testthat/test-compute_clusters.R +++ b/tests/testthat/test-compute_clusters.R @@ -14,6 +14,7 @@ patrick::with_parameters_test_that( rt_tol_relative = NA, mz_max_diff = mz_max_diff, mz_tol_absolute = mz_tol_absolute, + do.plot = FALSE, sample_names = files ) diff --git a/tests/testthat/test-extract_features.R b/tests/testthat/test-extract_features.R index c7612a5..23dddae 100644 --- a/tests/testthat/test-extract_features.R +++ b/tests/testthat/test-extract_features.R @@ -38,6 +38,7 @@ patrick::with_parameters_test_that( actual <- snow::parLapply(cluster, profiles, function(profile) { prof.to.features( profile = profile, + bandwidth = 0.5, min_bandwidth = NA, max_bandwidth = NA, sd_cut = sd_cut, diff --git a/tests/testthat/test-proc.cdf.R b/tests/testthat/test-proc.cdf.R index 9e5017e..dfaf594 100644 --- a/tests/testthat/test-proc.cdf.R +++ b/tests/testthat/test-proc.cdf.R @@ -11,7 +11,10 @@ patrick::with_parameters_test_that( min_pres = min_pres, min_run = min_run, mz_tol = mz_tol, + baseline_correct = 0.0, + baseline_correct_noise_percentile = 0.05, intensity_weighted = intensity_weighted, + do.plot = FALSE, cache = cache ) diff --git a/tests/testthat/test-prof.to.features.R 
b/tests/testthat/test-prof.to.features.R index 9caf178..ee809cc 100644 --- a/tests/testthat/test-prof.to.features.R +++ b/tests/testthat/test-prof.to.features.R @@ -7,9 +7,16 @@ patrick::with_parameters_test_that( actual <- prof.to.features( profile = extracted_features, + bandwidth = 0.5, + min_bandwidth = NA, + max_bandwidth = NA, sd_cut = sd_cut, sigma_ratio_lim = sigma_ratio_lim, shape_model = shape_model, + peak_estim_method = "moment", + moment_power = 1, + component_eliminate = 0.01, + BIC_factor = 2, do.plot = do.plot )