diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f2aff0..98f46a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - refactored find.match [#193](https://github.com/RECETOX/recetox-aplcms/pull/193) - Simplified Evaluation Conditions For Test Cases [#197](https://github.com/RECETOX/recetox-aplcms/pull/197) +- Renamed `proc.cdf` to `remove_noise` [#190](https://github.com/RECETOX/recetox-aplcms/pull/190) ### Removed ## [0.10.3] - 2023-03-27 diff --git a/NAMESPACE b/NAMESPACE index 2f14ee9..472287e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,9 +71,9 @@ export(predict_smoothed_rt) export(prep.uv) export(preprocess_bandwidth) export(preprocess_profile) -export(proc.cdf) export(prof.to.features) export(recover.weaker) +export(remove_noise) export(rev_cum_sum) export(rm.ridge) export(run_filter) diff --git a/R/hybrid.R b/R/hybrid.R index 08ac92e..2e4c9bf 100644 --- a/R/hybrid.R +++ b/R/hybrid.R @@ -256,7 +256,7 @@ augment_known_table <- function( #' @param filenames The CDF file names. #' @param known_table Table of known chemicals. #' @param min_occurrence A feature has to show up in at least this number of profiles to be included in the final result. -#' @param min_pres This is a parameter of the run filter, to be passed to the function proc.cdf(). +#' @param min_pres This is a parameter of the run filter, to be passed to the function remove_noise(). #' @param min_run Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z to be considered a peak. #' @param mz_tol m/z tolerance level for the grouping of data points. This value is expressed as the fraction of the m/z value. #' This value, multiplied by the m/z value, becomes the cutoff level. The recommended value is the machine's nominal accuracy level. 
@@ -344,7 +344,7 @@ hybrid <- function( message("**** feature extraction ****") profiles <- snow::parLapply(cluster, filenames, function(filename) { - proc.cdf( + remove_noise( filename = filename, min_pres = min_pres, min_run = min_run, diff --git a/R/prof.to.features.R b/R/prof.to.features.R index a573dd8..05d128a 100644 --- a/R/prof.to.features.R +++ b/R/prof.to.features.R @@ -869,10 +869,10 @@ normix.bic <- function(x, y, do.plot = FALSE, bw = c(15, 30, 60), eliminate = .0 #' Generate feature table from noise-removed LC/MS profile. #' @description -#' Each LC/MS profile is first processed by the function proc.cdf() to remove noise and reduce data size. A matrix containing m/z -#' value, retention time, intensity, and group number is output from proc.cdf(). This matrix is then fed to the function +#' Each LC/MS profile is first processed by the function remove_noise() to remove noise and reduce data size. A matrix containing m/z +#' value, retention time, intensity, and group number is output from remove_noise(). This matrix is then fed to the function #' prof.to.features() to generate a feature list. Every detected feature is summarized into a single row in the output matrix from this function. -#' @param profile The matrix output from proc.cdf(). It contains columns of m/z value, retention time, intensity and group number. +#' @param profile The matrix output from remove_noise(). It contains columns of m/z value, retention time, intensity and group number. #' @param bandwidth A value between zero and one. Multiplying this value to the length of the signal along the time axis helps #' determine the bandwidth in the kernel smoother used for peak identification. #' @param min_bandwidth The minimum bandwidth to use in the kernel smoother. 
diff --git a/R/recover.weaker.R b/R/recover.weaker.R index 5c3b371..e8edfd1 100644 --- a/R/recover.weaker.R +++ b/R/recover.weaker.R @@ -636,7 +636,7 @@ refine_selection <- function(target_rt, rectangle, aligned_mz, rt_tol, mz_tol) { #' The default value is NA, in which case 0.5 times the retention time tolerance in the aligned object will be used. #' @param use_observed_range If the value is TRUE, the actual range of the observed locations #' of the feature in all the spectra will be used. -#' @param mz_tol The mz.tol parameter provided to the proc.cdf() function. This helps retrieve the intermediate file. +#' @param mz_tol The mz_tol parameter provided to the remove_noise() function. This helps retrieve the intermediate file. #' @param min_bandwidth The minimum bandwidth to use in the kernel smoother. #' @param max_bandwidth The maximum bandwidth to use in the kernel smoother. #' @param bandwidth A value between zero and one. Multiplying this value to the length of the signal along the diff --git a/R/proc.cdf.R b/R/remove_noise.R similarity index 99% rename from R/proc.cdf.R rename to R/remove_noise.R index e98537c..22529af 100644 --- a/R/proc.cdf.R +++ b/R/remove_noise.R @@ -61,7 +61,7 @@ load_data <- function(filename, #' @param cache Whether to use cache #' @return A matrix with four columns: m/z value, retention time, intensity, and group number. #' @export -proc.cdf <- function(filename, +remove_noise <- function(filename, min_pres, min_run, mz_tol, diff --git a/R/semi.sup.R b/R/semi.sup.R index acb7660..19f3c12 100644 --- a/R/semi.sup.R +++ b/R/semi.sup.R @@ -14,12 +14,12 @@ NULL #' @param known.table A data frame containing the known metabolite ions and previously found features. #' @param n.nodes The number of CPU cores to be used #' @param min.exp If a feature is to be included in the final feature table, it must be present in at least this number of spectra. 
-#' @param min.pres This is a parameter of thr run filter, to be passed to the function proc.cdf(). -#' @param min.run This is a parameter of thr run filter, to be passed to the function proc.cdf(). +#' @param min.pres This is a parameter of the run filter, to be passed to the function remove_noise(). +#' @param min.run This is a parameter of the run filter, to be passed to the function remove_noise(). #' @param mz.tol The user can provide the m/z tolerance level for peak identification. This value is expressed #' as the percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level. #' @param baseline.correct.noise.percentile The perenctile of signal strength of those EIC that don't pass the run filter, -#' to be used as the baseline threshold of signal strength. This parameter is passed to proc.cdf() +#' to be used as the baseline threshold of signal strength. This parameter is passed to remove_noise() #' @param shape.model The mathematical model for the shape of a peak. There are two choices - bi-Gaussian and Gaussian. #' When the peaks are asymmetric, the bi-Gaussian is better. The default is bi-Gaussian. #' @param BIC.factor the factor that is multiplied on the number of parameters to modify the BIC criterion. 
If @@ -144,7 +144,7 @@ semi.sup <- function( that.name<-paste(strsplit(tolower(files[j]),"\\.")[[1]][1],suf.prof,".profile",sep="_") processable<-"goodgood" - processable<-try(this.prof<-proc.cdf(files[j], min_pres=min.pres, min_run=min.run, mz_tol=mz.tol, baseline_correct=baseline.correct, baseline_correct_noise_percentile=baseline.correct.noise.percentile, do.plot=FALSE, intensity_weighted=intensity.weighted, cache=FALSE)) + processable<-try(this.prof<-remove_noise(files[j], min_pres=min.pres, min_run=min.run, mz_tol=mz.tol, baseline_correct=baseline.correct, baseline_correct_noise_percentile=baseline.correct.noise.percentile, do.plot=FALSE, intensity_weighted=intensity.weighted, cache=FALSE)) if(substr(processable,1,5)=="Error") { file.copy(from=files[j], to="error_files") diff --git a/R/two.step.hybrid.R b/R/two.step.hybrid.R index bcb130f..559c2c6 100644 --- a/R/two.step.hybrid.R +++ b/R/two.step.hybrid.R @@ -390,12 +390,12 @@ semisup_to_hybrid_adapter <- function(batchwise, batches_idx) { #' @param batch.align.rt.tol The RT tolerance for between-batch alignment. #' @param known.table A data frame containing the known metabolite ions and previously found features. #' @param cluster The number of CPU cores to be used -#' @param min.pres This is a parameter of the run filter, to be passed to the function proc.cdf(). -#' @param min.run This is a parameter of the run filter, to be passed to the function proc.cdf(). +#' @param min.pres This is a parameter of the run filter, to be passed to the function remove_noise(). +#' @param min.run This is a parameter of the run filter, to be passed to the function remove_noise(). #' @param mz.tol The user can provide the m/z tolerance level for peak identification. This value is expressed as the #' percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level. 
#' @param baseline.correct.noise.percentile The perenctile of signal strength of those EIC that don't pass the run filter, -#' to be used as the baseline threshold of signal strength. This parameter is passed to proc.cdf() +#' to be used as the baseline threshold of signal strength. This parameter is passed to remove_noise() #' @param shape.model The mathematical model for the shape of a peak. There are two choices - "bi-Gaussian" and "Gaussian". #' When the peaks are asymmetric, the bi-Gaussian is better. The default is "bi-Gaussian". #' @param baseline.correct This is a parameter in peak detection. After grouping the observations, the highest observation diff --git a/R/unsupervised.R b/R/unsupervised.R index 6012fb2..a41caf0 100644 --- a/R/unsupervised.R +++ b/R/unsupervised.R @@ -46,7 +46,7 @@ get_sample_name <- function(filename) { #' #' @param filenames The CDF file names. #' @param min_occurrence A feature has to show up in at least this number of profiles to be included in the final result. -#' @param min_pres This is a parameter of the run filter, to be passed to the function proc.cdf(). +#' @param min_pres This is a parameter of the run filter, to be passed to the function remove_noise(). #' @param min_run Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z #' to be considered a peak. #' @param mz_tol m/z tolerance level for the grouping of data points. 
This value is expressed as the fraction of @@ -134,7 +134,7 @@ unsupervised <- function( message("**** feature extraction ****") profiles <- snow::parLapply(cluster, filenames, function(filename) { - proc.cdf( + remove_noise( filename = filename, min_pres = min_pres, min_run = min_run, diff --git a/R/utils.R b/R/utils.R index 368ca06..b82c7b2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -4,7 +4,7 @@ NULL register_functions_to_cluster <- function(cluster) { snow::clusterExport(cluster, list( - 'proc.cdf', + 'remove_noise', 'prof.to.features', 'load.lcms', 'adaptive.bin', @@ -123,4 +123,4 @@ get_num_workers <- function() { num_workers <- parallel::detectCores() } return(num_workers) -} \ No newline at end of file +} diff --git a/man/hybrid.Rd b/man/hybrid.Rd index e8c77ff..70df963 100644 --- a/man/hybrid.Rd +++ b/man/hybrid.Rd @@ -44,7 +44,7 @@ hybrid( \item{min_occurrence}{A feature has to show up in at least this number of profiles to be included in the final result.} -\item{min_pres}{This is a parameter of the run filter, to be passed to the function proc.cdf().} +\item{min_pres}{This is a parameter of the run filter, to be passed to the function remove_noise().} \item{min_run}{Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z to be considered a peak.} diff --git a/man/prof.to.features.Rd b/man/prof.to.features.Rd index 74de75d..b0edd70 100644 --- a/man/prof.to.features.Rd +++ b/man/prof.to.features.Rd @@ -20,7 +20,7 @@ prof.to.features( ) } \arguments{ -\item{profile}{The matrix output from proc.cdf(). It contains columns of m/z value, retention time, intensity and group number.} +\item{profile}{The matrix output from remove_noise(). It contains columns of m/z value, retention time, intensity and group number.} \item{bandwidth}{A value between zero and one. 
Multiplying this value to the length of the signal along the time axis helps determine the bandwidth in the kernel smoother used for peak identification.} @@ -54,7 +54,7 @@ A matrix is returned. The columns are: m/z value, retention time, spread (standa curve), and estimated total signal strength (total area of the estimated normal curve). } \description{ -Each LC/MS profile is first processed by the function proc.cdf() to remove noise and reduce data size. A matrix containing m/z -value, retention time, intensity, and group number is output from proc.cdf(). This matrix is then fed to the function +Each LC/MS profile is first processed by the function remove_noise() to remove noise and reduce data size. A matrix containing m/z +value, retention time, intensity, and group number is output from remove_noise(). This matrix is then fed to the function prof.to.features() to generate a feature list. Every detected feature is summarized into a single row in the output matrix from this function. } diff --git a/man/recover.weaker.Rd b/man/recover.weaker.Rd index 2be7dc5..8d34ed8 100644 --- a/man/recover.weaker.Rd +++ b/man/recover.weaker.Rd @@ -54,7 +54,7 @@ The default value is NA, in which case 0.5 times the retention time tolerance in \item{use_observed_range}{If the value is TRUE, the actual range of the observed locations of the feature in all the spectra will be used.} -\item{mz_tol}{The mz.tol parameter provided to the proc.cdf() function. This helps retrieve the intermediate file.} +\item{mz_tol}{The mz.tol parameter provided to the remove_noise() function. 
This helps retrieve the intermediate file.} \item{min_bandwidth}{The minimum bandwidth to use in the kernel smoother.} diff --git a/man/proc.cdf.Rd b/man/remove_noise.Rd similarity index 94% rename from man/proc.cdf.Rd rename to man/remove_noise.Rd index 32e89d0..82949f3 100644 --- a/man/proc.cdf.Rd +++ b/man/remove_noise.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/proc.cdf.R -\name{proc.cdf} -\alias{proc.cdf} +% Please edit documentation in R/remove_noise.R +\name{remove_noise} +\alias{remove_noise} \title{Filter noise and detect peaks from LC/MS data in CDF format} \usage{ -proc.cdf( +remove_noise( filename, min_pres, min_run, diff --git a/man/semi.sup.Rd b/man/semi.sup.Rd index 4168d52..c141d5b 100644 --- a/man/semi.sup.Rd +++ b/man/semi.sup.Rd @@ -49,15 +49,15 @@ semi.sup( \item{min.exp}{If a feature is to be included in the final feature table, it must be present in at least this number of spectra.} -\item{min.pres}{This is a parameter of thr run filter, to be passed to the function proc.cdf().} +\item{min.pres}{This is a parameter of the run filter, to be passed to the function remove_noise().} -\item{min.run}{This is a parameter of thr run filter, to be passed to the function proc.cdf().} +\item{min.run}{This is a parameter of the run filter, to be passed to the function remove_noise().} \item{mz.tol}{The user can provide the m/z tolerance level for peak identification. This value is expressed as the percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.} \item{baseline.correct.noise.percentile}{The perenctile of signal strength of those EIC that don't pass the run filter, -to be used as the baseline threshold of signal strength. This parameter is passed to proc.cdf()} +to be used as the baseline threshold of signal strength. This parameter is passed to remove_noise()} \item{shape.model}{The mathematical model for the shape of a peak. 
There are two choices - bi-Gaussian and Gaussian. When the peaks are asymmetric, the bi-Gaussian is better. The default is bi-Gaussian.} diff --git a/man/two.step.hybrid.Rd b/man/two.step.hybrid.Rd index da51f90..71f8f45 100644 --- a/man/two.step.hybrid.Rd +++ b/man/two.step.hybrid.Rd @@ -67,15 +67,15 @@ considered in the entire data.} \item{cluster}{The number of CPU cores to be used} -\item{min.pres}{This is a parameter of the run filter, to be passed to the function proc.cdf().} +\item{min.pres}{This is a parameter of the run filter, to be passed to the function remove_noise().} -\item{min.run}{This is a parameter of the run filter, to be passed to the function proc.cdf().} +\item{min.run}{This is a parameter of the run filter, to be passed to the function remove_noise().} \item{mz.tol}{The user can provide the m/z tolerance level for peak identification. This value is expressed as the percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.} \item{baseline.correct.noise.percentile}{The perenctile of signal strength of those EIC that don't pass the run filter, -to be used as the baseline threshold of signal strength. This parameter is passed to proc.cdf()} +to be used as the baseline threshold of signal strength. This parameter is passed to remove_noise()} \item{shape.model}{The mathematical model for the shape of a peak. There are two choices - "bi-Gaussian" and "Gaussian". When the peaks are asymmetric, the bi-Gaussian is better. 
The default is "bi-Gaussian".} diff --git a/man/unsupervised.Rd b/man/unsupervised.Rd index 99ceb69..f41cf6a 100644 --- a/man/unsupervised.Rd +++ b/man/unsupervised.Rd @@ -39,7 +39,7 @@ unsupervised( \item{min_occurrence}{A feature has to show up in at least this number of profiles to be included in the final result.} -\item{min_pres}{This is a parameter of the run filter, to be passed to the function proc.cdf().} +\item{min_pres}{This is a parameter of the run filter, to be passed to the function remove_noise().} \item{min_run}{Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z to be considered a peak.} diff --git a/tests/testthat/test-benchmark-extract_features.R b/tests/testthat/test-benchmark-extract_features.R index 3310b15..41f4848 100644 --- a/tests/testthat/test-benchmark-extract_features.R +++ b/tests/testthat/test-benchmark-extract_features.R @@ -25,7 +25,7 @@ patrick::with_parameters_test_that( res <- microbenchmark::microbenchmark( extract_feature = { profiles <- snow::parLapply(cluster, filenames, function(filename) { - proc.cdf( + remove_noise( filename = filename, min_pres = min_pres, min_run = min_run, diff --git a/tests/testthat/test-extract_features.R b/tests/testthat/test-extract_features.R index 11b5c99..bcf5a69 100644 --- a/tests/testthat/test-extract_features.R +++ b/tests/testthat/test-extract_features.R @@ -22,7 +22,7 @@ patrick::with_parameters_test_that( register_functions_to_cluster(cluster) profiles <- snow::parLapply(cluster, filenames, function(filename) { - proc.cdf( + remove_noise( filename = filename, min_pres = min_pres, min_run = min_run, diff --git a/tests/testthat/test-proc.cdf.R b/tests/testthat/test-proc.cdf.R index dfaf594..3fe7949 100644 --- a/tests/testthat/test-proc.cdf.R +++ b/tests/testthat/test-proc.cdf.R @@ -1,12 +1,12 @@ patrick::with_parameters_test_that( - "test proc.cdf", + "test remove_noise", { if(ci_skip == TRUE) skip_on_ci() testdata <- file.path("..", "testdata") 
input_path <- file.path(testdata, "input", filename) - sut <- proc.cdf( + sut <- remove_noise( input_path, min_pres = min_pres, min_run = min_run,