diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index e25854db..3f0a920c 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -2,42 +2,40 @@
 // https://github.com/microsoft/vscode-dev-containers/tree/v0.217.1/containers/docker-existing-dockerfile
 {
 	"name": "recetox-aplcms-dev",
-
 	// Sets the run context to one level up instead of the .devcontainer folder.
-	"context": "..",
-
 	// Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
-	"dockerFile": "../Dockerfile",
-
+	"image": "ubuntu:20.04",
 	"features": {
-		"git": {
+		"ghcr.io/devcontainers/features/git:1": {
 			"version": "latest",
-			"ppa": false,
+			"ppa": false
 		},
-		"common": {
-			"installZsh": "false",
-			"username": "false",
-		}
+		"ghcr.io/devcontainers/features/common-utils:2": {
+			"installZsh": "false"
+		},
+		"ghcr.io/devcontainers/features/github-cli:1": {},
+		"ghcr.io/devcontainers/features/conda:1": {}
 	},
-	
 	// Add the IDs of extensions you want installed when the container is created.
-	"extensions": [
-		"reditorsupport.r",
-		"rdebugger.r-debugger",
-		"eamodio.gitlens",
-		"mutantdino.resourcemonitor",
-		"meakbiyik.vscode-r-test-adapter",
-		"dvirtz.parquet-viewer",
-		"github.vscode-pull-request-github",
-		"ms-vsliveshare.vsliveshare",
-		"tianyishi.rmarkdown"
-	],
-	"settings": {
-		"r.rterm.linux": "/bin/local/miniconda/envs/recetox-aplcms/bin/radian",
-		"r.rpath.linux": "/bin/local/miniconda/envs/recetox-aplcms/bin/R"
+	"customizations": {
+		"vscode": {
+			"extensions": [
+				"reditorsupport.r",
+				"rdebugger.r-debugger",
+				"eamodio.gitlens",
+				"mutantdino.resourcemonitor",
+				"meakbiyik.vscode-r-test-adapter",
+				"dvirtz.parquet-viewer",
+				"github.vscode-pull-request-github",
+				"ms-vsliveshare.vsliveshare",
+				"tianyishi.rmarkdown"
+			],
+			"settings": {
+				"r.rterm.linux": "/opt/conda/envs/recetox-aplcms-dev/bin/radian",
+				"r.rpath.linux": "/opt/conda/envs/recetox-aplcms-dev/bin/R"
+			}
+		}		
 	},
-
-	"onCreateCommand": "apt update && apt install -y locales && locale-gen en_US.UTF-8 && git config --global --add safe.directory /workspaces/recetox-aplcms",
-
+	"onCreateCommand": "apt update && apt install -y locales && locale-gen en_US.UTF-8 && apt-get update -y && apt-get install -y libxml2-dev && apt-get install -y libssl-dev && apt-get install -y libcurl4-openssl-dev && apt-get install -y libcgal-dev && apt-get install -y libglu1-mesa-dev && apt-get install -y wget && git config --global --add safe.directory /workspaces/recetox-aplcms && conda init && conda update -y conda && conda config --add channels conda-forge && conda config --add channels bioconda && conda config --set channel_priority strict && conda env create --file conda/environment-dev.yaml",
 	"postAttachCommand": "/bin/bash"
-}
+}
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14c4ce1b..972d4eee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [dev] - unreleased
 ### Added
+ - implemented new clustrering algorithm and included parallelism in unsupervised and hybrid [#201](https://github.com/RECETOX/recetox-aplcms/pull/201)
 ### Changed
 - refactored adaptive.bin and combine.seq.3 [#196](https://github.com/RECETOX/recetox-aplcms/pull/196)
 - refactored find.match [#193](https://github.com/RECETOX/recetox-aplcms/pull/193)
diff --git a/NAMESPACE b/NAMESPACE
index eb1bd7d3..1061d6aa 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(solve,a)
-S3method(solve,sigma)
 export(adaptive.bin)
 export(adjust.time)
 export(aggregate_by_rt)
@@ -15,6 +13,10 @@ export(compute_breaks)
 export(compute_breaks_3)
 export(compute_chromatographic_profile)
 export(compute_clusters)
+export(compute_clusters_simple)
+export(compute_comb)
+export(compute_corrected_features)
+export(compute_curr_rec_with_enough_peaks)
 export(compute_delta_rt)
 export(compute_densities)
 export(compute_dx)
@@ -26,33 +28,43 @@ export(compute_mass_density)
 export(compute_mass_values)
 export(compute_mu_sc_std)
 export(compute_mz_sd)
+export(compute_peaks_and_valleys)
+export(compute_pks_vlys_rt)
+export(compute_rectangle)
 export(compute_rt_intervals_indices)
 export(compute_scale)
+export(compute_sel)
 export(compute_start_bound)
 export(compute_target_times)
 export(compute_template)
+export(compute_template_adjusted_rt)
 export(compute_uniq_grp)
 export(correct_time)
+export(count_peaks)
 export(create_aligned_feature_table)
 export(create_output)
 export(create_rows)
 export(draw_rt_correction_plot)
 export(draw_rt_normal_peaks)
 export(duplicate.row.remove)
+export(fill_missing_values)
 export(filter_based_on_density)
-export(find.tol)
 export(find.turn.point)
 export(find_local_maxima)
 export(find_mz_match)
+export(find_mz_tolerance)
 export(find_optima)
 export(get_custom_rt_tol)
+export(get_features_in_rt_range)
 export(get_mzrange_bound_indices)
 export(get_num_workers)
 export(get_rt_region_indices)
+export(get_single_occurrence_mask)
 export(get_times_to_use)
 export(hybrid)
 export(increment_counter)
 export(interpol.area)
+export(l2normalize)
 export(label_val_to_keep)
 export(load.lcms)
 export(load_aligned_features)
@@ -68,11 +80,12 @@ export(plot_raw_profile_histogram)
 export(plot_rt_profile)
 export(predict_mz_break_indices)
 export(predict_smoothed_rt)
-export(prep.uv)
+export(prep_uv)
 export(preprocess_bandwidth)
 export(preprocess_profile)
 export(prof.to.features)
 export(recover.weaker)
+export(refine_selection)
 export(remove_noise)
 export(rev_cum_sum)
 export(rm.ridge)
@@ -80,6 +93,8 @@ export(run_filter)
 export(select_mz)
 export(select_rt)
 export(semi.sup)
+export(solve_a)
+export(solve_sigma)
 export(sort_data)
 export(span)
 export(two.step.hybrid)
diff --git a/R/adjust.time.R b/R/adjust.time.R
index 86293231..47e4c215 100644
--- a/R/adjust.time.R
+++ b/R/adjust.time.R
@@ -2,6 +2,7 @@
 NULL
 #> NULL
 
+#' @export
 compute_comb <- function(template_features, features) {
   combined <- dplyr::bind_rows(
     template_features,
@@ -11,6 +12,7 @@ compute_comb <- function(template_features, features) {
   return(combined)
 }
 
+#' @export
 compute_sel <- function(combined, mz_tol_relative, rt_tol_relative) {
   l <- nrow(combined)
   sel <- which(combined$mz[2:l] - combined$mz[1:(l - 1)] <
@@ -20,6 +22,7 @@ compute_sel <- function(combined, mz_tol_relative, rt_tol_relative) {
   return(sel)
 }
 
+#' @export
 compute_template_adjusted_rt <- function(combined, sel, j) {
   all_features <- cbind(combined$rt[sel], combined$rt[sel + 1])
   flip_indices <- which(combined$sample_id[sel] == j)
@@ -34,6 +37,7 @@ compute_template_adjusted_rt <- function(combined, sel, j) {
   return(all_features)
 }
 
+#' @export
 compute_corrected_features <- function(features, delta_rt, avg_time) {
   features <- features[order(features$rt, features$mz), ]
   corrected <- features$rt
@@ -58,6 +62,7 @@ compute_corrected_features <- function(features, delta_rt, avg_time) {
   return(features)
 }
 
+#' @export
 fill_missing_values <- function(orig.feature, this.feature) {
   missing_values <- which(is.na(this.feature$rt))
   for (i in missing_values) {
diff --git a/R/compute_clusters.R b/R/compute_clusters.R
index 87d8dc51..402e70ac 100644
--- a/R/compute_clusters.R
+++ b/R/compute_clusters.R
@@ -2,13 +2,13 @@
 #' @description
 #' Sort tibble based on sample_names
 #' @export
-sort_data <- function(sample_names, feature_tables){
+sort_data <- function(sample_names, feature_tables) {
   index <- c()
   for (i in seq_along(sample_names))
   {
-    index <-  append(index, feature_tables[[i]]$sample_id[1])
+    index <- append(index, feature_tables[[i]]$sample_id[1])
   }
-  
+
   index <- match(sample_names, index)
   feature_tables <- feature_tables[index]
 
@@ -16,7 +16,7 @@ sort_data <- function(sample_names, feature_tables){
 }
 
 #' Compute clusters of mz and rt and assign cluster id to individual features.
-#' 
+#'
 #' @description
 #' Uses tolerances to group features with mz and rt within the tolerance into clusters,
 #' creating larger features from raw data points. Custom tolerances for mz and rt are
@@ -34,7 +34,7 @@ sort_data <- function(sample_names, feature_tables){
 #'   \item feature_tables - list - Feature tables with added columns [sample_id, cluster].
 #'   \item rt_tol_relative - float - Newly determined relative rt tolerance.
 #'   \item mz_tol_relative - float - Newly determined relative mz tolerance.
-#'}
+#' }
 #' @export
 compute_clusters <- function(feature_tables,
                              mz_tol_relative,
@@ -45,9 +45,9 @@ compute_clusters <- function(feature_tables,
                              sample_names = NA) {
   number_of_samples <- length(feature_tables)
   all <- concatenate_feature_tables(feature_tables, sample_names)
-  
+
   if (is.na(mz_tol_relative)) {
-    mz_tol_relative <- find.tol(
+    mz_tol_relative <- find_mz_tolerance(
       all$mz,
       mz_max_diff = mz_max_diff,
       aver.bin.size = 4000,
@@ -74,37 +74,104 @@ compute_clusters <- function(feature_tables,
     )
   }
 
-  res <- find.tol.time(
-    all,
-    number_of_samples = number_of_samples,
-    mz_tol_relative = mz_tol_relative,
-    rt_tol_relative = rt_tol_relative,
-    aver.bin.size = 200,
-    min.bins = 50,
-    max.bins = 100,
-    mz_tol_absolute = mz_tol_absolute,
-    max.num.segments = 10000,
-    do.plot = do.plot
+  aver.bin.size <- 200
+  min.bins <- 50
+  max.bins <- 100
+  max.num.segments <- 10000
+
+  features <- dplyr::arrange_at(all, "mz")
+  min_mz_tol <- compute_min_mz_tolerance(
+    features$mz,
+    mz_tol_relative,
+    mz_tol_absolute
   )
 
-  rt_tol_relative <- res$rt.tol
+  mz_breaks <- compute_breaks_3(features$mz, min_mz_tol)
+  features$mz_group <- 0
+
+  for (i in 2:length(mz_breaks)) {
+    subset_indices <- (mz_breaks[i - 1] + 1):mz_breaks[i]
+    features$mz_group[subset_indices] <- i
+  }
+
+  features <- features |> dplyr::arrange_at(c("mz_group", "rt"))
+
+  mz_breaks <- mz_breaks[c(-1, -length(mz_breaks))]
+
+  if (is.na(rt_tol_relative)) {
+    rt_tol_relative <- compute_rt_tol_relative(
+      mz_breaks,
+      max.num.segments,
+      aver.bin.size,
+      number_of_samples,
+      features$rt,
+      min.bins,
+      max.bins
+    )
+  }
+
+  # compute breaks in rt domain
+  rt_diffs <- diff(features$rt)
+  rt_breaks <- which(rt_diffs > rt_tol_relative)
+
+  # combine indices of all breaks in array and sort
+  all.breaks <- c(0, unique(c(mz_breaks, rt_breaks)), nrow(features))
+  all.breaks <- all.breaks[order(all.breaks)]
+
+  features$cluster <- 0
+  for (i in 2:length(all.breaks)) {
+    features$cluster[(all.breaks[i - 1] + 1):all.breaks[i]] <- i
+  }
 
-  message("**** performing time correction ****")
   message(paste("m/z tolerance level: ", mz_tol_relative))
   message(paste("time tolerance level:", rt_tol_relative))
 
-  # Select features from individual samples, sort by mz and rt and 
+  # Select features from individual samples, sort by mz and rt and
   # return the sorted tables as individual tibbles.
-  feature_tables <- res$features |>
+  feature_tables <- features |>
+    dplyr::select(-mz_group) |>
     dplyr::group_by(sample_id) |>
     dplyr::arrange_at(c("mz", "rt")) |>
     dplyr::group_split()
-  
+
   feature_tables <- sort_data(sample_names, feature_tables)
-  
+
   return(list(feature_tables = feature_tables, rt_tol_relative = rt_tol_relative, mz_tol_relative = mz_tol_relative))
 }
 
+#' Compute clusters using simple grouping based on numeric thresholds.
+#' 
+#' @describtion
+#' Features are first grouped in mz dimension based on the tolerance.
+#' First, the absolute tolerance is computed for each feature, then a new group is started
+#' once the difference between consecutive features is above this threshold.
+#' The same process is then repeated for the retention time dimension.
+#' The individual indices are then combined into a single index in the `cluster` columns.
+#' @param feature_tables list of tibbles feature tables coming from all samples.
+#' @param sample_names list of strings Sample names of the feature tables used to distinguish the samples.
+#' @param mz_tol_ppm float Relative tolerance for mz grouping in parts per million.
+#' @param rt_tol float Tolerance in retention time dimension [seconds].
+#' @return list of tibbles Feature tables passed initially with additional columns indicating the 
+#' mz and rt groups as well as the combined cluster index.
+#' @export
+compute_clusters_simple <- function(feature_tables, sample_names, mz_tol_ppm, rt_tol) {
+  all <- concatenate_feature_tables(feature_tables, sample_names) |> dplyr::arrange_at("mz")
+
+  mz_tol_rel <- mz_tol_ppm * 1e-06
+  mz_tol_abs <- all$mz * mz_tol_rel
+
+  all |>
+    dplyr::mutate(mz_group = cumsum(c(0, diff(mz)) > mz_tol_abs)) |>
+    dplyr::group_by(mz_group) |>
+    dplyr::arrange_at("rt") |>
+    dplyr::mutate(rt_group = cumsum(c(0, diff(rt)) > rt_tol)) |>
+    dplyr::group_by(mz_group, rt_group) |>
+    dplyr::mutate(cluster = cur_group_id()) |>
+    dplyr::ungroup() |>
+    dplyr::arrange_at("cluster") |>
+    dplyr::group_by(sample_id) |>
+    dplyr::group_split()
+}
 
 
 # compute_clusters_v2 <- function(feature_tables, mz_tol_ppm, rt_tol) {
diff --git a/R/feature.align.R b/R/feature.align.R
index 2fb725b4..12486898 100644
--- a/R/feature.align.R
+++ b/R/feature.align.R
@@ -134,11 +134,13 @@ create_aligned_feature_table <- function(features_table,
     if (!is(cluster, "cluster")) {
         cluster <- parallel::makeCluster(cluster)
         on.exit(parallel::stopCluster(cluster))
+        
+        # NOTE: side effect (doParallel has no functionality to clean up)
+        doParallel::registerDoParallel(cluster)
+        register_functions_to_cluster(cluster)
     }
 
-    # NOTE: side effect (doParallel has no functionality to clean up)
-    doParallel::registerDoParallel(cluster)
-    register_functions_to_cluster(cluster)
+
 
     number_of_samples <- length(sample_names)
     metadata_colnames <- c("id", "mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "npeaks", sample_names)
diff --git a/R/find.tol.time.R b/R/find.tol.time.R
index eeb5f1a4..1103c9b0 100644
--- a/R/find.tol.time.R
+++ b/R/find.tol.time.R
@@ -1,21 +1,3 @@
-#' Compute minimum mz tolerance to use.
-#' @description
-#' Compute the minimum mz tolerance based on the relative
-#' tolerance and the mz values and the absolute tolerance.
-#' Uses midpoints between mz values for the weighting.
-#' @param mz vector Mz values to use.
-#' @param mz_tol_relative float Relative mz tolerance to use with the mz values.
-#' This forms a sort of weighted tolerance.
-#' @param mz_tol_absolute float Absolute tolerance to use independent from the mz values.
-#' @return float Minimum tolerance values to use.
-compute_min_mz_tolerance <- function(mz, mz_tol_relative, mz_tol_absolute) {
-    l <- length(mz)
-    mz_midpoints <- ((mz[2:l] + mz[1:(l - 1)]) / 2)
-    mz_ftr_relative_tolerances <- mz_tol_relative * mz_midpoints
-    min_mz_tol <- min(mz_tol_absolute, mz_ftr_relative_tolerances)
-    return(min_mz_tol)
-}
-
 #' @description
 #' Compute indices of mass differences greater than min_mz_tol.
 #' @param mz mz values of all peaks in all profiles in the study.
diff --git a/R/find.tol.R b/R/find_mz_tolerance.R
similarity index 67%
rename from R/find.tol.R
rename to R/find_mz_tolerance.R
index b7b10e35..db957efc 100644
--- a/R/find.tol.R
+++ b/R/find_mz_tolerance.R
@@ -2,6 +2,25 @@
 NULL
 #> NULL
 
+#' Compute minimum mz tolerance to use.
+#' @description
+#' Compute the minimum mz tolerance based on the relative
+#' tolerance and the mz values and the absolute tolerance.
+#' Uses midpoints between mz values for the weighting.
+#' @param mz vector Mz values to use.
+#' @param mz_tol_relative float Relative mz tolerance to use with the mz values.
+#' This forms a sort of weighted tolerance.
+#' @param mz_tol_absolute float Absolute tolerance to use independent from the mz values.
+#' @return float Minimum tolerance values to use.
+compute_min_mz_tolerance <- function(mz, mz_tol_relative, mz_tol_absolute) {
+    l <- length(mz)
+    mz_midpoints <- ((mz[2:l] + mz[1:(l - 1)]) / 2)
+    mz_ftr_relative_tolerances <- mz_tol_relative * mz_midpoints
+    min_mz_tol <- min(mz_tol_absolute, mz_ftr_relative_tolerances)
+    return(min_mz_tol)
+}
+
+
 #' An internal function that is not supposed to be directly accessed by the user. Find m/z tolerance level.
 #' 
 #' The function finds the tolerance level in m/z from a given vector of observed m/z values.
@@ -14,38 +33,46 @@ NULL
 #' @param do.plot Indicates whether plot should be drawn.
 #' @return The tolerance level is returned.
 #' @export
-find.tol <- function(mz,
+find_mz_tolerance <- function(mz,
                      mz_max_diff,
                      aver.bin.size,
                      min.bins,
                      max.bins,
                      do.plot) {
-    mz <- mz[order(mz)]
+    mz <- sort(mz)
     l <- length(mz)
     # pairwise m/z difference divided by their average, filtered outside of tolerance limit
-    distances <- (mz[2:l] - mz[1:(l - 1)]) / 
-                 ((mz[2:l] + mz[1:(l - 1)]) / 2)
+    pairwise_mean <- (mz[2:l] + mz[1:(l - 1)]) / 2
+    distances <- diff(mz) / pairwise_mean
     distances <- distances[distances < mz_max_diff]
     
     # number of equally spaced points at which the density is to be estimated
     n <- min(max.bins, max(round(length(distances) / aver.bin.size), min.bins))
+
     # estimate probability density function of distances
-    des <- density(distances, kernel = "gaussian", n = n,
-                   bw = mz_max_diff / n * 2, from = 0)
+    des <- density(
+        distances,
+        kernel = "gaussian",
+        n = n,
+        bw = mz_max_diff / n * 2,
+        from = 0
+    )
     # the n (-1?) coordinates of the points where the density is estimated
     points <- des$y[des$x > 0]
     # the estimated density values
     density_values <- des$x[des$x > 0]
+
+    q1 <- max(distances) / 4
     
     # select the upper 75% of the sorted data
-    top_data <- distances[distances > max(distances) / 4] - max(distances) / 4
+    top_data <- distances[distances > q1] - q1
     # parameter of the exponential distribution is estimated
     lambda <- MASS::fitdistr(top_data, "exponential")$estimate
     # values of the exponential distribution are calculated at equally spaced points
     estimated_density_values <- dexp(density_values, rate = lambda)
     # add the rest of the data
-    estimated_density_values <- estimated_density_values * sum(points[density_values > max(distances) / 4]) / 
-                                                           sum(estimated_density_values[density_values > max(distances) / 4])
+    estimated_density_values <- estimated_density_values * sum(points[density_values > q1]) / 
+                                                           sum(estimated_density_values[density_values > q1])
     
     # cutoff is selected where the density of the empirical distribution is >1.5 times the density of the exponential distribution
     cumulative <- cumsum(points > 1.5 * estimated_density_values)
diff --git a/R/hybrid.R b/R/hybrid.R
index 2e4c9bfa..b10896e9 100644
--- a/R/hybrid.R
+++ b/R/hybrid.R
@@ -390,7 +390,7 @@ hybrid <- function(
 
 
   message("**** time correction ****")
-  corrected <- foreach::foreach(this.feature = extracted_clusters$feature_tables) %do% correct_time(
+  corrected <- foreach::foreach(this.feature = extracted_clusters$feature_tables) %dopar% correct_time(
     this.feature,
     template_features,
     extracted_clusters$mz_tol_relative,
@@ -429,7 +429,7 @@ hybrid <- function(
   )
   
   message("**** weaker signal recovery ****")
-  recovered <- lapply(seq_along(filenames), function(i) {
+  recovered <- snow::parLapply(cluster, seq_along(filenames), function(i) {
     recover.weaker(
       filename = filenames[[i]],
       sample_name = sample_names[i],
@@ -470,7 +470,7 @@ hybrid <- function(
 
 
   message("**** second time correction ****")
-  corrected <- foreach::foreach(this.feature = recovered_clusters$feature_tables) %do% correct_time(
+  corrected <- foreach::foreach(this.feature = recovered_clusters$feature_tables) %dopar% correct_time(
     this.feature,
     template_features,
     recovered_clusters$mz_tol_relative,
diff --git a/R/prof.to.features.R b/R/prof.to.features.R
index 05d128ac..357cc3a6 100644
--- a/R/prof.to.features.R
+++ b/R/prof.to.features.R
@@ -102,7 +102,7 @@ compute_gaussian_peak_shape <- function(rt_profile, bw, component_eliminate, BIC
 #' @param sigma.1 left standard deviation of the gaussian curve
 #' @param sigma.2 right standard deviation of the gaussian curve
 #' @export
-solve.a <- function(x, t, a, sigma.1, sigma.2) {
+solve_a <- function(x, t, a, sigma.1, sigma.2) {
   # This function is a part of bigauss.esti.EM and is not covered by any of test-cases
   w <- x * (as.numeric(t < a) / sigma.1 + as.numeric(t >= a) / sigma.2)
   return(sum(t * w) / sum(w))
@@ -115,7 +115,7 @@ solve.a <- function(x, t, a, sigma.1, sigma.2) {
 #' @param t A vector of numerical values (rt).
 #' @param a A vector of peak summits.
 #' @export
-prep.uv <- function(x, t, a) {
+prep_uv <- function(x, t, a) {
   # This function is a part of bigauss.esti.EM and is not covered by any of test-cases
   temp <- (t - a)^2 * x
   u <- sum(temp * as.numeric(t < a))
@@ -138,9 +138,9 @@ prep.uv <- function(x, t, a) {
 #'   \item standard deviation at the right side of the gaussian curve
 #' }
 #' @export
-solve.sigma <- function(x, t, a) {
+solve_sigma <- function(x, t, a) {
   # This function is a part of bigauss.esti.EM and is not covered by any of test-cases
-  tt <- prep.uv(x, t, a)
+  tt <- prep_uv(x, t, a)
   sigma.1 <- tt$u / tt$x.sum * ((tt$v / tt$u)^(1 / 3) + 1)
   sigma.2 <- tt$v / tt$x.sum * ((tt$u / tt$v)^(1 / 3) + 1)
   return(list(
@@ -192,11 +192,11 @@ bigauss.esti.EM <- function(t, x, max.iter = 50, epsilon = 0.005, do.plot = FALS
   while ((change > epsilon) & (n.iter < max.iter)) {
     a.old <- a.new
     n.iter <- n.iter + 1
-    sigma <- solve.sigma(x, t, a.old)
+    sigma <- solve_sigma(x, t, a.old)
     if (n.iter == 1) {
         sigma[is.na(sigma)] <- as.numeric(sigma[which(!is.na(sigma))])[1] / 10
     }
-    a.new <- solve.a(x, t, a.old, sigma$sigma.1, sigma$sigma.2)
+    a.new <- solve_a(x, t, a.old, sigma$sigma.1, sigma$sigma.2)
     change <- abs(a.old - a.new)
   }
   d <- x
diff --git a/R/recover.weaker.R b/R/recover.weaker.R
index 46082d24..eab14d79 100644
--- a/R/recover.weaker.R
+++ b/R/recover.weaker.R
@@ -58,6 +58,7 @@ compute_delta_rt <- function(times) {
 #' @description x / sum(x)
 #' @param x Data to normalize.
 #' @return Normalized data.
+#' @export
 l2normalize <- function(x) {
   x / sum(x)
 }
@@ -157,6 +158,7 @@ compute_target_times <- function(aligned_rts,
 #' checks which values only occur a single time.
 #' @param values vector Values for which to compute the mask.
 #' @return vector Boolean vector which is the mask of values occuring only once.
+#' @export
 get_single_occurrence_mask <- function(values) {
   ttt <- table(values)
   mask <- values %in% as.numeric(names(ttt)[ttt == 1])
@@ -275,6 +277,7 @@ get_rt_region_indices <- function(target_time, features, rt_tol) {
 #'   \item pks - vector - The data points at which the density peaks.
 #'   \item vlys - vector - The points in the data where the density is low
 #'                         (forming a valley in the function).
+#' @export
 get_features_in_rt_range <- function(features, times, bw) {
   time_curve <- times[between(times, min(features$rt), max(features$rt))]
 
@@ -297,6 +300,7 @@ get_features_in_rt_range <- function(features, times, bw) {
 #' @param roi list Named list with vectors `pks` and `vlys`.
 #' @param times vector Retention time values
 #' @return vector Numbers of peaks within each region defined by a peak and the two valley points.
+#' @export
 count_peaks <- function(roi, times) {
   num_peaks <- rep(0, length(roi$pks))
 
@@ -319,6 +323,7 @@ count_peaks <- function(roi, times) {
 #'   \item pks - vector - The data points at which the density peaks with at least `recover_min_count` peaks between the valley points.
 #'   \item vlys - vector - The points in the data where the density is low
 #'                         (forming a valley in the function).
+#' @export
 compute_pks_vlys_rt <- function(features, times, bandwidth, target_rt, recover_min_count) {
   roi <- get_features_in_rt_range(
     features,
@@ -383,6 +388,7 @@ compute_mu_sc_std <- function(features, aver_diff) {
 #' @param delta_rt vector Differences between consecutive retention time values (diff(times)).
 #' @importFrom dplyr between
 #' @return list Triplet of mz, label and intensity for the feature.
+#' @export
 compute_curr_rec_with_enough_peaks <- function(mz,
                                                peak,
                                                valleys,
@@ -448,6 +454,7 @@ compute_boundaries <- function(valley_points, peak) {
 #'   \item vlys - vector - The points in the data where the density is low
 #'                         (forming a valley in the function).
 #' }
+#' @export
 compute_peaks_and_valleys <- function(dens) {
   turns <- find.turn.point(dens$y)
   pks <- dens$x[turns$pks] # mz values with highest density
@@ -474,6 +481,7 @@ compute_peaks_and_valleys <- function(dens) {
 #' @param min_bandwidth float Minimum bandwidth to use.
 #' @param max_bandwidth float Maximum bandwidth to use.
 #' @return tibble Tibble with `mz`, `rt` and `intensities` columns.
+#' @export 
 compute_rectangle <- function(data_table,
                               aligned_feature_mz,
                               breaks,
@@ -556,7 +564,7 @@ compute_rectangle <- function(data_table,
       } else {
         rt_intensities <- dplyr::select(
           that.prof,
-          c("rt", "intensities")
+          all_of(c("rt", "intensities"))
         ) |> dplyr::arrange_at("rt")
         bw <- min(max(bandwidth * (span(rt_intensities$rt)), min_bandwidth), max_bandwidth)
 
@@ -602,6 +610,7 @@ compute_rectangle <- function(data_table,
 #' @param rt_tol float Retention time tolerance.
 #' @param mz_tol float Mz tolerance to use.
 #' @return int Index of value in rectable closest to `target_rt` and `aligned_mz`.
+#' @export
 refine_selection <- function(target_rt, rectangle, aligned_mz, rt_tol, mz_tol) {
   if (!is.na(target_rt)) {
     rt_term <- (rectangle$rt - target_rt)^2 / rt_tol^2
@@ -684,8 +693,8 @@ recover.weaker <- function(filename,
   vec_delta_rt <- compute_delta_rt(times)
 
   sample_intensities <- unlist(dplyr::select(
-    intensity_table %>% dplyr::rename_with(~ str_remove(., "_intensity")),
-    sample_name
+    intensity_table %>% dplyr::rename_with(~str_remove(., "_intensity")),
+    all_of(sample_name)
   ), use.names = FALSE)
 
   custom.mz.tol <- recover_mz_range * metadata_table$mz
diff --git a/R/remove_noise.R b/R/remove_noise.R
index 05d2ce81..07ca42d8 100644
--- a/R/remove_noise.R
+++ b/R/remove_noise.R
@@ -55,7 +55,7 @@ load_data <- function(filename,
 #'  the machine's nominal accuracy level. Divide the ppm value by 1e6. For FTMS, 1e-5 is recommended.
 #' @param baseline_correct After grouping the observations, the highest intensity in each group is found. If 
 #'  the highest is lower than this value, the entire group will be deleted.
-#' @param baseline_correct_noise_percentile The percentile of signal strength of those EIC that don't pass the 
+#' @param baseline_correct_noise_percentile Only used for plotting. The percentile of signal strength of those EIC that don't pass the 
 #'  run filter, to be used as the baseline threshold of signal strength.
 #' @param intensity_weighted Whether to use intensity to weight mass density estimation.
 #' @param do.plot Indicates whether plot should be drawn.
diff --git a/R/unsupervised.R b/R/unsupervised.R
index a41caf0b..09c39b20 100644
--- a/R/unsupervised.R
+++ b/R/unsupervised.R
@@ -180,7 +180,7 @@ unsupervised <- function(
 
 
   message("**** time correction ****")
-  corrected <- foreach::foreach(this.feature = extracted_clusters$feature_tables) %do% correct_time(
+  corrected <- foreach::foreach(this.feature = extracted_clusters$feature_tables) %dopar% correct_time(
     this.feature,
     template_features,
     extracted_clusters$mz_tol_relative,
@@ -209,7 +209,7 @@ unsupervised <- function(
   )
 
   message("**** weaker signal recovery ****")
-  recovered <- lapply(seq_along(filenames), function(i) {
+  recovered <- snow::parLapply(cluster, seq_along(filenames), function(i) {
     recover.weaker(
       filename = filenames[[i]],
       sample_name = sample_names[i],
diff --git a/R/utils.R b/R/utils.R
index 288f77e6..14f48277 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -40,7 +40,6 @@ register_functions_to_cluster <- function(cluster) {
         'compute_e_step',
         'compute_start_bound',
         'compute_end_bound',
-        'compute_bounds',
         'compute_scale',
         'span',
         'compute_uniq_grp',
@@ -53,9 +52,40 @@ register_functions_to_cluster <- function(cluster) {
         "find_optima",
         "filter_based_on_density",
         "create_output",
-        "comb"
+        "comb",
+        'bigauss.esti.EM',
+        'solve_sigma',
+        'prep_uv',
+        'solve_a',
+        'correct_time',
+        'compute_comb',
+        'compute_sel',
+        'compute_template_adjusted_rt',
+        'compute_corrected_features',
+        'fill_missing_values',
+        'recover.weaker',
+        'get_custom_rt_tol',
+        'compute_target_times',
+        'predict_mz_break_indices',
+        'compute_rectangle',
+        'get_rt_region_indices',
+        'refine_selection',
+        'duplicate.row.remove',
+        'get_times_to_use',
+        'get_single_occurrence_mask',
+        'compute_curr_rec_with_enough_peaks',
+        'compute_mu_sc_std',
+        'compute_pks_vlys_rt',
+        'count_peaks',
+        'get_features_in_rt_range',
+        'get_mzrange_bound_indices',
+        'compute_mass_density',
+        'l2normalize',
+        'compute_peaks_and_valleys'
     ))
     snow::clusterEvalQ(cluster, library("dplyr"))
+    snow::clusterEvalQ(cluster, library("stringr"))
+    snow::clusterEvalQ(cluster, library("tidyselect"))
 }
 
 #' Concatenate multiple feature lists and add the sample id (origin of feature) as additional column.
diff --git a/man/aggregate_by_rt.Rd b/man/aggregate_by_rt.Rd
new file mode 100644
index 00000000..e81cb6ac
--- /dev/null
+++ b/man/aggregate_by_rt.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aggregate_by_rt.R
+\name{aggregate_by_rt}
+\alias{aggregate_by_rt}
+\title{Aggregate the intensities and select median mz for features with identical rt.}
+\usage{
+aggregate_by_rt(features)
+}
+\arguments{
+\item{features}{dataframe of retention time, m/z ratio, signal strength.}
+}
+\value{
+returns a tibble with the following columns
+\itemize{
+  \item mz - m/z ratio
+  \item rt - retention time
+  \item intensities - signal strength
+}
+}
+\description{
+This functions computes median mz and sum of intensities over features with same rt.
+}
diff --git a/man/augment_known_table.Rd b/man/augment_known_table.Rd
new file mode 100644
index 00000000..a8c3126e
--- /dev/null
+++ b/man/augment_known_table.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hybrid.R
+\name{augment_known_table}
+\alias{augment_known_table}
+\title{Add newly detected aligned features to a known features table.}
+\usage{
+augment_known_table(
+  aligned,
+  known_table,
+  match_tol_ppm,
+  mz_tol_relative,
+  rt_tol_relative,
+  new_feature_min_count
+)
+}
+\arguments{
+\item{aligned}{A list object with three tibble tables: metadata, intensity, and rt.}
+
+\item{known_table}{A table of known/previously detected peaks.}
+
+\item{match_tol_ppm}{The ppm tolerance to match identified features to known metabolites/features.}
+
+\item{mz_tol_relative}{The m/z tolerance level for peak alignment. The default is NA, which allows the program to search for the 
+tolerance level based on the data. This value is expressed as the percentage of the m/z value. This value, multiplied by the m/z 
+value, becomes the cutoff level.}
+
+\item{rt_tol_relative}{The retention time tolerance level for peak alignment. The default is NA, which allows the program to search for 
+the tolerance level based on the data.}
+
+\item{new_feature_min_count}{The number of profiles a new feature must be present for it to be added to the database.}
+}
+\value{
+Known table with novel features.
+}
+\description{
+Add newly detected aligned features to a known features table.
+}
diff --git a/man/compute_boundaries.Rd b/man/compute_boundaries.Rd
new file mode 100644
index 00000000..0c5086f1
--- /dev/null
+++ b/man/compute_boundaries.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_boundaries}
+\alias{compute_boundaries}
+\title{Compute bounds of area using given peak and mass valley points.}
+\usage{
+compute_boundaries(valley_points, peak)
+}
+\arguments{
+\item{valley_points}{vector values of valley points defining clusters.}
+
+\item{peak}{double Value of the peak for which to get the valley bounds.}
+}
+\value{
+Returns a list object with the following objects in it:
+\itemize{
+  \item lower - double - The value of the lower bound valley point.
+  \item upper - double - The value of the upper bound valley point.
+}
+}
+\description{
+The lower bound is the mass of the valley the closest but smaller than peak
+and the upper bound is the mass of the valley the closest but higher than
+the peak.
+}
diff --git a/man/compute_clusters_simple.Rd b/man/compute_clusters_simple.Rd
new file mode 100644
index 00000000..09ab9f3e
--- /dev/null
+++ b/man/compute_clusters_simple.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compute_clusters.R
+\name{compute_clusters_simple}
+\alias{compute_clusters_simple}
+\title{Compute clusters using simple grouping based on numeric thresholds.}
+\usage{
+compute_clusters_simple(feature_tables, sample_names, mz_tol_ppm, rt_tol)
+}
+\arguments{
+\item{feature_tables}{list of tibbles List of feature tables coming from all samples.}
+
+\item{sample_names}{list of strings Sample names of the feature tables used to distinguish the samples.}
+
+\item{mz_tol_ppm}{float Relative tolerance for mz grouping in parts per million.}
+
+\item{rt_tol}{float Tolerance in retention time dimension [seconds].}
+}
+\value{
+list of tibbles Feature tables passed initially with additional columns indicating the 
+mz and rt groups as well as the combined cluster index.
+}
+\description{
+Compute clusters using simple grouping based on numeric thresholds.
+}
diff --git a/man/compute_curr_rec_with_enough_peaks.Rd b/man/compute_curr_rec_with_enough_peaks.Rd
new file mode 100644
index 00000000..bb673ce4
--- /dev/null
+++ b/man/compute_curr_rec_with_enough_peaks.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_curr_rec_with_enough_peaks}
+\alias{compute_curr_rec_with_enough_peaks}
+\title{Compute the rectangle around recovered features given that enough peaks are present.}
+\usage{
+compute_curr_rec_with_enough_peaks(
+  mz,
+  peak,
+  valleys,
+  features,
+  aver_diff,
+  times,
+  delta_rt
+)
+}
+\arguments{
+\item{mz}{Mz value of the feature.}
+
+\item{peak}{Peak around which to detect the new feature.}
+
+\item{valleys}{Valley points to compute the boundary region.}
+
+\item{features}{tibble Tibble with `rt` and `intensities` column.}
+
+\item{aver_diff}{float Average retention time difference.}
+
+\item{times}{vector Raw retention time values from raw data file.}
+
+\item{delta_rt}{vector Differences between consecutive retention time values (diff(times)).}
+}
+\value{
+list Triplet of mz, label and intensity for the feature.
+}
+\description{
+Compute the rectangle around recovered features given that enough peaks are present.
+}
diff --git a/man/compute_delta_rt.Rd b/man/compute_delta_rt.Rd
new file mode 100644
index 00000000..05fa5b66
--- /dev/null
+++ b/man/compute_delta_rt.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_delta_rt}
+\alias{compute_delta_rt}
+\title{Compute custom smoothed h-method derivative of function.}
+\usage{
+compute_delta_rt(times)
+}
+\arguments{
+\item{times}{Retention time values.}
+}
+\value{
+Differences between time values.
+}
+\description{
+The function adds an extrapolated element in the end and a 0 element in the front,
+then computes the midpoints between#' neighbouring elements and then uses the `diff`
+function to compute the changes in rt between points.
+}
diff --git a/man/compute_mass_density.Rd b/man/compute_mass_density.Rd
new file mode 100644
index 00000000..601c94d1
--- /dev/null
+++ b/man/compute_mass_density.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_mass_density}
+\alias{compute_mass_density}
+\title{Compute the density function of mz values.}
+\usage{
+compute_mass_density(features, bandwidth, intensity_weighted, n = 512)
+}
+\arguments{
+\item{bandwidth}{Bandwidth to use to compute the kernel density.}
+
+\item{intensity_weighted}{Whether to use intensity weighting or not.}
+
+\item{mz}{Mass values to compute the density over.}
+
+\item{intensities}{Intensities of the peaks at mz values.
+Only used if intensity_weighted == TRUE.}
+}
+\value{
+\link[stats]{density} object containing the densities.
+}
+\description{
+The function takes the mz values and uses \link[stats]{density} to
+compute the local density, optionally using intensity based weighting.
+}
diff --git a/man/compute_min_mz_tolerance.Rd b/man/compute_min_mz_tolerance.Rd
new file mode 100644
index 00000000..f228b26e
--- /dev/null
+++ b/man/compute_min_mz_tolerance.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/find_mz_tolerance.R
+\name{compute_min_mz_tolerance}
+\alias{compute_min_mz_tolerance}
+\title{Compute minimum mz tolerance to use.}
+\usage{
+compute_min_mz_tolerance(mz, mz_tol_relative, mz_tol_absolute)
+}
+\arguments{
+\item{mz}{vector Mz values to use.}
+
+\item{mz_tol_relative}{float Relative mz tolerance to use with the mz values.
+This forms a sort of weighted tolerance.}
+
+\item{mz_tol_absolute}{float Absolute tolerance to use independent from the mz values.}
+}
+\value{
+float Minimum tolerance values to use.
+}
+\description{
+Compute the minimum mz tolerance based on the relative
+tolerance and the mz values and the absolute tolerance.
+Uses midpoints between mz values for the weighting.
+}
diff --git a/man/compute_mu_sc_std.Rd b/man/compute_mu_sc_std.Rd
new file mode 100644
index 00000000..460842e3
--- /dev/null
+++ b/man/compute_mu_sc_std.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_mu_sc_std}
+\alias{compute_mu_sc_std}
+\title{Compute interpolated retention time, its standard deviation, and intensity values,.}
+\usage{
+compute_mu_sc_std(features, aver_diff)
+}
+\arguments{
+\item{features}{tibble Features with `rt` and `intensities` columns.}
+
+\item{aver_diff}{float Average retention time difference.}
+}
+\value{
+
+}
+\description{
+Compute interpolated retention time, its standard deviation, and intensity values,.
+}
diff --git a/man/compute_peaks_and_valleys.Rd b/man/compute_peaks_and_valleys.Rd
new file mode 100644
index 00000000..3321bff7
--- /dev/null
+++ b/man/compute_peaks_and_valleys.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_peaks_and_valleys}
+\alias{compute_peaks_and_valleys}
+\title{Compute peaks and valleys of density function.}
+\usage{
+compute_peaks_and_valleys(dens)
+}
+\arguments{
+\item{density}{stats::density Density object for which to compute peaks
+and valleys.}
+}
+\value{
+Returns a list object with the following objects in it:
+\itemize{
+  \item pks - vector - The data points at which the density peaks.
+  \item vlys - vector - The points in the data where the density is low
+                        (forming a valley in the function).
+}
+}
+\description{
+Given a density function, the turn points are computed and
+the peaks and valleys in the original data (points with highest
+and lowest density) are returned.
+}
diff --git a/man/compute_pks_vlys_rt.Rd b/man/compute_pks_vlys_rt.Rd
new file mode 100644
index 00000000..00a46608
--- /dev/null
+++ b/man/compute_pks_vlys_rt.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_pks_vlys_rt}
+\alias{compute_pks_vlys_rt}
+\title{Compute peaks and valleys which have at least `recover_min_count` peaks.}
+\usage{
+compute_pks_vlys_rt(features, times, bandwidth, target_rt, recover_min_count)
+}
+\arguments{
+\item{features}{tibble Features with `mz`, `rt` and `intensities`.}
+
+\item{times}{vector Retention time values from the raw data file.}
+
+\item{bandwidth}{float Bandwidth to use in smoothing.}
+
+\item{target_rt}{float Retention time at which to recover the intensity.}
+
+\item{recover_min_count}{int Minimum number of peaks required in the area to recover the signal.}
+}
+\value{
+
+}
+\description{
+Compute peaks and valleys which have at least `recover_min_count` peaks.
+}
diff --git a/man/compute_rectangle.Rd b/man/compute_rectangle.Rd
new file mode 100644
index 00000000..edb26654
--- /dev/null
+++ b/man/compute_rectangle.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_rectangle}
+\alias{compute_rectangle}
+\title{Compute rectangle around feature with `aligned_feature_mz` and `target_rt` for recovery.}
+\usage{
+compute_rectangle(
+  data_table,
+  aligned_feature_mz,
+  breaks,
+  custom_mz_tol,
+  orig_mz_tol,
+  use_intensity_weighting,
+  recover_min_count,
+  target_rt,
+  custom_rt_tol,
+  times,
+  delta_rt,
+  aver_diff,
+  bandwidth,
+  min_bandwidth,
+  max_bandwidth
+)
+}
+\arguments{
+\item{data_table}{tibble Feature table with `mz`, `rt` and `intensities` column.}
+
+\item{aligned_feature_mz}{float Mz value of feature in aligned feature table.}
+
+\item{breaks}{vector Integer boundaries of clusters in mz values.}
+
+\item{custom_mz_tol}{float Custom mz tolerance for the feature.}
+
+\item{orig_mz_tol}{float Flat original mz tolerance to use.}
+
+\item{use_intensity_weighting}{bool Whether to use intensity weighting.}
+
+\item{recover_min_count}{int Minimum number of peaks required in the area to recover the signal.}
+
+\item{target_rt}{float Target retention time value.}
+
+\item{custom_rt_tol}{float Custom chromatographic tolerance to use.}
+
+\item{times}{vector Raw retention time values from raw data file.}
+
+\item{delta_rt}{vector Differences between consecutive retention time values (diff(times)).}
+
+\item{aver_diff}{float Average retention time difference.}
+
+\item{bandwidth}{float Bandwidth to use in smoothing.}
+
+\item{min_bandwidth}{float Minimum bandwidth to use.}
+
+\item{max_bandwidth}{float Maximum bandwidth to use.}
+}
+\value{
+tibble Tibble with `mz`, `rt` and `intensities` columns.
+}
+\description{
+Compute rectangle around feature with `aligned_feature_mz` and `target_rt` for recovery.
+}
diff --git a/man/compute_rt_tol_relative.Rd b/man/compute_rt_tol_relative.Rd
new file mode 100644
index 00000000..2b479c4d
--- /dev/null
+++ b/man/compute_rt_tol_relative.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/find.tol.time.R
+\name{compute_rt_tol_relative}
+\alias{compute_rt_tol_relative}
+\title{Compute rt relative tolerance to use.}
+\usage{
+compute_rt_tol_relative(
+  breaks,
+  max.num.segments,
+  aver.bin.size,
+  number_of_samples,
+  rt,
+  min.bins,
+  max.bins,
+  do.plot = FALSE
+)
+}
+\arguments{
+\item{max.num.segments}{the maximum number of segments.}
+
+\item{aver.bin.size}{The average bin size to determine the number of equally spaced points in the kernel density estimation.}
+
+\item{number_of_samples}{The number of spectra in this analysis.}
+
+\item{rt}{retention time of all peaks in all profiles in the study.}
+
+\item{min.bins}{the minimum number of bins to use in the kernel density estimation. It overrides aver.bin.size when too few observations are present.}
+
+\item{max.bins}{the maximum number of bins to use in the kernel density estimation. It overrides aver.bin.size when too many observations are present.}
+
+\item{do.plot}{Indicates whether plot should be drawn.}
+}
+\value{
+rt_tol_relative the elution time tolerance.
+This conditional makes sure that length(s) is <= max.num.segments
+If False, length(s) =  max.num.segments, and s[i] is the largest
+integer no greater than the corresponding element. Otherwise
+length(s) =  length(breaks) - 1
+This loop creates a vector with distances between rt peaks. Distances
+are stored in a triangular matrix and converted to a vector subsequently.
+Vector length should be < 100, otherwise, vector is
+constructed extracting only 100 samples.
+}
+\description{
+Compute the elution time tolerance based on the kernel density estimation.
+It plots the fitting function if set to TRUE.
+}
diff --git a/man/compute_target_times.Rd b/man/compute_target_times.Rd
new file mode 100644
index 00000000..05f5fbc8
--- /dev/null
+++ b/man/compute_target_times.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{compute_target_times}
+\alias{compute_target_times}
+\title{Compute target times for regions of interest for recovery.}
+\usage{
+compute_target_times(
+  aligned_rts,
+  original_sample_rts,
+  adjusted_sample_rts,
+  min_common_times = 4
+)
+}
+\arguments{
+\item{aligned_rts}{vector Aligned retention time values.}
+
+\item{original_sample_rts}{vector Original feature retention time values before correction.}
+
+\item{adjusted_sample_rts}{vector Feature retention time values after time correction.}
+}
+\description{
+Compute the individual target times for the features to be recovered in the sample.
+Spline interpolation using \link[splines]{interpSpline} is used to map from adjusted times
+back into the original times. The function requires `x` to be distinct, hence the filtering
+to only include rt values that occurr only once in both lists.
+}
diff --git a/man/compute_uniq_grp.Rd b/man/compute_uniq_grp.Rd
new file mode 100644
index 00000000..ec2aa5b6
--- /dev/null
+++ b/man/compute_uniq_grp.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_filter.R
+\name{compute_uniq_grp}
+\alias{compute_uniq_grp}
+\title{Computes unique groups}
+\usage{
+compute_uniq_grp(profile, min_count_run, min_pres)
+}
+\arguments{
+\item{profile}{The matrix containing m/z, retention time, intensity, and EIC label as columns.}
+
+\item{min_count_run}{filter parameter.}
+
+\item{min_pres}{Run filter parameter. The minimum proportion of presence in the time period for a series of signals grouped
+by m/z to be considered a peak.}
+}
+\value{
+unique_grp.
+}
+\description{
+Computes unique groups
+}
diff --git a/man/concatenate_feature_tables.Rd b/man/concatenate_feature_tables.Rd
new file mode 100644
index 00000000..510bdd2e
--- /dev/null
+++ b/man/concatenate_feature_tables.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{concatenate_feature_tables}
+\alias{concatenate_feature_tables}
+\title{Concatenate multiple feature lists and add the sample id (origin of feature) as additional column.}
+\usage{
+concatenate_feature_tables(features, sample_names)
+}
+\arguments{
+\item{features}{list List of tibbles containing extracted feature tables.}
+}
+\description{
+Concatenate multiple feature lists and add the sample id (origin of feature) as additional column.
+}
diff --git a/man/count_peaks.Rd b/man/count_peaks.Rd
new file mode 100644
index 00000000..ed07b14b
--- /dev/null
+++ b/man/count_peaks.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{count_peaks}
+\alias{count_peaks}
+\title{Count the number of peaks in all valleys}
+\usage{
+count_peaks(roi, times)
+}
+\arguments{
+\item{roi}{list Named list with vectors `pks` and `vlys`.}
+
+\item{times}{vector Retention time values}
+}
+\value{
+vector Numbers of peaks within each region defined by a peak and the two valley points.
+}
+\description{
+For each peak in ROI, count the peaks between the valley points.
+}
diff --git a/man/create_aligned_feature_table.Rd b/man/create_aligned_feature_table.Rd
new file mode 100644
index 00000000..034df37d
--- /dev/null
+++ b/man/create_aligned_feature_table.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/feature.align.R
+\name{create_aligned_feature_table}
+\alias{create_aligned_feature_table}
+\title{Align peaks from spectra into a feature table.}
+\usage{
+create_aligned_feature_table(
+  features_table,
+  min_occurrence,
+  sample_names,
+  rt_tol_relative,
+  mz_tol_relative,
+  cluster = 4
+)
+}
+\arguments{
+\item{features_table}{A list object. Each component is a matrix which is the output from compute_clusters().}
+
+\item{min_occurrence}{A feature has to show up in at least this number of profiles to be included in the final result.}
+
+\item{sample_names}{list List of sample names.}
+
+\item{rt_tol_relative}{The retention time tolerance level for peak alignment. The default is NA, which
+allows the program to search for the tolerance level based on the data.}
+
+\item{mz_tol_relative}{The m/z tolerance level for peak alignment. The default is NA, which allows the
+program to search for the tolerance level based on the data. This value is expressed as the
+percentage of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.}
+
+\item{cluster}{The number of CPU cores to be used}
+}
+\value{
+A tibble with three tables containing aligned metadata, intensities an RTs.
+}
+\description{
+Align peaks from spectra into a feature table.
+}
diff --git a/man/duplicate.row.remove.Rd b/man/duplicate.row.remove.Rd
new file mode 100644
index 00000000..fadc52ea
--- /dev/null
+++ b/man/duplicate.row.remove.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{duplicate.row.remove}
+\alias{duplicate.row.remove}
+\title{Custom way of removing duplicate rows from a specifically formatted table.}
+\usage{
+duplicate.row.remove(features, tolerance = 1e-10)
+}
+\arguments{
+\item{tolerance}{Tolerance to use for numeric comparisons.}
+
+\item{new.table}{The table from which the duplicate rows should be removed. Needs at least 5 columns.
+Columns 1, 2 and 5 have to be of numeric type.}
+}
+\value{
+Returns the same table with duplicate rows removed.
+}
+\description{
+Rows are considered as duplicate if the 1st, 2nd and 5th column are less than 1e-10 (tolerance) apart.
+Only a single row in this `range` is kept from a group.
+}
diff --git a/man/enrich_table_by_known_features.Rd b/man/enrich_table_by_known_features.Rd
new file mode 100644
index 00000000..d322eaee
--- /dev/null
+++ b/man/enrich_table_by_known_features.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hybrid.R
+\name{enrich_table_by_known_features}
+\alias{enrich_table_by_known_features}
+\title{Add entries from the known features table to the aligned table.}
+\usage{
+enrich_table_by_known_features(
+  aligned,
+  known_table,
+  match_tol_ppm,
+  mz_tol_relative,
+  rt_tol_relative
+)
+}
+\arguments{
+\item{aligned}{A list object with three tibble tables: metadata, intensity, and rt.}
+
+\item{known_table}{A table of known/previously detected peaks.}
+
+\item{match_tol_ppm}{The ppm tolerance to match identified features to known metabolites/features.}
+
+\item{mz_tol_relative}{The m/z tolerance level for peak alignment. The default is NA, which allows the program to search for the 
+tolerance level based on the data. This value is expressed as the percentage of the m/z value. This value, multiplied by the m/z 
+value, becomes the cutoff level.}
+
+\item{rt_tol_relative}{The retention time tolerance level for peak alignment. The default is NA, which allows the program to search for 
+the tolerance level based on the data.}
+}
+\value{
+Aligned table with known features.
+}
+\description{
+Add entries from the known features table to the aligned table.
+}
diff --git a/man/find_mz_match.Rd b/man/find_mz_match.Rd
new file mode 100644
index 00000000..2724f7b4
--- /dev/null
+++ b/man/find_mz_match.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hybrid.R
+\name{find_mz_match}
+\alias{find_mz_match}
+\title{Compute matches between mz array and specific mass value with a tolerance.}
+\usage{
+find_mz_match(sample_mz, known_mz, match_tol_ppm = 5)
+}
+\arguments{
+\item{sample_mz}{The mz array for which to compute the matching.}
+
+\item{known_mz}{The mz value with which to match.}
+
+\item{match_tol_ppm}{Matching tolerance in ppm.}
+}
+\value{
+Indicies of m/z values within the tolerance of any known m/z.
+}
+\description{
+Compute matches between mz array and specific mass value with a tolerance.
+}
+\examples{
+find_mz_match(
+ sample_mz = c(10, 20, 21),
+ known_mz = 20
+)
+}
diff --git a/man/find.tol.Rd b/man/find_mz_tolerance.Rd
similarity index 83%
rename from man/find.tol.Rd
rename to man/find_mz_tolerance.Rd
index c693e887..43d63e7f 100644
--- a/man/find.tol.Rd
+++ b/man/find_mz_tolerance.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/find.tol.R
-\name{find.tol}
-\alias{find.tol}
+% Please edit documentation in R/find_mz_tolerance.R
+\name{find_mz_tolerance}
+\alias{find_mz_tolerance}
 \title{An internal function that is not supposed to be directly accessed by the user. Find m/z tolerance level.}
 \usage{
-find.tol(mz, mz_max_diff, aver.bin.size, min.bins, max.bins, do.plot)
+find_mz_tolerance(mz, mz_max_diff, aver.bin.size, min.bins, max.bins, do.plot)
 }
 \arguments{
 \item{mz}{The vector of observed m/z values.}
diff --git a/man/get_custom_rt_tol.Rd b/man/get_custom_rt_tol.Rd
new file mode 100644
index 00000000..00d052cf
--- /dev/null
+++ b/man/get_custom_rt_tol.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{get_custom_rt_tol}
+\alias{get_custom_rt_tol}
+\title{Compute custom chromatographic tolerance.}
+\usage{
+get_custom_rt_tol(use_observed_range, peak_rts, rt_range, aligned_features)
+}
+\arguments{
+\item{use_observed_range}{bool Whether to use the observed chromatographic range for computation or not.}
+
+\item{peak_rts}{data.frame Retention time cross table with all peak rts.}
+
+\item{rt_range}{float Default chromatographic tolerance to use.}
+
+\item{aligned_features}{data.frame Aligned feature table.}
+}
+\value{
+vector Custom chromatographic tolerances to use for each feature.
+}
+\description{
+Compute chromatographic tolerance for each feature. If `use_observed_range == TRUE`,
+the whole range of retention times for all peaks is used to compute the tolerance,
+otherwise `rt_range` is used for each feature.
+}
diff --git a/man/get_features_in_rt_range.Rd b/man/get_features_in_rt_range.Rd
new file mode 100644
index 00000000..0311079f
--- /dev/null
+++ b/man/get_features_in_rt_range.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{get_features_in_rt_range}
+\alias{get_features_in_rt_range}
+\title{Get peaks and valleys of smoothed rt values in range.}
+\usage{
+get_features_in_rt_range(features, times, bw)
+}
+\arguments{
+\item{features}{tibble Data table with `rt` and `intensities` columns.}
+
+\item{times}{vector Raw retention time data from raw data file.}
+
+\item{bw}{float Bandwidth to use for kernel smoothing.}
+}
+\value{
+
+}
+\description{
+Get peaks and valleys of smoothed rt values in range.
+}
diff --git a/man/get_mzrange_bound_indices.Rd b/man/get_mzrange_bound_indices.Rd
new file mode 100644
index 00000000..f77b5f1c
--- /dev/null
+++ b/man/get_mzrange_bound_indices.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{get_mzrange_bound_indices}
+\alias{get_mzrange_bound_indices}
+\title{Compute range of valley indices which are in mz_tol range around aligned_feature_mass.}
+\usage{
+get_mzrange_bound_indices(aligned_feature_mass, mz, vlys_indices, mz_tol)
+}
+\arguments{
+\item{aligned_feature_mass}{float Mz value of the aligned feature.}
+
+\item{mz}{vector mz values of the features.}
+
+\item{vlys_indices}{vector Indices of the valley points of mz clusters.}
+
+\item{mz_tol}{float Tolerance to use to check if values are close.}
+}
+\value{
+pair Index range (start, end).
+}
+\description{
+Compute range of valley indices which are in mz_tol range around aligned_feature_mass.
+}
diff --git a/man/get_rt_region_indices.Rd b/man/get_rt_region_indices.Rd
new file mode 100644
index 00000000..67cf2a22
--- /dev/null
+++ b/man/get_rt_region_indices.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{get_rt_region_indices}
+\alias{get_rt_region_indices}
+\title{Get indices where rt in `features` is within `rt_tol` of `target_time`.}
+\usage{
+get_rt_region_indices(target_time, features, rt_tol)
+}
+\arguments{
+\item{target_time}{float Target retention time region.}
+
+\item{features}{tibble Feature table including `rt` column.}
+
+\item{rt_tol}{float Retention time tolerance.}
+}
+\value{
+vector Indices which are within `rt_tol` from `target_time` or
+1 if `target_time` is NA.
+}
+\description{
+Get indices where rt in `features` is within `rt_tol` of `target_time`.
+}
diff --git a/man/get_single_occurrence_mask.Rd b/man/get_single_occurrence_mask.Rd
new file mode 100644
index 00000000..4638b86c
--- /dev/null
+++ b/man/get_single_occurrence_mask.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{get_single_occurrence_mask}
+\alias{get_single_occurrence_mask}
+\title{Get boolean mask for values that occur only once.}
+\usage{
+get_single_occurrence_mask(values)
+}
+\arguments{
+\item{values}{vector Values for which to compute the mask.}
+}
+\value{
+vector Boolean vector which is the mask of values occuring only once.
+}
+\description{
+Uses the \link[base]{table} function to compute the occurrences and then
+checks which values only occur a single time.
+}
diff --git a/man/get_times_to_use.Rd b/man/get_times_to_use.Rd
new file mode 100644
index 00000000..f27d9c2a
--- /dev/null
+++ b/man/get_times_to_use.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{get_times_to_use}
+\alias{get_times_to_use}
+\title{Get retention time values to use}
+\usage{
+get_times_to_use(original_sample_rts, adjusted_sample_rts, cap = 2000)
+}
+\arguments{
+\item{original_sample_rts}{vector Original feature retention time values before correction.}
+
+\item{adjusted_sample_rts}{vector Feature retention time values after time correction.}
+
+\item{cap}{int Maximum number of time values to return.}
+}
+\value{
+Indices of retention time values to use.
+}
+\description{
+Obtain retention time values which occur only once in both the original and the adjusted times.
+This is a custom version of the unique or intersection function with rounding etc.
+}
diff --git a/man/l2normalize.Rd b/man/l2normalize.Rd
new file mode 100644
index 00000000..b871aeee
--- /dev/null
+++ b/man/l2normalize.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{l2normalize}
+\alias{l2normalize}
+\title{Normalize vector so that sum(vec) = 1}
+\usage{
+l2normalize(x)
+}
+\arguments{
+\item{x}{Data to normalize.}
+}
+\value{
+Normalized data.
+}
+\description{
+x / sum(x)
+}
diff --git a/man/label_val_to_keep.Rd b/man/label_val_to_keep.Rd
new file mode 100644
index 00000000..ff65f677
--- /dev/null
+++ b/man/label_val_to_keep.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_filter.R
+\name{label_val_to_keep}
+\alias{label_val_to_keep}
+\title{This function labels the indices of values kept to perform further calculations}
+\usage{
+label_val_to_keep(min_run, timeline, min_pres, this_times, times)
+}
+\arguments{
+\item{min_run}{Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z to be considered a peak.}
+
+\item{min_pres}{Run filter parameter. The minimum proportion of presence in the time period for a series of signals grouped
+by m/z to be considered a peak.}
+
+\item{times.}{Retention times vector.}
+}
+\value{
+to_keep.
+}
+\description{
+This function labels the indices of values kept to perform further calculations
+}
diff --git a/man/load_data.Rd b/man/load_data.Rd
new file mode 100644
index 00000000..053ba8e3
--- /dev/null
+++ b/man/load_data.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/remove_noise.R
+\name{load_data}
+\alias{load_data}
+\title{Load data either from cache or load raw file and detect peaks.}
+\usage{
+load_data(filename, cache, min_run, min_pres, mz_tol, intensity_weighted)
+}
+\description{
+THIS FUNCTION IS DEPRECATED!
+}
diff --git a/man/load_file.Rd b/man/load_file.Rd
new file mode 100644
index 00000000..6a5be672
--- /dev/null
+++ b/man/load_file.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/remove_noise.R
+\name{load_file}
+\alias{load_file}
+\title{Load raw data from file}
+\usage{
+load_file(filename)
+}
+\description{
+Load raw data from file
+}
diff --git a/man/match_peaks.Rd b/man/match_peaks.Rd
new file mode 100644
index 00000000..2f785aa7
--- /dev/null
+++ b/man/match_peaks.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hybrid.R
+\name{match_peaks}
+\alias{match_peaks}
+\title{Match peaks from sample table to already known peaks via similar m/z and rt.}
+\usage{
+match_peaks(
+  aligned,
+  known_table,
+  match_tol_ppm,
+  mz_tol_relative,
+  rt_tol_relative
+)
+}
+\arguments{
+\item{aligned}{A list object with three tibble tables: metadata, intensity, and rt.}
+
+\item{known_table}{A table of known/previously detected peaks.}
+
+\item{match_tol_ppm}{The ppm tolerance to match identified features to known metabolites/features.}
+
+\item{mz_tol_relative}{The m/z tolerance level for peak alignment. The default is NA, which allows the program to search for the 
+tolerance level based on the data. This value is expressed as the percentage of the m/z value. This value, multiplied by the m/z 
+value, becomes the cutoff level.}
+
+\item{rt_tol_relative}{The retention time tolerance level for peak alignment. The default is NA, which allows the program to search for 
+the tolerance level based on the data.}
+}
+\value{
+n x 2 matrix containing sample features-known features pairs.
+}
+\description{
+Match peaks from sample table to already known peaks via similar m/z and rt.
+}
diff --git a/man/peak_characterize.Rd b/man/peak_characterize.Rd
new file mode 100644
index 00000000..f72277d0
--- /dev/null
+++ b/man/peak_characterize.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/peak.characterize.R
+\name{peak_characterize}
+\alias{peak_characterize}
+\title{Internal function: Updates the information of a feature for the known feature table.}
+\usage{
+peak_characterize(existing_row = NA, metadata_row, ftrs_row, rt_row)
+}
+\arguments{
+\item{existing_row}{The existing row in the known feature table.}
+
+\item{ftrs_row}{The row of the matched feature in the new aligned feature table.}
+
+\item{rt_row}{The row of the matched feature in the new retention time table of aligned features.}
+}
+\value{
+A vector, the updated row for the known feature table.
+}
+\description{
+The function takes the information about the feature in the known feature table (if available), and updates it using the
+information found in the current dataset.
+}
diff --git a/man/predict_mz_break_indices.Rd b/man/predict_mz_break_indices.Rd
new file mode 100644
index 00000000..9c3f2a90
--- /dev/null
+++ b/man/predict_mz_break_indices.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{predict_mz_break_indices}
+\alias{predict_mz_break_indices}
+\title{Predict the indices for the valley points with low mass density.}
+\usage{
+predict_mz_break_indices(features, mz_orig_tol)
+}
+\arguments{
+\item{features}{data.frame Data table with features for which to predict the indices.}
+
+\item{mz_orig_tol}{float Mz tolerance to use as KDE bandwidth parameter.}
+}
+\value{
+vector Predicted indices for valley points.
+}
+\description{
+The density of mz values in the feature table is computed based on the tolerance.
+The valleys or breaks of clusters in mz values are detected and a function
+is approximated to predict the indices for the mass values which are the closest to those
+valley points.
+}
diff --git a/man/predict_smoothed_rt.Rd b/man/predict_smoothed_rt.Rd
new file mode 100644
index 00000000..c2dc1fd0
--- /dev/null
+++ b/man/predict_smoothed_rt.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_filter.R
+\name{predict_smoothed_rt}
+\alias{predict_smoothed_rt}
+\title{Computes the smoothed retention times by using The Nadaraya-Watson kernel regression estimate function.}
+\usage{
+predict_smoothed_rt(min_run, times)
+}
+\arguments{
+\item{min_run}{Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z to be considered a peak.}
+
+\item{times.}{Retention times vector.}
+}
+\value{
+predicted rt.
+}
+\description{
+Computes the smoothed retention times by using The Nadaraya-Watson kernel regression estimate function.
+}
diff --git a/man/recetox.aplcms.Rd b/man/recetox.aplcms.Rd
deleted file mode 100644
index c24ccb3a..00000000
--- a/man/recetox.aplcms.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-\name{recetox.aplcms}
-\alias{recetox.aplcms-package}
-\alias{recetox.aplcms}
-\docType{package}
-\title{
-Adaptive processing of LC/MS data
-}
-\description{
-The package generates a feature table from a batch of LC/MS spectra. It finds m/z and retention time tolerance levels from the data. A run-filter is used to detect peaks and remove noise. Non-parametric statistical methods are used to find-tune peak selection and grouping. After retention time correction, a feature table is generated by aligning peaks across spectra. 
-}
-\author{
-Tianwei Yu <tyu8@emory.edu>
-Maintainer: Tianwei Yu <tyu8@emory.edu>
-}
-\references{
-Bioinformatics. 25(15):1930-36. 
-BMC Bioinformatics. 11:559.
-J. Proteome Res. 12(3):1419-27.
-}
-\keyword{ package }
diff --git a/man/refine_selection.Rd b/man/refine_selection.Rd
new file mode 100644
index 00000000..611aefe6
--- /dev/null
+++ b/man/refine_selection.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/recover.weaker.R
+\name{refine_selection}
+\alias{refine_selection}
+\title{Refine the selection based on mz and rt differences.}
+\usage{
+refine_selection(target_rt, rectangle, aligned_mz, rt_tol, mz_tol)
+}
+\arguments{
+\item{target_rt}{float Target retention time value.}
+
+\item{rectangle}{tibble Features with columns `rt` and `mz`.}
+
+\item{aligned_mz}{float Mz value in the aligned feature table of the
+feature to be recovered.}
+
+\item{rt_tol}{float Retention time tolerance.}
+
+\item{mz_tol}{float Mz tolerance to use.}
+}
+\value{
+int Index of value in rectable closest to `target_rt` and `aligned_mz`.
+}
+\description{
+Refine the selection based on mz and rt differences.
+}
diff --git a/man/remove_noise.Rd b/man/remove_noise.Rd
index 82949f31..debe3937 100644
--- a/man/remove_noise.Rd
+++ b/man/remove_noise.Rd
@@ -32,7 +32,7 @@ the machine's nominal accuracy level. Divide the ppm value by 1e6. For FTMS, 1e-
 \item{baseline_correct}{After grouping the observations, the highest intensity in each group is found. If 
 the highest is lower than this value, the entire group will be deleted.}
 
-\item{baseline_correct_noise_percentile}{The percentile of signal strength of those EIC that don't pass the 
+\item{baseline_correct_noise_percentile}{Only used for plotting. The percentile of signal strength of those EIC that don't pass the 
 run filter, to be used as the baseline threshold of signal strength.}
 
 \item{intensity_weighted}{Whether to use intensity to weight mass density estimation.}
diff --git a/tests/remote-files/clusters.txt b/tests/remote-files/clusters.txt
index 03f22e5a..37d5e7bd 100644
--- a/tests/remote-files/clusters.txt
+++ b/tests/remote-files/clusters.txt
@@ -3,4 +3,7 @@ https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/RCX_08_shortened_extracted_clusters.parquet
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/RCX_06_shortened_adjusted_clusters.parquet
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/RCX_07_shortened_adjusted_clusters.parquet
-https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/RCX_08_shortened_adjusted_clusters.parquet
\ No newline at end of file
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/RCX_08_shortened_adjusted_clusters.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/8_qc_no_dil_milliq.parquet  
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/21_qc_no_dil_milliq.parquet  
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/clusters/29_qc_no_dil_milliq.parquet
\ No newline at end of file
diff --git a/tests/remote-files/extracted-whole-extracted.txt b/tests/remote-files/extracted-whole-extracted.txt
index f33a65f4..623086df 100644
--- a/tests/remote-files/extracted-whole-extracted.txt
+++ b/tests/remote-files/extracted-whole-extracted.txt
@@ -1,3 +1,3 @@
-https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/8_qc_no_dil_milliq.parquet  
-https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/21_qc_no_dil_milliq.parquet  
-https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/29_qc_no_dil_milliq.parquet
\ No newline at end of file
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/8_qc_no_dil_milliq.mzml.parquet  
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/21_qc_no_dil_milliq.mzml.parquet  
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/29_qc_no_dil_milliq.mzml.parquet
\ No newline at end of file
diff --git a/tests/remote-files/extracted.txt b/tests/remote-files/extracted.txt
index 242c643c..91e6d4b1 100644
--- a/tests/remote-files/extracted.txt
+++ b/tests/remote-files/extracted.txt
@@ -1,3 +1,6 @@
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/RCX_06_shortened.parquet  
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/RCX_07_shortened.parquet  
-https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/RCX_08_shortened.parquet 
\ No newline at end of file
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/RCX_08_shortened.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/8_qc_no_dil_milliq.mzml.parquet  
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/21_qc_no_dil_milliq.mzml.parquet  
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/extracted/29_qc_no_dil_milliq.mzml.parquet
\ No newline at end of file
diff --git a/tests/testthat/test-compute_clusters.R b/tests/testthat/test-compute_clusters.R
index 1f1613dd..9449cbc8 100644
--- a/tests/testthat/test-compute_clusters.R
+++ b/tests/testthat/test-compute_clusters.R
@@ -51,3 +51,30 @@ patrick::with_parameters_test_that(
     )
   )
 )
+
+test_that("compute clusters simple", {
+  testdata <- file.path("..", "testdata")
+  files <- c("8_qc_no_dil_milliq", "21_qc_no_dil_milliq", "29_qc_no_dil_milliq")
+
+  extracted <- lapply(files, function(x) {
+    xx <- file.path(testdata, "extracted", paste0(x, ".mzml.parquet"))
+    tibble::as_tibble(arrow::read_parquet(xx))
+  })
+  actual <- compute_clusters_simple(
+    feature_tables = extracted,
+    sample_names = files,
+    mz_tol_ppm = 10,
+    rt_tol = 2
+  )
+
+  actual <- actual[order(sapply(actual, function(x) x$sample_id[1]))]
+
+  expected <- lapply(files, function(x) {
+    file <- file.path(testdata, "clusters", paste0(x, ".parquet"))
+    tibble::as_tibble(arrow::read_parquet(file))
+  })
+
+  expected <- expected[order(sapply(expected, function(x) x$sample_id[1]))]
+
+  expect_equal(as.list(actual), expected, tolerance = 0.02)
+})
\ No newline at end of file
diff --git a/tests/testthat/test-extract_features.R b/tests/testthat/test-extract_features.R
index bcf5a69c..5989d2be 100644
--- a/tests/testthat/test-extract_features.R
+++ b/tests/testthat/test-extract_features.R
@@ -2,7 +2,7 @@ patrick::with_parameters_test_that(
   "extract single feature works",
   {
     skip_on_ci()
-    if (full_testdata) {
+    if (skip_tests) {
       skip("skipping whole data test case")
     }
 
@@ -44,7 +44,7 @@ patrick::with_parameters_test_that(
             sd_cut = sd_cut,
             sigma_ratio_lim = sigma_ratio_lim,
             shape_model = "bi-Gaussian",
-            peak_estim_method = "moment",
+            peak_estim_method = peak_estim_method,
             component_eliminate = 0.01,
             moment_power = 1,
             BIC_factor = 2.0,
@@ -69,18 +69,20 @@ patrick::with_parameters_test_that(
       intensity_weighted = FALSE,
       sd_cut = c(0.01, 500),
       sigma_ratio_lim = c(0.01, 100),
-      full_testdata = FALSE
+      peak_estim_method = "moment",
+      skip_tests = FALSE
     ),
     qc_no_dil_milliq = list(
       files = c("8_qc_no_dil_milliq.mzml", "21_qc_no_dil_milliq.mzml", "29_qc_no_dil_milliq.mzml"),
-      expected_files = c("8_qc_no_dil_milliq.parquet", "21_qc_no_dil_milliq.parquet", "29_qc_no_dil_milliq.parquet"),
-      mz_tol = 1e-05,
-      min_pres = 0.5,
-      min_run = 12,
+      expected_files = c("8_qc_no_dil_milliq.mzml.parquet", "21_qc_no_dil_milliq.mzml.parquet", "29_qc_no_dil_milliq.mzml.parquet"),
+      mz_tol = 5e-05,
+      min_pres = 0.7,
+      min_run = 0.5,
       intensity_weighted = FALSE,
-      sd_cut = c(0.01, 500),
-      sigma_ratio_lim = c(0.01, 100),
-      full_testdata = TRUE
+      sd_cut = c(0.05, 10),
+      sigma_ratio_lim = c(0, Inf),
+      peak_estim_method = "EM",
+      skip_tests = TRUE
     )
   )
 )
diff --git a/tests/testthat/test-find.tol.time.R b/tests/testthat/test-find.tol.time.R
new file mode 100644
index 00000000..0264d585
--- /dev/null
+++ b/tests/testthat/test-find.tol.time.R
@@ -0,0 +1,21 @@
+test_that("compute_rt_tol_relative computes something", {
+  aver.bin.size <- 3
+  min.bins <- 2
+  max.bins <- 10
+  max.num.segments <- 10
+  number_of_samples <- 2
+  rt <- c(0, 1, 2, 3, 3.5, 3.7, 3.9, 4, 100, 101, 101.5, 101.2, 105, 108, 108.1, 108.12, 108.3)
+  breaks <- c(1, 2, 3, 8, 13, 17)
+
+  actual <- compute_rt_tol_relative(
+    breaks,
+    max.num.segments,
+    aver.bin.size,
+    number_of_samples,
+    rt,
+    min.bins,
+    max.bins
+  )
+
+  expect_equal(actual, 1.04167, tolerance=0.001)
+})
diff --git a/tests/testthat/test-find_mz_tolerance.R b/tests/testthat/test-find_mz_tolerance.R
new file mode 100644
index 00000000..1a75edc9
--- /dev/null
+++ b/tests/testthat/test-find_mz_tolerance.R
@@ -0,0 +1,15 @@
+test_that("mz tolerance is found", {
+  file <- file.path("..", "testdata", "extracted", "RCX_06_shortened.parquet")
+  mz <- arrow::read_parquet(file)$mz
+
+  mz_tol_relative <- find_mz_tolerance(
+    mz,
+    mz_max_diff = 0.1,
+    aver.bin.size = 4000,
+    min.bins = 50,
+    max.bins = 200,
+    do.plot = FALSE
+  )
+
+  expect_equal(mz_tol_relative, 0.01664097, tolerance=0.001)
+})
\ No newline at end of file
diff --git a/tests/testthat/test-hybrid.R b/tests/testthat/test-hybrid.R
index 0df675e2..15e5a3bd 100644
--- a/tests/testthat/test-hybrid.R
+++ b/tests/testthat/test-hybrid.R
@@ -30,21 +30,21 @@ patrick::with_parameters_test_that("basic hybrid test", {
   )
 
   if (store_reports) {
-  report <- dataCompareR::rCompare(
-    actual,
-    expected,
-    keys = keys,
-    roundDigits = 3,
-    mismatches = 100000
-  )
-  dataCompareR::saveReport(
-    report,
-    reportName = paste0(.test_name, "_hybrid_report"),
-    showInViewer = FALSE,
-    HTMLReport = FALSE,
-    mismatchCount = 10000
-  )
-}
+    report <- dataCompareR::rCompare(
+      actual,
+      expected,
+      keys = keys,
+      roundDigits = 3,
+      mismatches = 100000
+    )
+    dataCompareR::saveReport(
+      report,
+      reportName = paste0(.test_name, "_hybrid_report"),
+      showInViewer = FALSE,
+      HTMLReport = FALSE,
+      mismatchCount = 10000
+    )
+  }
 
   expect_equal(actual, expected)
 },