From 3a6ba26ec466256f22885d60bf4fdca7a84255cf Mon Sep 17 00:00:00 2001 From: Matej Trojak Date: Tue, 8 Nov 2022 10:32:57 +0100 Subject: [PATCH 1/5] Include util functions in parallel init --- R/extract_features.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/extract_features.R b/R/extract_features.R index ce5374ca..9ea5d4b4 100644 --- a/R/extract_features.R +++ b/R/extract_features.R @@ -65,6 +65,9 @@ extract_features <- function( 'find_local_maxima', 'combine.seq.3', 'run_filter', + 'compute_uniq_grp', + 'label_val_to_keep', + 'predict_smoothed_rt', 'interpol.area', 'load_file', 'load_data', From cc0c92303e2d64a56c1e75aa7c7b500da7404a6a Mon Sep 17 00:00:00 2001 From: Matej Trojak Date: Tue, 8 Nov 2022 10:33:18 +0100 Subject: [PATCH 2/5] Add min_pres param to compute_uniq_grp call --- R/run_filter.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/run_filter.R b/R/run_filter.R index 7c03882e..1053dcc6 100644 --- a/R/run_filter.R +++ b/R/run_filter.R @@ -99,7 +99,7 @@ run_filter <- function(newprof, min_run <- round(min_count_run) # computes unique groups - uniq_grp <- compute_uniq_grp(newprof$grps, min_count_run) + uniq_grp <- compute_uniq_grp(newprof$grps, min_count_run, min_pres) # ordered by mz and grps data that are inside unigrps newprof <- dplyr::filter(newprof, grps %in% uniq_grp) |> dplyr::arrange(grps, mz) From 03060d6e27e3c207affa0e93c147fb8ac621c903 Mon Sep 17 00:00:00 2001 From: Matej Trojak Date: Tue, 8 Nov 2022 10:35:12 +0100 Subject: [PATCH 3/5] Additional test cases for run_filter --- tests/testthat/test-run_filter.R | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-run_filter.R b/tests/testthat/test-run_filter.R index b0773af3..309cb110 100644 --- a/tests/testthat/test-run_filter.R +++ b/tests/testthat/test-run_filter.R @@ -4,9 +4,9 @@ patrick::with_parameters_test_that( if(ci_skip == TRUE) skip_on_ci() testdata <- file.path("..", "testdata") - input_path <- file.path(testdata, "filtered", "run_filter", filename) + input_path <- file.path(testdata, "filtered", "run_filter", paste0(filename, ".parquet")) - input_data <- as.matrix(arrow::read_parquet(input_path) ) + input_data <- as.matrix(arrow::read_parquet(input_path)) actual <- run_filter(input_data, min_pres, min_run) actual <- tibble::tibble( @@ -16,18 +16,36 @@ patrick::with_parameters_test_that( group_number = actual$new_rec[, 4] ) - expected_path <- file.path(testdata, "filtered", "run_filter", paste0(.test_name, "_run_filter.parquet")) + expected_path <- file.path(testdata, "filtered", "run_filter", paste0(filename, "_run_filter.parquet")) expected <- arrow::read_parquet(expected_path) - + expect_equal(actual, expected) }, patrick::cases( mbr_test0 = list( - filename = c("mbr_test0.parquet"), + filename = "mbr_test0", min_pres = 0.5, min_run = 12, ci_skip = FALSE + ), + RCX_06_shortened = list( + filename = "RCX_06_shortened", + min_pres = 0.7, + min_run = 4, + ci_skip = FALSE + ), + RCX_07_shortened = list( + filename = "RCX_07_shortened", + min_pres = 0.7, + min_run = 4, + ci_skip = FALSE + ), + RCX_08_shortened = list( + filename = "RCX_08_shortened", + min_pres = 0.7, + min_run = 4, + ci_skip = FALSE ) ) ) From 4a0b006c06401a9c03cc90de93d788b3ec5d05aa Mon Sep 17 00:00:00 2001 From: Matej Trojak Date: Tue, 8 Nov 2022 11:04:23 +0100 Subject: [PATCH 4/5] Add remote files for run_filter tests --- tests/remote-files/run_filter.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/remote-files/run_filter.txt b/tests/remote-files/run_filter.txt index 3ea29298..2898e597 100644 --- a/tests/remote-files/run_filter.txt +++ b/tests/remote-files/run_filter.txt @@ -1,2 +1,8 @@ https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/mbr_test0_run_filter.parquet -https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/mbr_test0.parquet \ No newline at end of file +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/mbr_test0.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_06_shortened_run_filter.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_06_shortened.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_07_shortened_run_filter.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_07_shortened.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_08_shortened_run_filter.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_08_shortened.parquet \ No newline at end of file From 98d12946f9a8ae70df0bb77fe9063c7c3aff14b4 Mon Sep 17 00:00:00 2001 From: Matej Trojak Date: Tue, 8 Nov 2022 12:58:08 +0100 Subject: [PATCH 5/5] Export run_filter util functions --- NAMESPACE | 3 +++ R/run_filter.R | 3 +++ 2 files changed, 6 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 854995b2..887f7d3a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(compute_mz_sd) export(compute_scale) export(compute_start_bound) export(compute_target_times) +export(compute_uniq_grp) export(draw_rt_correction_plot) export(draw_rt_normal_peaks) export(duplicate.row.remove) @@ -46,6 +47,7 @@ export(get_times_to_use) export(hybrid) export(increment_counter) export(interpol.area) +export(label_val_to_keep) export(load.lcms) export(load_data) export(load_file) @@ -59,6 +61,7 @@ export(plot_raw_profile_histogram) export(plot_rt_histograms) export(plot_rt_profile) export(predict_mz_break_indices) +export(predict_smoothed_rt) export(prep.uv) export(preprocess_bandwidth) export(preprocess_profile) diff --git a/R/run_filter.R b/R/run_filter.R index 1053dcc6..9f60a6be 100644 --- a/R/run_filter.R +++ b/R/run_filter.R @@ -5,6 +5,7 @@ #' by m/z to be considered a peak. #' @param profile The matrix containing m/z, retention time, intensity, and EIC label as columns. #' @return unique_grp. +#' @export compute_uniq_grp <- function(profile, min_count_run, min_pres = 0.6) { grps <- profile ttt <- table(grps) @@ -18,6 +19,7 @@ compute_uniq_grp <- function(profile, min_count_run, min_pres = 0.6) { #' @param min_run Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z to be considered a peak. #' @param times. Retention times vector. #' @return predicted rt. +#' @export #' @examples #' predict_smoothed_rt(min_run = min_run, times) predict_smoothed_rt <- function(min_run = 5, times) { @@ -45,6 +47,7 @@ predict_smoothed_rt <- function(min_run = 5, times) { #' @param this_times. #' @param times. Retention times vector. #' @return to_keep. +#' @export label_val_to_keep <- function(min_run, timeline, min_pres, this_times, times) { this_timeline <- timeline this_timeline[this_times] <- 1