RECETOX · xtrojak · Nov 8, 2022 · Nov 8, 2022 · Nov 8, 2022 · Nov 8, 2022
diff --git a/NAMESPACE b/NAMESPACE
@@ -28,6 +28,7 @@ export(compute_mz_sd)
 export(compute_scale)
 export(compute_start_bound)
 export(compute_target_times)
+export(compute_uniq_grp)
 export(draw_rt_correction_plot)
 export(draw_rt_normal_peaks)
 export(duplicate.row.remove)
@@ -46,6 +47,7 @@ export(get_times_to_use)
 export(hybrid)
 export(increment_counter)
 export(interpol.area)
+export(label_val_to_keep)
 export(load.lcms)
 export(load_data)
 export(load_file)
@@ -59,6 +61,7 @@ export(plot_raw_profile_histogram)
 export(plot_rt_histograms)
 export(plot_rt_profile)
 export(predict_mz_break_indices)
+export(predict_smoothed_rt)
 export(prep.uv)
 export(preprocess_bandwidth)
 export(preprocess_profile)

diff --git a/R/extract_features.R b/R/extract_features.R
@@ -65,6 +65,9 @@ extract_features <- function(
     'find_local_maxima',
     'combine.seq.3',
     'run_filter',
+    'compute_uniq_grp',
+    'label_val_to_keep',
+    'predict_smoothed_rt',
     'interpol.area',
     'load_file',
     'load_data',

diff --git a/R/run_filter.R b/R/run_filter.R
@@ -5,6 +5,7 @@
 #'  by m/z to be considered a peak.
 #' @param profile The matrix containing m/z, retention time, intensity, and EIC label as columns.
 #' @return unique_grp. 
+#' @export
 compute_uniq_grp <- function(profile, min_count_run, min_pres = 0.6) {
   grps <- profile  
   ttt <- table(grps)
@@ -18,6 +19,7 @@ compute_uniq_grp <- function(profile, min_count_run, min_pres = 0.6) {
 #' @param min_run Run filter parameter. The minimum length of elution time for a series of signals grouped by m/z to be considered a peak.
 #' @param times. Retention times vector.
 #' @return predicted rt.
+#' @export
 #' @examples
 #' predict_smoothed_rt(min_run = min_run, times)
 predict_smoothed_rt <- function(min_run = 5, times) {
@@ -45,6 +47,7 @@ predict_smoothed_rt <- function(min_run = 5, times) {
 #' @param this_times. 
 #' @param times. Retention times vector.
 #' @return to_keep. 
+#' @export
 label_val_to_keep <- function(min_run, timeline, min_pres, this_times, times) {
     this_timeline <- timeline
     this_timeline[this_times] <- 1
@@ -99,7 +102,7 @@ run_filter <- function(newprof,
   min_run <- round(min_count_run)
 
   # computes unique groups 
-  uniq_grp <- compute_uniq_grp(newprof$grps, min_count_run)
+  uniq_grp <- compute_uniq_grp(newprof$grps, min_count_run, min_pres)
 
   # ordered by mz and grps data that are inside unigrps
   newprof <- dplyr::filter(newprof, grps %in% uniq_grp) |> dplyr::arrange(grps, mz)

diff --git a/tests/remote-files/run_filter.txt b/tests/remote-files/run_filter.txt
@@ -1,2 +1,8 @@
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/mbr_test0_run_filter.parquet
-https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/mbr_test0.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/mbr_test0.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_06_shortened_run_filter.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_06_shortened.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_07_shortened_run_filter.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_07_shortened.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_08_shortened_run_filter.parquet
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/filtered/run_filter/RCX_08_shortened.parquet
diff --git a/tests/testthat/test-run_filter.R b/tests/testthat/test-run_filter.R
@@ -4,9 +4,9 @@ patrick::with_parameters_test_that(
     if(ci_skip == TRUE) skip_on_ci()
 
     testdata <- file.path("..", "testdata")
-    input_path <- file.path(testdata, "filtered", "run_filter", filename)
+    input_path <- file.path(testdata, "filtered", "run_filter", paste0(filename, ".parquet"))
 
-    input_data <- as.matrix(arrow::read_parquet(input_path) ) 
+    input_data <- as.matrix(arrow::read_parquet(input_path))
     actual <- run_filter(input_data, min_pres, min_run)
 
     actual <- tibble::tibble(
@@ -16,18 +16,36 @@ patrick::with_parameters_test_that(
       group_number = actual$new_rec[, 4]
       )
 
-    expected_path <- file.path(testdata, "filtered", "run_filter", paste0(.test_name, "_run_filter.parquet"))
+    expected_path <- file.path(testdata, "filtered", "run_filter", paste0(filename, "_run_filter.parquet"))
     expected <- arrow::read_parquet(expected_path) 
-  
+
     expect_equal(actual, expected)
 
   },
   patrick::cases(
     mbr_test0 = list(
-      filename = c("mbr_test0.parquet"),
+      filename = "mbr_test0",
       min_pres = 0.5,
       min_run = 12,
       ci_skip = FALSE
+    ),
+    RCX_06_shortened = list(
+      filename = "RCX_06_shortened",
+      min_pres = 0.7,
+      min_run = 4,
+      ci_skip = FALSE
+    ),
+    RCX_07_shortened = list(
+      filename = "RCX_07_shortened",
+      min_pres = 0.7,
+      min_run = 4,
+      ci_skip = FALSE
+    ),
+    RCX_08_shortened = list(
+      filename = "RCX_08_shortened",
+      min_pres = 0.7,
+      min_run = 4,
+      ci_skip = FALSE
     )
   )
 )