diff --git a/CHANGELOG.md b/CHANGELOG.md index 12a3a5c..ea17170 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - refactored `rm.ridge.R` [#171](https://github.com/RECETOX/recetox-aplcms/pull/171) - refactored and documented `prof.to.features.R` [#170](https://github.com/RECETOX/recetox-aplcms/pull/170) +- added full testdata case for `unsupervised.R` and `hybrid.R` [#177](https://github.com/RECETOX/recetox-aplcms/pull/177) +- added function to sort data in `compute_clusters.R` to return sorted data [#177](https://github.com/RECETOX/recetox-aplcms/pull/177) ### Changed +- updated remote files with the full data get links [#177](https://github.com/RECETOX/recetox-aplcms/pull/177) +- fixed parameter value of recover.weaker in `unsupervised.R` and `hybrid.R` [#177](https://github.com/RECETOX/recetox-aplcms/pull/177) + ### Removed +removed NA check in `concatenate_feature_tables` [#177](https://github.com/RECETOX/recetox-aplcms/pull/177) ## [0.10.0] - 2022-12-07 diff --git a/NAMESPACE b/NAMESPACE index 6387392..0f5c673 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -72,6 +72,7 @@ export(rev_cum_sum) export(rm.ridge) export(run_filter) export(semi.sup) +export(sort_data) export(span) export(two.step.hybrid) export(unsupervised) diff --git a/R/compute_clusters.R b/R/compute_clusters.R index b1dfd76..12a4ec4 100644 --- a/R/compute_clusters.R +++ b/R/compute_clusters.R @@ -1,4 +1,20 @@ +#' @description +#' Sort tibble based on sample_names +#' @export +sort_data <- function(sample_names, feature_tables){ + index <- c() + for (i in seq_along(sample_names)) + { + index <- append(index, feature_tables[[i]]$sample_id[1]) + } + + index <- match(sample_names, index) + feature_tables <- feature_tables[index] + + return(feature_tables) +} + #' Compute clusters of mz and rt and assign cluster id to individual features. #' #' @description @@ -83,6 +99,8 @@ compute_clusters <- function(feature_tables, dplyr::group_by(sample_id) |> dplyr::arrange_at(c("mz", "rt")) |> dplyr::group_split() - + + feature_tables <- sort_data(sample_names, feature_tables) + return(list(feature_tables = feature_tables, rt_tol_relative = rt_tol_relative, mz_tol_relative = mz_tol_relative)) } \ No newline at end of file diff --git a/R/hybrid.R b/R/hybrid.R index de77ac7..a7f3c30 100644 --- a/R/hybrid.R +++ b/R/hybrid.R @@ -405,7 +405,8 @@ hybrid <- function( mz_tol_absolute = extracted_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, rt_tol_relative = rt_tol_relative, - do.plot = do_plot + do.plot = do_plot, + sample_names = sample_names ) message("**** feature alignment ****") @@ -426,12 +427,12 @@ hybrid <- function( rt_tol_relative = adjusted_clusters$rt_tol_relative, from_features_to_known_table = FALSE ) - + message("**** weaker signal recovery ****") recovered <- lapply(seq_along(filenames), function(i) { recover.weaker( filename = filenames[[i]], - sample_name = as.character(i), + sample_name = sample_names[i], extracted_features = extracted[[i]], adjusted_features = corrected[[i]], metadata_table = merged$metadata, @@ -460,9 +461,10 @@ hybrid <- function( mz_tol_absolute = mz_tol_absolute, mz_max_diff = 10 * mz_tol, rt_tol_relative = rt_tol_relative, - do.plot = do_plot + do.plot = do_plot, + sample_names = sample_names ) - + message("**** computing template ****") template_features <- compute_template(recovered_clusters$feature_tables) @@ -482,9 +484,10 @@ hybrid <- function( mz_tol_absolute = recovered_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, rt_tol_relative = rt_tol_relative, - do.plot = do_plot + do.plot = do_plot, + sample_names = sample_names ) - + message("**** second feature alignment ****") recovered_aligned <- create_aligned_feature_table( dplyr::bind_rows(adjusted_clusters$feature_tables), diff --git a/R/recover.weaker.R b/R/recover.weaker.R index 93e3491..1f27fb4 100644 --- a/R/recover.weaker.R +++ b/R/recover.weaker.R @@ -148,6 +148,7 @@ compute_target_times <- function(aligned_rts, aligned_rts[sel_non_na] <- predict(sp, aligned_rts[sel_non_na])$y } + return(aligned_rts) } #' Get boolean mask for values that occur only once. diff --git a/R/unsupervised.R b/R/unsupervised.R index 564fc01..58e4297 100644 --- a/R/unsupervised.R +++ b/R/unsupervised.R @@ -195,7 +195,8 @@ unsupervised <- function( mz_tol_absolute = extracted_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, rt_tol_relative = rt_tol_relative, - do.plot = do_plot + do.plot = do_plot, + sample_names = sample_names ) message("**** feature alignment ****") @@ -211,7 +212,7 @@ unsupervised <- function( recovered <- lapply(seq_along(filenames), function(i) { recover.weaker( filename = filenames[[i]], - sample_name = as.character(i), + sample_name = sample_names[i], extracted_features = feature_tables[[i]], adjusted_features = corrected[[i]], metadata_table = aligned$metadata, @@ -240,7 +241,8 @@ unsupervised <- function( mz_tol_absolute = adjusted_clusters$rt_tol_relative, mz_max_diff = 10 * mz_tol, rt_tol_relative = rt_tol_relative, - do.plot = do_plot + do.plot = do_plot, + sample_names = sample_names ) message("**** feature alignment ****") diff --git a/R/utils.R b/R/utils.R index c936d9f..218f0c4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -53,11 +53,9 @@ register_functions_to_cluster <- function(cluster) { #' #' @param features list List of tibbles containing extracted feature tables. concatenate_feature_tables <- function(features, sample_names) { - if(!all(is.na(sample_names))) { - for (i in seq_along(features)) { - if(!("sample_id" %in% colnames(features[[i]]))) { - features[[i]] <- tibble::add_column(features[[i]], sample_id = sample_names[i]) - } + for (i in seq_along(features)) { + if(!("sample_id" %in% colnames(features[[i]]))) { + features[[i]] <- tibble::add_column(features[[i]], sample_id = sample_names[i]) } } diff --git a/tests/remote-files/hybrid.txt b/tests/remote-files/hybrid.txt index cdb454a..a64881f 100644 --- a/tests/remote-files/hybrid.txt +++ b/tests/remote-files/hybrid.txt @@ -1,3 +1,4 @@ https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/RCX_shortened_recovered_feature_sample_table.parquet https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/known_table.parquet -https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/mbr_recovered_feature_sample_table.parquet \ No newline at end of file +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/mbr_recovered_feature_sample_table.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/qc_no_dil_milliq_recovered_feature_sample_table.parquet \ No newline at end of file diff --git a/tests/remote-files/unsupervised.txt b/tests/remote-files/unsupervised.txt index 7cb239e..76661d5 100644 --- a/tests/remote-files/unsupervised.txt +++ b/tests/remote-files/unsupervised.txt @@ -1,2 +1,3 @@ https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/RCX_shortened_unsupervised.parquet -https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/mbr_test_unsupervised.parquet \ No newline at end of file +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/mbr_test_unsupervised.parquet +https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/qc_no_dil_milliq_unsupervised.parquet \ No newline at end of file diff --git a/tests/remote-files/extracted-whole-input.txt b/tests/remote-files/whole-data-input.txt similarity index 100% rename from tests/remote-files/extracted-whole-input.txt rename to tests/remote-files/whole-data-input.txt diff --git a/tests/testthat/test-benchmark-extract_features.R b/tests/testthat/test-benchmark-extract_features.R index ed2754e..a747555 100644 --- a/tests/testthat/test-benchmark-extract_features.R +++ b/tests/testthat/test-benchmark-extract_features.R @@ -1,7 +1,7 @@ patrick::with_parameters_test_that( "test benchmark", { - if (skip) { + if (skip_benchmark) { skip("Disabled") } @@ -97,7 +97,7 @@ patrick::with_parameters_test_that( intensity_weighted = FALSE, sd_cut = c(0.01, 500), sigma_ratio_lim = c(0.01, 100), - skip = FALSE + skip_benchmark = FALSE ) ) ) diff --git a/tests/testthat/test-benchmark-unsupervised.R b/tests/testthat/test-benchmark-unsupervised.R index 555e32f..a231800 100644 --- a/tests/testthat/test-benchmark-unsupervised.R +++ b/tests/testthat/test-benchmark-unsupervised.R @@ -1,7 +1,7 @@ patrick::with_parameters_test_that( "test benchmark", { - if (skip) { + if (skip_benchmark) { skip("Disabled") } @@ -37,7 +37,7 @@ patrick::with_parameters_test_that( patrick::cases( mbr_test = list( filename = c("mbr_test0", "mbr_test1", "mbr_test2"), - skip = TRUE + skip_benchmark = TRUE ) ) ) diff --git a/tests/testthat/test-extract_features.R b/tests/testthat/test-extract_features.R index 23dddae..3505ea6 100644 --- a/tests/testthat/test-extract_features.R +++ b/tests/testthat/test-extract_features.R @@ -2,7 +2,7 @@ patrick::with_parameters_test_that( "extract single feature works", { skip_on_ci() - if (skip) { + if (full_testdata) { skip("skipping whole data test case") } @@ -86,7 +86,7 @@ patrick::with_parameters_test_that( intensity_weighted = FALSE, sd_cut = c(0.01, 500), sigma_ratio_lim = c(0.01, 100), - skip = FALSE + full_testdata = FALSE ), qc_no_dil_milliq = list( files = c("8_qc_no_dil_milliq.mzml", "21_qc_no_dil_milliq.mzml", "29_qc_no_dil_milliq.mzml"), @@ -97,7 +97,7 @@ patrick::with_parameters_test_that( intensity_weighted = FALSE, sd_cut = c(0.01, 500), sigma_ratio_lim = c(0.01, 100), - skip = TRUE + full_testdata = TRUE ) ) ) diff --git a/tests/testthat/test-hybrid.R b/tests/testthat/test-hybrid.R index ca454e9..1d6725c 100644 --- a/tests/testthat/test-hybrid.R +++ b/tests/testthat/test-hybrid.R @@ -1,6 +1,10 @@ patrick::with_parameters_test_that("basic hybrid test", { if(ci_skip == TRUE) skip_on_ci() + if (full_testdata) { + skip("skipping whole data test case") + } + store_reports <- FALSE testdata <- file.path("..", "testdata") @@ -50,10 +54,17 @@ patrick::with_parameters_test_that("basic hybrid test", { patrick::cases( mbr = list( files = c("mbr_test0.mzml", "mbr_test1.mzml", "mbr_test2.mzml"), - ci_skip = TRUE + ci_skip = TRUE, + full_testdata = FALSE ), RCX_shortened = list( files = c("RCX_06_shortened.mzML", "RCX_07_shortened.mzML", "RCX_08_shortened.mzML"), - ci_skip = FALSE + ci_skip = FALSE, + full_testdata = FALSE + ), + qc_no_dil_milliq = list( + files = c("8_qc_no_dil_milliq.mzml", "21_qc_no_dil_milliq.mzml", "29_qc_no_dil_milliq.mzml"), + ci_skip = TRUE, + full_testdata = TRUE ) )) diff --git a/tests/testthat/test-unsupervised.R b/tests/testthat/test-unsupervised.R index 4e36802..afed835 100644 --- a/tests/testthat/test-unsupervised.R +++ b/tests/testthat/test-unsupervised.R @@ -2,6 +2,11 @@ patrick::with_parameters_test_that( "basic unsupervised test", { store_reports <- FALSE + + if (full_testdata) { + skip("skipping whole data test case") + } + test_files <- sapply(files, function(x) file.path("../testdata/input", x)) expected <- arrow::read_parquet(file.path("../testdata/unsupervised", paste0(.test_name, "_unsupervised.parquet"))) @@ -30,7 +35,17 @@ patrick::with_parameters_test_that( expect_equal(actual, expected, tolerance = 0.01) }, patrick::cases( - mbr_test = list(files = c("mbr_test0.mzml", "mbr_test1.mzml", "mbr_test2.mzml")), - RCX_shortened = list(files = c("RCX_06_shortened.mzML", "RCX_07_shortened.mzML", "RCX_08_shortened.mzML")) + mbr_test = list( + files = c("mbr_test0.mzml", "mbr_test1.mzml", "mbr_test2.mzml"), + full_testdata = FALSE + ), + RCX_shortened = list( + files = c("RCX_06_shortened.mzML", "RCX_07_shortened.mzML", "RCX_08_shortened.mzML"), + full_testdata = FALSE + ), + qc_no_dil_milliq = list( + files = c("8_qc_no_dil_milliq.mzml", "21_qc_no_dil_milliq.mzml", "29_qc_no_dil_milliq.mzml"), + full_testdata = TRUE + ) ) )