Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Full test data test for unsupervised and hybrid #177

Merged
merged 16 commits into from
Jan 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- refactored `rm.ridge.R` [#171](https://github.com/RECETOX/recetox-aplcms/pull/171)
- refactored and documented `prof.to.features.R` [#170](https://github.com/RECETOX/recetox-aplcms/pull/170)
- added full testdata case for `unsupervised.R` and `hybrid.R` [#177](https://github.com/RECETOX/recetox-aplcms/pull/177)
- added `sort_data` function in `compute_clusters.R` so that feature tables are returned in the order of the sample names [#177](https://github.com/RECETOX/recetox-aplcms/pull/177)

### Changed
- updated remote file lists with download links for the full test data [#177](https://github.com/RECETOX/recetox-aplcms/pull/177)
- fixed parameter value of recover.weaker in `unsupervised.R` and `hybrid.R` [#177](https://github.com/RECETOX/recetox-aplcms/pull/177)

### Removed
- removed NA check in `concatenate_feature_tables` [#177](https://github.com/RECETOX/recetox-aplcms/pull/177)

## [0.10.0] - 2022-12-07

Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ export(rev_cum_sum)
export(rm.ridge)
export(run_filter)
export(semi.sup)
export(sort_data)
export(span)
export(two.step.hybrid)
export(unsupervised)
Expand Down
20 changes: 19 additions & 1 deletion R/compute_clusters.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@

#' Sort feature tables to follow the order of the sample names.
#'
#' @description
#' Reorders a list of feature tables so that the i-th returned table is the
#' one whose first `sample_id` value equals `sample_names[i]`.
#'
#' @param sample_names Vector of sample names defining the requested order.
#' @param feature_tables List of feature tables, each carrying a `sample_id`
#'   column; only the first value of that column identifies the table.
#' @return List of feature tables ordered as `sample_names`.
#' @export
sort_data <- function(sample_names, feature_tables) {
  # Collect exactly one id per table. Iterating over the tables (rather than
  # over `sample_names`) keeps the lookup complete when the lengths differ.
  table_ids <- vapply(
    feature_tables,
    function(feature_table) {
      sample_id <- feature_table$sample_id
      # A missing or empty `sample_id` must still occupy one slot, otherwise
      # the positions of all following tables would shift.
      if (length(sample_id) == 0) {
        return(NA_character_)
      }
      as.character(sample_id[[1]])
    },
    character(1),
    USE.NAMES = FALSE
  )

  index <- match(as.character(sample_names), table_ids)

  # Fail loudly instead of returning NULL entries for unknown samples.
  unmatched <- sample_names[is.na(index)]
  if (length(unmatched) > 0) {
    stop(
      "No feature table found for sample(s): ",
      paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }

  feature_tables[index]
}

#' Compute clusters of mz and rt and assign cluster id to individual features.
#'
#' @description
Expand Down Expand Up @@ -83,6 +99,8 @@ compute_clusters <- function(feature_tables,
dplyr::group_by(sample_id) |>
dplyr::arrange_at(c("mz", "rt")) |>
dplyr::group_split()


feature_tables <- sort_data(sample_names, feature_tables)

return(list(feature_tables = feature_tables, rt_tol_relative = rt_tol_relative, mz_tol_relative = mz_tol_relative))
}
17 changes: 10 additions & 7 deletions R/hybrid.R
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,8 @@ hybrid <- function(
mz_tol_absolute = extracted_clusters$rt_tol_relative,
mz_max_diff = 10 * mz_tol,
rt_tol_relative = rt_tol_relative,
do.plot = do_plot
do.plot = do_plot,
sample_names = sample_names
xtrojak marked this conversation as resolved.
Show resolved Hide resolved
)

message("**** feature alignment ****")
Expand All @@ -426,12 +427,12 @@ hybrid <- function(
rt_tol_relative = adjusted_clusters$rt_tol_relative,
from_features_to_known_table = FALSE
)

message("**** weaker signal recovery ****")
recovered <- lapply(seq_along(filenames), function(i) {
recover.weaker(
filename = filenames[[i]],
sample_name = as.character(i),
sample_name = sample_names[i],
extracted_features = extracted[[i]],
adjusted_features = corrected[[i]],
metadata_table = merged$metadata,
Expand Down Expand Up @@ -460,9 +461,10 @@ hybrid <- function(
mz_tol_absolute = mz_tol_absolute,
mz_max_diff = 10 * mz_tol,
rt_tol_relative = rt_tol_relative,
do.plot = do_plot
do.plot = do_plot,
sample_names = sample_names
)

message("**** computing template ****")
template_features <- compute_template(recovered_clusters$feature_tables)

Expand All @@ -482,9 +484,10 @@ hybrid <- function(
mz_tol_absolute = recovered_clusters$rt_tol_relative,
mz_max_diff = 10 * mz_tol,
rt_tol_relative = rt_tol_relative,
do.plot = do_plot
do.plot = do_plot,
sample_names = sample_names
)

message("**** second feature alignment ****")
recovered_aligned <- create_aligned_feature_table(
dplyr::bind_rows(adjusted_clusters$feature_tables),
Expand Down
1 change: 1 addition & 0 deletions R/recover.weaker.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ compute_target_times <- function(aligned_rts,

aligned_rts[sel_non_na] <- predict(sp, aligned_rts[sel_non_na])$y
}
return(aligned_rts)
}

#' Get boolean mask for values that occur only once.
Expand Down
8 changes: 5 additions & 3 deletions R/unsupervised.R
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ unsupervised <- function(
mz_tol_absolute = extracted_clusters$rt_tol_relative,
mz_max_diff = 10 * mz_tol,
rt_tol_relative = rt_tol_relative,
do.plot = do_plot
do.plot = do_plot,
sample_names = sample_names
)

message("**** feature alignment ****")
Expand All @@ -211,7 +212,7 @@ unsupervised <- function(
recovered <- lapply(seq_along(filenames), function(i) {
recover.weaker(
filename = filenames[[i]],
sample_name = as.character(i),
sample_name = sample_names[i],
extracted_features = feature_tables[[i]],
adjusted_features = corrected[[i]],
metadata_table = aligned$metadata,
Expand Down Expand Up @@ -240,7 +241,8 @@ unsupervised <- function(
mz_tol_absolute = adjusted_clusters$rt_tol_relative,
mz_max_diff = 10 * mz_tol,
rt_tol_relative = rt_tol_relative,
do.plot = do_plot
do.plot = do_plot,
sample_names = sample_names
)

message("**** feature alignment ****")
Expand Down
8 changes: 3 additions & 5 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,9 @@ register_functions_to_cluster <- function(cluster) {
#'
#' @param features list List of tibbles containing extracted feature tables.
concatenate_feature_tables <- function(features, sample_names) {
if(!all(is.na(sample_names))) {
for (i in seq_along(features)) {
if(!("sample_id" %in% colnames(features[[i]]))) {
features[[i]] <- tibble::add_column(features[[i]], sample_id = sample_names[i])
}
for (i in seq_along(features)) {
if(!("sample_id" %in% colnames(features[[i]]))) {
features[[i]] <- tibble::add_column(features[[i]], sample_id = sample_names[i])
}
}

Expand Down
3 changes: 2 additions & 1 deletion tests/remote-files/hybrid.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/RCX_shortened_recovered_feature_sample_table.parquet
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/known_table.parquet
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/mbr_recovered_feature_sample_table.parquet
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/mbr_recovered_feature_sample_table.parquet
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/hybrid/qc_no_dil_milliq_recovered_feature_sample_table.parquet
3 changes: 2 additions & 1 deletion tests/remote-files/unsupervised.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/RCX_shortened_unsupervised.parquet
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/mbr_test_unsupervised.parquet
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/mbr_test_unsupervised.parquet
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/unsupervised/qc_no_dil_milliq_unsupervised.parquet
4 changes: 2 additions & 2 deletions tests/testthat/test-benchmark-extract_features.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
patrick::with_parameters_test_that(
"test benchmark",
{
if (skip) {
if (skip_benchmark) {
skip("Disabled")
}

Expand Down Expand Up @@ -97,7 +97,7 @@ patrick::with_parameters_test_that(
intensity_weighted = FALSE,
sd_cut = c(0.01, 500),
sigma_ratio_lim = c(0.01, 100),
skip = FALSE
skip_benchmark = FALSE
)
)
)
4 changes: 2 additions & 2 deletions tests/testthat/test-benchmark-unsupervised.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
patrick::with_parameters_test_that(
"test benchmark",
{
if (skip) {
if (skip_benchmark) {
skip("Disabled")
}

Expand Down Expand Up @@ -37,7 +37,7 @@ patrick::with_parameters_test_that(
patrick::cases(
mbr_test = list(
filename = c("mbr_test0", "mbr_test1", "mbr_test2"),
skip = TRUE
skip_benchmark = TRUE
)
)
)
6 changes: 3 additions & 3 deletions tests/testthat/test-extract_features.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ patrick::with_parameters_test_that(
"extract single feature works",
{
skip_on_ci()
if (skip) {
if (full_testdata) {
skip("skipping whole data test case")
}

Expand Down Expand Up @@ -86,7 +86,7 @@ patrick::with_parameters_test_that(
intensity_weighted = FALSE,
sd_cut = c(0.01, 500),
sigma_ratio_lim = c(0.01, 100),
skip = FALSE
full_testdata = FALSE
),
qc_no_dil_milliq = list(
files = c("8_qc_no_dil_milliq.mzml", "21_qc_no_dil_milliq.mzml", "29_qc_no_dil_milliq.mzml"),
Expand All @@ -97,7 +97,7 @@ patrick::with_parameters_test_that(
intensity_weighted = FALSE,
sd_cut = c(0.01, 500),
sigma_ratio_lim = c(0.01, 100),
skip = TRUE
full_testdata = TRUE
)
)
)
15 changes: 13 additions & 2 deletions tests/testthat/test-hybrid.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
patrick::with_parameters_test_that("basic hybrid test", {
if(ci_skip == TRUE) skip_on_ci()

if (full_testdata) {
skip("skipping whole data test case")
}

store_reports <- FALSE
testdata <- file.path("..", "testdata")

Expand Down Expand Up @@ -50,10 +54,17 @@ patrick::with_parameters_test_that("basic hybrid test", {
patrick::cases(
mbr = list(
files = c("mbr_test0.mzml", "mbr_test1.mzml", "mbr_test2.mzml"),
ci_skip = TRUE
ci_skip = TRUE,
full_testdata = FALSE
),
RCX_shortened = list(
files = c("RCX_06_shortened.mzML", "RCX_07_shortened.mzML", "RCX_08_shortened.mzML"),
ci_skip = FALSE
ci_skip = FALSE,
full_testdata = FALSE
),
qc_no_dil_milliq = list(
files = c("8_qc_no_dil_milliq.mzml", "21_qc_no_dil_milliq.mzml", "29_qc_no_dil_milliq.mzml"),
ci_skip = TRUE,
full_testdata = TRUE
)
))
19 changes: 17 additions & 2 deletions tests/testthat/test-unsupervised.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ patrick::with_parameters_test_that(
"basic unsupervised test",
{
store_reports <- FALSE

if (full_testdata) {
skip("skipping whole data test case")
}

test_files <- sapply(files, function(x) file.path("../testdata/input", x))

expected <- arrow::read_parquet(file.path("../testdata/unsupervised", paste0(.test_name, "_unsupervised.parquet")))
Expand Down Expand Up @@ -30,7 +35,17 @@ patrick::with_parameters_test_that(
expect_equal(actual, expected, tolerance = 0.01)
},
patrick::cases(
mbr_test = list(files = c("mbr_test0.mzml", "mbr_test1.mzml", "mbr_test2.mzml")),
RCX_shortened = list(files = c("RCX_06_shortened.mzML", "RCX_07_shortened.mzML", "RCX_08_shortened.mzML"))
mbr_test = list(
files = c("mbr_test0.mzml", "mbr_test1.mzml", "mbr_test2.mzml"),
full_testdata = FALSE
),
RCX_shortened = list(
files = c("RCX_06_shortened.mzML", "RCX_07_shortened.mzML", "RCX_08_shortened.mzML"),
full_testdata = FALSE
),
qc_no_dil_milliq = list(
files = c("8_qc_no_dil_milliq.mzml", "21_qc_no_dil_milliq.mzml", "29_qc_no_dil_milliq.mzml"),
full_testdata = TRUE
)
)
)