Skip to content

Commit

Permalink
Merge pull request #219 from KristinaGomoryova/remove_noise_update
Browse files Browse the repository at this point in the history
remove_noise function updated
  • Loading branch information
hechth authored Jul 11, 2024
2 parents c1937cd + 24fe08e commit 82c9526
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 6 deletions.
24 changes: 22 additions & 2 deletions R/remove_noise.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ load_data <- function(filename,
#' @param intensity_weighted Whether to use intensity to weight mass density estimation.
#' @param do.plot Indicates whether plot should be drawn.
#' @param cache Whether to use cache
#' @param grouping_threshold The maximum difference between two scans to be considered the same EIC. Default is Inf.
#' @return A matrix with four columns: m/z value, retention time, intensity, and group number.
#' @export
remove_noise <- function(filename,
Expand All @@ -70,7 +71,8 @@ remove_noise <- function(filename,
baseline_correct_noise_percentile,
intensity_weighted,
do.plot,
cache) {
cache,
grouping_threshold = Inf) {
raw.data <- load_file(filename)

raw.prof <- adaptive.bin(
Expand All @@ -87,10 +89,28 @@ remove_noise <- function(filename,
raw.prof$features$intensities,
raw.prof$features$grps
)

run.sel <- raw.prof$height.rec[which(raw.prof$height.rec[, 2] >= raw.prof$min.count.run * min_pres & raw.prof$height.rec[, 3] > baseline_correct), 1]

newprof <- newprof[newprof[, 4] %in% run.sel, ]

if (grouping_threshold < Inf) {
sorted_newprof <- newprof[order(newprof[,2]),]
new_grps <- cumsum(c(0, diff(sorted_newprof[,2])) > grouping_threshold)
sorted_newprof <- cbind(sorted_newprof, new_grps, deparse.level = 0)

sorted_newprof_df <- tibble::as_tibble(sorted_newprof)

newprof <- as.matrix(sorted_newprof_df |>
dplyr::group_by(V4, V5) |>
dplyr::mutate(cluster = cur_group_id()) |>
dplyr::ungroup() |>
dplyr::arrange(cluster) |>
dplyr::select(-V4, -V5)
)
colnames(newprof) <- NULL
}

new.prof <- run_filter(
newprof,
min_pres = min_pres,
Expand Down
6 changes: 3 additions & 3 deletions conda/environment-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ channels:
- defaults
dependencies:
- r-base
- icu <=70.1
- icu
- r-mass
- r-rgl
- bioconductor-mzR ==2.28.0
- bioconductor-mzR ==2.36.0
- r-splines2
- r-doparallel
- r-foreach
- r-snow
- r-rcpp
- r-arrow >=7.0.0,<10.0.0
- r-arrow
- r-dplyr
- r-tidyr
- r-stringr
Expand Down
3 changes: 2 additions & 1 deletion tests/remote-files/input.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/RCX_08_shortened.mzML
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/single_eic.mzml
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/alg3.mzdata
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/test_file.mzXML
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/test_file.mzXML
https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/Tribrid_201106_009-QC1_1_NEG_FISABIO_single_eic.raw.mzML
4 changes: 4 additions & 0 deletions tests/testthat/test-load.lcms.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ create_test_case <- function(filename, mz_length, rt_length, intensities_length)
patrick::with_parameters_test_that(
"test load.lcms reads different file types",
{
if(packageVersion("mzR") >= "2.29.0" && tools::file_ext(filename) == "mzdata") {
print("mzR >= 2.29.0 no longer supports mzdata.")
skip()
}
# Arrange: Set up test inputs
testdata <- file.path("..", "testdata")
input_path <- file.path(testdata, "input", filename)
Expand Down
30 changes: 30 additions & 0 deletions tests/testthat/test-remove_noise.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,33 @@ patrick::with_parameters_test_that(
)
)
)

test_that("remove noise works with grouping threshold", {
testdata <- file.path("..", "testdata")
input_path <- file.path(testdata,
"input",
"Tribrid_201106_009-QC1_1_NEG_FISABIO_single_eic.raw.mzML")

expected <- tibble(group_number = c(1, 2, 3, 5, 6, 7, 8, 9),
n = c(67, 73, 3, 39, 2, 6, 3, 7))

sut <- remove_noise(
input_path,
min_pres = 0.8,
min_run = 0.2,
mz_tol = 5e-05,
baseline_correct = 0.0,
baseline_correct_noise_percentile = 0.05,
intensity_weighted = FALSE,
do.plot = FALSE,
cache = FALSE,
grouping_threshold = 4
)

actual <- sut %>%
mutate(group = factor(group_number)) %>%
group_by(group_number) %>%
summarize(n = n())

expect_equal(actual, expected)
})

0 comments on commit 82c9526

Please sign in to comment.