Skip to content

Commit

Permalink
Merge pull request #164 from xtrojak/reorganise-util-functions
Browse files Browse the repository at this point in the history
Cleanup utility functions
  • Loading branch information
hechth authored Nov 28, 2022
2 parents 5e1e986 + 6306301 commit 9e9fc64
Show file tree
Hide file tree
Showing 13 changed files with 147 additions and 392 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- use proper sample IDs inside feature tables [#153](https://github.com/RECETOX/recetox-aplcms/pull/153)
- exported functions in NAMESPACE [#154](https://github.com/RECETOX/recetox-aplcms/pull/154)
- docstrings and documentation files for refactored functions [#160](https://github.com/RECETOX/recetox-aplcms/pull/160)
- moved some utility functions to a more suitable locations [#164](https://github.com/RECETOX/recetox-aplcms/pull/160)
### Removed
- `extract_features` and `feature.align` [#154](https://github.com/RECETOX/recetox-aplcms/pull/154)
- improper usage of `@examples` [#160](https://github.com/RECETOX/recetox-aplcms/pull/160)
- several obsolete utility functions [#164](https://github.com/RECETOX/recetox-aplcms/pull/160)

## [0.9.4] - 2022-05-10

Expand Down
4 changes: 0 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ export(create_aligned_feature_table)
export(draw_rt_correction_plot)
export(draw_rt_normal_peaks)
export(duplicate.row.remove)
export(extract_pattern_colnames)
export(find.tol)
export(find.turn.point)
export(find_local_maxima)
Expand All @@ -53,15 +52,13 @@ export(load.lcms)
export(load_aligned_features)
export(load_data)
export(load_file)
export(make.known.table)
export(merge_features_and_known_table)
export(msExtrema)
export(normix)
export(normix.bic)
export(plot_normix_bic)
export(plot_peak_summary)
export(plot_raw_profile_histogram)
export(plot_rt_histograms)
export(plot_rt_profile)
export(predict_mz_break_indices)
export(predict_smoothed_rt)
Expand All @@ -75,7 +72,6 @@ export(rev_cum_sum)
export(rm.ridge)
export(run_filter)
export(semi.sup)
export(sort_samples_by_acquisition_number)
export(span)
export(two.step.hybrid)
export(unsupervised)
Expand Down
24 changes: 24 additions & 0 deletions R/hybrid.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,30 @@
NULL
#> NULL

#' Compute matches between mz array and specific mass value with a tolerance.
#' @param sample_mz The mz array for which to compute the matching.
#' @param known_mz The mz value with which to match.
#' @param match_tol_ppm Matching tolerance in ppm.
#' @return Indicies of m/z values within the tolerance of any known m/z.
#' @export
#' @examples
#' find_mz_match(
#' sample_mz = c(10, 20, 21),
#' known_mz = 20
#' )
find_mz_match <- function(sample_mz, known_mz, match_tol_ppm = 5) {
matched_mz_idx <- rep(0, length(sample_mz))
match_tol_ppm <- match_tol_ppm / 1e6

for (i in seq_along(sample_mz)) {
rel_diff <- abs((sample_mz[i] - known_mz) / sample_mz[i])
if (min(rel_diff) < match_tol_ppm) {
matched_mz_idx[i] <- 1
}
}
return(which(matched_mz_idx == 1))
}

#' Match peaks from sample table to already known peaks via similar m/z and rt.
#' @param aligned A list object with three tibble tables: metadata, intensity, and rt.
#' @param known_table A table of known/previously detected peaks.
Expand Down
115 changes: 0 additions & 115 deletions R/make.known.table.R

This file was deleted.

23 changes: 0 additions & 23 deletions R/mass.match.R

This file was deleted.

13 changes: 0 additions & 13 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,16 +145,3 @@ draw_rt_correction_plot <- function(colors,
)
}
}

#' @export
plot_rt_histograms <- function(pk.times,
mz_sd) {
hist(mz_sd,
xlab = "m/z SD", ylab = "Frequency",
main = "m/z SD distribution"
)
hist(apply(pk.times[, -1:-4], 1, sd, na.rm = TRUE),
xlab = "Retention time SD", ylab = "Frequency",
main = "Retention time SD distribution"
)
}
119 changes: 119 additions & 0 deletions R/two.step.hybrid.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,123 @@
NULL
#> NULL

#' @importFrom dplyr select inner_join
as_feature_crosstab <- function(sample_names, metadata, data) {
metadata_cols <- c('id', 'mz', 'rt', 'mzmin', 'mzmax')
data <- select(metadata, metadata_cols) |>
inner_join(data, on='id')
colnames(data) <- c(metadata_cols, sample_names)

return(data)
}

recover_weaker_signals <- function(
cluster,
filenames,
extracted_features,
corrected_features,
aligned_rt_crosstab,
aligned_int_crosstab,
original_mz_tolerance,
aligned_mz_tolerance,
aligned_rt_tolerance,
recover_mz_range,
recover_rt_range,
use_observed_range,
min_bandwidth,
max_bandwidth,
recover_min_count
) {
snow::clusterExport(cluster, c('recover.weaker'))
snow::clusterEvalQ(cluster, library("splines"))

recovered <- lapply(seq_along(filenames), function(i) {
recover.weaker(
sample_name = get_sample_name(filenames[i]),
filename = filenames[[i]],
extracted_features = as_tibble(extracted_features[[i]]),
adjusted_features = as_tibble(corrected_features[[i]]),
pk.times = aligned_rt_crosstab,
aligned.ftrs = aligned_int_crosstab,
orig.tol = original_mz_tolerance,
align.mz.tol = aligned_mz_tolerance,
align.rt.tol = aligned_rt_tolerance,
recover_mz_range = recover_mz_range,
recover_rt_range = recover_rt_range,
use.observed.range = use_observed_range,
bandwidth = 0.5,
min.bw = min_bandwidth,
max.bw = max_bandwidth,
recover.min.count = recover_min_count
)
})

feature_table <- aligned_rt_crosstab[, 1:4]
rt_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.times))
int_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.ftrs))

feature_names <- rownames(feature_table)
sample_names <- colnames(aligned_rt_crosstab[, -(1:4)])

list(
extracted_features = lapply(recovered, function(x) x$this.f1),
corrected_features = lapply(recovered, function(x) x$this.f2),
rt_crosstab = as_feature_crosstab(feature_names, sample_names, rt_crosstab),
int_crosstab = as_feature_crosstab(feature_names, sample_names, int_crosstab)
)
}

pivot_feature_values <- function(feature_table, variable) {
extended_variable <- paste0("sample_", variable)
values <- dplyr::select(feature_table, mz, rt, sample, !!sym(extended_variable))
values <- tidyr::pivot_wider(values, names_from = sample, values_from = !!sym(extended_variable))
variable_colnames <- colnames(values)[3:ncol(values)]
variable_colnames <- paste0(variable_colnames, "_", variable)
colnames(values)[3:ncol(values)] <- variable_colnames
return(values)
}

long_to_wide_feature_table <- function(feature_table) {
sample_rts <- pivot_feature_values(feature_table, "rt")
sample_intensities <- pivot_feature_values(feature_table, "intensity")
feature_table <- dplyr::select(feature_table, mz, rt) %>%
dplyr::distinct(mz, rt) %>%
dplyr::inner_join(sample_rts, by = c("mz", "rt")) %>%
dplyr::inner_join(sample_intensities, by = c("mz", "rt"))
}

wide_to_long_feature_table <- function(wide_table, sample_names) {
wide_table <- tibble::rowid_to_column(wide_table, "feature")

long_rt <- tidyr::gather(wide_table, sample, sample_rt, contains("_rt"), factor_key=FALSE) %>%
dplyr::select(-contains("_intensity")) %>%
mutate(sample = stringr::str_remove_all(sample, "_rt"))
long_int <- tidyr::gather(wide_table, sample, sample_intensity, contains("_intensity"), factor_key=FALSE) %>%
dplyr::select(-contains("_rt")) %>%
mutate(sample = stringr::str_remove_all(sample, "_intensity"))

long_features <- dplyr::full_join(long_rt, long_int, by = c("feature", "mz", "rt", "mz_min", "mz_max", "sample"))

return(long_features)
}

extract_pattern_colnames <- function(dataframe, pattern) {
dataframe <- dplyr::select(dataframe, contains(pattern))
return(colnames(dataframe))
}

as_wide_aligned_table <- function(aligned) {
mz_scale_table <- aligned$rt_crosstab[, c("mz", "rt", "mz_min", "mz_max")]
aligned <- as_feature_sample_table(
rt_crosstab = aligned$rt_crosstab,
int_crosstab = aligned$int_crosstab
)
aligned <- long_to_wide_feature_table(aligned)
aligned <- dplyr::inner_join(aligned, mz_scale_table, by = c("mz", "rt"))
return(aligned)
}


merge_known_tables <- function(batchwise, batches_idx) {
colnames <- c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type", "m.z",
"Number_profiles_processed", "Percent_found", "mz_min", "mz_max",
Expand Down Expand Up @@ -256,6 +373,8 @@ semisup_to_hybrid_adapter <- function(batchwise, batches_idx) {
#' Two step hybrid feature detection.
#'
#' A two-stage hybrid feature detection and alignment procedure, for data generated in multiple batches.
#' NOTE: This function is OBSOLETE and should no longer be used,
#' since it is no longer maintained and will soon be removed.
#'
#' @param filenames file names
#' @param metadata the batch label of each file.
Expand Down
Loading

0 comments on commit 9e9fc64

Please sign in to comment.