Skip to content

Commit

Permalink
Add segregate_seqs and BiocParallel wrappers for parallel processing
Browse files Browse the repository at this point in the history
  • Loading branch information
HDash committed Jun 28, 2024
1 parent 126023a commit 453bc37
Show file tree
Hide file tree
Showing 13 changed files with 382 additions and 3 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ Imports:
GenomeInfoDb,
Biostrings,
BSgenome,
memes
memes,
S4Vectors
Suggests:
BiocStyle,
BSgenome.Hsapiens.UCSC.hg19,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(MotifPeeker)
export(bpapply)
export(calc_frip)
export(check_ENCODE)
export(check_JASPAR)
Expand All @@ -12,6 +13,7 @@ export(read_motif_file)
export(read_peak_file)
export(report_command)
export(report_header)
export(segregate_seqs)
export(summit_to_motif)
export(to_plotly)
export(trim_seqs)
Expand All @@ -29,13 +31,16 @@ importFrom(GenomeInfoDb,seqlengths)
importFrom(GenomicAlignments,summarizeOverlaps)
importFrom(GenomicRanges,GRanges)
importFrom(GenomicRanges,end)
importFrom(GenomicRanges,findOverlaps)
importFrom(GenomicRanges,mcols)
importFrom(GenomicRanges,seqnames)
importFrom(GenomicRanges,start)
importFrom(GenomicRanges,strand)
importFrom(GenomicRanges,width)
importFrom(IRanges,IRanges)
importFrom(Rsamtools,countBam)
importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,subjectHits)
importFrom(SummarizedExperiment,assay)
importFrom(htmltools,tagList)
importFrom(htmlwidgets,JS)
Expand Down
2 changes: 1 addition & 1 deletion R/MotifPeeker.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ MotifPeeker <- function(
output_dir = tempdir(),
display = NULL,
use_cache = TRUE,
ncpus = 1,
ncpus = 2,
quiet = TRUE,
debug = FALSE,
verbose = FALSE
Expand Down
46 changes: 46 additions & 0 deletions R/bpapply.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Use BiocParallel functions with appropriate parameters
#'
#' Light wrapper around \code{\link[BiocParallel]{BiocParallel}} functions that
#' automatically sets the appropriate parameters based on the number of workers
#' specified.
#'
#' @param apply_fun A \code{\link[BiocParallel]{BiocParallel}} function to use
#' for parallel processing. (default = \code{BiocParallel::bplapply})
#' @inheritParams BiocParallel::bplapply
#' @inheritDotParams BiocParallel::bplapply
#' @inheritDotParams BiocParallel::bpmapply
#' @inheritParams get_bpparam
#'
#' @returns Output relevant to the \code{apply_fun} specified.
#'
#' @examples
#' half_it <- function(arg1) return(arg1 / 2)
#' x <- seq_len(10)
#'
#' res <- bpapply(x, half_it, workers = 2)
#' print(res)
#'
#' @export
bpapply <- function(
    X,
    FUN,
    apply_fun = BiocParallel::bplapply,
    workers = 1,
    progressbar = workers > 1,
    force_snowparam = FALSE,
    verbose = FALSE,
    ...
) {
    # Accept only functions whose "package" attribute is "BiocParallel";
    # anything else (including functions without that attribute) is rejected.
    origin_pkg <- attr(apply_fun, "package")
    is_biocparallel_fun <- length(origin_pkg) > 0 &&
        origin_pkg == "BiocParallel"
    if (!is_biocparallel_fun) {
        stopper("Supplied apply_fun is not a valid BiocParallel function.")
    }

    # Build the back-end configuration from the requested worker settings.
    bp_param <- get_bpparam(
        workers = workers,
        progressbar = progressbar,
        force_snowparam = force_snowparam,
        verbose = verbose
    )

    # Delegate to the chosen apply function; extra arguments are forwarded
    # untouched through `...`.
    apply_fun(X, FUN = FUN, BPPARAM = bp_param, ...)
}
45 changes: 45 additions & 0 deletions R/get_bpparam.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#' Get parameters for \link[BiocParallel]{BiocParallel}
#'
#' Get appropriate parameters for \code{BiocParallel} based on the
#' number of workers specified. For fewer than 4 workers, the function returns
#' a \code{MulticoreParam} object. For 4 or more workers, the function
#' returns a \code{SnowParam} object. On Windows, where
#' \code{MulticoreParam} is not supported, the function always returns a
#' \code{SerialParam} object. As a result, Windows users do not benefit from
#' parallel processing.
#'
#' @param workers The number of workers to use for parallel processing.
#' @param force_snowparam A logical indicating whether to force the use of
#' \link[BiocParallel]{SnowParam} object.
#' @param verbose A logical indicating whether to print verbose messages while
#' running the function. (default = FALSE)
#' @inheritParams BiocParallel::SnowParam
#'
#' @returns A \code{BPPARAM} object.
#'
#' @seealso \link[BiocParallel]{BiocParallelParam}
#'
#' @keywords internal
get_bpparam <- function(workers,
                        progressbar = workers > 1,
                        force_snowparam = FALSE,
                        verbose = FALSE) {
    # Windows: always fall back to serial evaluation, regardless of the
    # other arguments.
    if (.Platform$OS.type == "windows") {
        serial_param <- BiocParallel::SerialParam()
        messager("Windows does not support parallel processing.",
                 "Returning SerialParam object for BiocParallel.",
                 v = verbose)
        return(serial_param)
    }

    # Fewer than 4 workers use the multicore back-end unless the caller
    # explicitly forces SnowParam.
    use_multicore <- workers < 4 && !force_snowparam
    if (use_multicore) {
        multicore_param <- BiocParallel::MulticoreParam(
            workers = workers,
            progressbar = progressbar
        )
        messager("Using MulticoreParam object for BiocParallel (workers =",
                 paste0(workers, ")."), v = verbose)
        return(multicore_param)
    }

    # Remaining case: 4 or more workers, or SnowParam was forced.
    snow_param <- BiocParallel::SnowParam(
        workers = workers,
        progressbar = progressbar
    )
    messager("Using SnowParam object for BiocParallel (workers =",
             paste0(workers, ")."), v = verbose)
    return(snow_param)
}
58 changes: 58 additions & 0 deletions R/segregate_seqs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Segregate input sequences into common and unique groups
#'
#' This function takes two sets of sequences and segregates them into common and
#' unique sequences. The common sequences are sequences that are present in both
#' sets of sequences. The unique sequences are sequences that are present in
#' only one of the sets of sequences.
#'
#' Sequences are considered common if their base pairs align in any
#' position, even if they vary in length. Consequently, while the number of
#' common sequences remains consistent between both sets, the length and
#' composition of these sequences may differ. As a result, the function returns
#' distinct sets of common sequences for each input set of sequences.
#'
#' @param seqs1 A set of sequences (\code{GRanges} object)
#' @param seqs2 A set of sequences (\code{GRanges} object)
#'
#' @importFrom GenomicRanges findOverlaps
#' @importFrom S4Vectors queryHits subjectHits
#'
#' @returns A list containing the common sequences and unique sequences for each
#' set of sequences. The list contains the following \code{GRanges} objects:
#' \itemize{
#' \item \code{common_seqs1}: Common sequences in \code{seqs1}
#' \item \code{common_seqs2}: Common sequences in \code{seqs2}
#' \item \code{unique_seqs1}: Unique sequences in \code{seqs1}
#' \item \code{unique_seqs2}: Unique sequences in \code{seqs2}
#' }
#'
#' @examples
#' data("CTCF_ChIP_peaks", package = "MotifPeeker")
#' data("CTCF_TIP_peaks", package = "MotifPeeker")
#'
#' seqs1 <- CTCF_ChIP_peaks
#' seqs2 <- CTCF_TIP_peaks
#' res <- segregate_seqs(seqs1, seqs2)
#' print(res)
#'
#' @seealso \link[GenomicRanges]{findOverlaps}
#'
#' @export
segregate_seqs <- function(seqs1, seqs2) {
    # Pairwise overlaps between the two sets; each hit pairs one range of
    # `seqs1` (query) with one range of `seqs2` (subject).
    overlaps <- GenomicRanges::findOverlaps(seqs1, seqs2, type = "any")
    hits1 <- S4Vectors::queryHits(overlaps)
    hits2 <- S4Vectors::subjectHits(overlaps)

    # Positions that never take part in an overlap. Negative indexing
    # (`x[-hits]`) must not be used here: with zero hits, `x[-integer(0)]`
    # selects nothing, so every range would be dropped instead of kept.
    unique_idx1 <- setdiff(seq_len(length(seqs1)), hits1)
    unique_idx2 <- setdiff(seq_len(length(seqs2)), hits2)

    # Common sets are indexed hit-by-hit, so both have one entry per overlap
    # pair and a range overlapping several partners appears once per partner.
    list(
        common_seqs1 = seqs1[hits1],
        common_seqs2 = seqs2[hits2],
        unique_seqs1 = seqs1[unique_idx1],
        unique_seqs2 = seqs2[unique_idx2]
    )
}
4 changes: 4 additions & 0 deletions inst/markdown/MotifPeeker.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ if (alignment_metrics) {
### Known-motif Analysis ###
# Indices of every peak set except the reference. `seq_along()` is required
# here: `length()` alone would yield just the last index, so only one
# comparison (or none, if the reference is last) would be performed.
comparison_indices <- setdiff(
    seq_along(result$peaks),
    params$reference_index
)
# Split the reference peaks and each comparison peak set into common and
# unique groups; one list element per comparison set.
segregated_peaks <- lapply(comparison_indices, function(x) {
    segregate_seqs(result$peaks[[params$reference_index]], result$peaks[[x]])
})
## Calculate enrichment for segregated peaks
}
Expand Down
2 changes: 1 addition & 1 deletion man/MotifPeeker.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

85 changes: 85 additions & 0 deletions man/bpapply.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions man/get_bpparam.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 453bc37

Please sign in to comment.