Skip to content

Commit

Permalink
Replace workers with BPPARAM argument and remove get_bpparam()
Browse files Browse the repository at this point in the history
  • Loading branch information
HDash committed Nov 5, 2024
1 parent 613fefe commit 2b1f51d
Show file tree
Hide file tree
Showing 32 changed files with 168 additions and 243 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ Imports:
stats,
utils
Suggests:
BiocStyle,
BSgenome.Hsapiens.UCSC.hg19,
BSgenome.Hsapiens.UCSC.hg38,
downloadthis,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ importFrom(BSgenome,getSeq)
importFrom(BiocFileCache,BiocFileCache)
importFrom(BiocFileCache,bfcinfo)
importFrom(BiocFileCache,bfcrpath)
importFrom(BiocParallel,bpnworkers)
importFrom(Biostrings,DNAString)
importFrom(Biostrings,letterFrequency)
importFrom(DT,datatable)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# MotifPeeker 0.99.7

## New Features
* Replace the `workers` argument with `BPPARAM`, giving users more control over
the BiocParallel implementation.

## Miscellaneous

* Remove `cat()` calls in functions.
Expand Down
63 changes: 37 additions & 26 deletions R/MotifPeeker.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,28 @@
#' hours to complete. To make computation faster, we highly recommend tuning the
#' following arguments:
#' \describe{
#' \item{\code{workers}}{Running motif discovery in parallel can
#' significantly reduce runtime, but it is very memory-intensive, consuming
#' upwards of 10GB of RAM per thread. Memory starvation can greatly slow the
#' process, so set \code{workers} with caution.}
#' \item{\code{denovo_motifs}}{The number of motifs to discover per sequence
#' group exponentially increases runtime. We recommend no more than 5
#' motifs to make a meaningful inference.}
#' \item{\code{trim_seq_width}}{Trimming sequences before running de-novo
#' motif discovery can significantly reduce the search space. Sequence
#' length can exponentially increase runtime. We recommend running the
#' script with \code{denovo_motif_discovery = FALSE} and studying the
#' motif-summit distance distribution under general metrics to find the
#' sequence length that captures most motifs. A good starting point is 150
#' but it can be reduced further if appropriate.}
#' \item{\code{BPPARAM=MulticoreParam(x)}}{Running motif discovery in
#' parallel can significantly reduce runtime, but it is very
#' memory-intensive, consuming 10+GB of RAM per thread. Memory starvation can
#' greatly slow the process, so set the number of cores with caution.}
#' \item{\code{denovo_motifs}}{The number of motifs to discover per sequence
#' group exponentially increases runtime. We recommend no more than 5
#' motifs to make a meaningful inference.}
#' \item{\code{trim_seq_width}}{Trimming sequences before running de-novo
#' motif discovery can significantly reduce the search space. Sequence
#' length can exponentially increase runtime. We recommend running the
#' script with \code{denovo_motif_discovery = FALSE} and studying the
#' motif-summit distance distribution under general metrics to find the
#' sequence length that captures most motifs. A good starting point is 150
#' but it can be reduced further if appropriate.}
#' }
#'
#' @param peak_files A character vector of paths to peak files, or a vector of
#' GRanges objects generated using \code{\link{read_peak_file}}. Currently,
#' peak files from the following peak-calling tools are supported:
#' \itemize{
#' \item MACS2: \code{.narrowPeak} files
#' \item SEACR: \code{.bed} files
#' \item MACS2: \code{.narrowPeak} files
#' \item SEACR: \code{.bed} files
#' }
#' ENCODE file IDs can also be provided to automatically fetch peak file(s) from
#' the ENCODE database.
Expand Down Expand Up @@ -81,13 +81,22 @@
#' @param display A character vector specifying the display mode for the HTML
#' report once it is generated. (default = NULL) Options are:
#' \itemize{
#' \item \code{"browser"}: Open the report in the default web browser.
#' \item \code{"rstudio"}: Open the report in the RStudio Viewer.
#' \item \code{NULL}: Do not open the report.
#' \item \code{"browser"}: Open the report in the default web browser.
#' \item \code{"rstudio"}: Open the report in the RStudio Viewer.
#' \item \code{NULL}: Do not open the report.
#' }
#' @param workers An integer specifying the number of threads to use for
#' parallel processing. (default = 1)\cr
#' \strong{IMPORTANT:} For each worker, please ensure a minimum of 6GB of
#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam-class}} object
#' enabling parallel execution. (default = SerialParam(), single-CPU run)\cr\cr
#' Following are two examples of how to set up parallel processing:
#' \itemize{
#' \item \code{BPPARAM = BiocParallel::MulticoreParam(4)}: Uses 4
#' CPU cores for parallel processing.
#' \item \code{library("BiocParallel")} followed by
#' \code{register(MulticoreParam(4))} sets all subsequent BiocParallel
#' functions to use 4 CPU cores. \code{MotifPeeker()} must then be run
#' with \code{BPPARAM = BiocParallel::bpparam()} so that the registered
#' back-end is picked up.
#' }
#' \strong{IMPORTANT:} For each worker, please ensure a minimum of 8GB of
#' memory (RAM) is available as \code{denovo_motif_discovery} is
#' memory-intensive.
#' @param quiet A logical indicating whether to print markdown knit messages.
Expand All @@ -99,7 +108,7 @@
#' @inheritParams check_genome_build
#' @inheritParams read_motif_file
#' @inheritParams check_genome_build
#' @inheritParams get_bpparam
#' @inheritParams bpapply
#' @inheritParams memes::runFimo
#' @inheritParams denovo_motifs
#' @inheritParams find_motifs
Expand All @@ -111,6 +120,7 @@
#' @importFrom viridis scale_fill_viridis scale_color_viridis
#' @importFrom tools file_path_sans_ext
#' @importFrom rmarkdown render
#' @importFrom BiocParallel bpnworkers
#'
#' @return Path to the output directory.
#'
Expand Down Expand Up @@ -159,7 +169,6 @@
#' motif_db = NULL,
#' download_buttons = TRUE,
#' out_dir = tempdir(),
#' workers = 1,
#' debug = FALSE,
#' quiet = TRUE,
#' verbose = FALSE
Expand Down Expand Up @@ -188,7 +197,7 @@ MotifPeeker <- function(
out_dir = tempdir(),
save_runfiles = FALSE,
display = if (interactive()) "browser",
workers = 2,
BPPARAM = BiocParallel::SerialParam(), # Default to single-core
quiet = TRUE,
debug = FALSE,
verbose = FALSE
Expand Down Expand Up @@ -269,14 +278,16 @@ MotifPeeker <- function(
meme_path = meme_path,
out_dir = out_dir,
save_runfiles = save_runfiles,
workers = workers,
BPPARAM = BPPARAM,
debug = debug,
verbose = verbose
)

### Knit Rmd ###
rmd_file <- system.file("markdown",
"MotifPeeker.Rmd", package = "MotifPeeker")
messager("Starting run with", BiocParallel::bpnworkers(BPPARAM), "cores.",
v = verbose)
rmarkdown::render(
input = rmd_file,
output_dir = out_dir,
Expand Down
15 changes: 5 additions & 10 deletions R/bpapply.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#' Use BiocParallel functions with appropriate parameters
#'
#' Light wrapper around \code{\link[BiocParallel]{BiocParallel}} functions that
#' automatically sets the appropriate parameters based on the number of workers
#' specified.
#' automatically applies the appropriate parallel function.
#'
#' @param apply_fun A \code{\link[BiocParallel]{BiocParallel}} function to use
#' for parallel processing. (default = \code{BiocParallel::bplapply})
#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam-class}} object
#' specifying run parameters. (default = bpparam())
#' @inheritParams BiocParallel::bplapply
#' @inheritDotParams BiocParallel::bplapply
#' @inheritDotParams BiocParallel::bpmapply
#' @inheritParams get_bpparam
#'
#' @import BiocParallel
#'
Expand All @@ -19,15 +19,15 @@
#' half_it <- function(arg1) return(arg1 / 2)
#' x <- seq_len(10)
#'
#' res <- MotifPeeker:::bpapply(x, half_it, workers = 2)
#' res <- MotifPeeker:::bpapply(x, half_it)
#' print(res)
#'
#' @keywords internal
bpapply <- function(
X,
FUN,
apply_fun = BiocParallel::bplapply,
workers = 1,
BPPARAM = BiocParallel::bpparam(),
progressbar = FALSE,
force_snowparam = FALSE,
verbose = FALSE,
Expand All @@ -38,11 +38,6 @@ bpapply <- function(
if (length(apply_fun_package) == 0 ||
apply_fun_package != "BiocParallel") stop(stp_msg)

BPPARAM <- get_bpparam(workers = workers,
progressbar = progressbar,
force_snowparam = force_snowparam,
verbose = verbose)

res <- apply_fun(X, FUN = FUN, BPPARAM = BPPARAM, ...)
return(res)
}
9 changes: 6 additions & 3 deletions R/denovo_motifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
#' (default = 6)
#' @param out_dir A \code{character} vector of output directory to save STREME
#' results to. (default = \code{tempdir()})
#' @param BPPARAM A \code{\link[BiocParallel]{BiocParallelParam-class}} object
#' specifying run parameters. (default = SerialParam(), single core run)
#' @param debug A logical indicating whether to print debug messages while
#' running the function. (default = FALSE)
#' @param ... Additional arguments to pass to \code{STREME}. For more
#' information, refer to the official MEME Suite documentation on
#' \href{https://meme-suite.org/meme/doc/streme.html}{STREME}.
#' @inheritParams bpapply
#' @inheritParams motif_enrichment
#' @inheritParams MotifPeeker
#'
#' @returns A list of \code{\link[universalmotif]{universalmotif}} objects and
#' associated metadata.
Expand Down Expand Up @@ -60,7 +63,7 @@ denovo_motifs <- function(seqs,
filter_n = 6,
out_dir = tempdir(),
meme_path = NULL,
workers = 1,
BPPARAM = BiocParallel::SerialParam(),
verbose = FALSE,
debug = FALSE,
...) {
Expand Down Expand Up @@ -96,7 +99,7 @@ denovo_motifs <- function(seqs,
### Filter motifs ###
out <- filter_repeats(streme_out, filter_n)
return(out)
}, workers = workers, verbose = verbose
}, BPPARAM = BPPARAM, verbose = verbose
)
messager("STREME run complete.", v = verbose)
return(res)
Expand Down
5 changes: 3 additions & 2 deletions R/find_motifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' @param ... Additional arguments to pass to \code{TOMTOM}. For more
#' information, refer to the official MEME Suite documentation on
#' \href{https://meme-suite.org/meme/doc/tomtom.html}{TOMTOM}.
#' @inheritParams bpapply
#' @inheritParams denovo_motifs
#'
#' @importFrom memes runTomTom
Expand Down Expand Up @@ -46,7 +47,7 @@ find_motifs <- function(streme_out,
motif_db,
out_dir = tempdir(),
meme_path = NULL,
workers = 1,
BPPARAM = BiocParallel::bpparam(),
verbose = FALSE,
debug = FALSE,
...) {
Expand All @@ -66,7 +67,7 @@ find_motifs <- function(streme_out,
)
return(res_x)
})
}, workers = workers, verbose = verbose
}, BPPARAM = BPPARAM, verbose = verbose
)
return(res)
}
47 changes: 0 additions & 47 deletions R/get_bpparam.R

This file was deleted.

7 changes: 3 additions & 4 deletions R/get_df_distances.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@
#'
#' if (requireNamespace("BSgenome.Hsapiens.UCSC.hg38")) {
#' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' distances_df <- get_df_distances(input, motifs, genome_build,
#' workers = 1)
#' distances_df <- get_df_distances(input, motifs, genome_build)
#' print(distances_df)
#' }
#' }
Expand All @@ -66,7 +65,7 @@ get_df_distances <- function(result,
user_motifs,
genome_build,
out_dir = tempdir(),
workers = 1,
BPPARAM = BiocParallel::bpparam(),
meme_path = NULL,
verbose = FALSE) {
if (!is.list(result$peaks)) result$peaks <- list(result$peaks)
Expand Down Expand Up @@ -95,7 +94,7 @@ get_df_distances <- function(result,
)$distance_to_summit
)
},
workers = workers, verbose = verbose) %>%
BPPARAM = BPPARAM, verbose = verbose) %>%
purrr::map_df(as.data.frame)

## Output: Peak 1 - Motif 1, 2...
Expand Down
8 changes: 4 additions & 4 deletions R/get_df_enrichment.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
#'
#' enrichment_df <- get_df_enrichment(
#' input, segregated_input, motifs, genome_build,
#' reference_index = 1, workers = 1
#' reference_index = 1
#' )
#' }
#' }
Expand All @@ -67,7 +67,7 @@ get_df_enrichment <- function(result,
genome_build,
reference_index = 1,
out_dir = tempdir(),
workers = 1,
BPPARAM = BiocParallel::bpparam(),
meme_path = NULL,
verbose = FALSE) {
if (!is.list(result$peaks)) result$peaks <- list(result$peaks)
Expand Down Expand Up @@ -113,7 +113,7 @@ get_df_enrichment <- function(result,
run_index = i
)
},
workers = workers, verbose = verbose) %>%
BPPARAM = BPPARAM, verbose = verbose) %>%
purrr::map_df(as.data.frame)

## 2. Segregated peaks
Expand Down Expand Up @@ -165,7 +165,7 @@ get_df_enrichment <- function(result,
run_index = i
)
},
workers = workers, verbose = verbose) %>%
BPPARAM = BPPARAM, verbose = verbose) %>%
purrr::map_df(as.data.frame)

enrichment_df <- rbind(enrichment_df_all, enrichment_df_seg)
Expand Down
Loading

0 comments on commit 2b1f51d

Please sign in to comment.