Skip to content

Commit

Permalink
Add trim_seqs and additional arguments to MotifPeeker
Browse files Browse the repository at this point in the history
  • Loading branch information
HDash committed Jun 27, 2024
1 parent 54c6708 commit 126023a
Show file tree
Hide file tree
Showing 11 changed files with 166 additions and 22 deletions.
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,12 @@ Imports:
tools,
htmltools,
rmarkdown,
curl,
jsonlite,
viridis,
emoji,
SummarizedExperiment,
htmlwidgets,
Rsamtools,
GenomicAlignments,
GenomeInfoDb,
Biostrings,
BSgenome,
memes
Expand All @@ -71,7 +69,10 @@ Suggests:
rworkflows,
testthat (>= 3.0.0),
utils,
withr
withr,
emoji,
curl,
jsonlite
VignetteBuilder:
knitr
biocViews: Epigenetics, Genetics, QualityControl, ChIPSeq,
Expand Down
8 changes: 5 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export(report_command)
export(report_header)
export(summit_to_motif)
export(to_plotly)
export(trim_seqs)
import(ggplot2)
import(plotly, except = last_plot)

Expand All @@ -24,19 +25,20 @@ importFrom(BiocFileCache,bfcinfo)
importFrom(Biostrings,DNAString)
importFrom(Biostrings,letterFrequency)
importFrom(DT,datatable)
importFrom(GenomeInfoDb,seqlengths)
importFrom(GenomicAlignments,summarizeOverlaps)
importFrom(GenomicRanges,GRanges)
importFrom(GenomicRanges,end)
importFrom(GenomicRanges,mcols)
importFrom(GenomicRanges,seqnames)
importFrom(GenomicRanges,start)
importFrom(GenomicRanges,strand)
importFrom(GenomicRanges,width)
importFrom(IRanges,IRanges)
importFrom(Rsamtools,countBam)
importFrom(SummarizedExperiment,assay)
importFrom(curl,curl_fetch_memory)
importFrom(emoji,emoji)
importFrom(htmltools,tagList)
importFrom(htmlwidgets,JS)
importFrom(jsonlite,fromJSON)
importFrom(memes,meme_is_installed)
importFrom(memes,runAme)
importFrom(memes,runFimo)
Expand Down
14 changes: 12 additions & 2 deletions R/MotifPeeker.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@
#' this database to find similar motifs. If not provided, JASPAR CORE database
#' will be used. \strong{NOTE}: p-value estimates are inaccurate when the
#' database has fewer than 50 entries.
#' @param trim_seq_width An integer specifying the width of the sequence to
#' extract around the summit (default = NULL). This sequence is used to search
#' for de novo motifs. If not provided, the entire peak region will be used.
#' This parameter is intended to reduce the search space and speed up motif
#' discovery; therefore, a value less than the average peak width is
#' recommended. Peaks are trimmed symmetrically around the summit while
#' respecting the peak bounds.
#' @param download_buttons A logical indicating whether to include download
#' buttons for various files within the HTML report. (default = TRUE)
#' @param output_dir A character string specifying the directory to save the
Expand Down Expand Up @@ -92,7 +99,6 @@
#'
#' @import ggplot2
#' @import tidyverse
#' @importFrom emoji emoji
#' @importFrom viridis scale_fill_viridis
#' @importFrom tools file_path_sans_ext
#' @importFrom rmarkdown render
Expand All @@ -103,7 +109,8 @@
#'
#' @note Running de-novo motif discovery is computationally expensive and can
#' require from minutes to hours. \code{denovo_motifs} can widely affect the
#' runtime (higher values take longer).
#' runtime (higher values take longer). Setting \code{trim_seq_width} to a lower
#' value can also reduce the runtime significantly.
#'
#' @examples
#' peaks <- list(
Expand Down Expand Up @@ -163,6 +170,7 @@ MotifPeeker <- function(
denovo_motif_discovery = TRUE,
denovo_motifs = 3,
motif_db = NULL,
trim_seq_width = NULL,
download_buttons = TRUE,
meme_path = NULL,
output_dir = tempdir(),
Expand Down Expand Up @@ -222,7 +230,9 @@ MotifPeeker <- function(
denovo_motif_discovery = denovo_motif_discovery,
denovo_motifs = denovo_motifs,
motif_db = motif_db,
trim_seq_width = trim_seq_width,
download_buttons = download_buttons,
meme_path = meme_path,
output_dir = output_dir,
use_cache = use_cache,
ncpus = ncpus,
Expand Down
8 changes: 4 additions & 4 deletions R/check_ENCODE.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
#' @returns A character string specifying the path to the downloaded file. If
#' the input is not in ENCODE ID format, the input is returned as-is.
#'
#' @importFrom curl curl_fetch_memory
#' @importFrom jsonlite fromJSON
#'
#' @examples
#' check_ENCODE("ENCFF109VAD", expect_format = "bam")
#' if (requireNamespace("curl", quietly = TRUE) &&
#' requireNamespace("jsonlite", quietly = TRUE)) {
#' check_ENCODE("ENCFF109VAD", expect_format = "bam")
#' }
#'
#' @export
check_ENCODE <- function(encode_id, expect_format, verbose = FALSE) {
Expand Down
59 changes: 59 additions & 0 deletions R/trim_seqs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Trim sequences to a specified width around the summit
#'
#' @param peaks A GRanges object created using
#' \code{\link[=read_peak_file]{read_peak_file()}}.
#' @param peak_width Total expected width of the peak.
#' @param genome_build The genome build that the peak sequences should be
#' derived from.
#' @param respect_bounds Logical indicating whether the peak width should be
#' respected when trimming sequences. (default = TRUE) If \code{TRUE}, the
#' trimmed sequences will not extend beyond the peak boundaries.
#'
#' @importFrom IRanges IRanges
#' @importFrom GenomicRanges seqnames GRanges strand mcols start end
#' @importFrom GenomeInfoDb seqlengths
#'
#' @return A GRanges object with the trimmed sequences. The sequences are
#' guaranteed to not exceed the \code{peak width + 1} (peak width + the summit
#' base).
#'
#' @examples
#' data("CTCF_TIP_peaks", package = "MotifPeeker")
#' peaks <- CTCF_TIP_peaks
#' genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#'
#' trimmed_seqs <- trim_seqs(peaks, peak_width = 100,
#' genome_build = genome_build)
#' summary(GenomicRanges::width(trimmed_seqs))
#'
#' @export
trim_seqs <- function(peaks, peak_width, genome_build, respect_bounds = TRUE) {
peak_width <- round(peak_width / 2, 0)
max_len <- GenomeInfoDb::seqlengths(genome_build)[as.character(
GenomicRanges::seqnames(peaks))]

if (respect_bounds) {
adjusted_iranges <- IRanges::IRanges(
## Ensure start is not less than 1 or more than the sequence length
start = pmax(peaks$summit - peak_width, 1,
GenomicRanges::start(peaks)),
## Ensure end is not more than the sequence or peak length
end = pmin(peaks$summit + peak_width, max_len,
GenomicRanges::end(peaks))
)
} else {
adjusted_iranges <- IRanges::IRanges(
start = pmax(peaks$summit - peak_width, 1),
end = pmin(peaks$summit + peak_width, max_len)
)
}

adjusted_granges <- GenomicRanges::GRanges(
seqnames = GenomicRanges::seqnames(peaks),
ranges = adjusted_iranges,
strand = GenomicRanges::strand(peaks)
)
GenomicRanges::mcols(adjusted_granges) <- GenomicRanges::mcols(peaks)

return(adjusted_granges)
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ status](https://github.com/neurogenomics/MotifPeeker/workflows/rworkflows/badge.
Authors: <i>Hiranyamaya Dash, Thomas Roberts, Nathan Skene</i>
</h4>
<h4>
Updated: <i>Jun-26-2024</i>
Updated: <i>Jun-27-2024</i>
</h4>

# Introduction
Expand Down
16 changes: 10 additions & 6 deletions inst/markdown/MotifPeeker.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ params:
value: 3
motif_db:
value: NULL
trim_seq_width:
value: NULL
download_buttons:
value: NULL
meme_path:
value: NULL
output_dir:
value: NULL
use_cache:
Expand Down Expand Up @@ -86,14 +90,12 @@ cellcount_metrics <- ifelse((length(params$cell_counts) == 0), FALSE, TRUE)
user_motif_metrics <- ifelse((length(user_motifs$motifs) == 0), FALSE, TRUE)
## Misc
ex_emo <- emoji::emoji("exclamation")
ex_emo <- ifelse(requireNamespace("emoji", quietly = TRUE),
emoji::emoji("exclamation"), "!!")
## Check motif_db
if (params$denovo_motif_discovery && is.null(params$motif_db)) {
motif_db <- get_JASPARCORE()
} else {
motif_db <- params$motif_db
}
motif_db <- ifelse(params$denovo_motif_discovery && is.null(params$motif_db),
get_JASPARCORE(), params$motif_db)
### General Metrics ###
if (alignment_metrics) {
Expand Down Expand Up @@ -124,6 +126,8 @@ if (alignment_metrics) {
)
### Known-motif Analysis ###
comparison_indices <- setdiff(length(result$peaks), params$reference_index)
}
Expand Down
12 changes: 11 additions & 1 deletion man/MotifPeeker.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/check_ENCODE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions man/trim_seqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-trim_seqs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
test_that("trim_seqs works", {
data("CTCF_TIP_peaks", package = "MotifPeeker")
peaks <- CTCF_TIP_peaks
genome_build <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38

trimmed_seqs <- trim_seqs(peaks, peak_width = 100,
genome_build = genome_build,
respect_bounds = FALSE)
expect_true(all(GenomicRanges::width(trimmed_seqs) <= 101))

trimmed_seqs_bound <- trim_seqs(peaks, peak_width = 1000,
genome_build = genome_build,
respect_bounds = TRUE)
expect_true(all(GenomicRanges::width(trimmed_seqs_bound) <=
max(GenomicRanges::width(peaks)) + 1))
})

0 comments on commit 126023a

Please sign in to comment.