update slides and exercises 21

rnabioco · Sep 20, 2024 · 5648bfa · 5648bfa
1 parent f7a58bb
commit 5648bfa
Show file tree

Hide file tree

Showing 2 changed files with 180 additions and 3 deletions.
diff --git a/exercises/ex-21.qmd b/exercises/ex-21.qmd
@@ -33,6 +33,143 @@ bedGraphToBigWig Abf1.bg sacCer3.chrom.sizes Abf1.bw
 
 The bigWig files are available here in the `data/` directory.
 
+# CUT&RUN analysis
+
+```{r}
+#| label: load-libs
+#| message: false
+library(tidyverse)
+library(here)
+library(valr)
+
+# genome viz
+library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene)
+library(Gviz)
+library(rtracklayer)
+
+# motif discovery and viz
+library(BSgenome.Scerevisiae.UCSC.sacCer3)
+library(rGADEM)
+library(seqLogo)
+```
+
+
+
+```{r}
+#| label: make-tracks
+track_start <- 90000
+track_end <- 150000
+
+# genes track
+sgd_genes_trk <-
+  GeneRegionTrack(
+    TxDb.Scerevisiae.UCSC.sacCer3.sgdGene,
+    chromosome = "chrII",
+    start = track_start,
+    end = track_end,
+    background.title = "white",
+    col.title = "black",
+    fontsize = 16
+  )
+
+# signal tracks
+track_info <-
+  tibble(
+    file_name = c(
+      "CutRun_Reb1_lt120.bw",
+      "CutRun_Abf1_lt120.bw",
+      "CutRun_Reb1_gt150.bw",
+      "CutRun_Abf1_gt150.bw"
+    ),
+    sample_type = c(
+      "Reb1_Short", "Abf1_Short",
+      "Reb1_Long", "Abf1_Long"
+    )
+  ) |>
+  mutate(
+    file_path = here("data/block-dna", file_name),
+    big_wig = purrr::map(
+      file_path, ~ import.bw(.x, as = "GRanges")
+    ),
+    data_track = purrr::map2(
+      big_wig, sample_type,
+      ~ DataTrack(
+        .x,
+        name = .y,
+        background.title = "white",
+        col.title = "black",
+        col.axis = "black",
+        fontsize = 16
+      )
+    )
+  ) |>
+  dplyr::select(sample_type, big_wig, data_track)
+
+# x-axis track
+x_axis_trk <- GenomeAxisTrack(
+  col = "black",
+  col.axis = "black",
+  fontsize = 16
+)
+```
+
+
+```{r}
+#| label: plot-tracks
+plotTracks(
+  c(
+    sgd_genes_trk,
+    track_info$data_track,
+    x_axis_trk
+  ),
+  from = track_start,
+  to = track_end,
+  chromosome = "chrII",
+  transcriptAnnotation = "gene",
+  shape = "arrow",
+  type = "histogram"
+)
+abf1_tbl <- read_bigwig(here("data/block-dna/CutRun_Abf1_lt120.bw"))
+total_reads <- 16e6
+
+genome <- read_genome(here("data/block-dna/sacCer3.chrom.sizes"))
+genome_size <- sum(genome$size)
+
+genome_lambda <- total_reads / genome_size
+peak_calls <-
+  abf1_tbl |>
+  # define single-base sites
+  mutate(
+    midpoint = start + round((end - start) / 2),
+    start = midpoint,
+    end = start + 1,
+    # use the poisson to calculate a p-value with the genome-wide lambda
+    pval = dpois(score, genome_lambda),
+    # convert p-values to FDR
+    fdr = p.adjust(pval, method = "fdr")
+  )
+
+peak_calls_sig <-
+  filter(
+    peak_calls,
+    fdr == 0
+  ) |>
+  # collapse neighboring, significant sites
+  bed_merge(max_dist = 20)
+
+filter(
+  peak_calls_sig,
+  chrom == "chrII" &
+    start >= track_start &
+    end <= track_end
+)
+
+peak_calls_gr <-
+  GRanges(
+    seqnames = peak_calls_sig$chrom,
+    ranges = IRanges(peak_calls_sig$start, peak_calls_sig$end)
+  )
+```
 
 ## Motif discovery
 

diff --git a/slides/slides-21.qmd b/slides/slides-21.qmd
@@ -57,7 +57,7 @@ library(rGADEM)
 library(seqLogo)
 ```
 
-## Examine genome coverage
+
 
 ```{r}
 #| label: make-tracks
@@ -133,6 +133,46 @@ plotTracks(
   shape = "arrow",
   type = "histogram"
 )
+abf1_tbl <- read_bigwig(here("data/block-dna/CutRun_Abf1_lt120.bw"))
+total_reads <- 16e6
+
+genome <- read_genome(here("data/block-dna/sacCer3.chrom.sizes"))
+genome_size <- sum(genome$size)
+
+genome_lambda <- total_reads / genome_size
+peak_calls <-
+  abf1_tbl |>
+  # define single-base sites
+  mutate(
+    midpoint = start + round((end - start) / 2),
+    start = midpoint,
+    end = start + 1,
+    # use the poisson to calculate a p-value with the genome-wide lambda
+    pval = dpois(score, genome_lambda),
+    # convert p-values to FDR
+    fdr = p.adjust(pval, method = "fdr")
+  )
+
+peak_calls_sig <-
+  filter(
+    peak_calls,
+    fdr == 0
+  ) |>
+  # collapse neighboring, significant sites
+  bed_merge(max_dist = 20)
+
+filter(
+  peak_calls_sig,
+  chrom == "chrII" &
+    start >= track_start &
+    end <= track_end
+)
+
+peak_calls_gr <-
+  GRanges(
+    seqnames = peak_calls_sig$chrom,
+    ranges = IRanges(peak_calls_sig$start, peak_calls_sig$end)
+  )
 ```
 
 ## How do proteins recognize specific locations in the genome to bind?
@@ -166,7 +206,7 @@ We'll use the [rGADEM](https://bioconductor.org/packages/release/bioc/html/rGADE
 2.  Provide those sequences and the genomic background to `rGADEM::GADEM()`, which runs uses an Expectation-Maximization (EM) approach to identify and refine motifs.
 3.  Examine the discovered motifs, and plot as a logo using `seqLogo::seqLogo()`.
 
-```{r, eval = FALSE}
+```{r}
 #| label: motif-discovery
 #| echo: true
 
@@ -192,7 +232,7 @@ nOccurrences(gadem)
 
 Now let's look at the sequence logo for the top hit.
 
-```{r, eval = FALSE}
+```{r}
 #| label: plot-logo
 #| echo: true