forked from boulderrinnlab/CLASS_2023
-
Notifications
You must be signed in to change notification settings - Fork 0
/
super_meta_plot.Rmd
89 lines (61 loc) · 2.67 KB
/
super_meta_plot.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
---
title: "Untitled"
author: "JR"
date: "4/28/2023"
output: html_document
---
```{r setup, include=FALSE}
# Set the default for all later chunks: echo each chunk's source code in the
# knitted output.
knitr::opts_chunk$set(echo = TRUE)
```
```{R}
# Classify promoters as super binders and split the promoter ranges to match.
#
# Reads:  peak_occurrence_df (columns number_of_dbp and gene_id) and
#         lncrna_mrna_promoters (subset below by its gene_id field).
# Writes: peak_occurrence_df (gains superbinders and superbinder2),
#         super_proms, non_super_proms, super_gr, non_super_gr.

# Flag rows whose number_of_dbp exceeds 200, then derive a text label from the
# flag. Columns are referenced through mutate()'s data mask rather than via
# peak_occurrence_df$..., so this also works on grouped or rowwise input.
# A missing number_of_dbp yields NA in both new columns.
peak_occurrence_df <- peak_occurrence_df %>%
  mutate(
    superbinders = number_of_dbp > 200,
    superbinder2 = ifelse(superbinders, "Superbinder", "notsuperbinder")
  )

# gene_id of super-binder promoters (rows with an NA label are dropped)
super_proms <- peak_occurrence_df %>%
  subset(superbinder2 == "Superbinder") %>%
  dplyr::select("gene_id")

# gene_id of non-super-binder promoters
non_super_proms <- peak_occurrence_df %>%
  subset(superbinder2 == "notsuperbinder") %>%
  dplyr::select("gene_id")

# Subset the mRNA and lncRNA promoters by super-binder status.
# NOTE(review): the single-bracket logical index with no comma suggests
# lncrna_mrna_promoters is a GRanges-like object, not a data frame -- confirm.
super_gr <- lncrna_mrna_promoters[
  lncrna_mrna_promoters$gene_id %in% super_proms$gene_id
]
non_super_gr <- lncrna_mrna_promoters[
  lncrna_mrna_promoters$gene_id %in% non_super_proms$gene_id
]
# Build the metaplot table for super-binder promoters: call profile_tss() once
# per entry of filtered_consensus_list against super_gr, and tag each result
# with the entry's name in a dbp column.

# Empty template; binding it first fixes the leading column order
# (x, dens, dbp) of the final table, and is what is returned when
# filtered_consensus_list is empty.
superbinder_metaplot_df <- data.frame(x = integer(), dens = numeric(), dbp = character())

# Collect the per-entry profiles in a preallocated list and bind them once at
# the end, instead of re-copying a growing data frame on every iteration.
superbinder_profile_list <- vector("list", length(filtered_consensus_list))

# seq_along() rather than 1:length(): an empty list then runs zero iterations
# instead of iterating over c(1, 0) and failing on the first lookup.
for (i in seq_along(filtered_consensus_list)) {
  # progress output: name of the entry being profiled
  print(names(filtered_consensus_list)[[i]])
  tmp_df <- profile_tss(filtered_consensus_list[[i]], lncrna_mrna_promoters = super_gr)
  tmp_df$dbp <- names(filtered_consensus_list)[[i]]
  superbinder_profile_list[[i]] <- tmp_df
}

# bind_rows() accepts a data frame followed by a list of data frames
superbinder_metaplot_df <- bind_rows(superbinder_metaplot_df, superbinder_profile_list)
# Build the metaplot table for non-super-binder promoters: call profile_tss()
# once per entry of filtered_consensus_list against non_super_gr, and tag each
# result with the entry's name in a dbp column.

# Empty template; binding it first fixes the leading column order
# (x, dens, dbp) of the final table, and is what is returned when
# filtered_consensus_list is empty.
non_superbinder_metaplot_df <- data.frame(x = integer(), dens = numeric(), dbp = character())

# Collect the per-entry profiles in a preallocated list and bind them once at
# the end, instead of re-copying a growing data frame on every iteration.
non_superbinder_profile_list <- vector("list", length(filtered_consensus_list))

# seq_along() rather than 1:length(): an empty list then runs zero iterations
# instead of iterating over c(1, 0) and failing on the first lookup.
for (i in seq_along(filtered_consensus_list)) {
  # progress output: name of the entry being profiled
  print(names(filtered_consensus_list)[[i]])
  tmp_df <- profile_tss(filtered_consensus_list[[i]], lncrna_mrna_promoters = non_super_gr)
  tmp_df$dbp <- names(filtered_consensus_list)[[i]]
  non_superbinder_profile_list[[i]] <- tmp_df
}

# bind_rows() accepts a data frame followed by a list of data frames
non_superbinder_metaplot_df <- bind_rows(non_superbinder_metaplot_df, non_superbinder_profile_list)
# Label each profile table with its promoter class, stack the two tables, and
# draw one facet per dbp with both classes overlaid as coloured lines.

# tag every row with the class of promoters it was profiled against
superbinder_metaplot_df$gene_type <- "superbinder"
non_superbinder_metaplot_df$gene_type <- "non_super_binder"

# one long table: non-super-binder rows first, then super-binder rows
combined_super_binder_metaplot_profile <- bind_rows(
  non_superbinder_metaplot_df,
  superbinder_metaplot_df
)

ggplot(
  data = combined_super_binder_metaplot_profile,
  mapping = aes(x = x, y = dens, colour = gene_type)
) +
  # dashed vertical guide at x = 0 (labelled "TSS" on the axis below)
  geom_vline(xintercept = 0, linetype = 2) +
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` for line thickness;
  # `size` is kept here so the chunk behaves the same on older versions.
  geom_line(size = 1.5) +
  # one panel per dbp, each with its own y range
  facet_wrap(~ dbp, scales = "free_y") +
  scale_x_continuous(
    name = "",
    breaks = c(-1000, 0, 1000),
    labels = c("-1kb", "TSS", "+1kb")
  ) +
  # Unnamed colours are assigned to the gene_type values in scale order;
  # presumably "non_super_binder" gets the first and "superbinder" the second.
  scale_colour_manual(values = c("#424242", "#a8404c")) +
  labs(title = "Promoter Metaplot", y = "Peak frequency")
```