Skip to content

Latest commit

 

History

History
executable file
·
516 lines (415 loc) · 18 KB

Lysis_sensitivity.md

File metadata and controls

executable file
·
516 lines (415 loc) · 18 KB

Purpose:

Figure 2 - sensitivity plots

Protocol:

1. Load the following packages:

library(tidyverse)
library(ggsignif)
library(ggplotify)
library(ggrepel)
library(edgeR)
library(genefilter)
library(grid)
library(gridExtra)
library(ggsci)
library(UpSetR)
library(cowplot)

2. Load following functions:

### all necessary custom functions are in the following script
source(paste0(here::here(),"/0_Scripts/custom_functions.R"))

theme_pub <- theme_bw() + theme(plot.title = element_text(hjust = 0.5, size=18, face="bold"),
                                     axis.text = element_text(colour="black", size=14), 
                                     axis.title=element_text(size=16,face="bold"), 
                                     legend.text=element_text(size=14),
                                     legend.position="right",
                                     axis.line.x = element_line(colour = "black"), 
                                     axis.line.y = element_line(colour = "black"),
                                     strip.background=element_blank(), 
                                     strip.text=element_text(size=16))  

#prevent scientific notation
options(scipen=999)

fig_path<-paste0(here::here(),"/1_RNA_isolation/")

3. Gtype

gtype_human <- data.frame( species="human", getbiotype("hsapiens_gene_ensembl",species="human"))

gtype_mouse <- data.frame( species="mouse", getbiotype("mmusculus_gene_ensembl",species="mouse"))

4. Load Data

inf <- read.csv(paste0(fig_path,"sample_info.csv"), header = T, stringsAsFactors = F)

inf$Sample <- as.character(inf$Sample)

inf<-inf %>% 
  mutate(Condition=case_when(Condition=="Incubation + ProtK"~"Magnetic Beads",
                             TRUE~Condition),
         XC=if_else(Celltype=="HEK",BC,XC))

counts_hek <- readRDS(paste0(fig_path,"Bulk_opt_lysis_test_2_HEK.dgecounts.rds"))
counts_pbmc <- readRDS(paste0(fig_path,"Bulk_opt_lysis_PBMCs.dgecounts.rds"))
counts_tissue <- readRDS(paste0(fig_path,"Bulk_opt_lysis_Tissue.dgecounts.rds"))

# HEK cells 
coding_genes_hek<-rownames(counts_hek$umicount$inex$all)[!(rownames(counts_hek$umicount$inex$all)%in%gtype_human$Gencode)]

5. Subset for only coding Genes and retrieve Gene and UMI counts

The colapse_downsampled_counts() function iterates through the list of different downsampling depth generated by zUMIs and extracts number of Genes and number of UMIs

#HEK

hek_inex_ds_df<-collapse_downsampled_counts(zumismat = counts_hek,
                                            type="inex",
                                            frac.samples = 0.25,
                                            genes=coding_genes_hek, 
                                            umi = T)
hek_inex_ds_df_unfilt<-collapse_downsampled_counts(zumismat = counts_hek,
                                                   type="inex",
                                                   frac.samples = 0.0,
                                                   genes=coding_genes_hek, 
                                                   umi = T)
hek_exon_ds_df<-collapse_downsampled_counts(zumismat = counts_hek,
                                            type="exon",
                                            frac.samples = 0.25,
                                            genes=coding_genes_hek, 
                                            umi = T)
#PBMC
coding_genes_pbmc<-rownames(counts_pbmc$umicount$inex$all)[!(rownames(counts_pbmc$umicount$inex$all)%in%gtype_human$Gencode)]

pbmc_inex_ds_df<-collapse_downsampled_counts(zumismat = counts_pbmc,
                                             type="inex",
                                             frac.samples = 0.25,
                                             genes=coding_genes_pbmc, 
                                             umi = T)

pbmc_inex_ds_df_unfilt<-collapse_downsampled_counts(zumismat = counts_pbmc,
                                                    type="inex",
                                                    frac.samples = 0.0,
                                                    genes=coding_genes_pbmc,
                                                    umi = T)

pbmc_exon_ds_df<-collapse_downsampled_counts(zumismat = counts_pbmc,
                                             type="exon",
                                             frac.samples = 0.25,
                                             genes=coding_genes_pbmc,
                                             umi = T)
#Striatal Tissue

coding_genes_tissue <-rownames(counts_tissue$umicount$inex$all)[!(rownames(counts_tissue$umicount$inex$all)%in%gtype_mouse$Gencode)]

tissue_inex_ds_df<-collapse_downsampled_counts(zumismat = counts_tissue,
                                               type="inex",
                                               frac.samples = 0.25,
                                               genes=coding_genes_tissue,
                                               umi = T)

tissue_inex_ds_df_unfilt<-collapse_downsampled_counts(zumismat = counts_tissue,
                                                      type="inex",
                                                      frac.samples = 0.0,
                                                      genes=coding_genes_tissue, 
                                                      umi = T)

tissue_exon_ds_df<-collapse_downsampled_counts(zumismat = counts_tissue,
                                               type="exon",
                                               frac.samples = 0.25,
                                               genes=coding_genes_tissue, 
                                               umi = T)
#combine all
inex_ds_df <- dplyr::bind_rows(hek_inex_ds_df, pbmc_inex_ds_df, tissue_inex_ds_df)
inex_ds_df_unfilt <- dplyr::bind_rows(hek_inex_ds_df_unfilt, pbmc_inex_ds_df_unfilt, tissue_inex_ds_df_unfilt)
exon_ds_df <- dplyr::bind_rows(hek_exon_ds_df, pbmc_exon_ds_df, tissue_exon_ds_df)

#add info
inex_ds_df <- dplyr::inner_join(inex_ds_df, inf, by= "XC")
inex_ds_df_unfilt <- dplyr::inner_join(inex_ds_df_unfilt, inf, by= "XC")
exon_ds_df <- dplyr::inner_join(exon_ds_df, inf, by= "XC")

#keep Column and Magnetic Beads only
inex_ds_df <- inex_ds_df[inex_ds_df$Condition %in% c( "Magnetic Beads", "Column"),]
inex_ds_df_unfilt <- inex_ds_df_unfilt[inex_ds_df_unfilt$Condition %in% c( "Magnetic Beads", "Column"),]
exon_ds_df <- exon_ds_df[exon_ds_df$Condition %in% c( "Magnetic Beads", "Column"),]

#remove Sequencing depth that was just reached by few samples 

inex_ds_df <- inex_ds_df[inex_ds_df$depth != 2000000,]
inex_ds_df_unfilt <- inex_ds_df_unfilt[inex_ds_df_unfilt$depth != 2000000,]
exon_ds_df <- exon_ds_df[exon_ds_df$depth != 2000000,]

6. Figure 2D - Sensitivity, intron + exon counts, filtered

Plot UMI Counts

a1 <- ggplot(data = inex_ds_df, aes(x = depth, y = UMIs, color = Condition, group = Condition))+
  geom_smooth(method = "loess", se = F)+
  geom_point(size =2, aes(shape=Condition))+
  facet_grid(~Celltype)+
  #scale_x_continuous(breaks=seq(0,1000000,250000)) +
  xlab("Sequencing Depth")+
  theme_pub+
  scale_color_manual(values = c("gray70","#008080"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme(legend.position="none",
        axis.title.x=element_blank(),
        axis.text.x=element_blank())

Plot Gene Counts

b1 <- ggplot(data = inex_ds_df, aes(x = depth, y = Genes, color = Condition, group = Condition))+
  geom_smooth(method = "loess", se = F)+
  geom_point(size =2, aes(shape=Condition))+
  facet_grid(~Celltype)+
  xlab("Sequencing Depth")+
  theme_pub+
  scale_color_manual(values = c("gray70","#008080"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom",
        legend.title = element_blank())

Combine Plots

legend <- cowplot::get_legend(b1)
fig2d <- cowplot::plot_grid(a1, b1 + theme(legend.position = "none"),
  ncol = 1,
  nrow = 2, 
  rel_heights = c(3,4)
)
fig2d <- cowplot::plot_grid(fig2d, legend,
  ncol = 1,
  nrow = 2,
  rel_heights = c(6,1)
)
fig2d

7. Figure S2 - Sensitivity, intron + exon counts, unfiltered

Plot UMI Counts

a2 <- ggplot(data = inex_ds_df_unfilt, aes(x = depth, y = UMIs, color = Condition, group = Condition))+
  geom_smooth(method = "loess", se = F)+
  geom_point(size =2, aes(shape=Condition))+
  facet_grid(~Celltype)+
  xlab("Sequencing Depth")+
  theme_pub+
  scale_color_manual(values = c("gray70","#008080"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme(legend.position="none",
        axis.title.x=element_blank(),
        axis.text.x=element_blank())

Plot Gene Counts

b2 <- ggplot(data = inex_ds_df_unfilt, aes(x = depth, y = Genes, color = Condition, group = Condition))+
  geom_smooth(method = "loess", se = F)+
  geom_point(size =2, aes(shape=Condition))+
  facet_grid(~Celltype)+
  xlab("Sequencing Depth")+
  theme_pub+
  scale_color_manual(values = c("gray70","#008080"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom",
        legend.title = element_blank())

Combine Plots

legend2 <- cowplot::get_legend(b2)
figS2_unfilt <- cowplot::plot_grid(a2, b2 + theme(legend.position = "none"),
  ncol = 1,
  nrow = 2, 
  rel_heights = c(3,4)
)
figS2_unfilt <- cowplot::plot_grid(figS2_unfilt, legend2,
  ncol = 1,
  nrow = 2,
  rel_heights = c(6,1)
)
figS2_unfilt

8. Figure S2 - Sensitivity, exonic counts, filtered

Plot UMI Counts

a3 <- ggplot(data = exon_ds_df, aes(x = depth, y = UMIs, color = Condition, group = Condition))+
  geom_smooth(method = "loess", se = F)+
  geom_point(size =2, aes(shape=Condition))+
  facet_grid(~Celltype)+
  xlab("Sequencing Depth")+
  theme_pub+
  scale_color_manual(values = c("gray70","#008080"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme(legend.position="none",
        axis.title.x=element_blank(),
        axis.text.x=element_blank())

Plot Gene Counts

b3 <- ggplot(data = exon_ds_df, aes(x = depth, y = Genes, color = Condition, group = Condition))+
  geom_smooth(method = "loess", se = F)+
  geom_point(size =2, aes(shape=Condition))+
  facet_grid(~Celltype)+
  xlab("Sequencing Depth")+
  theme_pub+
  scale_color_manual(values = c("gray70","#008080"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom",
        legend.title = element_blank())

Combine Plots

legend3 <- cowplot::get_legend(b3)
figS2_ex <- cowplot::plot_grid(a3, b3 + theme(legend.position = "none"),
  ncol = 1,
  nrow = 2, 
  rel_heights = c(3,4)
)
figS2_ex <- cowplot::plot_grid(figS2_ex, legend3,
  ncol = 1,
  nrow = 2,
  rel_heights = c(6,1)
)
figS2_ex

9. Fig S2 - UpsetR Hek

Upset plots are a nice way to display overlapp between several groups in this case the overlap in detected genes in the different conditions is ploted.

Make upset() plot for HEK cells

# Filter other Conditions in HEK cells

hek_inf_b <- (inf %>% filter(Condition == "Magnetic Beads") %>% filter(Celltype == "HEK"))$XC
hek_inf_c <- (inf %>% filter(Condition == "Column") %>% filter(Celltype == "HEK"))$XC

## we take the downsampled counts here 

hek_inex_umi_500 <- as.matrix(counts_hek$umicount$inex$downsampling$downsampled_500000) %>% remove_Geneversion()
hek_upsetR_df <- hek_inex_umi_500[whichgenes_reproducible(hek_inex_umi_500,1,0.25), ]

hek_upsetR_df <- bind_cols(as.data.frame(rowSums(hek_upsetR_df[,colnames(hek_upsetR_df) %in% hek_inf_c])),
                        as.data.frame(rowSums(hek_upsetR_df[,colnames(hek_upsetR_df) %in% hek_inf_b])),
                        as.data.frame(rownames(hek_upsetR_df)))

colnames(hek_upsetR_df) <- c("Column",
                          "Magnetic_Beads",
                          "ENSEMBL")

## Make dataframe with Genes IDs detected in either of the two Conditions
hek_upsetR_df <- hek_upsetR_df %>%
  mutate('Column' = ifelse(Column > 0, paste(ENSEMBL), NA),
         'Magnetic_Beads' = ifelse(Magnetic_Beads > 0, paste(ENSEMBL), NA)) %>%
  dplyr::select(-ENSEMBL) %>%
  filter_all(any_vars(!is.na(.)))

## Make upset plot from this df

upset_hek <- upset(fromList(hek_upsetR_df), order.by = "freq",
                 main.bar.color = "#1c1f4c", 
                 sets.bar.color = "#1c1f4c", 
                 mainbar.y.label = "Gene Intersections", 
                 sets.x.label="Genes per Condition",
                 show.numbers = F, 
                 text.scale = c(1.5, 1.5, 1.5, 1.3, 1.5, 1.5), 
                 point.size = 3.5,
                 mainbar.y.max = 20000)

upset_hek

hek_sum_df<-table(fromList(hek_upsetR_df)$Column,fromList(hek_upsetR_df)$Magnetic_Beads) %>%
  as.data.frame() %>% 
  mutate(type=if_else(Var1==Var2,"shared","unique")) %>% 
  group_by(type) %>% 
  summarize(count=sum(Freq)) %>% 
  t() %>% 
  as.data.frame() %>% 
  filter(c(F,T)) %>% 
  mutate(total=as.integer(V1)+as.integer(V2),
         sample="HEK")

Make upset() plot for PBMCs

pbmc_inf_b <- (inf %>% filter(Condition == "Magnetic Beads") %>% filter(Celltype == "PBMCs"))$XC
pbmc_inf_c <- (inf %>% filter(Condition == "Column") %>% filter(Celltype == "PBMCs"))$XC
pbmc_inex_umi_500 <- as.matrix(counts_pbmc$umicount$inex$downsampling$downsampled_500000) %>% remove_Geneversion()
pbmc_upsetR_df <- pbmc_inex_umi_500[whichgenes_reproducible(pbmc_inex_umi_500,1,0.25), ]


pbmc_upsetR_df <- bind_cols(as.data.frame(rowSums(pbmc_upsetR_df[,colnames(pbmc_upsetR_df) %in% pbmc_inf_c])),
                        as.data.frame(rowSums(pbmc_upsetR_df[,colnames(pbmc_upsetR_df) %in% pbmc_inf_b])),
                        as.data.frame(rownames(pbmc_upsetR_df)))
colnames(pbmc_upsetR_df) <- c("Column",
                          "Magnetic_Beads",
                          "ENSEMBL")

## Make dataframe with Genes IDs detected in either of the two Conditions

pbmc_upsetR_df <- pbmc_upsetR_df %>%
  mutate('Column' = ifelse(Column > 0, paste(ENSEMBL), NA),
         'Magnetic_Beads' = ifelse(Magnetic_Beads > 0, paste(ENSEMBL), NA)) %>%
  dplyr::select(-ENSEMBL) %>%
  filter_all(any_vars(!is.na(.)))

## Make upset plot from this df

upset_pbmc <- upset(fromList(pbmc_upsetR_df), order.by = "freq",
                 main.bar.color = "#037272", 
                 sets.bar.color = "#037272", 
                 mainbar.y.label = "Gene Intersections", 
                 sets.x.label="Genes per Condition",
                 show.numbers = F, 
                 text.scale = c(1.5, 1.5, 1.5, 1.3, 1.5, 1.5), 
                 point.size = 3.5,
                 mainbar.y.max = 20000)
upset_pbmc

pbmc_sum_df<-table(fromList(pbmc_upsetR_df)$Column,fromList(pbmc_upsetR_df)$Magnetic_Beads) %>% 
  as.data.frame() %>% 
  mutate(type=if_else(Var1==Var2,"shared","unique")) %>% 
  group_by(type) %>% 
  summarize(count=sum(Freq)) %>% 
  t() %>% 
  as.data.frame() %>% 
  filter(c(F,T)) %>% 
  mutate(total=as.integer(V1)+as.integer(V2),
         sample="PBMC")

Make upset() plot for Striatal Tissue

#tissue
tissue_inf_b <- (inf %>% filter(Condition == "Magnetic Beads") %>% filter(Celltype == "Tissue"))$XC
tissue_inf_c <- (inf %>% filter(Condition == "Column") %>% filter(Celltype == "Tissue"))$XC
tissue_inex_umi_500 <- as.matrix(counts_tissue$umicount$inex$downsampling$downsampled_500000) %>% remove_Geneversion()
tissue_upsetR_df <- tissue_inex_umi_500[whichgenes_reproducible(tissue_inex_umi_500,1,0.25), ]

tissue_upsetR_df <- bind_cols(as.data.frame(rowSums(tissue_upsetR_df[,colnames(tissue_upsetR_df) %in% tissue_inf_c])),
                        as.data.frame(rowSums(tissue_upsetR_df[,colnames(tissue_upsetR_df) %in% tissue_inf_b])),
                        as.data.frame(rownames(tissue_upsetR_df)))
colnames(tissue_upsetR_df) <- c("Column",
                          "Magnetic_Beads",
                          "ENSEMBL")

## Make dataframe with Genes IDs detected in either of the two Conditions

tissue_upsetR_df <- tissue_upsetR_df %>%
  mutate('Column' = ifelse(Column > 0, paste(ENSEMBL), NA),
         'Magnetic_Beads' = ifelse(Magnetic_Beads > 0, paste(ENSEMBL), NA)) %>%
  dplyr::select(-ENSEMBL) %>%
  filter_all(any_vars(!is.na(.)))


## Make upset plot from this df
upset_tissue <- upset(fromList(tissue_upsetR_df), order.by = "freq",
                 main.bar.color = "#fec20f", 
                 sets.bar.color = "#fec20f", 
                 mainbar.y.label = "Gene Intersections", 
                 sets.x.label="Genes per Condition",
                 show.numbers = F, 
                 text.scale = c(1.5, 1.5, 1.5, 1.3, 1.5, 1.5), 
                 point.size = 3.5,
                 mainbar.y.max = 20000)

upset_tissue

tissue_sum_df<-table(fromList(tissue_upsetR_df)$Column,fromList(tissue_upsetR_df)$Magnetic_Beads) %>% 
  as.data.frame() %>% 
  mutate(type=if_else(Var1==Var2,"shared","unique")) %>% 
  group_by(type) %>% 
  summarize(count=sum(Freq)) %>% 
  t() %>% 
  as.data.frame() %>% 
  filter(c(F,T)) %>% 
  mutate(total=as.integer(V1)+as.integer(V2),
         sample="Tissue")

bind_rows(hek_sum_df,pbmc_sum_df,tissue_sum_df) %>% 
  mutate(frac_unique=as.integer(V2)/total,
         frac_shared=1-frac_unique)
##      V1    V2 total sample frac_unique frac_shared
## 1 18747   489 19236    HEK  0.02542109   0.9745789
## 2 16289  2793 19082   PBMC  0.14636831   0.8536317
## 3 16341  1479 17820 Tissue  0.08299663   0.9170034
figS2_upset <-cowplot::plot_grid(as.grob(upset_hek), as.grob(upset_pbmc), as.grob(upset_tissue), 
                 rows = 1, 
                 labels = c("HEK Cells", "PBMCs", "Tissue"),
                 label_x = 0.5)

figS2_upset