Skip to content

Commit

Permalink
Merge pull request #105 from mcvickerlab/experimental
Browse files Browse the repository at this point in the history
final push
  • Loading branch information
somet3000 authored Aug 24, 2024
2 parents e26c0dc + 0d9364b commit dd5cada
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 0 deletions.
41 changes: 41 additions & 0 deletions src/gasperini/visualization/plot_pairs_per_gene_330_pairs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# This script plots the distribution of number of enhancer pairs per gene for
# the set of 264 tested enhancer pairs.
#
# Author: Karthik Guruvayurappan

library(stats)
library(dplyr)
library(ggplot2)

# read in results from 330 enhancer-enhancer-gene sets
results <- read.csv('data/gasperini/processed/enhancer_pairs_330_models.csv')

# filter out NA values
results <- results[complete.cases(results), ]

# plot distribution of number of enhancer pairs per gene
plot_df <- results %>% count(gene.list)

plot <- ggplot(plot_df, aes(x = n)) +
geom_histogram(fill = 'darkgray', color = 'darkgray') +
theme_classic() +
scale_x_continuous(expand = c(0.02, 0)) +
scale_y_continuous(expand = c(0, 0)) +
xlab("Number of Enhancer Pairs") +
ylab("Count") +
theme(
axis.line = element_line(linewidth = 1),
axis.title.x = element_text(size = 24, color = 'black'),
axis.title.y = element_text(size = 24, color = 'black'),
axis.text = element_text(size = 24, color = 'black'),
axis.ticks = element_line(color = 'black', linewidth = 1),
axis.ticks.length = unit(2, 'mm'),
plot.margin = rep(unit(10, 'mm'), 4),
)

ggsave(
plot = plot,
filename = 'out/pairs_per_gene_330_pairs.pdf',
device = 'pdf'
)

70 changes: 70 additions & 0 deletions src/gasperini/visualization/plot_pairs_per_gene_at_scale.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# This script plots the number of enhancer pairs per gene for the at-scale
# analysis.
#
# Author: Karthik Guruvayurappan

library(stats)
library(dplyr)
library(ggplot2)

# read in at-scale enhancer pair model results across all 32 batches
models <- data.frame()

for (i in 1:32) {
batch.file.name <- paste0(
'data/gasperini/processed/enhancer_pairs_at_scale_',
i,
'.csv'
)
batch.models <- read.csv(batch.file.name)
models <- rbind(models, batch.models)
}

# filter for cases where all coefficients exist
models <- models[complete.cases(models), ]

# read in the "true" double perturbation counts
perturbation_counts <- read.csv(
paste0(
'data/gasperini/processed/',
'enhancer_pair_efficiency_adjusted_double_perturb_counts.csv'
)
)

# merge model outputs with perturbation counts
models <- merge(
models,
perturbation_counts,
by.x = c('enhancer.1.list', 'enhancer.2.list', 'gene.list'),
by.y = c('enhancer_1_list', 'enhancer_2_list', 'gene_list')
)

# filter for true 10 cell threshold
models <- models[models$double_perturbation_counts >= 10, ]

# count number of pairs per gene
plot_df <- models %>% count(gene.list)

plot <- ggplot(plot_df, aes(x = n)) +
geom_histogram(fill = 'darkgray', color = 'darkgray') +
theme_classic() +
scale_x_continuous(expand = c(0.02, 0)) +
scale_y_continuous(expand = c(0, 0)) +
xlab("Number of Enhancer Pairs") +
ylab("Count") +
theme(
axis.line = element_line(linewidth = 1),
axis.title.x = element_text(size = 24, color = 'black'),
axis.title.y = element_text(size = 24, color = 'black'),
axis.text = element_text(size = 24, color = 'black'),
axis.ticks = element_line(color = 'black', linewidth = 1),
axis.ticks.length = unit(2, 'mm'),
plot.margin = rep(unit(10, 'mm'), 4),
)

ggsave(
plot = plot,
filename = 'out/pairs_per_gene_at_scale.pdf',
device = 'pdf'
)

Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ models <- models[models$double_perturbation_counts >= 10, ]

# print out how many models remain after filtering
print(dim(models))
write.csv(
models,
'data/gasperini/processed/enhancer_pairs_at_scale.csv',
row.names = FALSE
)

# sort results by p-value
models <- models[order(models$interaction.pvalues), ]
Expand Down

0 comments on commit dd5cada

Please sign in to comment.