-
Notifications
You must be signed in to change notification settings - Fork 0
/
bak_7_combining_mouseEf.R
173 lines (133 loc) · 6.01 KB
/
bak_7_combining_mouseEf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
library(ggplot2)
library(pheatmap)
library(doParallel) # more flexible than the library parallel
library(RSvgDevice)
## raw count data
Mm.nRC <- read.table("output_data/Mm_norm_counts.csv", sep=",")
Ef.nRC <- read.table("output_data/Ef_norm_counts.csv", sep=",")
both.col <- intersect(colnames(Mm.nRC), colnames(Ef.nRC))
## Reid and Berriman approach: We randomized all profiles and
## calculated the Pearson correlation coefficient between every
## interspecific pair of genes. We repeated this 10^5 times and
## calculated the P-value as the number of times a randomized pair of
## genes profiles were correlated at least as well as the real
## profiles, divided by 10^5
## Cors <- cor(t(Mm.nRC[, both.col]), t(Ef.nRC[, both.col]))
## registerDoParallel(cores=20)
## outer.reps <- 5000
## inner.reps <- 100
## ## sum up parallel the numbers the random correlation values are
## ## bigger than the real correlations
## parallel.RnB <- function(inner.reps){
## randCors <-
## foreach(icount(inner.reps), .combine="+") %dopar% {
## rand.col <- sample(both.col, length(both.col))
## RC <- cor(t(Mm.nRC[, rand.col]),
## t(Ef.nRC[, both.col]))
## res <- abs(RC)>abs(Cors)
## ## avoid an accumulation of NAs for In cor(t(Mm.nRC[,
## ## both.col]), t(Ef.nRC[, both.col])) : the standard
## ## deviation is zero
## res[is.na(res)] <- TRUE
## return(res)
## }
## return(randCors)
## }
## ## a large number of non-parallelized chunks. Putting everything in
## ## the parallel function would eat up memory
## RnB <- matrix(0, nrow=nrow(Cors), ncol=ncol(Cors))
## for(i in 1:outer.reps){
## ## sum each result with what has been there before
## RnB.new <- parallel.RnB(inner.reps)
## print(paste("finished outer rep", i))
## RnB <- RnB + RnB.new
## }
## file.RnB <- paste("//home/ele/RnB_",
## round(as.vector(Sys.time())), ".Rdata", sep="")
## save(RnB, file = file.RnB)
## RnB.final <- RnB / (outer.reps*inner.reps)
## prod.file.RnB <- paste("/home/ele/RnB_Prod_",
## round(as.vector(Sys.time())), ".Rdata", sep="")
## save(RnB.final, file = prod.file.RnB)
## the raw values are not needed
## ## load("/SAN/Eimeria_Totta/RnB_1478263654.Rdata")
## just load the products in RnB.final
load("/SAN/Eimeria_Totta/RnB_Prod_1478263755.Rdata")
is.zero <- (RnB.final==0)
is.zero.all.cols <- apply(is.zero, 2, any)
Ef.genes.interacting <- colnames(RnB.final)[is.zero.all.cols]
is.zero.all.rows <- apply(is.zero, 1, any)
Mm.genes.interacting <- rownames(RnB.final)[is.zero.all.rows]
## the interaction network for later
inter.net <- is.zero[is.zero.all.rows, is.zero.all.cols]
interactions.per.Ef.gene <- data.frame(apply(is.zero, 2, sum))
names(interactions.per.Ef.gene) <- "num.interactions"
interactions.per.Ef.gene$from.to <- "Ef.to.Mm"
interactions.per.Mm.gene <- data.frame(apply(is.zero, 1, sum))
names(interactions.per.Mm.gene) <- "num.interactions"
interactions.per.Mm.gene$from.to <- "Mm.to.Ef"
interactions.per.gene <- rbind(interactions.per.Ef.gene,
interactions.per.Mm.gene)
## the clusters to compare
hcluster <- list()
hcluster[["Mm"]] <- read.table("output_data/Mm_hclustered_cycle.csv", sep=",",
header=TRUE)
hcluster[["Ef"]] <- read.table("output_data/Ef_hclustered_cycle.csv", sep=",",
header=TRUE)
set.seed(242)
values.RnB <- as.vector(RnB.final)
flat.RnB <- sample(values.RnB, 100000) # select a number of interactions to plot distribution
flat.RnB <- as.data.frame(flat.RnB)
names(flat.RnB) <- "RnB"
flat.RnB$kind <- "Overall"
values.DE.RnB <- as.vector(RnB.final[rownames(hcluster[["Mm"]]),
rownames(hcluster[["Ef"]])])
flat.DE.RnB <- sample(values.DE.RnB, 100000)
flat.DE.RnB <- as.data.frame(flat.DE.RnB)
names(flat.DE.RnB) <- "RnB"
flat.DE.RnB$kind <- "DA mRNAs"
flat <- rbind(flat.RnB, flat.DE.RnB)
## y = counts or density?
devSVG(file = "figures/Figure5a_distributionRnB.svg", height = 5, width = 6)
ggplot(flat, aes(x=RnB, y =..count.., color=kind)) +
geom_density() +
geom_hline(color = "white", yintercept = 0) + # puts white line over ugly line parallel to x-axis
scale_y_continuous("Density") +
scale_x_continuous("ISIGEM score") +
ggtitle("Distribution of ISIGEM scores") +
scale_color_manual(values=c("#e19e2e", "#006400")) +
theme_bw(20) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.position = c(0.8, 0.8))
dev.off()
## plot mouse and parasite of howe often zero is observed together
devSVG(file = "figures/Figure5b_zeroDist.svg", height = 5, width = 6)
ggplot(interactions.per.gene, aes(x=num.interactions, y=..count..,
color=from.to)) +
geom_freqpoly(stat="bin", binwidth=1) +
scale_y_sqrt("Times reported", breaks=seq(0, 100, by=10)^2) +
scale_x_sqrt("Number of interactions",
breaks=seq(0, 35, by=5)^2, limits=(c(0, NA))) +
ggtitle("Interactions per gene") +
scale_color_manual(values=c("#e19e2e", "#006400"),
breaks=c("Ef.to.Mm", "Mm.to.Ef"),
labels=c("Parasite to mouse", "Mouse to parasite")) +
theme_bw(20) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.position = c(0.65, 0.8))
dev.off()
library(ggnet) #install from github with devtools::install_github("briatte/ggnet")
library(sna)
library(intergraph) # install by devtools::install_github("mbojan/intergraph")
col <- c("actor" = "#006400", "event" = "#e19e2e")
net <- network(inter.net, directed = FALSE,
matrix.type = "bipartite")
pdf("figures/Figure5c_interactionNet.pdf")
ggnet2(net, color = "mode", palette = col, size=0.5,
edge.alpha = 0.2, edge.width = 0.3)
dev.off()
## Figure 5d produced....?