-
Notifications
You must be signed in to change notification settings - Fork 4
/
Developmental reconstruction pipline
213 lines (182 loc) · 14.3 KB
/
Developmental reconstruction pipline
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#####################################Session 1
###Calculate relationship for proximal time points
##H1-->S0
##S0-->S1.24(S1_D1)
##S1.24(S1_D1)-->S1(S1_D2)
##S1(S1_D2)-->S2.24(S2_D1)
##S2.24(S2_D1)-->S2.48(S2_D2)
##S2.48(S2_D2)-->S2(S2_D3)
##S2(S2_D3)-->S3
##S3-->S4
##S4-->S5
##S5-->S6
##S6-->S7
###dge raw data are available at GSE143783
###################################
###########################3#######
##H1-->S0
H1_S0.tree.prep<-Tree.build.prepare(dge1=H1.dge,dge2=s0.dge,name1="H1",name2="s0",first.reso=c(0.03,0.03))
H1_S0.tree.1.ob<-Tree.build.1(H1_S0.tree.prep)
H1_S0.tree.2nd.primary_0.06.list<-Tree.build.2nd.clustering(H1_S0.tree.prep,H1_S0.tree.1.ob,second.reso=c(0.06,0.06))
H1_S0.tree.2nd.primary_0.06.list.patched<-Tree.build.2nd.clustering.patch(H1_S0.tree.2nd.primary_0.06.list,H1_S0.tree.prep,H1_S0.tree.1.ob,singledog.reso=0.06)
H1_S0.tree.2nd.treemade_0.06.ob<-Tree.build.2nd.treemaking(H1_S0.tree.2nd.primary_0.06.list.patched,second.reso=c(0.06,0.06),upstremename="H1",downstremename="s0",relationtext=1.8)
##S0-->S1.24
S0_S1.24.tree.prep<-Tree.build.prepare(dge1=s0.dge,dge2=s1.24.dge,name1="s0",name2="s1.24",first.reso=c(0.03,0.03))
S0_S1.24.tree.1.ob<-Tree.build.1(S0_S1.24.tree.prep)
S0_S1.24.tree.2nd.primary_0.06.list<-Tree.build.2nd.clustering(S0_S1.24.tree.prep,S0_S1.24.tree.1.ob,second.reso=c(0.06,0.06))
S0_S1.24.tree.2nd.primary_0.06.list.patched<-Tree.build.2nd.clustering.patch(S0_S1.24.tree.2nd.primary_0.06.list,S0_S1.24.tree.prep,S0_S1.24.tree.1.ob,singledog.reso=0.06)
S0_S1.24.tree.2nd.treemade_0.06.ob<-Tree.build.2nd.treemaking(S0_S1.24.tree.2nd.primary_0.06.list.patched,second.reso=c(0.06,0.06),upstremename="s0",downstremename="s1.24",relationtext=1.8)
##S1.24-->S1
S1.24_S1.tree.prep<-Tree.build.prepare(dge1=s1.24.dge,dge2=s1.dge,name1="s1.24",name2="s1",first.reso=c(0.03,0.03))
S1.24_S1.tree.1.ob<-Tree.build.1(S1.24_S1.tree.prep)
S1.24_S1.tree.2nd.primary_0.06.list<-Tree.build.2nd.clustering(S1.24_S1.tree.prep,S1.24_S1.tree.1.ob,second.reso=c(0.06,0.06))
S1.24_S1.tree.2nd.primary_0.06.list.patched<-Tree.build.2nd.clustering.patch(S1.24_S1.tree.2nd.primary_0.06.list,S1.24_S1.tree.prep,S1.24_S1.tree.1.ob,singledog.reso=0.06)
S1.24_S1.tree.2nd.treemade_0.06.ob<-Tree.build.2nd.treemaking(S1.24_S1.tree.2nd.primary_0.06.list.patched,second.reso=c(0.06,0.06),upstremename="s1.24",downstremename="s1_",relationtext=1.8)
#S1-->S2.24
S1_S2.24.tree.prep<-Tree.build.prepare(dge1=s1.dge,dge2=s2.24.dge,name1="s1",name2="s2.24",first.reso=c(0.03,0.05))
S1.24_S1.tree.2nd.primary_0.06.list.patchedS1_S2.24.tree.1.ob<-Tree.build.1(S1_S2.24.tree.prep)
S1_S2.24.tree.2nd.primary_0.06.list<-Tree.build.2nd.clustering(S1_S2.24.tree.prep,S1_S2.24.tree.1.ob,second.reso=c(0.06,0.1))
S1_S2.24.tree.2nd.primary_0.06.list.patched<-Tree.build.2nd.clustering.patch(S1_S2.24.tree.2nd.primary_0.06.list,S1_S2.24.tree.prep,S1_S2.24.tree.1.ob,singledog.reso=0.06)
S1_S2.24.tree.2nd.treemade_0.06.ob<-Tree.build.2nd.treemaking(S1_S2.24.tree.2nd.primary_0.06.list.patched,second.reso=c(0.06,0.1),upstremename="s1",downstremename="s2.24",relationtext=1.8)
# S2.24.all -->S2.48.all
S2.24_S2.48.tree.prep<-Tree.build.prepare(dge1=s2.24.dge,dge2=s2.48.dge,name1="s2.24",name2="s2.48",first.reso=c(0.05,0.03)) ## This is a primary object for first layer clustering under this resolution
S2.24_S2.48.tree.1.ob<-Tree.build.1(S2.24_S2.48.tree.prep) # This is a list of tree making result for the first layer clustering
S2.24_S2.48.tree.2nd.primary_0.06_0.03.list<-Tree.build.2nd.clustering(S2.24_S2.48.tree.prep,S2.24_S2.48.tree.1.ob,second.reso=c(0.1,0.03))
S2.24_S2.48.tree.2nd.primary_0.06_0.03.list.patched<-Tree.build.2nd.clustering.patch(S2.24_S2.48.tree.2nd.primary_0.06_0.03.list,S2.24_S2.48.tree.prep,S2.24_S2.48.tree.1.ob,singledog.reso=0.06)
S2.24_S2.48.tree.2nd.treemade_0.06_0.03.ob<-Tree.build.2nd.treemaking(S2.24_S2.48.tree.2nd.primary_0.06_0.03.list.patched,second.reso=c(0.06,0.03),upstremename="s2.24",downstremename="s2.48")
##S2.48-->S2
S2.48_S2.all.tree.prep<-Tree.build.prepare(dge1=s2.48.dge,dge2=s2.all.dge,name1="s2.48",name2="s2.all",first.reso=c(0.03,0.03))
S2.48_S2.all.tree.1.ob<-Tree.build.1(S2.48_S2.all.tree.prep)
S2.48_S2.all.tree.2nd.primary_0.06.list<-Tree.build.2nd.clustering(S2.48_S2.all.tree.prep,S2.48_S2.all.tree.1.ob,second.reso=c(0.03,0.06))
S2.48_S2.all.tree.2nd.primary_0.06.list.patched<-Tree.build.2nd.clustering.patch(S2.48_S2.all.tree.2nd.primary_0.06.list,S2.48_S2.all.tree.prep,S2.48_S2.all.tree.1.ob,singledog.reso=0.06)
S2.48_S2.all.tree.2nd.treemade_0.06.ob<-Tree.build.2nd.treemaking(S2.48_S2.all.tree.2nd.primary_0.06.list.patched,second.reso=c(0.03,0.06),upstremename="s2.48",downstremename="s2.all",relationtext=1.8)
##S2-->S3
S2.all_S3.all.tree.prep<-Tree.build.prepare(dge1=s2.all.dge,dge2=s3.all.dge,name1="s2.all",name2="s3.all",first.reso=c(0.03,0.03))
S2.all_S3.all.tree.1.ob<-Tree.build.1(S2.all_S3.all.tree.prep)
S2.all_S3.all.tree.2nd.primary_0.06.list<-Tree.build.2nd.clustering(S2.all_S3.all.tree.prep,S2.all_S3.all.tree.1.ob,second.reso=c(0.06,0.06))
S2.all_S3.all.tree.2nd.primary_0.06.list.patched<-Tree.build.2nd.clustering.patch(S2.all_S3.all.tree.2nd.primary_0.06.list,S2.all_S3.all.tree.prep,S2.all_S3.all.tree.1.ob,singledog.reso=0.06)
S2.all_S3.all.tree.2nd.treemade_0.06.ob<-Tree.build.2nd.treemaking(S2.all_S3.all.tree.2nd.primary_0.06.list.patched,second.reso=c(0.06,0.06),upstremename="s2.all",downstremename="s3.all",relationtext=1.8)
#S3.all -->S4.B
S3.all_S4.B.tree.prep<-Tree.build.prepare(dge1=s3.all.dge,dge2=s4.B.dge,name1="s3.all",name2="s4.B",first.reso=c(0.03,0.015))
S3.all_S4.B.tree.1.ob<-Tree.build.1(S3.all_S4.B.tree.prep)
S3.all_S4.B.tree.2nd.primary_0.06_0.3.list<-Tree.build.2nd.clustering(S3.all_S4.B.tree.prep,S3.all_S4.B.tree.1.ob,second.reso=c(0.06,0.3))
S3.all_S4.B.tree.2nd.primary_0.06_0.3.list.patched<-Tree.build.2nd.clustering.patch(S3.all_S4.B.tree.2nd.primary_0.06_0.3.list,S3.all_S4.B.tree.prep,S3.all_S4.B.tree.1.ob,singledog.reso=0.3)
S3.all_S4.B.tree.2nd.treemade_0.06_0.3.ob<-Tree.build.2nd.treemaking(S3.all_S4.B.tree.2nd.primary_0.06_0.3.list.patched,second.reso=c(0.06,0.3),singledog.reso=0.3,upstremename="s3.all",downstremename="s4.B")
#S4.B-->S5.all
S4.B_S5.all.tree.prep<-Tree.build.prepare(dge1=s4.B.dge,dge2=s5.all.dge,name1="s4.B",name2="s5.all",first.reso=c(0.015,0.03))
S4.B_S5.all.tree.1.ob<-Tree.build.1(S4.B_S5.all.tree.prep)
S4.B_S5.all.tree.2nd.primary_0.06_0.3.list<-Tree.build.2nd.clustering(S4.B_S5.all.tree.prep,S4.B_S5.all.tree.1.ob,second.reso=c(0.3,0.3))
S4.B_S5.all.tree.2nd.primary_0.06_0.3.list.patched<-Tree.build.2nd.clustering.patch(S4.B_S5.all.tree.2nd.primary_0.06_0.3.list,S4.B_S5.all.tree.prep,S4.B_S5.all.tree.1.ob,singledog.reso=0.3)
S4.B_S5.all.tree.2nd.treemade_0.06_0.3.ob<-Tree.build.2nd.treemaking(S4.B_S5.all.tree.2nd.primary_0.06_0.3.list.patched,second.reso=c(0.3,0.3),singledog.reso=0.3,upstremename="s4.B",downstremename="s5.all")
#S5.all-->S6.A
S5.all_S6.A.tree.prep<-Tree.build.prepare(dge1=s5.all.dge,dge2=s6.A.dge,name1="s5.all",name2="s6.A",first.reso=c(0.03,0.03))
S5.all_S6.A.tree.1.ob<-Tree.build.1(S5.all_S6.A.tree.prep)
S5.all_S6.A.tree.2nd.primary_0.3_0.3.list<-Tree.build.2nd.clustering(S5.all_S6.A.tree.prep,S5.all_S6.A.tree.1.ob,second.reso=c(0.3,0.3))
S5.all_S6.A.tree.2nd.primary_0.3_0.3.list.patched<-Tree.build.2nd.clustering.patch(S5.all_S6.A.tree.2nd.primary_0.3_0.3.list,S5.all_S6.A.tree.prep,S5.all_S6.A.tree.1.ob,singledog.reso=0.06)
S5.all_S6.A.tree.2nd.treemade_0.3_0.3.ob<-Tree.build.2nd.treemaking(S5.all_S6.A.tree.2nd.primary_0.3_0.3.list.patched,second.reso=c(0.3,0.3),upstremename="s5.all",downstremename="s6.A")
#S6.A -->S7.B
S6.A_S7.B.tree.prep<-Tree.build.prepare(dge1=s6.A.dge,dge2=s7.B.dge,name1="s6.A",name2="s7.B",first.reso=c(0.03,0.03))
S6.A_S7.B.tree.1.ob<-Tree.build.1(S6.A_S7.B.tree.prep)
S6.A_S7.B.tree.2nd.primary_0.3_0.3.list<-Tree.build.2nd.clustering(S6.A_S7.B.tree.prep,S6.A_S7.B.tree.1.ob,second.reso=c(0.3,0.3))
S6.A_S7.B.tree.2nd.primary_0.3_0.3.list.patched<-Tree.build.2nd.clustering.patch(S6.A_S7.B.tree.2nd.primary_0.3_0.3.list,S5.all_S6.A.tree.prep,S6.A_S7.B.tree.1.ob,singledog.reso=0.06)
S6.A_S7.B.tree.2nd.treemade_0.3_0.3.ob<-Tree.build.2nd.treemaking(S6.A_S7.B.tree.2nd.primary_0.3_0.3.list.patched,second.reso=c(0.3,0.3),upstremename="s6.A",downstremename="s7.B")
#####################################Session 2
### Generate the network(graph) that connect all populations
## The input is the objects calculated above
##H1_S0.tree.2nd.treemade_0.06.ob
##S0_S1.24.tree.2nd.treemade_0.06.ob
##S1.24_S1.tree.2nd.treemade_0.08.ob
##S1_S2.24.tree.2nd.treemade_0.08.ob
##S2.24_S2.48.tree.2nd.treemade_0.06_0.03.ob
##S2.48_S2.all.tree.2nd.treemade_0.06.ob
##S2.all_S3.all.tree.2nd.treemade_0.06.ob
##S3.all_S4.B.tree.2nd.treemade_0.06_0.3.ob
##S4.B_S5.all.tree.2nd.treemade_0.06_0.3.ob
##S5.all_S6.A.tree.2nd.treemade_0.3_0.3.ob
##S6.A_S7.B.tree.2nd.treemade_0.3_0.3.ob
###################################
###########################3#######
objlist<-list(H1_S0.tree.2nd.treemade_0.06.ob,S0_S1.24.tree.2nd.treemade_0.06.ob,S1.24_S1.tree.2nd.treemade_0.08.ob,S1_S2.24.tree.2nd.treemade_0.08.ob,S2.24_S2.48.tree.2nd.treemade_0.06_0.03.ob,
S2.48_S2.all.tree.2nd.treemade_0.06.ob,S2.all_S3.all.tree.2nd.treemade_0.06.ob,S3.all_S4.B.tree.2nd.treemade_0.06_0.3.ob,S4.B_S5.all.tree.2nd.treemade_0.06_0.3.ob,
S5.all_S6.A.tree.2nd.treemade_0.3_0.3.ob,S6.A_S7.B.tree.2nd.treemade_0.3_0.3.ob)
all.nodes<-c()
all.edges<-c()
n=1
for (subtree in objlist)
{
print(n)
n=n+1
for (sublineage in names(subtree$deeper.relation.plots.lst))
{
edge1<-subtree$deeper.relation.plots.lst[[sublineage]]$matrix.advance %>% .$segData.2_1.ordered %>% as.data.frame() %>% .[,c(1,2)] %>% cbind(.,strength=rep("strong",nrow(.)))
if(!is.null(subtree$deeper.relation.plots.lst[[sublineage]]$matrix.advance$segData.2_1.internal.ordered))
{
edge2<-subtree$deeper.relation.plots.lst[[sublineage]]$matrix.advance %>% .$segData.2_1.internal.ordered %>% as.data.frame() %>% .[,c(1,2)] %>% cbind(.,strength=rep("strong",nrow(.)))
}else
{
edge2<-c()
}
if(!is.null(subtree$deeper.relation.plots.lst[[sublineage]]$matrix.advance$segData.2_1.weak.ordered))
{
edge3<-subtree$deeper.relation.plots.lst[[sublineage]]$matrix.advance %>% .$segData.2_1.weak.ordered %>% as.data.frame() %>% .[,c(1,2)] %>% cbind(.,strength=rep("week",nrow(.)))
}else
{
edge3<-c()
}
edges.cur<-rbind(edge1,edge2,edge3)
nodes.stage.cur<-c(as.character(edges.cur[,1]),as.character(edges.cur[,2])) %>% strsplit(.,"_") %>% lapply(.,function(x){x[1]}) %>% unlist
nodes.cur<-c(as.character(edges.cur[,1]),as.character(edges.cur[,2])) %>% data.frame(nodes=.,stage=nodes.stage.cur) %>% .[!duplicated(.),]
all.nodes<-rbind(all.nodes,nodes.cur)
all.edges<-rbind(all.edges,edges.cur)
}
}
allcluster.dist.mtx<-myddply.center(hESCdiff.beta.allcell.pca.withinfo,"Sample")
all.edges<-cbind(all.edges,distance=apply(all.edges,1,function(x){allcluster.dist.mtx$centers.dist.mtx[as.character(x[1]),as.character(x[2])]}))
all.nodes<-all.nodes[!duplicated(all.nodes),]
hESCdiff.beta.allcell.mygraph<-makelayout(nodes.data=all.nodes,edges.data=all.edges,manu.xaxis=nametransform,byy="finalname")
#####################################Session 3
### Visulizations and gene module clustering
###################################
###########################3#######
## hESCdiff.beta.allcell.pca.withinfo is a pca matrix for all cells
all.edges.bin.data.normed_UMI.list<-Get.pseudotime.byStage.pairing(pcawithinfo=hESCdiff.beta.allcell.pca.withinfo,edges.data=all.edges,object=hESCdiff.beta.allcell.seurate,PCnumber=10)
## Map all the bin data onto the tree plot
all.edge.edges.bin.data.withTreepos<-c()
for (theedge in names(all.edges.bin.data.normed_UMI.list))
{
bins<-all.edges.bin.data.normed_UMI.list[[theedge]] %>% dim %>% .[1]
cur.edge<-strsplit(theedge,"!") %>% unlist
# hESCdiff.beta.allcell.mygraph$nodes.data.x.justed
cur.edge.pos<-subset(hESCdiff.beta.allcell.mygraph$nodes.data.x.justed,nodes %in% cur.edge)
cur.edge.pos$nodes<-factor(cur.edge.pos$nodes,levels=cur.edge)
cur.edge.pos<-cur.edge.pos[order(cur.edge.pos$nodes),]
walk.unit.x<-(as.numeric(cur.edge.pos[2,c("x","y")])-as.numeric(cur.edge.pos[1,c("x","y")]))[1]/(bins-1)
walk.unit.y<-(as.numeric(cur.edge.pos[2,c("x","y")])-as.numeric(cur.edge.pos[1,c("x","y")]))[2]/(bins-1)
cur.bins.coordinates<-cur.edge.pos[1,c("x","y")]
for (i in 1:(bins-1))
{
tmp.xy<-c(cur.bins.coordinates[nrow(cur.bins.coordinates),1]+walk.unit.x,cur.bins.coordinates[nrow(cur.bins.coordinates),2]+walk.unit.y)
cur.bins.coordinates<-rbind(cur.bins.coordinates,tmp.xy)
}
cur.edge.edges.bin.data.withTreepos<-all.edges.bin.data.normed_UMI.list[[theedge]] %>% cbind(.,cur.bins.coordinates) %>% t
colnames(cur.edge.edges.bin.data.withTreepos)<-paste(theedge,colnames(cur.edge.edges.bin.data.withTreepos),sep="::")
all.edge.edges.bin.data.withTreepos<-cbind(all.edge.edges.bin.data.withTreepos,cur.edge.edges.bin.data.withTreepos)
}
datatoplot<-t(all.edge.edges.bin.data.withTreepos) %>% as.data.frame()
## Tree plot example for any given gene
ggplot(datatoplot)+aes_string("x","y",color="AFP")+geom_point()+scale_color_gradientn(colors=c("red","gold","white","steelblue","grey"),values=c(1,0.95,0.85,0.3,0),trans="log10")
## Gene modules
gene.cluster.forallbranchs<-patternning.pearson_forallbranch(intermediateDFs=datatoplot[,1:(ncol(datatoplot)-4)],top10.cutoff=0.01)
all.edge.edges.bin.data.withTreepos.normed<-apply(datatoplot,2,function(x){x/sum(x)})
data.frame(Top20mean=apply(datatoplot[,1:(ncol(datatoplot)-4)],2,function(x){mean(x[order(x,decreasing=T)][1:20])}),CV=apply(datatoplot[,1:(ncol(datatoplot)-4)],2,function(vector){(sd(vector))^2/mean(vector)})) -> Geneinfo.ontree
Traininggenes<-subset(Geneinfo.ontree,Top20mean>0.05) %>% .[order(.$CV,decreasing=T),] %>% .[1:2000,]
Traininggene.ontree.scaled<-datatoplot[,row.names(Traininggenes)] %>% apply(.,2,scale) #2020-6-22-a
gene.clusters.k<-cluster::pam(as.dist(sqrt(2*(1-gene.cluster.forallbranchs[row.names(Traininggenes),row.names(Traininggenes)]))),k=64)
##### Map the rest of genes using KNN
cl<-gene.clusters.k$clustering
Traininggene.ontree.scaled.mtx<-Traininggene.ontree.scaled %>% t
test.genes<-subset(Geneinfo.ontree,Top20mean>0.05) %>% .[order(.$CV,decreasing=T),] %>% .[2001:nrow(.),] %>% row.names
test.mtx<- datatoplot[,test.genes[test.genes %in% colnames(datatoplot)]] %>% apply(.,2,scale) %>% t
test.cl<-class::knn(train=Traininggene.ontree.scaled.mtx,test=test.mtx,cl=cl,k=5)
names(test.cl)<-row.names(test.mtx)
tree.gene.clusters.all<-c(gene.clusters.k$clustering,test.cl)