-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathloadbam.sup.r
105 lines (96 loc) · 3.59 KB
/
loadbam.sup.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#assert existence of
#commonfile
source(commonfile)
#bamfile
load(bamfile)
#pwmfile
#load(paste0(pwmdir,pwmid,'.pwmout.RData'))
#tmpdir
bamnames = names(allreads[[1]])
obschrnames=names(allreads)
preads=allreads[[obschrnames[1]]][[1]]$plus
cutat=10
#stablize the variance (helps when there are few high coverage sites).
tfun <- function(x){
y = x
x[x>cutat]=cutat
y[x>0] = sqrt(x[x>0])
y
}
unlink(paste0(tmpdir,'/',pwmid,'/','*tf',pwmid,'*'))
use.w=!is.null(whitelist)
if(use.w){
wtable=read.table(whitelist)
white.list=lapply(levels(wtable[,1]),function(i){
wtchr=wtable[wtable[,1]==i,]
ir=IRanges(wtchr[,2],wtchr[,3])
})
names(white.list)=levels(wtable[,1])
}
dir.create(paste0(tmpdir,'/',pwmid),recursive=T)
makeTFmatrix <- function(coords,prefix='',offset=0){
cwidth = width(coords[[1]][1])
obschrnames=names(allreads)
validchr = obschrnames[which(obschrnames%in%ncoords)]
readcov=lapply(1:length(bamnames),function(j){
sapply(validchr,function(i){length(allreads[[i]][[j]]$plus)+length(allreads[[i]][[j]]$minus)})/seqlengths(genome)[validchr]
})
readfact = lapply(readcov,function(i){i/i[1]})
slen = seqlengths(genome)
scrd =sapply(coords,length)
minbgs=floor(max(10000,sum(scrd))*(scrd/sum(scrd)));
for(chr in validchr){
chrlen = slen[chr]
print(chr)
chrcoord=coords[[chr]]
pos.mat = do.call(rBind,lapply(1:length(bamnames),function(i){
pluscoord=allreads[[chr]][[i]]$plus
if(length(pluscoord)>0){
rre = rle(sort(pluscoord))
irp=IRanges(start=rre$values,width=1)
fos=findOverlaps(chrcoord,irp)
uquery=queryHits(fos)
querycoord=rre$values[subjectHits(fos)]
uoffset = querycoord-start(chrcoord)[uquery]+1
rval= rre$lengths[subjectHits(fos)] / readfact[[i]][chr]
pos.triple = cbind(round(uquery),round(uoffset),tfun(rval))
pos.mat=sparseMatrix(i=round(uoffset),j=round(uquery),x=tfun(rval),dims=c(2*wsize,length(chrcoord)),giveCsparse=T)
}else{
pos.triple=cbind(1,1,0)
pos.mat=Matrix(0,nrow=2*wsize,ncol=length(chrcoord))
}
pos.mat
}))
#
neg.mat = do.call(rBind,lapply(1:length(bamnames),function(i){
minuscoord=allreads[[chr]][[i]]$minus
if(length(minuscoord)>0){
rre = rle(sort(minuscoord))
irp=IRanges(start=rre$values,width=1)
fos=findOverlaps(chrcoord,irp)
uquery=queryHits(fos)
querycoord=rre$values[subjectHits(fos)]
uoffset = querycoord-start(chrcoord)[uquery]+1
rval= rre$lengths[subjectHits(fos)] / readfact[[i]][chr]
neg.triple = cbind(round(uquery),round(uoffset),tfun(rval))
neg.mat=sparseMatrix(i=round(uoffset),j=round(uquery),x=tfun(rval),dims=c(2*wsize,length(chrcoord)),giveCsparse=T)
}else{
neg.triple=cbind(1,1,0)
neg.mat=Matrix(0,nrow=2*wsize,ncol=length(chrcoord))
}
neg.mat
}))
#
save(pos.mat,neg.mat,file=paste0(tmpdir,'/',pwmid,'/',prefix,'tf',pwmid,'-',chr,'.RData'))
gc()
}
}
load(paste0(pwmdir,pwmid,'.pwmout.RData'))
coords2=sapply(coords.short,flank,width=wsize,both=T)
makeTFmatrix(coords2,'positive.')
load(paste0(pwmdir.bg,pwmid,'.pwmout.RData'))
coords2=sapply(coords.short,flank,width=wsize,both=T)
makeTFmatrix(coords2,'background.',10000)
load(paste0(pwmdir,pwmid,'.pwmout.RData'))
#
#####