-
Notifications
You must be signed in to change notification settings - Fork 0
/
seqbias_pipe.sh
executable file
·201 lines (179 loc) · 6 KB
/
seqbias_pipe.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
timestamp() {
date +"%Y%m%d%H%M%S"
}
ref=$1
chroms=$2
bam=$3
k=$4
outdir=$5
prefix=$6
opt=$7
resume=0
if [[ $opt == "--resume" ]]
then
step=$8
case $step in
baseline)
resume=1;;
bias)
resume=2;;
covariance)
resume=3;;
correct)
resume=4;;
rebias)
resume=5;;
bam)
resume=6;;
bigwig)
resume=7;;
esac
echo "Resuming at step $resume ($step)"
fi
bamnpy=$outdir/$prefix.bam.npy
baseline=$outdir/$prefix.read_50_baseline.csv
kmer_bias=$outdir/$prefix.${k}mer_frequencies.csv
tile_cov=$outdir/$prefix.tile_covariance.npy
corrected_weights=$outdir/$prefix.${k}mer_adjusted.read_weights.npy
set -e
mkdir -p $outdir
# ---------------------------------------------------------
# filter, index, and convert BAM to more usable format
#
if [ ! -e $bamnpy ]
then
echo [$(timestamp)] start: BAM filter, index, convert to npy
echo step begin
read_len=$(samtools view $bam | head -n 1 | awk '{print $10}' - | wc | awk '{print $3}' -)
python bam2npy.py $bam $chroms $bamnpy
echo [$(timestamp)] end: BAM filter, index, convert to npy
else
read_len=$(samtools view $bam | head -n 1 | awk '{print $10}' - | wc | awk '{print $3}' -)
fi
# get read length
echo "$prefix read length: $read_len"
if [ $read_len == 2 ]
then
read_len=20
echo "$prefix read length adjusted to: $read_len"
fi
echo "$prefix read length: $read_len" >> debug.out
# ---------------------------------------------------------
# ---------------------------------------------------------
# compute nucleotide bias
#
if [ $resume -lt 1 ]
then
echo [$(timestamp)] start: Compute nucleotide bias
python compute_bias.py $bamnpy $ref $chroms 1 $outdir/$prefix.allele_frequencies.csv --read_len $read_len
echo [$(timestamp)] end: Compute nucleotide bias
fi
# ---------------------------------------------------------
# ---------------------------------------------------------
# compute baseline (read +- 50)
#
if [ $resume -lt 2 ]
then
echo [$(timestamp)] start: Compute $k-mer baseline
echo step baseline
python compute_baseline.py $bamnpy $ref $chroms $k $baseline
# --mask $alignability_mask
echo [$(timestamp)] end: Compute $k-mer baseline
fi
# ---------------------------------------------------------
# ---------------------------------------------------------
# compute k-mer bias
#
if [ $resume -lt 3 ]
then
echo [$(timestamp)] start: Compute $k-mer bias
echo step bias
python compute_bias.py $bamnpy $ref $chroms $k $kmer_bias --read_len $read_len
echo [$(timestamp)] end: Compute $k-mer bias
fi
# ---------------------------------------------------------
# ---------------------------------------------------------
# compute tile covariance matrix
#
if [ $resume -lt 4 ]
then
echo [$(timestamp)] start: Compute tile covariance matrix
echo step covariance
python correlate_bias.py $bamnpy $ref $chroms $kmer_bias $tile_cov --plot
echo [$(timestamp)] end: Compute tile covariance matrix
fi
# ---------------------------------------------------------
# ---------------------------------------------------------
# correct using k-mer bias and tile covariance matrix
#
if [ $resume -lt 5 ]
then
echo [$(timestamp)] start: Correct bias
echo step correct
python correct_bias.py $bamnpy $ref $chroms $baseline $kmer_bias $outdir/$prefix.${k}mer_adjusted.allele_frequencies.csv $corrected_weights $tile_cov --read_len $read_len
echo [$(timestamp)] end: Correct bias
fi
# ---------------------------------------------------------
if [ $resume -lt 6 ]
then
# ---------------------------------------------------------
# compute corrected nucleotide bias
#
echo [$(timestamp)] start: Compute corrected nucleotide bias
echo step rebias
python compute_bias.py $corrected_weights $ref $chroms 1 $outdir/$prefix.${k}mer_adjusted.allele_frequencies.csv --read_len $read_len
echo [$(timestamp)] end: Compute corrected nucleotide bias
# ---------------------------------------------------------
# ---------------------------------------------------------
# compute corrected k-mer bias
#
echo [$(timestamp)] start: Compute corrected $k-mer bias
python compute_bias.py $corrected_weights $ref $chroms $k $outdir/$prefix.${k}mer_adjusted.${k}mer_frequencies.csv --read_len $read_len
echo [$(timestamp)] end: Compute corrected $k-mer bias
# ---------------------------------------------------------
# ---------------------------------------------------------
# various plots, results, diagnostics
#
echo [$(timestamp)] start: Plot adjusted nucleotide frequencies
python plot_csv.py $outdir/$prefix.allele_frequencies.csv --out $outdir/$prefix.allele_frequencies.png
python plot_csv.py $outdir/$prefix.${k}mer_adjusted.allele_frequencies.csv --out $outdir/$prefix.${k}mer_adjusted.allele_frequencies.png
echo [$(timestamp)] end: Plot adjusted nucleotide frequencies
#
# elsewhere:
# - Compute and plot TFBS bias (before and after correction)
# see bsub_tfbs.sh
# - Correlation with ChIP-seq peaks
# see bsub_compare.sh
# ---------------------------------------------------------
fi
# ---------------------------------------------------------
# Output BAM format with XW weight flag
#
if [ $resume -lt 7 ]
then
echo [$(timestamp)] start: Writing BAM output
echo step bam
python npy2bam.py $chroms $outdir/$prefix.${k}mer_adjusted.read_weights.npy $bam $outdir/$prefix.adjusted.bam --noy --tag
echo [$(timestamp)] end: Writing BAM output
echo [$(timestamp)] start: Indexing BAM
samtools index $outdir/$prefix.adjusted.bam
echo [$(timestamp)] end: Indexing BAM
fi
# ---------------------------------------------------------
# ---------------------------------------------------------
# Convert weighted bam to wig/bw
#
#if [ $resume -lt 8 ]
#then
# echo [$(timestamp)] start: Writing wig/bw output
# echo step bigwig
# # right now, this is fixed to hg19
# sh pileup_wig/wigify.sh $outdir/$prefix.adjusted.bam $outdir/$prefix.adjusted $chroms
# echo [$(timestamp)] end: Writing wig/bw output
#fi
# ---------------------------------------------------------
cd $outdir
cd ..
rm -f last
dir=$(ls -t | head -n 1)
ln -s $dir last