forked from awilderman/HiC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcorrect_and_call.sh
205 lines (171 loc) · 7.15 KB
/
correct_and_call.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# reveiw diagnostic plot and correct
export MAIL=awilderman@uchc.edu
export MCSOURCE="/home/FCAM/awilderman/ANALYSIS/HiC/Human/CS17_03-29-21/HiCExplorer"
export OUTDIR=/home/FCAM/awilderman/ANALYSIS/HiC/CNCC/HiCExplorer
export LABEL=WT_CNCC
export RESOLUTION=10000
export RESONE=20K
export RESTWO=50K
export RESTHREE=100K
export RESFOUR=500K
export REGION=chr7:25000000-28000000
export GENEFILE="/home/FCAM/awilderman/ANALYSIS/HiC/Human/CS17_03-29-21/HiCExplorer/region_genes.bed"
export DEPTH=3000000 #span of region in bp
echo "#!/bin/bash
#SBATCH --job-name=HiCExplorer_correct_call_TADs_Loops
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 16
#SBATCH --partition=general
#SBATCH --qos=general
#SBATCH --mem-per-cpu=4G
#SBATCH --mail-type=BEGIN,END
#SBATCH --mail-user="$MAIL"
#SBATCH -o HiCExplorer_correct_%j.out
#SBATCH -e HiCExplorer_correct_%j.err
source "$MCSOURCE"/.bashrc_miniconda3
conda activate "$MCSOURCE"/hicexplorer
cd "$OUTDIR"
## Correct and Compare normalized matrices, use merged bins for best resolution
# Correct (ICE normalize using filter thresholds decided on using the diagnostic plot)
hicCorrectMatrix correct --correctionMethod ICE -m "$LABEL"_"$RESOLUTION".h5 --filterThreshold -T T -o "$LABEL"_"$RESOLUTION"_corrected_matrix.h5
hicCorrectMatrix correct --correctionMethod ICE -m "$LABEL"_"$RESOLUTION"merged_to_"$RESONE"_matrix.h5 --filterThreshold -T T -o "$LABEL"_"$RESOLUTION"merged_to_"$RESONE"_corrected_matrix.h5
hicCorrectMatrix correct --correctionMethod ICE -m "$LABEL"_"$RESOLUTION"merged_to_"$RESTWO"_matrix.h5 --filterThreshold -T T -o "$LABEL"_"$RESOLUTION"merged_to_"$RESTWO"_corrected_matrix.h5
hicCorrectMatrix correct --correctionMethod ICE -m "$LABEL"_"$RESOLUTION"merged_to_"$RESTHREE"_matrix.h5 --filterThreshold -T T -o "$LABEL"_"$RESOLUTION"merged_to_"$RESTHREE"_corrected_matrix.h5
hicCorrectMatrix correct --correctionMethod ICE -m "$LABEL"_"$RESOLUTION"merged_to_"$RESFOUR"_matrix.h5 --filterThreshold -T T -o "$LABEL"_"$RESOLUTION"merged_to_"$RESFOUR"_corrected_matrix.h5
# Plot region of interest at three higher resolutions (e.g. 10Kb, 20Kb, 50Kb)
hicPlotMatrix -m \
"$LABEL"_"$RESOLUTION"_corrected_matrix.h5 \
--clearMaskedBins \
--region "$REGION" \
-o "$LABEL"_"$RESOLUTION"_corrected_plot.png
hicPlotMatrix -m \
"$LABEL"_"$RESOLUTION"merged_to_"$RESONE"_corrected_matrix.h5 \
--clearMaskedBins \
--region "$REGION" \
-o "$LABEL"_"$RESOLUTION"merged_to_"$RESONE"_corrected_plot.png
hicPlotMatrix -m \
"$LABEL"_"$RESOLUTION"merged_to_"$RESTWO"_corrected_matrix.h5 \
--clearMaskedBins \
--region "$REGION" \
-o "$LABEL"_"$RESOLUTION"merged_to_"$RESTWO"_corrected_plot.png
# Plot each whole chromosome as a separate figure, two lower resolutions (e.g. 100Kb, 500Kb)
hicPlotMatrix -m \
"$LABEL"_"$RESOLUTION"merged_to_"$RESTHREE"_corrected_matrix.h5 \
--clearMaskedBins \
--perChromosome \
-o "$LABEL"_"$RESOLUTION"merged_to_"$RESTHREE"_corrected_plot_perChrom.png
hicPlotMatrix -m \
"$LABEL"_"$RESOLUTION"merged_to_"$RESFOUR"_corrected_matrix.h5 \
--clearMaskedBins \
--perChromosome \
-o "$LABEL"_"$RESOLUTION"merged_to_"$RESFOUR"_corrected_plot_perChrom.png
# call TADs
hicFindTADs -m "$LABEL"_"$RESOLUTION"merged_to_"$RESONE"_corrected_matrix.h5 \
--outPrefix "$LABEL"_"$RESOLUTION"merged_to_"$RESONE"_corrected_1_05_TADs \
--correctForMultipleTesting fdr \
--thresholdComparisons 0.1 \
--delta 0.05 \
-p 16
hicFindTADs -m "$LABEL"_"$RESOLUTION"merged_to_"$RESTWO"_corrected_matrix.h5 \
--outPrefix "$LABEL"_"$RESOLUTION"merged_to_"$RESTWO"_corrected_1_05_TADs \
--correctForMultipleTesting fdr \
--thresholdComparisons 0.1 \
--delta 0.05 \
-p 16
hicFindTADs -m "$LABEL"_"$RESOLUTION"merged_to_"$RESTHREE"_corrected_matrix.h5 \
--outPrefix "$LABEL"_"$RESOLUTION"merged_to_"$RESTHREE"_corrected_1_05_TADs \
--correctForMultipleTesting fdr \
--thresholdComparisons 0.1 \
--delta 0.05 \
-p 16
hicFindTADs -m "$LABEL"_"$RESOLUTION"merged_to_"$RESFOUR"_corrected_matrix.h5 \
--outPrefix "$LABEL"_"$RESOLUTION"merged_to_"$RESFOUR"_corrected_1_05_TADs \
--correctForMultipleTesting fdr \
--thresholdComparisons 0.1 \
--delta 0.05 \
-p 16
### Create the .ini file
echo \"[x-axis]
where = top
[hic matrix]
file = "$LABEL"_"$RESOLUTION"_corrected_matrix.h5
title = "$LABEL"_"$RESOLUTION"
# depth is the maximum distance plotted in bp. In Hi-C tracks
# the height of the track is calculated based on the depth such
# that the matrix does not look deformed
depth = "$DEPTH"
colormap = Reds
file_type = hic_matrix
show_masked_bins = false
[tads]
file = "$LABEL"_"$RESOLUTION"merged_to_"$RESFOUR"_corrected_1_05_TADs_domains.bed
file_type = domains
title = "$RESFOUR" TADs
display=triangles
border_color = black
color = none
# the tads are overlay over the hic-matrix
# the share-y options sets the y-axis to be shared
# between the Hi-C matrix and the TADs.
overlay_previous = share-y
[spacer]
height = 0.5
[hic matrix]
file = "$LABEL"_"$RESOLUTION"_corrected_matrix.h5
title = "$LABEL"_"$RESOLUTION"
# depth is the maximum distance plotted in bp. In Hi-C tracks
# the height of the track is calculated based on the depth such
# that the matrix does not look deformed
depth = "$DEPTH"
colormap = Reds
file_type = hic_matrix
show_masked_bins = false
[tads]
file = "$LABEL"_"$RESOLUTION"merged_to_"$RESTHREE"_corrected_1_05_TADs_domains.bed
file_type = domains
title = "$RESTHREE" TADs
display=triangles
border_color = black
color = none
# the tads are overlay over the hic-matrix
# the share-y options sets the y-axis to be shared
# between the Hi-C matrix and the TADs.
overlay_previous = share-y
[spacer]
height = 0.5
[hic matrix]
file = "$LABEL"_"$RESOLUTION"_corrected_matrix.h5
title = "$LABEL"_"$RESOLUTION"
# depth is the maximum distance plotted in bp. In Hi-C tracks
# the height of the track is calculated based on the depth such
# that the matrix does not look deformed
depth = "$DEPTH"
colormap = Reds
file_type = hic_matrix
show_masked_bins = false
[tads]
file = "$LABEL"_"$RESOLUTION"merged_to_"$RESTWO"_corrected_1_05_TADs_domains.bed
file_type = domains
title = "$RESTWO" TADs
display=triangles
border_color = black
color = none
# the tads are overlay over the hic-matrix
# the share-y options sets the y-axis to be shared
# between the Hi-C matrix and the TADs.
overlay_previous = share-y
[spacer]
height = 0.5
[bed file test]
file = "$GENEFILE"
height = 10
title = Genes
fontsize = 12
file_type = bed\" > hic_"$LABEL"_track.ini
hicPlotTADs --tracks hic_"$LABEL"_track.ini -o "$LABEL"_hic_track.png --region "$REGION"
hicDetectLoops -m "$LABEL"_"$RESOLUTION"_corrected_matrix.h5 -o "$LABEL"_"$RESOLUTION"_loops.bedgraph --maxLoopDistance 4000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.05
hicPlotMatrix -m "$LABEL"_"$RESOLUTION"_corrected_matrix.h5 -o "$LABEL"_"$RESOLUTION"_loops_plot.pdf --log1p --region "$REGION" --loops "$LABEL"_"$RESOLUTION"_loops.bedgraph
hicDetectLoops -m "$LABEL"_"$RESOLUTION"_corrected_matrix.h5 -o "$LABEL"_"$RESOLUTION"_loops_p1.bedgraph --maxLoopDistance 4000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.1
hicPlotMatrix -m "$LABEL"_"$RESOLUTION"_corrected_matrix.h5 -o "$LABEL"_"$RESOLUTION"_loops_p1_plot.pdf --log1p --region "$REGION" --loops "$LABEL"_"$RESOLUTION"_loops_p1.bedgraph
conda deactivate" > hicexplorer_${LABEL}_TADs_and_Loops.txt