-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDeepLearningPEAS.sh
90 lines (69 loc) · 2.81 KB
/
DeepLearningPEAS.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/bin/bash
# Extracts per-peak features for a set of input peaks and merges them into
# <prefix>_features.txt.
#
# Positional arguments (read purely by position; nothing is validated here):
#   $1  inDir         directory that contains <prefix>.bam
#   $2  prefix        sample prefix used for the BAM and all output file names
#   $3  outDir        output directory; peak_features/ and denovo/ live under it
#   $4  fasta         genome argument handed to findMotifsGenome.pl
#   $5  homerref      genome/reference argument handed to annotatePeaks.pl
#   $6  homermotifs   motif file passed to annotatePeaks.pl via -m
#   $7  conservation  conservation input passed to PEASTools "conservation"
#   $8  ctcfmotifs    CTCF motif file passed to annotatePeaks.pl via -m
#   $9  inputpeaks    peak file every stage operates on
#   $10 jarpath       directory holding PEASTools.jar (a trailing "/" is added)
#   $11 CHRFILE       text file with one chromosome name per line
#   $12 keepdups      forwarded verbatim to PEASTools (may be omitted)
#
# Missing arguments simply yield empty variables, exactly as before.
args=("$@")

inDir="${args[0]}"
prefix="${args[1]}"
outDir="${args[2]}"
fasta="${args[3]}"
homerref="${args[4]}"
homermotifs="${args[5]}"
conservation="${args[6]}"
ctcfmotifs="${args[7]}"
inputpeaks="${args[8]}"
# The jar is later addressed as "${jarpath}PEASTools.jar", hence the slash.
jarpath="${args[9]}/"
CHRFILE="${args[10]}"
keepdups="${args[11]}"
#######################################
# Load chromosome names, one per line, into the global CHROMOSOMES array.
# Blank lines are skipped and a final line without a trailing newline is kept.
# Globals:   CHROMOSOMES (written; reset to empty first)
# Arguments: $1 - path to the chromosome list file
# Returns:   0 on success, 1 if the file cannot be read
#######################################
load_chromosomes() {
  local list_file=$1
  local line
  CHROMOSOMES=()
  if [[ ! -r "${list_file}" ]]; then
    printf 'Cannot read chromosome list: %s\n' "${list_file}" >&2
    return 1
  fi
  # "|| [[ -n ... ]]" keeps the last line when the file lacks a final newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    [[ -n "${line}" ]] || continue
    CHROMOSOMES+=("${line}")
  done < "${list_file}"
}
# As before, an unreadable list leaves CHROMOSOMES empty and the script goes on.
load_chromosomes "${CHRFILE}"
# Create (if necessary) and enter ${outDir}/peak_features.
# Every later step creates, reads and removes files relative to the current
# directory, so abort rather than continue in the wrong place.
# mkdir -p makes a re-run into an existing output directory succeed quietly.
cd "${outDir}" || exit 1
mkdir -p peak_features || exit 1
cd peak_features || exit 1
## Start with peaks and extract features.
## Stage the input BAM under the name the later steps use and index it.
## NOTE: the sort below is disabled, so the file is copied as-is and is only
## sorted if the input already was.
## TODO: add an option to sort here when the input is not already sorted.
#echo "--- Sorting bam file. ---"
#echo "${inDir}/${prefix}.bam"
#samtools sort -T PEASEXTRACT_${prefix} -o ${prefix}_sorted.bam "${inDir}/${prefix}.bam"
cp "${inDir}/${prefix}.bam" "${prefix}_sorted.bam"
samtools index "${prefix}_sorted.bam"
echo "--- Calling annotations & known motifs. ---"
# HOMER annotation of the input peaks against ${homerref}, scanning the known
# motif file given with -m; the report is written to
# <prefix>_peaks_annotated.bed in the current directory.
annotatePeaks.pl "${inputpeaks}" "${homerref}" -m "${homermotifs}" -nmotifs > "${prefix}_peaks_annotated.bed"
#######################################
# Collect the de novo motifs found by HOMER into one motif file.
# Copies <dir>/homerResults/*.motif into <dir>/merge, drops the "*.similar*"
# and "*RV.motif" variants, and concatenates what is left into
# <dir>/merge/merged.motifs. The merged file is rewritten on every call so a
# re-run does not append the same motifs twice.
# Arguments: $1 - HOMER de novo output directory
# Returns:   0 on success, non-zero if a step fails
#######################################
merge_denovo_motifs() {
  local denovo_dir=$1
  local merge_dir="${denovo_dir}/merge"
  mkdir -p "${merge_dir}" || return 1
  cp "${denovo_dir}/homerResults/"*.motif "${merge_dir}" || return 1
  # -f: it is fine if there are no similar/reverse variants to remove.
  rm -f "${merge_dir}/"*.similar* "${merge_dir}/"*RV.motif
  # "merged.motifs" does not match *.motif, so it never feeds back into itself.
  cat "${merge_dir}/"*.motif > "${merge_dir}/merged.motifs"
}

# Call de novo motifs on the input peaks.
echo "--- Calling denovo motifs. ---"
findMotifsGenome.pl "${inputpeaks}" "${fasta}" "${outDir}/denovo"
merge_denovo_motifs "${outDir}/denovo"
# Annotate the peaks again, this time against the merged de novo motif file.
annotatePeaks.pl "${inputpeaks}" "${homerref}" -m "${outDir}/denovo/merge/merged.motifs" -nmotifs > "${prefix}_peaks_denovo.bed"
echo "--- Calling CTCF motifs. ---"
# Same HOMER annotation as for the known motifs, but scanning the CTCF motif
# file; the report is written to <prefix>_peaks_ctcf.bed.
annotatePeaks.pl "${inputpeaks}" "${homerref}" -m "${ctcfmotifs}" -nmotifs > "${prefix}_peaks_ctcf.bed"
# Get the insert size threshold used to remove outlier inserts.
echo "--- Getting insert size threshold. ---"
# keepdups is optional: ${keepdups:+...} passes it as one argument when set
# and omits it entirely when empty, matching the previous unquoted behaviour.
java -jar "${jarpath}PEASTools.jar" insertsizethresh "${prefix}_sorted.bam" "${outDir}/peak_features" "${CHRFILE}" ${keepdups:+"${keepdups}"}
# The threshold is read back from thresh.txt in the current directory
# (presumably written by the insertsizethresh step above - confirm against
# PEASTools). Without it the insert metrics step cannot run meaningfully.
if [[ ! -s thresh.txt ]]; then
  echo "ERROR: thresh.txt is missing or empty; insert size threshold step failed." >&2
  exit 1
fi
thresh=$(<thresh.txt)
# Get insert features.
#######################################
# Run PEASTools "insertmetrics" once per chromosome and gather the results
# in <prefix>_insertmetrics.txt.
# The combined file is emptied first so a re-run does not append a second
# copy of every row. Each <chr>.bam (expected in the current directory; no
# step in this script creates it directly, presumably insertsizethresh does)
# and each per-chromosome result file is deleted once it has been consumed.
# Globals:   CHROMOSOMES, prefix, jarpath, inputpeaks, thresh, keepdups (read)
# Arguments: none
#######################################
collect_insert_metrics() {
  local merged="${prefix}_insertmetrics.txt"
  local chr per_chr
  : > "${merged}"
  for chr in "${CHROMOSOMES[@]}"; do
    # Blank entries were dropped by the old unquoted expansion; keep doing so.
    [[ -n "${chr}" ]] || continue
    per_chr="${prefix}_${chr}_insertmetrics.txt"
    # keepdups is optional and is omitted entirely when empty.
    java -jar "${jarpath}PEASTools.jar" insertmetrics "${chr}" "${chr}.bam" "${inputpeaks}" "${per_chr}" "${thresh}" ${keepdups:+"${keepdups}"}
    rm -f -- "${chr}.bam"
    cat "${per_chr}" >> "${merged}"
    rm -f -- "${per_chr}"
  done
}
echo "--- Getting insert features. ---"
collect_insert_metrics
# Final stage: score conservation for the peaks, then combine every feature
# table produced so far into <prefix>_features.txt.
peas_jar="${jarpath}PEASTools.jar"
conservation_out="${prefix}_conservation.txt"

echo "--- Getting conservation scores. ---"
java -jar "${peas_jar}" conservation \
  "${inputpeaks}" \
  "${conservation}" \
  "${conservation_out}"

echo "--- Merging features. ---"
# Inputs to "mergedl", in the order the tool is given them.
merge_inputs=(
  "${inputpeaks}"
  "${prefix}_peaks_annotated.bed"
  "${prefix}_insertmetrics.txt"
  "${conservation_out}"
  "${prefix}_peaks_denovo.bed"
  "${prefix}_peaks_ctcf.bed"
)
java -jar "${peas_jar}" mergedl "${merge_inputs[@]}" "${prefix}_features.txt" "MERGED"