-
Notifications
You must be signed in to change notification settings - Fork 1
/
hmmer-run_SF.sh
executable file
·73 lines (54 loc) · 1.93 KB
/
hmmer-run_SF.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
set -o pipefail
set -o errexit
if [[ $# -ne 3 ]]
then
echo "usage: $0 input_folder path_and_name_of_the_profile number_of_threads" 1>&2
exit 1
fi
if [ ! -d $1 ]
then
echo "$1 is not a directory!" 1>&2
exit 1
fi
if [ ! -s $2 ]
then
echo "$2 file not found!" 1>&2
exit 1
fi
dir=$1
hmm_profile_name=$2
threads=$3
# FEDOR: I usually use .fa extension
files=( $(find "$dir" -type f -name '*.fa' -print) )
p="$(basename "$hmm_profile_name")"
bp="${p%.*}"
for filename in "${files[@]}"
do
n="$(basename "$filename")"
bn="${n%.*}"
echo "$filename"
# HMMER analysis
nhmmer --cpu $threads --notextw --noali --tblout nhmmer-$bp-vs-$bn-tbl.out -o /dev/null $hmm_profile_name $filename
# Converting HMM table output to the BED format with filtering by threshold score to length
# https://github.com/enigene/hmmertblout2bed
awk -v th=0.7 -f hmmertblout2bed.awk nhmmer-$bp-vs-$bn-tbl.out > nhmmer-$bp-vs-$bn-tbl.bed
# FEDOR: remove huge .out file as soon as it's used
rm nhmmer-$bp-vs-$bn-tbl.out
# Sorting by name and coordinates
# recommended running sort with option --temporary-directory=/path/to/another/local/physical/disk
sort -k 1.4,1 -k 2,2n nhmmer-$bp-vs-$bn-tbl.bed > _nhmmer-t0-$bn.bed
# Filter by score in each region
bedmap --max-element --fraction-either 0.1 _nhmmer-t0-$bn.bed > _nhmmer-t1-$bn.bed
# Filter unique elements
# FEDOR: don't skip SF monomers
awk "{if(!(\$0 in a)){a[\$0]; print}}" _nhmmer-t1-$bn.bed > _nhmmer-t0-$bn.bed
# FEDOR: addicional overlap filtering
python3 overlap_filter.py _nhmmer-t0-$bn.bed > AS-SF-vs-$bn.bed
# FEDOR: AS-strand annotation. "+" is blue, "-" is red
awk -F $'\t' 'BEGIN {OFS = FS} {if ($6=="+") {$9="0,0,255"}; if ($6=="-") {$9="255,0,0"} print $0}' AS-SF-vs-$bn.bed > AS-strand-vs-$bn.bed
# Delete temporary files
rm _nhmmer-t1-$bn.bed
rm _nhmmer-t0-$bn.bed
rm nhmmer-$bp-vs-$bn-tbl.bed
done