-
Notifications
You must be signed in to change notification settings - Fork 0
/
trycycler_medaka.sh
executable file
·125 lines (109 loc) · 3.54 KB
/
trycycler_medaka.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/bin/bash
# This script Trycycler consensus sequences using Medaka.
# Copyright (C) 2024 Yu Wan <wanyuac@gmail.com>
# Licensed under the GNU General Public Licence version 3 (GPLv3) <https://www.gnu.org/licenses/>.
# First version: 7 November 2024; last update: 17 November 2024
# Help information ###############
display_usage() {
echo "
This script polishes Trycycler consensus sequences using Medaka and after the read partitioning.
Parameters:
-d=*: The parental directory of cluster_* subdirectories generated by Trycycler (default: 2_clusters)
-m=*: The basecalling model
-s=*: Sample name for output filenames (default: isolate)
-o=*: Directory for concatenated consensus sequences with and without Medaka polishing (default: 3_polish)
-t=*: Number of threads (default: 1)
Example useage:
~/bin/Assembly_toolkit/trycycler_medaka.sh -d=2_clusters -m=r941_min_hac_g507 -s=isolate1 -o=3_polish -t=16 > trycycler_medaka.log
Dependency: medaka (medaka_consensus)
"
}
# Main ###############
if [ -z "$1" ]
then
display_usage
exit
fi
# Set default values
parental_dir='2_clusters'
sample_name='isolate'
polish_dir='3_polish'
t=1
# Read parameters
for arg in "$@"
do
case $arg in
-d=*)
parental_dir="${arg#*=}"
;;
-m=*)
model="${arg#*=}"
;;
-s=*)
sample_name="${arg#*=}"
;;
-o=*)
polish_dir="${arg#*=}"
;;
-t=*)
t="${arg#*=}"
;;
*)
;;
esac
done
# Sanity check
if [ ! -d "$parental_dir" ]
then
echo "Parameter error: $parental_dir was not found. Exit."
exit 1
fi
if [ -z "$model" ]
then
echo "Parameter error: A model must be specified."
fi
if [ $t -lt 1 ]
then
echo "Parameter error: the number of threads is negative. Reset it to 1."
t=1
fi
medaka_version=`medaka --version`
echo "Sample: $sample_name"
echo "Input directory: $parental_dir"
echo "Model: $model"
echo "Threads: $t"
echo "$medaka_version"
# Run Medaka
medaka_log="$parental_dir/1_${sample_name}_trycycler_consensus_medaka.log"
if [ -f "$medaka_log" ]
then
echo "Remove existing log file $medaka_log"
rm $medaka_log # Remove the log file if it exists before this run
fi
for cluster_dir in `ls -d -1 $parental_dir/cluster_*`
do
d=`basename $cluster_dir` # cluster_*
reads="$cluster_dir/4_reads.fastq"
consensus_fasta="$cluster_dir/7_final_consensus.fasta"
medaka_temp_outdir="$cluster_dir/medaka_tmp"
if [ -f "$reads" ] && [ -f "$consensus_fasta" ]
then
echo "[$(date)]Polish $consensus_fasta with $reads"
medaka_consensus -i "$reads" -d "$consensus_fasta" -o "$medaka_temp_outdir" -m "$model" -t "$t" >> "$medaka_log" 2>&1
mv "$medaka_temp_outdir/consensus.fasta" "$cluster_dir/8_medaka.fasta"
rm -r "$medaka_temp_outdir" $cluster_dir/*.fai $cluster_dir/*.mmi
else
echo "Input error: inputs $reads and/or $consensus_fasta were not found."
exit 1
fi
done
# Concatenate unpolished and polished consensus sequences
if [ ! -d "$polish_dir" ]
then
echo "[$(date)] Create the output directory $polish_dir"
mkdir -p "$polish_dir"
fi
mv $medaka_log $polish_dir
echo "[$(date)] Concatenate consensus sequences and save them in subdirectory $polish_dir"
cat "$parental_dir"/cluster_*/7_final_consensus.fasta > "$polish_dir/1_${sample_name}_trycycler_consensus_raw.fasta" # Unpolished
cat "$parental_dir"/cluster_*/8_medaka.fasta > "$polish_dir/2_${sample_name}_trycycler_consensus_medaka.fasta" # Medaka-polished