-
Notifications
You must be signed in to change notification settings - Fork 0
/
trycycler_reconcile.sh
executable file
·109 lines (100 loc) · 3.23 KB
/
trycycler_reconcile.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/bin/bash
# This is a wrapper for the 'trycycler reconcile' command.
# Copyright (C) 2024 Yu Wan <wanyuac@gmail.com>
# Licensed under the GNU General Public Licence version 3 (GPLv3) <https://www.gnu.org/licenses/>.
# First version: 6 November 2024; last update: 7 November 2024
# Help information ###############
display_usage() {
echo "
This script renames bad contigs to exclude them from inputs of command 'trycycler reconcile'.
Parameters:
-r=*: The input long reads
-d=*: The input directory or the output directory of 'trcycyler cluster', which serves as the input directory for 'trycycler reconcile' (default: 2_clusters)
-s=*: The common suffix to add to log files (default: none)
-t=*: Number of threads (default: 1)
-p: Generate a pairwise dot plot for contigs in each cluster
Example useage:
~/bin/Assembly_toolkit/trycycler_reconcile.sh -i=2_clusters -n=3 -s=r1 -t=16 -p
Dependencies: trycycler, perl.
"
}
# Main ###############
if [ -z "$1" ]
then
display_usage
exit
fi
# Set default values
dir_in='2_clusters'
t=1
draw_plot=false
# Read parameters
for arg in "$@"
do
case $arg in
-r=*)
reads="${arg#*=}"
;;
-d=*)
dir_in="${arg#*=}"
;;
-s=*)
s="${arg#*=}"
;;
-t=*)
t="${arg#*=}"
if [ $t -lt 1 ]
then
echo "Error: the number of threads is negative. Reset to 1."
t=1
fi
;;
-p)
draw_plot=true
;;
*)
;;
esac
done
# Determine the number of clusters
if [ -d "$dir_in" ]
then
n=$(ls -d -1 $dir_in/cluster_*/ | wc -l)
else
echo "Error: input directory $dir_in was not found. Exit"
exit 1
fi
# Run 'trycycler reconcile'
if [ -f "$reads" ]
then
echo "Read file: $reads"
echo "Input directory: $dir_in"
echo "Number of clusters: $n"
echo "Number of threads: $t"
for i in `seq 1 ${n}`
do
c=$(printf "%03d" "$i") # '001', '002', '003', ...
echo "Reconcile contigs in cluster $c"
if [ ! -z "$s" ]
then
log_filename="reconcile_cluster_${c}_${s}" # Filenames do not start from "cluster_${c}" to avoid confusion with the cluster_* glob for the 'trycycler partition' command.
else
log_filename="reconcile_cluster_${c}"
fi
cluster_dir="$dir_in/cluster_${c}"
trycycler reconcile --reads $reads --cluster_dir "$cluster_dir" --threads $t > "${log_filename}.tmp" 2>&1
log_file="$dir_in/${log_filename}.log"
perl -pe 's/\x1b\[[0-9;]*[mG]//g' "${log_filename}.tmp" > "$log_file" # Remove colour code from the original log file
rm "${log_filename}.tmp"
echo "Cluster $c has been reconciled. Log file: ${log_file}."
if [ "$draw_plot" == true ]
then
dotplot="$cluster_dir/cluster_${c}_dotplots.png"
echo "Create a pairwise dotplot $dotplot"
trycycler dotplot --cluster_dir "$cluster_dir" # Create dotplots.png in the cluster directory
mv "$cluster_dir/dotplots.png" "$dotplot" # To make it easier to pool figures into a single directory
fi
done
else
echo "Error: read file $reads was not found. Exit."
fi