-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathseqimpSummarise
executable file
·77 lines (63 loc) · 2.55 KB
/
seqimpSummarise
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash
# Tommaso Leonardi, 2016
# Reads a SeqIMP description file and produces
# a properly formatted summary table of counts.
# Check proper number of arguments: exactly two are required
# (the analysis folder and the description file).
if [[ $# -ne 2 ]]; then
  # The usage text goes to stderr so it can never end up in a redirected
  # summary table, and the exit status is non-zero because a wrong invocation
  # is a failure, like every other error path of this script.
  # The script name is taken from $0 so the text matches the installed name.
  cat >&2 <<EOF

Usage: ${0##*/} analysis_folder description.txt
analysis folder: path to the analysis folder created by SequenceImp
description.txt: description file submitted to SequenceImp

EOF
  exit 1
fi
# Check if the analysis directory exists.
# "$1" is quoted so that an empty argument or a path containing spaces is
# tested as a single word; unquoted, `[ ! -d ]` is false for an empty argument
# and the check would be skipped silently.
if [[ ! -d "$1" ]]; then
  # Diagnostics go to stderr: stdout is reserved for the summary table.
  echo "$1: doesn't exist or is not a directory" >&2
  echo "Please provide a valid path to the analysis directory" >&2
  exit 1
fi
ANALYSIS_DIR=$1
# Check if the description file exists.
# "$2" is quoted so that an empty argument or a path containing spaces is
# tested as a single word instead of breaking the test expression.
if [[ ! -f "$2" ]]; then
  # Diagnostics go to stderr: stdout is reserved for the summary table.
  echo "$2: No such file" >&2
  echo "Please provide a valid description file" >&2
  exit 1
fi
DESCRIPTION_FILE=$2
# Load the sample names (first whitespace-separated column of the description
# file, first line skipped) into the FILES array, one element per sample.
# The names are read line by line instead of word-splitting an unquoted
# command substitution, so a name containing glob characters (*, ?, [) is
# never expanded against the current directory. Blank lines are ignored.
FILES=()
while IFS= read -r sample_name; do
  FILES+=("$sample_name")
done < <(awk 'NR > 1 && NF > 0 { print $1 }' "$DESCRIPTION_FILE")
unset sample_name

# Without at least one sample there is nothing to summarise.
if [[ ${#FILES[@]} -eq 0 ]]; then
  echo "$DESCRIPTION_FILE: no samples found after the first line" >&2
  exit 1
fi
# Check that all count files exist.
#
# require_count_files ANALYSIS_DIR DESCRIPTION SAMPLE...
#   ANALYSIS_DIR  analysis folder holding one sub-folder per sample
#   DESCRIPTION   name of the description file, used only in the error message
#   SAMPLE...     sample names
# Returns 0 when
#   <ANALYSIS_DIR>/<SAMPLE>/miRNA_ANALYSIS/<SAMPLE>.mature.counts.txt
# is a regular file for every sample. Otherwise the first missing file is
# reported on stderr (stdout is reserved for the summary table) and 1 is
# returned. Paths are quoted so folders containing spaces are handled.
require_count_files() {
  local analysis_dir=$1 description=$2
  shift 2
  local sample counts_file
  for sample in "$@"; do
    counts_file="$analysis_dir/$sample/miRNA_ANALYSIS/$sample.mature.counts.txt"
    if [[ ! -f "$counts_file" ]]; then
      echo "$counts_file: No such file" >&2
      echo "Please make sure that all files described in $description have a corresponding count files" >&2
      return 1
    fi
  done
  return 0
}
require_count_files "$ANALYSIS_DIR" "$2" "${FILES[@]}" || exit 1
# Temporary files used while merging: TMP holds the table merged so far and
# TMP2 receives the result of each further join before it replaces TMP.
TMP=
TMP2=

# Remove the temporary files. Installed as the EXIT trap so it runs on every
# exit path; names that were never assigned are skipped, and a file that is
# already gone is not an error (rm -f).
cleanup() {
  local tmp_file
  for tmp_file in "$TMP" "$TMP2"; do
    if [[ -n "$tmp_file" ]]; then
      rm -f -- "$tmp_file"
    fi
  done
}
# The trap is installed before the files are created so that TMP is removed
# even when creating TMP2 fails.
trap cleanup EXIT

TMP="$(mktemp -p /tmp/)" || {
  echo "Could not create a temporary file in /tmp" >&2
  exit 1
}
TMP2="$(mktemp -p /tmp/)" || {
  echo "Could not create a temporary file in /tmp" >&2
  exit 1
}
# Join the count files of all samples into one tab-separated table.
#
# merge_count_files ANALYSIS_DIR MERGED SCRATCH SAMPLE...
#   ANALYSIS_DIR  analysis folder holding one sub-folder per sample
#   MERGED        file that receives the merged table
#   SCRATCH       scratch file that receives each intermediate join
#   SAMPLE...     sample names, in the order their columns should appear
# Each sample is read from
#   <ANALYSIS_DIR>/<SAMPLE>/miRNA_ANALYSIS/<SAMPLE>.mature.counts.txt
# and joined on its 4th tab-separated column. A line of MERGED is the join key
# followed by the remaining columns of the first sample, then those of the
# second sample, and so on. Keys absent from the first sample are dropped;
# a key absent from a later sample yields a line without that sample's
# columns (join -a 1).
# Returns 0 on success (with no sample given MERGED is left untouched) and 1
# as soon as a join or mv step fails.
merge_count_files() {
  local analysis_dir=$1 merged=$2 scratch=$3
  shift 3
  (( $# > 0 )) || return 0

  local sample counts_file
  local first=1
  for sample in "$@"; do
    counts_file="$analysis_dir/$sample/miRNA_ANALYSIS/$sample.mature.counts.txt"
    # sort is told to split on tabs, exactly like join, so both tools agree on
    # what the 4th field is even when a field is empty or contains spaces.
    if (( first )); then
      # Joining against /dev/null with -a 1 rewrites the first file in join's
      # output layout (key first). Every later sample, and a lone sample too,
      # then goes through the very same join step.
      join -1 4 -2 4 -a 1 -t $'\t' \
        <(sort -t $'\t' -k4,4 -- "$counts_file") /dev/null > "$merged" || return 1
      first=0
    else
      join -1 1 -2 4 -a 1 -t $'\t' \
        "$merged" <(sort -t $'\t' -k4,4 -- "$counts_file") > "$scratch" || return 1
      mv -- "$scratch" "$merged" || return 1
    fi
  done
  return 0
}
if ! merge_count_files "$ANALYSIS_DIR" "$TMP" "$TMP2" "${FILES[@]}"; then
  echo "Merging the count files failed" >&2
  exit 1
fi
# Do some basic error checking, making sure that the column right after the
# join key holds the same value for every sample on each merged line.
# NOTE(review): the original comment calls these the 'precursor' columns,
# while the summary header labels field 2 'Mature' - confirm which is right.
#
# check_merged_columns MERGED NFILES
#   MERGED  merged tab-separated table: key column, then six columns per sample
#   NFILES  number of samples merged into MERGED
# Compares field 2 (first sample) with fields 8, 14, ... (the same column of
# each later sample). A line that lacks a later sample's columns therefore
# fails the comparison too. The number of the first offending line is reported
# on stderr and a non-zero status is returned; 0 means every line agrees.
check_merged_columns() {
  local merged=$1 nfiles=$2
  # With fewer than two samples there is nothing to compare.
  (( nfiles >= 2 )) || return 0
  awk -v NFILES="$nfiles" 'BEGIN { OFS = FS = "\t" }
  {
    PRE = $2
    for (i = 8; i < NFILES * 6 + 1; i += 6) {
      if ($i != PRE) {
        printf("There was an error with matching lines: %s\n", NR) | "cat >&2"
        exit 1
      }
    }
  }' "$merged"
}
if ! check_merged_columns "$TMP" "${#FILES[@]}"; then
  # Diagnostics go to stderr: stdout is reserved for the summary table.
  echo "Exiting" >&2
  exit 1
fi
# Print only the interesting columns with an appropriate header.
#
# print_summary_table MERGED SAMPLE...
#   MERGED     merged tab-separated table: key column, then six columns per sample
#   SAMPLE...  sample names, in the order their columns appear in MERGED
# Writes to stdout a header line
#   Unique_ID, Mature, Precursor, <SAMPLE>_Mapped_Reads, <SAMPLE>_Unique_Reads ...
# followed by one line per merged row holding fields 1-3 and, for each sample,
# fields 6 and 7 of its six-column group (6/7, 12/13, ...).
# The header is always printed first. The merged table is sorted, so its own
# 'Unique_ID' row can sit anywhere among the data rows; that row is skipped
# rather than used as the trigger for printing the header.
# Returns awk's exit status, or 1 when MERGED cannot be read.
print_summary_table() {
  local merged=$1
  shift
  if [[ ! -r "$merged" ]]; then
    echo "$merged: cannot read the merged table" >&2
    return 1
  fi
  # "$*" joins the sample names with the first character of IFS.
  local IFS=' '
  awk -v FILES="$*" 'BEGIN {
    OFS = FS = "\t"
    NFILES = split(FILES, F, " ")
    printf("Unique_ID\tMature\tPrecursor")
    for (i = 1; i <= NFILES; i++) {
      printf("\t%s_Mapped_Reads\t%s_Unique_Reads", F[i], F[i])
    }
    printf("\n")
  }
  $1 != "Unique_ID" {
    printf("%s\t%s\t%s", $1, $2, $3)
    for (i = 6; i < NFILES * 6 + 1; i += 6) {
      MAPPED = i
      UNIQUE = i + 1
      printf("\t%s\t%s", $(MAPPED), $(UNIQUE))
    }
    printf("\n")
  }' "$merged"
}
print_summary_table "$TMP" "${FILES[@]}"