From b75e7f4e450d38515c0acdff6d78f1a4ede6dbe6 Mon Sep 17 00:00:00 2001
From: Michaela Mueller <mumichae@in.tum.de>
Date: Wed, 10 Feb 2021 22:47:28 +1100
Subject: [PATCH 1/2] add error messages to MAE shell scripts

---
 drop/modules/mae-pipeline/MAE/ASEReadCounter.sh | 12 ++++++++++--
 drop/modules/mae-pipeline/MAE/filterSNVs.sh     | 10 ++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh
index ac8e6c2a..d47efef2 100755
--- a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh
+++ b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh
@@ -45,7 +45,6 @@ chr_subset=$(comm -12 <(cut -f1 -d" " ${canonical} | sort -u) <(echo ${vcf_chr}
 chr_subset=$(comm -12 <(echo ${bam_chr} | xargs -n1) <(echo ${chr_subset} | xargs -n1) | uniq)
 
 for chr in $chr_subset; do
-  echo $chr
   $gatk ASEReadCounter \
     -R ${fasta} \
     -I ${bam_file} \
@@ -57,10 +56,19 @@ for chr in $chr_subset; do
     tail -n+2 >>$tmp
 done
 
-echo $mae_id
 cat $tmp | awk -v id="${mae_id}" \
   -F $'\t' 'BEGIN {OFS = FS} NR==1{print $0, "ID"} NR>1{print $0, id}' |
   bgzip >${output}
 rm ${tmp}
 
 zcat ${output} | head
+
+num_out=$(zcat "${output}" | wc -l )
+if [ "${num_out}" -lt 2 ]
+then
+  echo 'ERROR: No allele-specific counts'
+  echo "MAE ID: ${mae_id}"
+  echo "VCF file: ${vcf_file}"
+  echo "BAM file: ${bam_file}"
+  exit 1
+fi
diff --git a/drop/modules/mae-pipeline/MAE/filterSNVs.sh b/drop/modules/mae-pipeline/MAE/filterSNVs.sh
index f428b2b7..9d30e65c 100755
--- a/drop/modules/mae-pipeline/MAE/filterSNVs.sh
+++ b/drop/modules/mae-pipeline/MAE/filterSNVs.sh
@@ -51,5 +51,15 @@ else  # VCF and BAM have same chromosome format
     rm ${tmp}.tbi
 fi
 
+num_out=$(zcat "${output}" | grep -vc '#' )
+if [ "${num_out}" -eq 0 ]
+then
+  echo "ERROR: no entries after filtering for SNVs"
+  echo "VCF ID: ${vcf_id}"
+  echo "VCF file: ${vcf_file}"
+  echo "BAM file: ${bam_file}"
+  exit 1
+fi
+
 $bcftools index -t ${output}
 

From 1c9e6a2429c2419a9d34af66499d7ce1a433ea15 Mon Sep 17 00:00:00 2001
From: Michaela Mueller <mumichae@in.tum.de>
Date: Tue, 25 May 2021 21:25:31 +0200
Subject: [PATCH 2/2] More verbose error message

---
 .../mae-pipeline/MAE/ASEReadCounter.sh        | 24 +++++++++++--------
 drop/modules/mae-pipeline/MAE/filterSNVs.sh   | 10 ++++----
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh
index b358bbb8..a4290269 100755
--- a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh
+++ b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh
@@ -31,8 +31,8 @@ header+="lowBaseQDepth\trawDepth\totherBases\timproperPairs"
 echo -e $header >> $tmp
 
 # get chr format
-bam_chr=$($samtools idxstats ${bam_file} | grep -vP "\t0\t0" | cut -f1) # only chr with coverage
-vcf_chr=$($bcftools view ${vcf_file} | cut -f1 | grep -v '#' | uniq)
+bam_chr=$($samtools idxstats ${bam_file} | grep -vP "\t0\t0" | cut -f1 | sort -u) # only chr with coverage
+vcf_chr=$($bcftools view ${vcf_file} | cut -f1 | grep -v '#' | sort -u)
 if [ "$(echo ${vcf_chr} | grep -c 'chr')" -eq 0 ]; then
   echo "use NCBI format"
   canonical=$ncbi2ucsc
@@ -42,8 +42,8 @@ else
 fi
 
 # subset to standard chromosomes
-chr_subset=$(comm -12 <(cut -f1 -d" " ${canonical} | sort -u) <(echo "${vcf_chr}" | sort -u))
-chr_subset=$(comm -12 <(echo "${bam_chr}" | sort -u) <(echo "${chr_subset}") | uniq)
+chr_subset=$(comm -12 <(cut -f1 -d" " ${canonical} | sort -u) <(echo "${vcf_chr}"))
+chr_subset=$(comm -12 <(echo "${bam_chr}") <(echo "${chr_subset}") | uniq)
 
 for chr in $chr_subset; do
   $gatk ASEReadCounter \
@@ -62,14 +62,18 @@ cat $tmp | awk -v id="${mae_id}" \
   bgzip >${output}
 rm ${tmp}
 
-zcat ${output} | head
-
 num_out=$(zcat "${output}" | wc -l )
 if [ "${num_out}" -lt 2 ]
 then
-  echo 'ERROR: No allele-specific counts'
-  echo "MAE ID: ${mae_id}"
-  echo "VCF file: ${vcf_file}"
-  echo "BAM file: ${bam_file}"
+  printf  "%s\n" "" "ERROR: No allele-specific counts" \
+    "  Make sure that the chromosome styles of the FASTA reference and BAM file match." \
+    "  If that isn't the issue, check that your VCF and BAM files are correctly formatted." \
+    "  If this problem persists and if this is your only sample causing issues, consider removing it from your analysis, as a last resort." \
+    "" "  MAE ID: ${mae_id}" \
+    "  VCF file: ${vcf_file}" \
+    "  BAM file: ${bam_file}" \
+    "  FASTA file: ${fasta}"
   exit 1
 fi
+
+zcat ${output} | head
diff --git a/drop/modules/mae-pipeline/MAE/filterSNVs.sh b/drop/modules/mae-pipeline/MAE/filterSNVs.sh
index 9d30e65c..54386d47 100755
--- a/drop/modules/mae-pipeline/MAE/filterSNVs.sh
+++ b/drop/modules/mae-pipeline/MAE/filterSNVs.sh
@@ -54,10 +54,12 @@ fi
 num_out=$(zcat "${output}" | grep -vc '#' )
 if [ "${num_out}" -eq 0 ]
 then
-  echo "ERROR: no entries after filtering for SNVs"
-  echo "VCF ID: ${vcf_id}"
-  echo "VCF file: ${vcf_file}"
-  echo "BAM file: ${bam_file}"
+  printf  "%s\n" "" "ERROR: No entries after filtering for SNVs" \
+  "  Make sure that the VCF file is correctly formatted and contains heterozygous variants." \
+  "  This analysis is independent per sample, so consider removing the sample from your analysis as a last resort." \
+  "" "  VCF ID: ${vcf_id}" \
+  "  VCF file: ${vcf_file}" \
+  "  BAM file: ${bam_file}"
   exit 1
 fi