From b75e7f4e450d38515c0acdff6d78f1a4ede6dbe6 Mon Sep 17 00:00:00 2001 From: Michaela Mueller Date: Wed, 10 Feb 2021 22:47:28 +1100 Subject: [PATCH 1/2] add error messages to MAE shell scripts --- drop/modules/mae-pipeline/MAE/ASEReadCounter.sh | 12 ++++++++++-- drop/modules/mae-pipeline/MAE/filterSNVs.sh | 10 ++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh index ac8e6c2a..d47efef2 100755 --- a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh +++ b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh @@ -45,7 +45,6 @@ chr_subset=$(comm -12 <(cut -f1 -d" " ${canonical} | sort -u) <(echo ${vcf_chr} chr_subset=$(comm -12 <(echo ${bam_chr} | xargs -n1) <(echo ${chr_subset} | xargs -n1) | uniq) for chr in $chr_subset; do - echo $chr $gatk ASEReadCounter \ -R ${fasta} \ -I ${bam_file} \ @@ -57,10 +56,19 @@ for chr in $chr_subset; do tail -n+2 >>$tmp done -echo $mae_id cat $tmp | awk -v id="${mae_id}" \ -F $'\t' 'BEGIN {OFS = FS} NR==1{print $0, "ID"} NR>1{print $0, id}' | bgzip >${output} rm ${tmp} zcat ${output} | head + +num_out=$(zcat "${output}" | wc -l ) +if [ "${num_out}" -lt 2 ] +then + echo 'ERROR: No allele-specific counts' + echo "MAE ID: ${mae_id}" + echo "VCF file: ${vcf_file}" + echo "BAM file: ${bam_file}" + exit 1 +fi diff --git a/drop/modules/mae-pipeline/MAE/filterSNVs.sh b/drop/modules/mae-pipeline/MAE/filterSNVs.sh index f428b2b7..9d30e65c 100755 --- a/drop/modules/mae-pipeline/MAE/filterSNVs.sh +++ b/drop/modules/mae-pipeline/MAE/filterSNVs.sh @@ -51,5 +51,15 @@ else # VCF and BAM have same chromosome format rm ${tmp}.tbi fi +num_out=$(zcat "${output}" | grep -vc '#' ) +if [ "${num_out}" -eq 0 ] +then + echo "ERROR: no entries after filtering for SNVs" + echo "VCF ID: ${vcf_id}" + echo "VCF file: ${vcf_file}" + echo "BAM file: ${bam_file}" + exit 1 +fi + $bcftools index -t ${output} From 1c9e6a2429c2419a9d34af66499d7ce1a433ea15 Mon Sep 17 00:00:00 2001 From: Michaela Mueller Date: Tue, 25 May 2021 21:25:31 +0200 Subject: [PATCH 2/2] More verbose error message --- .../mae-pipeline/MAE/ASEReadCounter.sh | 24 +++++++++++-------- drop/modules/mae-pipeline/MAE/filterSNVs.sh | 10 ++++---- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh index b358bbb8..a4290269 100755 --- a/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh +++ b/drop/modules/mae-pipeline/MAE/ASEReadCounter.sh @@ -31,8 +31,8 @@ header+="lowBaseQDepth\trawDepth\totherBases\timproperPairs" echo -e $header >> $tmp # get chr format -bam_chr=$($samtools idxstats ${bam_file} | grep -vP "\t0\t0" | cut -f1) # only chr with coverage -vcf_chr=$($bcftools view ${vcf_file} | cut -f1 | grep -v '#' | uniq) +bam_chr=$($samtools idxstats ${bam_file} | grep -vP "\t0\t0" | cut -f1 | sort -u) # only chr with coverage +vcf_chr=$($bcftools view ${vcf_file} | cut -f1 | grep -v '#' | sort -u) if [ "$(echo ${vcf_chr} | grep -c 'chr')" -eq 0 ]; then echo "use NCBI format" canonical=$ncbi2ucsc @@ -42,8 +42,8 @@ else fi # subset to standard chromosomes -chr_subset=$(comm -12 <(cut -f1 -d" " ${canonical} | sort -u) <(echo "${vcf_chr}" | sort -u)) -chr_subset=$(comm -12 <(echo "${bam_chr}" | sort -u) <(echo "${chr_subset}") | uniq) +chr_subset=$(comm -12 <(cut -f1 -d" " ${canonical} | sort -u) <(echo "${vcf_chr}")) +chr_subset=$(comm -12 <(echo "${bam_chr}") <(echo "${chr_subset}") | uniq) for chr in $chr_subset; do $gatk ASEReadCounter \ @@ -62,14 +62,18 @@ cat $tmp | awk -v id="${mae_id}" \ bgzip >${output} rm ${tmp} -zcat ${output} | head - num_out=$(zcat "${output}" | wc -l ) if [ "${num_out}" -lt 2 ] then - echo 'ERROR: No allele-specific counts' - echo "MAE ID: ${mae_id}" - echo "VCF file: ${vcf_file}" - echo "BAM file: ${bam_file}" + printf "%s\n" "" "ERROR: No allele-specific counts" \ + " Make sure that the chromosome styles of the FASTA reference and BAM file match." \ + " If that isn't the issue, check that your VCF and BAM files are correctly formatted." \ + " If this problem persists and if this is your only sample causing issues, consider removing it from your analysis, as a last resort." \ + "" " MAE ID: ${mae_id}" \ + " VCF file: ${vcf_file}" \ + " BAM file: ${bam_file}" \ + " FASTA file: ${fasta}" exit 1 fi + +zcat ${output} | head diff --git a/drop/modules/mae-pipeline/MAE/filterSNVs.sh b/drop/modules/mae-pipeline/MAE/filterSNVs.sh index 9d30e65c..54386d47 100755 --- a/drop/modules/mae-pipeline/MAE/filterSNVs.sh +++ b/drop/modules/mae-pipeline/MAE/filterSNVs.sh @@ -54,10 +54,12 @@ fi num_out=$(zcat "${output}" | grep -vc '#' ) if [ "${num_out}" -eq 0 ] then - echo "ERROR: no entries after filtering for SNVs" - echo "VCF ID: ${vcf_id}" - echo "VCF file: ${vcf_file}" - echo "BAM file: ${bam_file}" + printf "%s\n" "" "ERROR: No entries after filtering for SNVs" \ + " Make sure that the VCF file is correctly formatted and contains heterozygous variants." \ + " This analysis is independent per sample, so consider removing the sample from your analysis as a last resort." \ + "" " VCF ID: ${vcf_id}" \ + " VCF file: ${vcf_file}" \ + " BAM file: ${bam_file}" exit 1 fi