From aae31bf93f5829b7e8eb496037ff8dd441a486f7 Mon Sep 17 00:00:00 2001
From: Smith Nicholas
Date: Fri, 26 Aug 2022 09:08:52 +0200
Subject: [PATCH] check for empty Platform (PL) in bam RG

Besides the sample name (SM), also read the platform (PL) field from the
first @RG header line that carries an SM field. The input BAM is only
hard linked when SM matches the sample ID and PL is non-empty; otherwise
the header is rewritten and the BAM is reheadered and indexed again.

The header is dumped and the log is truncated once, before any fix is
applied, so that a BAM needing both the SM and the PL fix keeps both
header edits and both sets of log lines. PL:dummyPL is only written to
@RG lines without a PL value, so no header line ends up with two PL tags.
---
 .../rvc-pipeline/GATK_BASH/changeHeader.sh    | 48 ++++++++++++-------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh b/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh
index 75e746a9..3e97d2db 100755
--- a/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh
+++ b/drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh
@@ -18,41 +18,53 @@ output_bai=$6
 output_newHeader=$7
 
 
+SM_internalHeader=""
+PL_internalHeader=""
 
 samtools view -H $input_bam |grep "^@RG" |grep SM |head -1| while read header ; do
     for i in $header; do
         if [[ $i == "SM:"* ]]; then
-            internalHeader=${i:3}
-            break
-        else
-            internalHeader=""
+            SM_internalHeader=${i:3}
+        fi
+        if [[ $i == "PL:"* ]]; then
+            PL_internalHeader=${i:3}
         fi
     done
 
-    if [[ $internalHeader == $sample ]]; then
-        echo "Internal Header $internalHeader matches $sample" |tee $log
-        echo "Internal Header is designated: $internalHeader" |tee -a $log
+    if [[ $SM_internalHeader == "$sample" && $PL_internalHeader != "" ]]; then
+        echo "Internal Header $SM_internalHeader matches $sample" |tee $log
+        echo "Internal Header is designated: $SM_internalHeader" |tee -a $log
         echo "SampleID is $sample" |tee -a $log
         ln -f $input_bam $output_bam
         ln -f $input_bai $output_bai
         echo "Done Linking files"
 
         samtools view -H $input_bam > $output_newHeader
     else
-        echo "WARNING"
-        echo "Internal Header is designated: $internalHeader" |tee $log
-        echo "SampleID is $sample" |tee -a $log
-        echo "Forcing $internalHeader to match $sample" |tee -a $log
-
-        samtools view -H $input_bam > $output_newHeader
-        echo $output_newHeader
+        # Dump the header and truncate the log exactly once, so the SM and PL
+        # fixes below accumulate instead of overwriting each other.
+        samtools view -H $input_bam > $output_newHeader
+        : > $log
+        if [[ $SM_internalHeader != "$sample" ]]; then
+            echo "WARNING"
+            echo "Internal Header is designated: $SM_internalHeader" |tee -a $log
+            echo "SampleID is $sample" |tee -a $log
+            echo "Forcing $SM_internalHeader to match $sample" |tee -a $log
 
-        # sed using regEx in place substitiute 'SM:' followed by any thing that isn't tab or newLine. and then replace it with the sampleID and the delimiter (tab or newLine) that matched in the 1st expression.
-        sed -E -i "s/(SM:[^\t|\n]*)(\t|\n*)/SM:${sample}\2/" ${output_newHeader}
+            # sed with an extended regex, in place: substitute 'SM:' followed by anything that isn't a tab or newline with the sampleID, keeping the delimiter captured by the 2nd group.
+            sed -E -i "s/(SM:[^\t|\n]*)(\t|\n*)/SM:${sample}\2/" ${output_newHeader}
+        fi
+        if [[ $PL_internalHeader == "" ]]; then
+            echo "WARNING"
+            echo "Internal PL Header is not designated: $PL_internalHeader" |tee -a $log
+            echo "PL cannot be empty. Using a Dummy: dummyPL " |tee -a $log
+            echo "Forcing PL to dummyPL" |tee -a $log
+            # Fill in an empty 'PL:' value, then append PL:dummyPL to every @RG line
+            # that still has no PL field; never add a second PL tag to a line.
+            sed -E -i -e '/^@RG/s/\tPL:(\t|$)/\tPL:dummyPL\1/' -e '/^@RG/{/\tPL:/!s/$/\tPL:dummyPL/;}' ${output_newHeader}
+        fi
         samtools reheader $output_newHeader $input_bam > $output_bam
-
         samtools index -b $output_bam
-
     fi
     echo "new header can be found here:$output_newHeader" |tee -a $log
 done