Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check for empty Platform (PL) in bam RG #364

Merged
merged 7 commits into from
Nov 10, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 27 additions & 18 deletions drop/modules/rvc-pipeline/GATK_BASH/changeHeader.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,41 +18,50 @@ output_bai=$6
output_newHeader=$7


SM_internalHeader=""
PL_internalHeader=""
samtools view -H $input_bam |grep "^@RG" |grep SM |head -1|
while read header ; do
for i in $header; do
if [[ $i == "SM:"* ]]; then
internalHeader=${i:3}
break
else
internalHeader=""
SM_internalHeader=${i:3}
fi
if [[ $i == "PL:"* ]]; then
PL_internalHeader=${i:3}
fi
done

if [[ $internalHeader == $sample ]]; then
echo "Internal Header $internalHeader matches $sample" |tee $log
echo "Internal Header is designated: $internalHeader" |tee -a $log
if [[ $SM_internalHeader == $sample && $PL_internalHeader != "" ]]; then
echo "Internal Header $SM_internalHeader matches $sample" |tee $log
echo "Internal Header is designated: $SM_internalHeader" |tee -a $log
echo "SampleID is $sample" |tee -a $log
ln -f $input_bam $output_bam
ln -f $input_bai $output_bai
echo "Done Linking files"
samtools view -H $input_bam > $output_newHeader
else
echo "WARNING"
echo "Internal Header is designated: $internalHeader" |tee $log
echo "SampleID is $sample" |tee -a $log
echo "Forcing $internalHeader to match $sample" |tee -a $log

samtools view -H $input_bam > $output_newHeader
echo $output_newHeader
if [[ $SM_internalHeader != $sample ]]; then
echo "WARNING"
echo "Internal Header is designated: $SM_internalHeader" |tee $log
echo "SampleID is $sample" |tee -a $log
echo "Forcing $SM_internalHeader to match $sample" |tee -a $log

# Use sed with an extended regex to substitute in place: match 'SM:' followed by anything that isn't a tab or newline, then replace it with 'SM:' plus the sampleID and the delimiter (tab or newline) captured by the second group (\2).
sed -E -i "s/(SM:[^\t|\n]*)(\t|\n*)/SM:${sample}\2/" ${output_newHeader}
# Use sed with an extended regex to substitute in place: match 'SM:' followed by anything that isn't a tab or newline, then replace it with 'SM:' plus the sampleID and the delimiter (tab or newline) captured by the second group (\2).
samtools view -H $input_bam > $output_newHeader
sed -E -i "s/(SM:[^\t|\n]*)(\t|\n*)/SM:${sample}\2/" ${output_newHeader}
fi
if [[ $PL_internalHeader == "" ]]; then
echo "WARNING"
echo "Internal PL Header is not designated: $PL_internalHeader" |tee $log
echo "PL cannot be empty. Using a Dummy: dummyPL " |tee -a $log
echo "Forcing $PL_internalHeader to dummyPL" |tee -a $log

# Use sed with a regex to substitute in place: replace '@RG' at the start of a line with '@RG\tPL:dummyPL'.
samtools view -H $input_bam > $output_newHeader
sed -E -i "s/^@RG/@RG\tPL:dummyPL/" ${output_newHeader}
fi
samtools reheader $output_newHeader $input_bam > $output_bam

samtools index -b $output_bam

fi
echo "new header can be found here:$output_newHeader" |tee -a $log
done