From 4bd970b716987b1f97949708c9d53f5329f0fdeb Mon Sep 17 00:00:00 2001
From: cfarkas
Date: Wed, 28 Dec 2022 16:21:22 -0300
Subject: [PATCH] Update add_ncbi_annotation.sh

#
perl -lne 'print "@m" if @m=(/((?:transcript_id|gene_id)\s+\S+)/g);' novel-genes-coding.gtf > novel_annotated.tab
awk '{print $(NF)}' novel_annotated.tab > novel-coding-transcripts.matches
sed -i 's/;//g' novel-coding-transcripts.matches
sed -i 's/"//g' novel-coding-transcripts.matches
awk '!a[$0]++' novel-coding-transcripts.matches > novel-coding-transcripts.tab && rm novel-coding-transcripts.matches
mv novel-coding-transcripts.tab novel-coding-transcripts.matches
#
---
 bash_scripts/add_ncbi_annotation.sh | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/bash_scripts/add_ncbi_annotation.sh b/bash_scripts/add_ncbi_annotation.sh
index ec43a25..4abc52b 100644
--- a/bash_scripts/add_ncbi_annotation.sh
+++ b/bash_scripts/add_ncbi_annotation.sh
@@ -467,14 +467,12 @@ cp ./transcriptome_metrics/novel-transcripts-lncRNA.fa ./
 cp ./transcriptome_metrics/known-transcripts-lncRNA.fa ./
 #
 perl -lne 'print "@m" if @m=(/((?:transcript_id|gene_id)\s+\S+)/g);' novel-genes-coding.gtf > novel_annotated.tab
-sed -i 's/transcript_id //g' novel_annotated.tab
-sed -i 's/;/\t/g' novel_annotated.tab
-sed -i 's/gene_id//g' novel_annotated.tab
-sed -i 's/"//g' novel_annotated.tab
-awk '!a[$0]++' novel_annotated.tab > novel-transcripts_and_genes.tab && rm novel_annotated.tab
-awk '{print $2"\t"$1}' novel-transcripts_and_genes.tab > novel-coding-genes-and-transcripts.tab && rm novel-transcripts_and_genes.tab
-awk '{print $1}' novel-coding-genes-and-transcripts.tab > novel-coding-transcripts.matches
-rm novel-coding-genes-and-transcripts.tab
+awk '{print $(NF)}' novel_annotated.tab > novel-coding-transcripts.matches
+sed -i 's/;//g' novel-coding-transcripts.matches
+sed -i 's/"//g' novel-coding-transcripts.matches
+awk '!a[$0]++' novel-coding-transcripts.matches > novel-coding-transcripts.tab && rm novel-coding-transcripts.matches
+mv novel-coding-transcripts.tab novel-coding-transcripts.matches
+#
 seqkit fx2tab cds.fa > cds.tab
 seqkit fx2tab prot.fa > prot.tab
 grep -w -F -f novel-coding-transcripts.matches cds.tab > novel-coding-cds.tab