Skip to content

Commit

Permalink
Update add_ncbi_annotation.sh
Browse files Browse the repository at this point in the history
#
# Build the list of unique novel coding IDs used by the later filtering steps.
# Step 1: for every line of novel-genes-coding.gtf that carries a
# `transcript_id <value>` and/or `gene_id <value>` attribute, write all such
# attributes from that line, space-separated, to novel_annotated.tab.
perl -lne 'print "@m" if @m=(/((?:transcript_id|gene_id)\s+\S+)/g);' novel-genes-coding.gtf > novel_annotated.tab
# Step 2: take the last whitespace-separated field of each line, strip the
# `;` and `"` characters, and keep only the first occurrence of each resulting
# ID (input order preserved). A single awk pass is used instead of a chain of
# awk + `sed -i` + awk + rm + mv: `sed -i` is not POSIX (BSD/macOS sed requires
# a suffix argument), and the chain used novel-coding-transcripts.tab as a
# scratch file, clobbering any existing file of that name.
awk '{ id = $(NF); gsub(/[";]/, "", id); if (!seen[id]++) print id }' novel_annotated.tab > novel-coding-transcripts.matches
#
  • Loading branch information
cfarkas authored Dec 28, 2022
1 parent 626aabe commit 4bd970b
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions bash_scripts/add_ncbi_annotation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -467,14 +467,12 @@ cp ./transcriptome_metrics/novel-transcripts-lncRNA.fa ./
# Copy the known lncRNA transcript FASTA from ./transcriptome_metrics into the current directory.
cp ./transcriptome_metrics/known-transcripts-lncRNA.fa ./
#
# For each line of novel-genes-coding.gtf, collect every `transcript_id <token>` / `gene_id <token>`
# match and print the matches from that line space-joined; lines without a match print nothing.
perl -lne 'print "@m" if @m=(/((?:transcript_id|gene_id)\s+\S+)/g);' novel-genes-coding.gtf > novel_annotated.tab
# NOTE(review): the commit summary reports "6 additions & 8 deletions" and this view has no +/-
# markers, so the next eight commands look like the pre-change version of this step — confirm
# which lines are live. Run as shown, novel_annotated.tab is deleted below and then read again.
# Strip the literal "transcript_id " label.
sed -i 's/transcript_id //g' novel_annotated.tab
# Turn every ';' into a tab.
sed -i 's/;/\t/g' novel_annotated.tab
# Strip the literal "gene_id" label.
sed -i 's/gene_id//g' novel_annotated.tab
# Strip all double quotes.
sed -i 's/"//g' novel_annotated.tab
# Keep only the first occurrence of each line, then delete the input file.
awk '!a[$0]++' novel_annotated.tab > novel-transcripts_and_genes.tab && rm novel_annotated.tab
# Swap the first two columns (tab-separated output), then delete the input file.
awk '{print $2"\t"$1}' novel-transcripts_and_genes.tab > novel-coding-genes-and-transcripts.tab && rm novel-transcripts_and_genes.tab
# Keep only the first column as the list of IDs to match.
awk '{print $1}' novel-coding-genes-and-transcripts.tab > novel-coding-transcripts.matches
rm novel-coding-genes-and-transcripts.tab
# NOTE(review): presumably the post-change version of the step starts here — confirm.
# Take the last whitespace-separated field of each line of novel_annotated.tab.
awk '{print $(NF)}' novel_annotated.tab > novel-coding-transcripts.matches
# Strip ';' and '"' from the extracted values.
sed -i 's/;//g' novel-coding-transcripts.matches
sed -i 's/"//g' novel-coding-transcripts.matches
# De-duplicate (first occurrence wins) via a scratch file, then move it back to the .matches name.
awk '!a[$0]++' novel-coding-transcripts.matches > novel-coding-transcripts.tab && rm novel-coding-transcripts.matches
mv novel-coding-transcripts.tab novel-coding-transcripts.matches
#
# Run `seqkit fx2tab` on cds.fa and prot.fa, writing cds.tab and prot.tab
# (presumably one tab-delimited record per sequence — see seqkit docs).
seqkit fx2tab cds.fa > cds.tab
seqkit fx2tab prot.fa > prot.tab
# Select the cds.tab lines that contain any ID from the .matches file,
# matched as a fixed string (-F) on whole-word boundaries (-w).
grep -w -F -f novel-coding-transcripts.matches cds.tab > novel-coding-cds.tab
Expand Down

0 comments on commit 4bd970b

Please sign in to comment.