Skip to content

Commit

Permalink
use ncbi table to convert from id to name for fasta
Browse files Browse the repository at this point in the history
  • Loading branch information
josephwb committed Nov 26, 2018
1 parent 88ca974 commit ffcd624
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions src/change_ncbi_to_name_fasta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import sys
import seq
import os
import argparse as ap

def generate_argparser():
    """Build the command-line parser for this script.

    All three options (``--table``, ``--infile``, ``--outfile``) are
    required strings.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    # Construct the parser exactly once; a second bare ArgumentParser()
    # would throw away the prog name and help formatter set here.
    parser = ap.ArgumentParser(
        prog="change_ncbi_to_name_fasta.py",
        formatter_class=ap.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("-t", "--table", type=str,
                        help="NCBI translation table", required=True)
    parser.add_argument("-i", "--infile", type=str,
                        help="Input fasta alignment", required=True)
    parser.add_argument("-o", "--outfile", type=str,
                        help="Output fasta alignment", required=True)
    return parser

if __name__ == "__main__":
    # Rename every sequence in a fasta file using a tab-separated lookup
    # table: field index 1 of each table row is the key (matched against
    # the sequence name) and field index 4 is the replacement name.
    parser = generate_argparser()
    # With no arguments at all, show the help text instead of an error.
    if not sys.argv[1:]:
        sys.argv.append("-h")
    args = parser.parse_args(sys.argv[1:])

    # Build the id -> name lookup from the table.
    id_to_name = {}
    with open(args.table, "r") as table:
        for line in table:
            fields = line.strip().split("\t")
            # Blank or truncated rows cannot supply both columns; skip
            # them rather than dying with an IndexError.
            if len(fields) < 5:
                continue
            id_to_name[fields[1]] = fields[4]

    # Context manager guarantees the output is flushed and closed even if
    # reading the fasta or a lookup fails part-way through.
    with open(args.outfile, "w") as outf:
        for record in seq.read_fasta_file_iter(args.infile):
            try:
                new_name = id_to_name[record.name]
            except KeyError:
                sys.exit("error: sequence id '{}' from '{}' not found in "
                         "table '{}'".format(record.name, args.infile,
                                             args.table))
            # Spaces are replaced so the name stays a single token.
            record.name = new_name.replace(" ", "_")
            outf.write(record.get_fasta())

1 comment on commit ffcd624

@josephwb
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note this is similar to change_id_to_name_fasta.py (except using argparse), with the only real difference being the column of the table being used for the key (1 in the new file versus 3 in the old file). I don't know if you have different kinds of tables, but the old file did not work with the NCBI table. This one matches what is used in change_ncbi_to_name_fasta.py for trees.

Please sign in to comment.