#!/usr/bin/env python
"""
blastn_add_taxonomy   V1.0    martenhoogeveen@naturalis.nl

This script adds the taxonomy to the BLAST output. The input is the folder
path that contains the BLAST results.
"""
import argparse
import glob
import json
import os
import sqlite3
import sys

from add_taxonomy_scripts.genbank import Genbank
from add_taxonomy_scripts.gbif import Gbif
from add_taxonomy_scripts.bold import Bold
from add_taxonomy_scripts.privatebold import PrivateBold
from add_taxonomy_scripts.unite import Unite
from add_taxonomy_scripts.silva import Silva

# Retrieve the commandline arguments.
# NOTE(review): -o/--output is parsed but never read anywhere in this file.
parser = argparse.ArgumentParser(description='Add taxonomy to BLAST output')
parser.add_argument('-i', '--blast_input_folder', metavar='input folder with BLAST custom outfmt 6 output', dest='blastinputfolder', type=str,help='input folder', required=True)
parser.add_argument('-t', '--taxonomy_reference', metavar='taxonomy reference', dest='rankedlineage', type=str, required=False, nargs='?', default="taxonomy_reference.json")
parser.add_argument('-m', '--merged', metavar='merged taxonids', dest='merged', type=str, help='merged taxon id json', required=False, nargs='?', default="merged_taxonomy.json")
parser.add_argument('-ts', '--taxonomy_source', dest='taxonomy_source', type=str, required=False, nargs='?', default="default")
parser.add_argument('-o', '--output', metavar='output', dest='output', type=str, help='output file, BLAST hits with taxonomy', required=False, nargs='?', default="")
parser.add_argument('-taxonomy_db', dest='taxonomy_db', type=str, required=False)
args = parser.parse_args()


def add_taxonomy(file, genbank, bold, gbif, privatebold, unite, silva):
    """Annotate one BLAST result file with taxonomy.

    Two files are appended to in the input folder, both named after the
    basename of ``file``:

    * ``taxonomy_<name>``        - written according to ``--taxonomy_source``
      (``GBIF``: the line after ``gbif.find_gbif_taxonomy``; ``default``: the
      line as returned by the source-specific lookup; any other value: only
      the header row is written).
    * ``orginaltaxonomy_<name>`` - always the line as returned by the
      source-specific lookup.

    The lookup is chosen from the text before the first ``|`` in the second
    tab-separated column; unknown prefixes fall back to GenBank.

    Args:
        file: Path of the BLAST tabular file to read.
        genbank, bold, gbif, privatebold, unite, silva: Lookup objects built
            by ``process_files``. ``gbif`` is only used when the taxonomy
            source is ``GBIF`` and may be ``None`` otherwise.
    """
    folder = args.blastinputfolder.strip()
    name = os.path.basename(file)
    taxonomy_path = os.path.join(folder, "taxonomy_" + name)
    # The "orginal" spelling is part of the established output file name.
    original_path = os.path.join(folder, "orginaltaxonomy_" + name)

    # Database prefix of the subject id -> lookup taking the raw BLAST line.
    lookups = {
        "BOLD": lambda hit: bold.find_bold_taxonomy(hit, "bold"),
        "klasse": lambda hit: bold.find_bold_taxonomy(hit, "klasse"),
        "private_BOLD": privatebold.find_private_bold_taxonomy,
        "UNITE": unite.find_unite_taxonomy,
        "silva": silva.find_silva_taxonomy,
    }

    with open(file, "r") as blasthits, \
            open(taxonomy_path, "a") as output, \
            open(original_path, "a") as output2:
        for line in blasthits:
            # Blank lines have no subject column; skip them instead of
            # failing with an IndexError below.
            if not line.strip():
                continue

            columns = line.split("\t")
            if columns[0] == "#Query ID":
                header = line.strip() + "\t#Source\t#Taxonomy\n"
                output.write(header)
                output2.write(header)
                continue

            source_prefix = columns[1].split("|")[0]
            find_taxonomy = lookups.get(source_prefix, genbank.find_genbank_taxonomy)
            line_taxonomy = find_taxonomy(line)
            output2.write(line_taxonomy.strip() + "\n")

            if args.taxonomy_source == "GBIF":
                line_taxonomy = gbif.find_gbif_taxonomy(line_taxonomy)
                # The output file is opened in text mode, so write str.
                # Encoding to bytes first and then appending "\n" raises
                # TypeError on Python 3.
                output.write(line_taxonomy.strip() + "\n")
            elif args.taxonomy_source == "default":
                # UTF-8 round trip kept from the original implementation.
                output.write(bytes(line_taxonomy, 'utf-8').decode('utf-8', 'ignore').strip() + "\n")


def process_files():
    """Build the taxonomy lookups once and annotate every ``*.tabular`` file.

    Files are taken from the input folder given on the command line and
    processed in sorted order. The list is collected before any output is
    written.
    """
    genbank = Genbank(args.rankedlineage, args.merged)
    bold = Bold()
    privatebold = PrivateBold()
    unite = Unite()
    silva = Silva()
    # The GBIF lookup is only constructed when it is actually requested.
    gbif = Gbif(args.taxonomy_db) if args.taxonomy_source == "GBIF" else None

    pattern = os.path.join(args.blastinputfolder.strip(), "*.tabular")
    for file in sorted(glob.glob(pattern)):
        add_taxonomy(file, genbank, bold, gbif, privatebold, unite, silva)


def main():
    """Script entry point."""
    process_files()


if __name__ == "__main__":
    main()