Skip to content

Commit

Permalink
Updated file handling with new VMR on sheet 1 not 0
Browse files Browse the repository at this point in the history
  • Loading branch information
amillard committed Oct 13, 2024
1 parent bc78349 commit 5eafe6f
Show file tree
Hide file tree
Showing 25 changed files with 42,481 additions and 4 deletions.
4 changes: 2 additions & 2 deletions taxmyphage/download_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def check_blastDB(
)
print_error("Will download the database now and create database")

url = "https://millardlab-inphared.s3.climb.ac.uk/Bacteriophage_genomes.fasta.gz"
url = "https://millardlab-taxmyphage.s3.climb.ac.uk/Bacteriophage_genomes_MSL39v1.fasta.gz"

create_folder(os.path.dirname(blastdb_path))

Expand Down Expand Up @@ -202,7 +202,7 @@ def check_mash_index(mash_index_path: str, install: bool = False) -> None:
)
print_error("Will download the database now and create database")

url = "https://millardlab-inphared.s3.climb.ac.uk/ICTV_2023.msh"
url = "https://millardlab-taxmyphage.s3.climb.ac.uk/ICTV_MSL39v1.msh"

create_folder(os.path.dirname(mash_index_path))

Expand Down
11 changes: 9 additions & 2 deletions taxmyphage/handle_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,15 @@ def read_VMR(VMR_path: str) -> pd.DataFrame:
Returns:
pd.DataFrame: DataFrame containing the VMR data
"""

taxa_df = pd.read_excel(VMR_path, sheet_name=0)
# The updated VMR changes the sheet position - either change the index below to 1 or find the sheet automatically
#taxa_df = pd.read_excel(VMR_path, sheet_name=0)

excel_file = pd.ExcelFile(VMR_path)
sheet_names = excel_file.sheet_names
# Find the index of the first sheet whose name contains 'VMR' (None if no sheet name matches)
vmr_sheet_index = next((index for index, sheet in enumerate(sheet_names) if 'VMR' in sheet), None)

taxa_df = pd.read_excel(VMR_path, sheet_name=vmr_sheet_index)

# Print the DataFrame and rename a column
# ic(taxa_df.head())
Expand Down
26 changes: 26 additions & 0 deletions taxmyphage_results/Results_per_genome/UP30/Output_of_taxonomy.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
genome genus_cluster species_cluster Isolate ID Species Sort Isolate Sort Realm Subrealm Kingdom Subkingdom Phylum Subphylum Class Subclass Order Suborder Family Subfamily Genus Subgenus Species Exemplar or additional isolate Virus name(s) Virus name abbreviation(s) Virus isolate designation Genbank Virus REFSEQ accession Virus REFSEQ NCBI taxid Genome coverage Genome composition Host source Accessions Link
MF093736 1 1 1001687.0 1271.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus ISF002 E Shigella phage vB_SsoS-ISF002 Not Defined Yet Not Defined Yet MF093736 NC_041995 2006922 Complete genome dsDNA bacteria NCBI Nucleotide
MG049919 1 1 1001686.0 1270.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus ISF001 E Shigella phage vB_SflS-ISF001 Not Defined Yet Not Defined Yet MG049919 NC_047863 2048005 Complete genome dsDNA bacteria NCBI Nucleotide
LC516895 1 2 1001684.0 1268.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus DELF2 E Escherichia phage vB_EcoS-DELF2 Not Defined Yet Not Defined Yet LC516895 NC_049832 2697005 Complete genome dsDNA bacteria NCBI Nucleotide
MZ501092 1 3 1024948.0 1274.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus leonhardeuler E Escherichia phage LeonhardEuler LeonhardEuler Not Defined Yet MZ501092 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
KP085586 1 4 1001690.0 1277.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus PSf2 E Shigella phage pSf-2 Not Defined Yet Not Defined Yet KP085586 NC_026010 1572702 Complete genome dsDNA bacteria NCBI Nucleotide
MF468274 1 5 1001691.0 1282.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus Sfin1 E Shigella phage Sfin-1 Not Defined Yet Not Defined Yet MF468274 NC_047998 2024314 Complete genome dsDNA bacteria NCBI Nucleotide
KU194205 1 6 1001689.0 1273.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus JMPW2 E Escherichia phage JMPW2 Not Defined Yet Not Defined Yet KU194205 NC_041873 1772218 Complete genome dsDNA bacteria NCBI Nucleotide
AY216660 1 7 1001696.0 1287.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus T1 E Escherichia phage T1 Not Defined Yet Not Defined Yet AY216660 NC_005833 2492962 Complete genome dsDNA bacteria NCBI Nucleotide
OL960582 1 8 1024952.0 1279.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus SA12KD E Escherichia phage vB_EcoS_SA12KD SA12KD Not Defined Yet OL960582 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
MH051911 1 9 1001685.0 1269.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus IME18 E Enterobacteria phage vB_EcoS_IME18 Not Defined Yet Not Defined Yet MH051911 NC_047959 2163886 Complete genome dsDNA bacteria NCBI Nucleotide
KY985004 1 10 1001693.0 1284.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus SH2 E Escherichia phage vB_EcoS_SH2 Not Defined Yet Not Defined Yet KY985004 NC_047828 1983554 Complete genome dsDNA bacteria NCBI Nucleotide
JX912252 1 11 1001682.0 1264.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus ADB2 E Escherichia phage ADB-2 Not Defined Yet Not Defined Yet JX912252 NC_019725 1216926 Complete genome dsDNA bacteria NCBI Nucleotide
HM035024 1 12 1001695.0 1286.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus Shfl1 E Shigella phage Shfl1 Not Defined Yet Not Defined Yet HM035024 NC_015456 2919551 Complete genome dsDNA bacteria NCBI Nucleotide
query_UP30 1 13 Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet Not Defined Yet
OQ223306 1 14 1024951.0 1278.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus S202 E Shigella phage S2_02 S2_02 Not Defined Yet OQ223306 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
MT682715 1 15 1024946.0 1266.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus chapo E Escherichia phage vB_EcoS_Chapo Chapo Not Defined Yet MT682715 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
OL960579 1 16 1024953.0 1280.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus SA30RD E Escherichia phage vB_EcoS_SA30RD SA30RD Not Defined Yet OL960579 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
MN296515 1 17 1001692.0 1283.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus Sfin3 E Shigella phage Sfin-3 Not Defined Yet Not Defined Yet MN296515 NC_049831 2848075 Complete genome dsDNA bacteria NCBI Nucleotide
OP455115 1 18 1024950.0 1276.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus LHE71 E Escherichia phage LHE71 LHE71 Not Defined Yet OP455115 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
MH285980 1 19 1001683.0 1265.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus BIFF E Escherichia phage Eco_BIFF Not Defined Yet Not Defined Yet MH285980 NC_047996 2175169 Complete genome dsDNA bacteria NCBI Nucleotide
KX828710 1 20 1001694.0 1285.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus SH6 E Shigella phage SH6 Not Defined Yet Not Defined Yet KX828710 NC_047785 1913048 Complete genome dsDNA bacteria NCBI Nucleotide
OP094641 1 21 1024949.0 1275.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus Lg3 E Escherichia phage Lg3 Lg3 Not Defined Yet OP094641 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
OL960578 1 22 1024954.0 1281.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus SA32RD E Escherichia phage vB_EcoS_SA32RD SA32RD Not Defined Yet OL960578 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
OL800706 1 23 1024947.0 1267.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus CLBP3 E Escherichia phage CLB_P3 CLB_P3 Not Defined Yet OL800706 Not Defined Yet Not Defined Yet Complete genome dsDNA bacteria NCBI Nucleotide
KU194206 1 24 1001688.0 1272.0 1.0 Duplodnaviria Not Defined Yet Heunggongvirae Not Defined Yet Uroviricota Not Defined Yet Caudoviricetes Not Defined Yet Not Defined Yet Not Defined Yet Drexlerviridae Tunavirinae Tunavirus Not Defined Yet Tunavirus JMPW1 E Escherichia phage JMPW1 Not Defined Yet Not Defined Yet KU194206 NC_041874 1772219 Complete genome dsDNA bacteria NCBI Nucleotide
47 changes: 47 additions & 0 deletions taxmyphage_results/Results_per_genome/UP30/Summary_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
--------------- TAXMYPHAGE RESULTS ---------------

Query sequence can be classified within a current genus and represents a new species, it is in:

Class: Caudoviricetes
Family: Drexlerviridae
Subfamily: Tunavirinae
Genus: Tunavirus
Species: Tunavirus new_name

-------------- INFORMATION MESSAGES --------------

INFO: Current ICTV taxonomy and the clustering on genomic similarity algorithm output appear to be consistent at the genus level

The data from the initial mash searching is below as tsv format

/!\ Remember that taxmyPHAGE compares the query against viruses classified by the ICTV, allowing you to determine whether it represents a new species or genus.
It does not tell you if it is similar to other phages that have yet to be classified.
You can do this by comparison with INPHARED database if you wish https://github.com/RyanCook94/inphared or BLAST etc.

------------------ MASH RESULTS ------------------

Reference Query distance p-value shared-hashes ANI
./Tunavirus/HM035024.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0820539 0 490/5000
./Tunavirus/OL960578.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0836797 0 472/5000
./Tunavirus/MZ501092.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.083772 0 471/5000
./Tunavirus/MH051911.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0845181 0 463/5000
./Tunavirus/MN296515.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0847068 0 461/5000
./Tunavirus/KY985004.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0851824 0 456/5000
./Tunavirus/KP085586.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0854706 0 453/5000
./Tunavirus/OQ223306.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0859554 0 448/5000
./Tunavirus/LC516895.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0864461 0 443/5000
./Tunavirus/MH285980.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.087043 0 437/5000
./Tunavirus/MF093736.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0873447 0 434/5000
./Tunavirus/OL960579.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0880576 0 427/5000
./Tunavirus/KU194206.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0880576 0 427/5000
./Tunavirus/JX912252.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0901662 0 407/5000
./Tunavirus/KX828710.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0901662 0 407/5000
./Tunavirus/MF468274.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0903831 0 405/5000
./Tunavirus/OP455115.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0903831 0 405/5000
./Tunavirus/OL800706.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0914855 0 395/5000
./Tunavirus/KU194205.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0936649 0 376/5000
./Tunavirus/MT682715.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0936649 0 376/5000
./Tunavirus/OP094641.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0940198 0 373/5000
./Tunavirus/OL960582.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0943778 0 370/5000
./Tunavirus/AY216660.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.0985328 0 337/5000
./Tunavirus/MG049919.fna /home/andrew/tax_myPHAGE/taxmyphage_results/Results_per_genome/UP30/query.fasta 0.108654 0 269/5000
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 5eafe6f

Please sign in to comment.