diff --git a/midas2/__init__.py b/midas2/__init__.py index ae7044e8d11..960ff7b30aa 100644 --- a/midas2/__init__.py +++ b/midas2/__init__.py @@ -3,4 +3,4 @@ import sys assert sys.version_info >= (3, 7), "Python version >= 3.7 is required." -version = "1.0.1" +version = "1.0.2" \ No newline at end of file diff --git a/midas2/models/midasdb.py b/midas2/models/midasdb.py index 9ad1b202e7f..8b79e155afc 100644 --- a/midas2/models/midasdb.py +++ b/midas2/models/midasdb.py @@ -174,14 +174,15 @@ def fetch_tarball(self, filename, list_of_species): _fetched_files = [_fetch_file_from_s3(args_list[0])] fetched_files = dict(zip(list_of_species, _fetched_files)) - for species_id in list_of_species: - fetched_filenames = tarball_mapping[filename] - for _filename in fetched_filenames: - genome_id = self.get_repgenome_id(species_id) - _fetched_file = self.get_target_layout(_filename, False, species_id, genome_id) - md5_fetched = file_md5sum(_fetched_file) - md5_lookup = self.md5sum[filename][species_id][_filename] - assert md5_fetched == md5_lookup, f"Error for downloading {_fetched_file} from {filename}. Please delete the folder and redownload." + if self.has_md5sum: + for species_id in list_of_species: + fetched_filenames = tarball_mapping[filename] + for _filename in fetched_filenames: + genome_id = self.get_repgenome_id(species_id) + _fetched_file = self.get_target_layout(_filename, False, species_id, genome_id) + md5_fetched = file_md5sum(_fetched_file) + md5_lookup = self.md5sum[filename][species_id][_filename] + assert md5_fetched == md5_lookup, f"Error for downloading {_fetched_file} from {filename}. Please delete the folder and redownload." return fetched_files # Single Copy Marker Genes DB @@ -189,33 +190,36 @@ def fetch_tarball(self, filename, list_of_species): fetched_dir = _fetch_file_from_s3(self.construct_file_tuple(filename)) fetched_files = self.get_target_layout("marker_db", False) fetched_files = dict(zip(MARKER_FILE_EXTS, fetched_files)) - for _ in MARKER_FILE_EXTS: - _fetched_file = fetched_files[_] - md5_fetched = file_md5sum(_fetched_file) - md5_lookup = self.md5sum[filename][_] - assert md5_fetched == md5_lookup, f"Error for downloadding {_fetched_file} from {filename}. Please delete the folder and redownload." + if self.has_md5sum: + for _ in MARKER_FILE_EXTS: + _fetched_file = fetched_files[_] + md5_fetched = file_md5sum(_fetched_file) + md5_lookup = self.md5sum[filename][_] + assert md5_fetched == md5_lookup, f"Error for downloadding {_fetched_file} from {filename}. Please delete the folder and redownload." return {filename: fetched_dir} if filename == "markerdb_models": fetched_dir = _fetch_file_from_s3(self.construct_file_tuple(filename)) - for _ in tarball_mapping[filename]: - _fetched_file = self.get_target_layout(f"marker_db_{_}", False) - md5_fetched = file_md5sum(_fetched_file) - md5_lookup = self.md5sum[filename][_] - assert md5_fetched == md5_lookup, f"Error for downloadding {_fetched_file} from {filename}. Please delete the folder and redownload." + if self.has_md5sum: + for _ in tarball_mapping[filename]: + _fetched_file = self.get_target_layout(f"marker_db_{_}", False) + md5_fetched = file_md5sum(_fetched_file) + md5_lookup = self.md5sum[filename][_] + assert md5_fetched == md5_lookup, f"Error for downloadding {_fetched_file} from {filename}. Please delete the folder and redownload." return {filename: fetched_dir} # Chunks if filename == "chunks": fetched_dir = _fetch_file_from_s3(self.construct_file_tuple(filename)) - fetched_filenames = tarball_mapping[filename] - rep_genomes = self.uhgg.representatives - for sid, gid in rep_genomes.items(): - for i, ct in enumerate(fetched_filenames): - _fetched_file = self.get_target_layout(ct, False, sid, gid, DEFAULT_CHUNKS[i]) - md5_fetched = file_md5sum(_fetched_file) - md5_lookup = self.md5sum[filename][sid][ct] - assert md5_fetched == md5_lookup, f"Error for downloadding {_fetched_file} from {filename}. Please delete the folder and redownload." + if self.has_md5sum: + fetched_filenames = tarball_mapping[filename] + rep_genomes = self.uhgg.representatives + for sid, gid in rep_genomes.items(): + for i, ct in enumerate(fetched_filenames): + _fetched_file = self.get_target_layout(ct, False, sid, gid, DEFAULT_CHUNKS[i]) + md5_fetched = file_md5sum(_fetched_file) + md5_lookup = self.md5sum[filename][sid][ct] + assert md5_fetched == md5_lookup, f"Error for downloadding {_fetched_file} from {filename}. Please delete the folder and redownload." return {filename: fetched_dir} # Single File: key of the tarball layout diff --git a/midas2/subcommands/build_bowtie2db.py b/midas2/subcommands/build_bowtie2db.py index 26db8a35991..90526943ca4 100644 --- a/midas2/subcommands/build_bowtie2db.py +++ b/midas2/subcommands/build_bowtie2db.py @@ -89,7 +89,7 @@ def build_bowtie2db(args): if args.bt2_indexes_name == "repgenomes": tsprint(f"MIDAS2::build_bowtie2_repgenomes_indexes::start") - midas_db.fetch_files("representative_genome", species_ids_of_interest) + midas_db.fetch_files("repgenome", species_ids_of_interest) contigs_files = midas_db.fetch_files("representative_genome", species_ids_of_interest) tsprint(contigs_files) build_bowtie2_db(args.bt2_indexes_dir, args.bt2_indexes_name, contigs_files, args.num_cores)