From a28a8f6df7a59c06f7030eddcae8e3ab371bbb99 Mon Sep 17 00:00:00 2001
From: Matt Olm
Date: Thu, 27 Feb 2020 08:57:23 -0800
Subject: [PATCH] 2.5.3

---
 CHANGELOG.md      |  5 +++++
 drep/VERSION      |  2 +-
 drep/d_cluster.py | 49 +++++++++++++++++++++++++++--------------------
 3 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5dc7279..2b29109 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project (attempts to) adhere to [Semantic Versioning](http://semver.org/).
 
+## [2.5.3] - 2020-02-27
+### Fixed
+- More bug fixes related to FastANI
+- Allow loading of cached Ndb.csv
+
 ## [2.5.2] - 2020-02-26
 ### Fixed
 - The bug I tried to fix in 2.5.1 is able to fix itself
diff --git a/drep/VERSION b/drep/VERSION
index f225a78..aedc15b 100644
--- a/drep/VERSION
+++ b/drep/VERSION
@@ -1 +1 @@
-2.5.2
+2.5.3
diff --git a/drep/d_cluster.py b/drep/d_cluster.py
index 6edbb04..a57fc81 100644
--- a/drep/d_cluster.py
+++ b/drep/d_cluster.py
@@ -193,33 +193,40 @@ def cluster_genomes(genome_list, data_folder, **kwargs):
     kwargs.get('wd')._wipe_secondary_clusters()
 
     if not kwargs.get('SkipSecondary', False):
+        # See if cached
+        cached = (debug and wd.hasDb('Ndb'))
+
+        if cached:
+            logging.info('3. Loading cached secondary clustering')
+            Ndb = wd.get_db('Ndb')
+
+            # Get rid of broken ones
+            Ndb = Ndb.dropna(subset=['reference'])
+
+            logging.info('3. Secondary clustering cache loaded')
+
         # Run comparisons, make Ndb
-        _print_time_estimate(Bdb, Cdb, algorithm, kwargs.get('processors', 6))
-        Ndb = pd.DataFrame()
-        for bdb, name in iteratre_clusters(Bdb,Cdb):
-            logging.debug('running cluster {0}'.format(name))
-            #logging.debug('total memory - {0:.2f} Mbp'.format(int(process.memory_info().rss)/1000000))
-            try:
+        else:
+            _print_time_estimate(Bdb, Cdb, algorithm, kwargs.get('processors', 6))
+            Ndb = pd.DataFrame()
+            for bdb, name in iteratre_clusters(Bdb,Cdb):
+                logging.debug('running cluster {0}'.format(name))
+                #logging.debug('total memory - {0:.2f} Mbp'.format(int(process.memory_info().rss)/1000000))
                 ndb = compare_genomes(bdb, algorithm, data_folder, **kwargs)
-                ndb['primary_cluster'] = name
-                Ndb = Ndb.append(ndb)
-
-            except:
-                logging.error("CRITICAL ERROR WITH PRIMARY CLUSTER {0}; TRYING AGAIN".format(name))
-                try:
+                if len(ndb) == 0:
+                    logging.error("CRITICAL ERROR WITH PRIMARY CLUSTER {0}; TRYING AGAIN".format(name))
                     ndb = compare_genomes(bdb, algorithm, data_folder, **kwargs)
-                    if len(ndb) > 0:
-                        ndb['primary_cluster'] = name
-                        Ndb = Ndb.append(ndb)
-                    else:
-                        logging.error("CRITICAL ERROR AGAIN WITH PRIMARY CLUSTER {0}; SKIPPING".format(name))
-                except:
+
+                if len(ndb) > 0:
+                    ndb['primary_cluster'] = name
+                    Ndb = Ndb.append(ndb)
+                else:
                     logging.error("DOUBLE CRITICAL ERROR AGAIN WITH PRIMARY CLUSTER {0}; SKIPPING".format(name))
 
-            if debug:
-                logging.debug("Debug mode on - saving Ndb ASAP")
-                wd.store_db(Ndb, 'Ndb')
+                if debug:
+                    logging.debug("Debug mode on - saving Ndb ASAP")
+                    wd.store_db(Ndb, 'Ndb')
 
         # Run clustering on Ndb
         Cdb, c2ret = _cluster_Ndb(Ndb, comp_method=algorithm, **kwargs)
 