diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b29109..bde0b67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project (attempts to) adhere to [Semantic Versioning](http://semver.org/). +## [2.5.4] - 2020-03-06 +- Attempted fix for a bug related to pandas categorical columns: `genome1`/`genome2` in the MASH table are re-cast to `category` after `.apply` + ## [2.5.3] - 2020-02-27 ### Fixed - More bug fixes related to FastANI diff --git a/drep/VERSION b/drep/VERSION index aedc15b..fe16b34 100644 --- a/drep/VERSION +++ b/drep/VERSION @@ -1 +1 @@ -2.5.3 +2.5.4 diff --git a/drep/d_cluster.py b/drep/d_cluster.py index a57fc81..8f6186e 100644 --- a/drep/d_cluster.py +++ b/drep/d_cluster.py @@ -596,8 +596,8 @@ def all_vs_all_MASH(Bdb, data_folder, **kwargs): uCols = ['genome1','genome2','dist'] dTypes = {'genome1':'category', 'genome2':'category', 'dist':np.float32} Mdb = pd.read_csv(file, names=iniCols, usecols=uCols, dtype=dTypes, sep='\t') - Mdb['genome1'] = Mdb['genome1'].apply(_get_genome_name_from_fasta) - Mdb['genome2'] = Mdb['genome2'].apply(_get_genome_name_from_fasta) + Mdb['genome1'] = Mdb['genome1'].apply(_get_genome_name_from_fasta).astype('category') + Mdb['genome2'] = Mdb['genome2'].apply(_get_genome_name_from_fasta).astype('category') Mdb['similarity'] = 1 - Mdb['dist'] # Filter out those genomes that are in the MASH folder but shouldn't be in Mdb