diff --git a/CHANGES.md b/CHANGES.md index 0f8033abc..9e0e1a1ec 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,11 @@ ## __NEXT__ +### Bug Fixes + +* filter: Improve speed of checking duplicates in metadata, especially for large files. [#1466][] (@victorlin) + +[#1466]: https://github.com/nextstrain/augur/pull/1466 ## 24.4.0 (15 May 2024) diff --git a/augur/filter/_run.py b/augur/filter/_run.py index 9a82c7f00..682ff46dc 100644 --- a/augur/filter/_run.py +++ b/augur/filter/_run.py @@ -180,7 +180,7 @@ def run(args): for metadata in metadata_reader: duplicate_strains = ( set(metadata.index[metadata.index.duplicated()]) | - set(metadata.index[metadata.index.isin(metadata_strains)]) + (set(metadata.index) & metadata_strains) ) if len(duplicate_strains) > 0: cleanup_outputs(args)