diff --git a/src/sourmash/__init__.py b/src/sourmash/__init__.py index 0077ea199c..9f6e98cd91 100644 --- a/src/sourmash/__init__.py +++ b/src/sourmash/__init__.py @@ -5,6 +5,7 @@ import re import math import os +from deprecation import deprecated from ._lowlevel import ffi, lib @@ -30,13 +31,63 @@ MAX_HASH = get_minhash_max_hash() from .signature import ( - load_signatures, + load_signatures as load_signatures_private, load_one_signature, SourmashSignature, save_signatures, ) -from .sbtmh import load_sbt_index, search_sbt_index, create_sbt_index +@deprecated(deprecated_in="3.5.1", removed_in="5.0", + current_version=VERSION, + details='Use load_file_as_signatures instead.') +def load_signatures(*args, **kwargs): + """Load a JSON string with signatures into classes. + + Returns list of SourmashSignature objects. + + Note, the order is not necessarily the same as what is in the source file. + + This function has been deprecated as of 3.5.1; please use + 'load_file_as_signatures' instead. Note that in 4.0, the 'quiet' argument + has been removed and the function no longer outputs to stderr. + Moreover, do_raise is now True by default. + """ + return load_signatures_private(*args, **kwargs) + +from .sbtmh import load_sbt_index as load_sbt_index_private +from .sbtmh import search_sbt_index as search_sbt_index_private + +@deprecated(deprecated_in="3.5.1", removed_in="5.0", + current_version=VERSION, + details='Use load_file_as_index instead.') +def load_sbt_index(*args, **kwargs): + """Load and return an SBT index. + + This function has been deprecated as of 3.5.1; please use + 'load_file_as_index' instead. + """ + return load_sbt_index_private(*args, **kwargs) + + +@deprecated(deprecated_in="3.5.1", removed_in="5.0", + current_version=VERSION, + details='Use the new Index API instead.') +def search_sbt_index(*args, **kwargs): + """\ + Search an SBT index `tree` with signature `query` for matches above + `threshold`. 
+ + Usage: + + for match_sig, similarity in search_sbt_index(tree, query, threshold): + ... + + This function has been deprecated as of 3.5.1; please use + 'idx = load_file_as_index(...); idx.search(query, threshold=...)' instead. + """ + return search_sbt_index_private(*args, **kwargs) + +from .sbtmh import create_sbt_index from . import lca from . import sbt from . import sbtmh diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py index 73b39adef8..415e6e99ce 100644 --- a/src/sourmash/commands.py +++ b/src/sourmash/commands.py @@ -8,7 +8,8 @@ import screed from .compare import compare_all_pairs, compare_serial_containment -from . import MinHash, load_sbt_index, create_sbt_index +from . import MinHash +from .sbtmh import load_sbt_index, create_sbt_index from . import signature as sig from . import sourmash_args from .logging import notify, error, print_results, set_quiet diff --git a/src/sourmash/index.py b/src/sourmash/index.py index b67c6d4707..8dd4069f04 100644 --- a/src/sourmash/index.py +++ b/src/sourmash/index.py @@ -145,7 +145,7 @@ def save(self, path): @classmethod def load(cls, location): from .signature import load_signatures - si = load_signatures(location) + si = load_signatures(location, do_raise=True) lidx = LinearIndex(si, filename=location) return lidx diff --git a/src/sourmash/lca/command_index.py b/src/sourmash/lca/command_index.py index 2676e40bc3..d4febd4ae1 100644 --- a/src/sourmash/lca/command_index.py +++ b/src/sourmash/lca/command_index.py @@ -6,7 +6,7 @@ import csv from collections import defaultdict -from sourmash import sourmash_args, load_signatures +from sourmash import sourmash_args from sourmash.sourmash_args import load_file_as_signatures from sourmash.logging import notify, error, debug, set_quiet from . 
import lca_utils diff --git a/src/sourmash/signature.py b/src/sourmash/signature.py index cac62fbc13..b067b7f0e7 100644 --- a/src/sourmash/signature.py +++ b/src/sourmash/signature.py @@ -222,7 +222,6 @@ def _detect_input_type(data): def load_signatures( data, ksize=None, select_moltype=None, ignore_md5sum=False, do_raise=False, - quiet=False ): """Load a JSON string with signatures into classes. @@ -248,8 +247,6 @@ def load_signatures( input_type = _detect_input_type(data) if input_type == SigInput.UNKNOWN: - if not quiet: - error("Error in parsing signature; quitting. Cannot open file or invalid signature") if do_raise: raise Exception("Error in parsing signature; quitting. Cannot open file or invalid signature") return @@ -301,9 +298,6 @@ def load_signatures( yield sig except Exception as e: - if not quiet: - error("Error in parsing signature; quitting.") - error("Exception: {}", str(e)) if do_raise: raise diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py index 4bb537478b..258b99cee0 100644 --- a/src/sourmash/sourmash_args.py +++ b/src/sourmash/sourmash_args.py @@ -1,5 +1,5 @@ """ -Utility functions for dealing with input args to the sourmash command line. +Utility functions for sourmash CLI commands. 
""" import sys import os @@ -9,7 +9,7 @@ import screed -from sourmash import load_sbt_index +from sourmash.sbtmh import load_sbt_index from sourmash.lca.lca_db import load_single_database import sourmash.exceptions @@ -365,7 +365,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None): # special case stdin if not loaded and filename == '-': - db = sourmash.load_signatures(sys.stdin, quiet=True, do_raise=True) + db = signature.load_signatures(sys.stdin, do_raise=True) db = list(db) loaded = True dbtype = DatabaseType.SIGLIST @@ -376,7 +376,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None): for thisfile in traverse_find_sigs([filename], traverse_yield_all): try: with open(thisfile, 'rt') as fp: - x = sourmash.load_signatures(fp, quiet=True, do_raise=True) + x = signature.load_signatures(fp, do_raise=True) siglist = list(x) all_sigs.extend(siglist) except (IOError, sourmash.exceptions.SourmashError): @@ -394,7 +394,7 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None): # CTB: could make this a generator, with some trickery; but for # now, just force into list. 
with open(filename, 'rt') as fp: - db = sourmash.load_signatures(fp, quiet=True, do_raise=True) + db = signature.load_signatures(fp, do_raise=True) db = list(db) loaded = True diff --git a/tests/test_sbt.py b/tests/test_sbt.py index 1271c3b460..a825067cc0 100644 --- a/tests/test_sbt.py +++ b/tests/test_sbt.py @@ -9,7 +9,7 @@ from sourmash.exceptions import IndexNotSupported from sourmash.sbt import SBT, GraphFactory, Leaf, Node from sourmash.sbtmh import (SigLeaf, search_minhashes, - search_minhashes_containment) + search_minhashes_containment, load_sbt_index) from sourmash.sbt_storage import (FSStorage, RedisStorage, IPFSStorage, ZipStorage) @@ -775,7 +775,7 @@ def test_sbt_protein_command_index(c): c.run_sourmash('index', db_out, sigfile1, sigfile2, '--scaled', '100', '-k', '19', '--protein') - db2 = sourmash.load_sbt_index(db_out) + db2 = load_sbt_index(db_out) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) @@ -821,7 +821,7 @@ def test_sbt_hp_command_index(c): c.run_sourmash('index', db_out, sigfile1, sigfile2, '--scaled', '100', '-k', '19', '--hp') - db2 = sourmash.load_sbt_index(db_out) + db2 = load_sbt_index(db_out) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) @@ -867,7 +867,7 @@ def test_sbt_dayhoff_command_index(c): c.run_sourmash('index', db_out, sigfile1, sigfile2, '--scaled', '100', '-k', '19', '--dayhoff') - db2 = sourmash.load_sbt_index(db_out) + db2 = load_sbt_index(db_out) sig1 = sourmash.load_one_signature(sigfile1) sig2 = sourmash.load_one_signature(sigfile2) diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index 40cc83ffb9..7fc2df5915 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -4121,6 +4121,12 @@ def test_do_sourmash_index_zipfile_append(c): first_half = testdata_sigs[:half_point] second_half = testdata_sigs[half_point:] + print(first_half) + print(second_half) + + # should be no overlap + assert not 
set(first_half).intersection(set(second_half)) + with pytest.warns(None) as record: c.run_sourmash('index', '-k', '31', 'zzz.sbt.zip', *first_half) @@ -4138,7 +4144,8 @@ def test_do_sourmash_index_zipfile_append(c): c.run_sourmash('index', "--append", '-k', '31', 'zzz.sbt.zip', *second_half) # UserWarning is raised when there are duplicated entries in the zipfile - assert not record + print(record) + assert not record, record print(c) assert c.last_result.status == 0 diff --git a/utils/check-tree.py b/utils/check-tree.py index 4091c7ce84..12fc0190de 100644 --- a/utils/check-tree.py +++ b/utils/check-tree.py @@ -15,7 +15,7 @@ def main(): p.add_argument('sbt') args = p.parse_args() - db = sourmash.load_sbt_index(args.sbt) + db = sourmash.sbtmh.load_sbt_index(args.sbt) threshold = THRESHOLD for leaf in db.leaves():