Skip to content

Commit

Permalink
[MRG] fix Signature.minhash API during sourmash sketch (#2329)
Browse files Browse the repository at this point in the history
Addresses #1167 by fixing `signature_first_mh` in the signature FFI to
support both `KmerMinHash` and `KmerMinHashBTree`; this way, sketches
generated by the Python `_signatures_for_sketch_factory` (which are
BTree sketches in Rust) can be passed back to
Python.

Specifically, this PR:
* implements `From<&KmerMinHashBTree>` for `KmerMinHash`;
* adds an arm to the match statement in the FFI function to catch
`KmerMinHashBTree` structs and convert them into `KmerMinHash` structs
suitable for returning to Python;
* provides a default match arm to catch (e.g.) `HyperLogLog` and raise
an error back to Python reporting that it found an unsupported sketch
type;
* adds a little to the Python tests to prevent backsliding.

Note that I don't _think_ this does any more copying of sketches than
the current codebase does.

And, on that halcyonic day when the FFI properly wraps
`KmerMinHashBTree`, this code can be replaced by returning the proper
Rust object without conversion.

Fixes #1167.
  • Loading branch information
ctb authored Oct 15, 2022
1 parent 5abc988 commit 2198b32
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 5 deletions.
17 changes: 12 additions & 5 deletions src/core/src/ffi/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use std::io;
use std::os::raw::c_char;
use std::slice;

use crate::errors::SourmashError;

use crate::encodings::HashFunctions;
use crate::signature::Signature;
use crate::sketch::Sketch;
Expand Down Expand Up @@ -165,11 +167,16 @@ ffi_fn! {
unsafe fn signature_first_mh(ptr: *const SourmashSignature) -> Result<*mut SourmashKmerMinHash> {
let sig = SourmashSignature::as_rust(ptr);

if let Some(Sketch::MinHash(mh)) = sig.signatures.get(0) {
Ok(SourmashKmerMinHash::from_rust(mh.clone()))
} else {
// TODO: need to select the correct one
unimplemented!()
match sig.signatures.get(0) {
Some(Sketch::MinHash(mh)) => {
Ok(SourmashKmerMinHash::from_rust(mh.clone()))
},
Some(Sketch::LargeMinHash(mh_btree)) => {
Ok(SourmashKmerMinHash::from_rust(mh_btree.into()))
},
_ => Err(SourmashError::Internal {
message: "found unsupported sketch type".to_string()
}),
}
}
}
Expand Down
24 changes: 24 additions & 0 deletions src/core/src/sketch/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1617,6 +1617,30 @@ impl From<KmerMinHashBTree> for KmerMinHash {
}
}

impl From<&KmerMinHashBTree> for KmerMinHash {
fn from(other: &KmerMinHashBTree) -> KmerMinHash {
let mut new_mh = KmerMinHash::new(
other.scaled(),
other.ksize() as u32,
other.hash_function(),
other.seed(),
other.track_abundance(),
other.num(),
);

let mins = other.mins.iter().copied().collect();
let abunds = other
.abunds
.as_ref()
.map(|abunds| abunds.values().cloned().collect());

new_mh.mins = mins;
new_mh.abunds = abunds;

new_mh
}
}

impl From<KmerMinHash> for KmerMinHashBTree {
fn from(other: KmerMinHash) -> KmerMinHashBTree {
let mut new_mh = KmerMinHashBTree::new(
Expand Down
4 changes: 4 additions & 0 deletions tests/test_sourmash_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ def test_dna_defaults():
assert not params.hp
assert not params.protein

siglist = factory()
sig = siglist[0]
sig.minhash


def test_dna_override_1():
factory = _signatures_for_sketch_factory(['k=21,scaled=2000,abund'],
Expand Down

0 comments on commit 2198b32

Please sign in to comment.