Skip to content

Commit

Permalink
basic tests for picklist functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Jun 12, 2021
1 parent 3ecfb48 commit 505b04f
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/sourmash/sig/picklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
# full md5: the picklist value is used as-is, with no transformation
preprocess['md5'] = lambda x: x

# identifier matching - the identifier is the first space-delimited token.
# NOTE(review): this span comes from a rendered diff ("2 additions & 2
# deletions"), so the first pair of assignments below is presumably the
# removed version and the second pair the added one - confirm against the
# actual file. Read as plain Python, the later assignments are the ones that
# take effect.
preprocess['ident'] = lambda x: x.split(' ')[0].split('.')[0]
preprocess['ident.'] = lambda x: x.split(' ')[0]
# 'ident.': first space-delimited token, cut at its first '.'
preprocess['ident.'] = lambda x: x.split(' ')[0].split('.')[0]
# 'ident': first space-delimited token, kept whole
preprocess['ident'] = lambda x: x.split(' ')[0]

# match 8 characters: keep only the first 8 characters of the value
preprocess['md5prefix8'] = lambda x: x[:8]
Expand Down
155 changes: 155 additions & 0 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,161 @@ def test_sig_extract_7_no_ksize(c):
assert len(siglist) == 3


def test_sig_extract_8_picklist_md5(runtmp):
    """Extract 47 from (47, 63) with a picklist on the full md5 column."""
    path_47 = utils.get_test_data('47.fa.sig')
    path_63 = utils.get_test_data('63.fa.sig')

    # single picklist row; the test expects it to select signature 47
    pick_row = {
        'exactName': 'NC_009665.1 Shewanella baltica OS185, complete genome',
        'md5full': '09a08691ce52952152f0e866a59f6261',
        'md5short': '09a08691ce5295215',
        'fullIdent': 'NC_009665.1',
        'nodotIdent': 'NC_009665',
    }

    # write the picklist CSV: a header line followed by the one row
    picklist_csv = runtmp.output('pick.csv')
    with open(picklist_csv, 'w', newline='') as out_fp:
        writer = csv.DictWriter(out_fp, fieldnames=list(pick_row))
        writer.writeheader()
        writer.writerow(pick_row)

    # picklist argument: CSV path, then the column to read ('md5full'),
    # then the kind of match to perform ('md5')
    runtmp.sourmash('sig', 'extract', path_47, path_63,
                    '--picklist', f"{picklist_csv}:md5full:md5")

    # whatever was printed to stdout should load as signature 47
    expected = sourmash.load_one_signature(path_47)
    extracted = sourmash.load_one_signature(runtmp.last_result.out)

    assert extracted == expected


def test_sig_extract_8_picklist_name(runtmp):
    """Extract 47 from (47, 63) with a picklist on the exact name column."""
    path_47 = utils.get_test_data('47.fa.sig')
    path_63 = utils.get_test_data('63.fa.sig')

    # single picklist row; the test expects it to select signature 47
    pick_row = {
        'exactName': 'NC_009665.1 Shewanella baltica OS185, complete genome',
        'md5full': '09a08691ce52952152f0e866a59f6261',
        'md5short': '09a08691ce5295215',
        'fullIdent': 'NC_009665.1',
        'nodotIdent': 'NC_009665',
    }

    # write the picklist CSV: a header line followed by the one row
    picklist_csv = runtmp.output('pick.csv')
    with open(picklist_csv, 'w', newline='') as out_fp:
        writer = csv.DictWriter(out_fp, fieldnames=list(pick_row))
        writer.writeheader()
        writer.writerow(pick_row)

    # picklist argument: CSV path, then the column to read ('exactName'),
    # then the kind of match to perform ('name')
    runtmp.sourmash('sig', 'extract', path_47, path_63,
                    '--picklist', f"{picklist_csv}:exactName:name")

    # whatever was printed to stdout should load as signature 47
    expected = sourmash.load_one_signature(path_47)
    extracted = sourmash.load_one_signature(runtmp.last_result.out)

    assert extracted == expected


def test_sig_extract_8_picklist_ident(runtmp):
    """Extract 47 from (47, 63) with a picklist on the full identifier."""
    path_47 = utils.get_test_data('47.fa.sig')
    path_63 = utils.get_test_data('63.fa.sig')

    # single picklist row; the test expects it to select signature 47
    pick_row = {
        'exactName': 'NC_009665.1 Shewanella baltica OS185, complete genome',
        'md5full': '09a08691ce52952152f0e866a59f6261',
        'md5short': '09a08691ce5295215',
        'fullIdent': 'NC_009665.1',
        'nodotIdent': 'NC_009665',
    }

    # write the picklist CSV: a header line followed by the one row
    picklist_csv = runtmp.output('pick.csv')
    with open(picklist_csv, 'w', newline='') as out_fp:
        writer = csv.DictWriter(out_fp, fieldnames=list(pick_row))
        writer.writeheader()
        writer.writerow(pick_row)

    # picklist argument: CSV path, then the column to read ('fullIdent',
    # which keeps the '.1' suffix), then the kind of match ('ident')
    runtmp.sourmash('sig', 'extract', path_47, path_63,
                    '--picklist', f"{picklist_csv}:fullIdent:ident")

    # whatever was printed to stdout should load as signature 47
    expected = sourmash.load_one_signature(path_47)
    extracted = sourmash.load_one_signature(runtmp.last_result.out)

    assert extracted == expected


def test_sig_extract_8_picklist_ident_dot(runtmp):
    """Extract 47 from (47, 63) with a picklist on the dot-less identifier."""
    path_47 = utils.get_test_data('47.fa.sig')
    path_63 = utils.get_test_data('63.fa.sig')

    # single picklist row; the test expects it to select signature 47
    pick_row = {
        'exactName': 'NC_009665.1 Shewanella baltica OS185, complete genome',
        'md5full': '09a08691ce52952152f0e866a59f6261',
        'md5short': '09a08691ce5295215',
        'fullIdent': 'NC_009665.1',
        'nodotIdent': 'NC_009665',
    }

    # write the picklist CSV: a header line followed by the one row
    picklist_csv = runtmp.output('pick.csv')
    with open(picklist_csv, 'w', newline='') as out_fp:
        writer = csv.DictWriter(out_fp, fieldnames=list(pick_row))
        writer.writeheader()
        writer.writerow(pick_row)

    # picklist argument: CSV path, then the column to read ('nodotIdent',
    # the identifier without its '.1' suffix), then the kind of match
    # ('ident.' - note the trailing dot in the type name)
    runtmp.sourmash('sig', 'extract', path_47, path_63,
                    '--picklist', f"{picklist_csv}:nodotIdent:ident.")

    # whatever was printed to stdout should load as signature 47
    expected = sourmash.load_one_signature(path_47)
    extracted = sourmash.load_one_signature(runtmp.last_result.out)

    assert extracted == expected


def test_sig_extract_8_picklist_md5_short(runtmp):
    """Extract 47 from (47, 63) with a picklist on a shortened md5 value."""
    path_47 = utils.get_test_data('47.fa.sig')
    path_63 = utils.get_test_data('63.fa.sig')

    # single picklist row; the test expects it to select signature 47
    pick_row = {
        'exactName': 'NC_009665.1 Shewanella baltica OS185, complete genome',
        'md5full': '09a08691ce52952152f0e866a59f6261',
        'md5short': '09a08691ce5295215',
        'fullIdent': 'NC_009665.1',
        'nodotIdent': 'NC_009665',
    }

    # write the picklist CSV: a header line followed by the one row
    picklist_csv = runtmp.output('pick.csv')
    with open(picklist_csv, 'w', newline='') as out_fp:
        writer = csv.DictWriter(out_fp, fieldnames=list(pick_row))
        writer.writeheader()
        writer.writerow(pick_row)

    # picklist argument: CSV path, then the column to read ('md5short'),
    # then the kind of match ('md5prefix8').
    # NOTE(review): the md5short value is longer than 8 characters;
    # presumably only its first 8 take part in the match - confirm.
    runtmp.sourmash('sig', 'extract', path_47, path_63,
                    '--picklist', f"{picklist_csv}:md5short:md5prefix8")

    # whatever was printed to stdout should load as signature 47
    expected = sourmash.load_one_signature(path_47)
    extracted = sourmash.load_one_signature(runtmp.last_result.out)

    assert extracted == expected


@utils.in_tempdir
def test_sig_flatten_1(c):
# extract matches to several names from among several signatures & flatten
Expand Down

0 comments on commit 505b04f

Please sign in to comment.