Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix table.get("wrong_length_string") gives core dump #585

Merged
merged 7 commits into from
Sep 1, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
2014-08-30 Rhys Kidd <rhyskidd@gmail.com>

* khmer/_khmermodule.cc: fix core dump when table.get() is passed a string shorter than the k-mer size
* lib/kmer_hash.cc: improve quality of exception error message
* tests/{test_counting_hash,test_counting_single,test_hashbits,
test_hashbits_obj}.py: add regression unit tests

2014-08-28 Titus Brown <titus@idyll.org>

* scripts/normalize-by-median.py: added reporting output after main loop
Expand Down
14 changes: 14 additions & 0 deletions khmer/_khmermodule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,13 @@ static PyObject * hash_get(PyObject * self, PyObject * args)
count = counting->get_count((unsigned int) pos);
} else if (PyString_Check(arg)) {
std::string s = PyString_AsString(arg);

if (strlen(s.c_str()) < counting->ksize()) {
PyErr_SetString(PyExc_ValueError,
"string length must >= the counting table k-mer size");
return NULL;
}

count = counting->get_count(s.c_str());
}

Expand Down Expand Up @@ -2328,6 +2335,13 @@ static PyObject * hashbits_get(PyObject * self, PyObject * args)
count = hashbits->get_count((unsigned int) pos);
} else if (PyString_Check(arg)) {
std::string s = PyString_AsString(arg);

if (strlen(s.c_str()) < hashbits->ksize()) {
PyErr_SetString(PyExc_ValueError,
"string length must >= the presence table k-mer size");
return NULL;
}

count = hashbits->get_count(s.c_str());
} else {
PyErr_SetString(PyExc_ValueError, "must pass in an int or string");
Expand Down
2 changes: 1 addition & 1 deletion lib/kmer_hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ HashIntoType _hash(const char * kmer, const WordLength k,
{
// sizeof(HashIntoType) * 8 bits / 2 bits/base
if (!(k <= sizeof(HashIntoType)*4) || !(strlen(kmer) >= k)) {
throw khmer_exception();
throw khmer_exception("Supplied kmer string doesn't match the underlying k-size.");
}

HashIntoType h = 0, r = 0;
Expand Down
16 changes: 16 additions & 0 deletions tests/test_counting_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,22 @@ def test_badget():
print str(err)


def test_badget_2():
    # Regression test: get() with a string shorter than the table's k-mer
    # size (6 here) must raise ValueError rather than crash.
    table = khmer.new_counting_hash(6, 1e6)
    table.consume(DNA)

    # Full-length 6-mers are looked up normally.
    assert table.get("AGCTTT") == 1
    assert table.get("GATGAG") == 0

    # A 5-character query is too short for this table.
    try:
        table.get("AGCTT")
    except ValueError as err:
        print(str(err))
    else:
        assert 0, "this should fail"


def test_badtrim():
countingtable = khmer.new_counting_hash(6, 1e6, 2)

Expand Down
18 changes: 18 additions & 0 deletions tests/test_counting_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,24 @@ def test_get_mincount_rc():
assert x == 2


def test_badget():
    # Regression test: get() with a string shorter than the table's k-mer
    # size (6 here) must raise ValueError rather than crash.
    sequence = (
        "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG"
    )

    table = khmer.new_hashtable(6, 4 ** 10)
    table.consume(sequence)

    # Full-length 6-mers are looked up normally.
    assert table.get("AGCTTT") == 1
    assert table.get("GATGAG") == 0

    # A 5-character query is too short for this table.
    try:
        table.get("AGCTT")
    except ValueError as err:
        print(str(err))
    else:
        assert 0, "this should fail"


def test_64bitshift():
kh = khmer.new_hashtable(25, 4)
fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG"
Expand Down
19 changes: 19 additions & 0 deletions tests/test_hashbits.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,25 @@ def test_simple_median():
assert average == 1.0
assert stddev == 0.0


def test_badget():
    # Regression test: get() with a string shorter than the presence
    # table's k-mer size (6 here) must raise ValueError rather than crash.
    sequence = (
        "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG"
    )

    bits = khmer.new_hashbits(6, 1e6, 1)
    bits.consume(sequence)

    # Full-length 6-mers are looked up normally.
    assert bits.get("AGCTTT") == 1
    assert bits.get("GATGAG") == 0

    # A 5-character query is too short for this table.
    try:
        bits.get("AGCTT")
    except ValueError as err:
        print(str(err))
    else:
        assert 0, "this should fail"


####


Expand Down
18 changes: 18 additions & 0 deletions tests/test_hashbits_obj.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,24 @@ def test_simple_median():
assert stddev == 0.0


def test_badget():
    # Regression test: Hashbits.get() with a string shorter than the
    # k-mer size (6 here) must raise ValueError rather than crash.
    sequence = (
        "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG"
    )

    bits = khmer.Hashbits(6, 1e6, 1)
    bits.consume(sequence)

    # Full-length 6-mers are looked up normally.
    assert bits.get("AGCTTT") == 1
    assert bits.get("GATGAG") == 0

    # A 5-character query is too short for this table.
    try:
        bits.get("AGCTT")
    except ValueError as err:
        print(str(err))
    else:
        assert 0, "this should fail"


def test_bad_primes():
try:
countingtable = khmer._Hashbits.__new__(
Expand Down