From cc3546d5fc8d77c3f647b9574021e429843a8656 Mon Sep 17 00:00:00 2001 From: Camille Scott Date: Sun, 23 Nov 2014 19:41:37 -0500 Subject: [PATCH 1/6] Initial implementation of read-only buffer access to raw tables --- khmer/_khmermodule.cc | 37 +++++++++++++++++++++++++++++++++++++ lib/counting.hh | 6 ++++++ 2 files changed, 43 insertions(+) diff --git a/khmer/_khmermodule.cc b/khmer/_khmermodule.cc index 4b29501d26..64248c432a 100644 --- a/khmer/_khmermodule.cc +++ b/khmer/_khmermodule.cc @@ -682,6 +682,40 @@ PyObject * hash_abundance_distribution_with_reads_parser(khmer_KCountingHash_Object * me, PyObject * args); +static +PyObject * +hash_get_raw_tables(khmer_KCountingHash_Object * self, PyObject * args) +{ + CountingHash * counting = self->counting; + + Byte ** table_ptrs = counting->get_raw_tables(); + std::vector sizes = counting->get_tablesizes(); + + PyObject * raw_tables = PyList_New(sizes.size()); + for (unsigned int i=0; iobj = NULL; + buf->len = sizes[i]; + buf->readonly = 1; + buf->ndim = 1; + buf->format = NULL; + buf->shape = NULL; + buf->strides = NULL; + buf->suboffsets = NULL; + buf->internal = NULL; + bufs.push_back(buf); + */ + PyObject * buf = PyBuffer_FromMemory(table_ptrs[i], sizes[i]); + if(!PyBuffer_Check(buf)) + return NULL; + PyList_SET_ITEM(raw_tables, i, buf); + //Py_XDECREF(buf); + } + + return raw_tables; +} + static PyObject * hash_set_use_bigcount(khmer_KCountingHash_Object * me, PyObject * args) @@ -1532,6 +1566,9 @@ static PyMethodDef khmer_counting_methods[] = { }, { "output_fasta_kmer_pos_freq", (PyCFunction)hash_output_fasta_kmer_pos_freq, METH_VARARGS, "" }, { "get", (PyCFunction)hash_get, METH_VARARGS, "Get the count for the given k-mer" }, + { "get_raw_tables", (PyCFunction)hash_get_raw_tables, + METH_VARARGS, "Get a list of the raw tables as memoryview objects" + }, { "get_min_count", (PyCFunction)hash_get_min_count, METH_VARARGS, "Get the smallest count of all the k-mers in the string" }, { "get_max_count", (PyCFunction)hash_get_max_count, METH_VARARGS, "Get the largest count of all the k-mers in the string" }, { "get_median_count", (PyCFunction)hash_get_median_count, METH_VARARGS, "Get the median, average, and stddev of the k-mer counts in the string" }, diff --git a/lib/counting.hh b/lib/counting.hh index aae5b87ded..cc1cc1300c 100644 --- a/lib/counting.hh +++ b/lib/counting.hh @@ -87,6 +87,12 @@ public: } } + // Writing to the tables outside of defined methods has undefined behavior! + // As such, this should only be used to return read-only interfaces + Byte ** get_raw_tables() { + return _counts; + } + virtual BoundedCounterType test_and_set_bits(const char * kmer) { BoundedCounterType x = get_count(kmer); // @CTB just hash it, yo. From 0cdfeb1018dae1c6c5b2ff665069e0c7163da8e9 Mon Sep 17 00:00:00 2001 From: Camille Scott Date: Sun, 23 Nov 2014 19:53:32 -0500 Subject: [PATCH 2/6] Remove some code cruft --- khmer/_khmermodule.cc | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/khmer/_khmermodule.cc b/khmer/_khmermodule.cc index 64248c432a..9ef8eb95fe 100644 --- a/khmer/_khmermodule.cc +++ b/khmer/_khmermodule.cc @@ -693,24 +693,10 @@ hash_get_raw_tables(khmer_KCountingHash_Object * self, PyObject * args) PyObject * raw_tables = PyList_New(sizes.size()); for (unsigned int i=0; iobj = NULL; - buf->len = sizes[i]; - buf->readonly = 1; - buf->ndim = 1; - buf->format = NULL; - buf->shape = NULL; - buf->strides = NULL; - buf->suboffsets = NULL; - buf->internal = NULL; - bufs.push_back(buf); - */ PyObject * buf = PyBuffer_FromMemory(table_ptrs[i], sizes[i]); if(!PyBuffer_Check(buf)) return NULL; PyList_SET_ITEM(raw_tables, i, buf); - //Py_XDECREF(buf); } return raw_tables; From bbad77bce04e9977fcebcd7b31daa5ac918796ea Mon Sep 17 00:00:00 2001 From: Camille Scott Date: Sun, 23 Nov 2014 20:26:00 -0500 Subject: [PATCH 3/6] Add tests for get_raw_tables() --- tests/test_counting_hash.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_counting_hash.py b/tests/test_counting_hash.py index 65055ef695..f76ea81db3 100644 --- a/tests/test_counting_hash.py +++ b/tests/test_counting_hash.py @@ -100,6 +100,25 @@ def test_collision_3(self): assert hi.get(GG) == 2 +def test_get_raw_tables(): + ht = khmer.new_counting_hash(20, 1e5, 4) + tables = ht.get_raw_tables() + + for size, table in zip(ht.hashsizes(), tables): + assert type(table) is buffer + assert size == len(table) + +def test_get_raw_tables_view(): + ht = khmer.new_counting_hash(20, 1e5, 4) + tables = ht.get_raw_tables() + for t in tables: + m = memoryview(t) + assert sum(m.tolist()) == 0 + ht.consume('AAAATTTTCCCCGGGGAAAA') + for t in tables: + m = memoryview(t) + assert sum(m.tolist()) == 1 + @attr('linux') def test_toobig(): From 80b8475fed3651b43b38bc2c46a08ee6d1d203dd Mon Sep 17 00:00:00 2001 From: Kevin Murray Date: Tue, 10 Mar 2015 10:15:44 +1100 Subject: [PATCH 4/6] make format the get_raw_tables changes from #671 --- khmer/_khmermodule.cc | 8 +++++--- lib/counting.hh | 3 ++- tests/test_counting_hash.py | 4 +++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/khmer/_khmermodule.cc b/khmer/_khmermodule.cc index 9ef8eb95fe..d3282d9212 100644 --- a/khmer/_khmermodule.cc +++ b/khmer/_khmermodule.cc @@ -694,8 +694,9 @@ hash_get_raw_tables(khmer_KCountingHash_Object * self, PyObject * args) PyObject * raw_tables = PyList_New(sizes.size()); for (unsigned int i=0; i Date: Tue, 10 Mar 2015 12:46:22 +1100 Subject: [PATCH 5/6] fix pylint complaints about new single-letter vars In test_counting_hash.py --- tests/test_counting_hash.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_counting_hash.py b/tests/test_counting_hash.py index 68f2099f51..4670343c8f 100644 --- a/tests/test_counting_hash.py +++ b/tests/test_counting_hash.py @@ -113,13 +113,13 @@ def test_get_raw_tables(): def test_get_raw_tables_view(): ht = khmer.new_counting_hash(20, 1e5, 4) tables = ht.get_raw_tables() - for t in tables: - m = memoryview(t) - assert sum(m.tolist()) == 0 + for tab in tables: + memv = memoryview(tab) + assert sum(memv.tolist()) == 0 ht.consume('AAAATTTTCCCCGGGGAAAA') - for t in tables: - m = memoryview(t) - assert sum(m.tolist()) == 1 + for tab in tables: + memv = memoryview(tab) + assert sum(memv.tolist()) == 1 @attr('linux') From 4d12776678c7fe002aed33343e9153fbf4738351 Mon Sep 17 00:00:00 2001 From: Kevin Murray Date: Tue, 10 Mar 2015 12:57:58 +1100 Subject: [PATCH 6/6] Document #671 in the changelog. --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4d56b90c77..51ffde73d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-03-10 Camille Scott + + * lib/counting.hh, khmer/_khmermodule.cc: Expose the raw tables of + count-min sketches to the world of python using a buffer interface. + * tests/test_counting_hash.py: Tests of the above functionality. + 2015-03-08 Michael R. Crusoe * Makefile: make 'pep8' target be more verbose