Merge branch 'master' of https://github.com/ged-lab/khmer into fix/di…

…b-lab#745
drtamermansour · Feb 24, 2015 · 5368387 · 5368387
2 parents f944af3 + 43e2217
commit 5368387
Show file tree

Hide file tree

Showing 60 changed files with 2,257 additions and 2,400 deletions.
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,90 @@
+2015-02-24  Luiz Irber  <irberlui@msu.edu>
+
+   * khmer/_khmermodule.cc: Update extension to use recommended practices,
+   PyLong instead of PyInt, Type initialization, PyBytes instead of PyString.
+   Replace common initialization with explicit type structs, and all types
+   conform to the CPython checklist.
+
+2015-02-24  Tamer A. Mansour  <drtamermansour@gmail.com>
+
+   * scripts/abundance-dist.py: Use CSV format for the histogram. Includes
+   column headers.
+   * tests/test_scripts.py: add coverage for the new --csv option in
+   abundance-dist.py
+
+2015-02-24  Michael R. Crusoe  <mcrusoe@msu.edu>
+
+   * jenkins-build.sh: remove examples/stamps/do.sh testing for now; takes too
+   long to run on every build. Related to #836
+
+2015-02-24  Kevin Murray  <spam@kdmurray.id.au>
+
+   * scripts/interleave-reads.py: Make the output file name print nicely.
+
+2015-02-23  Titus Brown  <titus@idyll.org>
+
+   * khmer/utils.py: added 'check_is_left' and 'check_is_right' functions;
+   fixed bug in check_is_pair.
+   * tests/test_functions.py: added tests for now-fixed bug in check_is_pair,
+   as well as 'check_is_left' and 'check_is_right'.
+   * scripts/interleave-reads.py: updated to handle Casava 1.8 formatting.
+   * scripts/split-paired-reads.py: fixed bug where sequences with bad names
+   got dropped; updated to properly handle Casava 1.8 names in FASTQ files.
+   * scripts/count-median.py: added '--csv' output format; updated to properly
+   handle Casava 1.8 FASTQ format when '--csv' is specified.
+   * scripts/normalize-by-median.py: replaced pair checking with
+   utils.check_is_pair(), which properly handles Casava 1.8 FASTQ format.
+   * tests/test_scripts.py: updated script tests to check Casava 1.8
+   formatting; fixed extract-long-sequences.py test.
+   * scripts/{extract-long-sequences.py,extract-paired-reads.py,
+   fastq-to-fasta.py,readstats.py,sample-reads-randomly.py,trim-low-abund.py},
+   khmer/thread_utils.py: updated to handle Casava 1.8 FASTQ format by
+   setting parse_description=False in screed.open(...).
+   * tests/test-data/{paired-mixed.fq,paired-mixed.fq.pe,random-20-a.fq,
+   test-abund-read-2.fq,test-abund-read-2.paired2.fq,test-abund-read-paired.fa,
+   test-abund-read-paired.fq}: switched some sequences over to Casava 1.8
+   format, to test format handling.
+   * tests/test-data/{casava_18-pe.fq,test-reads.fq.gz}: new test file for
+   Casava 1.8 format handling.
+   * tests/test-data/{overlap.curve,paired-mixed.fq.1,paired-mixed.fq.2,
+   simple_1.fa,simple_2.fa,simple_3.fa,test-colors.fa,test-est.fa,
+   test-graph3.fa,test-graph4.fa,test-graph6.fa}: removed no-longer used
+   test files.
+
+2015-02-23  Titus Brown  <titus@idyll.org>
+
+   * setup.cfg: set !linux flag by default, to avoid running tests that
+   request too much memory when 'nosetests' is run.  (This is an OS difference
+   where Mac OS X attempts to allocate as much memory as requested, while
+   on Linux it just crashes).
+
+2015-02-23  Michael R. Crusoe  <mcrusoe@msu.edu>
+
+   * khmer/{__init__.py,_khmermodule.cc},lib/{hashbits.cc,hashbits.hh,
+   hashtable},tests/test_{c_wrapper,read_parsers}.py: remove unused callback
+   functionality
+
+2015-02-23  Michael R. Crusoe  <mcrusoe@msu.edu>
+
+   * setup.py: point to the latest screed release candidate to work around
+   versioneer bug.
+
+2015-02-23  Tamer A. Mansour  <drtamermansour@gmail.com>
+
+   * examples/stamps/do.sh: changed the --savehash argument to --savetable
+   and set the file mode to u+x
+   * jenkins-build.sh: add a test to check for the do.sh file
+
+2015-02-23  Kevin Murray  <spam@kdmurray.id.au>
+
+   * khmer/load_pe.py: Remove unused/undocumented module. See #784
+
+2015-02-21  Hussien Alameldin  <hussien@msu.edu>
+
+   * sandbox/normalize-by-align.py: "copyright header 2013-2015 was added"
+   * sandbox/read_aligner.py: "copyright header 2013-2015 was added"
+   * sandbox/slice-reads-by-coverage.py: "copyright header 2014 was added"
+
 2015-02-21  Hussien Alameldin  <hussien@msu.edu>
 
    * sandbox/calc-best-assembly.py, collect-variants.py, graph-size.py: Set executable bits using "chmod +x"

diff --git a/examples/stamps/do.sh b/examples/stamps/do.sh
@@ -10,7 +10,7 @@ load-into-counting.py -x 1e8 -k 20 stamps-reads.ct \
 abundance-dist.py stamps-reads.ct ../../data/stamps-reads.fa.gz \
 	stamps-reads.hist
 normalize-by-median.py -k 20 -C 10 -x 1e8 ../../data/stamps-reads.fa.gz \
-	--savehash stamps-dn.ct
+	--savetable stamps-dn.ct
 abundance-dist.py stamps-dn.ct stamps-reads.fa.gz.keep stamps-dn.hist
 do-partition.py -k 32 -x 1e8 -s 1e4 -T 8 stamps-part \
 	../../data/stamps-reads.fa.gz
@@ -27,7 +27,7 @@ abundance-dist.py stamps-part.g1.ct stamps-part.group0001.fa stamps-part.g1.hist
 
 filter-abund.py stamps-dn.ct stamps-reads.fa.gz.keep
 normalize-by-median.py -x 1e8 -k 20 -C 10 stamps-reads.fa.gz.keep.abundfilt \
-	--savehash stamps-dn3.ct
+	--savetable stamps-dn3.ct
 
 abundance-dist.py stamps-dn3.ct stamps-reads.fa.gz.keep.abundfilt.keep \
 	stamps-dn3.hist
diff --git a/jenkins-build.sh b/jenkins-build.sh
@@ -64,3 +64,7 @@ if type sloccount >/dev/null 2>&1
 then
 	make sloccount.sc
 fi
+
+# takes too long to run on every build
+#bash -ex -c 'cd examples/stamps/; ./do.sh' || { echo examples/stamps/do.sh no longer runs; /bin/false; }
+
diff --git a/khmer/__init__.py b/khmer/__init__.py
@@ -8,12 +8,10 @@
 This is khmer; please see http://khmer.readthedocs.org/.
 """
 
-from khmer._khmer import _new_counting_hash
-from khmer._khmer import _new_hashbits
-from khmer._khmer import set_reporting_callback
-from khmer._khmer import _LabelHash
-from khmer._khmer import _Hashbits
-from khmer._khmer import _HLLCounter
+from khmer._khmer import CountingHash
+from khmer._khmer import LabelHash as _LabelHash
+from khmer._khmer import Hashbits as _Hashbits
+from khmer._khmer import HLLCounter as _HLLCounter
 from khmer._khmer import ReadAligner
 
 from khmer._khmer import forward_hash  # figuregen/*.py
@@ -59,7 +57,7 @@ def new_hashbits(k, starting_size, n_tables=2):
     """
     primes = get_n_primes_above_x(n_tables, starting_size)
 
-    return _new_hashbits(k, primes)
+    return _Hashbits(k, primes)
 
 
 def new_counting_hash(k, starting_size, n_tables=2):
@@ -73,7 +71,7 @@ def new_counting_hash(k, starting_size, n_tables=2):
     """
     primes = get_n_primes_above_x(n_tables, starting_size)
 
-    return _new_counting_hash(k, primes)
+    return CountingHash(k, primes)
 
 
 def load_hashbits(filename):
@@ -82,7 +80,7 @@ def load_hashbits(filename):
     Keyword argument:
     filename -- the name of the hashbits file
     """
-    hashtable = _new_hashbits(1, [1])
+    hashtable = _Hashbits(1, [1])
     hashtable.load(filename)
 
     return hashtable
@@ -94,22 +92,12 @@ def load_counting_hash(filename):
     Keyword argument:
     filename -- the name of the counting_hash file
     """
-    hashtable = _new_counting_hash(1, [1])
+    hashtable = CountingHash(1, [1])
     hashtable.load(filename)
 
     return hashtable
 
 
-def _default_reporting_callback(info, n_reads, other):
-    print '...', info, n_reads, other
-
-
-def reset_reporting_callback():
-    set_reporting_callback(_default_reporting_callback)
-
-reset_reporting_callback()
-
-
 def extract_hashbits_info(filename):
     """Open the given hashbits file and return a tuple of information.