From df6f5b06c4a92bcfe41b99c9a82b5fe587f578ff Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Thu, 26 Feb 2015 07:30:43 -0500
Subject: [PATCH 1/3] more sandbox cleanup

---
 ChangeLog                       |  12 +++
 doc/dev/scripts-and-sandbox.txt |   3 +
 sandbox/README.rst              |  51 ++++++-----
 sandbox/combine-pe.py           |  66 --------------
 sandbox/compare-partitions.py   |  68 --------------
 sandbox/count-within-radius.py  |  60 -------------
 sandbox/degree-by-position.py   |  47 ----------
 sandbox/dn-identify-errors.py   | 147 ------------------------------
 sandbox/ec.py                   |  60 -------------
 sandbox/error-correct-pass2.py  |  85 ------------------
 sandbox/find-unpart.py          |  53 -----------
 sandbox/normalize-by-align.py   | 155 --------------------------------
 sandbox/read_aligner.py         |  70 ---------------
 sandbox/shuffle-fasta.py        |  27 ------
 sandbox/to-casava-1.8-fastq.py  |  61 -------------
 sandbox/uniqify-sequences.py    |  67 --------------
 sandbox/write-interleave.py     |  29 ------
 17 files changed, 42 insertions(+), 1019 deletions(-)
 delete mode 100755 sandbox/combine-pe.py
 delete mode 100755 sandbox/compare-partitions.py
 delete mode 100755 sandbox/count-within-radius.py
 delete mode 100755 sandbox/degree-by-position.py
 delete mode 100755 sandbox/dn-identify-errors.py
 delete mode 100755 sandbox/ec.py
 delete mode 100755 sandbox/error-correct-pass2.py
 delete mode 100755 sandbox/find-unpart.py
 delete mode 100755 sandbox/normalize-by-align.py
 delete mode 100755 sandbox/read_aligner.py
 delete mode 100755 sandbox/shuffle-fasta.py
 delete mode 100755 sandbox/to-casava-1.8-fastq.py
 delete mode 100755 sandbox/uniqify-sequences.py
 delete mode 100755 sandbox/write-interleave.py

diff --git a/ChangeLog b/ChangeLog
index fa8761eba8..f2b4df0580 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2015-02-23 Titus Brown
+
+	* sandbox/{combine-pe.py,compare-partitions.py,count-within-radius.py,
+	degree-by-position.py,dn-identify-errors.py,ec.py,error-correct-pass2.py,
+	find-unpart.py,normalize-by-align.py,read_aligner.py,shuffle-fasta.py,
+	to-casava-1.8-fastq.py,uniqify-sequences.py}: removed from sandbox/ as
+	obsolete/unmaintained.
+	* sandbox/README.rst: updated to reflect readstats.py and trim-low-abund.py
+	promotion to scripts/.
+	* doc/dev/scripts-and-sandbox.txt: updated to reflect sandbox/ script name
+	preferences, and note to remove from README.rst when moved over to scripts/.
+
 2015-02-25 Aditi Gupta
 
 	* sandbox/{collect-reads.py, correct-errors.py,
diff --git a/doc/dev/scripts-and-sandbox.txt b/doc/dev/scripts-and-sandbox.txt
index 552141614c..a13097d176 100644
--- a/doc/dev/scripts-and-sandbox.txt
+++ b/doc/dev/scripts-and-sandbox.txt
@@ -41,6 +41,8 @@ All scripts in ``sandbox/`` must:
 * have a hash-bang line (``#! /usr/bin/env python2``) at the top
 * be command-line executable (``chmod a+x``)
 * have a Copyright message (see below)
+* have lowercase names
+* use '-' as a word separator, rather than '_' or CamelCase
 
 All *new* scripts being added to ``sandbox/`` should:
 
@@ -113,3 +115,4 @@ development/PR checklist::
 - [ ] standard command line options are implemented
 - [ ] version and citation information is output to STDERR (`khmer_args.info(...)`)
 - [ ] runtime diagnostic information (progress, etc.) is output to STDERR
+- [ ] script has been removed from sandbox/README.rst
diff --git a/sandbox/README.rst b/sandbox/README.rst
index aa393a6a31..34d9269895 100644
--- a/sandbox/README.rst
+++ b/sandbox/README.rst
@@ -6,10 +6,15 @@
 scripts that we have not fully tested.
 They are also not under semantic versioning, so their functionality and command line arguments may change without notice.
 
-We are still in the middle of triaging and documenting the various scripts.
+We are still triaging and documenting the various scripts.
 
 ----
 
+Awaiting promotion to sandbox:
+
+* unique-kmers.py - estimate the number of k-mers present in a file with the HyperLogLog low-memory probabilistic cardinality estimation algorithm.
+* correct-errors.py - streaming error correction.
+
 Scripts with recipes:
 
 * calc-median-distribution.py - plot coverage distribution; see `khmer-recipes #1 `__
@@ -21,13 +26,10 @@ To keep, document, and build recipes for:
 * abundance-hist-by-position.py - look at abundance of k-mers by position within read; use with fasta-to-abundance-hist.py
 * assemstats3.py - print out assembly statistics
+* build-sparse-graph.py - code for building a sparse graph (by Camille Scott)
 * calc-best-assembly.py - calculate the "best assembly" - used in metagenome protocol
-* calc-error-profile.py - calculate a per-base "error profile" for shotgun sequencing data, w/o a reference.
-* calc-median-distribution.py - plot coverage distribution; see `khmer-recipes #1 `__
-* collect-variants.py
-* combine-pe.py - combine partitions based on shared PE reads.
-* compare-partitions.py
-* dn-identify-errors.py - prototype script to identify errors in reads based on diginorm principles
+* calc-error-profile.py - calculate a per-base "error profile" for shotgun sequencing data, w/o a reference. (Used/tested in `2014 paper on semi-streaming algorithms `__)
+* collect-variants.py - used in a `gist `__
 * extract-single-partition.py - extract all the sequences that belong to a specific partition, from a file with multiple partitions
 * fasta-to-abundance-hist.py - generate abundance of k-mers by position within reads; use with abundance-hist-by-position.py
 * filter-below-abund.py - like filter-abund, but trim off high-abundance k-mers
@@ -41,9 +43,7 @@ To keep, document, and build recipes for:
 * normalize-by-median-pct.py - see blog post on Trinity in silico norm (http://ivory.idyll.org/blog/trinity-in-silico-normalize.html)
 * print-stoptags.py - print out the stoptag k-mers
 * print-tagset.py - print out the tagset k-mers
-* readstats.py - print out read statistics
 * renumber-partitions.py - systematically renumber partitions
-* shuffle-fasta.py - FASTA file shuffler for small FASTA files
 * shuffle-reverse-rotary.py - FASTA file shuffler for larger FASTA files
 * split-fasta.py - break a FASTA file up into smaller chunks
 * stoptag-abundance-hist.py - print out abundance histogram of stoptags
@@ -55,9 +55,7 @@ To keep, document, and build recipes for:
 * sweep-reads.py - various ways to extract reads based on k-mer overlap
 * sweep-reads2.py - various ways to extract reads based on k-mer overlap
 * sweep-reads3.py - various ways to extract reads based on k-mer overlap
-* to-casava-1.8-fastq.py - convert reads to different Casava format
-* trim-low-abund.py - streaming k-mer abundance trimming; see filter-abund for non-streaming, and look to `khmer-recipes #6 `__ for usage.
-* write-trimmomatic.py
+* write-trimmomatic.py - used to build Trimmomatic command lines in `khmer-protocols `__
 
 Good ideas to rewrite using newer tools/approaches:
 
@@ -67,20 +65,25 @@ Good ideas to rewrite using newer tools/approaches:
 * bloom_count_intersection.py - look at unique and disjoint #s of k-mers
 * split-sequences-by-length.py - break up short reads by length
 
-To examine:
+----
 
-* build-sparse-graph.py - code for building a sparse graph (by Camille Scott)
-* count-within-radius.py - calculating graph density by position with seq
-* degree-by-position.py - calculating graph degree by position in seq
-* ec.py - new error correction foo
-* error-correct-pass2.py - new error correction foo
-* find-unpart.py - something to do with finding unpartitioned sequences
-* normalize-by-align.py - new error correction foo
-* read_aligner.py - new error correction foo
-* uniqify-sequences.py - print out paths that are unique in the graph
-* write-interleave.py - is this used by any protocol etc?
+Present in commit d295bc847 but removed thereafter:
+
+* `combine-pe.py `__ - combine partitions based on shared PE reads.
+* `compare-partitions.py `__ - compare read membership in partitions.
+* `count-within-radius.py `__ - calculating graph density by position with seq
+* `degree-by-position.py `__ - calculating graph degree by position in seq
+* `dn-identify-errors.py `__ - prototype script to identify errors in reads based on diginorm principles
+* `ec.py `__ - new error correction foo
+* `error-correct-pass2.py `__ - new error correction foo
+* `find-unpart.py `__ - something to do with finding unpartitioned sequences
+* `normalize-by-align.py `__ - new error correction foo
+* `read_aligner.py `__ - new error correction foo
+* `shuffle-fasta.py `__ - FASTA file shuffler for small FASTA files
+* `to-casava-1.8-fastq.py `__ - convert reads to different Casava format
+* `uniqify-sequences.py `__ - print out paths that are unique in the graph
+* `write-interleave.py `__ - is this used by any protocol etc?
 
-----
 
 Present in commit 691b0b3ae but removed thereafter:
diff --git a/sandbox/combine-pe.py b/sandbox/combine-pe.py
deleted file mode 100755
index 9558a585a3..0000000000
--- a/sandbox/combine-pe.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-import sys
-import khmer
-from screed.fasta import fasta_iter
-
-K = 32
-
-###
-
-
-def get_partition(record):
-    pid = record['name'].rsplit('\t', 1)[1]
-    return int(pid)
-
-###
-
-
-def main():
-    ht = khmer.new_hashbits(K, 1, 1)
-
-    ht.consume_partitioned_fasta(sys.argv[1])
-    before = ht.count_partitions()
-
-    last_name = None
-    last_record = None
-    for n, record in enumerate(
-            fasta_iter(open(sys.argv[1]), parse_description=False)):
-        if n % 10000 == 0:
-            print '...', n
-
-        name = record['name'].split()[0]
-        name = name.split('/', 1)[0]
-
-        if name == last_name:
-            if 1:
-                pid_1 = ht.get_partition_id(last_record['sequence'][:K])
-                pid_2 = ht.get_partition_id(record['sequence'][:K])
-
-                ht.join_partitions(pid_1, pid_2)
-            else:  # TEST
-                pid_1 = get_partition(last_record)
-                pid_2 = get_partition(record)
-                assert pid_1 == pid_2, (last_record, record, pid_1, pid_2)
-
-        last_name = name
-        last_record = record
-
-    ht.output_partitions(sys.argv[1], sys.argv[1] + '.paired')
-    print 'before:', before
-    after = ht.count_partitions()
-    print 'after:', after
-
-    n_combined = before[0] - after[0]
-    print 'combined:', n_combined
-
-    # vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/compare-partitions.py b/sandbox/compare-partitions.py
deleted file mode 100755
index 4ea764bc42..0000000000
--- a/sandbox/compare-partitions.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-# only works for small files...
-
-import sys
-from screed.fasta import fasta_iter
-
-
-def read_partition_file(fp):
-    for n, line in enumerate(fp):
-        if n % 2 == 0:
-            surrendered = False
-            name, partition_id = line[1:].strip().rsplit('\t', 1)
-
-            if '*' in partition_id:
-                partition_id = int(partition_id[:-1])
-                surrendered = True
-            else:
-                partition_id = int(partition_id)
-        else:
-            sequence = line.strip()
-
-            yield name, partition_id, surrendered, sequence
-
-
-def main():
-    (filename1, filename2) = sys.argv[1:]
-
-    p1 = {}
-    s1 = {}
-    for name, pid, _, _ in read_partition_file(open(filename1)):
-        name = name.split('\t')[0]
-        x = p1.get(pid, set())
-        x.add(name)
-        p1[pid] = x
-
-        s1[name] = pid
-
-    p2 = {}
-    s2 = {}
-    for name, pid, _, _ in read_partition_file(open(filename2)):
-        name = name.split('\t')[0]
-        x = p2.get(pid, set())
-        x.add(name)
-        p2[pid] = x
-
-        s2[name] = pid
-
-    found = set()
-    for name in s1:
-        pid = s1[name]
-        pid2 = s2[name]
-
-        x1 = p1[pid]
-        x2 = p2[pid2]
-
-        if x1 != x2 and pid not in found:
-            print pid, pid2, len(x1), len(x2)
-            found.add(pid)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/count-within-radius.py b/sandbox/count-within-radius.py
deleted file mode 100755
index 2849bc2ebb..0000000000
--- a/sandbox/count-within-radius.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-import sys
-import screed.fasta
-import os
-import khmer
-
-K = 32
-HASHTABLE_SIZE = int(8e9)
-N_HT = 4
-RADIUS = 100
-
-###
-
-MAX_DENSITY = 2000
-
-
-def main():
-    infile = sys.argv[1]
-    outfile = sys.argv[2]
-    if len(sys.argv) > 3:
-        RADIUS = int(sys.argv[3])
-
-    print 'saving to:', outfile
-
-    print 'making hashtable'
-    ht = khmer.new_hashbits(K, HASHTABLE_SIZE, N_HT)
-
-    print 'eating', infile
-    ht.consume_fasta(infile)
-
-    print 'loading'
-    ht.save(outfile + '.ht')
-
-    outfp = open(outfile, 'w')
-    for n, record in enumerate(screed.open(infile)):
-        if n % 10000 == 0:
-            print '... saving', n
-        seq = record['sequence']
-
-        for pos in range(0, len(seq), 200):
-            subseq = seq[pos:pos + 200]
-
-            middle = (len(subseq) - K + 1) / 2
-
-            density = ht.count_kmers_within_radius(
-                subseq[middle:middle + K], RADIUS,
-                MAX_DENSITY)
-            density /= float(RADIUS)
-
-            print >>outfp, '>%s d=%.3f\n%s' % (record['name'], density, subseq)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/degree-by-position.py b/sandbox/degree-by-position.py
deleted file mode 100755
index 7649a1e08a..0000000000
--- a/sandbox/degree-by-position.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-import khmer
-import sys
-from screed.fasta import fasta_iter
-
-K = 32
-HASHTABLE_SIZE = int(8e9)
-N_HT = 4
-
-
-def main():
-    outfp = open(sys.argv[2], 'w')
-
-    ht = khmer.new_hashbits(K, HASHTABLE_SIZE, N_HT)
-    ht.consume_fasta(sys.argv[1])
-
-    hist = [0] * 200
-    histcount = [0] * 200
-    for n, record in enumerate(fasta_iter(open(sys.argv[1]))):
-        if n % 10000 == 0:
-            print '...', n
-
-        seq = record['sequence']
-        for pos in range(0, len(seq) - K + 1):
-            kmer = seq[pos:pos + K]
-            count = ht.kmer_degree(kmer)
-
-            hist[pos] += count
-            histcount[pos] += 1
-
-    for i in range(len(hist)):
-        total = hist[i]
-        count = histcount[i]
-        if not count:
-            continue
-
-        print >>outfp, i, total, count, total / float(count)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/dn-identify-errors.py b/sandbox/dn-identify-errors.py
deleted file mode 100755
index 769a505691..0000000000
--- a/sandbox/dn-identify-errors.py
+++ /dev/null
@@ -1,147 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-"""
-Streaming error trimming based on digital normalization.
-
-% python sandbox/trim-low-abund.py [ [ [ ... ] ] ]
-
-Use -h for parameter help.
-"""
-import sys
-import screed
-import os
-import khmer
-from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
-import argparse
-
-DEFAULT_NORMALIZE_LIMIT = 20
-DEFAULT_CUTOFF = 2
-
-DEFAULT_K = 32
-DEFAULT_N_HT = 4
-DEFAULT_MIN_HASHSIZE = 1e6
-
-
-def main():
-    parser = argparse.ArgumentParser(description='XXX')
-
-    env_ksize = os.environ.get('KHMER_KSIZE', DEFAULT_K)
-    env_n_hashes = os.environ.get('KHMER_N_HASHES', DEFAULT_N_HT)
-    env_hashsize = os.environ.get('KHMER_MIN_HASHSIZE', DEFAULT_MIN_HASHSIZE)
-
-    parser.add_argument('--ksize', '-k', type=int, dest='ksize',
-                        default=env_ksize,
-                        help='k-mer size to use')
-    parser.add_argument('--n_hashes', '-N', type=int, dest='n_hashes',
-                        default=env_n_hashes,
-                        help='number of hash tables to use')
-    parser.add_argument('--hashsize', '-x', type=float, dest='min_hashsize',
-                        default=env_hashsize,
-                        help='lower bound on hashsize to use')
-
-    parser.add_argument('--cutoff', '-C', type=int, dest='abund_cutoff',
-                        help='remove k-mers below this abundance',
-                        default=DEFAULT_CUTOFF)
-
-    parser.add_argument('--normalize-to', '-Z', type=int, dest='normalize_to',
-                        help='base cutoff on median k-mer abundance of this',
-                        default=DEFAULT_NORMALIZE_LIMIT)
-
-    parser.add_argument('--mrna', '-m', dest='is_mrna',
-                        help='treat as mRNAseq data',
-                        default=True, action='store_true')
-
-    parser.add_argument('--genome', '-g', dest='is_genome',
-                        help='treat as genomic data (uniform coverage)',
-                        default=False, action='store_true')
-
-    parser.add_argument('--metagenomic', '-M',
-                        dest='is_metagenomic',
-                        help='treat as metagenomic data',
-                        default=True, action='store_true')
-
-    parser.add_argument('input_filenames', nargs='+')
-    args = parser.parse_args()
-
-    K = args.ksize
-    HT_SIZE = args.min_hashsize
-    N_HT = args.n_hashes
-
-    CUTOFF = args.abund_cutoff
-    NORMALIZE_LIMIT = args.normalize_to
-
-    is_variable_abundance = True  # conservative
-    if args.is_genome:
-        is_variable_abundance = False
-
-    errors = [0] * 1000
-
-    print 'making hashtable'
-    ht = khmer.new_counting_hash(K, HT_SIZE, N_HT)
-
-    save_pass2 = 0
-
-    pass2list = []
-    for filename in args.input_filenames:
-        pass2filename = os.path.basename(filename) + '.pass2'
-        trimfilename = os.path.basename(filename) + '.abundtrim'
-
-        pass2list.append((pass2filename, trimfilename))
-
-        pass2fp = open(pass2filename, 'w')
-        trimfp = open(trimfilename, 'w')
-
-        for n, read in enumerate(screed.open(filename)):
-            if n % 10000 == 0:
-                print '...', n, filename, save_pass2
-            seq = read.sequence.replace('N', 'A')
-            med, _, _ = ht.get_median_count(seq)
-
-            if med < NORMALIZE_LIMIT:
-                ht.consume(seq)
-                pass2fp.write('>%s\n%s\n' % (read.name, read.sequence))
-                save_pass2 += 1
-            else:
-                trim_seq, trim_at = ht.trim_on_abundance(seq, CUTOFF)
-                if trim_at < len(seq):
-                    errors[trim_at] += 1
-                if trim_at >= K:
-                    trimfp.write('>%s\n%s\n' % (read.name, trim_seq))
-
-        pass2fp.close()
-        trimfp.close()
-
-        print 'saved %d of %d to pass2fp' % (save_pass2, n,)
-
-    for pass2filename, trimfilename in pass2list:
-        for n, read in enumerate(screed.open(pass2filename)):
-            if n % 10000 == 0:
-                print '... x 2', n, filename
-
-            trimfp = open(trimfilename, 'a')
-
-            seq = read.sequence.replace('N', 'A')
-            med, _, _ = ht.get_median_count(seq)
-
-            if med >= NORMALIZE_LIMIT or not is_variable_abundance:
-                trim_seq, trim_at = ht.trim_on_abundance(seq, CUTOFF)
-                if trim_at < len(seq):
-                    errors[trim_at] += 1
-                if trim_at >= K:
-                    trimfp.write('>%s\n%s\n' % (read.name, trim_seq))
-            else:
-                trimfp.write('>%s\n%s\n' % (read.name, read.sequence))
-
-        os.unlink(pass2filename)
-
-    fp = open('err-profile.out', 'w')
-    for pos, count in enumerate(errors):
-        print >>fp, pos, count
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/ec.py b/sandbox/ec.py
deleted file mode 100755
index 27c5c578aa..0000000000
--- a/sandbox/ec.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-import screed
-import khmer
-import sys
-
-import math
-
-
-def main():
-    hash_filename = sys.argv[1]
-    input_filename = sys.argv[2]
-    output_filename = sys.argv[3]
-    max_error_region = int(sys.argv[4])
-
-    C = 20  # 20
-
-    corrected = 0
-    uncorrected = 0
-
-    outfp = open(output_filename, 'w')
-
-    ht = khmer.load_counting_hash(hash_filename)
-    aligner = khmer.ReadAligner(ht, 1, C, max_error_region)
-
-    K = ht.ksize()
-
-    for n, record in enumerate(screed.open(input_filename)):
-        if n % 1000 == 0:
-            print n
-
-        seq = record.sequence
-        seq_name = record.name
-
-        seq = seq.replace('N', 'A')
-
-        grXreAlign, reXgrAlign = aligner.align(seq)
-
-        if len(reXgrAlign) > 0:
-            graph_seq = grXreAlign.replace('-', '')
-            corrected += 1
-            outfp.write('>%s\n%s\n' % (seq_name, graph_seq))
-        else:
-            uncorrected += 1
-            outfp.write('>%s\n%s\n' % (seq_name, seq))
-
-
-    print 'corrected', corrected
-    print 'uncorrected', uncorrected
-
-    outfp.close()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/error-correct-pass2.py b/sandbox/error-correct-pass2.py
deleted file mode 100755
index 393eb0da6c..0000000000
--- a/sandbox/error-correct-pass2.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-"""
-Error correct reads based on a counting hash from a diginorm step.
-Output sequences will be put in @@@.
-
-% python scripts/error-correct-pass2 [ <...> ]
-
-Use '-h' for parameter help.
-"""
-import sys
-import screed.fasta
-import os
-import khmer
-from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
-
-from khmer.khmer_args import build_counting_args
-
-###
-
-DEFAULT_COVERAGE = 20
-DEFAULT_MAX_ERROR_REGION = 40
-
-
-def main():
-    parser = build_counting_multifile_args()
-    parser.add_argument('--cutoff', '-C', dest='coverage',
-                        default=DEFAULT_COVERAGE, type=int,
-                        help="Diginorm coverage.")
-    parser.add_argument('--max-error-region', '-M', dest='max_error_region',
-                        default=DEFAULT_MAX_ERROR_REGION, type=int,
-                        help="Max length of error region allowed")
-    args = parser.parse_args()
-
-    counting_ht = args.input_table
-    infiles = args.input_filenames
-
-    print 'file with ht: %s' % counting_ht
-
-    print 'loading hashtable'
-    ht = khmer.load_counting_hash(counting_ht)
-    K = ht.ksize()
-    C = args.coverage
-    max_error_region = args.max_error_region
-
-    print "K:", K
-    print "C:", C
-    print "max error region:", max_error_region
-
-    # the filtering function.
-    def process_fn(record):
-        # read_aligner is probably not threadsafe?
-        aligner = khmer.ReadAligner(ht, 1, C, max_error_region)
-
-        name = record['name']
-        seq = record['sequence']
-
-        seq = seq.replace('N', 'A')
-
-        grXreAlign, reXgrAlign = aligner.align(seq)
-
-        if len(reXgrAlign) > 0:
-            graph_seq = grXreAlign.replace('-', '')
-            seq = graph_seq
-
-        return name, seq
-
-    # the filtering loop
-    for infile in infiles:
-        print 'filtering', infile
-        outfile = os.path.basename(infile) + '.corr'
-        outfp = open(outfile, 'w')
-
-        tsp = ThreadedSequenceProcessor(process_fn)
-        tsp.start(verbose_loader(infile), outfp)
-
-        print 'output in', outfile
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/find-unpart.py b/sandbox/find-unpart.py
deleted file mode 100755
index b7864df6a0..0000000000
--- a/sandbox/find-unpart.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-import khmer
-import sys
-import os
-import gc
-import glob
-
-TRAVERSE_ON_UNPART = True
-STOP_BIG_TRAVERSALS = True
-
-
-def main():
-    already_part = sys.argv[1]
-    new_to_part = sys.argv[2]
-    basename = os.path.basename(new_to_part)
-    pmap_filename = sys.argv[3]
-
-    # if not os.path.exists(already_part):
-    #    print '%s doesn\'t exist! dying.' % already_part
-    #    sys.exit(0)
-
-    # create a fake-ish ht; K matters, but not hashtable size.
-    ht = khmer.load_hashbits(already_part + '.ht')
-    ht.load_tagset(already_part + '.tagset')
-    ht.merge_subset_from_disk(pmap_filename)
-
-    # find singletons
-    n_singletons = ht.find_unpart(
-        new_to_part, TRAVERSE_ON_UNPART, STOP_BIG_TRAVERSALS)
-    print 'found:', n_singletons
-
-    print 'saving', basename + '.unpart'
-    n_partitions = ht.output_partitions(new_to_part, basename + '.unpart')
-    print 'saving', basename + '.pmap'
-    ht.save_partitionmap(basename + '.pmap')
-
-    ###
-
-    (n_partitions, n_singletons) = ht.count_partitions()
-
-    print 'output partitions:', n_partitions
-    print 'pmap partitions:', n_partitions
-    print 'singletons:', n_singletons
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/normalize-by-align.py b/sandbox/normalize-by-align.py
deleted file mode 100755
index 293156fdcd..0000000000
--- a/sandbox/normalize-by-align.py
+++ /dev/null
@@ -1,155 +0,0 @@
-#! /usr/bin/env python2
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2013-2014. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-
-"""
-XXX
-
-Eliminate reads with minimum k-mer abundance higher than
-DESIRED_COVERAGE. Output sequences will be placed in 'infile.keep'.
-
-% python scripts/normalize-by-min.py [ -C ] ...
-
-Use '-h' for parameter help.
-"""
-
-import sys
-import screed
-import os
-import khmer
-from khmer.khmer_args import build_counting_args
-
-DEFAULT_MINIMUM_COVERAGE = 5
-
-
-def main():
-    parser = build_counting_args()
-    parser.add_argument("-t", "--trusted-cutoff", dest="trusted_cutoff", type=int, default=3)
-    parser.add_argument("--bits-theta", help="Tuning parameter controlling trade off of speed vs alignment sensitivity", default=1.0, type=float, dest="bits_theta")
-    parser.add_argument('-C', '--cutoff', type=int, dest='cutoff',
-                        default=DEFAULT_MINIMUM_COVERAGE)
-    parser.add_argument('-s', '--savehash', dest='savehash', default='')
-    parser.add_argument('-l', '--loadhash', dest='loadhash',
-                        default='')
-    parser.add_argument('--details-out', dest="details_out")
-    parser.add_argument('input_filenames', nargs='+')
-
-    args = parser.parse_args()
-
-    if not args.quiet:
-        print >>sys.stderr, '\nPARAMETERS:'
-        print >>sys.stderr, ' - kmer size = %d \t\t(-k)' % args.ksize
-        print >>sys.stderr, ' - n hashes = %d \t\t(-N)' % args.n_tables
-        print >>sys.stderr, ' - min hashsize = %-5.2g \t(-x)' % \
-            args.min_tablesize
-        print >>sys.stderr, ''
-        print >>sys.stderr, 'Estimated memory usage is %.2g bytes ' \
-            '(n_hashes x min_hashsize)' % (
-                args.n_tables * args.min_tablesize)
-        print >>sys.stderr, '-' * 8
-
-    K = args.ksize
-    HT_SIZE = args.min_tablesize
-    N_HT = args.n_tables
-    DESIRED_COVERAGE = args.cutoff
-
-    filenames = args.input_filenames
-
-    if args.loadhash:
-        print 'loading hashtable from', args.loadhash
-        ht = khmer.load_counting_hash(args.loadhash)
-    else:
-        print 'making hashtable'
-        ht = khmer.new_counting_hash(K, HT_SIZE, N_HT)
-
-    aligner = khmer.ReadAligner(ht, args.trusted_cutoff, args.bits_theta)
-
-    if args.details_out != None:
-        details_out = open(args.details_out, "w")
-    else:
-        details_out = None
-
-    total = 0
-    discarded = 0
-    for input_filename in filenames:
-        output_name = os.path.basename(input_filename) + '.keepalign'
-        outfp = open(output_name, 'w')
-
-        for n, record in enumerate(screed.open(input_filename)):
-            if n > 0 and n % 10000 == 0:
-                print '... kept', total - discarded, 'of', total, ', or', \
-                    int(100. - discarded / float(total) * 100.), '%'
-                print '... in file', input_filename
-
-            total += 1
-
-            if len(record.sequence) < K:
-                continue
-
-            seq = record.sequence.upper().replace('N', 'A')
-
-            ##
-            score, graph_alignment, read_alignment, truncated = aligner.align(record.sequence)
-
-            keep = False
-            if truncated:
-                keep = True
-            else:
-                if False:
-                    graph_seq = graph_alignment.replace("-", "")
-                else:
-                    graph_seq = ""
-                    for i in range(len(graph_alignment)):
-                        if graph_alignment[i] == "-":
-                            graph_seq += read_alignment[i]
-                        else:
-                            graph_seq += graph_alignment[i]
-
-                mincount = ht.get_min_count(graph_seq)
-                keep = True
-                seq = graph_seq
-
-                #if mincount < DESIRED_COVERAGE:
-                #    keep = True
-                #    seq = graph_seq
-                #else:
-                #    assert not keep
-
-            if details_out != None:
-                details_out.write("+{7}\t{0:0.2f}\t{3}\t{4}\nread: {6}\ngraph_aln: {1}\nread_aln: {2}\nstored_seq:{5}\n".format(score, graph_alignment, read_alignment, truncated, keep, seq, record.sequence, record.name))
-
-
-            if keep:
-                ht.consume(seq)
-                outfp.write('>%s\n%s\n' % (record.name, seq))
-            else:
-                discarded += 1
-
-        if total:
-            print 'DONE with', input_filename, '; kept', total - discarded, 'of',\
-                total, 'or', int(100. - discarded / float(total) * 100.), '%'
-            print 'output in', output_name
-
-        if args.savehash:
-            print 'Saving hashfile through', input_filename
-            print '...saving to', args.savehash
-            ht.save(args.savehash)
-
-    # Change 0.2 only if you really grok it. HINT: You don't.
-    fp_rate = khmer.calc_expected_collisions(ht)
-    print 'fp rate estimated to be %1.3f' % fp_rate
-
-    if fp_rate > 0.20:
-        print >>sys.stderr, "**"
-        print >>sys.stderr, "** ERROR: the counting hash is too small for"
-        print >>sys.stderr, "** this data set. Increase hashsize/num ht."
-        print >>sys.stderr, "**"
-        print >>sys.stderr, "** Do not use these results!!"
-        sys.exit(-1)
-
-if __name__ == '__main__':
-    main()
-
-# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
diff --git a/sandbox/read_aligner.py b/sandbox/read_aligner.py
deleted file mode 100755
index ca8ef896d4..0000000000
--- a/sandbox/read_aligner.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#! /usr/bin/env python2
-
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2013-2015. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-
-"""
-Error correct reads based on a counting hash from a diginorm step.
-Output sequences will be put in @@@.
-
-% python scripts/error-correct-pass2 [ <...> ]
-
-Use '-h' for parameter help.
-"""
-import sys
-import screed
-import os
-import khmer
-from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
-
-from khmer.khmer_args import build_counting_args
-from khmer.khmer_args import add_loadhash_args
-
-###
-
-DEFAULT_COVERAGE = 20
-DEFAULT_MAX_ERROR_REGION = 40
-
-
-def main():
-    parser = build_counting_args()
-    parser.add_argument("--trusted-cov", dest="trusted_cov", type=int, default=2)
-    parser.add_argument("--theta", type=float, default=1.0)
-    parser.add_argument("input_table")
-    parser.add_argument("input_filenames", nargs="+")
-    add_loadhash_args(parser)
-
-    args = parser.parse_args()
-
-    counting_ht = args.input_table
-    infiles = args.input_filenames
-
-    print >>sys.stderr, 'file with ht: %s' % counting_ht
-
-    print >>sys.stderr, 'loading hashtable'
-    ht = khmer.load_counting_hash(counting_ht)
-    K = ht.ksize()
-
-    aligner = khmer.ReadAligner(ht, args.trusted_cov, args.theta)  # counting hash, trusted kmer coverage cutoff, bits theta (threshold value for terminating unproductive alignemnts)
-
-    ### the filtering loop
-    for infile in infiles:
-        print >>sys.stderr, 'aligning', infile
-        for n, record in enumerate(screed.open(infile)):
-
-            name = record['name']
-            seq = record['sequence'].upper()
-            print >>sys.stderr, name
-            print >>sys.stderr, seq
-
-            score, graph_alignment, read_alignment, truncated = aligner.align(seq)
-            print >>sys.stderr, score
-            print >>sys.stderr, graph_alignment
-            print >>sys.stderr, read_alignment
-            print >>sys.stderr, truncated
-            print ">{0}\n{1}".format(name, graph_alignment)
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/shuffle-fasta.py b/sandbox/shuffle-fasta.py
deleted file mode 100755
index 707e5fe08a..0000000000
--- a/sandbox/shuffle-fasta.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-# this only works for small files! it loads everything into mem.
-import sys
-from screed.fasta import fasta_iter
-import random
-
-
-def main():
-    d = dict([(r['name'], r['sequence']) for r in fasta_iter(open(sys.argv[1]))])
-
-    ks = d.keys()
-    random.shuffle(ks)
-
-    for k in ks:
-        s = d[k]
-
-        print '>%s\n%s' % (k, s)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/to-casava-1.8-fastq.py b/sandbox/to-casava-1.8-fastq.py
deleted file mode 100755
index b4b5e6db07..0000000000
--- a/sandbox/to-casava-1.8-fastq.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-
-
-import functools
-import re
-import argparse
-
-from khmer import ReadParser
-
-
-resub_read_1 = functools.partial(re.sub, r"^(.*)/1$", r"\1 1:N:0:NNNNN")
-resub_read_2 = functools.partial(re.sub, r"^(.*)/2$", r"\1 2:N:0:NNNNN")
-
-
-def setup_cl_parser():
-
-    parser = \
-        argparse.ArgumentParser(
-            description=
-            "Convert the older FASTQ format to the Casava >= 1.8 FASTQ format."
-        )
-    parser.add_argument("input_filename")
-    parser.add_argument("output_filename")
-
-    return parser
-
-
-def main():
-
-    cl_parser = setup_cl_parser()
-    cl_args = cl_parser.parse_args()
-
-    # Note: Only use 1 thread to ensure same ordering of reads.
-    rparser = ReadParser(cl_args.input_filename, 1)
-
-    with open(cl_args.output_filename, "w") as output_file:
-
-        for read in rparser:
-
-            new_name = resub_read_1(read.name)
-            new_name = resub_read_2(new_name)
-
-            output_file.write(
-                "@{name}\n{sequence}\n+\n{quality}\n".format(
-                    name=new_name,
-                    sequence=read.sequence,
-                    quality=read.quality,
-                )
-            )
-
-
-if "__main__" == __name__:
-    main()
-
-# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
diff --git a/sandbox/uniqify-sequences.py b/sandbox/uniqify-sequences.py
deleted file mode 100755
index c9090083b8..0000000000
--- a/sandbox/uniqify-sequences.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-import khmer
-import sys
-import screed
-
-K = 20
-HASHTABLE_SIZE = int(2.5e8)
-N_HT = 4
-
-UNIQUE_LEN = 100
-UNIQUE_F = 0.9
-
-OUTPUT_WINDOW = 100
-OUTPUT_OVERLAP = 10
-
-def main():
-
-    kh = khmer.new_hashbits(K, HASHTABLE_SIZE, N_HT)
-
-    discarded = 0
-    kept_kmers = 0
-    total_kmers = 0
-
-    total_out = 0
-    for filename in sys.argv[1:]:
-        n_out = 0
-        for n, record in enumerate(screed.open(filename)):
-            if n > 0 and n % 10000 == 0:
-                print >>sys.stderr, '...', n, discarded
-                print >>sys.stderr, '==>', total_kmers, kept_kmers, int(
-                    float(kept_kmers) / float(total_kmers) * 100.)
-            seq = record.sequence
-            seq = seq.replace('N', 'G')
-
-            paths = kh.extract_unique_paths(seq, UNIQUE_LEN, UNIQUE_F)
-
-            kh.consume(seq)
-            total_kmers += len(seq) - K + 1
-
-            if not len(paths):
-                discarded += 1
-                continue
-
-            for i, path in enumerate(paths):
-                n_out += 1
-
-                if len(path) < OUTPUT_WINDOW:
-                    total_out += 1
-                    print '>%d\n%s' % (total_out, path)
-                    continue
-
-                for start in range(0, len(path) - OUTPUT_WINDOW + 1,
-                                   OUTPUT_OVERLAP):
-                    total_out += 1
-                    subpath = path[start:start + OUTPUT_WINDOW]
-                    print '>%d\n%s' % (total_out, subpath)
-
-        print >>sys.stderr, '%d for %s' % (n_out, filename)
-
-if __name__ == '__main__':
-    main()
diff --git a/sandbox/write-interleave.py b/sandbox/write-interleave.py
deleted file mode 100755
index 54e7e7860b..0000000000
--- a/sandbox/write-interleave.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#! /usr/bin/env python2
-#
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
-# Contact: khmer-project@idyll.org
-#
-import sys
-import screed
-import os.path
-
-def main():
-    for filename in sys.argv[1:]:
-        assert 'R1' in filename
-        filename2 = filename.replace('R1', 'R2')
-
-        r1 = iter(screed.open(filename)).next()
-        r2 = iter(screed.open(filename2)).next()
-
-        assert r1.name == r2.name, (r1.name, r2.name)
-
-        final = filename.replace('R1', '')
-        print 'python /root/khmer/sandbox/interleave.py %s %s | gzip -9c > %s' % (
-            filename, filename2, final)
-
-if __name__ == '__main__':
-    main()
-
-# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:

From d587101a07e268efe23a02ce95a681b7c51e2185 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Thu, 26 Feb 2015 07:48:10 -0500
Subject: [PATCH 2/3] moved calc-error-profile into list of scripts to be promoted

---
 sandbox/README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sandbox/README.rst b/sandbox/README.rst
index 34d9269895..61c5cc1631 100644
--- a/sandbox/README.rst
+++ b/sandbox/README.rst
@@ -12,8 +12,9 @@ We are still triaging and documenting the various scripts.
 
 Awaiting promotion to sandbox:
 
-* unique-kmers.py - estimate the number of k-mers present in a file with the HyperLogLog low-memory probabilistic cardinality estimation algorithm.
+* calc-error-profile.py - calculate a per-base "error profile" for shotgun sequencing data, w/o a reference. (Used/tested in `2014 paper on semi-streaming algorithms `__)
 * correct-errors.py - streaming error correction.
+* unique-kmers.py - estimate the number of k-mers present in a file with the HyperLogLog low-memory probabilistic cardinality estimation algorithm.
 
 Scripts with recipes:
 
@@ -28,7 +29,6 @@ To keep, document, and build recipes for:
 * assemstats3.py - print out assembly statistics
 * build-sparse-graph.py - code for building a sparse graph (by Camille Scott)
 * calc-best-assembly.py - calculate the "best assembly" - used in metagenome protocol
-* calc-error-profile.py - calculate a per-base "error profile" for shotgun sequencing data, w/o a reference. (Used/tested in `2014 paper on semi-streaming algorithms `__)
 * collect-variants.py - used in a `gist `__
 * extract-single-partition.py - extract all the sequences that belong to a specific partition, from a file with multiple partitions
 * fasta-to-abundance-hist.py - generate abundance of k-mers by position within reads; use with abundance-hist-by-position.py

From 22774dbe3b8d7688e974e8cf83ae9c2a39d103fe Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Sat, 28 Feb 2015 07:52:00 -0500
Subject: [PATCH 3/3] fixed minor space issues

---
 ChangeLog | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c5d511f440..3ca12fa9c6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,7 +2,7 @@
 
 	* sandbox/{combine-pe.py,compare-partitions.py,count-within-radius.py,
 	degree-by-position.py,dn-identify-errors.py,ec.py,error-correct-pass2.py,
-	find-unpart.py,normalize-by-align.py,read_aligner.py,shuffle-fasta.py,
+	find-unpart.py,normalize-by-align.py,read-aligner.py,shuffle-fasta.py,
 	to-casava-1.8-fastq.py,uniqify-sequences.py}: removed from sandbox/ as
 	obsolete/unmaintained.
 	* sandbox/README.rst: updated to reflect readstats.py and trim-low-abund.py
 	promotion to scripts/.
 	* doc/dev/scripts-and-sandbox.txt: updated to reflect sandbox/ script name
 	preferences, and note to remove from README.rst when moved over to scripts/.
 
 2015-02-25 Hussien Alameldin
 
 	* sandbox/bloom_count.py: renamed to bloom-count.py
-	* sandbox/bloom_count_intersection.py: renamed to 
+	* sandbox/bloom_count_intersection.py: renamed to
 	bloom-count-intersection.py
 	* sandbox/read_aligner.py: renamed to read-aligner.py
 
 2015-02-26 Tamer A. Mansour
 
-	* scripts/abundance-dist-single.py: Use CSV format for the histogram. 
+	* scripts/abundance-dist-single.py: Use CSV format for the histogram.
 	Includes column headers.
 	* tests/test_scripts.py: add a test function for the --csv option in
 	abundance-dist-single.py