script help formatting #1268

Merged: 12 commits, merged Aug 26, 2015

Changes from 11 commits
39 changes: 39 additions & 0 deletions ChangeLog
@@ -1,3 +1,42 @@
2015-08-24 Michael R. Crusoe <crusoe@ucdavis.edu>

* khmer/khmer_args.py: Replaced sanitize_epilog() with sanitize_help(), which
reflows the text of argparse descriptions and epilogs while preserving their
formatting. Enhanced removal of Sphinx directives by also replacing double
backticks with the double quote character.
* scripts/*.py: Renamed sanitize_epilog to sanitize_help; leading newlines
from triple-quoted epilogs removed; formatting made consistent;
sanitize_help and ComboFormatter added where they were missing; a couple of
script-specific epilog reformattings (for the use of `:doc:` and a
hyperlink).
* scripts/{count-median,filter-abund-single}.py: Fixed printing of the output
file name; previously, information about the file handle was printed instead.
* scripts/count-median.py: Added a missing command so that the example given
actually works.
* scripts/filter-abund-single.py: Removed redundant printing of output file
names.
* scripts/normalize-by-median.py: Removed unused option "-d" from an example
command (left over from the "--dump-frequency" era).
* scripts/{partition-graph.py,do-partition.py}: Fixed erasure of the queue
module name in the worker functions; access to that module is necessary for
basic functionality.
* scripts/{do-partition,abundance-dist,abundance-dist-single,
extract-long-sequences}.py: Added an example command to the epilog.
* tests/khmer_tst_utils.py: Added a 'name' attribute to make the fake
sys.stdout more like a real stdout object.
* oxli/__init__.py: Removed redundant and unused help text.
* scripts/{abundance-dist,annotate-partitions,count-median,
extract-long-sequences,extract-paired-reads,extract-partitions,
fastq-to-fasta,filter-abund,filter-stoptags,interleave-reads,
load-into-graph,merge-partitions,normalize-by-median,partition-graph,
readstats,sample-reads-randomly,split-paired-reads}.py: made "--version"
and the citation header consistent across the scripts.
* tests/test_scripts.py: added tests for the "--version" and citation
header behavior.
* tests/test_normalize_by_median.py: Updated the test for 'quiet' mode, as
the citation header still prints to STDERR.
* setup.py,tests/test_scripts.py: Turned off the "oxli" script for v2.0.

2015-08-17 Michael R. Crusoe <crusoe@ucdavis.edu>

* Makefile: remove BASH shell designation that appears to be incompatible
4 changes: 3 additions & 1 deletion doc/run-corn-50m.sh
@@ -12,8 +12,10 @@
# https://s3.amazonaws.com/public.ged.msu.edu/khmer/iowa-corn-50m.fa.gz
#

set -e
set -x
Member: What are these?

Contributor Author: Prints out each line as it is executing (`set -x`) and stops on the first error (`set -e`).

KHMER_PATH=$1
export PYTHONPATH=$KHMER_PATH/python

SCRIPTPATH=$KHMER_PATH/scripts

23 changes: 19 additions & 4 deletions khmer/khmer_args.py
@@ -13,6 +13,7 @@
import os
import argparse
import math
import textwrap
from argparse import _VersionAction
from collections import namedtuple

@@ -392,10 +393,24 @@ def add_threading_args(parser):
help='Number of simultaneous threads to execute')


def sanitize_epilog(parser):
parser.epilog = parser.epilog.replace(
'//', '/').replace(':option:', '').replace(
':program:', '').replace('::', ':')
def sanitize_help(parser):
"""Remove Sphinx directives & reflow text to width of 79 characters."""
wrapper = textwrap.TextWrapper(width=79)
parser.description = wrapper.fill(parser.description)
if not parser.epilog:
return parser
cleanlog = parser.epilog.replace(':option:', '').replace(
':program:', '').replace('::', ':').replace('``', '"')
newlog = prev_section = ""
for section in cleanlog.split('\n\n'):
if section.startswith(' '):
newlog += section + '\n'
else:
if prev_section.startswith(' '):
newlog += '\n'
newlog += wrapper.fill(section) + '\n\n'
prev_section = section
parser.epilog = newlog
return parser

_algorithms = {
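Note: a quick way to see what the new sanitize_help() does, assuming the branch from this pull request is importable. The parser, script name, and epilog below are made up purely for illustration; prose paragraphs get reflowed to 79 columns with the Sphinx roles stripped, while the indented example command is left untouched::

    import argparse
    from khmer.khmer_args import sanitize_help  # added in this pull request

    parser = argparse.ArgumentParser(
        description="Demonstrate help sanitizing.",
        epilog="""\
    Use :option:`-k` to set the k-mer size. Example::

       some-script.py -k 17 input.fa output.dist
    """)

    parser = sanitize_help(parser)
    # ':option:' and the trailing '::' are gone, the prose is reflowed,
    # and the indented example command is preserved as-is.
    print(parser.epilog)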
17 changes: 7 additions & 10 deletions oxli/__init__.py
@@ -13,7 +13,7 @@
import argparse
import sys
import textwrap
from khmer import khmer_args
from khmer.khmer_args import build_nodegraph_args
from oxli import build_graph


@@ -30,15 +30,12 @@ def get_parser():

# build-graph (formerly load-graph.py) parsers here
parser_build_graph = \
subparsers.add_parser('build-graph',
help="Load sequences into the compressible graph"
"format plus optional tagset",
description="Load sequences into the "
"compressible graph format plus optional tagset")

khmer_args.build_nodegraph_args("Load sequences into the compressible"
"graph format plus optional tagset.",
None, parser=parser_build_graph)
subparsers.add_parser(
name='build-graph',
help="Load sequences into the compressible graph format "
"plus optional tagset")

parser_build_graph = build_nodegraph_args(parser=parser_build_graph)
build_graph.build_parser(parser_build_graph)
parser_build_graph.set_defaults(func=build_graph.main)

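Note: for anyone unfamiliar with the pattern being consolidated here, the subcommand wiring boils down to set_defaults(func=...) on each subparser plus a single args.func(args) dispatch. A minimal, self-contained sketch with plain argparse (the handler and argument names are stand-ins, not oxli's real ones)::

    import argparse

    def build_graph_main(args):
        # stand-in for oxli.build_graph.main
        print("would build a graph from " + args.input)

    def get_parser():
        parser = argparse.ArgumentParser(prog='oxli')
        subparsers = parser.add_subparsers()
        sub = subparsers.add_parser(
            'build-graph',
            help="Load sequences into the compressible graph format "
                 "plus optional tagset")
        sub.add_argument('input')
        # each subcommand records its entry point on the namespace
        sub.set_defaults(func=build_graph_main)
        return parser

    args = get_parser().parse_args(['build-graph', 'reads.fa'])
    args.func(args)   # dispatches to build_graph_main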
2 changes: 0 additions & 2 deletions oxli/build_graph.py
@@ -43,8 +43,6 @@ def build_parser(parser):


def main(args):
info('build-graph.py', ['graph', 'SeqAn'])

report_on_config(args, graphtype='nodegraph')
base = args.output_filename
filenames = args.input_filenames
4 changes: 2 additions & 2 deletions sandbox/collect-reads.py
@@ -22,7 +22,7 @@
import khmer
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, report_on_config, info,
calculate_graphsize, sanitize_epilog)
calculate_graphsize, sanitize_help)
from khmer.kfile import check_input_files, check_space
from khmer.kfile import check_space_for_graph
import argparse
@@ -68,7 +68,7 @@ def get_parser():
def main():

info('collect-reads.py', ['counting'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()
report_on_config(args)

base = args.output_countgraph_filename
4 changes: 2 additions & 2 deletions sandbox/correct-reads.py
@@ -26,7 +26,7 @@
import argparse

from khmer.khmer_args import (build_counting_args, info, add_loadgraph_args,
report_on_config, sanitize_epilog)
report_on_config, sanitize_help)
from khmer.utils import write_record, write_record_pair, broken_paired_reader
from khmer.kfile import (check_space, check_space_for_graph,
check_valid_file_exists)
@@ -114,7 +114,7 @@ def get_parser():

def main():
info('correct-reads.py', ['streaming'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()

###

4 changes: 2 additions & 2 deletions sandbox/estimate_optimal_hash.py
@@ -29,7 +29,7 @@
from __future__ import print_function
import argparse
import khmer, oxli
from khmer.khmer_args import info, optimal_size, sanitize_epilog
from khmer.khmer_args import info, optimal_size, sanitize_help
import textwrap
import sys

@@ -70,7 +70,7 @@ def get_parser():

def main():
info('estimate_optimal_hash.py', ['counting'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()
N = args.N
if args.M:
M = args.M
4 changes: 2 additions & 2 deletions sandbox/saturate-by-median.py
@@ -22,7 +22,7 @@

from khmer.khmer_args import (build_counting_args, add_loadgraph_args,
report_on_config, info, create_countgraph,
sanitize_epilog)
sanitize_help)
import argparse
from khmer.kfile import (check_space, check_space_for_graph,
check_valid_file_exists)
@@ -178,7 +178,7 @@ def get_parser():

def main(): # pylint: disable=too-many-branches,too-many-statements
info('saturate-by-median.py', ['diginorm'])
parser = sanitize_epilog(get_parser())
parser = sanitize_help(get_parser())
args = parser.parse_args()

report_on_config(args)
4 changes: 2 additions & 2 deletions sandbox/sweep-files.py
@@ -37,7 +37,7 @@
import time
import khmer
from khmer.khmer_args import (build_nodegraph_args, report_on_config, info,
sanitize_epilog)
sanitize_help)

DEFAULT_OUT_PREF = 'reads'
DEFAULT_RANGE = -1
@@ -101,7 +101,7 @@ def clear(self):

def main():
#info('sweep-files.py', ['sweep'])
parser = sanitize_epilog(get_parser())
parser = sanitize_help(get_parser())
args = parser.parse_args()

if args.max_tablesize < MIN_HSIZE:
4 changes: 2 additions & 2 deletions sandbox/sweep-reads.py
@@ -39,7 +39,7 @@
import time
import khmer
from khmer.khmer_args import (build_nodegraph_args, report_on_config, info,
sanitize_epilog)
sanitize_help)
from khmer.kfile import (check_input_files, check_valid_file_exists,
check_space)

@@ -206,7 +206,7 @@ def get_parser():

def main():
info('sweep-reads-buffered.py', ['sweep'])
parser = sanitize_epilog(get_parser())
parser = sanitize_help(get_parser())
args = parser.parse_args()

if args.max_tablesize < MAX_HSIZE:
19 changes: 12 additions & 7 deletions scripts/abundance-dist-single.py
@@ -25,19 +25,24 @@
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, add_threading_args,
report_on_config, info, calculate_graphsize,
sanitize_epilog)
sanitize_help)
from khmer.kfile import (check_input_files, check_space_for_graph)


def get_parser():
epilog = '''
Note that with :option:`-b` this script is constant memory; in exchange,
k-mer counts will stop at 255. The memory usage of this script with
:option:`-b` will be about 1.15x the product of the :option:`-x` and
:option:`-N` numbers.
epilog = '''\
Note that with :option:`-b`/:option:`--no-bigcount` this script is constant
memory; in exchange, k-mer counts will stop at 255. The memory usage of
this script with :option:`-b` will be about 1.15x the product of the
:option:`-x` and :option:`-N` numbers.

To count k-mers in multiple files use :program:`load_into_counting.py` and
:program:`abundance_dist.py`.

Example::

abundance-dist-single.py -x 1e7 -N 2 -k 17 \\
tests/test-data/test-abund-read-2.fa test-dist
'''
parser = build_counting_args(
descr="Calculate the abundance distribution of k-mers from a "
@@ -69,7 +74,7 @@ def get_parser():

def main(): # pylint: disable=too-many-locals,too-many-branches
info('abundance-dist-single.py', ['counting', 'SeqAn'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()
report_on_config(args)

check_input_files(args.input_sequence_filename, args.force)
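Note: the memory claim in the epilog is easy to sanity-check. Taking the epilog at its word (with -b/--no-bigcount, memory is roughly 1.15 times the -x value times the -N value), the example command's settings work out to about 23 MB. The helper below is only an illustration, not khmer API::

    def approx_no_bigcount_memory(x, n, overhead=1.15):
        """Rough memory estimate from the epilog's 1.15 * x * N rule."""
        return overhead * x * n

    # settings from the example command above: -x 1e7 -N 2
    print(approx_no_bigcount_memory(1e7, 2))   # 23000000.0, i.e. ~23 MB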
20 changes: 15 additions & 5 deletions scripts/abundance-dist.py
@@ -19,16 +19,26 @@
import csv
import khmer
import argparse
import textwrap
import os
from khmer import __version__
from khmer.kfile import check_input_files
from khmer.khmer_args import info
from khmer.khmer_args import (info, sanitize_help, ComboFormatter,
_VersionStdErrAction)


def get_parser():
epilog = """\
Example::

load-into-countgraph.py -x 1e7 -N 2 -k 17 counts \\
tests/test-data/test-abund-read-2.fa
abundance-dist.py counts tests/test-data/test-abund-read-2.fa test-dist
"""
parser = argparse.ArgumentParser(
description="Calculate abundance distribution of the k-mers in "
"the sequence file using a pre-made k-mer countgraph.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
formatter_class=ComboFormatter, epilog=textwrap.dedent(epilog))

parser.add_argument('input_count_graph_filename', help='The name of the'
' input k-mer countgraph file.')
@@ -46,8 +56,8 @@ def get_parser():
parser.add_argument('-b', '--no-bigcount', dest='bigcount', default=True,
action='store_false',
help='Do not count k-mers past 255')
parser.add_argument('--version', action='version', version='%(prog)s ' +
khmer.__version__)
parser.add_argument('--version', action=_VersionStdErrAction,
version='khmer {v}'.format(v=__version__))
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Continue even if specified input files '
'do not exist or are empty.')
@@ -56,7 +66,7 @@

def main():
info('abundance-dist.py', ['counting'])
args = get_parser().parse_args()
args = sanitize_help(get_parser()).parse_args()

infiles = [args.input_count_graph_filename,
args.input_sequence_filename]
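Note: _VersionStdErrAction is imported from khmer_args here, and its implementation is not part of this hunk. As a rough illustration of the behaviour the ChangeLog and tests describe (version and citation text going to STDERR so STDOUT stays clean for piped data), a custom argparse action along these lines would do it; this is only a sketch, not khmer's actual class::

    import argparse
    import sys

    class VersionToStdErr(argparse.Action):
        """Print the version string to stderr instead of stdout, then exit."""

        def __init__(self, option_strings, version=None,
                     dest=argparse.SUPPRESS, default=argparse.SUPPRESS,
                     help="show program's version number and exit"):
            super(VersionToStdErr, self).__init__(
                option_strings=option_strings, dest=dest, default=default,
                nargs=0, help=help)
            self.version = version

        def __call__(self, parser, namespace, values, option_string=None):
            sys.stderr.write(self.version + '\n')
            parser.exit()

    parser = argparse.ArgumentParser()
    parser.add_argument('--version', action=VersionToStdErr,
                        version='khmer X.Y')   # placeholder version string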
26 changes: 13 additions & 13 deletions scripts/annotate-partitions.py
@@ -20,20 +20,21 @@
import os
import argparse
import textwrap
import khmer
import sys
from khmer import __version__, Nodegraph
from khmer.kfile import check_input_files, check_space
from khmer.khmer_args import info
from khmer.khmer_args import (info, sanitize_help, ComboFormatter,
_VersionStdErrAction)

DEFAULT_K = 32


def get_parser():
epilog = """
Load in a partitionmap (generally produced by partition-graph.py or
merge-partitions.py) and annotate the sequences in the given files with
their partition IDs. Use :program:`extract-partitions.py` to extract
sequences into separate group files.
epilog = """\
Load in a partitionmap (generally produced by :program:`partition-graph.py`
or :program:`merge-partitions.py`) and annotate the sequences in the given
files with their partition IDs. Use :program:`extract-partitions.py` to
extract sequences into separate group files.

Example (results will be in ``random-20-a.fa.part``)::

@@ -44,8 +45,7 @@
"""
parser = argparse.ArgumentParser(
description="Annotate sequences with partition IDs.",
epilog=textwrap.dedent(epilog),
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
epilog=textwrap.dedent(epilog), formatter_class=ComboFormatter)

parser.add_argument('--ksize', '-k', type=int, default=DEFAULT_K,
help="k-mer size (default: %d)" % DEFAULT_K)
@@ -54,20 +54,20 @@
parser.add_argument('input_filenames', metavar='input_sequence_filename',
nargs='+', help='input FAST[AQ] sequences to '
'annotate.')
parser.add_argument('--version', action='version', version='%(prog)s ' +
khmer.__version__)
parser.add_argument('--version', action=_VersionStdErrAction,
version='khmer {v}'.format(v=__version__))
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


def main():
info('annotate-partitions.py', ['graph'])
args = get_parser().parse_args()
args = sanitize_help(get_parser()).parse_args()

ksize = args.ksize
filenames = args.input_filenames
nodegraph = khmer.Nodegraph(ksize, 1, 1)
nodegraph = Nodegraph(ksize, 1, 1)

partitionmap_file = args.graphbase + '.pmap.merged'
