script help formatting #1268

Merged: 12 commits, merged Aug 26, 2015

Changes from 11 commits
39 changes: 39 additions & 0 deletions ChangeLog
@@ -1,3 +1,42 @@
2015-08-24 Michael R. Crusoe <crusoe@ucdavis.edu>

* khmer/khmer_args.py: Replaced sanitize_epilog() with sanitize_help(), which
reflows the text of argparse descriptions and epilogs while preserving their
formatting. Enhanced removal of Sphinx directives by also replacing double
backticks with the double quote character.
* scripts/*.py: Renamed sanitize_epilog to sanitize_help; leading newlines
from triple-quoted epilogs removed; formatting made consistent;
sanitize_help and ComboFormatter added where they were missing; a couple of
script-specific epilog reformattings (for the use of `:doc:` and a
hyperlink).
* scripts/{count-median,filter-abund-single}.py: Fixed printing of the output
file name; previously, information about the file handle was printed instead.
* scripts/count-median.py: Added a missing command so that the example given
actually works.
* scripts/filter-abund-single.py: Removed redundant printing of output file
names.
* scripts/normalize-by-median.py: Removed unused option "-d" from an example
command (left over from the "--dump-frequency" era).
* scripts/{partition-graph.py,do-partition.py}: Fixed erasure of the queue
module name in the worker functions; access to that module is necessary for
basic functionality.
* scripts/{do-partition,abundance-dist,abundance-dist-single,
extract-long-sequences}.py: Added an example command to the epilog.
* tests/khmer_tst_utils.py: Added a 'name' attribute to make the fake
sys.stdout more like a real stdout object.
* oxli/__init__.py: Removed redundant and unused help text.
* scripts/{abundance-dist,annotate-partitions,count-median,
extract-long-sequences,extract-paired-reads,extract-partitions,
fastq-to-fasta,filter-abund,filter-stoptags,interleave-reads,
load-into-graph,merge-partitions,normalize-by-median,partition-graph,
readstats,sample-reads-randomly,split-paired-reads}.py: made "--version"
and the citation header consistent across the scripts.
* tests/test_scripts.py: added tests for the "--version" and citation
header behavior.
* tests/test_normalize_by_median.py: Updated the test for 'quiet' mode, as
the citation header still prints to STDERR.
* setup.py,tests/test_scripts.py: Turned off the "oxli" script for v2.0.

2015-08-17 Michael R. Crusoe <crusoe@ucdavis.edu>

* Makefile: remove BASH shell designation that appears to be incompatible
4 changes: 3 additions & 1 deletion doc/run-corn-50m.sh
@@ -12,8 +12,10 @@
# https://s3.amazonaws.com/public.ged.msu.edu/khmer/iowa-corn-50m.fa.gz
#

set -e
set -x
Member: What are these?

Contributor Author: Prints out each line as it is executing (`set -x`) and stops on the first error (`set -e`).

KHMER_PATH=$1
export PYTHONPATH=$KHMER_PATH/python

SCRIPTPATH=$KHMER_PATH/scripts

23 changes: 19 additions & 4 deletions khmer/khmer_args.py
@@ -13,6 +13,7 @@
import os
import argparse
import math
import textwrap
from argparse import _VersionAction
from collections import namedtuple

@@ -392,10 +393,24 @@ def add_threading_args(parser):
help='Number of simultaneous threads to execute')


def sanitize_epilog(parser):
parser.epilog = parser.epilog.replace(
'//', '/').replace(':option:', '').replace(
':program:', '').replace('::', ':')
def sanitize_help(parser):
"""Remove Sphinx directives & reflow text to width of 79 characters."""
wrapper = textwrap.TextWrapper(width=79)
parser.description = wrapper.fill(parser.description)
if not parser.epilog:
return parser
cleanlog = parser.epilog.replace(':option:', '').replace(
':program:', '').replace('::', ':').replace('``', '"')
newlog = prev_section = ""
for section in cleanlog.split('\n\n'):
if section.startswith(' '):
newlog += section + '\n'
else:
if prev_section.startswith(' '):
newlog += '\n'
newlog += wrapper.fill(section) + '\n\n'
prev_section = section
parser.epilog = newlog
return parser

_algorithms = {
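Note: a quick way to see what the new sanitize_help() does, assuming the branch from this pull request is importable. The parser, script name, and epilog below are made up purely for illustration; prose paragraphs get reflowed to 79 columns with the Sphinx roles stripped, while the indented example command is left untouched::

    import argparse
    from khmer.khmer_args import sanitize_help  # added in this pull request

    parser = argparse.ArgumentParser(
        description="Demonstrate help sanitizing.",
        epilog="""\
    Use :option:`-k` to set the k-mer size. Example::

       some-script.py -k 17 input.fa output.dist
    """)

    parser = sanitize_help(parser)
    # ':option:' and the trailing '::' are gone, the prose is reflowed,
    # and the indented example command is preserved as-is.
    print(parser.epilog)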
17 changes: 7 additions & 10 deletions oxli/__init__.py
@@ -13,7 +13,7 @@
import argparse
import sys
import textwrap
from khmer import khmer_args
from khmer.khmer_args import build_nodegraph_args
from oxli import build_graph


@@ -30,15 +30,12 @@ def get_parser():

# build-graph (formerly load-graph.py) parsers here
parser_build_graph = \
subparsers.add_parser('build-graph',
help="Load sequences into the compressible graph"
"format plus optional tagset",
description="Load sequences into the "
"compressible graph format plus optional tagset")

khmer_args.build_nodegraph_args("Load sequences into the compressible"
"graph format plus optional tagset.",
None, parser=parser_build_graph)
subparsers.add_parser(
name='build-graph',
help="Load sequences into the compressible graph format "
"plus optional tagset")

parser_build_graph = build_nodegraph_args(parser=parser_build_graph)
build_graph.build_parser(parser_build_graph)
parser_build_graph.set_defaults(func=build_graph.main)

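Note: for anyone unfamiliar with the pattern being consolidated here, the subcommand wiring boils down to set_defaults(func=...) on each subparser plus a single args.func(args) dispatch. A minimal, self-contained sketch with plain argparse (the handler and argument names are stand-ins, not oxli's real ones)::

    import argparse

    def build_graph_main(args):
        # stand-in for oxli.build_graph.main
        print("would build a graph from " + args.input)

    def get_parser():
        parser = argparse.ArgumentParser(prog='oxli')
        subparsers = parser.add_subparsers()
        sub = subparsers.add_parser(
            'build-graph',
            help="Load sequences into the compressible graph format "
                 "plus optional tagset")
        sub.add_argument('input')
        # each subcommand records its entry point on the namespace
        sub.set_defaults(func=build_graph_main)
        return parser

    args = get_parser().parse_args(['build-graph', 'reads.fa'])
    args.func(args)   # dispatches to build_graph_main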
2 changes: 0 additions & 2 deletions oxli/build_graph.py
@@ -43,8 +43,6 @@ def build_parser(parser):


def main(args):
info('build-graph.py', ['graph', 'SeqAn'])

report_on_config(args, graphtype='nodegraph')
base = args.output_filename
filenames = args.input_filenames
4 changes: 2 additions & 2 deletions sandbox/collect-reads.py
@@ -22,7 +22,7 @@
import khmer
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, report_on_config, info,
calculate_graphsize, sanitize_epilog)
calculate_graphsize, sanitize_help)
from khmer.kfile import check_input_files, check_space
from khmer.kfile import check_space_for_graph
import argparse
@@ -68,7 +68,7 @@ def get_parser():
def main():

info('collect-reads.py', ['counting'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()
report_on_config(args)

base = args.output_countgraph_filename
4 changes: 2 additions & 2 deletions sandbox/correct-reads.py
@@ -26,7 +26,7 @@
import argparse

from khmer.khmer_args import (build_counting_args, info, add_loadgraph_args,
report_on_config, sanitize_epilog)
report_on_config, sanitize_help)
from khmer.utils import write_record, write_record_pair, broken_paired_reader
from khmer.kfile import (check_space, check_space_for_graph,
check_valid_file_exists)
@@ -114,7 +114,7 @@ def get_parser():

def main():
info('correct-reads.py', ['streaming'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()

###

4 changes: 2 additions & 2 deletions sandbox/estimate_optimal_hash.py
@@ -29,7 +29,7 @@
from __future__ import print_function
import argparse
import khmer, oxli
from khmer.khmer_args import info, optimal_size, sanitize_epilog
from khmer.khmer_args import info, optimal_size, sanitize_help
import textwrap
import sys

@@ -70,7 +70,7 @@ def get_parser():

def main():
info('estimate_optimal_hash.py', ['counting'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()
N = args.N
if args.M:
M = args.M
4 changes: 2 additions & 2 deletions sandbox/saturate-by-median.py
@@ -22,7 +22,7 @@

from khmer.khmer_args import (build_counting_args, add_loadgraph_args,
report_on_config, info, create_countgraph,
sanitize_epilog)
sanitize_help)
import argparse
from khmer.kfile import (check_space, check_space_for_graph,
check_valid_file_exists)
@@ -178,7 +178,7 @@ def get_parser():

def main(): # pylint: disable=too-many-branches,too-many-statements
info('saturate-by-median.py', ['diginorm'])
parser = sanitize_epilog(get_parser())
parser = sanitize_help(get_parser())
args = parser.parse_args()

report_on_config(args)
4 changes: 2 additions & 2 deletions sandbox/sweep-files.py
@@ -37,7 +37,7 @@
import time
import khmer
from khmer.khmer_args import (build_nodegraph_args, report_on_config, info,
sanitize_epilog)
sanitize_help)

DEFAULT_OUT_PREF = 'reads'
DEFAULT_RANGE = -1
@@ -101,7 +101,7 @@ def clear(self):

def main():
#info('sweep-files.py', ['sweep'])
parser = sanitize_epilog(get_parser())
parser = sanitize_help(get_parser())
args = parser.parse_args()

if args.max_tablesize < MIN_HSIZE:
4 changes: 2 additions & 2 deletions sandbox/sweep-reads.py
@@ -39,7 +39,7 @@
import time
import khmer
from khmer.khmer_args import (build_nodegraph_args, report_on_config, info,
sanitize_epilog)
sanitize_help)
from khmer.kfile import (check_input_files, check_valid_file_exists,
check_space)

@@ -206,7 +206,7 @@ def get_parser():

def main():
info('sweep-reads-buffered.py', ['sweep'])
parser = sanitize_epilog(get_parser())
parser = sanitize_help(get_parser())
args = parser.parse_args()

if args.max_tablesize < MAX_HSIZE:
19 changes: 12 additions & 7 deletions scripts/abundance-dist-single.py
@@ -25,19 +25,24 @@
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, add_threading_args,
report_on_config, info, calculate_graphsize,
sanitize_epilog)
sanitize_help)
from khmer.kfile import (check_input_files, check_space_for_graph)


def get_parser():
epilog = '''
Note that with :option:`-b` this script is constant memory; in exchange,
k-mer counts will stop at 255. The memory usage of this script with
:option:`-b` will be about 1.15x the product of the :option:`-x` and
:option:`-N` numbers.
epilog = '''\
Note that with :option:`-b`/:option:`--no-bigcount` this script is constant
memory; in exchange, k-mer counts will stop at 255. The memory usage of
this script with :option:`-b` will be about 1.15x the product of the
:option:`-x` and :option:`-N` numbers.

To count k-mers in multiple files use :program:`load_into_counting.py` and
:program:`abundance_dist.py`.

Example::

abundance-dist-single.py -x 1e7 -N 2 -k 17 \\
tests/test-data/test-abund-read-2.fa test-dist
'''
parser = build_counting_args(
descr="Calculate the abundance distribution of k-mers from a "
@@ -69,7 +74,7 @@ def get_parser():

def main(): # pylint: disable=too-many-locals,too-many-branches
info('abundance-dist-single.py', ['counting', 'SeqAn'])
args = sanitize_epilog(get_parser()).parse_args()
args = sanitize_help(get_parser()).parse_args()
report_on_config(args)

check_input_files(args.input_sequence_filename, args.force)
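Note: the memory claim in the epilog is easy to sanity-check. Taking the epilog at its word (with -b/--no-bigcount, memory is roughly 1.15 times the -x value times the -N value), the example command's settings work out to about 23 MB. The helper below is only an illustration, not khmer API::

    def approx_no_bigcount_memory(x, n, overhead=1.15):
        """Rough memory estimate from the epilog's 1.15 * x * N rule."""
        return overhead * x * n

    # settings from the example command above: -x 1e7 -N 2
    print(approx_no_bigcount_memory(1e7, 2))   # 23000000.0, i.e. ~23 MB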
20 changes: 15 additions & 5 deletions scripts/abundance-dist.py
@@ -19,16 +19,26 @@
import csv
import khmer
import argparse
import textwrap
import os
from khmer import __version__
from khmer.kfile import check_input_files
from khmer.khmer_args import info
from khmer.khmer_args import (info, sanitize_help, ComboFormatter,
_VersionStdErrAction)


def get_parser():
epilog = """\
Example::

load-into-countgraph.py -x 1e7 -N 2 -k 17 counts \\
tests/test-data/test-abund-read-2.fa
abundance-dist.py counts tests/test-data/test-abund-read-2.fa test-dist
"""
parser = argparse.ArgumentParser(
description="Calculate abundance distribution of the k-mers in "
"the sequence file using a pre-made k-mer countgraph.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
formatter_class=ComboFormatter, epilog=textwrap.dedent(epilog))

parser.add_argument('input_count_graph_filename', help='The name of the'
' input k-mer countgraph file.')
@@ -46,8 +56,8 @@ def get_parser():
parser.add_argument('-b', '--no-bigcount', dest='bigcount', default=True,
action='store_false',
help='Do not count k-mers past 255')
parser.add_argument('--version', action='version', version='%(prog)s ' +
khmer.__version__)
parser.add_argument('--version', action=_VersionStdErrAction,
version='khmer {v}'.format(v=__version__))
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Continue even if specified input files '
'do not exist or are empty.')
@@ -56,7 +66,7 @@

def main():
info('abundance-dist.py', ['counting'])
args = get_parser().parse_args()
args = sanitize_help(get_parser()).parse_args()

infiles = [args.input_count_graph_filename,
args.input_sequence_filename]
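Note: _VersionStdErrAction is imported from khmer_args here, and its implementation is not part of this hunk. As a rough illustration of the behaviour the ChangeLog and tests describe (version and citation text going to STDERR so STDOUT stays clean for piped data), a custom argparse action along these lines would do it; this is only a sketch, not khmer's actual class::

    import argparse
    import sys

    class VersionToStdErr(argparse.Action):
        """Print the version string to stderr instead of stdout, then exit."""

        def __init__(self, option_strings, version=None,
                     dest=argparse.SUPPRESS, default=argparse.SUPPRESS,
                     help="show program's version number and exit"):
            super(VersionToStdErr, self).__init__(
                option_strings=option_strings, dest=dest, default=default,
                nargs=0, help=help)
            self.version = version

        def __call__(self, parser, namespace, values, option_string=None):
            sys.stderr.write(self.version + '\n')
            parser.exit()

    parser = argparse.ArgumentParser()
    parser.add_argument('--version', action=VersionToStdErr,
                        version='khmer X.Y')   # placeholder version string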
26 changes: 13 additions & 13 deletions scripts/annotate-partitions.py
@@ -20,20 +20,21 @@
import os
import argparse
import textwrap
import khmer
import sys
from khmer import __version__, Nodegraph
from khmer.kfile import check_input_files, check_space
from khmer.khmer_args import info
from khmer.khmer_args import (info, sanitize_help, ComboFormatter,
_VersionStdErrAction)

DEFAULT_K = 32


def get_parser():
epilog = """
Load in a partitionmap (generally produced by partition-graph.py or
merge-partitions.py) and annotate the sequences in the given files with
their partition IDs. Use :program:`extract-partitions.py` to extract
sequences into separate group files.
epilog = """\
Load in a partitionmap (generally produced by :program:`partition-graph.py`
or :program:`merge-partitions.py`) and annotate the sequences in the given
files with their partition IDs. Use :program:`extract-partitions.py` to
extract sequences into separate group files.

Example (results will be in ``random-20-a.fa.part``)::

@@ -44,8 +45,7 @@
"""
parser = argparse.ArgumentParser(
description="Annotate sequences with partition IDs.",
epilog=textwrap.dedent(epilog),
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
epilog=textwrap.dedent(epilog), formatter_class=ComboFormatter)

parser.add_argument('--ksize', '-k', type=int, default=DEFAULT_K,
help="k-mer size (default: %d)" % DEFAULT_K)
@@ -54,20 +54,20 @@
parser.add_argument('input_filenames', metavar='input_sequence_filename',
nargs='+', help='input FAST[AQ] sequences to '
'annotate.')
parser.add_argument('--version', action='version', version='%(prog)s ' +
khmer.__version__)
parser.add_argument('--version', action=_VersionStdErrAction,
version='khmer {v}'.format(v=__version__))
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


def main():
info('annotate-partitions.py', ['graph'])
args = get_parser().parse_args()
args = sanitize_help(get_parser()).parse_args()

ksize = args.ksize
filenames = args.input_filenames
nodegraph = khmer.Nodegraph(ksize, 1, 1)
nodegraph = Nodegraph(ksize, 1, 1)

partitionmap_file = args.graphbase + '.pmap.merged'
