broadinstitute · dpark01 · Jan 27, 2015 · Jan 20, 2015
diff --git a/assembly.py b/assembly.py
@@ -95,7 +95,7 @@ def assemble_trinity(inBam, outFasta, clipDb, n_reads=100000, outReads=None):
 
 def parser_assemble_trinity(parser=argparse.ArgumentParser()):
  parser.add_argument('inBam',
- help='Input reads, BAM format.')
+ help='Input unaligned reads, BAM format.')
  parser.add_argument('clipDb',
  help='Trimmomatic clip DB.')
  parser.add_argument('outFasta',
@@ -153,13 +153,13 @@ def order_and_orient(inFasta, inReference, outFasta, inReads=None):
 
 def parser_order_and_orient(parser=argparse.ArgumentParser()):
  parser.add_argument('inFasta',
- help='Input assembly/contigs, FASTA format.')
+ help='Input de novo assembly/contigs, FASTA format.')
  parser.add_argument('inReference',
- help='Reference genome, FASTA format.')
+ help='Reference genome for ordering, orienting, and merging contigs, FASTA format.')
  parser.add_argument('outFasta',
- help='Output assembly, FASTA format.')
+ help='Output assembly, FASTA format, with the same number of chromosomes as inReference, and in the same order.')
  parser.add_argument('--inReads', default=None,
- help='Input reads in BAM format.')
+ help='Input reads in unaligned BAM format. These can be used to improve the merge process.')
  util.cmd.common_args(parser, (('loglevel',None), ('version',None), ('tmpDir',None)))
  util.cmd.attach_main(parser, order_and_orient, split_args=True)
  return parser
@@ -239,9 +239,9 @@ def impute_from_reference(inFasta, inReference, outFasta,
 
 def parser_impute_from_reference(parser=argparse.ArgumentParser()):
  parser.add_argument('inFasta',
- help='Input assembly/contigs, FASTA format.')
+ help='Input assembly/contigs, FASTA format, already ordered, oriented and merged with inReference.')
  parser.add_argument('inReference',
- help='Reference genome, FASTA format.')
+ help='Reference genome to impute with, FASTA format.')
  parser.add_argument('outFasta',
  help='Output assembly, FASTA format.')
  parser.add_argument("--newName", default=None,
@@ -329,7 +329,7 @@ def parser_refine_assembly(parser=argparse.ArgumentParser()):
  parser.add_argument('inFasta',
  help='Input assembly, FASTA format, pre-indexed for Picard, Samtools, and Novoalign.')
  parser.add_argument('inBam',
- help='Input reads, BAM format.')
+ help='Input unaligned reads, BAM format.')
  parser.add_argument('outFasta',
  help='Output refined assembly, FASTA format, indexed for Picard, Samtools, and Novoalign.')
  parser.add_argument('--outBam',

diff --git a/docs/install.rst b/docs/install.rst
@@ -40,6 +40,9 @@ as well::
 
  pip install snakemake==3.2 yappi==0.94
 
+However, most of the real functionality is encapsulated in the command line
+tools, which can be used without any of the pipeline infrastructure.
+
 You should either sudo pip install or use a virtualenv (recommended).
 
 

diff --git a/taxon_filter.py b/taxon_filter.py
@@ -25,32 +25,33 @@ def parser_deplete_human(parser=argparse.ArgumentParser()):
  parser.add_argument('inBam',
  help='Input BAM file.')
  parser.add_argument('revertBam',
- help='Output BAM file.')
+ help='Output BAM: read markup reverted with Picard.')
  parser.add_argument('bmtaggerBam',
- help='Output BAM file.')
+ help='Output BAM: depleted of human reads with BMTagger.')
  parser.add_argument('rmdupBam',
- help='Output BAM file.')
+ help='Output BAM: bmtaggerBam run through M-Vicuna duplicate removal.')
  parser.add_argument('blastnBam',
- help='Output BAM file.')
+ help='Output BAM: rmdupBam run through another depletion of human reads with BLASTN.')
  parser.add_argument('--taxfiltBam',
- help='Output BAM file.',
+ help='Output BAM: blastnBam run through taxonomic selection via LASTAL.',
  default=None)
  parser.add_argument('--bmtaggerDbs', nargs='+', required=True,
  help='''Reference databases (one or more) to deplete from input.
  For each db, requires prior creation of db.bitmask by bmtool,
  and db.srprism.idx, db.srprism.map, etc. by srprism mkindex.''')
  parser.add_argument('--blastDbs', nargs='+', required=True,
- help='One or more reference databases for blast.')
+ help='One or more reference databases for blast to deplete from input.')
  parser.add_argument('--lastDb',
- help='One reference database for last.',
+ help='One reference database for last (required if --taxfiltBam is specified).',
  default=None)
  parser.add_argument('--JVMmemory', default = tools.picard.FilterSamReadsTool.jvmMemDefault,
- help='JVM virtual memory size (default: %(default)s)')
+ help='JVM virtual memory size for Picard FilterSamReads (default: %(default)s)')
  util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmpDir', None)))
  util.cmd.attach_main(parser, main_deplete_human)
  return parser
 def main_deplete_human(args):
- '''Run the entire depletion pipeline: bmtagger, mvicuna, blastn, and maybe lastal'''
+ '''Run the entire depletion pipeline: bmtagger, mvicuna, blastn.
+ Optionally, use lastal to select a specific taxon of interest.'''
  tools.picard.RevertSamTool().execute(args.inBam, args.revertBam,
  picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true'])
  multi_db_deplete_bam(args.revertBam, args.bmtaggerDbs, deplete_bmtagger_bam, args.bmtaggerBam, JVMmemory=args.JVMmemory)