Skip to content

Commit

Permalink
Improve docs re chimeric/unmapped/unpaired read pairs (#629)
Browse files Browse the repository at this point in the history
* updates unpaired/unmapped/chimeric options

* updates incorrect indentation in Utilities.Start()

* updates incorrect indentation in Utilities.Start() - (2)

* updates test files for --help
  • Loading branch information
TomSmithCGAT authored Mar 20, 2024
1 parent ed9965f commit d2acc3d
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 54 deletions.
20 changes: 11 additions & 9 deletions tests/count_help
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,6 @@ Options:
--mapping-quality=MAPPING_QUALITY
Minimum mapping quality for a read to be retained
[default=0]
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'correct' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'correct' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'correct' [default=use]
--ignore-umi Ignore UMI and dedup only on position
--ignore-tlen Option to dedup paired end reads based solely on
read1, whether or not the template length is the same
Expand All @@ -90,6 +81,17 @@ Options:
-o, --out-sam Output alignments in sam format [default=False]
--no-sort-output Don't Sort the output

Dedup and Count SAM/BAM options:
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard' or
'use' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard' or 'use' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard' or
'use' [default=use]

input/output options:
-I FILE, --stdin=FILE
file to read stdin from [default = stdin].
Expand Down
20 changes: 11 additions & 9 deletions tests/dedup_help
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,6 @@ Options:
--mapping-quality=MAPPING_QUALITY
Minimum mapping quality for a read to be retained
[default=0]
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'correct' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'correct' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'correct' [default=use]
--ignore-umi Ignore UMI and dedup only on position
--ignore-tlen Option to dedup paired end reads based solely on
read1, whether or not the template length is the same
Expand All @@ -100,6 +91,17 @@ Options:
-o, --out-sam Output alignments in sam format [default=False]
--no-sort-output Don't Sort the output

Dedup and Count SAM/BAM options:
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard' or
'use' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard' or 'use' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard' or
'use' [default=use]

input/output options:
-I FILE, --stdin=FILE
file to read stdin from [default = stdin].
Expand Down
20 changes: 11 additions & 9 deletions tests/group_help
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,6 @@ Options:
--mapping-quality=MAPPING_QUALITY
Minimum mapping quality for a read to be retained
[default=0]
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'correct' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'correct' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'correct' [default=use]
--ignore-umi Ignore UMI and dedup only on position
--ignore-tlen Option to dedup paired end reads based solely on
read1, whether or not the template length is the same
Expand All @@ -103,6 +94,17 @@ Options:
-o, --out-sam Output alignments in sam format [default=False]
--no-sort-output Don't Sort the output

Group SAM/BAM options:
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'output' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'output' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'output' [default=use]

input/output options:
-I FILE, --stdin=FILE
file to read stdin from [default = stdin].
Expand Down
77 changes: 53 additions & 24 deletions umi_tools/Utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ class method (:func:`cachedmethod`) calls.
import regex
from umi_tools import __version__

from builtins import bytes, chr


class DefaultOptions:
stdlog = sys.stdout
Expand Down Expand Up @@ -599,6 +597,8 @@ def Start(parser=None,
add_extract_options=False,
add_group_dedup_options=True,
add_sam_options=True,
add_dedup_count_sam_options=False,
add_group_sam_options=False,
add_umi_grouping_options=True,
return_parser=False):
"""set up an experiment.
Expand Down Expand Up @@ -887,27 +887,6 @@ def Start(parser=None,
group.add_option("--output-unmapped", dest="output_unmapped", action="store_true",
default=False, help=optparse.SUPPRESS_HELP)

group.add_option("--unmapped-reads", dest="unmapped_reads",
type="choice",
choices=("discard", "use", "output"),
default="discard",
help=("How to handle unmapped reads. Options are "
"'discard', 'use' or 'correct' [default=%default]"))

group.add_option("--chimeric-pairs", dest="chimeric_pairs",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle chimeric read pairs. Options are "
"'discard', 'use' or 'correct' [default=%default]"))

group.add_option("--unpaired-reads", dest="unpaired_reads",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle unpaired reads. Options are "
"'discard', 'use' or 'correct' [default=%default]"))

group.add_option("--ignore-umi", dest="ignore_umi",
action="store_true", help="Ignore UMI and dedup"
" only on position", default=False)
Expand Down Expand Up @@ -943,6 +922,56 @@ def Start(parser=None,

parser.add_option_group(group)

if add_dedup_count_sam_options:
group = OptionGroup(parser, "Dedup and Count SAM/BAM options")

group.add_option("--unmapped-reads", dest="unmapped_reads",
type="choice",
choices=("discard", "use"),
default="discard",
help=("How to handle unmapped reads. Options are "
"'discard' or 'use' [default=%default]"))

group.add_option("--chimeric-pairs", dest="chimeric_pairs",
type="choice",
choices=("discard", "use"),
default="use",
help=("How to handle chimeric read pairs. Options are "
"'discard' or 'use' [default=%default]"))

group.add_option("--unpaired-reads", dest="unpaired_reads",
type="choice",
choices=("discard", "use"),
default="use",
help=("How to handle unpaired reads. Options are "
"'discard' or 'use' [default=%default]"))
parser.add_option_group(group)

if add_group_sam_options:
group = OptionGroup(parser, "Group SAM/BAM options")

group.add_option("--unmapped-reads", dest="unmapped_reads",
type="choice",
choices=("discard", "use", "output"),
default="discard",
help=("How to handle unmapped reads. Options are "
"'discard', 'use' or 'output' [default=%default]"))

group.add_option("--chimeric-pairs", dest="chimeric_pairs",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle chimeric read pairs. Options are "
"'discard', 'use' or 'output' [default=%default]"))

group.add_option("--unpaired-reads", dest="unpaired_reads",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle unpaired reads. Options are "
"'discard', 'use' or 'output' [default=%default]"))
parser.add_option_group(group)

if add_pipe_options:
group = OptionGroup(parser, "input/output options")
group.add_option("-I", "--stdin", dest="stdin", type="string",
Expand Down Expand Up @@ -1188,7 +1217,7 @@ def validateExtractOptions(options):
"(starting with 'umi_') %s, %s" % (
options.pattern, options.pattern2))

return(extract_cell, extract_umi)
return (extract_cell, extract_umi)


def validateSamOptions(options, group=False):
Expand Down
2 changes: 1 addition & 1 deletion umi_tools/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def main(argv=None):
parser.add_option_group(group)

# add common options (-h/--help, ...) and parse command line
(options, args) = U.Start(parser, argv=argv, add_group_dedup_options=False)
(options, args) = U.Start(parser, argv=argv, add_group_dedup_options=False, add_dedup_count_sam_options=True)

options.per_gene = True # hardcodes counting to per-gene only

Expand Down
2 changes: 1 addition & 1 deletion umi_tools/dedup.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def main(argv=None):
parser.add_option_group(group)

# add common options (-h/--help, ...) and parse command line
(options, args) = U.Start(parser, argv=argv)
(options, args) = U.Start(parser, argv=argv, add_dedup_count_sam_options=True)

U.validateSamOptions(options, group=False)

Expand Down
2 changes: 1 addition & 1 deletion umi_tools/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def main(argv=None):
parser.add_option_group(group)

# add common options (-h/--help, ...) and parse command line
(options, args) = U.Start(parser, argv=argv)
(options, args) = U.Start(parser, argv=argv, add_group_sam_options=True)

U.validateSamOptions(options, group=True)

Expand Down

0 comments on commit d2acc3d

Please sign in to comment.