From d2acc3d2465cd33e5f41ba5d40863aa8dbbe5603 Mon Sep 17 00:00:00 2001 From: Tom Smith Date: Wed, 20 Mar 2024 09:53:27 +0000 Subject: [PATCH] Improve docs re chimeric/unmapped/unpaired read pairs (#629) * updates unpaired/unmapped/chimeric options * updates incorrect indentation in Utilities.Start() * updates incorrect indentation in Utilities.Start() - (2) * updates test files for --help --- tests/count_help | 20 ++++++----- tests/dedup_help | 20 ++++++----- tests/group_help | 20 ++++++----- umi_tools/Utilities.py | 77 +++++++++++++++++++++++++++++------------- umi_tools/count.py | 2 +- umi_tools/dedup.py | 2 +- umi_tools/group.py | 2 +- 7 files changed, 89 insertions(+), 54 deletions(-) diff --git a/tests/count_help b/tests/count_help index d173957..39208a6 100644 --- a/tests/count_help +++ b/tests/count_help @@ -71,15 +71,6 @@ Options: --mapping-quality=MAPPING_QUALITY Minimum mapping quality for a read to be retained [default=0] - --unmapped-reads=UNMAPPED_READS - How to handle unmapped reads. Options are 'discard', - 'use' or 'correct' [default=discard] - --chimeric-pairs=CHIMERIC_PAIRS - How to handle chimeric read pairs. Options are - 'discard', 'use' or 'correct' [default=use] - --unpaired-reads=UNPAIRED_READS - How to handle unpaired reads. Options are 'discard', - 'use' or 'correct' [default=use] --ignore-umi Ignore UMI and dedup only on position --ignore-tlen Option to dedup paired end reads based solely on read1, whether or not the template length is the same @@ -90,6 +81,17 @@ Options: -o, --out-sam Output alignments in sam format [default=False] --no-sort-output Don't Sort the output + Dedup and Count SAM/BAM options: + --unmapped-reads=UNMAPPED_READS + How to handle unmapped reads. Options are 'discard' or + 'use' [default=discard] + --chimeric-pairs=CHIMERIC_PAIRS + How to handle chimeric read pairs. Options are + 'discard' or 'use' [default=use] + --unpaired-reads=UNPAIRED_READS + How to handle unpaired reads. 
Options are 'discard' or + 'use' [default=use] + input/output options: -I FILE, --stdin=FILE file to read stdin from [default = stdin]. diff --git a/tests/dedup_help b/tests/dedup_help index 1620217..6e167bf 100644 --- a/tests/dedup_help +++ b/tests/dedup_help @@ -81,15 +81,6 @@ Options: --mapping-quality=MAPPING_QUALITY Minimum mapping quality for a read to be retained [default=0] - --unmapped-reads=UNMAPPED_READS - How to handle unmapped reads. Options are 'discard', - 'use' or 'correct' [default=discard] - --chimeric-pairs=CHIMERIC_PAIRS - How to handle chimeric read pairs. Options are - 'discard', 'use' or 'correct' [default=use] - --unpaired-reads=UNPAIRED_READS - How to handle unpaired reads. Options are 'discard', - 'use' or 'correct' [default=use] --ignore-umi Ignore UMI and dedup only on position --ignore-tlen Option to dedup paired end reads based solely on read1, whether or not the template length is the same @@ -100,6 +91,17 @@ Options: -o, --out-sam Output alignments in sam format [default=False] --no-sort-output Don't Sort the output + Dedup and Count SAM/BAM options: + --unmapped-reads=UNMAPPED_READS + How to handle unmapped reads. Options are 'discard' or + 'use' [default=discard] + --chimeric-pairs=CHIMERIC_PAIRS + How to handle chimeric read pairs. Options are + 'discard' or 'use' [default=use] + --unpaired-reads=UNPAIRED_READS + How to handle unpaired reads. Options are 'discard' or + 'use' [default=use] + input/output options: -I FILE, --stdin=FILE file to read stdin from [default = stdin]. diff --git a/tests/group_help b/tests/group_help index f2e5879..0978047 100644 --- a/tests/group_help +++ b/tests/group_help @@ -84,15 +84,6 @@ Options: --mapping-quality=MAPPING_QUALITY Minimum mapping quality for a read to be retained [default=0] - --unmapped-reads=UNMAPPED_READS - How to handle unmapped reads. Options are 'discard', - 'use' or 'correct' [default=discard] - --chimeric-pairs=CHIMERIC_PAIRS - How to handle chimeric read pairs.
Options are - 'discard', 'use' or 'correct' [default=use] - --unpaired-reads=UNPAIRED_READS - How to handle unpaired reads. Options are 'discard', - 'use' or 'correct' [default=use] --ignore-umi Ignore UMI and dedup only on position --ignore-tlen Option to dedup paired end reads based solely on read1, whether or not the template length is the same @@ -103,6 +94,17 @@ Options: -o, --out-sam Output alignments in sam format [default=False] --no-sort-output Don't Sort the output + Group SAM/BAM options: + --unmapped-reads=UNMAPPED_READS + How to handle unmapped reads. Options are 'discard', + 'use' or 'output' [default=discard] + --chimeric-pairs=CHIMERIC_PAIRS + How to handle chimeric read pairs. Options are + 'discard', 'use' or 'output' [default=use] + --unpaired-reads=UNPAIRED_READS + How to handle unpaired reads. Options are 'discard', + 'use' or 'output' [default=use] + input/output options: -I FILE, --stdin=FILE file to read stdin from [default = stdin]. diff --git a/umi_tools/Utilities.py b/umi_tools/Utilities.py index 0f1e551..da2689e 100644 --- a/umi_tools/Utilities.py +++ b/umi_tools/Utilities.py @@ -258,8 +258,6 @@ class method (:func:`cachedmethod`) calls. import regex from umi_tools import __version__ -from builtins import bytes, chr - class DefaultOptions: stdlog = sys.stdout @@ -599,6 +597,8 @@ def Start(parser=None, add_extract_options=False, add_group_dedup_options=True, add_sam_options=True, + add_dedup_count_sam_options=False, + add_group_sam_options=False, add_umi_grouping_options=True, return_parser=False): """set up an experiment. @@ -887,27 +887,6 @@ def Start(parser=None, group.add_option("--output-unmapped", dest="output_unmapped", action="store_true", default=False, help=optparse.SUPPRESS_HELP) - group.add_option("--unmapped-reads", dest="unmapped_reads", - type="choice", - choices=("discard", "use", "output"), - default="discard", - help=("How to handle unmapped reads.
Options are " - "'discard', 'use' or 'correct' [default=%default]")) - - group.add_option("--chimeric-pairs", dest="chimeric_pairs", - type="choice", - choices=("discard", "use", "output"), - default="use", - help=("How to handle chimeric read pairs. Options are " - "'discard', 'use' or 'correct' [default=%default]")) - - group.add_option("--unpaired-reads", dest="unpaired_reads", - type="choice", - choices=("discard", "use", "output"), - default="use", - help=("How to handle unpaired reads. Options are " - "'discard', 'use' or 'correct' [default=%default]")) - group.add_option("--ignore-umi", dest="ignore_umi", action="store_true", help="Ignore UMI and dedup" " only on position", default=False) @@ -943,6 +922,56 @@ def Start(parser=None, parser.add_option_group(group) + if add_dedup_count_sam_options: + group = OptionGroup(parser, "Dedup and Count SAM/BAM options") + + group.add_option("--unmapped-reads", dest="unmapped_reads", + type="choice", + choices=("discard", "use"), + default="discard", + help=("How to handle unmapped reads. Options are " + "'discard' or 'use' [default=%default]")) + + group.add_option("--chimeric-pairs", dest="chimeric_pairs", + type="choice", + choices=("discard", "use"), + default="use", + help=("How to handle chimeric read pairs. Options are " + "'discard' or 'use' [default=%default]")) + + group.add_option("--unpaired-reads", dest="unpaired_reads", + type="choice", + choices=("discard", "use"), + default="use", + help=("How to handle unpaired reads. Options are " + "'discard' or 'use' [default=%default]")) + parser.add_option_group(group) + + if add_group_sam_options: + group = OptionGroup(parser, "Group SAM/BAM options") + + group.add_option("--unmapped-reads", dest="unmapped_reads", + type="choice", + choices=("discard", "use", "output"), + default="discard", + help=("How to handle unmapped reads.
Options are " + "'discard', 'use' or 'output' [default=%default]")) + + group.add_option("--chimeric-pairs", dest="chimeric_pairs", + type="choice", + choices=("discard", "use", "output"), + default="use", + help=("How to handle chimeric read pairs. Options are " + "'discard', 'use' or 'output' [default=%default]")) + + group.add_option("--unpaired-reads", dest="unpaired_reads", + type="choice", + choices=("discard", "use", "output"), + default="use", + help=("How to handle unpaired reads. Options are " + "'discard', 'use' or 'output' [default=%default]")) + parser.add_option_group(group) + if add_pipe_options: group = OptionGroup(parser, "input/output options") group.add_option("-I", "--stdin", dest="stdin", type="string", @@ -1188,7 +1217,7 @@ def validateExtractOptions(options): "(starting with 'umi_') %s, %s" % ( options.pattern, options.pattern2)) - return(extract_cell, extract_umi) + return (extract_cell, extract_umi) def validateSamOptions(options, group=False): diff --git a/umi_tools/count.py b/umi_tools/count.py index 6d0d609..4439e9a 100644 --- a/umi_tools/count.py +++ b/umi_tools/count.py @@ -91,7 +91,7 @@ def main(argv=None): parser.add_option_group(group) # add common options (-h/--help, ...) and parse command line - (options, args) = U.Start(parser, argv=argv, add_group_dedup_options=False) + (options, args) = U.Start(parser, argv=argv, add_group_dedup_options=False, add_dedup_count_sam_options=True) options.per_gene = True # hardcodes counting to per-gene only diff --git a/umi_tools/dedup.py b/umi_tools/dedup.py index a518fc6..e5acc9f 100644 --- a/umi_tools/dedup.py +++ b/umi_tools/dedup.py @@ -201,7 +201,7 @@ def main(argv=None): parser.add_option_group(group) # add common options (-h/--help, ...) 
and parse command line - (options, args) = U.Start(parser, argv=argv) + (options, args) = U.Start(parser, argv=argv, add_dedup_count_sam_options=True) U.validateSamOptions(options, group=False) diff --git a/umi_tools/group.py b/umi_tools/group.py index 8780800..9767ace 100644 --- a/umi_tools/group.py +++ b/umi_tools/group.py @@ -150,7 +150,7 @@ def main(argv=None): parser.add_option_group(group) # add common options (-h/--help, ...) and parse command line - (options, args) = U.Start(parser, argv=argv) + (options, args) = U.Start(parser, argv=argv, add_group_sam_options=True) U.validateSamOptions(options, group=True)