qiime2 · colinbrislawn · Sep 29, 2024 · Sep 29, 2024 · Sep 29, 2024 · Oct 3, 2024
diff --git a/q2_vsearch/_chimera.py b/q2_vsearch/_chimera.py
@@ -15,7 +15,8 @@
 from ._format import UchimeStatsFmt
 
 
-_uchime_defaults = {'dn': 1.4,
+_uchime_defaults = {'method': 'uchime',
+                    'dn': 1.4,
                     'mindiffs': 3,
                     'mindiv': 0.8,
                     'minh': 0.28,
@@ -68,26 +69,29 @@ def _uchime_ref(sequences, table, reference_sequences, dn, mindiffs,
 
 def uchime_denovo(sequences: DNAFASTAFormat,
                   table: biom.Table,
+                  method: str = _uchime_defaults['method'],
                   dn: float = _uchime_defaults['dn'],
                   mindiffs: int = _uchime_defaults['mindiffs'],
                   mindiv: float = _uchime_defaults['mindiv'],
                   minh: float = _uchime_defaults['minh'],
                   xn: float = _uchime_defaults['xn']) \
                   -> (DNAFASTAFormat, DNAFASTAFormat, UchimeStatsFmt):
     cmd, chimeras, nonchimeras, uchime_stats = \
-        _uchime_denovo(sequences, table, dn, mindiffs, mindiv, minh, xn)
+        _uchime_denovo(sequences, table, method,
+                       dn, mindiffs, mindiv, minh, xn)
     return chimeras, nonchimeras, uchime_stats
 
 
-def _uchime_denovo(sequences, table, dn, mindiffs, mindiv, minh, xn):
+def _uchime_denovo(sequences, table, method,
+                   dn, mindiffs, mindiv, minh, xn):
     # this function only exists to simplify testing
     chimeras = DNAFASTAFormat()
     nonchimeras = DNAFASTAFormat()
     uchime_stats = UchimeStatsFmt()
     with tempfile.NamedTemporaryFile() as fasta_with_sizes:
         _fasta_with_sizes(str(sequences), fasta_with_sizes.name, table)
         cmd = ['vsearch',
-               '--uchime_denovo', fasta_with_sizes.name,
+               '--' + method + '_denovo', fasta_with_sizes.name,
                '--uchimeout', str(uchime_stats),
                '--nonchimeras', str(nonchimeras),
                '--chimeras', str(chimeras),

diff --git a/q2_vsearch/citations.bib b/q2_vsearch/citations.bib
@@ -19,3 +19,32 @@ @article{rideout2014subsampled
   publisher={PeerJ Inc.},
   doi={10.7717/peerj.545}
 }
+
+@article{edgar2011uchime,
+  title={UCHIME improves sensitivity and speed of chimera detection},
+  author={Edgar, Robert C and Haas, Brian J and Clemente, Jose C and Quince, Christopher and Knight, Rob},
+  journal={Bioinformatics},
+  volume={27},
+  number={16},
+  pages={2194--2200},
+  year={2011},
+  publisher={Oxford University Press}
+}
+
+@article{edgar2016uchime2,
+  title={UCHIME2: improved chimera prediction for amplicon sequencing},
+  author={Edgar, Robert C},
+  journal={BioRxiv},
+  pages={074252},
+  year={2016},
+  publisher={Cold Spring Harbor Laboratory}
+}
+
+@article{edgar2016unoise2,
+  title={UNOISE2: improved error-correction for Illumina 16S and ITS amplicon sequencing},
+  author={Edgar, Robert C},
+  journal={BioRxiv},
+  pages={081257},
+  year={2016},
+  publisher={Cold Spring Harbor Laboratory}
+}
diff --git a/q2_vsearch/plugin_setup.py b/q2_vsearch/plugin_setup.py
@@ -371,7 +371,7 @@
         'nonchimeras': 'The non-chimeric sequences.',
         'stats': 'Summary statistics from chimera checking.'
     },
-    name='Reference-based chimera filtering with vsearch.',
+    name='Reference-based chimera filtering.',
     description=('Apply the vsearch uchime_ref method to identify chimeric '
                  'feature sequences. The results of this method can be used '
                  'to filter chimeric features from the corresponding feature '
@@ -385,6 +385,8 @@
         'sequences': FeatureData[Sequence],
         'table': FeatureTable[Frequency]},
     parameters={
+        'method': qiime2.plugin.Str % qiime2.plugin.Choices(
+            ['uchime', 'uchime2', 'uchime3']),
         'dn': qiime2.plugin.Float % qiime2.plugin.Range(0., None),
         'mindiffs': qiime2.plugin.Int % qiime2.plugin.Range(1, None),
         'mindiv': qiime2.plugin.Float % qiime2.plugin.Range(0., None),
@@ -404,12 +406,17 @@
                   'abundances).'),
     },
     parameter_descriptions={
+        'method': ('Denovo chimera detection based on uchime (Edgar 2011), '
+                   'uchime2 (Edgar 2016), or uchime3 (Edgar 2016).'),
         'dn': ('No vote pseudo-count, corresponding to the parameter n in '
                'the chimera scoring function.'),
-        'mindiffs': 'Minimum number of differences per segment.',
-        'mindiv': 'Minimum divergence from closest parent.',
+        'mindiffs': ('Minimum number of differences per segment. '
+                     'Ignored for uchime2 and uchime3.'),
+        'mindiv': ('Minimum divergence from closest parent. '
+                   'Ignored for uchime2 and uchime3.'),
         'minh': ('Minimum score (h). Increasing this value tends to reduce '
-                 'the number of false positives and to decrease sensitivity.'),
+                 'the number of false positives and to decrease sensitivity. '
+                 'Ignored for uchime2 and uchime3.'),
         'xn': ('No vote weight, corresponding to the parameter beta in the '
                'scoring function.'),
     },
@@ -418,12 +425,15 @@
         'nonchimeras': 'The non-chimeric sequences.',
         'stats': 'Summary statistics from chimera checking.'
     },
-    name='De novo chimera filtering with vsearch.',
-    description=('Apply the vsearch uchime_denovo method to identify chimeric '
-                 'feature sequences. The results of this method can be used '
-                 'to filter chimeric features from the corresponding feature '
-                 'table. For more details, please refer to the vsearch '
-                 'documentation.')
+    name='De novo chimera filtering.',
+    description=('Apply one of the vsearch uchime*_denovo methods to '
+                 'identify chimeric feature sequences. '
+                 'The results of these methods can be used to filter chimeric '
+                 'features from the corresponding feature table. '
+                 'For more details, please refer to the vsearch manual.'),
+    citations=[citations['edgar2011uchime'],
+               citations['edgar2016uchime2'],
+               citations['edgar2016unoise2']]
 )
 
 

diff --git a/q2_vsearch/tests/test_chimera.py b/q2_vsearch/tests/test_chimera.py
@@ -47,6 +47,7 @@ def test_uchime_denovo(self):
 
         obs_chime = _read_seqs(chime)
         exp_chime = [self.input_sequences_list[3]]
+        # >feature4 is the chimera!
         self.assertEqual(obs_chime, exp_chime)
 
         # sequences are reverse-sorted by abundance in output
@@ -105,8 +106,10 @@ def test_uchime_denovo_no_chimeras_alt_params(self):
         with redirected_stdio(stderr=os.devnull):
             cmd, chime, nonchime, stats = _uchime_denovo(
                 sequences=self.input_sequences, table=self.input_table,
+                method='uchime3',
                 dn=42.42, mindiffs=4, mindiv=0.5, minh=0.42, xn=9.0)
         cmd = ' '.join(cmd)
+        self.assertTrue('--uchime3_denovo' in cmd)
         self.assertTrue('--dn 42.42' in cmd)
         self.assertTrue('--mindiffs 4' in cmd)
         self.assertTrue('--mindiv 0.5' in cmd)