Fix analysis artifact name (#2169)

antgonza · ElDeveloper · commit c8042c726015 · 2017-07-11T15:34:26.000-07:00
* fix analysis artifact name

* fix analysis artifact name

* params -> md5

* ignoring params and target subfragment

* flake8

* removing parent_processing
diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py
@@ -23,6 +23,7 @@
 from future.utils import viewitems
 from biom import load_table
 from biom.util import biom_open
+from re import sub
 import pandas as pd
 
 from qiita_core.exceptions import IncompetentQiitaDeveloperError
@@ -789,15 +790,12 @@ def build_files(self, merge_duplicated_sample_ids):
                          if bi['artifact_id'] == aid][0]
 
                 data_type = ainfo['data_type']
-                algorithm = ainfo['algorithm']
-                target_subfragment = ainfo['target_subfragment']
-                parameters = ['%s: %s' % (k, v)
-                              for k, v in viewitems(ainfo['parameters'])]
+                # algorithm is: processing_method | parent_processing, just
+                # keeping processing_method
+                algorithm = ainfo['algorithm'].split('|')[0].strip()
                 files = ainfo['files']
 
-                l = "%s || %s || %s || %s" % (
-                    data_type, algorithm, ','.join(target_subfragment),
-                    ', '.join(parameters))
+                l = "%s || %s" % (data_type, algorithm)
                 # deblur special case, we need to account for file name
                 if 'deblur-workflow' in algorithm:
                     # [0] there is always just one biom
@@ -841,8 +839,8 @@ def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
 
             biom_files = []
             for label, tables in viewitems(grouped_samples):
-                data_type, algorithm, target_subfragment, \
-                    parameters, files = [l.strip() for l in label.split('||')]
+                data_type, algorithm, files = [
+                    l.strip() for l in label.split('||')]
 
                 new_table = None
                 artifact_ids = []
@@ -891,9 +889,10 @@ def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
                                        "analysis due to rarefaction level")
 
                 # write out the file
-                info = "%s_%s_%s_%s_%s" % (
-                    data_type, algorithm, target_subfragment, parameters,
-                    files)
+                data_type = sub('[^0-9a-zA-Z]+', '', data_type)
+                algorithm = sub('[^0-9a-zA-Z]+', '', algorithm)
+                files = sub('[^0-9a-zA-Z]+', '', files)
+                info = "%s_%s_%s" % (data_type, algorithm, files)
                 fn = "%d_analysis_%s.biom" % (self._id, info)
                 biom_fp = join(base_fp, fn)
                 with biom_open(biom_fp, 'w') as f:
diff --git a/qiita_db/test/test_analysis.py b/qiita_db/test/test_analysis.py
@@ -414,12 +414,11 @@ def test_build_mapping_file_duplicated_samples_merge(self):
     def test_build_biom_tables(self):
         analysis = self._create_analyses_with_samples()
         grouped_samples = {
-            '18S || algorithm || target_subfragment || parameters '
-            '|| files': [(4, ['1.SKB8.640193', '1.SKD8.640184',
-                              '1.SKB7.640196'])]}
+            '18S || algorithm || files': [
+                (4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])]}
         obs_bioms = analysis._build_biom_tables(grouped_samples)
-        biom_fp = self.get_fp("%s_analysis_18S_algorithm_target_subfra"
-                              "gment_parameters_files.biom" % analysis.id)
+        biom_fp = self.get_fp(
+            "%s_analysis_18S_algorithm_files.biom" % analysis.id)
         obs = [(a, basename(b)) for a, b in obs_bioms]
         self.assertEqual(obs, [('18S', basename(biom_fp))])
 
@@ -431,13 +430,13 @@ def test_build_biom_tables(self):
     def test_build_biom_tables_duplicated_samples_not_merge(self):
         analysis = self._create_analyses_with_samples()
         grouped_samples = {
-            '18S || algorithm || target_subfragment || parameters || files': [
+            '18S || algorithm || files': [
                 (4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']),
                 (5, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])]}
         obs_bioms = analysis._build_biom_tables(grouped_samples, True)
         obs = [(a, basename(b)) for a, b in obs_bioms]
-        biom_fp = ("%s_analysis_18S_algorithm_target_subfragment_"
-                   "parameters_files.biom" % analysis.id)
+        biom_fp = (
+            "%s_analysis_18S_algorithm_files.biom" % analysis.id)
         self.assertEqual(obs, [('18S', biom_fp)])
 
         table = load_table(obs_bioms[0][1])
@@ -448,9 +447,8 @@ def test_build_biom_tables_duplicated_samples_not_merge(self):
 
     def test_build_biom_tables_raise_error_due_to_sample_selection(self):
         grouped_samples = {
-            '18S || algorithm || target_subfragment || parameters '
-            '|| files': [(4, ['sample_name_1', 'sample_name_2',
-                              'sample_name_3'])]}
+            '18S || algorithm || files': [
+                (4, ['sample_name_1', 'sample_name_2', 'sample_name_3'])]}
         with self.assertRaises(RuntimeError):
             self.analysis._build_biom_tables(grouped_samples)