Skip to content

Commit c8042c7

Browse files
antgonza authored and ElDeveloper committed
Fix analysis artifact name (#2169)
* fix analysis artifact name * fix analysis artifact name * params -> md5 * ignoring params and target subfragment * flake8 * removing parent_processing
1 parent 2294c3f commit c8042c7

File tree

2 files changed

+20
-23
lines changed

2 files changed

+20
-23
lines changed

qiita_db/analysis.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from future.utils import viewitems
2424
from biom import load_table
2525
from biom.util import biom_open
26+
from re import sub
2627
import pandas as pd
2728

2829
from qiita_core.exceptions import IncompetentQiitaDeveloperError
@@ -789,15 +790,12 @@ def build_files(self, merge_duplicated_sample_ids):
789790
if bi['artifact_id'] == aid][0]
790791

791792
data_type = ainfo['data_type']
792-
algorithm = ainfo['algorithm']
793-
target_subfragment = ainfo['target_subfragment']
794-
parameters = ['%s: %s' % (k, v)
795-
for k, v in viewitems(ainfo['parameters'])]
793+
# algorithm is: processing_method | parent_processing, just
794+
# keeping processing_method
795+
algorithm = ainfo['algorithm'].split('|')[0].strip()
796796
files = ainfo['files']
797797

798-
l = "%s || %s || %s || %s" % (
799-
data_type, algorithm, ','.join(target_subfragment),
800-
', '.join(parameters))
798+
l = "%s || %s" % (data_type, algorithm)
801799
# deblur special case, we need to account for file name
802800
if 'deblur-workflow' in algorithm:
803801
# [0] there is always just one biom
@@ -841,8 +839,8 @@ def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
841839

842840
biom_files = []
843841
for label, tables in viewitems(grouped_samples):
844-
data_type, algorithm, target_subfragment, \
845-
parameters, files = [l.strip() for l in label.split('||')]
842+
data_type, algorithm, files = [
843+
l.strip() for l in label.split('||')]
846844

847845
new_table = None
848846
artifact_ids = []
@@ -891,9 +889,10 @@ def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
891889
"analysis due to rarefaction level")
892890

893891
# write out the file
894-
info = "%s_%s_%s_%s_%s" % (
895-
data_type, algorithm, target_subfragment, parameters,
896-
files)
892+
data_type = sub('[^0-9a-zA-Z]+', '', data_type)
893+
algorithm = sub('[^0-9a-zA-Z]+', '', algorithm)
894+
files = sub('[^0-9a-zA-Z]+', '', files)
895+
info = "%s_%s_%s" % (data_type, algorithm, files)
897896
fn = "%d_analysis_%s.biom" % (self._id, info)
898897
biom_fp = join(base_fp, fn)
899898
with biom_open(biom_fp, 'w') as f:

qiita_db/test/test_analysis.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -414,12 +414,11 @@ def test_build_mapping_file_duplicated_samples_merge(self):
414414
def test_build_biom_tables(self):
415415
analysis = self._create_analyses_with_samples()
416416
grouped_samples = {
417-
'18S || algorithm || target_subfragment || parameters '
418-
'|| files': [(4, ['1.SKB8.640193', '1.SKD8.640184',
419-
'1.SKB7.640196'])]}
417+
'18S || algorithm || files': [
418+
(4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])]}
420419
obs_bioms = analysis._build_biom_tables(grouped_samples)
421-
biom_fp = self.get_fp("%s_analysis_18S_algorithm_target_subfra"
422-
"gment_parameters_files.biom" % analysis.id)
420+
biom_fp = self.get_fp(
421+
"%s_analysis_18S_algorithm_files.biom" % analysis.id)
423422
obs = [(a, basename(b)) for a, b in obs_bioms]
424423
self.assertEqual(obs, [('18S', basename(biom_fp))])
425424

@@ -431,13 +430,13 @@ def test_build_biom_tables(self):
431430
def test_build_biom_tables_duplicated_samples_not_merge(self):
432431
analysis = self._create_analyses_with_samples()
433432
grouped_samples = {
434-
'18S || algorithm || target_subfragment || parameters || files': [
433+
'18S || algorithm || files': [
435434
(4, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']),
436435
(5, ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'])]}
437436
obs_bioms = analysis._build_biom_tables(grouped_samples, True)
438437
obs = [(a, basename(b)) for a, b in obs_bioms]
439-
biom_fp = ("%s_analysis_18S_algorithm_target_subfragment_"
440-
"parameters_files.biom" % analysis.id)
438+
biom_fp = (
439+
"%s_analysis_18S_algorithm_files.biom" % analysis.id)
441440
self.assertEqual(obs, [('18S', biom_fp)])
442441

443442
table = load_table(obs_bioms[0][1])
@@ -448,9 +447,8 @@ def test_build_biom_tables_duplicated_samples_not_merge(self):
448447

449448
def test_build_biom_tables_raise_error_due_to_sample_selection(self):
450449
grouped_samples = {
451-
'18S || algorithm || target_subfragment || parameters '
452-
'|| files': [(4, ['sample_name_1', 'sample_name_2',
453-
'sample_name_3'])]}
450+
'18S || algorithm || files': [
451+
(4, ['sample_name_1', 'sample_name_2', 'sample_name_3'])]}
454452
with self.assertRaises(RuntimeError):
455453
self.analysis._build_biom_tables(grouped_samples)
456454

0 commit comments

Comments
 (0)