@@ -771,6 +771,9 @@ def build_files(self, merge_duplicated_sample_ids):
771
771
# make testing much harder as we will need to have analyses at
772
772
# different stages and possible errors.
773
773
samples = self .samples
774
+ # getting the info of all the artifacts to save SQL time
775
+ bioms_info = qdb .util .get_artifacts_bioms_information (
776
+ samples .keys ())
774
777
775
778
# figuring out if we are going to have duplicated samples, again
776
779
# doing it here because it's computationally cheaper
@@ -780,20 +783,32 @@ def build_files(self, merge_duplicated_sample_ids):
780
783
# are going to create
781
784
rename_dup_samples = False
782
785
grouped_samples = {}
783
- for k , v in viewitems (samples ):
784
- a = qdb .artifact .Artifact (k )
785
- p = a .processing_parameters
786
- if p is not None and p .command is not None :
787
- ref = (str (p .values ['reference' ])
788
- if 'reference' in p .values else 'na' )
789
- cid = str (p .command .id )
786
+ for aid , asamples in viewitems (samples ):
787
+ # find the artifact info, [0] there should be only 1 info
788
+ ainfo = [bi for bi in bioms_info
789
+ if bi ['artifact_id' ] == aid ][0 ]
790
+
791
+ data_type = ainfo ['data_type' ]
792
+ algorithm = ainfo ['algorithm' ]
793
+ target_subfragment = ainfo ['target_subfragment' ]
794
+ parameters = ['%s: %s' % (k , v )
795
+ for k , v in viewitems (ainfo ['parameters' ])]
796
+ files = ainfo ['files' ]
797
+
798
+ l = "%s || %s || %s || %s" % (
799
+ data_type , algorithm , ',' .join (target_subfragment ),
800
+ ', ' .join (parameters ))
801
+ # deblur special case, we need to account for file name
802
+ if 'deblur-workflow' in algorithm :
803
+ # [0] there is always just one biom
804
+ l += " || %s" % [f for f in files
805
+ if f .endswith ('.biom' )][0 ]
790
806
else :
791
- ref = 'na'
792
- cid = 'na'
793
- l = "%s.%s.%s" % (a .data_type , ref , cid )
807
+ l += " ||"
808
+
794
809
if l not in grouped_samples :
795
810
grouped_samples [l ] = []
796
- grouped_samples [l ].append ((k , v ))
811
+ grouped_samples [l ].append ((aid , asamples ))
797
812
# 2. if rename_dup_samples is still False, make sure that we don't
798
813
# need to rename samples by checking that there are not
799
814
# duplicated samples per group
@@ -826,7 +841,9 @@ def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
826
841
827
842
biom_files = []
828
843
for label , tables in viewitems (grouped_samples ):
829
- data_type , reference_id , command_id = label .split ('.' )
844
+ data_type , algorithm , target_subfragment , \
845
+ parameters , files = [l .strip () for l in label .split ('||' )]
846
+
830
847
new_table = None
831
848
artifact_ids = []
832
849
for aid , samples in tables :
@@ -873,25 +890,16 @@ def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
873
890
raise RuntimeError ("All samples filtered out from "
874
891
"analysis due to rarefaction level" )
875
892
876
- # add the metadata column for study the samples come from,
877
- # this is useful in case the user download the bioms
878
- study_md = {'study' : artifact .study .title ,
879
- 'artifact_ids' : ', ' .join (artifact_ids ),
880
- 'reference_id' : reference_id ,
881
- 'command_id' : command_id }
882
- samples_md = {sid : study_md for sid in new_table .ids ()}
883
- new_table .add_metadata (samples_md , axis = 'sample' )
884
-
885
893
# write out the file
886
- fn = "%d_analysis_dt-%s_r-%s_c-%s.biom" % (
887
- self ._id , data_type , reference_id , command_id )
894
+ info = "%s_%s_%s_%s_%s" % (
895
+ data_type , algorithm , target_subfragment , parameters ,
896
+ files )
897
+ fn = "%d_analysis_%s.biom" % (self ._id , info )
888
898
biom_fp = join (base_fp , fn )
889
899
with biom_open (biom_fp , 'w' ) as f :
890
900
new_table .to_hdf5 (
891
- f , "Generated by Qiita. Analysis %d Datatype %s "
892
- "Reference %s Command %s" % (self ._id , data_type ,
893
- reference_id , command_id ))
894
-
901
+ f , "Generated by Qiita, analysis id: %d, info: %s" % (
902
+ self ._id , label ))
895
903
biom_files .append ((data_type , biom_fp ))
896
904
return biom_files
897
905
0 commit comments