@@ -72,7 +72,22 @@ def _redbiom_search(self, query, search_on, callback):
72
72
import qiita_db .sql_connection as qdbsc
73
73
if features :
74
74
if search_on in ('metadata' , 'observations' ):
75
+ # This query basically selects all studies/prep-infos
76
+ # that contain the given sample ids (main_query); with
77
+ # those we select the artifacts that are only BIOM and
78
+ # their parent (artifact_query); then we can select
79
+ # the processing parameters of the parents. The
80
+ # structure might seem excessive but it's the only way
81
+ # to work with the json elements of the database and
82
+ # make it run in an OK manner (see initial comments
83
+ # in https://github.com/biocore/qiita/pull/2132). Also
84
+ # this query could be simplified if we used a newer
85
+ # version of postgres, which doesn't mean faster
86
+ # performance
75
87
sql = """
88
+ -- main_query will retrieve all basic information
89
+ -- about the study based on which samples exist
90
+ -- in the prep info file, and their artifact_ids
76
91
WITH main_query AS (
77
92
SELECT study_title, study_id, artifact_id,
78
93
array_agg(DISTINCT sample_id) AS samples,
@@ -85,6 +100,11 @@ def _redbiom_search(self, query, search_on, callback):
85
100
JOIN qiita.study USING (study_id)
86
101
WHERE sample_id IN %s
87
102
GROUP BY study_title, study_id, artifact_id),
103
+ -- now, we can take all the artifacts and just select
104
+ -- the BIOMs, while selecting the parent of the
105
+ -- artifacts, note that we are selecting the main
106
+ -- columns (discarding children) from the main_query +
107
+ -- the children artifact_id
88
108
artifact_query AS (
89
109
SELECT study_title, study_id, samples,
90
110
name, command_id,
@@ -97,6 +117,10 @@ def _redbiom_search(self, query, search_on, callback):
97
117
JOIN qiita.artifact_type at ON (
98
118
at.artifact_type_id = a.artifact_type_id
99
119
AND artifact_type = 'BIOM')),
120
+ -- now, we can select the parent processing parameters
121
+ -- of the children, note that we are selecting all
122
+ -- columns returned from artifact_query and the
123
+ -- parent processing parameters
100
124
parent_query AS (
101
125
SELECT artifact_query.*,
102
126
array_agg(parent_params) as parent_parameters
@@ -112,6 +136,7 @@ def _redbiom_search(self, query, search_on, callback):
112
136
artifact_query.samples, artifact_query.name,
113
137
artifact_query.command_id,
114
138
artifact_query.artifact_id)
139
+ -- just select everything that is in the parent_query
115
140
SELECT * FROM parent_query
116
141
ORDER BY parent_parameters, artifact_id
117
142
"""
0 commit comments