Skip to content

Commit 95c80c2

Browse files
committed
addressing @wasade comments
1 parent bfb7efc commit 95c80c2

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

qiita_pet/handlers/qiita_redbiom.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,22 @@ def _redbiom_search(self, query, search_on, callback):
7272
import qiita_db.sql_connection as qdbsc
7373
if features:
7474
if search_on in ('metadata', 'observations'):
75+
# This query basically selects all studies/prep-infos
76+
# that contain the given sample ids (main_query); with
77+
# those we select the artifacts that are only BIOM and
78+
# their parent (artifact_query); then we can select
79+
# the processing parameters of the parents. The
80+
# structure might seem excessive but it's the only way
81+
# to work with the json elements of the database and
82+
# make it run in an OK manner (see initial comments
83+
# in https://github.com/biocore/qiita/pull/2132). Also
84+
# this query could be simplified if we used a newer
85+
# version of postgres, though that doesn't mean faster
86+
# performance
7587
sql = """
88+
-- main_query will retrieve all basic information
89+
-- about the study based on the samples that exist
90+
-- in the prep info file, and their artifact_ids
7691
WITH main_query AS (
7792
SELECT study_title, study_id, artifact_id,
7893
array_agg(DISTINCT sample_id) AS samples,
@@ -85,6 +100,11 @@ def _redbiom_search(self, query, search_on, callback):
85100
JOIN qiita.study USING (study_id)
86101
WHERE sample_id IN %s
87102
GROUP BY study_title, study_id, artifact_id),
103+
-- now, we can take all the artifacts and just select
104+
-- the BIOMs, while selecting the parent of the
105+
-- artifacts, note that we are selecting the main
106+
-- columns (discarding children) from the main_query +
107+
-- the children artifact_id
88108
artifact_query AS (
89109
SELECT study_title, study_id, samples,
90110
name, command_id,
@@ -97,6 +117,10 @@ def _redbiom_search(self, query, search_on, callback):
97117
JOIN qiita.artifact_type at ON (
98118
at.artifact_type_id = a.artifact_type_id
99119
AND artifact_type = 'BIOM')),
120+
-- now, we can select the parent processing parameters
121+
-- of the children, note that we are selecting all
122+
-- columns returned from artifact_query and the
123+
-- parent processing parameters
100124
parent_query AS (
101125
SELECT artifact_query.*,
102126
array_agg(parent_params) as parent_parameters
@@ -112,6 +136,7 @@ def _redbiom_search(self, query, search_on, callback):
112136
artifact_query.samples, artifact_query.name,
113137
artifact_query.command_id,
114138
artifact_query.artifact_id)
139+
-- just select everything that is in the parent_query
115140
SELECT * FROM parent_query
116141
ORDER BY parent_parameters, artifact_id
117142
"""

0 commit comments

Comments
 (0)