Skip to content

Redbiom initial code #2282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,6 @@ gg_13_8-*

# sphinx documentation
qiita_pet/static/doc/

# webdis log
webdis.log
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ install:
- redbiom admin create-context --name "qiita-test" --description "qiita-test context"
- redbiom admin load-sample-metadata --metadata `pwd`/qiita_db/support_files/test_data/templates/1_19700101-000000.txt
- redbiom admin load-sample-metadata-search --metadata `pwd`/qiita_db/support_files/test_data/templates/1_19700101-000000.txt
- redbiom admin load-sample-data --table `pwd`/qiita_db/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test
- redbiom admin load-sample-data --table `pwd`/qiita_db/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test --tag 4
- redbiom admin load-sample-data --table `pwd`/qiita_db/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table-for_redbiom_tests.biom --context qiita-test --tag 5
- mkdir ~/.qiita_plugins
- export REDBIOM_HOST=http://127.0.0.1:7379
- cp $PWD/qiita_core/support_files/BIOM\ type_2.1.4.conf ~/.qiita_plugins
before_script:
# EBI, see the end of before_install about why this block is commented out
Expand Down
2 changes: 1 addition & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ redbiom admin scripts-writable
redbiom admin create-context --name "qiita-test" --description "qiita-test context"
redbiom admin load-sample-metadata --metadata ${qdbd}/support_files/test_data/templates/1_19700101-000000.txt
redbiom admin load-sample-metadata-search --metadata ${qdbd}/support_files/test_data/templates/1_19700101-000000.txt
redbiom admin load-sample-data --table ${qdbd}/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test
redbiom admin load-sample-data --table ${qdbd}/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test --tag 1
```

Install Qiita development version and its python dependencies
Expand Down
Binary file not shown.
62 changes: 62 additions & 0 deletions qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,68 @@ def test_generate_study_list(self):
obs_info = qdb.util.generate_study_list([1, 2, 3, 4], False)
self.assertEqual(obs_info, exp_info)

# resetting to private and deleting the old study
qdb.artifact.Artifact(4).visibility = 'private'
qdb.study.Study.delete(new_study.id)

def test_generate_study_list_without_artifacts(self):
    """Check generate_study_list_without_artifacts on a full and an empty study

    The expected list holds study 1 plus a freshly created study without
    samples or artifacts. The call is repeated after making artifact 4
    public and with both values of ``public_only``.
    """
    # creating a new study to make sure that empty studies are also
    # returned
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 25,
        "number_samples_promised": 28,
        "study_alias": "TST",
        "study_description": "Some description of the study goes here",
        "study_abstract": "Some abstract goes here",
        "emp_person_id": qdb.study.StudyPerson(1),
        "principal_investigator_id": qdb.study.StudyPerson(1),
        "lab_person_id": qdb.study.StudyPerson(1)}
    new_study = qdb.study.Study.create(
        qdb.user.User('shared@foo.bar'), 'test_study_1', info=info)

    # ids 2, 3 and 4 are requested below but are not expected back; only
    # study 1 and the new study appear in the expected output
    exp_info = [
        {
            'status': 'private',
            'study_title': (
                'Identification of the Microbiomes for Cannabis Soils'),
            'metadata_complete': True,
            'publication_pid': ['123456', '7891011'],
            'ebi_submission_status': 'submitted',
            'study_id': 1,
            'ebi_study_accession': 'EBI123456-BB',
            'study_abstract': (
                'This is a preliminary study to examine the microbiota '
                'associated with the Cannabis plant. Soils samples from '
                'the bulk soil, soil associated with the roots, and the '
                'rhizosphere were extracted and the DNA sequenced. Roots '
                'from three independent plants of different strains were '
                'examined. These roots were obtained November 11, 2011 from '
                'plants that had been harvested in the summer. Future studies '
                'will attempt to analyze the soils and rhizospheres from the '
                'same location at different time points in the plant '
                'lifecycle.'),
            'pi': ('PI_dude@foo.bar', 'PIDude'),
            'publication_doi': ['10.100/123456', '10.100/7891011'],
            'study_alias': 'Cannabis Soils',
            'number_samples_collected': 27},
        {
            'status': 'sandbox',
            'study_title': 'test_study_1',
            'metadata_complete': True,
            'publication_pid': [],
            'ebi_submission_status': 'not submitted',
            'study_id': new_study.id,
            'ebi_study_accession': None,
            'study_abstract': 'Some abstract goes here',
            'pi': ('lab_dude@foo.bar', 'LabDude'),
            'publication_doi': [],
            'study_alias': 'TST',
            # expected to be 0 although `info` above declares 25
            'number_samples_collected': 0}]

    # the cleanup lives in `finally` so that a failing assertion does not
    # leave the extra study / the public artifact behind for other tests
    try:
        obs_info = qdb.util.generate_study_list_without_artifacts(
            [1, 2, 3, 4], True)
        self.assertEqual(obs_info, exp_info)

        # making artifact 4 public is expected to flip study 1 to public
        qdb.artifact.Artifact(4).visibility = 'public'
        exp_info[0]['status'] = 'public'
        obs_info = qdb.util.generate_study_list_without_artifacts(
            [1, 2, 3, 4], True)
        self.assertEqual(obs_info, exp_info)

        # the same output is expected regardless of public_only
        obs_info = qdb.util.generate_study_list_without_artifacts(
            [1, 2, 3, 4], False)
        self.assertEqual(obs_info, exp_info)
    finally:
        # resetting to private and deleting the new study
        qdb.artifact.Artifact(4).visibility = 'private'
        qdb.study.Study.delete(new_study.id)

def test_get_artifacts_information(self):
# we are gonna test that it ignores 1 and 2 cause they are not biom,
# 4 has all information and 7 and 8 don't
Expand Down
96 changes: 79 additions & 17 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1310,24 +1310,86 @@ def generate_study_list(study_ids, public_only=False):
del info["shared_with_name"]
del info["shared_with_email"]

infolist.append({
'owner': info['owner'],
'study_alias': info['study_alias'],
'metadata_complete': info['metadata_complete'],
'publication_pid': info['publication_pid'],
'ebi_submission_status': info['ebi_submission_status'],
'shared': info['shared'],
'study_abstract': info['study_abstract'], 'pi': info['pi'],
'status': qdb.study.Study(info['study_id']).status,
'study_tags': info['study_tags'],
'publication_doi': info['publication_doi'],
'study_id': info['study_id'],
'ebi_study_accession': info['ebi_study_accession'],
'study_title': info['study_title'],
'number_samples_collected': info['number_samples_collected'],
'artifact_biom_ids': info['artifact_biom_ids']
})
info['status'] = qdb.study.Study(info['study_id']).status
infolist.append(info)
return infolist


def generate_study_list_without_artifacts(study_ids, public_only=False):
    """Get general study information without artifacts

    Parameters
    ----------
    study_ids : iterable of ints
        The study ids to look for. Non-existing ids will be ignored. An
        empty iterable yields an empty list without querying the database.
    public_only : bool, optional
        Not read by this function, so it has no effect on the result; it
        mirrors the signature of generate_study_list. Default: false.

    Returns
    -------
    list of dict
        One dict per study found, ordered by study_id, with the keys:
        metadata_complete, study_abstract, study_id, study_alias,
        study_title, ebi_study_accession, ebi_submission_status,
        number_samples_collected, publication_doi, publication_pid,
        pi (an ``(email, name)`` tuple) and status.

    Notes
    -----
    The main select might look scary but it's pretty simple:
    - We select the required fields from qiita.study and qiita.study_person
        SELECT metadata_complete, study_abstract, study_id, study_alias,
            study_title, ebi_study_accession, ebi_submission_status,
            qiita.study_person.name AS pi_name,
            qiita.study_person.email AS pi_email,
    - the total number of samples collected by counting sample_ids
            (SELECT COUNT(sample_id) FROM qiita.study_sample
                WHERE study_id=qiita.study.study_id)
                AS number_samples_collected,
    - all the publications that belong to the study
            (SELECT array_agg(row_to_json((publication, is_doi), true))
                FROM qiita.study_publication
                WHERE study_id=qiita.study.study_id) AS publications
    """
    # Materialize once so emptiness can be checked for any iterable. With
    # no ids the query below would be rendered as `IN ()`, which is not
    # valid SQL (assuming the driver adapts tuples the way psycopg2 does).
    study_ids = tuple(study_ids)
    if not study_ids:
        return []

    with qdb.sql_connection.TRN:
        sql = """
            SELECT metadata_complete, study_abstract, study_id, study_alias,
                study_title, ebi_study_accession, ebi_submission_status,
                qiita.study_person.name AS pi_name,
                qiita.study_person.email AS pi_email,
                (SELECT COUNT(sample_id) FROM qiita.study_sample
                    WHERE study_id=qiita.study.study_id)
                    AS number_samples_collected,
                (SELECT array_agg(row_to_json((publication, is_doi), true))
                    FROM qiita.study_publication
                    WHERE study_id=qiita.study.study_id) AS publications
                FROM qiita.study
                LEFT JOIN qiita.study_person ON (
                    study_person_id=principal_investigator_id)
                WHERE study_id IN %s
                ORDER BY study_id"""
        qdb.sql_connection.TRN.add(sql, [study_ids])
        infolist = []
        for row in qdb.sql_connection.TRN.execute_fetchindex():
            info = dict(row)

            # publication info: split the aggregated rows into DOIs and
            # other publication ids; the aggregate is None when the study
            # has no publications
            publications = info.pop('publications')
            info['publication_doi'] = []
            info['publication_pid'] = []
            if publications is not None:
                for pub in publications:
                    # f1-2 are the default names given by row_to_json:
                    # f1 -> publication, f2 -> is_doi
                    if pub['f2']:
                        info['publication_doi'].append(pub['f1'])
                    else:
                        info['publication_pid'].append(pub['f1'])

            # pi info: collapse the two columns into one (email, name) tuple
            info["pi"] = (info.pop('pi_email'), info.pop('pi_name'))

            info['status'] = qdb.study.Study(info['study_id']).status
            infolist.append(info)
    return infolist


Expand Down
119 changes: 119 additions & 0 deletions qiita_pet/handlers/qiita_redbiom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from requests import ConnectionError
from collections import defaultdict
import redbiom.summarize
import redbiom.search
import redbiom._requests
import redbiom.util
import redbiom.fetch
from tornado.gen import coroutine, Task

from qiita_core.util import execute_as_transaction
from qiita_db.util import generate_study_list_without_artifacts
from qiita_db.study import Study

from .base_handlers import BaseHandler


class RedbiomPublicSearch(BaseHandler):
    """Render the redbiom search page and answer its search requests

    Each ``_redbiom_*_search`` method returns a ``(message, study_artifacts)``
    tuple: ``message`` is an error text for the user ('' when the search
    ran), and ``study_artifacts`` maps a study id (str) to the list of
    artifact ids matched in that study.
    """

    @execute_as_transaction
    def get(self, search):
        """Render the search page; the `search` argument is not used"""
        self.render('redbiom.html')

    @staticmethod
    def _add_redbiom_ids(study_artifacts, redbiom_ids):
        """Group the artifact ids found in `redbiom_ids` by study, in place

        Each id is split as "<aid>_<sid>.<rest>": the text before the first
        '_' is stored as the artifact id and the text between that '_' and
        the next '.' is used as the study id.
        NOTE(review): presumably <aid> is the --tag given when the table
        was loaded into redbiom - confirm.
        """
        for idx in redbiom_ids:
            aid, sid = idx.split('_', 1)
            sid = sid.split('.', 1)[0]
            study_artifacts[sid].append(aid)

    def _redbiom_metadata_search(self, query, contexts):
        """Search redbiom sample metadata for the lowercased `query`

        `contexts` is accepted so that every search method can be called
        the same way; it is not used here.
        """
        study_artifacts = defaultdict(list)
        query = query.lower()
        try:
            samples = redbiom.search.metadata_full(query, False)
        except TypeError:
            return ('Not a valid search: "%s", are you sure this is a '
                    'valid metadata value?' % query), study_artifacts
        except ValueError:
            return ('Not a valid search: "%s", your query is too small '
                    '(too few letters), try a longer query' % query
                    ), study_artifacts

        # the text before the first '.' of a sample id is used as the study
        # id; every BIOM artifact of a matching study is reported
        for sid in {s.split('.', 1)[0] for s in samples}:
            study_artifacts[sid] = [
                a.id for a in Study(sid).artifacts(artifact_type='BIOM')]

        return '', study_artifacts

    def _redbiom_feature_search(self, query, contexts):
        """Search every context for the space separated features in `query`"""
        study_artifacts = defaultdict(list)
        features = query.split(' ')
        for ctx in contexts:
            self._add_redbiom_ids(
                study_artifacts,
                redbiom.util.ids_from(features, True, 'feature', ctx))

        return '', study_artifacts

    def _redbiom_taxon_search(self, query, contexts):
        """Search every context for features descending from taxon `query`"""
        study_artifacts = defaultdict(list)
        for ctx in contexts:
            # find the features with those taxonomies and then search
            # those features in the samples
            features = redbiom.fetch.taxon_descendents(ctx, query)
            self._add_redbiom_ids(
                study_artifacts,
                redbiom.util.ids_from(features, True, 'feature', ctx))

        return '', study_artifacts

    @execute_as_transaction
    def _redbiom_search(self, query, search_on, callback):
        """Run the `search_on` search for `query` and report via `callback`

        Parameters
        ----------
        query : str
            The text to search for
        search_on : str
            One of 'metadata', 'feature' or 'taxon'
        callback : callable
            Called exactly once with a ``(results, message)`` tuple, where
            ``results`` is the list of study dicts (each one extended with
            'artifact_biom_ids') and ``message`` is '' or the text to show
            to the user
        """
        search_f = {'metadata': self._redbiom_metadata_search,
                    'feature': self._redbiom_feature_search,
                    'taxon': self._redbiom_taxon_search}

        message = ''
        results = []

        try:
            df = redbiom.summarize.contexts()
        except ConnectionError:
            message = 'Redbiom is down - contact admin, thanks!'
        else:
            contexts = df.ContextName.values
            if search_on in search_f:
                message, study_artifacts = search_f[search_on](query, contexts)
                if not message:
                    if study_artifacts:
                        results = generate_study_list_without_artifacts(
                            study_artifacts.keys(), True)
                        # inserting the artifact_biom_ids to the results;
                        # study_artifacts is keyed by str while study_id
                        # comes back from the database, hence the str()
                        for study in results:
                            study['artifact_biom_ids'] = list(set(
                                study_artifacts[str(study['study_id'])]))
                    else:
                        message = "No samples where found! Try again ..."
            else:
                message = ('Incorrect search by: you can use metadata, '
                           'features or taxon and you passed: %s' % search_on)

        callback((results, message))

    @coroutine
    @execute_as_transaction
    def post(self, search):
        """Run the requested search and write the JSON response

        The query is read from the 'search' request argument, which
        replaces the `search` value received as a parameter; the kind of
        search is read from 'search_on'.
        """
        search = self.get_argument('search')
        search_on = self.get_argument('search_on')

        # Task supplies the `callback` argument of _redbiom_search and
        # resumes here with the (results, message) tuple passed to it
        data, msg = yield Task(self._redbiom_search, search, search_on)

        self.write({'status': 'success', 'message': msg, 'data': data})
Loading