
Adding Redbiom to master #2131


Closed · wants to merge 45 commits

45 commits
06bc8b2
installing redbiom
antgonza Apr 26, 2017
41f0fcd
classifiers=classifiers ??
antgonza Apr 26, 2017
574c538
one of those days!
antgonza Apr 26, 2017
4210a61
init commit
antgonza Apr 29, 2017
2c194fe
Merge branch 'master' of https://github.com/biocore/qiita into redbio…
antgonza May 3, 2017
36c41bf
apt redis
antgonza May 3, 2017
8a7e9b7
redis-server &
antgonza May 3, 2017
a23c360
redis-server
antgonza May 3, 2017
a870b3c
Merge branch 'master' of https://github.com/biocore/qiita into redbio…
antgonza May 5, 2017
acda0d0
adding redbiom tests
antgonza May 5, 2017
0e72f2a
redbiom setup.py
antgonza May 5, 2017
a03f1cd
redbiom #egg
antgonza May 5, 2017
07fb481
ready for test env
antgonza May 5, 2017
eb8863b
Merge pull request #2129 from antgonza/readbiom-master
josenavas May 5, 2017
0e56fa0
Merge branch 'redbiom' of https://github.com/biocore/qiita into redbi…
antgonza May 5, 2017
2da0aaf
should fix tests
antgonza May 5, 2017
c2a844b
redis 7777 &
antgonza May 5, 2017
f1840f7
before script redis-server
antgonza May 5, 2017
0ac26f1
sudo: required
antgonza May 5, 2017
27d9968
adding more tests and REDBIOM_HOST
antgonza May 6, 2017
e2bcaf9
rm redbiom from conf
antgonza May 6, 2017
369628a
retriving artifact info via SQL
antgonza May 7, 2017
cc5afbb
retriving info from preps
antgonza May 7, 2017
9bd1166
creating redbiom context
antgonza May 7, 2017
28a9902
adding total of samples
antgonza May 7, 2017
17e0780
rm button when not logged
antgonza May 7, 2017
d378518
addressing @wasade initial comments
antgonza May 9, 2017
16fc478
new gui + search on categories
antgonza May 10, 2017
e41cdb4
adding a connection error message
antgonza May 10, 2017
babbede
request -> requests
antgonza May 10, 2017
0fc122b
rm query from connection error
antgonza May 10, 2017
d46392e
changing callback placing
antgonza May 10, 2017
c64a019
addressing @josenavas comments
antgonza May 11, 2017
f886d32
flake8
antgonza May 11, 2017
aca72c3
init commit for redbiom-improving-listing
antgonza May 11, 2017
6e7cb7e
rm duplicated js imports
antgonza May 11, 2017
5dc2b40
Merge pull request #2118 from antgonza/redbiom-ing
wasade May 11, 2017
5532e10
improving listing and grouping of artifacts
antgonza May 12, 2017
58edff2
show_loading needs to be in sitebase.html cause o the import
antgonza May 12, 2017
8a2a167
order matters for DataTables
antgonza May 12, 2017
9fa79e2
adding drop-down
antgonza May 20, 2017
bfb7efc
fixing categories
antgonza May 22, 2017
95c80c2
addressing @wasade comments
antgonza May 22, 2017
8abab71
addressing @josenavas comments
antgonza May 23, 2017
b37564c
Merge pull request #2132 from antgonza/redbiom-improving-listing
wasade Jun 7, 2017
19 changes: 17 additions & 2 deletions .travis.yml
@@ -8,6 +8,7 @@ env:
- TEST_ADD_STUDIES=True
before_install:
- redis-server --version
- redis-server /etc/redis/redis.conf --port 7777 &
- wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
- ./miniconda.sh -b
@@ -28,9 +29,14 @@ install:
- source activate qiita
- pip install -U pip
- pip install sphinx sphinx-bootstrap-theme coveralls 'ipython[all]==2.4.1'
- travis_retry pip install . --process-dependency-links
- 'echo "backend: Agg" > matplotlibrc'
script:
- git clone https://github.com/nicolasff/webdis
- pushd webdis
- make
- ./webdis &
- popd
- travis_retry pip install . --process-dependency-links
before_script:
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg
- if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then
export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg;
@@ -39,6 +45,15 @@ script:
- ipython profile create qiita-general --parallel
- qiita-env start_cluster qiita-general
- qiita-env make --no-load-ontologies
# loading redbiom with Qiita's test set
# first let's make sure redis is empty
- curl -s http://127.0.0.1:7379/FLUSHALL > /dev/null
- redbiom admin create-context --name "qiita-test" --description "qiita-test context"
- redbiom admin load-sample-metadata --metadata `pwd`/qiita_db/support_files/test_data/templates/1_19700101-000000.txt
- redbiom admin load-sample-metadata-search --metadata `pwd`/qiita_db/support_files/test_data/templates/1_19700101-000000.txt
- redbiom admin load-observations --table `pwd`/qiita_db/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test
- redbiom admin load-sample-data --table `pwd`/qiita_db/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test
script:
- if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
- if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
29 changes: 29 additions & 0 deletions INSTALL.md
@@ -60,6 +60,7 @@ Install the non-python dependencies

* [PostgreSQL](http://www.postgresql.org/download/) (minimum required version 9.3.5, we have tested most extensively with 9.3.6)
* [redis-server](http://redis.io) (we have tested most extensively with 2.8.17)
* [webdis](https://github.com/nicolasff/webdis) (the latest version should be fine, but we have tested most extensively with 9ee6fe2 - Feb 6, 2016)

There are several options to install these dependencies depending on your needs:

@@ -87,6 +88,28 @@ brew update
brew install homebrew/versions/redis28
```

### webdis

Note that webdis is the only dependency that assumes Qiita is already installed (due to library dependencies). We also suggest running two redis servers: one for webdis/redbiom and the other for Qiita. With separate servers, the redbiom cache can be flushed without impacting the operation of the Qiita server itself.

The following instructions download, compile and pre-populate the redbiom redis database. They assume that redis is running on the default port and that Qiita is fully installed, as the redbiom package is installed together with Qiita.

```
git clone https://github.com/nicolasff/webdis
pushd webdis
make
./webdis &
popd
# note that this assumes that Qiita is already installed
fp=`python -c 'import qiita_db; print qiita_db.__file__'`
qdbd=`dirname $fp`
redbiom admin create-context --name "qiita-test" --description "qiita-test context"
redbiom admin load-sample-metadata --metadata ${qdbd}/support_files/test_data/templates/1_19700101-000000.txt
redbiom admin load-sample-metadata-search --metadata ${qdbd}/support_files/test_data/templates/1_19700101-000000.txt
redbiom admin load-observations --table ${qdbd}/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test
redbiom admin load-sample-data --table ${qdbd}/support_files/test_data/processed_data/1_study_1001_closed_reference_otu_table.biom --context qiita-test
```
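
Webdis exposes redis commands as HTTP paths, which is how the `.travis.yml` above flushes the cache with `curl .../FLUSHALL`. A minimal sketch of the URL scheme (the helper name is hypothetical; 7379 is the webdis default port):

```python
# webdis maps an HTTP GET path to a redis command: /COMMAND/arg1/arg2
# (sketch only; webdis_url is a hypothetical helper, not part of webdis)
def webdis_url(host, *command):
    return host.rstrip('/') + '/' + '/'.join(command)

print(webdis_url('http://127.0.0.1:7379', 'FLUSHALL'))
# → http://127.0.0.1:7379/FLUSHALL
print(webdis_url('http://127.0.0.1:7379/', 'GET', 'some-key'))
# → http://127.0.0.1:7379/GET/some-key
```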


Install Qiita development version and its python dependencies
-------------------------------------------------------------
@@ -163,6 +186,12 @@ Next, make a test environment:
qiita-env make --no-load-ontologies
```

Finally, redbiom relies on the REDBIOM_HOST environment variable to set the URL to query. By default it is set to http://127.0.0.1:7379, which is the webdis default. To override it, for example:

```bash
export REDBIOM_HOST=http://my_host.com:7329
```
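
The lookup behaves like a plain environment-variable read with a fallback to the webdis default; a minimal sketch (the `redbiom_host` helper is hypothetical, not redbiom's actual API):

```python
import os

# hypothetical helper mirroring the documented behavior: use REDBIOM_HOST
# if set, otherwise fall back to the webdis default URL
def redbiom_host():
    return os.environ.get('REDBIOM_HOST', 'http://127.0.0.1:7379')

os.environ.pop('REDBIOM_HOST', None)
print(redbiom_host())  # → http://127.0.0.1:7379
os.environ['REDBIOM_HOST'] = 'http://my_host.com:7329'
print(redbiom_host())  # → http://my_host.com:7329
```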

## Start Qiita

Start postgres (instructions vary depending on operating system and install method).
238 changes: 238 additions & 0 deletions qiita_pet/handlers/qiita_redbiom.py
@@ -0,0 +1,238 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from future.utils import viewitems
from requests import ConnectionError
import redbiom.summarize
import redbiom.search
import redbiom._requests
import redbiom.util
import redbiom.fetch
from tornado.gen import coroutine, Task

from qiita_core.util import execute_as_transaction

from .base_handlers import BaseHandler


class RedbiomPublicSearch(BaseHandler):
@execute_as_transaction
def get(self, search):
self.render('redbiom.html')

@execute_as_transaction
def _redbiom_search(self, query, search_on, callback):
error = False
message = ''
results = []

try:
df = redbiom.summarize.contexts()
except ConnectionError:
error = True
message = 'Redbiom is down - contact admin, thanks!'

if not error:
contexts = df.ContextName.values
query = query.lower()
features = []

if search_on in ('metadata', 'categories'):
try:
features = redbiom.search.metadata_full(
query, categories=(search_on == 'categories'))
except TypeError:
error = True
message = (
'Not a valid search: "%s", are you sure this is a '
'valid metadata %s?' % (
query, 'value' if search_on == 'metadata' else
'category'))
except ValueError:
error = True
message = (
'Not a valid search: "%s", your query is too small '
'(too few letters), try a longer query' % query)
elif search_on == 'observations':
features = [s.split('_', 1)[1] for context in contexts
for s in redbiom.util.samples_from_observations(
query.split(' '), True, context)]
else:
error = True
message = ('Incorrect search_on value: valid options are '
'observations, metadata or categories; you '
'passed: %s' % search_on)

if not error:
import qiita_db as qdb
import qiita_db.sql_connection as qdbsc
if features:
if search_on in ('metadata', 'observations'):
# This query basically selects all studies/prep-infos
# that contain the given sample ids (main_query); with
# those we select the artifacts that are only BIOM and
# their parent (artifact_query); then we can select
# the processing parameters of the parents. The
# structure might seem excessive but it's the only way
# to work with the json elements of the database and
# make it run in an OK manner (see the initial comments
# in https://github.com/biocore/qiita/pull/2132). Also,
# this query could be simplified if we used a newer
# version of postgres, which doesn't mean faster
# performance
sql = """
-- main_query will retrieve all basic information
-- about the study based on which samples that exist
-- in the prep info file, and their artifact_ids
WITH main_query AS (
SELECT study_title, study_id, artifact_id,
array_agg(DISTINCT sample_id) AS samples,
qiita.artifact_descendants(artifact_id) AS
children
FROM qiita.study_prep_template
JOIN qiita.prep_template USING (prep_template_id)
JOIN qiita.prep_template_sample USING
(prep_template_id)
JOIN qiita.study USING (study_id)
WHERE sample_id IN %s
GROUP BY study_title, study_id, artifact_id),
-- now, we can take all the artifacts and just select
-- the BIOMs, while selecting the parent of the
-- artifacts, note that we are selecting the main
-- columns (discarding children) from the main_query +
-- the children artifact_id
artifact_query AS (
SELECT study_title, study_id, samples,
name, command_id,
(main_query.children).artifact_id AS
artifact_id
FROM main_query
JOIN qiita.artifact a ON
(main_query.children).artifact_id =
a.artifact_id
JOIN qiita.artifact_type at ON (
at.artifact_type_id = a.artifact_type_id
AND artifact_type = 'BIOM')),
-- now, we can select the parent processing parameters
-- of the children, note that we are selecting all
-- columns returned from artifact_query and the
-- parent processing parameters
parent_query AS (
SELECT artifact_query.*,
array_agg(parent_params) as parent_parameters
FROM artifact_query
LEFT JOIN qiita.parent_artifact pa ON (
artifact_query.artifact_id = pa.artifact_id)
LEFT JOIN qiita.artifact a ON (
pa.parent_id = a.artifact_id),
json_each_text(command_parameters)
parent_params
GROUP BY artifact_query.study_title,
artifact_query.study_id,
artifact_query.samples, artifact_query.name,
artifact_query.command_id,
artifact_query.artifact_id)
-- just select everything that is the parent_query
SELECT * FROM parent_query
ORDER BY parent_parameters, artifact_id
"""

sql_params = """
SELECT parameter_set_name, array_agg(ps) AS param_set
FROM qiita.default_parameter_set,
json_each_text(parameter_set) ps
GROUP BY parameter_set_name"""

with qdbsc.TRN:
results = []
commands = {}
# obtaining all existing parameters, note that
# they are not that many (~40) and we don't expect
# to have a huge growth in the near future
qdbsc.TRN.add(sql_params)
params = {pname: eval(params) for pname, params
in qdbsc.TRN.execute_fetchindex()}

# now let's get the actual artifacts
qdbsc.TRN.add(sql, [tuple(features)])
for row in qdbsc.TRN.execute_fetchindex():
title, sid, samples, name, cid, aid, pp = row
nr = {'study_title': title, 'study_id': sid,
'artifact_id': aid, 'aname': name,
'samples': samples}
if cid is not None:
if cid not in commands:
c = qdb.software.Command(cid)
commands[cid] = '%s - %s v%s' % (
c.name, c.software.name,
c.software.version)

# [-1] taking the last cause it's sorted by
# the number of overlapping parameters
# [0] then taking the first element that is
# the name of the parameter set
ppc = sorted(
[[k, len(eval(pp) & v)]
for k, v in viewitems(params)],
key=lambda x: x[1])[-1][0]

nr['command'] = '%s @ %s' % (
commands[cid], ppc)
else:
nr['command'] = ''
results.append(nr)
else:
sql = """
WITH get_studies AS (
SELECT
trim(table_name, 'sample_')::int AS
study_id,
array_agg(column_name::text) AS columns
FROM information_schema.columns
WHERE column_name IN %s
AND table_name LIKE 'sample_%%'
AND table_name NOT IN (
'prep_template',
'prep_template_sample')
GROUP BY table_name)
SELECT study_title, get_studies.study_id, columns
FROM get_studies
JOIN qiita.study ON get_studies.study_id =
qiita.study.study_id"""
with qdbsc.TRN:
results = []
qdbsc.TRN.add(sql, [tuple(features)])
for row in qdbsc.TRN.execute_fetchindex():
title, sid, cols = row
nr = {'study_title': title, 'study_id': sid,
'artifact_id': None, 'aname': None,
'samples': cols,
'command': ', '.join(cols),
'software': None, 'version': None}
results.append(nr)
else:
error = True
message = 'No samples were found! Try again ...'
callback((results, message))

@coroutine
@execute_as_transaction
def post(self, search):
search = self.get_argument('search', None)
search_on = self.get_argument('search_on', None)

data = []
if search is not None and search and search != ' ':
if search_on in ('observations', 'metadata', 'categories'):
data, msg = yield Task(
self._redbiom_search, search, search_on)
else:
msg = 'Not a valid option for search_on'
else:
msg = 'Nothing to search for ...'

self.write({'status': 'success', 'message': msg, 'data': data})
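
The "closest default parameter set" selection in the handler above (ranking candidate sets by how many key/value pairs they share with the parent's processing parameters, then taking the largest overlap) can be sketched in isolation. The function name and the sample data below are hypothetical:

```python
def best_parameter_set(parent_params, param_sets):
    """Return the name of the default parameter set whose (key, value)
    pairs overlap the most with the parent's parameters."""
    ranked = sorted(((name, len(parent_params & pairs))
                     for name, pairs in param_sets.items()),
                    key=lambda x: x[1])
    # [-1] takes the last entry (largest overlap); [0] is its name
    return ranked[-1][0]

# hypothetical default parameter sets, as sets of (key, value) pairs
param_sets = {
    'Defaults': {('similarity', '0.97'), ('threads', '1')},
    'Defaults - parallel': {('similarity', '0.97'), ('threads', '4')},
}
parent = {('similarity', '0.97'), ('threads', '4')}
print(best_parameter_set(parent, param_sets))  # → Defaults - parallel
```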