Skip to content

Commit

Permalink
Add full text search for queries based on the Postgres tsvector type.…
Browse files Browse the repository at this point in the history
… (re getredash#260)
  • Loading branch information
jezdez authored and Allen Short committed Jan 8, 2018
1 parent 7c66d13 commit 712f577
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 23 deletions.
35 changes: 35 additions & 0 deletions migrations/versions/5ec5c84ba61e_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Add search_vector column, GIN index and search trigger to queries.
Revision ID: 5ec5c84ba61e
Revises: 58f810489c47
Create Date: 2017-10-17 18:21:00.174015
"""
from alembic import op
import sqlalchemy as sa
import sqlalchemy_utils as su
import sqlalchemy_searchable as ss


# revision identifiers, used by Alembic.
revision = '5ec5c84ba61e'
down_revision = '58f810489c47'
branch_labels = None
depends_on = None


def upgrade():
    """Add full text search support to the ``queries`` table.

    Adds a ``search_vector`` tsvector column, creates a GIN index on it and
    calls SQLAlchemy-Searchable's ``sync_trigger`` for the ``name``,
    ``description`` and ``query`` columns.
    """
    table_name = 'queries'
    vector_column = 'search_vector'
    bind = op.get_bind()

    op.add_column(table_name, sa.Column(vector_column, su.TSVectorType()))
    op.create_index(
        'ix_queries_search_vector',
        table_name,
        [vector_column],
        unique=False,
        postgresql_using='gin'
    )
    # NOTE(review): no search options are passed here, while the models call
    # make_searchable() with regconfig 'pg_catalog.simple' -- verify that the
    # trigger ends up using the intended text search configuration.
    ss.sync_trigger(
        bind,
        table_name,
        vector_column,
        ['name', 'description', 'query']
    )


def downgrade():
    """Remove the full text search pieces from the ``queries`` table.

    Drops the search trigger first, then the GIN index, and finally the
    ``search_vector`` column itself.
    """
    table_name = 'queries'
    vector_column = 'search_vector'
    bind = op.get_bind()

    ss.drop_trigger(bind, table_name, vector_column)
    op.drop_index('ix_queries_search_vector', table_name=table_name)
    op.drop_column(table_name, vector_column)
48 changes: 48 additions & 0 deletions migrations/versions/6b5be7e0a0ef_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Include the query id in the queries search vector trigger.
Revision ID: 6b5be7e0a0ef
Revises: 5ec5c84ba61e
Create Date: 2017-11-02 20:42:13.356360
"""
from alembic import op
import sqlalchemy as sa
import sqlalchemy_searchable as ss


# revision identifiers, used by Alembic.
revision = '6b5be7e0a0ef'
down_revision = '5ec5c84ba61e'
branch_labels = None
depends_on = None


def upgrade():
    """Re-sync the ``queries`` search trigger so that it also indexes ``id``.

    The ``queries`` table is reflected from the migration connection, a
    vectorizer that casts the integer ``id`` column to text is registered for
    it, and ``sync_trigger`` is called with ``id`` added to the column list.
    """
    # Reset the vectorizer registry before registering the one needed here.
    ss.vectorizer.clear()

    bind = op.get_bind()
    reflected = sa.MetaData(bind=bind)
    queries_table = sa.Table('queries', reflected, autoload=True)

    def integer_vectorizer(column):
        # The id column is an integer; cast it to text for the search vector.
        return sa.func.cast(column, sa.Text)

    # Same effect as decorating the function with @ss.vectorizer(<column>).
    ss.vectorizer(queries_table.c.id)(integer_vectorizer)

    indexed_columns = ['id', 'name', 'description', 'query']
    ss.sync_trigger(
        bind,
        'queries',
        'search_vector',
        indexed_columns,
        metadata=reflected
    )


def downgrade():
    """Restore the search trigger that covers name, description and query only.

    Drops the current trigger, drops and recreates the GIN index on
    ``search_vector``, then calls ``sync_trigger`` with the column list that
    does not contain ``id``.
    """
    table_name = 'queries'
    vector_column = 'search_vector'
    index_name = 'ix_queries_search_vector'
    bind = op.get_bind()

    ss.drop_trigger(bind, table_name, vector_column)

    op.drop_index(index_name, table_name=table_name)
    op.create_index(
        index_name,
        table_name,
        [vector_column],
        unique=False,
        postgresql_using='gin'
    )

    ss.sync_trigger(
        bind,
        table_name,
        vector_column,
        ['name', 'description', 'query']
    )
3 changes: 2 additions & 1 deletion redash/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ class QueryResultModelView(BaseModelView):

class QueryModelView(BaseModelView):
column_exclude_list = ('latest_query_data',)
form_excluded_columns = ('version', 'visualizations', 'alerts', 'org', 'created_at', 'updated_at', 'latest_query_data')
form_excluded_columns = ('version', 'visualizations', 'alerts', 'org', 'created_at',
'updated_at', 'latest_query_data', 'search_vector')


class DashboardModelView(BaseModelView):
Expand Down
7 changes: 5 additions & 2 deletions redash/handlers/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class QuerySearchResource(BaseResource):
@require_permission('view_query')
def get(self):
"""
Search query text, titles, and descriptions.
Search query text, names, and descriptions.
:qparam string q: Search term
Expand All @@ -50,7 +50,10 @@ def get(self):
'object_id': term,
'object_type': 'query',
})
return [q.to_dict(with_last_modified_by=False) for q in models.Query.search(term, self.current_user.group_ids, include_drafts=include_drafts)]
return [q.to_dict(with_last_modified_by=False)
for q in models.Query.search(term,
self.current_user.group_ids,
include_drafts=include_drafts)]


class QueryRecentResource(BaseResource):
Expand Down
69 changes: 49 additions & 20 deletions redash/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import xlsxwriter
from flask_login import AnonymousUserMixin, UserMixin
from flask_sqlalchemy import SQLAlchemy
from flask_sqlalchemy import SQLAlchemy, BaseQuery
from passlib.apps import custom_app_context as pwd_context
from redash import settings, redis_connection, utils
from redash.destinations import (get_configuration_schema_for_destination_type,
Expand All @@ -30,11 +30,13 @@
from sqlalchemy.event import listens_for
from sqlalchemy.ext.mutable import Mutable
from sqlalchemy.inspection import inspect
from sqlalchemy.orm import backref, joinedload, object_session, subqueryload
from sqlalchemy.orm import backref, joinedload, object_session
from sqlalchemy.orm.exc import NoResultFound # noqa: F401
from sqlalchemy.types import TypeDecorator
from sqlalchemy.orm.attributes import flag_modified
from functools import reduce
from sqlalchemy_searchable import SearchQueryMixin, make_searchable, vectorizer
from sqlalchemy_utils.types import TSVectorType


class SQLAlchemyExt(SQLAlchemy):
Expand All @@ -49,6 +51,21 @@ def apply_pool_defaults(self, app, options):
db = SQLAlchemyExt(session_options={
'expire_on_commit': False
})
# Make sure the SQLAlchemy mappers are all properly configured first.
# This is required by SQLAlchemy-Searchable as it adds DDL listeners
# on the configuration phase of models.
db.configure_mappers()

# listen to a few database events to set up functions, trigger updates
# and indexes for the full text search
make_searchable(options={'regconfig': 'pg_catalog.simple'})


class SearchBaseQuery(BaseQuery, SearchQueryMixin):
    """Flask-SQLAlchemy query class combined with SQLAlchemy-Searchable's mixin.

    Set it as a model's ``query_class`` so that ``Model.query.search(...)``
    is available for full text search.
    """


Column = functools.partial(db.Column, nullable=False)

Expand Down Expand Up @@ -861,7 +878,14 @@ class Query(ChangeTrackingMixin, TimestampMixin, BelongsToOrgMixin, db.Model):
schedule_until = Column(db.DateTime(True), nullable=True)
visualizations = db.relationship("Visualization", cascade="all, delete-orphan")
options = Column(MutableDict.as_mutable(PseudoJSON), default={})

search_vector = Column(TSVectorType('id', 'name', 'description', 'query',
weights={'name': 'A',
'id': 'B',
'description': 'C',
'query': 'D'}),
nullable=True)

query_class = SearchBaseQuery
__tablename__ = 'queries'
__mapper_args__ = {
"version_id_col": version,
Expand Down Expand Up @@ -987,27 +1011,24 @@ def outdated_queries(cls):
return outdated_queries.values()

@classmethod
def search(cls, term, group_ids, include_drafts=False):
# TODO: This is very naive implementation of search, to be replaced with PostgreSQL full-text-search solution.
where = (Query.name.ilike(u"%{}%".format(term)) |
Query.description.ilike(u"%{}%".format(term)))

if term.isdigit():
where |= Query.id == term

where &= Query.is_archived == False
def search(cls, term, group_ids, include_drafts=False, limit=20):
where = cls.is_archived == False

if not include_drafts:
where &= Query.is_draft == False
where &= cls.is_draft == False

where &= DataSourceGroup.group_id.in_(group_ids)
query_ids = (
db.session.query(Query.id).join(
DataSourceGroup,
Query.data_source_id == DataSourceGroup.data_source_id)
.filter(where)).distinct()

return Query.query.options(joinedload(Query.user)).filter(Query.id.in_(query_ids))
return cls.query.join(
DataSourceGroup,
cls.data_source_id == DataSourceGroup.data_source_id
).options(
joinedload(cls.user)
).filter(where).search(
term,
# sort the result using the weight as defined in the search vector column
sort=True
).distinct().limit(limit)

@classmethod
def recent(cls, group_ids, user_id=None, limit=20):
Expand Down Expand Up @@ -1074,6 +1095,14 @@ def groups(self):
def __unicode__(self):
return unicode(self.id)

def __repr__(self):
    """Return a debugging representation with the query id and name.

    Falls back to ``untitled`` when the query has no name.
    """
    label = self.name or 'untitled'
    return '<Query %s: "%s">' % (self.id, label)


@vectorizer(db.Integer)
def integer_vectorizer(column):
    """Cast an integer column to text; registered as the vectorizer for ``db.Integer``."""
    as_text = db.func.cast(column, db.Text)
    return as_text


@listens_for(Query.query_text, 'set')
def gen_query_hash(target, val, oldval, initiator):
Expand Down Expand Up @@ -1382,7 +1411,7 @@ def all(cls, org, group_ids, user_id):

@classmethod
def search(cls, term, user_id, group_ids, include_drafts=False):
# limit_to_users_dashboards=False,
# limit_to_users_dashboards=False,
# TODO: This is very naive implementation of search, to be replaced with PostgreSQL full-text-search solution.
where = (Dashboard.name.ilike(u"%{}%".format(term)))

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ redis==2.10.5
requests==2.11.1
six==1.10.0
SQLAlchemy==1.1.4
SQLAlchemy-Searchable==0.10.6
SQLAlchemy-Utils>=0.29.0
sqlparse==0.1.8
wsgiref==0.1.2
honcho==0.5.0
Expand Down
57 changes: 57 additions & 0 deletions tests/models/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def test_search_by_id_returns_query(self):
self.assertNotIn(q1, queries)
self.assertNotIn(q2, queries)

def test_search_by_number(self):
    """A numeric search term that appears in the description finds the query."""
    query = self.factory.create_query(description="Testing search 12345")
    db.session.flush()

    group_ids = [self.factory.default_group.id]
    results = Query.search('12345', group_ids)

    self.assertIn(query, results)

def test_search_respects_groups(self):
other_group = Group(org=self.factory.org, name="Other Group")
db.session.add(other_group)
Expand Down Expand Up @@ -98,6 +105,56 @@ def test_search_is_case_insensitive(self):

self.assertIn(q, Query.search('testing', [self.factory.default_group.id]))

def test_search_query_parser_or(self):
    """``or`` in the search term returns queries matching either word."""
    first = self.factory.create_query(name="Testing")
    second = self.factory.create_query(name="search")

    group_ids = [self.factory.default_group.id]
    results = list(Query.search('testing or search', group_ids))

    for expected in (first, second):
        self.assertIn(expected, results)

def test_search_query_parser_negation(self):
    """A ``-`` prefixed word excludes queries that contain it."""
    wanted = self.factory.create_query(name="Testing")
    excluded = self.factory.create_query(name="search")

    group_ids = [self.factory.default_group.id]
    results = list(Query.search('testing -search', group_ids))

    self.assertIn(wanted, results)
    self.assertNotIn(excluded, results)

def test_search_query_parser_parenthesis(self):
    """A parenthesised group combined with ``or`` matches all three queries."""
    created = [
        self.factory.create_query(name="Testing search"),
        self.factory.create_query(name="Testing searching"),
        self.factory.create_query(name="Testing finding"),
    ]

    group_ids = [self.factory.default_group.id]
    results = list(Query.search('(testing search) or finding', group_ids))

    for expected in created:
        self.assertIn(expected, results)

def test_search_query_parser_hyphen(self):
    """Space separated terms also match a name that joins them with a hyphen."""
    spaced = self.factory.create_query(name="Testing search")
    hyphenated = self.factory.create_query(name="Testing-search")

    group_ids = [self.factory.default_group.id]
    results = list(Query.search('testing search', group_ids))

    for expected in (spaced, hyphenated):
        self.assertIn(expected, results)

def test_search_query_parser_emails(self):
    """Parts of an e-mail address stored in the name are searchable on their own."""
    jane = self.factory.create_query(name="janedoe@example.com")
    john = self.factory.create_query(name="johndoe@example.com")

    def find(term):
        # Materialise the results so several assertions can inspect them.
        return list(Query.search(term, [self.factory.default_group.id]))

    # Terms shared by both addresses must return both queries.
    for shared_term in ('example', 'com'):
        matches = find(shared_term)
        self.assertIn(jane, matches)
        self.assertIn(john, matches)

    # The local part is unique to one address and must only return that query.
    matches = find('johndoe')
    self.assertNotIn(jane, matches)
    self.assertIn(john, matches)


class QueryRecentTest(BaseTestCase):
def test_global_recent(self):
Expand Down

0 comments on commit 712f577

Please sign in to comment.