Skip to content

Commit

Permalink
added tags search resources with an ngram field and autocomplete capa…
Browse files Browse the repository at this point in the history
…bilities in the base class
  • Loading branch information
loolmeh committed Jul 15, 2015
1 parent 78fde4a commit 15516fc
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 9 deletions.
21 changes: 20 additions & 1 deletion tatoeba2-django/tatoeba2/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .api_base import BaseSearchResource, UCharField, IDPaginator
from .models import Sentences
from .search_indexes import (
SentencesIndex,
SentencesIndex, TagsIndex
)
from datetime import datetime

Expand Down Expand Up @@ -54,3 +54,22 @@ class Meta:
filtering.update({f: SEARCH_FILTERS})

SentencesSearchResource._meta.filtering = filtering


class TagsSearchResource(BaseSearchResource):
    """GET-only search resource backed by ``TagsIndex``."""

    class Meta:
        # Endpoint name and the index it queries.
        resource_name = 'tags_search'
        index = TagsIndex()
        allowed_methods = ['get']

        # 'name_ngram' is listed separately so it is handled as an
        # autocomplete field rather than as an ordinary filter.
        autoquery_fields = ['name', 'user']
        autocomplete_fields = ['name_ngram']

# Allow SEARCH_FILTERS on 'django_id' and on every field declared by the
# tags index, except the 'text' field.
filtering = {'django_id': SEARCH_FILTERS}

for f in TagsSearchResource._meta.index.fields:
    if f != 'text':
        filtering[f] = SEARCH_FILTERS

TagsSearchResource._meta.filtering = filtering
19 changes: 16 additions & 3 deletions tatoeba2-django/tatoeba2/api_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from tastypie.paginator import Paginator
from tastypie.exceptions import InvalidFilterError, InvalidSortError
from haystack.query import SearchQuerySet, AutoQuery, SQ
from .utils import stemmer
from .utils import stemmer, uclean
from django.db import connection


Expand All @@ -15,7 +15,7 @@ def convert(self, value):
if value is None:
return None

return value.decode('utf-8', 'ignore')
return uclean(value)


class IDPaginator(Paginator):
Expand Down Expand Up @@ -72,6 +72,7 @@ class SearchOptions(ResourceOptions):
index = None
model = None
autoquery_fields = []
autocomplete_fields = []
stem_fields = []
max_limit = 100

Expand Down Expand Up @@ -232,7 +233,9 @@ def apply_filters(self, request, filters=None, join_op='and'):
for fltr, val in filters.items():
query = query & ~SQ(**{fltr: val})

return self.get_object_list(request).filter(query)
result = self.get_object_list(request).filter(query)

return result

def apply_sort(self, obj_list, sort_expr):
field_name = sort_expr[1:] if sort_expr.startswith('-') else sort_expr
Expand Down Expand Up @@ -266,6 +269,12 @@ def obj_get_list(self, request=None, **kwargs):
if 'offset' in filters.keys(): del filters['offset']
if 'limit' in filters.keys(): del filters['limit']

autocomp_filters = {}
for fltr, val in filters.items():
if fltr in self._meta.autocomplete_fields:
autocomp_filters[fltr] = val
del filters[fltr]

for fltr, val in filters.items():

if fltr[0] == '|':
Expand All @@ -279,6 +288,10 @@ def obj_get_list(self, request=None, **kwargs):

result = self.get_object_list(request)

if autocomp_filters:
for fltr, val in autocomp_filters.items():
result = result.autocomplete(**{fltr: val})

if and_filters:
stem_lang = and_filters.get('lang') or ''
applicable_filters = self.build_filters(and_filters, stem_lang)
Expand Down
41 changes: 37 additions & 4 deletions tatoeba2-django/tatoeba2/search_indexes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from haystack import indexes
from datetime import datetime
from .utils import now, stemmer
from .utils import now, stemmer, uclean
from .models import (
Sentences, Users, SentencesTranslations, UsersLanguages, Tags,
TagsSentences
Expand Down Expand Up @@ -41,12 +41,12 @@ def index_queryset(self, using=None):
def prepare(self, object):
self.prepared_data = super(SentencesIndex, self).prepare(object)

text = object.text.decode('utf-8', 'ignore')
text = uclean(object.text)
lang = object.lang
user = Users.objects.filter(id=object.user_id)
user = user[0] if user else None
owner = user.username if user else ''
owner = owner.decode('utf-8', 'ignore')
owner = uclean(owner)
is_orphan = not bool(owner)
owner_is_native = bool(
UsersLanguages.objects.filter(
Expand All @@ -61,7 +61,7 @@ def prepare(self, object):
).values_list('name', flat=True)
tags = list(set(tags))
tags = ' | '.join(tags) if tags else ''
tags = tags.decode('utf-8', 'ignore')
tags = uclean(tags)
is_tagged = bool(tags)
is_unapproved = bool(object.correctness == -1)
has_audio = bool(object.hasaudio == 'shtooka' or object.hasaudio == 'from_users')
Expand Down Expand Up @@ -106,3 +106,36 @@ def prepare(self, object):
self.prepared_data['has_audio'] = has_audio

return self.prepared_data


class TagsIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index over ``Tags`` rows.

    ``name_ngram`` is filled with the same value as ``name`` but is declared
    as an edge n-gram field so it can serve autocomplete queries.
    """

    text = indexes.CharField(document=True)
    # NOTE(review): Haystack documents ``id`` as a reserved field name --
    # confirm that overriding it here is intended.
    id = indexes.IntegerField(model_attr='id')
    name = indexes.CharField(default='')
    name_ngram = indexes.EdgeNgramField(default='')
    description = indexes.CharField(default='')
    user = indexes.CharField(default='')
    created = indexes.DateTimeField(model_attr='created', default=datetime(1,1,1))

    def get_model(self):
        """Return the model class this index covers."""
        return Tags

    def get_updated_field(self):
        """Return the name of the model field used to detect updated rows."""
        return 'created'

    def index_queryset(self, using=None):
        """Return the queryset of tags to index (all of them)."""
        return self.get_model().objects.all()

    def prepare(self, object):
        """Build the indexed document for one tag.

        Starts from the data prepared by the base class, then fills in the
        cleaned name (twice: plain and n-gram), the cleaned description and
        the username of the tag's creator ('' when no such user exists).
        """
        self.prepared_data = super(TagsIndex, self).prepare(object)

        name = uclean(object.name)
        description = uclean(object.description) if object.description else ''

        # Index the creator's username, not the model instance, mirroring
        # how SentencesIndex.prepare stores the sentence owner.
        owner = Users.objects.filter(id=object.user_id)
        owner = owner[0] if owner else None
        username = uclean(owner.username) if owner else ''

        self.prepared_data['name'] = name
        self.prepared_data['name_ngram'] = name
        # Previously computed but never stored, so the field stayed ''.
        self.prepared_data['description'] = description
        self.prepared_data['user'] = username

        return self.prepared_data
3 changes: 2 additions & 1 deletion tatoeba2-django/tatoeba2/urls.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from django.conf.urls import patterns, include, url
from tastypie.api import Api
from .api import (
SentencesResource, SentencesSearchResource
SentencesResource, SentencesSearchResource, TagsSearchResource
)


api = Api(api_name='0.1')
api.register(SentencesResource())
api.register(SentencesSearchResource())
api.register(TagsSearchResource())

urlpatterns = patterns('',
url(r'^', include(api.urls)),
Expand Down
3 changes: 3 additions & 0 deletions tatoeba2-django/tatoeba2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
def now():
    """Return the current UTC time with ``utc`` attached as its tzinfo."""
    naive_utc = datetime.utcnow()
    return naive_utc.replace(tzinfo=utc)

def uclean(string):
    """Decode a UTF-8 byte string to text, dropping undecodable bytes.

    Values that are not byte strings (already-decoded text, ``None``) are
    returned unchanged. Calling ``.decode`` on them would either fail or,
    on Python 2, trigger an implicit ASCII encode that raises on non-ASCII
    text.
    """
    if isinstance(string, bytes):
        return string.decode('utf-8', 'ignore')
    return string

STEMMERS = getattr(settings, 'HAYSTACK_STEMMERS')

TOKENIZERS = getattr(settings, 'HAYSTACK_TOKENIZERS', {})
Expand Down

0 comments on commit 15516fc

Please sign in to comment.