Skip to content

Commit

Permalink
Improve on routing updates and querying
Browse files Browse the repository at this point in the history
  • Loading branch information
agjohnson committed Feb 23, 2016
1 parent 6854488 commit 8f36ce5
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 30 deletions.
41 changes: 26 additions & 15 deletions readthedocs/restapi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,16 @@ def delete_versions(project, version_data):

def index_search_request(version, page_list, commit, project_scale, page_scale,
section=True, delete=True):
log_msg = ' '.join([page['path'] for page in page_list])
log.info("(Server Search) Indexing Pages: %s [%s]" % (
version.project.slug, log_msg))
"""Update search indexes with build output JSON
In order to keep sub-projects all indexed on the same shard, page and section
indexes are updated once for each routing value: the project's own slug and
the slug of each of its parent projects.
"""
project = version.project
page_obj = PageIndex()
section_obj = SectionIndex()

# tags = [tag.name for tag in project.tags.all()]
log_msg = ' '.join([page['path'] for page in page_list])
log.info("Updating search index: project=%s pages=[%s]",
project.slug, log_msg)

project_obj = ProjectIndex()
project_obj.index_document(data={
Expand All @@ -107,11 +109,17 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
'weight': project_scale,
})

page_obj = PageIndex()
section_obj = SectionIndex()
index_list = []
section_index_list = []
routes = [project.slug]
routes.extend([p.parent.slug for p in project.superprojects.all()])
for page in page_list:
log.debug("(API Index) %s:%s" % (project.slug, page['path']))
page_id = hashlib.md5('%s-%s-%s' % (project.slug, version.slug, page['path'])).hexdigest()
log.debug("Indexing page: %s:%s" % (project.slug, page['path']))
page_id = (hashlib
.md5('-'.join([project.slug, version.slug, page['path']]))
.hexdigest())
index_list.append({
'id': page_id,
'project': project.slug,
Expand All @@ -127,10 +135,10 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
if section:
for section in page['sections']:
section_index_list.append({
'id': hashlib.md5(
'%s-%s-%s-%s' % (project.slug, version.slug,
page['path'], section['id'])
).hexdigest(),
'id': (hashlib
.md5('-'.join([project.slug, version.slug,
page['path'], section['id']]))
.hexdigest()),
'project': project.slug,
'version': version.slug,
'path': page['path'],
Expand All @@ -139,12 +147,15 @@ def index_search_request(version, page_list, commit, project_scale, page_scale,
'content': section['content'],
'weight': page_scale,
})
section_obj.bulk_index(section_index_list, parent=page_id, routing=project.slug)
for route in routes:
section_obj.bulk_index(section_index_list, parent=page_id,
routing=route)

page_obj.bulk_index(index_list, parent=project.slug)
for route in routes:
page_obj.bulk_index(index_list, parent=project.slug, routing=route)

if delete:
log.info("(Server Search) Deleting files not in commit: %s" % commit)
log.info("Deleting files not in commit: %s", commit)
# TODO: AK Make sure this works
delete_query = {
"query": {
Expand Down
36 changes: 21 additions & 15 deletions readthedocs/search/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,6 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy
:param version_slug: slug for :py:class:`Project` version slug
:param taxonomy: taxonomy for search
"""
try:
project = (Project.objects
.api(request.user)
.get(slug=project_slug))
except Project.DoesNotExist:
raise Http404("Project does not exist")

kwargs = {}
body = {
"query": {
Expand Down Expand Up @@ -115,14 +108,27 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy
if project_slug or version_slug or taxonomy:
final_filter = {"and": []}

if project:
project_slugs = [project.slug]
project_slugs.extend(s.child.slug for s in project.subprojects.all())
project_slugs.extend(s.parent.slug for s in project.superprojects.all())
final_filter['and'].append({"terms": {"project": project_slugs}})

# Add routing to optimize search by hitting the right shard.
kwargs['routing'] = project_slug
if project_slug:
try:
project = (Project.objects
.api(request.user)
.get(slug=project_slug))
project_slugs = [project.slug]
project_slugs.extend(s.child.slug for s
in project.subprojects.all())
final_filter['and'].append({"terms": {"project": project_slugs}})

# Add routing to optimize search by hitting the right shard.
# This purposely doesn't apply routing if the project has more
# than one parent project.
if project.superprojects.exists():
if project.superprojects.count() == 1:
kwargs['routing'] = (project.superprojects.first()
.parent.slug)
else:
kwargs['routing'] = project_slug
except Project.DoesNotExist:
return None

if version_slug:
final_filter['and'].append({'term': {'version': version_slug}})
Expand Down

0 comments on commit 8f36ce5

Please sign in to comment.