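"""searcher.py -- search front end for the book data set.

A small web.py application that serves a search form at '/' and answers
queries at '/<query>', reading results either from an Elasticsearch index
('book-index') or from a local PyLucene index ('lucene.index'), depending
on the `use_elasticsearch` flag below.
"""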
import web

# URL routing: '/' serves the search form, '/<query>' runs a search for <query>.
urls = (
    '/(.+)', 'index',
    '/', 'search'
)

# Toggle between the Elasticsearch backend (True) and the PyLucene backend (False).
use_elasticsearch = True


class search:
    """Serve the search form page."""

    def GET(self):
        render = web.template.render('templates/')
        return render.search()


class index:
    """Answer a search query and render one page of results."""

    def GET(self, query):
        data_input = web.input()
        # Zero-based result page, taken from the optional ?page= parameter.
        page = 0
        if "page" in data_input:
            page = int(data_input["page"])
        render = web.template.render('templates/')
        anses = []
        num_pages = 0
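        # Query whichever backend is enabled. Both branches fill `anses` with
        # one dict per hit (title, description, url, cover, authors) and set
        # `num_pages`, with ten results per page.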
        if use_elasticsearch:
            # Elasticsearch backend: query the 'book-index' via elasticsearch-dsl.
            from elasticsearch import Elasticsearch
            from elasticsearch_dsl import Search, Q
            from elasticsearch_dsl.connections import connections
            from booktype import Book

            es = Elasticsearch()
            es.indices.create(index='book-index', ignore=[400, 404])
            connections.create_connection(hosts=['localhost'], timeout=20)
            connections.add_connection('book', es)

            # Match the query against the title, the description and the user reviews.
            s = Search(using=es, index='book-index').doc_type('book').query(
                Q('match', title=query.strip()) |
                Q('match', description=query.strip()) |
                Q('match', userreviews_userReview=query.strip())
            )
            # The slice must be applied before execute(); it translates into the
            # request's from/size parameters and selects the current result page.
            s = s[page * 10:page * 10 + 10]
            response = s.execute()
            num_pages = (response.hits.total // 10) + 1
            for res in response:
                authors = zip(res.authors_name, res.authors_url)
                anses.append({
                    'title': res.title,
                    'description': res.description.encode('utf-8'),
                    'url': res.url,
                    'cover': res.cover,
                    'authors': authors,
                })
        else:
            # Lucene backend: query the on-disk index via PyLucene.
            import lucene
            from java.io import File
            from org.apache.lucene.index import DirectoryReader, Term
            from org.apache.lucene.queryparser.classic import QueryParser
            from org.apache.lucene.store import SimpleFSDirectory
            from org.apache.lucene.search import IndexSearcher, BooleanClause, BooleanQuery, TermQuery
            from org.apache.lucene.util import Version
            from org.apache.lucene.analysis.standard import StandardAnalyzer
            import os

            # Stored field names used by the indexer.
            title_field = 'title'
            description_field = 'description'
            cover_field = 'cover'
            authors_name_field = 'authors_name'
            authors_url_field = 'authors_url'
            url_field = 'url'

            index_folder = '.'
            index_name = 'lucene.index'
            index_path = os.path.join(index_folder, index_name)

            lucene.initVM()
            version = Version.LUCENE_CURRENT
            directory = SimpleFSDirectory(File(index_path))
            searcher = IndexSearcher(DirectoryReader.open(directory))
            analyzer = StandardAnalyzer(version)

            # Match the raw query term against the title or the description.
            # (TermQuery does not analyze its input, so the term is used as-is.)
            title_tq = TermQuery(Term(title_field, query))
            desc_tq = TermQuery(Term(description_field, query))
            bool_query = BooleanQuery()
            bool_query.add(BooleanClause(title_tq, BooleanClause.Occur.SHOULD))
            bool_query.add(BooleanClause(desc_tq, BooleanClause.Occur.SHOULD))

            scoreDocs = searcher.search(bool_query, 1000).scoreDocs
            num_pages = (len(scoreDocs) // 10) + 1
            for scoreDoc in scoreDocs[page * 10:page * 10 + 10]:
                doc = searcher.doc(scoreDoc.doc)
                authors = zip([doc.get(authors_name_field)], [doc.get(authors_url_field)])
                anses.append({
                    'title': doc.get(title_field),
                    'description': doc.get(description_field).encode('utf-8'),
                    'url': doc.get(url_field),
                    'cover': doc.get(cover_field),
                    'authors': authors,
                })
        return render.index(anses, query, num_pages)


if __name__ == "__main__":
    # Main entry point of the searching module.
    if use_elasticsearch:
        print('Searching using the Elasticsearch index.')
    else:
        print('Searching using the Lucene index.')
    app = web.application(urls, globals())
    app.run()
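
# Usage sketch (assuming web.py's built-in development server): run
#
#   python searcher.py [port]
#
# which serves on 0.0.0.0:8080 unless a port is given. Open '/' for the search
# form, or '/<query>?page=<n>' to fetch a specific result page directly.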