Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rest api: add simple query support
Browse files Browse the repository at this point in the history
The current elasticsearch query string used by the REST API is powerful
but should not be used for search boxes. It raises exceptions when
the query syntax contains errors. As described in the elasticsearch
documentation, a simple query string should be used. A new optional HTTP
query parameter has been added and can be specified as follows:
`&simple=1`.
For example: `https://ils.rero.ch/global/search/?q=potter&simple=1`.

When the simple query syntax is chosen, the default boolean operator is
`AND`. Apart from this parameter, nothing has been modified.

* Adds a new type of aggregation filter to perform an AND boolean operator
between the terms in the same aggregation. All aggregation filters
now use the AND boolean operator.
* Updates elasticsearch mappings to enhance the search engine quality.
* Adds a new REST API list records operator to use a `simple_query_string`
instead of a `query_string`. The simple query string performs an AND
boolean operator by default.
* Adds search query tests.
* Adds missing utils test.
* Renames `global_lowercase_asciifolding` elasticsearch analyzer to
`default`. This makes the rero-ils custom analyzer the default for
all elasticsearch `text` fields. All elasticsearch mappings have been
simplified.
* Creates a new `custom_keyword` analyzer.
* Creates a custom elasticsearch image with the icu analysis plugin
(https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu.html).
* Closes rero#755.

Co-Authored-by: Johnny Mariéthoz <Johnny.Mariethoz@rero.ch>
jma committed May 7, 2020

Verified

This commit was signed with the committer’s verified signature.
darcyclarke Darcy Clarke
1 parent c7d118d commit e7cc19e
Showing 23 changed files with 529 additions and 210 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ include .tx/config
include docker/haproxy/Dockerfile
include docker/nginx/Dockerfile
include docker/postgres/Dockerfile
include docker/elasticsearch/Dockerfile
include Dockerfile
exclude scripts/all_year_days
include scripts/bootstrap
3 changes: 2 additions & 1 deletion docker-services.yml
Original file line number Diff line number Diff line change
@@ -73,7 +73,8 @@ services:
- "15672:15672"
- "5672:5672"
es:
image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2
build: ./docker/elasticsearch/
image: elasticsearch-icu
restart: "always"
environment:
- bootstrap.memory_lock=true
2 changes: 2 additions & 0 deletions docker/elasticsearch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
FROM docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2
RUN bin/elasticsearch-plugin install analysis-icu
40 changes: 20 additions & 20 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
@@ -38,7 +38,6 @@
ItemAtDeskToItemOnLoan, ItemOnLoanToItemInTransitHouse, \
ItemOnLoanToItemOnLoan, PendingToItemAtDesk, \
PendingToItemInTransitPickup, ToItemOnLoan
from invenio_records_rest.facets import terms_filter
from invenio_records_rest.utils import allow_all, deny_all

from .modules.acq_accounts.api import AcqAccount
@@ -94,6 +93,7 @@
librarian_delete_permission_factory, librarian_permission_factory, \
librarian_update_permission_factory, wiki_edit_ui_permission, \
wiki_edit_view_permission
from .query import and_term_filter
from .utils import get_current_language


@@ -1179,18 +1179,18 @@ def _(x):
)
),
filters={
_('document_type'): terms_filter('type'),
_('organisation'): terms_filter(
_('document_type'): and_term_filter('type'),
_('organisation'): and_term_filter(
'holdings.organisation.organisation_pid'
),
_('library'): terms_filter('holdings.organisation.library_pid'),
_('author__en'): terms_filter('facet_authors_en'),
_('author__fr'): terms_filter('facet_authors_fr'),
_('author__de'): terms_filter('facet_authors_de'),
_('author__it'): terms_filter('facet_authors_it'),
_('language'): terms_filter('language.value'),
_('subject'): terms_filter('facet_subjects'),
_('status'): terms_filter('holdings.items.status'),
_('library'): and_term_filter('holdings.organisation.library_pid'),
_('author__en'): and_term_filter('facet_authors_en'),
_('author__fr'): and_term_filter('facet_authors_fr'),
_('author__de'): and_term_filter('facet_authors_de'),
_('author__it'): and_term_filter('facet_authors_it'),
_('language'): and_term_filter('language.value'),
_('subject'): and_term_filter('facet_subjects'),
_('status'): and_term_filter('holdings.items.status'),
}
),
patrons=dict(
@@ -1206,7 +1206,7 @@ def _(x):
)
),
filters={
_('roles'): terms_filter('roles')
_('roles'): and_term_filter('roles')
},
),
acq_accounts=dict(
@@ -1227,8 +1227,8 @@ def _(x):
)
),
filters={
_('library'): terms_filter('library.pid'),
_('budget'): terms_filter('budget')
_('library'): and_term_filter('library.pid'),
_('budget'): and_term_filter('budget')
},
),
acq_invoices=dict(
@@ -1249,8 +1249,8 @@ def _(x):
)
),
filters={
_('library'): terms_filter('library.pid'),
_('status'): terms_filter('invoice_status')
_('library'): and_term_filter('library.pid'),
_('status'): and_term_filter('invoice_status')
},
),
acq_orders=dict(
@@ -1271,8 +1271,8 @@ def _(x):
)
),
filters={
_('library'): terms_filter('library.pid'),
_('status'): terms_filter('order_status')
_('library'): and_term_filter('library.pid'),
_('status'): and_term_filter('order_status')
},
),
persons=dict(
@@ -1288,15 +1288,15 @@ def _(x):
)
),
filters={
_('sources'): terms_filter('sources')
_('sources'): and_term_filter('sources')
}
),
)

# Elasticsearch fields boosting by index
RERO_ILS_QUERY_BOOSTING = {
'documents': {
'title.*': 3,
'title._text.*': 3,
'titlesProper.*': 3,
'authors.name': 2,
'authors.name_*': 2,
14 changes: 12 additions & 2 deletions rero_ils/es_templates/v6/record.json
Original file line number Diff line number Diff line change
@@ -12,13 +12,23 @@
}
},
"analyzer": {
"global_lowercase_asciifolding": {
"custom_keyword": {
"type": "custom",
"tokenizer": "char_group_tokenizer",
"tokenizer": "keyword",
"filter": [
"lowercase",
"german_normalization",
"asciifolding"
]
},
"default": {
"type": "custom",
"tokenizer": "char_group_tokenizer",
"filter": [
"lowercase",
"german_normalization",
"icu_folding"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -17,15 +17,13 @@
},
"name": {
"type": "text",
"copy_to": "circ_policy_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "circ_policy_name"
},
"circ_policy_name": {
"type": "keyword"
},
"description": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"organisation": {
"properties": {
@@ -99,4 +97,4 @@
}
}
}
}
}
158 changes: 36 additions & 122 deletions rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json
Original file line number Diff line number Diff line change
@@ -45,29 +45,12 @@
"properties": {
"value": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "autocomplete_title",
"fields": {
"eng": {
"type": "text",
"analyzer": "english"
},
"fre": {
"type": "text",
"analyzer": "french"
},
"ger": {
"type": "text",
"analyzer": "german"
},
"ita": {
"type": "text",
"analyzer": "italian"
}
}
"index": false
},
"language": {
"type": "keyword"
"type": "keyword",
"index": false
}
}
},
@@ -76,25 +59,7 @@
"properties": {
"value": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"fields": {
"eng": {
"type": "text",
"analyzer": "english"
},
"fre": {
"type": "text",
"analyzer": "french"
},
"ger": {
"type": "text",
"analyzer": "german"
},
"ita": {
"type": "text",
"analyzer": "italian"
}
}
"index": false
},
"language": {
"type": "keyword"
@@ -108,36 +73,25 @@
"type": "object",
"properties": {
"value": {
"type": "keyword"
"type": "keyword",
"index": false
}
}
},
"partName": {
"type": "object",
"properties": {
"value": {
"type": "keyword"
"type": "keyword",
"index": false
}
}
}
}
},
"_text": {
"type": "text"
}
}
},
"autocomplete_title": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
},
"responsibilityStatement": {
"type": "object",
"properties": {
"value": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"index": false,
"fields": {
"eng": {
"type": "text",
@@ -156,6 +110,19 @@
"analyzer": "italian"
}
}
}
}
},
"autocomplete_title": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
},
"responsibilityStatement": {
"type": "object",
"properties": {
"value": {
"type": "text"
},
"language": {
"type": "keyword"
@@ -164,7 +131,6 @@
},
"titlesProper": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"fields": {
"eng": {
"type": "text",
@@ -200,7 +166,6 @@
},
"is_part_of": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"fields": {
"eng": {
"type": "text",
@@ -228,7 +193,6 @@
"properties": {
"name": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": [
"facet_authors_en",
"facet_authors_fr",
@@ -238,27 +202,22 @@
},
"name_en": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "facet_authors_en"
},
"name_fr": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "facet_authors_fr"
},
"name_de": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "facet_authors_de"
},
"name_it": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "facet_authors_it"
},
"variant_name": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"type": {
"type": "keyword"
@@ -306,8 +265,7 @@
"type": "object",
"properties": {
"value": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"language": {
"type": "keyword"
@@ -318,8 +276,7 @@
"type": "object",
"properties": {
"value": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"language": {
"type": "keyword"
@@ -352,8 +309,7 @@
"type": "object",
"properties": {
"value": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"language": {
"type": "keyword"
@@ -366,8 +322,7 @@
}
},
"note": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"startDate": {
"type": "date",
@@ -393,8 +348,7 @@
}
},
"extent": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"otherMaterialCharacteristics": {
"type": "keyword"
@@ -409,8 +363,13 @@
"type": "object",
"properties": {
"name": {
"type": "text"
},
"number": {
"type": "keyword"
},
"_text": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"fields": {
"eng": {
"type": "text",
@@ -429,40 +388,14 @@
"analyzer": "italian"
}
}
},
"number": {
"type": "keyword"
},
"_text": {
"type": "text"
}
}
},
"notes": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"fields": {
"eng": {
"type": "text",
"analyzer": "english"
},
"fre": {
"type": "text",
"analyzer": "french"
},
"ger": {
"type": "text",
"analyzer": "german"
},
"ita": {
"type": "text",
"analyzer": "italian"
}
}
"type": "text"
},
"abstracts": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"fields": {
"eng": {
"type": "text",
@@ -510,26 +443,7 @@
},
"subjects": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "facet_subjects",
"fields": {
"eng": {
"type": "text",
"analyzer": "english"
},
"fre": {
"type": "text",
"analyzer": "french"
},
"ger": {
"type": "text",
"analyzer": "german"
},
"ita": {
"type": "text",
"analyzer": "italian"
}
}
"copy_to": "facet_subjects"
},
"facet_subjects": {
"type": "keyword"
Original file line number Diff line number Diff line change
@@ -17,15 +17,13 @@
},
"name": {
"type": "text",
"copy_to": "item_type_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "item_type_name"
},
"item_type_name": {
"type": "keyword"
},
"description": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"type": {
"type": "keyword"
Original file line number Diff line number Diff line change
@@ -16,20 +16,17 @@
"type": "keyword"
},
"code": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"name": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "name_sort"
},
"name_sort": {
"type": "keyword"
},
"address": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"email": {
"type": "keyword"
Original file line number Diff line number Diff line change
@@ -35,8 +35,7 @@
"type": "keyword"
},
"note": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"library": {
"properties": {
Original file line number Diff line number Diff line change
@@ -17,15 +17,13 @@
},
"name": {
"type": "text",
"copy_to": "patron_type_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "patron_type_name"
},
"patron_type_name": {
"type": "keyword"
},
"description": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"organisation": {
"properties": {
Original file line number Diff line number Diff line change
@@ -17,15 +17,13 @@
},
"first_name": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "first_name_sort"
},
"first_name_sort": {
"type": "keyword"
},
"last_name": {
"type": "text",
"analyzer": "global_lowercase_asciifolding",
"copy_to": "last_name_sort"
},
"last_name_sort": {
@@ -38,15 +36,13 @@
"type": "keyword"
},
"street": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"postal_code": {
"type": "keyword"
},
"city": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"phone": {
"type": "text"
39 changes: 13 additions & 26 deletions rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json
Original file line number Diff line number Diff line change
@@ -69,22 +69,18 @@
"type": "keyword"
},
"biographical_information": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"preferred_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"variant_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"authorized_access_point_representing_a_person": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"pid": {
"type": "keyword"
@@ -115,18 +111,15 @@
"type": "keyword"
},
"biographical_information": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"preferred_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"variant_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"authorized_access_point_representing_a_person": {
"type": "text"
@@ -154,18 +147,15 @@
"type": "keyword"
},
"biographical_information": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"preferred_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"variant_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"authorized_access_point_representing_a_person": {
"type": "text"
@@ -193,18 +183,15 @@
"type": "keyword"
},
"biographical_information": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"preferred_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"variant_name_for_person": {
"type": "text",
"copy_to": "autocomplete_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "autocomplete_name"
},
"authorized_access_point_representing_a_person": {
"type": "text"
Original file line number Diff line number Diff line change
@@ -17,8 +17,7 @@
},
"name": {
"type": "text",
"copy_to": "vendor_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "vendor_name"
},
"vendor_name": {
"type": "keyword"
37 changes: 33 additions & 4 deletions rero_ils/query.py
Original file line number Diff line number Diff line change
@@ -19,13 +19,31 @@

from __future__ import absolute_import, print_function

import re

from elasticsearch_dsl.query import Q
from flask import current_app, request
from invenio_records_rest.errors import InvalidQueryRESTError

from .modules.organisations.api import Organisation
from .modules.patrons.api import current_patron

# Matches a run of whitespace followed by one or more punctuation characters
# taken from `: , ? . ; / ! = -`. Punctuation that directly follows a word
# (no whitespace before it) is not matched, and neither is a backslash.
# The pattern is equivalent to the previous, more verbose spelling: the extra
# commas and escapes inside the character class were redundant and the
# trailing lazy `\s*?` could never consume anything.
_PUNCTUATION_REGEX = re.compile(r'\s+[:,?.;/!=-]+')


def and_term_filter(field):
    """Create a filter factory combining term queries with a boolean AND.

    :param field: Field name.
    :returns: Function that returns a boolean AND query between term values.
    """
    def inner(values):
        # One `term` clause per requested value; `must` requires all of them.
        term_queries = [Q('term', **{field: value}) for value in values]
        return Q('bool', must=term_queries)

    return inner


def view_search_factory(self, search, query_parser=None):
"""Search factory with view code parameter."""
@@ -135,14 +153,25 @@ def search_factory(self, search, query_parser=None):
"""
def _default_parser(qstr=None, query_boosting=[]):
"""Default parser that uses the Q() from elasticsearch_dsl."""
query_type = 'query_string'
default_operator = 'OR'
if request.args.get('simple'):
query_type = 'simple_query_string'
default_operator = 'AND'

if qstr:
# TODO: remove this bad hack
qstr = _PUNCTUATION_REGEX.sub(' ', qstr).rstrip()
if not query_boosting:
return Q('query_string', query=qstr)
return Q(query_type, query=qstr,
default_operator=default_operator)
else:
return Q('bool', should=[
Q('query_string', query=qstr, boost=2,
fields=query_boosting),
Q('query_string', query=qstr)
Q(query_type, query=qstr, boost=2,
fields=query_boosting,
default_operator=default_operator),
Q(query_type, query=qstr,
default_operator=default_operator)
])
return Q()

61 changes: 60 additions & 1 deletion tests/api/conftest.py
Original file line number Diff line number Diff line change
@@ -20,11 +20,70 @@
from __future__ import absolute_import, print_function

import pytest
from utils import flush_index

from rero_ils.modules.documents.api import Document, DocumentsSearch

@pytest.fixture(scope="module")

@pytest.fixture(scope='module')
def create_app():
    """Return the ``create_api`` factory of ``invenio_app.factory``."""
    # Imported lazily, inside the fixture, as in the original implementation.
    import invenio_app.factory as app_factory

    return app_factory.create_api


@pytest.fixture(scope='module')
def doc_title_travailleurs(app):
    """Create and index a French document whose title has "travailleurs".

    The record is committed to the database, indexed, and the documents
    index is flushed before the record is returned.

    :param app: Application fixture.
    :returns: The created ``Document`` record.
    """
    data = {
        '$schema': 'https://ils.rero.ch/schema/documents/'
                   'document-minimal-v0.0.1.json',
        # The pid is kept as given (``delete_pid=False``), so it must differ
        # from the pid of every other document fixture used in the same test.
        'pid': 'doc_title_test1',
        'type': 'book',
        'language': [{'type': 'bf:Language', 'value': 'fre'}],
        'title': [{
            'type': 'bf:Title',
            'mainTitle': [{
                'value': 'Les travailleurs assidus sont de retours'
            }],
            'subtitle': [{'value': 'les jeunes arrivent bientôt ?'}]
        }]
    }
    doc = Document.create(
        data=data,
        delete_pid=False,
        dbcommit=True,
        reindex=True)
    flush_index(DocumentsSearch.Meta.index)
    return doc


@pytest.fixture(scope='module')
def doc_title_travailleuses(app):
    """Create and index a French document whose title has "travailleuses".

    The title, subtitle and author names contain accented characters and
    ligatures (``é``, ``œ``, ``æ``, ``ü``, ``ß``). The record is committed to
    the database, indexed, and the documents index is flushed before the
    record is returned.

    :param app: Application fixture.
    :returns: The created ``Document`` record.
    """
    data = {
        '$schema': 'https://ils.rero.ch/schema/documents/'
                   'document-minimal-v0.0.1.json',
        # The pid is kept as given (``delete_pid=False``), so it must differ
        # from the pid of every other document fixture used in the same test.
        'pid': 'doc_title_test2',
        'type': 'book',
        'language': [{'type': 'bf:Language', 'value': 'fre'}],
        'title': [{
            'type': 'bf:Title',
            'mainTitle': [{
                'value': "Les travailleuses partent à l'école"
            }],
            'subtitle': [{'value': "lorsqu'un est bœuf ex æquo"}]
        }],
        'authors': [{
            'name': 'Müller, John', 'type': 'person'
        }, {
            'name': 'Corminbœuf, Gruß', 'type': 'person'
        }],
    }
    doc = Document.create(
        data=data,
        delete_pid=False,
        dbcommit=True,
        reindex=True)
    flush_index(DocumentsSearch.Meta.index)
    return doc
40 changes: 38 additions & 2 deletions tests/api/test_documents_rest.py
Original file line number Diff line number Diff line change
@@ -57,22 +57,58 @@ def test_documents_permissions(client, document, json_header):
@mock.patch('invenio_records_rest.views.verify_record_permission',
mock.MagicMock(return_value=VerifyRecordPermissionPatch))
def test_documents_facets(
client, document, item_lib_martigny, rero_json_header
client, document, ebook_1, ebook_2, ebook_3, ebook_4,
item_lib_martigny, rero_json_header
):
"""Test record retrieval."""
list_url = url_for('invenio_records_rest.doc_list', view='global')

res = client.get(list_url, headers=rero_json_header)
data = get_json(res)
aggs = data['aggregations']

# check all facets are present
for facet in [
'document_type', 'author__en', 'author__fr',
'author__de', 'author__it', 'language', 'subject', 'status'
]:
assert aggs[facet]

# FILTERS
# person author
list_url = url_for('invenio_records_rest.doc_list', view='global',
author__de='Peter James')
res = client.get(list_url, headers=rero_json_header)
data = get_json(res)
assert data['hits']['total'] == 2

# organisation author
list_url = url_for('invenio_records_rest.doc_list', view='global',
author__de='Great Edition')
res = client.get(list_url, headers=rero_json_header)
data = get_json(res)
assert data['hits']['total'] == 1

# an other person author
list_url = url_for('invenio_records_rest.doc_list', view='global',
author__de='J.K. Rowling')
res = client.get(list_url, headers=rero_json_header)
data = get_json(res)
assert data['hits']['total'] == 1

# two authors in the same document
list_url = url_for('invenio_records_rest.doc_list', view='global',
author__de=['Great Edition', 'Peter James'])
res = client.get(list_url, headers=rero_json_header)
data = get_json(res)
assert data['hits']['total'] == 1

# two authors: each in a separate document
list_url = url_for('invenio_records_rest.doc_list', view='global',
author__de=['J.K. Rowling', 'Peter James'])
res = client.get(list_url, headers=rero_json_header)
data = get_json(res)
assert data['hits']['total'] == 0


@mock.patch('invenio_records_rest.views.verify_record_permission',
mock.MagicMock(return_value=VerifyRecordPermissionPatch))
277 changes: 277 additions & 0 deletions tests/api/test_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Search tests."""

from flask import url_for
from utils import get_json


def _simple_search_total(client, query):
    """Return the hit count of a document search using the simple syntax.

    :param client: Test client used to send the GET request.
    :param query: Raw value sent as the ``q`` parameter.
    :returns: Value of ``hits.total`` in the JSON response.
    """
    list_url = url_for(
        'invenio_records_rest.doc_list',
        q=query,
        simple='1'
    )
    res = client.get(list_url)
    return get_json(res)['hits']['total']


def test_document_search(
    client,
    doc_title_travailleurs,
    doc_title_travailleuses
):
    """Test document search with the simple query string syntax.

    Every case sends ``q=<query>&simple=1`` to the document list endpoint
    and checks the total number of hits against the two fixture documents.
    """
    cases = [
        # phrase search
        ('"Les travailleurs assidus sont de retours"', 1),
        # phrase search with punctuations
        ('"Les travailleurs assidus sont de retours."', 1),
        # word search
        ('travailleurs', 2),
        # travailleurs == travailleur == travailleuses
        ('travailleur', 2),
        # ecole == école
        ('ecole', 1),
        # Ecole == école
        ('Ecole', 1),
        # ECOLE == école (the query previously sent 'Ecole' again, so the
        # all upper-case variant was never exercised)
        ('ECOLE', 1),
        # _école_ == école
        (' école ', 1),
        # Müller
        ('Müller', 1),
        # Müller == Muller
        ('Muller', 1),
        # Müller == Mueller
        ('Mueller', 1),
        # test AND
        ('travailleuse école', 1),
        # test OR in two docs (each match only one term)
        ('retours | école', 2),
        # test AND in two fields (travailleuses == travailleur)
        ('travailleuses bientôt', 1),
        ('travailleuses + bientôt', 1),
        # test AND in two docs (each match only one term) => no result
        ('retours école', 0),
        ('retours + école', 0),
        # title + subtitle
        (
            'Les travailleurs assidus sont de retours : '
            'les jeunes arrivent bientôt ?',
            1
        ),
        # punctuation
        (r'école : . ... , ; ? \ ! = == - --', 1),
        (r'école:.,;?\!...=-==--', 1),
        # special chars
        # œ in title
        (r'bœuf', 1),
        # æ in title
        (r'ex æquo', 1),
        # æ in title, upper case
        (r'ÆQUO', 1),
        # œ in author
        (r'Corminbœuf', 1),
    ]
    for query, expected in cases:
        # The query is used as assertion message to identify a failing case.
        assert _simple_search_total(client, query) == expected, query
6 changes: 5 additions & 1 deletion tests/data/data.json
Original file line number Diff line number Diff line change
@@ -1639,6 +1639,10 @@
{
"name": "Peter James",
"type": "person"
},
{
"name": "Great Edition",
"type": "organisation"
}
],
"provisionActivity": [
@@ -2365,4 +2369,4 @@
},
"status": "open"
}
}
}
10 changes: 9 additions & 1 deletion tests/ui/conftest.py
Original file line number Diff line number Diff line change
@@ -55,7 +55,15 @@ def es_default_index(es):
"""ES default index."""
current_search_client.indices.create(
index='records-record-v1.0.0',
body={},
body={
'mappings': {
'record-v1.0.0': {
'properties': {
'pid': {'type': 'keyword'}
}
}
}
},
ignore=[400]
)
yield es
2 changes: 1 addition & 1 deletion tests/ui/documents/test_documents_mapping.py
Original file line number Diff line number Diff line change
@@ -66,6 +66,6 @@ def test_document_search_mapping(app, document_records):
assert c == 1

query = MultiMatch(query='Chamber of Secrets',
fields=['title.mainTitle.value.eng'])
fields=['title._text.*'])
c = search.query(query).count()
assert c == 1
2 changes: 1 addition & 1 deletion tests/ui/test_api.py
Original file line number Diff line number Diff line change
@@ -106,7 +106,7 @@ def test_ilsrecord(app, es_default_index, ils_record, ils_record_2):
"""Test IlsRecord update."""
current_search.delete(ignore=[404])

# the created records will be acessible in all function of this test file
# the created records will be accessible in all function of this test file
record_1 = RecordTest.create(
data=ils_record,
dbcommit=True,
6 changes: 6 additions & 0 deletions tests/ui/test_utils_app.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@

from rero_ils.modules.documents.api import Document
from rero_ils.modules.utils import get_ref_for_pid
from rero_ils.utils import get_current_language


def test_get_ref_for_pid(app):
@@ -28,3 +29,8 @@ def test_get_ref_for_pid(app):
assert get_ref_for_pid('doc', '3') == url
assert get_ref_for_pid(Document, '3') == url
assert get_ref_for_pid('test', '3') is None


def test_get_language(app):
    """Check that the current application language is ``'en'``."""
    current_language = get_current_language()
    assert current_language == 'en'

0 comments on commit e7cc19e

Please sign in to comment.