diff --git a/MANIFEST.in b/MANIFEST.in index a579614acd..52c615d4b5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -23,6 +23,7 @@ include .tx/config include docker/haproxy/Dockerfile include docker/nginx/Dockerfile include docker/postgres/Dockerfile +include docker/elasticsearch/Dockerfile include Dockerfile exclude scripts/all_year_days include scripts/bootstrap diff --git a/docker-services.yml b/docker-services.yml index 51a62a8319..7114a6bbd1 100644 --- a/docker-services.yml +++ b/docker-services.yml @@ -73,7 +73,8 @@ services: - "15672:15672" - "5672:5672" es: - image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2 + build: ./docker/elasticsearch/ + image: elasticsearch-icu restart: "always" environment: - bootstrap.memory_lock=true diff --git a/docker/elasticsearch/Dockerfile b/docker/elasticsearch/Dockerfile new file mode 100644 index 0000000000..9292643df1 --- /dev/null +++ b/docker/elasticsearch/Dockerfile @@ -0,0 +1,2 @@ +FROM docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2 +RUN bin/elasticsearch-plugin install analysis-icu diff --git a/rero_ils/config.py b/rero_ils/config.py index 9cb3e1c1ff..68200f4c09 100644 --- a/rero_ils/config.py +++ b/rero_ils/config.py @@ -38,7 +38,6 @@ ItemAtDeskToItemOnLoan, ItemOnLoanToItemInTransitHouse, \ ItemOnLoanToItemOnLoan, PendingToItemAtDesk, \ PendingToItemInTransitPickup, ToItemOnLoan -from invenio_records_rest.facets import terms_filter from invenio_records_rest.utils import allow_all, deny_all from .modules.acq_accounts.api import AcqAccount @@ -94,6 +93,7 @@ librarian_delete_permission_factory, librarian_permission_factory, \ librarian_update_permission_factory, wiki_edit_ui_permission, \ wiki_edit_view_permission +from .query import and_term_filter from .utils import get_current_language @@ -1179,18 +1179,18 @@ def _(x): ) ), filters={ - _('document_type'): terms_filter('type'), - _('organisation'): terms_filter( + _('document_type'): and_term_filter('type'), + _('organisation'): and_term_filter( 'holdings.organisation.organisation_pid' ), - _('library'): terms_filter('holdings.organisation.library_pid'), - _('author__en'): terms_filter('facet_authors_en'), - _('author__fr'): terms_filter('facet_authors_fr'), - _('author__de'): terms_filter('facet_authors_de'), - _('author__it'): terms_filter('facet_authors_it'), - _('language'): terms_filter('language.value'), - _('subject'): terms_filter('facet_subjects'), - _('status'): terms_filter('holdings.items.status'), + _('library'): and_term_filter('holdings.organisation.library_pid'), + _('author__en'): and_term_filter('facet_authors_en'), + _('author__fr'): and_term_filter('facet_authors_fr'), + _('author__de'): and_term_filter('facet_authors_de'), + _('author__it'): and_term_filter('facet_authors_it'), + _('language'): and_term_filter('language.value'), + _('subject'): and_term_filter('facet_subjects'), + _('status'): and_term_filter('holdings.items.status'), } ), patrons=dict( @@ -1206,7 +1206,7 @@ def _(x): ) ), filters={ - _('roles'): terms_filter('roles') + _('roles'): and_term_filter('roles') }, ), acq_accounts=dict( @@ -1227,8 +1227,8 @@ def _(x): ) ), filters={ - _('library'): terms_filter('library.pid'), - _('budget'): terms_filter('budget') + _('library'): and_term_filter('library.pid'), + _('budget'): and_term_filter('budget') }, ), acq_invoices=dict( @@ -1249,8 +1249,8 @@ def _(x): ) ), filters={ - _('library'): terms_filter('library.pid'), - _('status'): terms_filter('invoice_status') + _('library'): and_term_filter('library.pid'), + _('status'): and_term_filter('invoice_status') }, ), acq_orders=dict( @@ -1271,8 +1271,8 @@ def _(x): ) ), filters={ - _('library'): terms_filter('library.pid'), - _('status'): terms_filter('order_status') + _('library'): and_term_filter('library.pid'), + _('status'): and_term_filter('order_status') }, ), persons=dict( @@ -1288,7 +1288,7 @@ def _(x): ) ), filters={ - _('sources'): terms_filter('sources') + _('sources'): and_term_filter('sources') } ), ) @@ -1296,7 +1296,7 @@ def _(x): # Elasticsearch fields boosting by index RERO_ILS_QUERY_BOOSTING = { 'documents': { - 'title.*': 3, + 'title._text.*': 3, 'titlesProper.*': 3, 'authors.name': 2, 'authors.name_*': 2, diff --git a/rero_ils/es_templates/v6/record.json b/rero_ils/es_templates/v6/record.json index c0b5b1783d..5b6c6e65d7 100644 --- a/rero_ils/es_templates/v6/record.json +++ b/rero_ils/es_templates/v6/record.json @@ -12,13 +12,23 @@ } }, "analyzer": { - "global_lowercase_asciifolding": { + "custom_keyword": { "type": "custom", - "tokenizer": "char_group_tokenizer", + "tokenizer": "keyword", "filter": [ "lowercase", + "german_normalization", "asciifolding" ] + }, + "default": { + "type": "custom", + "tokenizer": "char_group_tokenizer", + "filter": [ + "lowercase", + "german_normalization", + "icu_folding" + ] } } } diff --git a/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json b/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json index 25828c20e0..a8cacaeec3 100644 --- a/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json +++ b/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json @@ -17,15 +17,13 @@ }, "name": { "type": "text", - "copy_to": "circ_policy_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "circ_policy_name" }, "circ_policy_name": { "type": "keyword" }, "description": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "organisation": { "properties": { @@ -99,4 +97,4 @@ } } } -} \ No newline at end of file +} diff --git a/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json b/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json index 227e487030..cbe65e33ea 100644 --- a/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json +++ b/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json @@ -45,29 +45,12 @@ "properties": { "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "autocomplete_title", - "fields": { - "eng": { - "type": "text", - "analyzer": "english" - }, - "fre": { - "type": "text", - "analyzer": "french" - }, - "ger": { - "type": "text", - "analyzer": "german" - }, - "ita": { - "type": "text", - "analyzer": "italian" - } - } + "index": false }, "language": { - "type": "keyword" + "type": "keyword", + "index": false } } }, @@ -76,25 +59,7 @@ "properties": { "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding", - "fields": { - "eng": { - "type": "text", - "analyzer": "english" - }, - "fre": { - "type": "text", - "analyzer": "french" - }, - "ger": { - "type": "text", - "analyzer": "german" - }, - "ita": { - "type": "text", - "analyzer": "italian" - } - } + "index": false }, "language": { "type": "keyword" @@ -108,7 +73,8 @@ "type": "object", "properties": { "value": { - "type": "keyword" + "type": "keyword", + "index": false } } }, @@ -116,28 +82,16 @@ "type": "object", "properties": { "value": { - "type": "keyword" + "type": "keyword", + "index": false } } } } }, "_text": { - "type": "text" - } - } - }, - "autocomplete_title": { - "type": "text", - "analyzer": "autocomplete", - "search_analyzer": "standard" - }, - "responsibilityStatement": { - "type": "object", - "properties": { - "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding", + "index": false, "fields": { "eng": { "type": "text", @@ -156,6 +110,19 @@ "analyzer": "italian" } } + } + } + }, + "autocomplete_title": { + "type": "text", + "analyzer": "autocomplete", + "search_analyzer": "standard" + }, + "responsibilityStatement": { + "type": "object", + "properties": { + "value": { + "type": "text" }, "language": { "type": "keyword" @@ -164,7 +131,6 @@ }, "titlesProper": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -200,7 +166,6 @@ }, "is_part_of": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -228,7 +193,6 @@ "properties": { "name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": [ "facet_authors_en", "facet_authors_fr", @@ -238,27 +202,22 @@ }, "name_en": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_en" }, "name_fr": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_fr" }, "name_de": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_de" }, "name_it": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_it" }, "variant_name": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "type": { "type": "keyword" @@ -306,8 +265,7 @@ "type": "object", "properties": { "value": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "language": { "type": "keyword" @@ -318,8 +276,7 @@ "type": "object", "properties": { "value": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "language": { "type": "keyword" @@ -352,8 +309,7 @@ "type": "object", "properties": { "value": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "language": { "type": "keyword" @@ -366,8 +322,7 @@ } }, "note": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "startDate": { "type": "date", @@ -393,8 +348,7 @@ } }, "extent": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "otherMaterialCharacteristics": { "type": "keyword" @@ -409,8 +363,13 @@ "type": "object", "properties": { "name": { + "type": "text" + }, + "number": { + "type": "keyword" + }, + "_text": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -429,40 +388,14 @@ "analyzer": "italian" } } - }, - "number": { - "type": "keyword" - }, - "_text": { - "type": "text" } } }, "notes": { - "type": "text", - "analyzer": "global_lowercase_asciifolding", - "fields": { - "eng": { - "type": "text", - "analyzer": "english" - }, - "fre": { - "type": "text", - "analyzer": "french" - }, - "ger": { - "type": "text", - "analyzer": "german" - }, - "ita": { - "type": "text", - "analyzer": "italian" - } - } + "type": "text" }, "abstracts": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -510,26 +443,7 @@ }, "subjects": { "type": "text", - "analyzer": "global_lowercase_asciifolding", - "copy_to": "facet_subjects", - "fields": { - "eng": { - "type": "text", - "analyzer": "english" - }, - "fre": { - "type": "text", - "analyzer": "french" - }, - "ger": { - "type": "text", - "analyzer": "german" - }, - "ita": { - "type": "text", - "analyzer": "italian" - } - } + "copy_to": "facet_subjects" }, "facet_subjects": { "type": "keyword" diff --git a/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json b/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json index 1440307eae..67a8018f45 100644 --- a/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json +++ b/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json @@ -17,15 +17,13 @@ }, "name": { "type": "text", - "copy_to": "item_type_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "item_type_name" }, "item_type_name": { "type": "keyword" }, "description": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "type": { "type": "keyword" diff --git a/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json b/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json index 2e4ee2ce4c..89b5f8dbb4 100644 --- a/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json +++ b/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json @@ -16,20 +16,17 @@ "type": "keyword" }, "code": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "name_sort" }, "name_sort": { "type": "keyword" }, "address": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "email": { "type": "keyword" diff --git a/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json b/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json index d073f7eb42..e0a994aad1 100644 --- a/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json +++ b/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json @@ -35,8 +35,7 @@ "type": "keyword" }, "note": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "library": { "properties": { diff --git a/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json b/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json index ef4e79fe82..99f5cfc818 100644 --- a/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json +++ b/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json @@ -17,15 +17,13 @@ }, "name": { "type": "text", - "copy_to": "patron_type_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "patron_type_name" }, "patron_type_name": { "type": "keyword" }, "description": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "organisation": { "properties": { diff --git a/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json b/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json index ef27d52fdb..653f2c4494 100644 --- a/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json +++ b/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json @@ -17,7 +17,6 @@ }, "first_name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "first_name_sort" }, "first_name_sort": { @@ -25,7 +24,6 @@ }, "last_name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "last_name_sort" }, "last_name_sort": { @@ -38,15 +36,13 @@ "type": "keyword" }, "street": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "postal_code": { "type": "keyword" }, "city": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "phone": { "type": "text" diff --git a/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json b/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json index 8aca7db1bd..d31ea0cf78 100644 --- a/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json +++ b/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json @@ -69,22 +69,18 @@ "type": "keyword" }, "biographical_information": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "preferred_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "pid": { "type": "keyword" @@ -115,18 +111,15 @@ "type": "keyword" }, "biographical_information": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "preferred_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { "type": "text" @@ -154,18 +147,15 @@ "type": "keyword" }, "biographical_information": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "preferred_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { "type": "text" @@ -193,18 +183,15 @@ "type": "keyword" }, "biographical_information": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "preferred_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { "type": "text" diff --git a/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json b/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json index ff3a0db3d6..4996396b35 100644 --- a/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json +++ b/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json @@ -17,8 +17,7 @@ }, "name": { "type": "text", - "copy_to": "vendor_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "vendor_name" }, "vendor_name": { "type": "keyword" diff --git a/rero_ils/query.py b/rero_ils/query.py index a3fd0c6bbd..fa2c54bae6 100644 --- a/rero_ils/query.py +++ b/rero_ils/query.py @@ -19,6 +19,8 @@ from __future__ import absolute_import, print_function +import re + from elasticsearch_dsl.query import Q from flask import current_app, request from invenio_records_rest.errors import InvalidQueryRESTError @@ -26,6 +28,22 @@ from .modules.organisations.api import Organisation from .modules.patrons.api import current_patron +_PUNCTUATION_REGEX = re.compile(r'\s+[:,\?,\,,\.,;,\/,!,=,-]+\s*?') + + +def and_term_filter(field): + """Create a term filter. + + :param field: Field name. + :returns: Function that returns a boolean AND query between term values. + """ + def inner(values): + must = [] + for value in values: + must.append(Q('term', **{field: value})) + return Q('bool', must=must) + return inner + def view_search_factory(self, search, query_parser=None): """Search factory with view code parameter.""" @@ -135,14 +153,25 @@ def search_factory(self, search, query_parser=None): """ def _default_parser(qstr=None, query_boosting=[]): """Default parser that uses the Q() from elasticsearch_dsl.""" + query_type = 'query_string' + default_operator = 'OR' + if request.args.get('simple'): + query_type = 'simple_query_string' + default_operator = 'AND' + if qstr: + # TODO: remove this bad hack + qstr = _PUNCTUATION_REGEX.sub(' ', qstr).rstrip() if not query_boosting: - return Q('query_string', query=qstr) + return Q(query_type, query=qstr, + default_operator=default_operator) else: return Q('bool', should=[ - Q('query_string', query=qstr, boost=2, - fields=query_boosting), - Q('query_string', query=qstr) + Q(query_type, query=qstr, boost=2, + fields=query_boosting, + default_operator=default_operator), + Q(query_type, query=qstr, + default_operator=default_operator) ]) return Q() diff --git a/tests/api/conftest.py b/tests/api/conftest.py index d24053fc9e..4488966304 100644 --- a/tests/api/conftest.py +++ b/tests/api/conftest.py @@ -20,11 +20,70 @@ from __future__ import absolute_import, print_function import pytest +from utils import flush_index +from rero_ils.modules.documents.api import Document, DocumentsSearch -@pytest.fixture(scope="module") + +@pytest.fixture(scope='module') def create_app(): """Create test app.""" from invenio_app.factory import create_api return create_api + + +@pytest.fixture(scope='module') +def doc_title_travailleurs(app): + """Document with title with travailleur.""" + data = { + '$schema': 'https://ils.rero.ch/schema/documents/' + 'document-minimal-v0.0.1.json', + 'pid': 'doc_title_test', 'type': 'book', + 'language': [{'type': 'bf:Language', 'value': 'fre'}], + 'title': [{ + 'type': 'bf:Title', + 'mainTitle': [{ + 'value': 'Les travailleurs assidus sont de retours' + }], + 'subtitle': [{'value': 'les jeunes arrivent bientôt ?'}] + }] + } + doc = Document.create( + data=data, + delete_pid=False, + dbcommit=True, + reindex=True) + flush_index(DocumentsSearch.Meta.index) + return doc + + +@pytest.fixture(scope='module') +def doc_title_travailleuses(app): + """Document with title with travailleuses.""" + data = { + '$schema': 'https://ils.rero.ch/schema/documents/' + 'document-minimal-v0.0.1.json', + 'pid': 'doc_title_test', 'type': 'book', + 'language': [{'type': 'bf:Language', 'value': 'fre'}], + 'title': [{ + 'type': 'bf:Title', + 'mainTitle': [{ + 'value': "Les travailleuses partent à l'école" + }], + 'subtitle': [{'value': "lorsqu'un est bœuf ex æquo"}] + + }], + 'authors': [{ + 'name': 'Müller, John', 'type': 'person' + }, { + 'name': 'Corminbœuf, Gruß', 'type': 'person' + }], + } + doc = Document.create( + data=data, + delete_pid=False, + dbcommit=True, + reindex=True) + flush_index(DocumentsSearch.Meta.index) + return doc diff --git a/tests/api/test_documents_rest.py b/tests/api/test_documents_rest.py index 8bbdada7f5..557af16885 100644 --- a/tests/api/test_documents_rest.py +++ b/tests/api/test_documents_rest.py @@ -57,7 +57,8 @@ def test_documents_permissions(client, document, json_header): @mock.patch('invenio_records_rest.views.verify_record_permission', mock.MagicMock(return_value=VerifyRecordPermissionPatch)) def test_documents_facets( - client, document, item_lib_martigny, rero_json_header + client, document, ebook_1, ebook_2, ebook_3, ebook_4, + item_lib_martigny, rero_json_header ): """Test record retrieval.""" list_url = url_for('invenio_records_rest.doc_list', view='global') @@ -65,7 +66,6 @@ def test_documents_facets( res = client.get(list_url, headers=rero_json_header) data = get_json(res) aggs = data['aggregations'] - # check all facets are present for facet in [ 'document_type', 'author__en', 'author__fr', @@ -73,6 +73,42 @@ def test_documents_facets( ]: assert aggs[facet] + # FILTERS + # person author + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de='Peter James') + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 2 + + # organisation author + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de='Great Edition') + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 1 + + # an other person author + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de='J.K. Rowling') + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 1 + + # two authors in the same document + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de=['Great Edition', 'Peter James']) + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 1 + + # two authors: each in a separate document + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de=['J.K. Rowling', 'Peter James']) + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 0 + @mock.patch('invenio_records_rest.views.verify_record_permission', mock.MagicMock(return_value=VerifyRecordPermissionPatch)) diff --git a/tests/api/test_search.py b/tests/api/test_search.py new file mode 100644 index 0000000000..f58db8873c --- /dev/null +++ b/tests/api/test_search.py @@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2019 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Search tests.""" + +from flask import url_for +from utils import get_json + + +def test_document_search( + client, + doc_title_travailleurs, + doc_title_travailleuses +): + """Test document boosting.""" + # phrase search + list_url = url_for( + 'invenio_records_rest.doc_list', + q='"Les travailleurs assidus sont de retours"', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # phrase search with punctuations + list_url = url_for( + 'invenio_records_rest.doc_list', + q='"Les travailleurs assidus sont de retours."', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # word search + list_url = url_for( + 'invenio_records_rest.doc_list', + q='travailleurs', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 2 + + # travailleurs == travailleur == travailleuses + list_url = url_for( + 'invenio_records_rest.doc_list', + q='travailleur', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 2 + + # ecole == école + list_url = url_for( + 'invenio_records_rest.doc_list', + q='ecole', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # Ecole == école + list_url = url_for( + 'invenio_records_rest.doc_list', + q='Ecole', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # ECOLE == école + list_url = url_for( + 'invenio_records_rest.doc_list', + q='Ecole', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # _école_ == école + list_url = url_for( + 'invenio_records_rest.doc_list', + q=' école ', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # Müller + list_url = url_for( + 'invenio_records_rest.doc_list', + q='Müller', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # Müller == Muller + list_url = url_for( + 'invenio_records_rest.doc_list', + q='Muller', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # Müller == Mueller + list_url = url_for( + 'invenio_records_rest.doc_list', + q='Mueller', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # test AND + list_url = url_for( + 'invenio_records_rest.doc_list', + q='travailleuse école', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # test OR in two docs + list_url = url_for( + 'invenio_records_rest.doc_list', + q='retours | école', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 2 + + # test AND in two fields (travailleuses == travailleur) + list_url = url_for( + 'invenio_records_rest.doc_list', + q='travailleuses bientôt', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + list_url = url_for( + 'invenio_records_rest.doc_list', + q='travailleuses + bientôt', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # test OR in two docs (each match only one term) + list_url = url_for( + 'invenio_records_rest.doc_list', + q='retours | école', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 2 + + # test AND in two docs (each match only one term) => no result + list_url = url_for( + 'invenio_records_rest.doc_list', + q='retours école', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 0 + + list_url = url_for( + 'invenio_records_rest.doc_list', + q='retours + école', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 0 + + # title + subtitle + list_url = url_for( + 'invenio_records_rest.doc_list', + q='Les travailleurs assidus sont de retours : ' + 'les jeunes arrivent bientôt ?', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # punctuation + list_url = url_for( + 'invenio_records_rest.doc_list', + q=r'école : . ... , ; ? \ ! = == - --', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + list_url = url_for( + 'invenio_records_rest.doc_list', + q=r'école:.,;?\!...=-==--', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # special chars + # œ in title + list_url = url_for( + 'invenio_records_rest.doc_list', + q=r'bœuf', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # æ in title + list_url = url_for( + 'invenio_records_rest.doc_list', + q=r'ex æquo', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # æ in title + list_url = url_for( + 'invenio_records_rest.doc_list', + q=r'ÆQUO', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 + + # œ in author + list_url = url_for( + 'invenio_records_rest.doc_list', + q=r'Corminbœuf', + simple='1' + ) + res = client.get(list_url) + hits = get_json(res)['hits'] + assert hits['total'] == 1 diff --git a/tests/data/data.json b/tests/data/data.json index 64558d57b8..0215a9eae3 100644 --- a/tests/data/data.json +++ b/tests/data/data.json @@ -1639,6 +1639,10 @@ { "name": "Peter James", "type": "person" + }, + { + "name": "Great Edition", + "type": "organisation" } ], "provisionActivity": [ @@ -2365,4 +2369,4 @@ }, "status": "open" } -} \ No newline at end of file +} diff --git a/tests/ui/conftest.py b/tests/ui/conftest.py index d64023fcc7..09450e542b 100644 --- a/tests/ui/conftest.py +++ b/tests/ui/conftest.py @@ -55,7 +55,15 @@ def es_default_index(es): """ES default index.""" current_search_client.indices.create( index='records-record-v1.0.0', - body={}, + body={ + 'mappings': { + 'record-v1.0.0': { + 'properties': { + 'pid': {'type': 'keyword'} + } + } + } + }, ignore=[400] ) yield es diff --git a/tests/ui/documents/test_documents_mapping.py b/tests/ui/documents/test_documents_mapping.py index 7aa93a5c14..e25948c97f 100644 --- a/tests/ui/documents/test_documents_mapping.py +++ b/tests/ui/documents/test_documents_mapping.py @@ -66,6 +66,6 @@ def test_document_search_mapping(app, document_records): assert c == 1 query = MultiMatch(query='Chamber of Secrets', - fields=['title.mainTitle.value.eng']) + fields=['title._text.*']) c = search.query(query).count() assert c == 1 diff --git a/tests/ui/test_api.py b/tests/ui/test_api.py index fbecc42665..8312acf0f2 100644 --- a/tests/ui/test_api.py +++ b/tests/ui/test_api.py @@ -106,7 +106,7 @@ def test_ilsrecord(app, es_default_index, ils_record, ils_record_2): """Test IlsRecord update.""" current_search.delete(ignore=[404]) - # the created records will be acessible in all function of this test file + # the created records will be accessible in all function of this test file record_1 = RecordTest.create( data=ils_record, dbcommit=True, diff --git a/tests/ui/test_utils_app.py b/tests/ui/test_utils_app.py index 751fe5e5d1..f24f4bdbd8 100644 --- a/tests/ui/test_utils_app.py +++ b/tests/ui/test_utils_app.py @@ -19,6 +19,7 @@ from rero_ils.modules.documents.api import Document from rero_ils.modules.utils import get_ref_for_pid +from rero_ils.utils import get_current_language def test_get_ref_for_pid(app): @@ -28,3 +29,8 @@ def test_get_ref_for_pid(app): assert get_ref_for_pid('doc', '3') == url assert get_ref_for_pid(Document, '3') == url assert get_ref_for_pid('test', '3') is None + + +def test_get_language(app): + """Test get the current language of the application.""" + assert get_current_language() == 'en'