diff --git a/MANIFEST.in b/MANIFEST.in index a579614acd..52c615d4b5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -23,6 +23,7 @@ include .tx/config include docker/haproxy/Dockerfile include docker/nginx/Dockerfile include docker/postgres/Dockerfile +include docker/elasticsearch/Dockerfile include Dockerfile exclude scripts/all_year_days include scripts/bootstrap diff --git a/docker-services.yml b/docker-services.yml index 51a62a8319..7114a6bbd1 100644 --- a/docker-services.yml +++ b/docker-services.yml @@ -73,7 +73,8 @@ services: - "15672:15672" - "5672:5672" es: - image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2 + build: ./docker/elasticsearch/ + image: elasticsearch-icu restart: "always" environment: - bootstrap.memory_lock=true diff --git a/docker/elasticsearch/Dockerfile b/docker/elasticsearch/Dockerfile new file mode 100644 index 0000000000..9292643df1 --- /dev/null +++ b/docker/elasticsearch/Dockerfile @@ -0,0 +1,2 @@ +FROM docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2 +RUN bin/elasticsearch-plugin install analysis-icu diff --git a/rero_ils/config.py b/rero_ils/config.py index 9cb3e1c1ff..90864b7f13 100644 --- a/rero_ils/config.py +++ b/rero_ils/config.py @@ -38,7 +38,6 @@ ItemAtDeskToItemOnLoan, ItemOnLoanToItemInTransitHouse, \ ItemOnLoanToItemOnLoan, PendingToItemAtDesk, \ PendingToItemInTransitPickup, ToItemOnLoan -from invenio_records_rest.facets import terms_filter from invenio_records_rest.utils import allow_all, deny_all from .modules.acq_accounts.api import AcqAccount @@ -94,6 +93,7 @@ librarian_delete_permission_factory, librarian_permission_factory, \ librarian_update_permission_factory, wiki_edit_ui_permission, \ wiki_edit_view_permission +from .query import and_term_filter from .utils import get_current_language @@ -1179,18 +1179,18 @@ def _(x): ) ), filters={ - _('document_type'): terms_filter('type'), - _('organisation'): terms_filter( + _('document_type'): and_term_filter('type'), + _('organisation'): and_term_filter( 
'holdings.organisation.organisation_pid' ), - _('library'): terms_filter('holdings.organisation.library_pid'), - _('author__en'): terms_filter('facet_authors_en'), - _('author__fr'): terms_filter('facet_authors_fr'), - _('author__de'): terms_filter('facet_authors_de'), - _('author__it'): terms_filter('facet_authors_it'), - _('language'): terms_filter('language.value'), - _('subject'): terms_filter('facet_subjects'), - _('status'): terms_filter('holdings.items.status'), + _('library'): and_term_filter('holdings.organisation.library_pid'), + _('author__en'): and_term_filter('facet_authors_en'), + _('author__fr'): and_term_filter('facet_authors_fr'), + _('author__de'): and_term_filter('facet_authors_de'), + _('author__it'): and_term_filter('facet_authors_it'), + _('language'): and_term_filter('language.value'), + _('subject'): and_term_filter('facet_subjects'), + _('status'): and_term_filter('holdings.items.status'), } ), patrons=dict( @@ -1206,7 +1206,7 @@ def _(x): ) ), filters={ - _('roles'): terms_filter('roles') + _('roles'): and_term_filter('roles') }, ), acq_accounts=dict( @@ -1227,8 +1227,8 @@ def _(x): ) ), filters={ - _('library'): terms_filter('library.pid'), - _('budget'): terms_filter('budget') + _('library'): and_term_filter('library.pid'), + _('budget'): and_term_filter('budget') }, ), acq_invoices=dict( @@ -1249,8 +1249,8 @@ def _(x): ) ), filters={ - _('library'): terms_filter('library.pid'), - _('status'): terms_filter('invoice_status') + _('library'): and_term_filter('library.pid'), + _('status'): and_term_filter('invoice_status') }, ), acq_orders=dict( @@ -1271,8 +1271,8 @@ def _(x): ) ), filters={ - _('library'): terms_filter('library.pid'), - _('status'): terms_filter('order_status') + _('library'): and_term_filter('library.pid'), + _('status'): and_term_filter('order_status') }, ), persons=dict( @@ -1288,7 +1288,7 @@ def _(x): ) ), filters={ - _('sources'): terms_filter('sources') + _('sources'): and_term_filter('sources') } ), ) @@ -1296,7 
+1296,7 @@ def _(x): # Elasticsearch fields boosting by index RERO_ILS_QUERY_BOOSTING = { 'documents': { - 'title.*': 3, + 'title._text.*': 3, 'titlesProper.*': 3, 'authors.name': 2, 'authors.name_*': 2, @@ -1354,7 +1354,7 @@ def _(x): # ------ BUDGETS SORT RECORDS_REST_SORT_OPTIONS['budgets']['name'] = dict( - fields=['name'], title='Budget name', + fields=['budget_name'], title='Budget name', default_order='asc' ) RECORDS_REST_DEFAULT_SORT['budgets'] = dict( @@ -1378,7 +1378,7 @@ def _(x): # ------ LIBRARIES SORT RECORDS_REST_SORT_OPTIONS['libraries']['name'] = dict( - fields=['name_sort'], title='Library name', + fields=['library_name'], title='Library name', default_order='asc' ) RECORDS_REST_DEFAULT_SORT['libraries'] = dict( @@ -1394,11 +1394,11 @@ def _(x): # ------ LOCATIONS SORT RECORDS_REST_SORT_OPTIONS['locations']['name'] = dict( - fields=['name'], title='Location name', + fields=['location_name'], title='Location name', default_order='asc' ) RECORDS_REST_SORT_OPTIONS['locations']['pickup_name'] = dict( - fields=['pickup_name'], title='Pickup Location name', + fields=['pickup_name.keyword'], title='Pickup Location name', default_order='asc' ) RECORDS_REST_DEFAULT_SORT['locations'] = dict( diff --git a/rero_ils/es_templates/v6/record.json b/rero_ils/es_templates/v6/record.json index c0b5b1783d..805bfb5486 100644 --- a/rero_ils/es_templates/v6/record.json +++ b/rero_ils/es_templates/v6/record.json @@ -12,12 +12,22 @@ } }, "analyzer": { - "global_lowercase_asciifolding": { + "custom_keyword": { + "type": "custom", + "tokenizer": "keyword", + "filter": [ + "lowercase", + "german_normalization", + "icu_folding" + ] + }, + "default": { "type": "custom", "tokenizer": "char_group_tokenizer", "filter": [ "lowercase", - "asciifolding" + "german_normalization", + "icu_folding" ] } } diff --git a/rero_ils/modules/acq_accounts/mappings/v6/acq_accounts/acq_account-v0.0.1.json b/rero_ils/modules/acq_accounts/mappings/v6/acq_accounts/acq_account-v0.0.1.json index 
e111f88335..0486dbe194 100644 --- a/rero_ils/modules/acq_accounts/mappings/v6/acq_accounts/acq_account-v0.0.1.json +++ b/rero_ils/modules/acq_accounts/mappings/v6/acq_accounts/acq_account-v0.0.1.json @@ -16,10 +16,10 @@ "type": "keyword" }, "name": { - "type": "keyword" + "type": "text" }, "description": { - "type": "keyword" + "type": "text" }, "budget": { "properties": { diff --git a/rero_ils/modules/budgets/mappings/v6/budgets/budget-v0.0.1.json b/rero_ils/modules/budgets/mappings/v6/budgets/budget-v0.0.1.json index 419b6797cc..4563b6ec52 100644 --- a/rero_ils/modules/budgets/mappings/v6/budgets/budget-v0.0.1.json +++ b/rero_ils/modules/budgets/mappings/v6/budgets/budget-v0.0.1.json @@ -16,6 +16,9 @@ "type": "keyword" }, "name": { + "type": "text" + }, + "budget_name": { "type": "keyword" }, "start_date": { diff --git a/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json b/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json index 25828c20e0..a8cacaeec3 100644 --- a/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json +++ b/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json @@ -17,15 +17,13 @@ }, "name": { "type": "text", - "copy_to": "circ_policy_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "circ_policy_name" }, "circ_policy_name": { "type": "keyword" }, "description": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "organisation": { "properties": { @@ -99,4 +97,4 @@ } } } -} \ No newline at end of file +} diff --git a/rero_ils/modules/documents/jsonschemas/documents/document-v0.0.1_src.json b/rero_ils/modules/documents/jsonschemas/documents/document-v0.0.1_src.json index 37d8321c35..5925067cbf 100644 --- a/rero_ils/modules/documents/jsonschemas/documents/document-v0.0.1_src.json +++ b/rero_ils/modules/documents/jsonschemas/documents/document-v0.0.1_src.json @@ -1188,6 +1188,7 @@ 
"minItems": 1, "items": { "type": "string", + "minLength": 1, "form": { "placeholder": "Example: Access only from the library" } diff --git a/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json b/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json index 227e487030..90a6e90388 100644 --- a/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json +++ b/rero_ils/modules/documents/mappings/v6/documents/document-v0.0.1.json @@ -45,29 +45,12 @@ "properties": { "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "autocomplete_title", - "fields": { - "eng": { - "type": "text", - "analyzer": "english" - }, - "fre": { - "type": "text", - "analyzer": "french" - }, - "ger": { - "type": "text", - "analyzer": "german" - }, - "ita": { - "type": "text", - "analyzer": "italian" - } - } + "index": false }, "language": { - "type": "keyword" + "type": "keyword", + "index": false } } }, @@ -76,25 +59,7 @@ "properties": { "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding", - "fields": { - "eng": { - "type": "text", - "analyzer": "english" - }, - "fre": { - "type": "text", - "analyzer": "french" - }, - "ger": { - "type": "text", - "analyzer": "german" - }, - "ita": { - "type": "text", - "analyzer": "italian" - } - } + "index": false }, "language": { "type": "keyword" @@ -108,7 +73,8 @@ "type": "object", "properties": { "value": { - "type": "keyword" + "type": "text", + "index": false } } }, @@ -116,28 +82,16 @@ "type": "object", "properties": { "value": { - "type": "keyword" + "type": "text", + "index": false } } } } }, "_text": { - "type": "text" - } - } - }, - "autocomplete_title": { - "type": "text", - "analyzer": "autocomplete", - "search_analyzer": "standard" - }, - "responsibilityStatement": { - "type": "object", - "properties": { - "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding", + "index": false, "fields": { "eng": { "type": "text", @@ -156,6 +110,19 @@ 
"analyzer": "italian" } } + } + } + }, + "autocomplete_title": { + "type": "text", + "analyzer": "autocomplete", + "search_analyzer": "standard" + }, + "responsibilityStatement": { + "type": "object", + "properties": { + "value": { + "type": "text" }, "language": { "type": "keyword" @@ -164,7 +131,6 @@ }, "titlesProper": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -200,7 +166,6 @@ }, "is_part_of": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -228,7 +193,6 @@ "properties": { "name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": [ "facet_authors_en", "facet_authors_fr", @@ -238,27 +202,22 @@ }, "name_en": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_en" }, "name_fr": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_fr" }, "name_de": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_de" }, "name_it": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_authors_it" }, "variant_name": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "type": { "type": "keyword" @@ -307,10 +266,11 @@ "properties": { "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "index": false }, "language": { - "type": "keyword" + "type": "keyword", + "index": false } } }, @@ -319,15 +279,29 @@ "properties": { "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "index": false }, "language": { - "type": "keyword" + "type": "keyword", + "index": false } } } } }, + "electronicLocator": { + "properties": { + "publicNote": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "url": { + "type": "keyword" + } + } + }, "provisionActivity": { "type": "object", "properties": { @@ -335,13 +309,16 @@ "type": 
"object", "properties": { "type": { - "type": "text" + "type": "text", + "index": false }, "canton": { - "type": "text" + "type": "text", + "index": false }, "country": { - "type": "text" + "type": "text", + "index": false } } }, @@ -353,32 +330,55 @@ "properties": { "value": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "index": false }, "language": { - "type": "keyword" + "type": "keyword", + "index": false } } }, "type": { - "type": "text" + "type": "text", + "index": false } } }, "note": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "index": false, + "fields": { + "eng": { + "type": "text", + "analyzer": "english" + }, + "fre": { + "type": "text", + "analyzer": "french" + }, + "ger": { + "type": "text", + "analyzer": "german" + }, + "ita": { + "type": "text", + "analyzer": "italian" + } + } }, "startDate": { "type": "date", - "format": "yyyy" + "format": "yyyy", + "index": false }, "endDate": { "type": "date", - "format": "yyyy" + "format": "yyyy", + "index": false }, "type": { - "type": "text" + "type": "text", + "index": false }, "_text": { "properties": { @@ -393,24 +393,30 @@ } }, "extent": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "otherMaterialCharacteristics": { - "type": "keyword" + "type": "text" }, "formats": { - "type": "keyword" + "type": "text" }, "additionalMaterials": { - "type": "keyword" + "type": "text" }, "series": { "type": "object", "properties": { "name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", + "index": false + }, + "number": { + "type": "keyword", + "index": false + }, + "_text": { + "type": "text", "fields": { "eng": { "type": "text", @@ -429,18 +435,11 @@ "analyzer": "italian" } } - }, - "number": { - "type": "keyword" - }, - "_text": { - "type": "text" } } }, "notes": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -462,7 +461,6 @@ }, "abstracts": { "type": "text", - 
"analyzer": "global_lowercase_asciifolding", "fields": { "eng": { "type": "text", @@ -510,7 +508,6 @@ }, "subjects": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "facet_subjects", "fields": { "eng": { @@ -543,17 +540,11 @@ "available": { "type": "boolean" }, - "call_number": { - "type": "keyword" - }, "location": { "type": "object", "properties": { "pid": { "type": "keyword" - }, - "name": { - "type": "text" } } }, @@ -562,9 +553,6 @@ "properties": { "pid": { "type": "keyword" - }, - "name": { - "type": "text" } } }, @@ -579,22 +567,6 @@ } } }, - "electronicLocator": { - "properties": { - "content": { - "type": "keyword" - }, - "publicNote": { - "type": "text" - }, - "type": { - "type": "keyword" - }, - "url": { - "type": "keyword" - } - } - }, "items": { "type": "object", "properties": { @@ -605,7 +577,7 @@ "type": "keyword" }, "call_number": { - "type": "keyword" + "type": "text" }, "status": { "type": "keyword" diff --git a/rero_ils/modules/holdings/mappings/v6/holdings/holding-v0.0.1.json b/rero_ils/modules/holdings/mappings/v6/holdings/holding-v0.0.1.json index d281bf114e..835ebafe70 100644 --- a/rero_ils/modules/holdings/mappings/v6/holdings/holding-v0.0.1.json +++ b/rero_ils/modules/holdings/mappings/v6/holdings/holding-v0.0.1.json @@ -43,7 +43,7 @@ "type": "object", "properties": { "uri": { - "type": "text" + "type": "keyword" }, "source": { "type": "keyword" @@ -65,7 +65,8 @@ } }, "holdings_type": { - "type": "keyword" + "type": "text", + "analyzer": "custom_keyword" }, "patterns": { "properties": { diff --git a/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json b/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json index 1440307eae..67a8018f45 100644 --- a/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json +++ b/rero_ils/modules/item_types/mappings/v6/item_types/item_type-v0.0.1.json @@ -17,15 +17,13 @@ }, "name": { "type": "text", - "copy_to": "item_type_name", 
- "analyzer": "global_lowercase_asciifolding" + "copy_to": "item_type_name" }, "item_type_name": { "type": "keyword" }, "description": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "type": { "type": "keyword" diff --git a/rero_ils/modules/items/mappings/v6/items/item-v0.0.1.json b/rero_ils/modules/items/mappings/v6/items/item-v0.0.1.json index 367d7ffc6a..acd8e6cfec 100644 --- a/rero_ils/modules/items/mappings/v6/items/item-v0.0.1.json +++ b/rero_ils/modules/items/mappings/v6/items/item-v0.0.1.json @@ -71,4 +71,4 @@ } } } -} \ No newline at end of file +} diff --git a/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json b/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json index 2e4ee2ce4c..f47088e40e 100644 --- a/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json +++ b/rero_ils/modules/libraries/mappings/v6/libraries/library-v0.0.1.json @@ -16,23 +16,26 @@ "type": "keyword" }, "code": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "keyword" }, "name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", - "copy_to": "name_sort" + "copy_to": "library_name" }, - "name_sort": { + "library_name": { "type": "keyword" }, "address": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "email": { - "type": "keyword" + "type": "keyword", + "fields": { + "analyzed": { + "type": "text", + "analyzer": "custom_keyword" + } + } }, "exception_dates": { "properties": { diff --git a/rero_ils/modules/locations/mappings/v6/locations/location-v0.0.1.json b/rero_ils/modules/locations/mappings/v6/locations/location-v0.0.1.json index 5c4a49f452..a6a2a8c43c 100644 --- a/rero_ils/modules/locations/mappings/v6/locations/location-v0.0.1.json +++ b/rero_ils/modules/locations/mappings/v6/locations/location-v0.0.1.json @@ -19,6 +19,10 @@ "type": "keyword" }, "name": { + "type": "text", + "copy_to": "location_name" + }, + 
"location_name": { "type": "keyword" }, "is_pickup": { @@ -28,7 +32,12 @@ "type": "boolean" }, "pickup_name": { - "type": "keyword" + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } }, "library": { "properties": { diff --git a/rero_ils/modules/organisations/mappings/v6/organisations/organisation-v0.0.1.json b/rero_ils/modules/organisations/mappings/v6/organisations/organisation-v0.0.1.json index 7e55a2b112..77ffb3b57e 100644 --- a/rero_ils/modules/organisations/mappings/v6/organisations/organisation-v0.0.1.json +++ b/rero_ils/modules/organisations/mappings/v6/organisations/organisation-v0.0.1.json @@ -16,7 +16,11 @@ "type": "keyword" }, "name": { - "type": "text" + "type": "text", + "copy_to": "organisation_name" + }, + "organisation_name": { + "type": "keyword" }, "address": { "type": "text" diff --git a/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json b/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json index d073f7eb42..e0a994aad1 100644 --- a/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json +++ b/rero_ils/modules/patron_transaction_events/mappings/v6/patron_transaction_events/patron_transaction_event-v0.0.1.json @@ -35,8 +35,7 @@ "type": "keyword" }, "note": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "library": { "properties": { diff --git a/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json b/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json index ef4e79fe82..99f5cfc818 100644 --- a/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json +++ b/rero_ils/modules/patron_types/mappings/v6/patron_types/patron_type-v0.0.1.json @@ -17,15 +17,13 @@ }, "name": { "type": "text", - "copy_to": "patron_type_name", - 
"analyzer": "global_lowercase_asciifolding" + "copy_to": "patron_type_name" }, "patron_type_name": { "type": "keyword" }, "description": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "organisation": { "properties": { diff --git a/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json b/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json index ef27d52fdb..7b531382c7 100644 --- a/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json +++ b/rero_ils/modules/patrons/mappings/v6/patrons/patron-v0.0.1.json @@ -17,7 +17,6 @@ }, "first_name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "first_name_sort" }, "first_name_sort": { @@ -25,7 +24,6 @@ }, "last_name": { "type": "text", - "analyzer": "global_lowercase_asciifolding", "copy_to": "last_name_sort" }, "last_name_sort": { @@ -35,21 +33,25 @@ "type": "date" }, "email": { - "type": "keyword" + "type": "keyword", + "fields": { + "analyzed": { + "type": "text", + "analyzer": "custom_keyword" + } + } }, "street": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "postal_code": { "type": "keyword" }, "city": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "phone": { - "type": "text" + "type": "keyword" }, "patron_type": { "properties": { @@ -76,7 +78,7 @@ } }, "circulation_location_pid": { - "type": "text" + "type": "keyword" }, "roles": { "type": "keyword" diff --git a/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json b/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json index 8aca7db1bd..a81947a48c 100644 --- a/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json +++ b/rero_ils/modules/persons/mappings/v6/persons/person-v0.0.1.json @@ -70,21 +70,35 @@ }, "biographical_information": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "fields": { + "eng": { + "type": "text", + "analyzer": "english" + }, 
+ "fre": { + "type": "text", + "analyzer": "french" + }, + "ger": { + "type": "text", + "analyzer": "german" + }, + "ita": { + "type": "text", + "analyzer": "italian" + } + } }, "preferred_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { - "type": "text", - "analyzer": "global_lowercase_asciifolding" + "type": "text" }, "pid": { "type": "keyword" @@ -116,17 +130,32 @@ }, "biographical_information": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "fields": { + "eng": { + "type": "text", + "analyzer": "english" + }, + "fre": { + "type": "text", + "analyzer": "french" + }, + "ger": { + "type": "text", + "analyzer": "german" + }, + "ita": { + "type": "text", + "analyzer": "italian" + } + } }, "preferred_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { "type": "text" @@ -147,6 +176,9 @@ "identifier_for_person": { "type": "keyword" }, + "language_of_person": { + "type": "keyword" + }, "date_of_birth": { "type": "keyword" }, @@ -155,17 +187,32 @@ }, "biographical_information": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "fields": { + "eng": { + "type": "text", + "analyzer": "english" + }, + "fre": { + "type": "text", + "analyzer": "french" + }, + "ger": { + "type": "text", + "analyzer": "german" + }, + "ita": { + "type": "text", + "analyzer": "italian" + } + } }, "preferred_name_for_person": { "type": "text", - 
"copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { "type": "text" @@ -194,17 +241,32 @@ }, "biographical_information": { "type": "text", - "analyzer": "global_lowercase_asciifolding" + "fields": { + "eng": { + "type": "text", + "analyzer": "english" + }, + "fre": { + "type": "text", + "analyzer": "french" + }, + "ger": { + "type": "text", + "analyzer": "german" + }, + "ita": { + "type": "text", + "analyzer": "italian" + } + } }, "preferred_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "variant_name_for_person": { "type": "text", - "copy_to": "autocomplete_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "autocomplete_name" }, "authorized_access_point_representing_a_person": { "type": "text" diff --git a/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json b/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json index ff3a0db3d6..639f1b6e1f 100644 --- a/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json +++ b/rero_ils/modules/vendors/mappings/v6/vendors/vendor-v0.0.1.json @@ -17,8 +17,7 @@ }, "name": { "type": "text", - "copy_to": "vendor_name", - "analyzer": "global_lowercase_asciifolding" + "copy_to": "vendor_name" }, "vendor_name": { "type": "keyword" @@ -27,7 +26,7 @@ "type": "text" }, "website": { - "type": "text" + "type": "keyword" }, "currency": { "type": "keyword" @@ -51,13 +50,19 @@ "type": "text" }, "city": { - "type": "keyword" + "type": "text" }, "country": { - "type": "keyword" + "type": "text" }, "email": { - "type": "keyword" + "type": "keyword", + "fields": { + "analyzed": { + "type": "text", + "analyzer": "custom_keyword" + } 
+ } }, "phone": { "type": "keyword" @@ -76,13 +81,19 @@ "type": "text" }, "city": { - "type": "keyword" + "type": "text" }, "country": { - "type": "keyword" + "type": "text" }, "email": { - "type": "keyword" + "type": "keyword", + "fields": { + "analyzed": { + "type": "text", + "analyzer": "custom_keyword" + } + } }, "phone": { "type": "keyword" diff --git a/rero_ils/query.py b/rero_ils/query.py index a3fd0c6bbd..fa2c54bae6 100644 --- a/rero_ils/query.py +++ b/rero_ils/query.py @@ -19,6 +19,8 @@ from __future__ import absolute_import, print_function +import re + from elasticsearch_dsl.query import Q from flask import current_app, request from invenio_records_rest.errors import InvalidQueryRESTError @@ -26,6 +28,22 @@ from .modules.organisations.api import Organisation from .modules.patrons.api import current_patron +_PUNCTUATION_REGEX = re.compile(r'\s+[:,\?,\,,\.,;,\/,!,=,-]+\s*?') + + +def and_term_filter(field): + """Create a term filter. + + :param field: Field name. + :returns: Function that returns a boolean AND query between term values. 
+ """ + def inner(values): + must = [] + for value in values: + must.append(Q('term', **{field: value})) + return Q('bool', must=must) + return inner + def view_search_factory(self, search, query_parser=None): """Search factory with view code parameter.""" @@ -135,14 +153,25 @@ def search_factory(self, search, query_parser=None): """ def _default_parser(qstr=None, query_boosting=[]): """Default parser that uses the Q() from elasticsearch_dsl.""" + query_type = 'query_string' + default_operator = 'OR' + if request.args.get('simple'): + query_type = 'simple_query_string' + default_operator = 'AND' + if qstr: + # TODO: remove this bad hack + qstr = _PUNCTUATION_REGEX.sub(' ', qstr).rstrip() if not query_boosting: - return Q('query_string', query=qstr) + return Q(query_type, query=qstr, + default_operator=default_operator) else: return Q('bool', should=[ - Q('query_string', query=qstr, boost=2, - fields=query_boosting), - Q('query_string', query=qstr) + Q(query_type, query=qstr, boost=2, + fields=query_boosting, + default_operator=default_operator), + Q(query_type, query=qstr, + default_operator=default_operator) ]) return Q() diff --git a/tests/api/conftest.py b/tests/api/conftest.py index d24053fc9e..4488966304 100644 --- a/tests/api/conftest.py +++ b/tests/api/conftest.py @@ -20,11 +20,70 @@ from __future__ import absolute_import, print_function import pytest +from utils import flush_index +from rero_ils.modules.documents.api import Document, DocumentsSearch -@pytest.fixture(scope="module") + +@pytest.fixture(scope='module') def create_app(): """Create test app.""" from invenio_app.factory import create_api return create_api + + +@pytest.fixture(scope='module') +def doc_title_travailleurs(app): + """Document with title with travailleur.""" + data = { + '$schema': 'https://ils.rero.ch/schema/documents/' + 'document-minimal-v0.0.1.json', + 'pid': 'doc_title_test', 'type': 'book', + 'language': [{'type': 'bf:Language', 'value': 'fre'}], + 'title': [{ + 'type': 
'bf:Title', + 'mainTitle': [{ + 'value': 'Les travailleurs assidus sont de retours' + }], + 'subtitle': [{'value': 'les jeunes arrivent bientôt ?'}] + }] + } + doc = Document.create( + data=data, + delete_pid=False, + dbcommit=True, + reindex=True) + flush_index(DocumentsSearch.Meta.index) + return doc + + +@pytest.fixture(scope='module') +def doc_title_travailleuses(app): + """Indexed French document whose title contains 'travailleuses'.""" + data = { + '$schema': 'https://ils.rero.ch/schema/documents/' + 'document-minimal-v0.0.1.json', + 'pid': 'doc_title_test', 'type': 'book', + 'language': [{'type': 'bf:Language', 'value': 'fre'}], + 'title': [{ + 'type': 'bf:Title', + 'mainTitle': [{ + 'value': "Les travailleuses partent à l'école" + }], + 'subtitle': [{'value': "lorsqu'un est bœuf ex æquo"}] + + }], + 'authors': [{ + 'name': 'Müller, John', 'type': 'person' + }, { + 'name': 'Corminbœuf, Gruß', 'type': 'person' + }], + } + doc = Document.create( + data=data, + delete_pid=False, + dbcommit=True, + reindex=True) + flush_index(DocumentsSearch.Meta.index) + return doc diff --git a/tests/api/test_documents_rest.py b/tests/api/test_documents_rest.py index 8bbdada7f5..557af16885 100644 --- a/tests/api/test_documents_rest.py +++ b/tests/api/test_documents_rest.py @@ -57,7 +57,8 @@ def test_documents_permissions(client, document, json_header): @mock.patch('invenio_records_rest.views.verify_record_permission', mock.MagicMock(return_value=VerifyRecordPermissionPatch)) def test_documents_facets( - client, document, item_lib_martigny, rero_json_header + client, document, ebook_1, ebook_2, ebook_3, ebook_4, + item_lib_martigny, rero_json_header ): """Test record retrieval.""" list_url = url_for('invenio_records_rest.doc_list', view='global') @@ -65,7 +66,6 @@ def test_documents_facets( res = client.get(list_url, headers=rero_json_header) data = get_json(res) aggs = data['aggregations'] - # check all facets are present for facet in [ 'document_type', 'author__en', 'author__fr', @@ -73,6 +73,42 @@ 
def test_documents_facets( ]: assert aggs[facet] + # FILTERS + # person author + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de='Peter James') + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 2 + + # organisation author + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de='Great Edition') + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 1 + + # another person author + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de='J.K. Rowling') + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 1 + + # two authors in the same document + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de=['Great Edition', 'Peter James']) + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 1 + + # two authors, each in a separate document: no document has both + list_url = url_for('invenio_records_rest.doc_list', view='global', + author__de=['J.K. Rowling', 'Peter James']) + res = client.get(list_url, headers=rero_json_header) + data = get_json(res) + assert data['hits']['total'] == 0 + @mock.patch('invenio_records_rest.views.verify_record_permission', mock.MagicMock(return_value=VerifyRecordPermissionPatch)) diff --git a/tests/api/test_search.py b/tests/api/test_search.py new file mode 100644 index 0000000000..f58db8873c --- /dev/null +++ b/tests/api/test_search.py @@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- +# +# RERO ILS +# Copyright (C) 2019 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Search tests."""
+
+from flask import url_for
+from utils import get_json
+
+
+def test_document_search(
+    client,
+    doc_title_travailleurs,
+    doc_title_travailleuses
+):
+    """Test document search (accents, case, plurals, AND/OR operators)."""
+    # phrase search
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='"Les travailleurs assidus sont de retours"',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # phrase search with punctuations
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='"Les travailleurs assidus sont de retours."',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # word search
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='travailleurs',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 2
+
+    # travailleurs == travailleur == travailleuses
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='travailleur',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 2
+
+    # ecole == école
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='ecole',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # Ecole == école
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='Ecole',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # ECOLE == école
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='ECOLE',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # _école_ == école
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q=' école ',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # Müller
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='Müller',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # Müller == Muller
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='Muller',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # Müller == Mueller
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='Mueller',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # test AND
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='travailleuse école',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # test OR in two docs
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='retours | école',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 2
+
+    # test AND in two fields (travailleuses == travailleur)
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='travailleuses bientôt',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='travailleuses + bientôt',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # test OR in two docs (each match only one term)
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='retours | école',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 2
+
+    # test AND in two docs (each match only one term) => no result
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='retours école',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 0
+
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='retours + école',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 0
+
+    # title + subtitle
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q='Les travailleurs assidus sont de retours : '
+          'les jeunes arrivent bientôt ?',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # punctuation
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q=r'école : . ... , ; ? \ ! = == - --',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q=r'école:.,;?\!...=-==--',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # special chars
+    # œ in title
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q=r'bœuf',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # æ in title
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q=r'ex æquo',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # Æ (upper case) in title
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q=r'ÆQUO',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
+
+    # œ in author
+    list_url = url_for(
+        'invenio_records_rest.doc_list',
+        q=r'Corminbœuf',
+        simple='1'
+    )
+    res = client.get(list_url)
+    hits = get_json(res)['hits']
+    assert hits['total'] == 1
diff --git a/tests/data/data.json b/tests/data/data.json
index 64558d57b8..0215a9eae3 100644
---
a/tests/data/data.json +++ b/tests/data/data.json @@ -1639,6 +1639,10 @@ { "name": "Peter James", "type": "person" + }, + { + "name": "Great Edition", + "type": "organisation" } ], "provisionActivity": [ @@ -2365,4 +2369,4 @@ }, "status": "open" } -} \ No newline at end of file +} diff --git a/tests/ui/conftest.py b/tests/ui/conftest.py index d64023fcc7..09450e542b 100644 --- a/tests/ui/conftest.py +++ b/tests/ui/conftest.py @@ -55,7 +55,15 @@ def es_default_index(es): """ES default index.""" current_search_client.indices.create( index='records-record-v1.0.0', - body={}, + body={ + 'mappings': { + 'record-v1.0.0': { + 'properties': { + 'pid': {'type': 'keyword'} + } + } + } + }, ignore=[400] ) yield es diff --git a/tests/ui/documents/test_documents_mapping.py b/tests/ui/documents/test_documents_mapping.py index 7aa93a5c14..e25948c97f 100644 --- a/tests/ui/documents/test_documents_mapping.py +++ b/tests/ui/documents/test_documents_mapping.py @@ -66,6 +66,6 @@ def test_document_search_mapping(app, document_records): assert c == 1 query = MultiMatch(query='Chamber of Secrets', - fields=['title.mainTitle.value.eng']) + fields=['title._text.*']) c = search.query(query).count() assert c == 1 diff --git a/tests/ui/test_api.py b/tests/ui/test_api.py index fbecc42665..8312acf0f2 100644 --- a/tests/ui/test_api.py +++ b/tests/ui/test_api.py @@ -106,7 +106,7 @@ def test_ilsrecord(app, es_default_index, ils_record, ils_record_2): """Test IlsRecord update.""" current_search.delete(ignore=[404]) - # the created records will be acessible in all function of this test file + # the created records will be accessible in all functions of this test file record_1 = RecordTest.create( data=ils_record, dbcommit=True, diff --git a/tests/ui/test_utils_app.py b/tests/ui/test_utils_app.py index 751fe5e5d1..f24f4bdbd8 100644 --- a/tests/ui/test_utils_app.py +++ b/tests/ui/test_utils_app.py @@ -19,6 +19,7 @@ from rero_ils.modules.documents.api import Document from rero_ils.modules.utils import 
get_ref_for_pid +from rero_ils.utils import get_current_language def test_get_ref_for_pid(app): @@ -28,3 +29,8 @@ def test_get_ref_for_pid(app): assert get_ref_for_pid('doc', '3') == url assert get_ref_for_pid(Document, '3') == url assert get_ref_for_pid('test', '3') is None + + +def test_get_language(app): + """Test getting the current language of the application.""" + assert get_current_language() == 'en'