rest api: add simple query support

The current elasticsearch query string used by the REST API is powerful but should not be used for search boxes. It raises exceptions when the query syntax contains errors. As described in the elasticsearch documentation, a simple query string should be used. A new optional HTTP query parameter has been added and can be specified as follows: `&simple=1`. For example: `https://ils.rero.ch/global/search/?q=potter&simple=1`. When the simple query syntax is chosen, the default boolean operator is `AND`. Apart from this parameter, nothing has been modified. * Adds a new type of aggregation filter to perform an AND boolean operator between the terms in the same aggregation. All aggregation filters now use the AND boolean operator. * Updates elasticsearch mappings to enhance the search engine quality. * Adds a new REST API list records operator to use a `simple_query_string` instead of a `query_string`. The simple query string performs an AND boolean operator by default. * Adds search query tests. * Adds a missing utils test. * Renames `global_lowercase_asciifolding` elasticsearch analyzer to `default`. This makes the rero-ils custom analyzer the default for all elasticsearch `text` fields. All elasticsearch mappings have been simplified. * Creates a new `custom_keyword` analyzer. * Creates a custom elasticsearch image with the icu analysis plugin (https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu.html). * Closes rero#755. Co-Authored-by: Johnny Mariéthoz <Johnny.Mariethoz@rero.ch>
jma · May 7, 2020 · e7cc19e · e7cc19e
1 parent c7d118d
commit e7cc19e
Showing 23 changed files with 529 additions and 210 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -23,6 +23,7 @@ include .tx/config
 include docker/haproxy/Dockerfile
 include docker/nginx/Dockerfile
 include docker/postgres/Dockerfile
+include docker/elasticsearch/Dockerfile
 include Dockerfile
 exclude scripts/all_year_days
 include scripts/bootstrap

diff --git a/docker-services.yml b/docker-services.yml
@@ -73,7 +73,8 @@ services:
       - "15672:15672"
       - "5672:5672"
   es:
-    image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2
+    build: ./docker/elasticsearch/
+    image: elasticsearch-icu
     restart: "always"
     environment:
       - bootstrap.memory_lock=true

diff --git a/docker/elasticsearch/Dockerfile b/docker/elasticsearch/Dockerfile
@@ -0,0 +1,2 @@
+FROM docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2
+RUN bin/elasticsearch-plugin install analysis-icu
diff --git a/rero_ils/config.py b/rero_ils/config.py
@@ -38,7 +38,6 @@
     ItemAtDeskToItemOnLoan, ItemOnLoanToItemInTransitHouse, \
     ItemOnLoanToItemOnLoan, PendingToItemAtDesk, \
     PendingToItemInTransitPickup, ToItemOnLoan
-from invenio_records_rest.facets import terms_filter
 from invenio_records_rest.utils import allow_all, deny_all
 
 from .modules.acq_accounts.api import AcqAccount
@@ -94,6 +93,7 @@
     librarian_delete_permission_factory, librarian_permission_factory, \
     librarian_update_permission_factory, wiki_edit_ui_permission, \
     wiki_edit_view_permission
+from .query import and_term_filter
 from .utils import get_current_language
 
 
@@ -1179,18 +1179,18 @@ def _(x):
             )
         ),
         filters={
-            _('document_type'): terms_filter('type'),
-            _('organisation'): terms_filter(
+            _('document_type'): and_term_filter('type'),
+            _('organisation'): and_term_filter(
                 'holdings.organisation.organisation_pid'
             ),
-            _('library'): terms_filter('holdings.organisation.library_pid'),
-            _('author__en'): terms_filter('facet_authors_en'),
-            _('author__fr'): terms_filter('facet_authors_fr'),
-            _('author__de'): terms_filter('facet_authors_de'),
-            _('author__it'): terms_filter('facet_authors_it'),
-            _('language'): terms_filter('language.value'),
-            _('subject'): terms_filter('facet_subjects'),
-            _('status'): terms_filter('holdings.items.status'),
+            _('library'): and_term_filter('holdings.organisation.library_pid'),
+            _('author__en'): and_term_filter('facet_authors_en'),
+            _('author__fr'): and_term_filter('facet_authors_fr'),
+            _('author__de'): and_term_filter('facet_authors_de'),
+            _('author__it'): and_term_filter('facet_authors_it'),
+            _('language'): and_term_filter('language.value'),
+            _('subject'): and_term_filter('facet_subjects'),
+            _('status'): and_term_filter('holdings.items.status'),
         }
     ),
     patrons=dict(
@@ -1206,7 +1206,7 @@ def _(x):
             )
         ),
         filters={
-            _('roles'): terms_filter('roles')
+            _('roles'): and_term_filter('roles')
         },
     ),
     acq_accounts=dict(
@@ -1227,8 +1227,8 @@ def _(x):
             )
         ),
         filters={
-            _('library'): terms_filter('library.pid'),
-            _('budget'): terms_filter('budget')
+            _('library'): and_term_filter('library.pid'),
+            _('budget'): and_term_filter('budget')
         },
     ),
     acq_invoices=dict(
@@ -1249,8 +1249,8 @@ def _(x):
             )
         ),
         filters={
-            _('library'): terms_filter('library.pid'),
-            _('status'): terms_filter('invoice_status')
+            _('library'): and_term_filter('library.pid'),
+            _('status'): and_term_filter('invoice_status')
         },
     ),
     acq_orders=dict(
@@ -1271,8 +1271,8 @@ def _(x):
             )
         ),
         filters={
-            _('library'): terms_filter('library.pid'),
-            _('status'): terms_filter('order_status')
+            _('library'): and_term_filter('library.pid'),
+            _('status'): and_term_filter('order_status')
         },
     ),
     persons=dict(
@@ -1288,15 +1288,15 @@ def _(x):
             )
         ),
         filters={
-            _('sources'): terms_filter('sources')
+            _('sources'): and_term_filter('sources')
         }
     ),
 )
 
 # Elasticsearch fields boosting by index
 RERO_ILS_QUERY_BOOSTING = {
     'documents': {
-        'title.*': 3,
+        'title._text.*': 3,
         'titlesProper.*': 3,
         'authors.name': 2,
         'authors.name_*': 2,

diff --git a/rero_ils/es_templates/v6/record.json b/rero_ils/es_templates/v6/record.json
@@ -12,13 +12,23 @@
         }
       },
       "analyzer": {
-        "global_lowercase_asciifolding": {
+        "custom_keyword": {
           "type": "custom",
-          "tokenizer": "char_group_tokenizer",
+          "tokenizer": "keyword",
           "filter": [
             "lowercase",
+            "german_normalization",
             "asciifolding"
           ]
+        },
+        "default": {
+          "type": "custom",
+          "tokenizer": "char_group_tokenizer",
+          "filter": [
+            "lowercase",
+            "german_normalization",
+            "icu_folding"
+          ]
         }
       }
     }

diff --git a/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json b/rero_ils/modules/circ_policies/mappings/v6/circ_policies/circ_policy-v0.0.1.json
@@ -17,15 +17,13 @@
         },
         "name": {
           "type": "text",
-          "copy_to": "circ_policy_name",
-          "analyzer": "global_lowercase_asciifolding"
+          "copy_to": "circ_policy_name"
         },
         "circ_policy_name": {
           "type": "keyword"
         },
         "description": {
-          "type": "text",
-          "analyzer": "global_lowercase_asciifolding"
+          "type": "text"
         },
         "organisation": {
           "properties": {
@@ -99,4 +97,4 @@
       }
     }
   }
-}
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		FROM docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2
		RUN bin/elasticsearch-plugin install analysis-icu