Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rest api: add simple query support
Browse files Browse the repository at this point in the history
The current elasticsearch query string used by the REST API is powerful
but should not be used for search boxes. It raises exceptions when
the query syntax contains errors. As described in the elasticsearch
documentation, a simple query string should be used. A new optional HTTP
query parameter has been added and can be specified as follows:
`&simple=1`.
For example: `https://ils.rero.ch/global/search/?q=potter&simple=1`.

When the simple query syntax is chosen, the default boolean operator is
`AND`. Apart from this parameter, nothing has been modified.

* Adds a new type of aggregation filter to perform an AND boolean operation
between the terms in the same aggregation. All aggregation filters
now use the AND boolean operator.
* Updates elasticsearch mappings to enhance the search engine quality.
* Adds a new REST API list records operator to use a `simple_query_string`
instead of a `query_string`. The simple query string performs an AND
boolean operation by default.
* Adds search query tests.
* Adds missing utils test.
* Renames the `global_lowercase_asciifolding` elasticsearch analyzer to
`default`. This makes the rero-ils custom analyzer the default for
all elasticsearch `text` fields. All elasticsearch mappings have been
simplified.
* Creates a new `custom_keyword` analyzer.
* Creates a custom elasticsearch image with the icu analysis plugin
(https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu.html).
* Closes rero#755.

Co-Authored-by: Johnny Mariéthoz <Johnny.Mariethoz@rero.ch>
jma committed May 7, 2020
1 parent c7d118d commit e7cc19e
Showing 23 changed files with 529 additions and 210 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ include .tx/config
include docker/haproxy/Dockerfile
include docker/nginx/Dockerfile
include docker/postgres/Dockerfile
include docker/elasticsearch/Dockerfile
include Dockerfile
exclude scripts/all_year_days
include scripts/bootstrap
3 changes: 2 additions & 1 deletion docker-services.yml
Original file line number Diff line number Diff line change
@@ -73,7 +73,8 @@ services:
- "15672:15672"
- "5672:5672"
es:
image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2
build: ./docker/elasticsearch/
image: elasticsearch-icu
restart: "always"
environment:
- bootstrap.memory_lock=true
2 changes: 2 additions & 0 deletions docker/elasticsearch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
FROM docker.elastic.co/elasticsearch/elasticsearch-oss:6.6.2
RUN bin/elasticsearch-plugin install analysis-icu
40 changes: 20 additions & 20 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
@@ -38,7 +38,6 @@
ItemAtDeskToItemOnLoan, ItemOnLoanToItemInTransitHouse, \
ItemOnLoanToItemOnLoan, PendingToItemAtDesk, \
PendingToItemInTransitPickup, ToItemOnLoan
from invenio_records_rest.facets import terms_filter
from invenio_records_rest.utils import allow_all, deny_all

from .modules.acq_accounts.api import AcqAccount
@@ -94,6 +93,7 @@
librarian_delete_permission_factory, librarian_permission_factory, \
librarian_update_permission_factory, wiki_edit_ui_permission, \
wiki_edit_view_permission
from .query import and_term_filter
from .utils import get_current_language


@@ -1179,18 +1179,18 @@ def _(x):
)
),
filters={
_('document_type'): terms_filter('type'),
_('organisation'): terms_filter(
_('document_type'): and_term_filter('type'),
_('organisation'): and_term_filter(
'holdings.organisation.organisation_pid'
),
_('library'): terms_filter('holdings.organisation.library_pid'),
_('author__en'): terms_filter('facet_authors_en'),
_('author__fr'): terms_filter('facet_authors_fr'),
_('author__de'): terms_filter('facet_authors_de'),
_('author__it'): terms_filter('facet_authors_it'),
_('language'): terms_filter('language.value'),
_('subject'): terms_filter('facet_subjects'),
_('status'): terms_filter('holdings.items.status'),
_('library'): and_term_filter('holdings.organisation.library_pid'),
_('author__en'): and_term_filter('facet_authors_en'),
_('author__fr'): and_term_filter('facet_authors_fr'),
_('author__de'): and_term_filter('facet_authors_de'),
_('author__it'): and_term_filter('facet_authors_it'),
_('language'): and_term_filter('language.value'),
_('subject'): and_term_filter('facet_subjects'),
_('status'): and_term_filter('holdings.items.status'),
}
),
patrons=dict(
@@ -1206,7 +1206,7 @@ def _(x):
)
),
filters={
_('roles'): terms_filter('roles')
_('roles'): and_term_filter('roles')
},
),
acq_accounts=dict(
@@ -1227,8 +1227,8 @@ def _(x):
)
),
filters={
_('library'): terms_filter('library.pid'),
_('budget'): terms_filter('budget')
_('library'): and_term_filter('library.pid'),
_('budget'): and_term_filter('budget')
},
),
acq_invoices=dict(
@@ -1249,8 +1249,8 @@ def _(x):
)
),
filters={
_('library'): terms_filter('library.pid'),
_('status'): terms_filter('invoice_status')
_('library'): and_term_filter('library.pid'),
_('status'): and_term_filter('invoice_status')
},
),
acq_orders=dict(
@@ -1271,8 +1271,8 @@ def _(x):
)
),
filters={
_('library'): terms_filter('library.pid'),
_('status'): terms_filter('order_status')
_('library'): and_term_filter('library.pid'),
_('status'): and_term_filter('order_status')
},
),
persons=dict(
@@ -1288,15 +1288,15 @@ def _(x):
)
),
filters={
_('sources'): terms_filter('sources')
_('sources'): and_term_filter('sources')
}
),
)

# Elasticsearch fields boosting by index
RERO_ILS_QUERY_BOOSTING = {
'documents': {
'title.*': 3,
'title._text.*': 3,
'titlesProper.*': 3,
'authors.name': 2,
'authors.name_*': 2,
14 changes: 12 additions & 2 deletions rero_ils/es_templates/v6/record.json
Original file line number Diff line number Diff line change
@@ -12,13 +12,23 @@
}
},
"analyzer": {
"global_lowercase_asciifolding": {
"custom_keyword": {
"type": "custom",
"tokenizer": "char_group_tokenizer",
"tokenizer": "keyword",
"filter": [
"lowercase",
"german_normalization",
"asciifolding"
]
},
"default": {
"type": "custom",
"tokenizer": "char_group_tokenizer",
"filter": [
"lowercase",
"german_normalization",
"icu_folding"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -17,15 +17,13 @@
},
"name": {
"type": "text",
"copy_to": "circ_policy_name",
"analyzer": "global_lowercase_asciifolding"
"copy_to": "circ_policy_name"
},
"circ_policy_name": {
"type": "keyword"
},
"description": {
"type": "text",
"analyzer": "global_lowercase_asciifolding"
"type": "text"
},
"organisation": {
"properties": {
@@ -99,4 +97,4 @@
}
}
}
}
}
Loading

0 comments on commit e7cc19e

Please sign in to comment.