diff --git a/rero_ils/config.py b/rero_ils/config.py
index 3b83308dec..c78142d2bc 100644
--- a/rero_ils/config.py
+++ b/rero_ils/config.py
@@ -2205,6 +2205,54 @@ def _(x):
RECORDS_REST_DEFAULT_SORT['circ_policies'] = dict(
query='bestmatch', noquery='name')
+# ------ COLLECTIONS SORT
+RECORDS_REST_SORT_OPTIONS['collections']['start_date'] = dict(
+ fields=['start_date', 'title_sort'], title='Start date and title',
+ default_order='asc'
+)
+RECORDS_REST_SORT_OPTIONS['collections']['title'] = dict(
+    fields=['title_sort'], title='Title',
+ default_order='asc'
+)
+RECORDS_REST_DEFAULT_SORT['collections'] = dict(
+ query='bestmatch', noquery='start_date')
+
+# ------ CONTRIBUTIONS SORT
+RECORDS_REST_SORT_OPTIONS['contributions']['fr_name'] = dict(
+ fields=[
+ 'idref_authorized_access_point_sort',
+ 'rero_authorized_access_point_sort',
+ 'gnd_authorized_access_point_sort',
+ ],
+    title='Contribution french name',
+ default_order='asc'
+)
+RECORDS_REST_SORT_OPTIONS['contributions']['de_name'] = dict(
+ fields=[
+ 'gnd_authorized_access_point_sort',
+ 'idref_authorized_access_point_sort',
+ 'rero_authorized_access_point_sort'
+ ],
+    title='Contribution german name',
+ default_order='asc'
+)
+
+# ------ DOCUMENTS SORT
+RECORDS_REST_SORT_OPTIONS['documents']['title'] = dict(
+ fields=['sort_title'], title='Document title',
+ default_order='asc'
+)
+
+RECORDS_REST_SORT_OPTIONS['documents']['pub_date_new'] = dict(
+ fields=['-sort_date_new'], title='Document date (newest)',
+ default_order='desc'
+)
+
+RECORDS_REST_SORT_OPTIONS['documents']['pub_date_old'] = dict(
+ fields=['sort_date_old'], title='Document date (oldest)',
+ default_order='asc'
+)
+
# ------ HOLDINGS SORT
RECORDS_REST_SORT_OPTIONS['holdings']['library_location'] = dict(
fields=['library.pid', 'location.pid'],
@@ -2214,7 +2262,24 @@ def _(x):
RECORDS_REST_DEFAULT_SORT['holdings'] = dict(
query='bestmatch', noquery='library_location')
-# ------ ITEM SORT
+# ------ ITEMS SORT
+RECORDS_REST_SORT_OPTIONS['items']['barcode'] = dict(
+ fields=['barcode'], title='Barcode',
+ default_order='asc'
+)
+
+RECORDS_REST_SORT_OPTIONS['items']['call_number'] = dict(
+ fields=['call_number'], title='Call Number',
+ default_order='asc'
+)
+RECORDS_REST_SORT_OPTIONS['items']['second_call_number'] = dict(
+    fields=['second_call_number'], title='Second Call Number',
+ default_order='asc'
+)
+RECORDS_REST_SORT_OPTIONS['items']['issue_expected_date'] = dict(
+ fields=['issue.expected_date'], title='Issue expected date',
+ default_order='asc'
+)
RECORDS_REST_SORT_OPTIONS['items']['enumeration_chronology'] = dict(
fields=['-enumerationAndChronology'], title='Enumeration and Chronology',
default_order='desc'
@@ -2239,6 +2304,10 @@ def _(x):
fields=['library_name'], title='Library name',
default_order='asc'
)
+RECORDS_REST_SORT_OPTIONS['libraries']['code'] = dict(
+ fields=['code'], title='Library code',
+ default_order='asc'
+)
RECORDS_REST_DEFAULT_SORT['libraries'] = dict(
query='bestmatch', noquery='name')
@@ -2282,19 +2351,6 @@ def _(x):
RECORDS_REST_DEFAULT_SORT['patron_types'] = dict(
query='bestmatch', noquery='name')
-# ------ VENDORS SORT
-RECORDS_REST_SORT_OPTIONS['vendors']['name'] = dict(
- fields=['vendor_name'], title='Vendor name',
- default_order='asc'
-)
-RECORDS_REST_DEFAULT_SORT['vendors'] = dict(
- query='bestmatch', noquery='name')
-
-# ------ ITEMS SORT
-RECORDS_REST_SORT_OPTIONS['items']['issue_expected_date'] = dict(
- fields=['issue.expected_date'], title='Issue expected date',
- default_order='asc'
-)
# ------ TEMPLATES SORT
RECORDS_REST_SORT_OPTIONS['templates']['name'] = dict(
fields=['name_sort'], title='Template name',
@@ -2303,18 +2359,14 @@ def _(x):
RECORDS_REST_DEFAULT_SORT['templates'] = dict(
query='bestmatch', noquery='name')
-# ------ COLLECTIONS SORT
-RECORDS_REST_SORT_OPTIONS['collections']['start_date'] = dict(
- fields=['start_date', 'title_sort'], title='Start date and title',
- default_order='asc'
-)
-RECORDS_REST_SORT_OPTIONS['collections']['title'] = dict(
- fields=['title_sort'], title='title',
+# ------ VENDORS SORT
+RECORDS_REST_SORT_OPTIONS['vendors']['name'] = dict(
+ fields=['vendor_name'], title='Vendor name',
default_order='asc'
)
+RECORDS_REST_DEFAULT_SORT['vendors'] = dict(
+ query='bestmatch', noquery='name')
-RECORDS_REST_DEFAULT_SORT['collections'] = dict(
- query='bestmatch', noquery='start_date')
# Detailed View Configuration
# ===========================
@@ -2805,3 +2857,43 @@ def _(x):
# OAuth base template
OAUTH2SERVER_COVER_TEMPLATE = 'rero_ils/oauth/base.html'
+
+# STOP WORDS
+# Disregarded articles for sorting processes
+# ==========
+# ACTIVATE STOP WORDS NORMALIZATION
+RERO_ILS_STOP_WORDS_ACTIVATE = True
+# PUNCTUATION
+RERO_ILS_STOP_WORDS_PUNCTUATION = [
+ r'\[', r'\]', '"', ',', ';', ':', r'\.', '_',
+ r'\?', r'\!', r'\*', r'\+', '\n'
+]
+# STOP WORDS BY LANGUAGE
+# Possibility to add a default configuration with a "default" entry.
+# This default configuration will be used if the language is not present
+RERO_ILS_STOP_WORDS = {
+ 'dan': ["de", "den", "det", "en", "et"],
+ 'dut': [
+ "d'", "de", "den", "der", "des", "het", "'s", "'t", "een",
+ "eene", "eener", "eens", "ene", "'n"],
+ 'eng': ["a", "an", "the"],
+ 'epo': ["la", "l'", "unu"],
+ 'fre': ["de", "des", "du", "l'", "la", "le", "les", "un", "une"],
+ 'ger': [
+ "das", "dem", "den", "der", "des", "die",
+ "ein", "eine", "einem", "einen", "einer", "eines"],
+    'hun': ["a", "az", "egy"],
+ 'ita': [
+ "gli", "i", "il", "l'", "la", "le", "li", "lo",
+ "un", "un'", "una", "uno"],
+ 'nor': ["de", "dei", "den", "det", "ei", "en", "et"],
+ 'por': ["a", "as", "o", "os", "um", "uma", "umas", "uns"],
+ 'spa': ["el", "la", "las", "lo", "los", "un", "una", "unas", "unos"],
+ 'swe': ["de", "den", "det", "en", "ett"]
+}
+
+# LANGUAGE MAPPING
+# ================
+RERO_ILS_LANGUAGE_MAPPING = {
+ 'dum': 'dut' # neerlandais
+}
diff --git a/rero_ils/es_templates/v7/record.json b/rero_ils/es_templates/v7/record.json
index ae0cd26838..ba0a909ac8 100644
--- a/rero_ils/es_templates/v7/record.json
+++ b/rero_ils/es_templates/v7/record.json
@@ -6,6 +6,11 @@
"max_result_window": "100000",
"analysis": {
"filter": {
+ "edge_ngram_filter": {
+ "type": "edge_ngram",
+ "min_gram": 3,
+ "max_gram": 10
+ },
"french_elision": {
"type": "elision",
"articles_case": true,
@@ -25,6 +30,32 @@
"puisqu"
]
},
+ "italian_elision": {
+ "type": "elision",
+ "articles": [
+ "c", "l", "all", "dall", "dell",
+ "nell", "sull", "coll", "pell",
+ "gl", "agl", "dagl", "degl", "negl",
+ "sugl", "un", "m", "t", "s", "v", "d"
+ ],
+ "articles_case": true
+ },
+ "english_stop": {
+ "type": "stop",
+ "stopwords": "_english_"
+ },
+ "french_stop": {
+ "type": "stop",
+ "stopwords": "_french_"
+ },
+ "german_stop": {
+ "type": "stop",
+ "stopwords": "_german_"
+ },
+ "italian_stop": {
+ "type": "stop",
+ "stopwords": "_italian_"
+ },
"french_stemmer": {
"type": "stemmer",
"language": "light_french"
@@ -65,6 +96,26 @@
"icu_folding",
"german_normalization"
]
+ },
+ "autocomplete": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "icu_normalizer",
+ "icu_folding",
+ "french_elision",
+ "italian_elision",
+ "edge_ngram_filter"
+ ]
+ }
+ },
+ "normalizer": {
+ "sort_normalizer": {
+ "type": "custom",
+ "filter": [
+ "lowercase"
+ ]
}
}
}
diff --git a/rero_ils/modules/contributions/mappings/v7/contributions/contribution-v0.0.1.json b/rero_ils/modules/contributions/mappings/v7/contributions/contribution-v0.0.1.json
index ac1bf5afc1..3bc1b25499 100644
--- a/rero_ils/modules/contributions/mappings/v7/contributions/contribution-v0.0.1.json
+++ b/rero_ils/modules/contributions/mappings/v7/contributions/contribution-v0.0.1.json
@@ -29,6 +29,18 @@
"analyzer": "autocomplete",
"search_analyzer": "standard"
},
+ "gnd_authorized_access_point_sort": {
+ "type": "keyword",
+ "normalizer": "sort_normalizer"
+ },
+ "idref_authorized_access_point_sort": {
+ "type": "keyword",
+ "normalizer": "sort_normalizer"
+ },
+ "rero_authorized_access_point_sort": {
+ "type": "keyword",
+ "normalizer": "sort_normalizer"
+ },
"$schema": {
"type": "keyword"
},
@@ -100,7 +112,10 @@
},
"authorized_access_point": {
"type": "text",
- "copy_to": "autocomplete_name"
+ "copy_to": [
+ "autocomplete_name",
+ "gnd_authorized_access_point_sort"
+ ]
},
"qualifier": {
"type": "keyword"
@@ -203,7 +218,10 @@
},
"authorized_access_point": {
"type": "text",
- "copy_to": "autocomplete_name"
+ "copy_to": [
+ "autocomplete_name",
+ "idref_authorized_access_point_sort"
+ ]
},
"qualifier": {
"type": "keyword"
@@ -306,7 +324,10 @@
},
"authorized_access_point": {
"type": "text",
- "copy_to": "autocomplete_name"
+ "copy_to": [
+ "autocomplete_name",
+ "rero_authorized_access_point_sort"
+ ]
},
"qualifier": {
"type": "keyword"
diff --git a/rero_ils/modules/documents/listener.py b/rero_ils/modules/documents/listener.py
index 0c78e6cd71..83d5d63d6d 100644
--- a/rero_ils/modules/documents/listener.py
+++ b/rero_ils/modules/documents/listener.py
@@ -17,6 +17,7 @@
"""Signals connector for Document."""
+from flask.globals import current_app
from isbnlib import is_isbn10, is_isbn13, to_isbn10, to_isbn13
from .utils import create_contributions, title_format_text_head
@@ -26,6 +27,7 @@
from ..items.models import ItemNoteTypes
from ..local_fields.api import LocalField
from ..utils import extracted_data_from_ref
+from ...utils import language_mapping
def enrich_document_data(sender, json=None, record=None, index=None,
@@ -139,17 +141,24 @@ def enrich_document_data(sender, json=None, record=None, index=None,
)
json['title'].append(title)
- json['sort_title'] = title_format_text_head(
+ # sort title
+ sort_title = title_format_text_head(
json.get('title', []),
with_subtitle=True
)
+    language = language_mapping((json.get('language') or [{}])[0].get('value'))
+ if current_app.config.get('RERO_ILS_STOP_WORDS_ACTIVATE', False):
+ sort_title = current_app.\
+ extensions['reroils-normalizer-stop-words'].\
+ normalize(sort_title, language)
+ json['sort_title'] = sort_title
# Local fields in JSON
local_fields = LocalField.get_local_fields_by_resource(
'doc', document_pid)
if local_fields:
json['local_fields'] = local_fields
- # index both ISBN 10 and 13 format
+ # index both ISBN 10 and 13 format
def filter_isbn(identified_by):
"""Filter identified_by for type bf:Isbn."""
return identified_by.get('type') == 'bf:Isbn'
@@ -168,3 +177,14 @@ def filter_isbn(identified_by):
isbns.add(to_isbn10(isbn))
if isbns:
json['isbn'] = list(isbns)
+
+ # Populate sort date new and old for use in sorting
+ pub_provisions = [
+ p for p in record.get('provisionActivity', [])
+ if p['type'] == 'bf:Publication'
+ ]
+ pub_provision = next(iter(pub_provisions), None)
+ if pub_provision:
+ json['sort_date_new'] = \
+ pub_provision.get('endDate', pub_provision.get('startDate'))
+ json['sort_date_old'] = pub_provision.get('startDate')
diff --git a/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json b/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json
index 22d8d2a1f9..cc2c6b99b5 100644
--- a/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json
+++ b/rero_ils/modules/documents/mappings/v7/documents/document-v0.0.1.json
@@ -1,25 +1,4 @@
{
- "settings": {
- "analysis": {
- "filter": {
- "autocomplete_filter": {
- "type": "edge_ngram",
- "min_gram": 1,
- "max_gram": 20
- }
- },
- "analyzer": {
- "autocomplete": {
- "type": "custom",
- "tokenizer": "standard",
- "filter": [
- "lowercase",
- "autocomplete_filter"
- ]
- }
- }
- }
- },
"mappings": {
"date_detection": false,
"numeric_detection": false,
@@ -154,11 +133,11 @@
},
"autocomplete_title": {
"type": "text",
- "analyzer": "autocomplete",
- "search_analyzer": "standard"
+ "analyzer": "autocomplete"
},
"sort_title": {
- "type": "keyword"
+ "type": "keyword",
+ "normalizer": "sort_normalizer"
},
"responsibilityStatement": {
"type": "object",
@@ -368,6 +347,12 @@
}
}
},
+ "sort_date_new": {
+ "type": "integer"
+ },
+ "sort_date_old": {
+ "type": "integer"
+ },
"provisionActivity": {
"type": "object",
"properties": {
diff --git a/rero_ils/modules/ext.py b/rero_ils/modules/ext.py
index 93dc58c0ba..756d01185f 100644
--- a/rero_ils/modules/ext.py
+++ b/rero_ils/modules/ext.py
@@ -47,6 +47,7 @@
from .items.listener import enrich_item_data
from .loans.listener import enrich_loan_data, listener_loan_state_changed
from .locations.listener import enrich_location_data
+from .normalizer_stop_words import NormalizerStopWords
from .notifications.listener import enrich_notification_data
from .patron_transaction_events.listener import \
enrich_patron_transaction_event_data
@@ -95,6 +96,7 @@ def init_app(self, app):
"""Flask application initialization."""
Bootstrap(app)
Wiki(app)
+ NormalizerStopWords(app)
self.init_config(app)
app.extensions['rero-ils'] = self
self.register_import_api_blueprint(app)
diff --git a/rero_ils/modules/normalizer_stop_words.py b/rero_ils/modules/normalizer_stop_words.py
new file mode 100644
index 0000000000..7070587414
--- /dev/null
+++ b/rero_ils/modules/normalizer_stop_words.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Normalized sort for rero-ils."""
+
+import re
+
+
+class NormalizerStopWords():
+ """Normalizer Stop words."""
+
+ stop_words_punctuation = []
+ stop_words_regex = {}
+
+ def __init__(self, app=None):
+ """Init."""
+ self.app = app
+ if app is not None:
+ self.init_app(app)
+
+ def init_app(self, app):
+ """Flask application initialization."""
+ if app.config.get('RERO_ILS_STOP_WORDS_ACTIVATE', False):
+ self.init_config(app)
+ app.extensions['reroils-normalizer-stop-words'] = self
+
+ def init_config(self, app):
+ """Initialize configuration."""
+ punc = app.config.get('RERO_ILS_STOP_WORDS_PUNCTUATION', [])
+ self.stop_words_punctuation = '|'.join(punc)
+ stop_words = app.config.get('RERO_ILS_STOP_WORDS', {})
+ if stop_words:
+ # Generating a regex per language
+ for lang, words in stop_words.items():
+ self.stop_words_regex[lang] = \
+ r'\b(' + r'|'.join(words) + r')\b\s*'
+
+ def normalize(self, text, language=None):
+ """Normalize.
+
+ :param text: Text to be normalized
+ :param language: Language of the text
+ :returns: Normalized text
+ """
+ word_regex = self.stop_words_regex.get(
+ language,
+ self.stop_words_regex.get('default')
+ )
+ if word_regex:
+ compiled = re.compile(fr'{word_regex}', re.IGNORECASE)
+ text = compiled.sub('', text)
+ if self.stop_words_punctuation:
+ compiled = re.compile(
+ fr'{self.stop_words_punctuation}', re.IGNORECASE)
+ text = compiled.sub('', text)
+ return re.sub(r'\s+', ' ', text).strip()
diff --git a/rero_ils/utils.py b/rero_ils/utils.py
index 4ff15d3ff8..b2b147bfdf 100644
--- a/rero_ils/utils.py
+++ b/rero_ils/utils.py
@@ -127,3 +127,13 @@ def language_iso639_2to1(lang):
return default_ln
supported_languages = [v[0] for v in current_i18n.get_languages()]
return ln if ln in supported_languages else default_ln
+
+
+def language_mapping(lang):
+ """Language mapping.
+
+ :param lang: bibliographic language code
+ :returns: language mapping
+ """
+ return current_app.config.get('RERO_ILS_LANGUAGE_MAPPING', {})\
+ .get(lang, lang)
diff --git a/tests/api/test_external_services.py b/tests/api/test_external_services.py
index be1227eea5..140755bd74 100644
--- a/tests/api/test_external_services.py
+++ b/tests/api/test_external_services.py
@@ -45,6 +45,7 @@ def clean_authorized_access_point(data):
contribution['agent'] = agent
contributions.append(contribution)
+ data.pop('sort_date_new', None)
data.pop('sort_title', None)
data.pop('isbn', None)
return data
diff --git a/tests/unit/test_normalizer_stop_words.py b/tests/unit/test_normalizer_stop_words.py
new file mode 100644
index 0000000000..f75d205a67
--- /dev/null
+++ b/tests/unit/test_normalizer_stop_words.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""Normalizer stop words tests."""
+
+from rero_ils.modules.normalizer_stop_words import NormalizerStopWords
+
+
+def test_normalize(app):
+ """Test stop words normalize."""
+ # ---- The string is not analyzed
+ app.config['RERO_ILS_STOP_WORDS_ACTIVATE'] = False
+ normalizer = NormalizerStopWords(app)
+ text = "L'été a été très chaud."
+ assert text == normalizer.normalize(text)
+
+ # ---- The string is analyzed
+ app.config['RERO_ILS_STOP_WORDS_ACTIVATE'] = True
+ app.config['RERO_ILS_STOP_WORDS_PUNCTUATION'] = [
+ '"', ',', ';', ':', r'\.', '_', r'\?', r'\!', r'\*', r'\+', '\n'
+ ]
+ normalizer = NormalizerStopWords(app)
+ text = "L'été a été très chaud."
+ text_norm = "L'été a été très chaud"
+ # The language is not defined. Removal of punctuation only.
+ assert text_norm == normalizer.normalize(text)
+
+ # Deleting words for the defined language.
+ text_norm = "été a été très chaud"
+ app.config['RERO_ILS_STOP_WORDS'] = {
+ 'fre': ["de", "des", "du", "l'", "la", "le", "les", "un", "une"]
+ }
+ assert text_norm == normalizer.normalize(text, 'fre')
+
+ text = 'Journal des tribunaux : jurisprudence fédérale. ' \
+ '4, Droit pénal et procédure pénale'
+ text_norm = 'Journal tribunaux jurisprudence fédérale ' \
+ '4 Droit pénal et procédure pénale'
+ assert text_norm == normalizer.normalize(text, 'fre')
+
+ # The language was not found in the definition of stop words.
+ text = "He plays this musical phrase quite well."
+ text_norm = "He plays this musical phrase quite well"
+ assert text_norm == normalizer.normalize(text, 'eng')
+
+ # Deleting words with the default definition.
+ text = "L'été a été très chaud."
+ text_norm = "été a été chaud"
+ app.config['RERO_ILS_STOP_WORDS'] = {
+ 'default': ["l'", "très"]
+ }
+ normalizer = NormalizerStopWords(app)
+ assert text_norm == normalizer.normalize(text, 'und')
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 82d4c335ad..21362edfe0 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -25,7 +25,7 @@
from rero_ils.modules.utils import add_years, extracted_data_from_ref, \
get_endpoint_configuration, get_schema_for_resource, read_json_record
from rero_ils.utils import get_current_language, language_iso639_2to1, \
- unique_list
+ language_mapping, unique_list
def test_unique_list():
@@ -110,3 +110,9 @@ def test_language_iso639_2to1(app):
assert language_iso639_2to1('ita') == 'it'
# default language
assert language_iso639_2to1('rus') == 'en'
+
+
+def test_language_mapping(app):
+ """Test language mapping."""
+ assert 'fre' == language_mapping('fre')
+ assert 'dut' == language_mapping('dum')