Skip to content

Commit

Permalink
document: add new acquisition aggregation
Browse files Browse the repository at this point in the history
* Adds aggregation acquisition.
* Optimizes aggregation year.
* Adds values according to facet results (year, acquisition).
* Changes acquisition filter to include max date.
* Closes #3525.

Co-Authored-by: Bertrand Zuchuat <bertrand.zuchuat@rero.ch>
  • Loading branch information
Garfield-fr committed Nov 20, 2023
1 parent 9154312 commit 438ab6a
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 21 deletions.
19 changes: 18 additions & 1 deletion rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1744,7 +1744,20 @@ def _(x):
),
status=dict(terms=dict(field='holdings.items.status', size=DOCUMENTS_AGGREGATION_SIZE)),
intendedAudience=dict(terms=dict(field='intendedAudience.value', size=DOCUMENTS_AGGREGATION_SIZE)),
year=dict(date_histogram=dict(field='provisionActivity.startDate', interval='year', format='yyyy'))
year=dict(
filter=dict(bool=dict(filter=[])),
aggs=dict(
year_min=dict(min=dict(field='provisionActivity.startDate')),
year_max=dict(max=dict(field='provisionActivity.startDate'))
)
),
acquisition=dict(
nested=dict(path='holdings.items.acquisition'),
aggs=dict(
date_min=dict(min=dict(field='holdings.items.acquisition.date', format='yyyy-MM-dd')),
date_max=dict(max=dict(field='holdings.items.acquisition.date', format='yyyy-MM-dd'))
)
)
),
filters={
_('online'): or_terms_filter_by_criteria({
Expand All @@ -1771,6 +1784,10 @@ def _(x):
'facet_genre_form_en': FICTIONS_TERMS}
]
),
# This filter is used with millisecond timestamps separated by '--'
# --> Ex: &acquisition=1696111200000--1700089200000
_('acquisition'): acquisition_filter(),
# This filter is only used for constructed queries
# --> Ex: &new_acquisition=2020-01-01:2021-01-01
_('new_acquisition'): acquisition_filter(),
_('identifiers'): nested_identified_filter()
},
Expand Down
25 changes: 18 additions & 7 deletions rero_ils/modules/documents/query.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2022 RERO
# Copyright (C) 2019-2022 UCLouvain
# Copyright (C) 2019-2023 RERO
# Copyright (C) 2019-2023 UCLouvain
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -18,6 +18,7 @@

"""Query factories for Document REST API."""
import re
from datetime import datetime

from elasticsearch_dsl import Q
from flask import request
Expand All @@ -36,7 +37,7 @@ def inner(values):
# this date will be included into the search result ('<=').
# if not specified the '*' value will be used
# 2) until_date (optional) : the upper limit range acquisition_date.
# this date will be excluded from the search result ('>').
# this date will be included in the search result ('>=').
# if not specified the current timestamp value will be used
# !!! Other filters could be used to restrict data result : This
# function will check for 'organisation' and/or 'library' and/or
Expand All @@ -45,15 +46,25 @@ def inner(values):
# SOME EXAMPLES :
# * ?new_acquisition=2020-01-01&organisation=1
# --> all new acq for org with pid=1 from 2020-01-01 to now
# * ?library=3&new_acquisition=2020-01-01:2021-01-01
# * ?library=3&new_acquisition=2020-01-01:2020-12-31
# --> all new acq for library with pid=3 for the 2020 year
# * ?location=17&library=2&new_acquisition=:2020-01-01
# * ?location=17&library=2&new_acquisition=:2019-12-31
# --> all new acq for (location with pid=17 and library with
# pid=2) until Jan 1, 2020 (excluded)

# build acquisition date range query
values = dict(zip(['from', 'to'], values.pop().split(':')))
range_acquisition_dates = {'lt': values.get('to') or 'now/d'}
range_values = values.pop()
if '--' in range_values:
values = dict(zip(['from', 'to'], range_values.split('--')))
if 'from' in values:
values['from'] = datetime.fromtimestamp(
float(values['from'])/1000).strftime('%Y-%m-%d')
if 'to' in values:
values['to'] = datetime.fromtimestamp(
float(values['to'])/1000).strftime('%Y-%m-%d')
else:
values = dict(zip(['from', 'to'], range_values.split(':')))
range_acquisition_dates = {'lte': values.get('to') or 'now/d'}
if values.get('from'):
range_acquisition_dates['gte'] = values.get('from')

Expand Down
56 changes: 49 additions & 7 deletions rero_ils/modules/documents/serializers/json.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2022 RERO
# Copyright (C) 2019-2022 UCLouvain
# Copyright (C) 2019-2023 RERO
# Copyright (C) 2019-2023 UCLouvain
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -18,6 +18,8 @@

"""RERO Document JSON serialization."""

from datetime import datetime

from flask import current_app, json, request, stream_with_context
from werkzeug.local import LocalProxy

Expand Down Expand Up @@ -117,11 +119,51 @@ def _postprocess_search_aggregations(self, aggregations: dict) -> None:
# format the results of the facet 'year' to be displayed
# as range
if aggregations.get('year'):
aggregations['year']['type'] = 'range'
aggregations['year']['config'] = {
'min': -9999,
'max': 9999,
'step': 1
def extract_year(key, default):
    """Read one bound of the 'year' facet from the aggregation result.

    :param key: name of the sub-aggregation to read in
        ``aggregations['year']``.
    :param default: value returned when the sub-aggregation carries no
        usable value.
    :return: the aggregated value cast to ``int``, or ``default``.
    """
    bucket = aggregations['year'][key]
    # A missing, null or zero value falls back to the default.
    if 'value' not in bucket or not bucket['value']:
        return default
    return int(bucket['value'])

aggregations['year'] = {
'type': 'range',
'config': {
'min': extract_year('year_min', -9999),
'max': extract_year('year_max', 9999),
'step': 1
}
}

if aggregations.get('acquisition'):
# format the results of facet 'acquisition' to be displayed
# as date range with min and max date (limit)
def extract_acquisition_date(key, default):
    """Extract a date bound from the acquisition aggregation.

    :param key: name of the sub-aggregation to read in
        ``aggregations['acquisition']``.
    :param default: the date returned when no value is available.
    :return: ``value_as_string`` when present, otherwise the raw
        ``value`` when it is truthy, otherwise ``default``.
    """
    result = aggregations['acquisition'][key]
    # NOTE(review): value_as_string is presumably already formatted as
    # yyyy-MM-dd by the aggregation definition -- confirm in the config.
    if 'value_as_string' in result:
        return result['value_as_string']
    # Use .get() so a result carrying neither key yields the default
    # instead of raising KeyError.
    return result.get('value') or default

del aggregations['acquisition']['doc_count']
aggregations['acquisition'] = {
'type': 'date-range',
'config': {
'min': extract_acquisition_date('date_min', '1900-01-01'),
'max': extract_acquisition_date(
'date_max', datetime.now().strftime('%Y-%m-%d'))
}
}

if aggr_org := aggregations.get('organisation', {}).get('buckets', []):
Expand Down
6 changes: 6 additions & 0 deletions rero_ils/theme/templates/rero_ils/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
#}

{% extends 'rero_ils/page.html' %}
{%- block css %}
{{ super() }}
{{ node_assets('@rero/rero-ils-ui/dist/public-search', ['styles.*css'], 'css') }}
{%- endblock %}

{%- block page_body %}
<public-search-root class="container mt-4"></public-search-root>
Expand All @@ -26,4 +30,6 @@
{%- block javascript %}
{{ webpack['reroils_public.js']}}
{{ node_assets('@rero/rero-ils-ui/dist/public-search', tags='type="module"') }}


{%- endblock javascript %}
33 changes: 27 additions & 6 deletions tests/api/documents/test_documents_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,24 @@ def test_documents_newacq_filters(app, client,
):
login_user_via_session(client, system_librarian_martigny.user)

def datetime_delta(**args):
    """Return the current local datetime shifted by a timedelta.

    :param args: keyword arguments forwarded to ``timedelta``
        (e.g. ``days=-1``).
    :return: ``datetime.now()`` plus the requested offset.
    """
    offset = timedelta(**args)
    return datetime.now() + offset

def datetime_milliseconds(date):
    """Convert a datetime to a rounded millisecond timestamp.

    :param date: the datetime to convert.
    :return: the POSIX timestamp of ``date`` in milliseconds, rounded
        to the nearest integer.
    """
    milliseconds = date.timestamp() * 1000
    return round(milliseconds)

# compute useful date
today = datetime.today()
past = (today - timedelta(days=1)).strftime('%Y-%m-%d')
future = (today + timedelta(days=10)).strftime('%Y-%m-%d')
future_1 = (today + timedelta(days=11)).strftime('%Y-%m-%d')
today = datetime.now()
past = datetime_delta(days=-1).strftime('%Y-%m-%d')
past_timestamp = datetime_milliseconds(datetime_delta(days=-30))
future = datetime_delta(days=10).strftime('%Y-%m-%d')
future_1 = datetime_delta(days=11).strftime('%Y-%m-%d')
future_1_timestamp = datetime_milliseconds(datetime_delta(days=1))
today = today.strftime('%Y-%m-%d')

# Add a new items with acq_date
# Add new items with acq_date
new_acq1 = deepcopy(item_lib_martigny_data)
new_acq1['pid'] = 'itemacq1'
new_acq1['acquisition_date'] = today
Expand Down Expand Up @@ -135,6 +145,17 @@ def test_documents_newacq_filters(app, client,
data = get_json(res)
assert data['hits']['total']['value'] == 0

# check acquisition filter with -- separator and millisecond timestamps
# Ex: 1696111200000--1700089200000
doc_list = url_for(
'invenio_records_rest.doc_list',
view='global',
acquisition='{0}--{1}'.format(past_timestamp, future_1_timestamp),
)
res = client.get(doc_list, headers=rero_json_header)
data = get_json(res)
assert data['hits']['total']['value'] == 1


@mock.patch('invenio_records_rest.views.verify_record_permission',
mock.MagicMock(return_value=VerifyRecordPermissionPatch))
Expand All @@ -150,7 +171,7 @@ def test_documents_facets(
facet_keys = [
'document_type', 'author', 'language', 'subject_no_fiction',
'subject_fiction', 'genreForm', 'intendedAudience',
'year', 'status'
'year', 'status', 'acquisition'
]
assert all(key in data['aggregations'] for key in facet_keys)

Expand Down

0 comments on commit 438ab6a

Please sign in to comment.