Skip to content

Commit

Permalink
stats: add periodic task to generate reports
Browse files Browse the repository at this point in the history
* Renames private method.
* Adds periodic task for monthly and yearly.
* Adds cli to generate synchronous and asynchronous reports.
* Enables all tasks but ebook during the setup.

Co-Authored-by: Johnny Mariéthoz <Johnny.Mariethoz@rero.ch>
  • Loading branch information
jma committed Nov 16, 2023
1 parent 36001d1 commit 6b5633c
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 32 deletions.
16 changes: 16 additions & 0 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,22 @@ def _(x):
'schedule': crontab(minute=30, hour=1, day_of_month='1'), # First day of the month at 01:30 UTC,
'enabled': False
},
'collect-stats-report-month': {
'task': ('rero_ils.modules.stats.tasks.collect_stats_reports'),
'schedule': crontab(minute=0, hour=1, day_of_month='1'), # First day of the month at 01:30 UTC,
'kwargs': {
'types': 'month'
},
'enabled': False
},
'collect-stats-report-year': {
'task': ('rero_ils.modules.stats.tasks.collect_stats_reports'),
'schedule': crontab(minute=0, hour=1, day_of_month='1', month_of_year='1'), # First day of the month at 01:30 UTC,
'kwargs': {
'types': 'year'
},
'enabled': False
},
'delete-provisional-items': {
'task': 'rero_ils.modules.items.tasks.delete_provisional_items',
'schedule': crontab(minute=0, hour=3), # Every day at 03:00 UTC,
Expand Down
2 changes: 1 addition & 1 deletion rero_ils/modules/stats/api/indicators/circulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def query(self):
.filter('term', loan__trigger=self.trigger)
if period := self.cfg.period:
es_query = es_query.filter(
'range', date=self.cfg._get_range_period(period))
'range', date=self.cfg.get_range_period(period))
if lib_pids := self.cfg.filter_by_libraries:
loc_pids = [
hit.pid for hit in LocationsSearch().filter(
Expand Down
4 changes: 2 additions & 2 deletions rero_ils/modules/stats/api/indicators/others.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def query(self):
.filter('term', operation='delete')
if period := self.cfg.period:
es_query = es_query.filter(
'range', date=self.cfg._get_range_period(period))
'range', date=self.cfg.get_range_period(period))
if pids := self.cfg.filter_by_libraries:
es_query = es_query.filter(
'terms', record__library_pid=pids)
Expand Down Expand Up @@ -315,7 +315,7 @@ def query(self):
'term', organisation__pid=self.cfg.org_pid)
if period := self.cfg.period:
es_query = es_query.filter(
'range', _created=self.cfg._get_range_period(period))
'range', _created=self.cfg.get_range_period(period))
if pids := self.cfg.filter_by_libraries:
es_query = es_query.filter(
'terms', library__pid=pids)
Expand Down
2 changes: 1 addition & 1 deletion rero_ils/modules/stats/api/indicators/patron.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def query(self):
:returns: an elasticsearch query object
"""
es_query = super().query
range_period = self.cfg._get_range_period(self.cfg.period)
range_period = self.cfg.get_range_period(self.cfg.period)
op_query = LoanOperationLogsSearch()\
.source('loan.patron.pid')\
.get_logs_by_trigger(
Expand Down
25 changes: 24 additions & 1 deletion rero_ils/modules/stats/api/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from .indicators import NumberOfActivePatronsCfg, NumberOfCirculationCfg, \
NumberOfDeletedItemsCfg, NumberOfDocumentsCfg, NumberOfILLRequests, \
NumberOfItemsCfg, NumberOfPatronsCfg, NumberOfSerialHoldingsCfg
from ..api.api import Stat
from ..models import StatType


class StatsReport:
Expand Down Expand Up @@ -174,7 +176,7 @@ def collect(self, force=False):

return self._process_aggregations(results)

def _get_range_period(self, period):
def get_range_period(self, period):
"""Get the range period for elasticsearch date range aggs."""
if period == 'month':
# now - 1 month
Expand All @@ -193,3 +195,24 @@ def _get_range_period(self, period):
_from = f'{previous_year}-01-01T00:00:00'
_to = f'{previous_year}-12-31T23:59:59'
return dict(gte=_from, lte=_to)

def create_stat(self, values, dbcommit=True, reindex=True):
    """Create and store a stat report record.

    :param values: array - values computed by the StatsReport class.
    :param dbcommit: bool - if True commit the database transaction.
    :param reindex: bool - if True index the document.
    :returns: the created report, as returned by `Stat.create`.
    """
    data = dict(
        type=StatType.REPORT,
        config=self.config.dumps(),
        values=[dict(results=values)]
    )
    # `get_range_period` gives a falsy result for a period it does not
    # support; only add the date range when one was actually computed
    # instead of failing while subscripting that result.
    if self.period and (
            period_range := self.get_range_period(self.period)):
        data['date_range'] = {
            'from': period_range['gte'],
            'to': period_range['lte']
        }
    return Stat.create(data, dbcommit=dbcommit, reindex=reindex)
35 changes: 21 additions & 14 deletions rero_ils/modules/stats/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,17 +227,24 @@ def collect(pid):
click.secho(f'Configuration does not exists.', fg='red')
else:
stat_report = StatsReport(cfg)
res = stat_report.collect()
data = dict(
type=StatType.REPORT,
config=cfg.dumps(),
values=[dict(results=res)]
)
if stat_report.period:
range = stat_report._get_range_period(stat_report.period)
data['date_range'] = {
'from': range['gte'],
'to': range['lte']
}

Stat.create(data, dbcommit=True, reindex=True)
values = stat_report.collect()
stat_report.create_stat(values)


@report.command()
@click.argument('frequency', type=click.Choice(['month', 'year']))
@click.option('--delayed', '-d', is_flag=True,
              help='Run the report generation in background.')
@with_appcontext
def collect_all(frequency, delayed):
    """Generate and store the stats reports for a given frequency.

    :param frequency: frequency of the report configurations to process
        (`month` or `year`).
    :param delayed: if True, run the generation as a background task
        instead of running it synchronously.
    """
    # NOTE(review): imported locally, presumably to avoid a circular
    # import between the cli and tasks modules - confirm.
    from .tasks import collect_stats_reports
    if delayed:
        res = collect_stats_reports.delay(frequency)
        click.secho(f'Generated reports delayed, task id: {res}', fg='green')
    else:
        # Direct call: the task runs in the current process and returns
        # the list of the pids of the created reports.
        res = collect_stats_reports(frequency)
        click.secho(f'Generated {len(res)} reports.', fg='green')
31 changes: 30 additions & 1 deletion rero_ils/modules/stats/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,13 @@
from celery import shared_task
from flask import current_app

from rero_ils.modules.stats_cfg.api import StatConfiguration, \
StatsConfigurationSearch

from .api.api import Stat
from .api.librarian import StatsForLibrarian
from .api.pricing import StatsForPricing
from .api.report import StatsReport
from .models import StatType


Expand All @@ -40,7 +44,7 @@ def collect_stats_billing():

@shared_task()
def collect_stats_librarian():
"""Collect and store the montly statistics for librarian."""
"""Collect and store the monthly statistics for librarian."""
stats_librarian = StatsForLibrarian()
date_range = {'from': stats_librarian.date_range['gte'],
'to': stats_librarian.date_range['lte']}
Expand All @@ -52,3 +56,28 @@ def collect_stats_librarian():
dbcommit=True, reindex=True)
return f'New statistics of type {stat["type"]} has\
been created with a pid of: {stat.pid}'


@shared_task()
def collect_stats_reports(frequency='month'):
    """Generate and store a report for each matching configuration.

    Every statistics configuration whose `frequency` field equals the
    given value is processed. A failure on one configuration is logged
    and does not prevent the remaining ones from being processed.

    :param frequency: str - frequency value used to select the
        configurations.
    :returns: the list of the pids of the created reports.
    """
    search = StatsConfigurationSearch() \
        .filter('term', frequency=frequency) \
        .source('pid')
    # Read all the pids before generating anything so that the scan is
    # not kept open while the reports are computed.
    cfg_pids = [hit.pid for hit in search.scan()]
    created_pids = []
    logger = current_app.logger
    for cfg_pid in cfg_pids:
        try:
            stat_report = StatsReport(
                StatConfiguration.get_record_by_pid(cfg_pid))
            report = stat_report.create_stat(stat_report.collect())
            created_pids.append(report.pid)
        except Exception as err:
            logger.error(
                f'Unable to generate report from config({cfg_pid}) :: {err}',
                exc_info=True, stack_info=True
            )
    return created_pids
4 changes: 2 additions & 2 deletions rero_ils/modules/stats_cfg/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from rero_ils.modules.fetchers import id_fetcher
from rero_ils.modules.minters import id_minter
from rero_ils.modules.providers import Provider
from rero_ils.modules.stats_cfg.models import StatCfgIdentifier, \
StatCfgMetadata

from .models import StatCfgIdentifier, StatCfgMetadata

# provider
StatCfgProvider = type(
Expand Down
13 changes: 7 additions & 6 deletions scripts/setup
Original file line number Diff line number Diff line change
Expand Up @@ -608,18 +608,17 @@ fi
# # OAI configuration
info_msg "OAI configuration: ${DATA_PATH}/oaisources.yml"
eval ${PREFIX} invenio reroils oaiharvester initconfig ${DATA_PATH}/oaisources.yml

eval ${PREFIX} invenio reroils scheduler enable_tasks -a -v
# disable ebook harvesting
eval ${PREFIX} invenio reroils scheduler enable_tasks -n ebooks-harvester -d

if ${DEPLOYMENT}
then
eval ${PREFIX} invenio reroils scheduler enable_tasks -a -v
# disable ebook harvesting
eval ${PREFIX} invenio reroils scheduler enable_tasks -n ebooks-harvester -d
# start OAI harvesting asynchronously: beats must be running
info_msg "Start OAI harvesting asynchrone"
eval ${PREFIX} invenio reroils oaiharvester harvest -n ebooks -a max=150 -q -k
else
eval ${PREFIX} invenio reroils scheduler enable_tasks -n scheduler-timestamp -n bulk-indexer -n anonymize-loans -n claims-creation -n accounts -n clear_and_renew_subscriptions -n collect-stats-billing -n collect-stats-librarian -v
eval ${PREFIX} invenio reroils scheduler enable_tasks -n notification-creation -n notification-dispatch-availability -n notification-dispatch-recall -n sync-agents -v
eval ${PREFIX} invenio reroils scheduler enable_tasks -n cancel-expired-request -n delete-provisional-items -n delete-loans-created -v
info_msg "For ebooks harvesting run:"
msg "\tinvenio reroils oaiharvester harvest -n ebooks -a max=100 -q"
fi
Expand Down Expand Up @@ -650,6 +649,8 @@ fi

info_msg "Collect statistics"
eval ${PREFIX} invenio reroils stats collect billing
eval ${PREFIX} invenio reroils stats report collect-all month
eval ${PREFIX} invenio reroils stats report collect-all year

date
success_msg "Perfect ${PROGRAM}! See you soon…"
Expand Down
8 changes: 4 additions & 4 deletions tests/ui/stats/test_stats_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ def test_stats_report_range(app):
'rero_ils.modules.stats.api.report.datetime'
) as mock_datetime:
mock_datetime.now.return_value = datetime(year=2023, month=2, day=1)
assert StatsReport(cfg)._get_range_period('month') == \
assert StatsReport(cfg).get_range_period('month') == \
dict(gte='2023-01-01T00:00:00', lte='2023-01-31T23:59:59')
assert StatsReport(cfg)._get_range_period('year') == \
assert StatsReport(cfg).get_range_period('year') == \
dict(gte='2022-01-01T00:00:00', lte='2022-12-31T23:59:59')
mock_datetime.now.return_value = datetime(year=2023, month=1, day=5)
assert StatsReport(cfg)._get_range_period('month') == \
assert StatsReport(cfg).get_range_period('month') == \
dict(gte='2022-12-01T00:00:00', lte='2022-12-31T23:59:59')
assert not StatsReport(cfg)._get_range_period('foo')
assert not StatsReport(cfg).get_range_period('foo')

0 comments on commit 6b5633c

Please sign in to comment.